/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate #include <sys/types.h> 297c478bd9Sstevel@tonic-gate #include <sys/devops.h> 307c478bd9Sstevel@tonic-gate #include <sys/conf.h> 317c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 327c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 337c478bd9Sstevel@tonic-gate #include <sys/stat.h> 347c478bd9Sstevel@tonic-gate #include <sys/poll_impl.h> 357c478bd9Sstevel@tonic-gate #include <sys/errno.h> 367c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 377c478bd9Sstevel@tonic-gate #include <sys/mkdev.h> 387c478bd9Sstevel@tonic-gate #include <sys/debug.h> 397c478bd9Sstevel@tonic-gate #include <sys/file.h> 407c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 417c478bd9Sstevel@tonic-gate #include <sys/systm.h> 427c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 437c478bd9Sstevel@tonic-gate #include <sys/devpoll.h> 447c478bd9Sstevel@tonic-gate #include <sys/rctl.h> 457c478bd9Sstevel@tonic-gate #include <sys/resource.h> 467c478bd9Sstevel@tonic-gate 477c478bd9Sstevel@tonic-gate #define RESERVED 1 487c478bd9Sstevel@tonic-gate 497c478bd9Sstevel@tonic-gate /* local data struct */ 507c478bd9Sstevel@tonic-gate static dp_entry_t **devpolltbl; /* dev poll entries */ 517c478bd9Sstevel@tonic-gate static size_t dptblsize; 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate static kmutex_t devpoll_lock; /* lock protecting dev tbl */ 547c478bd9Sstevel@tonic-gate int devpoll_init; /* is /dev/poll initialized already */ 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate /* device local functions */ 577c478bd9Sstevel@tonic-gate 587c478bd9Sstevel@tonic-gate static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp); 597c478bd9Sstevel@tonic-gate static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp); 607c478bd9Sstevel@tonic-gate static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, 
cred_t *credp, 617c478bd9Sstevel@tonic-gate int *rvalp); 627c478bd9Sstevel@tonic-gate static int dppoll(dev_t dev, short events, int anyyet, short *reventsp, 637c478bd9Sstevel@tonic-gate struct pollhead **phpp); 647c478bd9Sstevel@tonic-gate static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp); 657c478bd9Sstevel@tonic-gate static dev_info_t *dpdevi; 667c478bd9Sstevel@tonic-gate 677c478bd9Sstevel@tonic-gate 687c478bd9Sstevel@tonic-gate static struct cb_ops dp_cb_ops = { 697c478bd9Sstevel@tonic-gate dpopen, /* open */ 707c478bd9Sstevel@tonic-gate dpclose, /* close */ 717c478bd9Sstevel@tonic-gate nodev, /* strategy */ 727c478bd9Sstevel@tonic-gate nodev, /* print */ 737c478bd9Sstevel@tonic-gate nodev, /* dump */ 747c478bd9Sstevel@tonic-gate nodev, /* read */ 757c478bd9Sstevel@tonic-gate dpwrite, /* write */ 767c478bd9Sstevel@tonic-gate dpioctl, /* ioctl */ 777c478bd9Sstevel@tonic-gate nodev, /* devmap */ 787c478bd9Sstevel@tonic-gate nodev, /* mmap */ 797c478bd9Sstevel@tonic-gate nodev, /* segmap */ 807c478bd9Sstevel@tonic-gate dppoll, /* poll */ 81a913d554Scth ddi_prop_op, /* prop_op */ 827c478bd9Sstevel@tonic-gate (struct streamtab *)0, /* streamtab */ 83a913d554Scth D_MP, /* flags */ 84a913d554Scth CB_REV, /* cb_ops revision */ 85a913d554Scth nodev, /* aread */ 86a913d554Scth nodev /* awrite */ 877c478bd9Sstevel@tonic-gate }; 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate static int dpattach(dev_info_t *, ddi_attach_cmd_t); 907c478bd9Sstevel@tonic-gate static int dpdetach(dev_info_t *, ddi_detach_cmd_t); 917c478bd9Sstevel@tonic-gate static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate static struct dev_ops dp_ops = { 947c478bd9Sstevel@tonic-gate DEVO_REV, /* devo_rev */ 957c478bd9Sstevel@tonic-gate 0, /* refcnt */ 967c478bd9Sstevel@tonic-gate dpinfo, /* info */ 977c478bd9Sstevel@tonic-gate nulldev, /* identify */ 987c478bd9Sstevel@tonic-gate nulldev, /* probe */ 
997c478bd9Sstevel@tonic-gate dpattach, /* attach */ 1007c478bd9Sstevel@tonic-gate dpdetach, /* detach */ 1017c478bd9Sstevel@tonic-gate nodev, /* reset */ 1027c478bd9Sstevel@tonic-gate &dp_cb_ops, /* driver operations */ 1037c478bd9Sstevel@tonic-gate (struct bus_ops *)NULL, /* bus operations */ 1047c478bd9Sstevel@tonic-gate nulldev /* power */ 1057c478bd9Sstevel@tonic-gate }; 1067c478bd9Sstevel@tonic-gate 1077c478bd9Sstevel@tonic-gate 1087c478bd9Sstevel@tonic-gate static struct modldrv modldrv = { 1097c478bd9Sstevel@tonic-gate &mod_driverops, /* type of module - a driver */ 110*a85084caSmeem "/dev/poll driver", 1117c478bd9Sstevel@tonic-gate &dp_ops, 1127c478bd9Sstevel@tonic-gate }; 1137c478bd9Sstevel@tonic-gate 1147c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = { 1157c478bd9Sstevel@tonic-gate MODREV_1, 1167c478bd9Sstevel@tonic-gate (void *)&modldrv, 1177c478bd9Sstevel@tonic-gate NULL 1187c478bd9Sstevel@tonic-gate }; 1197c478bd9Sstevel@tonic-gate 1207c478bd9Sstevel@tonic-gate /* 1217c478bd9Sstevel@tonic-gate * Locking Design 1227c478bd9Sstevel@tonic-gate * 1237c478bd9Sstevel@tonic-gate * The /dev/poll driver shares most of its code with poll sys call whose 1247c478bd9Sstevel@tonic-gate * code is in common/syscall/poll.c. In poll(2) design, the pollcache 1257c478bd9Sstevel@tonic-gate * structure is per lwp. An implicit assumption is made there that some 1267c478bd9Sstevel@tonic-gate * portion of pollcache will never be touched by other lwps. E.g., in 1277c478bd9Sstevel@tonic-gate * poll(2) design, no lwp will ever need to grow bitmap of other lwp. 1287c478bd9Sstevel@tonic-gate * This assumption is not true for /dev/poll; hence the need for extra 1297c478bd9Sstevel@tonic-gate * locking. 1307c478bd9Sstevel@tonic-gate * 131da6c28aaSamw * To allow more parallelism, each /dev/poll file descriptor (indexed by 1327c478bd9Sstevel@tonic-gate * minor number) has its own lock. 
Since read (dpioctl) is a much more 1337c478bd9Sstevel@tonic-gate * frequent operation than write, we want to allow multiple reads on same 1347c478bd9Sstevel@tonic-gate * /dev/poll fd. However, we prevent writes from being starved by giving 1357c478bd9Sstevel@tonic-gate * priority to write operation. Theoretically writes can starve reads as 136da6c28aaSamw * well. But in practical sense this is not important because (1) writes 1377c478bd9Sstevel@tonic-gate * happens less often than reads, and (2) write operation defines the 1387c478bd9Sstevel@tonic-gate * content of poll fd a cache set. If writes happens so often that they 1397c478bd9Sstevel@tonic-gate * can starve reads, that means the cached set is very unstable. It may 1407c478bd9Sstevel@tonic-gate * not make sense to read an unstable cache set anyway. Therefore, the 1417c478bd9Sstevel@tonic-gate * writers starving readers case is not handled in this design. 1427c478bd9Sstevel@tonic-gate */ 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate int 1457c478bd9Sstevel@tonic-gate _init() 1467c478bd9Sstevel@tonic-gate { 1477c478bd9Sstevel@tonic-gate int error; 1487c478bd9Sstevel@tonic-gate 1497c478bd9Sstevel@tonic-gate dptblsize = DEVPOLLSIZE; 1507c478bd9Sstevel@tonic-gate devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 1517c478bd9Sstevel@tonic-gate mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL); 1527c478bd9Sstevel@tonic-gate devpoll_init = 1; 1537c478bd9Sstevel@tonic-gate if ((error = mod_install(&modlinkage)) != 0) { 1547c478bd9Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1557c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1567c478bd9Sstevel@tonic-gate devpoll_init = 0; 1577c478bd9Sstevel@tonic-gate } 1587c478bd9Sstevel@tonic-gate return (error); 1597c478bd9Sstevel@tonic-gate } 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate int 1627c478bd9Sstevel@tonic-gate _fini() 1637c478bd9Sstevel@tonic-gate { 1647c478bd9Sstevel@tonic-gate int error; 
1657c478bd9Sstevel@tonic-gate 1667c478bd9Sstevel@tonic-gate if ((error = mod_remove(&modlinkage)) != 0) { 1677c478bd9Sstevel@tonic-gate return (error); 1687c478bd9Sstevel@tonic-gate } 1697c478bd9Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1707c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1717c478bd9Sstevel@tonic-gate return (0); 1727c478bd9Sstevel@tonic-gate } 1737c478bd9Sstevel@tonic-gate 1747c478bd9Sstevel@tonic-gate int 1757c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 1767c478bd9Sstevel@tonic-gate { 1777c478bd9Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 1787c478bd9Sstevel@tonic-gate } 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1817c478bd9Sstevel@tonic-gate static int 1827c478bd9Sstevel@tonic-gate dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd) 1837c478bd9Sstevel@tonic-gate { 1847c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL) 1857c478bd9Sstevel@tonic-gate == DDI_FAILURE) { 1867c478bd9Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 1877c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 1887c478bd9Sstevel@tonic-gate } 1897c478bd9Sstevel@tonic-gate dpdevi = devi; 1907c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 1917c478bd9Sstevel@tonic-gate } 1927c478bd9Sstevel@tonic-gate 1937c478bd9Sstevel@tonic-gate static int 1947c478bd9Sstevel@tonic-gate dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1957c478bd9Sstevel@tonic-gate { 1967c478bd9Sstevel@tonic-gate if (cmd != DDI_DETACH) 1977c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 2007c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 2017c478bd9Sstevel@tonic-gate } 2027c478bd9Sstevel@tonic-gate 2037c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2047c478bd9Sstevel@tonic-gate static int 2057c478bd9Sstevel@tonic-gate dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 
2067c478bd9Sstevel@tonic-gate { 2077c478bd9Sstevel@tonic-gate int error; 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate switch (infocmd) { 2107c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 2117c478bd9Sstevel@tonic-gate *result = (void *)dpdevi; 2127c478bd9Sstevel@tonic-gate error = DDI_SUCCESS; 2137c478bd9Sstevel@tonic-gate break; 2147c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 2157c478bd9Sstevel@tonic-gate *result = (void *)0; 2167c478bd9Sstevel@tonic-gate error = DDI_SUCCESS; 2177c478bd9Sstevel@tonic-gate break; 2187c478bd9Sstevel@tonic-gate default: 2197c478bd9Sstevel@tonic-gate error = DDI_FAILURE; 2207c478bd9Sstevel@tonic-gate } 2217c478bd9Sstevel@tonic-gate return (error); 2227c478bd9Sstevel@tonic-gate } 2237c478bd9Sstevel@tonic-gate 2247c478bd9Sstevel@tonic-gate /* 2257c478bd9Sstevel@tonic-gate * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major 2267c478bd9Sstevel@tonic-gate * differences are: (1) /dev/poll requires scanning the bitmap starting at 2277c478bd9Sstevel@tonic-gate * where it was stopped last time, instead of always starting from 0, 2287c478bd9Sstevel@tonic-gate * (2) since user may not have cleaned up the cached fds when they are 2297c478bd9Sstevel@tonic-gate * closed, some polldats in cache may refer to closed or reused fds. We 2307c478bd9Sstevel@tonic-gate * need to check for those cases. 2317c478bd9Sstevel@tonic-gate * 2327c478bd9Sstevel@tonic-gate * NOTE: Upon closing an fd, automatic poll cache cleanup is done for 2337c478bd9Sstevel@tonic-gate * poll(2) caches but NOT for /dev/poll caches. So expect some 2347c478bd9Sstevel@tonic-gate * stale entries! 
2357c478bd9Sstevel@tonic-gate */ 2367c478bd9Sstevel@tonic-gate static int 2377c478bd9Sstevel@tonic-gate dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp) 2387c478bd9Sstevel@tonic-gate { 2397c478bd9Sstevel@tonic-gate int start, ostart, end; 2407c478bd9Sstevel@tonic-gate int fdcnt, fd; 2417c478bd9Sstevel@tonic-gate boolean_t done; 2427c478bd9Sstevel@tonic-gate file_t *fp; 2437c478bd9Sstevel@tonic-gate short revent; 2447c478bd9Sstevel@tonic-gate boolean_t no_wrap; 2457c478bd9Sstevel@tonic-gate pollhead_t *php; 2467c478bd9Sstevel@tonic-gate polldat_t *pdp; 2477c478bd9Sstevel@tonic-gate int error = 0; 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pcp->pc_lock)); 2507c478bd9Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 2517c478bd9Sstevel@tonic-gate /* 2527c478bd9Sstevel@tonic-gate * No Need to search because no poll fd 2537c478bd9Sstevel@tonic-gate * has been cached. 2547c478bd9Sstevel@tonic-gate */ 2557c478bd9Sstevel@tonic-gate return (error); 2567c478bd9Sstevel@tonic-gate } 2577c478bd9Sstevel@tonic-gate retry: 2587c478bd9Sstevel@tonic-gate start = ostart = pcp->pc_mapstart; 2597c478bd9Sstevel@tonic-gate end = pcp->pc_mapend; 2607c478bd9Sstevel@tonic-gate php = NULL; 2617c478bd9Sstevel@tonic-gate 2627c478bd9Sstevel@tonic-gate if (start == 0) { 2637c478bd9Sstevel@tonic-gate /* 2647c478bd9Sstevel@tonic-gate * started from every begining, no need to wrap around. 
2657c478bd9Sstevel@tonic-gate */ 2667c478bd9Sstevel@tonic-gate no_wrap = B_TRUE; 2677c478bd9Sstevel@tonic-gate } else { 2687c478bd9Sstevel@tonic-gate no_wrap = B_FALSE; 2697c478bd9Sstevel@tonic-gate } 2707c478bd9Sstevel@tonic-gate done = B_FALSE; 2717c478bd9Sstevel@tonic-gate fdcnt = 0; 2727c478bd9Sstevel@tonic-gate while ((fdcnt < nfds) && !done) { 2737c478bd9Sstevel@tonic-gate php = NULL; 2747c478bd9Sstevel@tonic-gate revent = 0; 2757c478bd9Sstevel@tonic-gate /* 2767c478bd9Sstevel@tonic-gate * Examine the bit map in a circular fashion 2777c478bd9Sstevel@tonic-gate * to avoid starvation. Always resume from 2787c478bd9Sstevel@tonic-gate * last stop. Scan till end of the map. Then 2797c478bd9Sstevel@tonic-gate * wrap around. 2807c478bd9Sstevel@tonic-gate */ 2817c478bd9Sstevel@tonic-gate fd = bt_getlowbit(pcp->pc_bitmap, start, end); 2827c478bd9Sstevel@tonic-gate ASSERT(fd <= end); 2837c478bd9Sstevel@tonic-gate if (fd >= 0) { 2847c478bd9Sstevel@tonic-gate if (fd == end) { 2857c478bd9Sstevel@tonic-gate if (no_wrap) { 2867c478bd9Sstevel@tonic-gate done = B_TRUE; 2877c478bd9Sstevel@tonic-gate } else { 2887c478bd9Sstevel@tonic-gate start = 0; 2897c478bd9Sstevel@tonic-gate end = ostart - 1; 2907c478bd9Sstevel@tonic-gate no_wrap = B_TRUE; 2917c478bd9Sstevel@tonic-gate } 2927c478bd9Sstevel@tonic-gate } else { 2937c478bd9Sstevel@tonic-gate start = fd + 1; 2947c478bd9Sstevel@tonic-gate } 2957c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 296*a85084caSmeem repoll: 2977c478bd9Sstevel@tonic-gate ASSERT(pdp != NULL); 2987c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 2997c478bd9Sstevel@tonic-gate if (pdp->pd_fp == NULL) { 3007c478bd9Sstevel@tonic-gate /* 3017c478bd9Sstevel@tonic-gate * The fd is POLLREMOVed. This fd is 3027c478bd9Sstevel@tonic-gate * logically no longer cached. So move 3037c478bd9Sstevel@tonic-gate * on to the next one. 
3047c478bd9Sstevel@tonic-gate */ 3057c478bd9Sstevel@tonic-gate continue; 3067c478bd9Sstevel@tonic-gate } 3077c478bd9Sstevel@tonic-gate if ((fp = getf(fd)) == NULL) { 3087c478bd9Sstevel@tonic-gate /* 3097c478bd9Sstevel@tonic-gate * The fd has been closed, but user has not 3107c478bd9Sstevel@tonic-gate * done a POLLREMOVE on this fd yet. Instead 3117c478bd9Sstevel@tonic-gate * of cleaning it here implicitly, we return 3127c478bd9Sstevel@tonic-gate * POLLNVAL. This is consistent with poll(2) 3137c478bd9Sstevel@tonic-gate * polling a closed fd. Hope this will remind 3147c478bd9Sstevel@tonic-gate * user to do a POLLREMOVE. 3157c478bd9Sstevel@tonic-gate */ 3167c478bd9Sstevel@tonic-gate pfdp[fdcnt].fd = fd; 3177c478bd9Sstevel@tonic-gate pfdp[fdcnt].revents = POLLNVAL; 3187c478bd9Sstevel@tonic-gate fdcnt++; 3197c478bd9Sstevel@tonic-gate continue; 3207c478bd9Sstevel@tonic-gate } 3217c478bd9Sstevel@tonic-gate if (fp != pdp->pd_fp) { 3227c478bd9Sstevel@tonic-gate /* 3237c478bd9Sstevel@tonic-gate * user is polling on a cached fd which was 3247c478bd9Sstevel@tonic-gate * closed and then reused. Unfortunately 3257c478bd9Sstevel@tonic-gate * there is no good way to inform user. 3267c478bd9Sstevel@tonic-gate * If the file struct is also reused, we 3277c478bd9Sstevel@tonic-gate * may not be able to detect the fd reuse 3287c478bd9Sstevel@tonic-gate * at all. As long as this does not 3297c478bd9Sstevel@tonic-gate * cause system failure and/or memory leak, 3307c478bd9Sstevel@tonic-gate * we will play along. Man page states if 3317c478bd9Sstevel@tonic-gate * user does not clean up closed fds, polling 3327c478bd9Sstevel@tonic-gate * results will be indeterministic. 3337c478bd9Sstevel@tonic-gate * 3347c478bd9Sstevel@tonic-gate * XXX - perhaps log the detection of fd 3357c478bd9Sstevel@tonic-gate * reuse? 
3367c478bd9Sstevel@tonic-gate */ 3377c478bd9Sstevel@tonic-gate pdp->pd_fp = fp; 3387c478bd9Sstevel@tonic-gate } 3397c478bd9Sstevel@tonic-gate /* 3407c478bd9Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 3417c478bd9Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 3427c478bd9Sstevel@tonic-gate * cleaner solution if we could pass pcp as 3437c478bd9Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 3447c478bd9Sstevel@tonic-gate * of implicitly passing it using thread_t 3457c478bd9Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 3467c478bd9Sstevel@tonic-gate * interface will require all driver/file system 3477c478bd9Sstevel@tonic-gate * poll routine to change. May want to revisit 3487c478bd9Sstevel@tonic-gate * the tradeoff later. 3497c478bd9Sstevel@tonic-gate */ 3507c478bd9Sstevel@tonic-gate curthread->t_pollcache = pcp; 3517c478bd9Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, 352da6c28aaSamw &revent, &php, NULL); 3537c478bd9Sstevel@tonic-gate curthread->t_pollcache = NULL; 3547c478bd9Sstevel@tonic-gate releasef(fd); 3557c478bd9Sstevel@tonic-gate if (error != 0) { 3567c478bd9Sstevel@tonic-gate break; 3577c478bd9Sstevel@tonic-gate } 3587c478bd9Sstevel@tonic-gate /* 3597c478bd9Sstevel@tonic-gate * layered devices (e.g. console driver) 3607c478bd9Sstevel@tonic-gate * may change the vnode and thus the pollhead 3617c478bd9Sstevel@tonic-gate * pointer out from underneath us. 3627c478bd9Sstevel@tonic-gate */ 3637c478bd9Sstevel@tonic-gate if (php != NULL && pdp->pd_php != NULL && 3647c478bd9Sstevel@tonic-gate php != pdp->pd_php) { 3657c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 3667c478bd9Sstevel@tonic-gate pdp->pd_php = php; 3677c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 3687c478bd9Sstevel@tonic-gate /* 3697c478bd9Sstevel@tonic-gate * The bit should still be set. 
3707c478bd9Sstevel@tonic-gate */ 3717c478bd9Sstevel@tonic-gate ASSERT(BT_TEST(pcp->pc_bitmap, fd)); 3727c478bd9Sstevel@tonic-gate goto retry; 3737c478bd9Sstevel@tonic-gate } 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate if (revent != 0) { 3767c478bd9Sstevel@tonic-gate pfdp[fdcnt].fd = fd; 3777c478bd9Sstevel@tonic-gate pfdp[fdcnt].events = pdp->pd_events; 3787c478bd9Sstevel@tonic-gate pfdp[fdcnt].revents = revent; 3797c478bd9Sstevel@tonic-gate fdcnt++; 3807c478bd9Sstevel@tonic-gate } else if (php != NULL) { 3817c478bd9Sstevel@tonic-gate /* 3827c478bd9Sstevel@tonic-gate * We clear a bit or cache a poll fd if 3837c478bd9Sstevel@tonic-gate * the driver returns a poll head ptr, 3847c478bd9Sstevel@tonic-gate * which is expected in the case of 0 3857c478bd9Sstevel@tonic-gate * revents. Some buggy driver may return 3867c478bd9Sstevel@tonic-gate * NULL php pointer with 0 revents. In 3877c478bd9Sstevel@tonic-gate * this case, we just treat the driver as 3887c478bd9Sstevel@tonic-gate * "noncachable" and not clearing the bit 3897c478bd9Sstevel@tonic-gate * in bitmap. 3907c478bd9Sstevel@tonic-gate */ 3917c478bd9Sstevel@tonic-gate if ((pdp->pd_php != NULL) && 3927c478bd9Sstevel@tonic-gate ((pcp->pc_flag & T_POLLWAKE) == 0)) { 3937c478bd9Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 3947c478bd9Sstevel@tonic-gate } 3957c478bd9Sstevel@tonic-gate if (pdp->pd_php == NULL) { 3967c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 3977c478bd9Sstevel@tonic-gate pdp->pd_php = php; 398*a85084caSmeem /* 399*a85084caSmeem * An event of interest may have 400*a85084caSmeem * arrived between the VOP_POLL() and 401*a85084caSmeem * the pollhead_insert(); check again. 402*a85084caSmeem */ 403*a85084caSmeem goto repoll; 4047c478bd9Sstevel@tonic-gate } 4057c478bd9Sstevel@tonic-gate } 4067c478bd9Sstevel@tonic-gate } else { 4077c478bd9Sstevel@tonic-gate /* 4087c478bd9Sstevel@tonic-gate * No bit set in the range. Check for wrap around. 
4097c478bd9Sstevel@tonic-gate */ 4107c478bd9Sstevel@tonic-gate if (!no_wrap) { 4117c478bd9Sstevel@tonic-gate start = 0; 4127c478bd9Sstevel@tonic-gate end = ostart - 1; 4137c478bd9Sstevel@tonic-gate no_wrap = B_TRUE; 4147c478bd9Sstevel@tonic-gate } else { 4157c478bd9Sstevel@tonic-gate done = B_TRUE; 4167c478bd9Sstevel@tonic-gate } 4177c478bd9Sstevel@tonic-gate } 4187c478bd9Sstevel@tonic-gate } 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate if (!done) { 4217c478bd9Sstevel@tonic-gate pcp->pc_mapstart = start; 4227c478bd9Sstevel@tonic-gate } 4237c478bd9Sstevel@tonic-gate ASSERT(*fdcntp == 0); 4247c478bd9Sstevel@tonic-gate *fdcntp = fdcnt; 4257c478bd9Sstevel@tonic-gate return (error); 4267c478bd9Sstevel@tonic-gate } 4277c478bd9Sstevel@tonic-gate 4287c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4297c478bd9Sstevel@tonic-gate static int 4307c478bd9Sstevel@tonic-gate dpopen(dev_t *devp, int flag, int otyp, cred_t *credp) 4317c478bd9Sstevel@tonic-gate { 4327c478bd9Sstevel@tonic-gate minor_t minordev; 4337c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 4347c478bd9Sstevel@tonic-gate pollcache_t *pcp; 4357c478bd9Sstevel@tonic-gate 4367c478bd9Sstevel@tonic-gate ASSERT(devpoll_init); 4377c478bd9Sstevel@tonic-gate ASSERT(dptblsize <= MAXMIN); 4387c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 4397c478bd9Sstevel@tonic-gate for (minordev = 0; minordev < dptblsize; minordev++) { 4407c478bd9Sstevel@tonic-gate if (devpolltbl[minordev] == NULL) { 4417c478bd9Sstevel@tonic-gate devpolltbl[minordev] = (dp_entry_t *)RESERVED; 4427c478bd9Sstevel@tonic-gate break; 4437c478bd9Sstevel@tonic-gate } 4447c478bd9Sstevel@tonic-gate } 4457c478bd9Sstevel@tonic-gate if (minordev == dptblsize) { 4467c478bd9Sstevel@tonic-gate dp_entry_t **newtbl; 4477c478bd9Sstevel@tonic-gate size_t oldsize; 4487c478bd9Sstevel@tonic-gate 4497c478bd9Sstevel@tonic-gate /* 4507c478bd9Sstevel@tonic-gate * Used up every entry in the existing devpoll table. 
4517c478bd9Sstevel@tonic-gate * Grow the table by DEVPOLLSIZE. 4527c478bd9Sstevel@tonic-gate */ 4537c478bd9Sstevel@tonic-gate if ((oldsize = dptblsize) >= MAXMIN) { 4547c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 4557c478bd9Sstevel@tonic-gate return (ENXIO); 4567c478bd9Sstevel@tonic-gate } 4577c478bd9Sstevel@tonic-gate dptblsize += DEVPOLLSIZE; 4587c478bd9Sstevel@tonic-gate if (dptblsize > MAXMIN) { 4597c478bd9Sstevel@tonic-gate dptblsize = MAXMIN; 4607c478bd9Sstevel@tonic-gate } 4617c478bd9Sstevel@tonic-gate newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 4627c478bd9Sstevel@tonic-gate bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize); 4637c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * oldsize); 4647c478bd9Sstevel@tonic-gate devpolltbl = newtbl; 4657c478bd9Sstevel@tonic-gate devpolltbl[minordev] = (dp_entry_t *)RESERVED; 4667c478bd9Sstevel@tonic-gate } 4677c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 4687c478bd9Sstevel@tonic-gate 4697c478bd9Sstevel@tonic-gate dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP); 4707c478bd9Sstevel@tonic-gate /* 4717c478bd9Sstevel@tonic-gate * allocate a pollcache skeleton here. Delay allocating bitmap 4727c478bd9Sstevel@tonic-gate * structures until dpwrite() time, since we don't know the 4737c478bd9Sstevel@tonic-gate * optimal size yet. 
4747c478bd9Sstevel@tonic-gate */ 4757c478bd9Sstevel@tonic-gate pcp = pcache_alloc(); 4767c478bd9Sstevel@tonic-gate dpep->dpe_pcache = pcp; 4777c478bd9Sstevel@tonic-gate pcp->pc_pid = curproc->p_pid; 4787c478bd9Sstevel@tonic-gate *devp = makedevice(getmajor(*devp), minordev); /* clone the driver */ 4797c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 4807c478bd9Sstevel@tonic-gate ASSERT(minordev < dptblsize); 4817c478bd9Sstevel@tonic-gate ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED); 4827c478bd9Sstevel@tonic-gate devpolltbl[minordev] = dpep; 4837c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 4847c478bd9Sstevel@tonic-gate return (0); 4857c478bd9Sstevel@tonic-gate } 4867c478bd9Sstevel@tonic-gate 4877c478bd9Sstevel@tonic-gate /* 4887c478bd9Sstevel@tonic-gate * Write to dev/poll add/remove fd's to/from a cached poll fd set, 4897c478bd9Sstevel@tonic-gate * or change poll events for a watched fd. 4907c478bd9Sstevel@tonic-gate */ 4917c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4927c478bd9Sstevel@tonic-gate static int 4937c478bd9Sstevel@tonic-gate dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) 4947c478bd9Sstevel@tonic-gate { 4957c478bd9Sstevel@tonic-gate minor_t minor; 4967c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 4977c478bd9Sstevel@tonic-gate pollcache_t *pcp; 4987c478bd9Sstevel@tonic-gate pollfd_t *pollfdp, *pfdp; 4997c478bd9Sstevel@tonic-gate int error; 5007c478bd9Sstevel@tonic-gate ssize_t uiosize; 5017c478bd9Sstevel@tonic-gate nfds_t pollfdnum; 5027c478bd9Sstevel@tonic-gate struct pollhead *php = NULL; 5037c478bd9Sstevel@tonic-gate polldat_t *pdp; 5047c478bd9Sstevel@tonic-gate int fd; 5057c478bd9Sstevel@tonic-gate file_t *fp; 5067c478bd9Sstevel@tonic-gate 5077c478bd9Sstevel@tonic-gate minor = getminor(dev); 5087c478bd9Sstevel@tonic-gate 5097c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 5107c478bd9Sstevel@tonic-gate ASSERT(minor < dptblsize); 5117c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 5127c478bd9Sstevel@tonic-gate ASSERT(dpep 
!= NULL); 5137c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 5147c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 5157c478bd9Sstevel@tonic-gate if (curproc->p_pid != pcp->pc_pid) { 5167c478bd9Sstevel@tonic-gate return (EACCES); 5177c478bd9Sstevel@tonic-gate } 5187c478bd9Sstevel@tonic-gate uiosize = uiop->uio_resid; 5197c478bd9Sstevel@tonic-gate pollfdnum = uiosize / sizeof (pollfd_t); 5207c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 5217c478bd9Sstevel@tonic-gate if (pollfdnum > (uint_t)rctl_enforced_value( 5227c478bd9Sstevel@tonic-gate rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) { 5237c478bd9Sstevel@tonic-gate (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], 5247c478bd9Sstevel@tonic-gate curproc->p_rctls, curproc, RCA_SAFE); 5257c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 5267c478bd9Sstevel@tonic-gate return (set_errno(EINVAL)); 5277c478bd9Sstevel@tonic-gate } 5287c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 5297c478bd9Sstevel@tonic-gate /* 5307c478bd9Sstevel@tonic-gate * Copy in the pollfd array. Walk through the array and add 5317c478bd9Sstevel@tonic-gate * each polled fd to the cached set. 5327c478bd9Sstevel@tonic-gate */ 5337c478bd9Sstevel@tonic-gate pollfdp = kmem_alloc(uiosize, KM_SLEEP); 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate /* 5367c478bd9Sstevel@tonic-gate * Although /dev/poll uses the write(2) interface to cache fds, it's 5377c478bd9Sstevel@tonic-gate * not supposed to function as a seekable device. To prevent offset 5387c478bd9Sstevel@tonic-gate * from growing and eventually exceed the maximum, reset the offset 5397c478bd9Sstevel@tonic-gate * here for every call. 
5407c478bd9Sstevel@tonic-gate */ 5417c478bd9Sstevel@tonic-gate uiop->uio_loffset = 0; 5427c478bd9Sstevel@tonic-gate if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop)) 5437c478bd9Sstevel@tonic-gate != 0) { 5447c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 5457c478bd9Sstevel@tonic-gate return (error); 5467c478bd9Sstevel@tonic-gate } 5477c478bd9Sstevel@tonic-gate /* 5487c478bd9Sstevel@tonic-gate * We are about to enter the core portion of dpwrite(). Make sure this 5497c478bd9Sstevel@tonic-gate * write has exclusive access in this portion of the code, i.e., no 5507c478bd9Sstevel@tonic-gate * other writers in this code and no other readers in dpioctl. 5517c478bd9Sstevel@tonic-gate */ 5527c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 5537c478bd9Sstevel@tonic-gate dpep->dpe_writerwait++; 5547c478bd9Sstevel@tonic-gate while (dpep->dpe_refcnt != 0) { 5557c478bd9Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 5567c478bd9Sstevel@tonic-gate dpep->dpe_writerwait--; 5577c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 5587c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 5597c478bd9Sstevel@tonic-gate return (set_errno(EINTR)); 5607c478bd9Sstevel@tonic-gate } 5617c478bd9Sstevel@tonic-gate } 5627c478bd9Sstevel@tonic-gate dpep->dpe_writerwait--; 5637c478bd9Sstevel@tonic-gate dpep->dpe_flag |= DP_WRITER_PRESENT; 5647c478bd9Sstevel@tonic-gate dpep->dpe_refcnt++; 5657c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 5667c478bd9Sstevel@tonic-gate 5677c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 5687c478bd9Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 5697c478bd9Sstevel@tonic-gate pcache_create(pcp, pollfdnum); 5707c478bd9Sstevel@tonic-gate } 5717c478bd9Sstevel@tonic-gate for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) { 5727c478bd9Sstevel@tonic-gate fd = pfdp->fd; 5737c478bd9Sstevel@tonic-gate if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) 5747c478bd9Sstevel@tonic-gate continue; 
5757c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 5767c478bd9Sstevel@tonic-gate if (pfdp->events != POLLREMOVE) { 5777c478bd9Sstevel@tonic-gate if (pdp == NULL) { 5787c478bd9Sstevel@tonic-gate pdp = pcache_alloc_fd(0); 5797c478bd9Sstevel@tonic-gate pdp->pd_fd = fd; 5807c478bd9Sstevel@tonic-gate pdp->pd_pcache = pcp; 5817c478bd9Sstevel@tonic-gate pcache_insert_fd(pcp, pdp, pollfdnum); 5827c478bd9Sstevel@tonic-gate } 5837c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 5847c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_pcache == pcp); 5857c478bd9Sstevel@tonic-gate if (fd >= pcp->pc_mapsize) { 5867c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 5877c478bd9Sstevel@tonic-gate pcache_grow_map(pcp, fd); 5887c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 5897c478bd9Sstevel@tonic-gate } 5907c478bd9Sstevel@tonic-gate if (fd > pcp->pc_mapend) { 5917c478bd9Sstevel@tonic-gate pcp->pc_mapend = fd; 5927c478bd9Sstevel@tonic-gate } 5937c478bd9Sstevel@tonic-gate if ((fp = getf(fd)) == NULL) { 5947c478bd9Sstevel@tonic-gate /* 5957c478bd9Sstevel@tonic-gate * The fd is not valid. Since we can't pass 5967c478bd9Sstevel@tonic-gate * this error back in the write() call, set 5977c478bd9Sstevel@tonic-gate * the bit in bitmap to force DP_POLL ioctl 5987c478bd9Sstevel@tonic-gate * to examine it. 5997c478bd9Sstevel@tonic-gate */ 6007c478bd9Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 6017c478bd9Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 6027c478bd9Sstevel@tonic-gate continue; 6037c478bd9Sstevel@tonic-gate } 6047c478bd9Sstevel@tonic-gate /* 6057c478bd9Sstevel@tonic-gate * Don't do VOP_POLL for an already cached fd with 6067c478bd9Sstevel@tonic-gate * same poll events. 
6077c478bd9Sstevel@tonic-gate */ 6087c478bd9Sstevel@tonic-gate if ((pdp->pd_events == pfdp->events) && 6097c478bd9Sstevel@tonic-gate (pdp->pd_fp != NULL)) { 6107c478bd9Sstevel@tonic-gate /* 6117c478bd9Sstevel@tonic-gate * the events are already cached 6127c478bd9Sstevel@tonic-gate */ 6137c478bd9Sstevel@tonic-gate releasef(fd); 6147c478bd9Sstevel@tonic-gate continue; 6157c478bd9Sstevel@tonic-gate } 6167c478bd9Sstevel@tonic-gate 6177c478bd9Sstevel@tonic-gate /* 6187c478bd9Sstevel@tonic-gate * do VOP_POLL and cache this poll fd. 6197c478bd9Sstevel@tonic-gate */ 6207c478bd9Sstevel@tonic-gate /* 6217c478bd9Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 6227c478bd9Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 6237c478bd9Sstevel@tonic-gate * cleaner solution if we could pass pcp as 6247c478bd9Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 6257c478bd9Sstevel@tonic-gate * of implicitly passing it using thread_t 6267c478bd9Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 6277c478bd9Sstevel@tonic-gate * interface will require all driver/file system 6287c478bd9Sstevel@tonic-gate * poll routine to change. May want to revisit 6297c478bd9Sstevel@tonic-gate * the tradeoff later. 6307c478bd9Sstevel@tonic-gate */ 6317c478bd9Sstevel@tonic-gate curthread->t_pollcache = pcp; 6327c478bd9Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pfdp->events, 0, 633da6c28aaSamw &pfdp->revents, &php, NULL); 6347c478bd9Sstevel@tonic-gate curthread->t_pollcache = NULL; 6357c478bd9Sstevel@tonic-gate /* 636*a85084caSmeem * We always set the bit when this fd is cached; 637*a85084caSmeem * this forces the first DP_POLL to poll this fd. 6387c478bd9Sstevel@tonic-gate * Real performance gain comes from subsequent 639*a85084caSmeem * DP_POLL. We also attempt a pollhead_insert(); 640*a85084caSmeem * if it's not possible, we'll do it in dpioctl(). 
6417c478bd9Sstevel@tonic-gate */ 6427c478bd9Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 6437c478bd9Sstevel@tonic-gate if (error != 0) { 6447c478bd9Sstevel@tonic-gate releasef(fd); 6457c478bd9Sstevel@tonic-gate break; 6467c478bd9Sstevel@tonic-gate } 6477c478bd9Sstevel@tonic-gate pdp->pd_fp = fp; 6487c478bd9Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 6497c478bd9Sstevel@tonic-gate if (php != NULL) { 6507c478bd9Sstevel@tonic-gate if (pdp->pd_php == NULL) { 6517c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 6527c478bd9Sstevel@tonic-gate pdp->pd_php = php; 6537c478bd9Sstevel@tonic-gate } else { 6547c478bd9Sstevel@tonic-gate if (pdp->pd_php != php) { 6557c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, 6567c478bd9Sstevel@tonic-gate pdp); 6577c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 6587c478bd9Sstevel@tonic-gate pdp->pd_php = php; 6597c478bd9Sstevel@tonic-gate } 6607c478bd9Sstevel@tonic-gate } 6617c478bd9Sstevel@tonic-gate 6627c478bd9Sstevel@tonic-gate } 6637c478bd9Sstevel@tonic-gate releasef(fd); 6647c478bd9Sstevel@tonic-gate } else { 6657c478bd9Sstevel@tonic-gate if (pdp == NULL) { 6667c478bd9Sstevel@tonic-gate continue; 6677c478bd9Sstevel@tonic-gate } 6687c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 6697c478bd9Sstevel@tonic-gate pdp->pd_fp = NULL; 6707c478bd9Sstevel@tonic-gate pdp->pd_events = 0; 6717c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_thread == NULL); 6727c478bd9Sstevel@tonic-gate if (pdp->pd_php != NULL) { 6737c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 6747c478bd9Sstevel@tonic-gate pdp->pd_php = NULL; 6757c478bd9Sstevel@tonic-gate } 6767c478bd9Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 6777c478bd9Sstevel@tonic-gate } 6787c478bd9Sstevel@tonic-gate } 6797c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 6807c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 6817c478bd9Sstevel@tonic-gate dpep->dpe_flag &= ~DP_WRITER_PRESENT; 6827c478bd9Sstevel@tonic-gate ASSERT(dpep->dpe_refcnt == 1); 
6837c478bd9Sstevel@tonic-gate dpep->dpe_refcnt--; 6847c478bd9Sstevel@tonic-gate cv_broadcast(&dpep->dpe_cv); 6857c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 6867c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 6877c478bd9Sstevel@tonic-gate return (error); 6887c478bd9Sstevel@tonic-gate } 6897c478bd9Sstevel@tonic-gate 6907c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 6917c478bd9Sstevel@tonic-gate static int 6927c478bd9Sstevel@tonic-gate dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 6937c478bd9Sstevel@tonic-gate { 6947c478bd9Sstevel@tonic-gate timestruc_t now; 6957c478bd9Sstevel@tonic-gate timestruc_t rqtime; 6967c478bd9Sstevel@tonic-gate timestruc_t *rqtp = NULL; 6973348528fSdm120769 int timecheck = 0; 6987c478bd9Sstevel@tonic-gate minor_t minor; 6997c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 7007c478bd9Sstevel@tonic-gate pollcache_t *pcp; 7017c478bd9Sstevel@tonic-gate int error = 0; 7027c478bd9Sstevel@tonic-gate STRUCT_DECL(dvpoll, dvpoll); 7037c478bd9Sstevel@tonic-gate 7047c478bd9Sstevel@tonic-gate if (cmd == DP_POLL) { 7057c478bd9Sstevel@tonic-gate /* do this now, before we sleep on DP_WRITER_PRESENT below */ 7063348528fSdm120769 timecheck = timechanged; 7077c478bd9Sstevel@tonic-gate gethrestime(&now); 7087c478bd9Sstevel@tonic-gate } 7097c478bd9Sstevel@tonic-gate minor = getminor(dev); 7107c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 7117c478bd9Sstevel@tonic-gate ASSERT(minor < dptblsize); 7127c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 7137c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 7147c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 7157c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 7167c478bd9Sstevel@tonic-gate if (curproc->p_pid != pcp->pc_pid) 7177c478bd9Sstevel@tonic-gate return (EACCES); 7187c478bd9Sstevel@tonic-gate 7197c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 7207c478bd9Sstevel@tonic-gate while ((dpep->dpe_flag & DP_WRITER_PRESENT) || 7217c478bd9Sstevel@tonic-gate 
(dpep->dpe_writerwait != 0)) { 7227c478bd9Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 7237c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7247c478bd9Sstevel@tonic-gate return (EINTR); 7257c478bd9Sstevel@tonic-gate } 7267c478bd9Sstevel@tonic-gate } 7277c478bd9Sstevel@tonic-gate dpep->dpe_refcnt++; 7287c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7297c478bd9Sstevel@tonic-gate 7307c478bd9Sstevel@tonic-gate switch (cmd) { 7317c478bd9Sstevel@tonic-gate case DP_POLL: 7327c478bd9Sstevel@tonic-gate { 7337c478bd9Sstevel@tonic-gate pollstate_t *ps; 7347c478bd9Sstevel@tonic-gate nfds_t nfds; 7357c478bd9Sstevel@tonic-gate int fdcnt = 0; 7367c478bd9Sstevel@tonic-gate int time_out; 7377c478bd9Sstevel@tonic-gate int rval; 7387c478bd9Sstevel@tonic-gate 7397c478bd9Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 7407c478bd9Sstevel@tonic-gate error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), 7417c478bd9Sstevel@tonic-gate STRUCT_SIZE(dvpoll)); 7427c478bd9Sstevel@tonic-gate if (error) { 7437c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 7447c478bd9Sstevel@tonic-gate return (EFAULT); 7457c478bd9Sstevel@tonic-gate } 7467c478bd9Sstevel@tonic-gate 7477c478bd9Sstevel@tonic-gate time_out = STRUCT_FGET(dvpoll, dp_timeout); 7487c478bd9Sstevel@tonic-gate if (time_out > 0) { 7497c478bd9Sstevel@tonic-gate /* 7507c478bd9Sstevel@tonic-gate * Determine the future time of the requested timeout. 
7517c478bd9Sstevel@tonic-gate */ 7527c478bd9Sstevel@tonic-gate rqtp = &rqtime; 7537c478bd9Sstevel@tonic-gate rqtp->tv_sec = time_out / MILLISEC; 7547c478bd9Sstevel@tonic-gate rqtp->tv_nsec = (time_out % MILLISEC) * MICROSEC; 7557c478bd9Sstevel@tonic-gate timespecadd(rqtp, &now); 7567c478bd9Sstevel@tonic-gate } 7577c478bd9Sstevel@tonic-gate 7587c478bd9Sstevel@tonic-gate if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) { 7597c478bd9Sstevel@tonic-gate /* 7607c478bd9Sstevel@tonic-gate * We are just using DP_POLL to sleep, so 7617c478bd9Sstevel@tonic-gate * we don't any of the devpoll apparatus. 7627c478bd9Sstevel@tonic-gate * Do not check for signals if we have a zero timeout. 7637c478bd9Sstevel@tonic-gate */ 7647c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 7657c478bd9Sstevel@tonic-gate if (time_out == 0) 7667c478bd9Sstevel@tonic-gate return (0); 7677c478bd9Sstevel@tonic-gate mutex_enter(&curthread->t_delay_lock); 7687c478bd9Sstevel@tonic-gate while ((rval = cv_waituntil_sig(&curthread->t_delay_cv, 7693348528fSdm120769 &curthread->t_delay_lock, rqtp, timecheck)) > 0) 7707c478bd9Sstevel@tonic-gate continue; 7717c478bd9Sstevel@tonic-gate mutex_exit(&curthread->t_delay_lock); 7727c478bd9Sstevel@tonic-gate return ((rval == 0)? EINTR : 0); 7737c478bd9Sstevel@tonic-gate } 7747c478bd9Sstevel@tonic-gate 7757c478bd9Sstevel@tonic-gate /* 7767c478bd9Sstevel@tonic-gate * XXX It'd be nice not to have to alloc each time. 7777c478bd9Sstevel@tonic-gate * But it requires another per thread structure hook. 7787c478bd9Sstevel@tonic-gate * Do it later if there is data suggest that. 
7797c478bd9Sstevel@tonic-gate */ 7807c478bd9Sstevel@tonic-gate if ((ps = curthread->t_pollstate) == NULL) { 7817c478bd9Sstevel@tonic-gate curthread->t_pollstate = pollstate_create(); 7827c478bd9Sstevel@tonic-gate ps = curthread->t_pollstate; 7837c478bd9Sstevel@tonic-gate } 7847c478bd9Sstevel@tonic-gate if (ps->ps_dpbufsize < nfds) { 7857c478bd9Sstevel@tonic-gate struct proc *p = ttoproc(curthread); 7867c478bd9Sstevel@tonic-gate /* 7877c478bd9Sstevel@tonic-gate * The maximum size should be no large than 7887c478bd9Sstevel@tonic-gate * current maximum open file count. 7897c478bd9Sstevel@tonic-gate */ 7907c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 7915f684e24Ssp92102 if (nfds > p->p_fno_ctl) { 7927c478bd9Sstevel@tonic-gate mutex_exit(&p->p_lock); 7937c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 7947c478bd9Sstevel@tonic-gate return (EINVAL); 7957c478bd9Sstevel@tonic-gate } 7967c478bd9Sstevel@tonic-gate mutex_exit(&p->p_lock); 7977c478bd9Sstevel@tonic-gate kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) * 7987c478bd9Sstevel@tonic-gate ps->ps_dpbufsize); 7997c478bd9Sstevel@tonic-gate ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) * 8007c478bd9Sstevel@tonic-gate nfds, KM_SLEEP); 8017c478bd9Sstevel@tonic-gate ps->ps_dpbufsize = nfds; 8027c478bd9Sstevel@tonic-gate } 8037c478bd9Sstevel@tonic-gate 8047c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 8057c478bd9Sstevel@tonic-gate for (;;) { 8067c478bd9Sstevel@tonic-gate pcp->pc_flag = 0; 8077c478bd9Sstevel@tonic-gate error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt); 8087c478bd9Sstevel@tonic-gate if (fdcnt > 0 || error != 0) 8097c478bd9Sstevel@tonic-gate break; 8107c478bd9Sstevel@tonic-gate 8117c478bd9Sstevel@tonic-gate /* 8127c478bd9Sstevel@tonic-gate * A pollwake has happened since we polled cache. 
8137c478bd9Sstevel@tonic-gate */ 8147c478bd9Sstevel@tonic-gate if (pcp->pc_flag & T_POLLWAKE) 8157c478bd9Sstevel@tonic-gate continue; 8167c478bd9Sstevel@tonic-gate 8177c478bd9Sstevel@tonic-gate /* 818da6c28aaSamw * Sleep until we are notified, signaled, or timed out. 8197c478bd9Sstevel@tonic-gate * Do not check for signals if we have a zero timeout. 8207c478bd9Sstevel@tonic-gate */ 8217c478bd9Sstevel@tonic-gate if (time_out == 0) /* immediate timeout */ 8227c478bd9Sstevel@tonic-gate break; 8237c478bd9Sstevel@tonic-gate rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock, 8243348528fSdm120769 rqtp, timecheck); 8257c478bd9Sstevel@tonic-gate /* 8267c478bd9Sstevel@tonic-gate * If we were awakened by a signal or timeout 8277c478bd9Sstevel@tonic-gate * then break the loop, else poll again. 8287c478bd9Sstevel@tonic-gate */ 8297c478bd9Sstevel@tonic-gate if (rval <= 0) { 8307c478bd9Sstevel@tonic-gate if (rval == 0) /* signal */ 8317c478bd9Sstevel@tonic-gate error = EINTR; 8327c478bd9Sstevel@tonic-gate break; 8337c478bd9Sstevel@tonic-gate } 8347c478bd9Sstevel@tonic-gate } 8357c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8367c478bd9Sstevel@tonic-gate 8377c478bd9Sstevel@tonic-gate if (error == 0 && fdcnt > 0) { 8387c478bd9Sstevel@tonic-gate if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll, 8397c478bd9Sstevel@tonic-gate dp_fds), sizeof (pollfd_t) * fdcnt)) { 8407c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 8417c478bd9Sstevel@tonic-gate return (EFAULT); 8427c478bd9Sstevel@tonic-gate } 8437c478bd9Sstevel@tonic-gate *rvalp = fdcnt; 8447c478bd9Sstevel@tonic-gate } 8457c478bd9Sstevel@tonic-gate break; 8467c478bd9Sstevel@tonic-gate } 8477c478bd9Sstevel@tonic-gate 8487c478bd9Sstevel@tonic-gate case DP_ISPOLLED: 8497c478bd9Sstevel@tonic-gate { 8507c478bd9Sstevel@tonic-gate pollfd_t pollfd; 8517c478bd9Sstevel@tonic-gate polldat_t *pdp; 8527c478bd9Sstevel@tonic-gate 8537c478bd9Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 8547c478bd9Sstevel@tonic-gate error = copyin((caddr_t)arg, 
&pollfd, sizeof (pollfd_t)); 8557c478bd9Sstevel@tonic-gate if (error) { 8567c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 8577c478bd9Sstevel@tonic-gate return (EFAULT); 8587c478bd9Sstevel@tonic-gate } 8597c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 8607c478bd9Sstevel@tonic-gate if (pcp->pc_hash == NULL) { 8617c478bd9Sstevel@tonic-gate /* 8627c478bd9Sstevel@tonic-gate * No Need to search because no poll fd 8637c478bd9Sstevel@tonic-gate * has been cached. 8647c478bd9Sstevel@tonic-gate */ 8657c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8667c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 8677c478bd9Sstevel@tonic-gate return (0); 8687c478bd9Sstevel@tonic-gate } 8697c478bd9Sstevel@tonic-gate if (pollfd.fd < 0) { 8707c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8717c478bd9Sstevel@tonic-gate break; 8727c478bd9Sstevel@tonic-gate } 8737c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, pollfd.fd); 8747c478bd9Sstevel@tonic-gate if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) && 8757c478bd9Sstevel@tonic-gate (pdp->pd_fp != NULL)) { 8767c478bd9Sstevel@tonic-gate pollfd.revents = pdp->pd_events; 8777c478bd9Sstevel@tonic-gate if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) { 8787c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8797c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 8807c478bd9Sstevel@tonic-gate return (EFAULT); 8817c478bd9Sstevel@tonic-gate } 8827c478bd9Sstevel@tonic-gate *rvalp = 1; 8837c478bd9Sstevel@tonic-gate } 8847c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8857c478bd9Sstevel@tonic-gate break; 8867c478bd9Sstevel@tonic-gate } 8877c478bd9Sstevel@tonic-gate 8887c478bd9Sstevel@tonic-gate default: 8897c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 8907c478bd9Sstevel@tonic-gate return (EINVAL); 8917c478bd9Sstevel@tonic-gate } 8927c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 8937c478bd9Sstevel@tonic-gate return (error); 8947c478bd9Sstevel@tonic-gate } 8957c478bd9Sstevel@tonic-gate 8967c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 
8977c478bd9Sstevel@tonic-gate static int 8987c478bd9Sstevel@tonic-gate dppoll(dev_t dev, short events, int anyyet, short *reventsp, 8997c478bd9Sstevel@tonic-gate struct pollhead **phpp) 9007c478bd9Sstevel@tonic-gate { 9017c478bd9Sstevel@tonic-gate /* 9027c478bd9Sstevel@tonic-gate * Polling on a /dev/poll fd is not fully supported yet. 9037c478bd9Sstevel@tonic-gate */ 9047c478bd9Sstevel@tonic-gate *reventsp = POLLERR; 9057c478bd9Sstevel@tonic-gate return (0); 9067c478bd9Sstevel@tonic-gate } 9077c478bd9Sstevel@tonic-gate 9087c478bd9Sstevel@tonic-gate /* 9097c478bd9Sstevel@tonic-gate * devpoll close should do enough clean up before the pollcache is deleted, 9107c478bd9Sstevel@tonic-gate * i.e., it should ensure no one still references the pollcache later. 9117c478bd9Sstevel@tonic-gate * There is no "permission" check in here. Any process having the last 9127c478bd9Sstevel@tonic-gate * reference of this /dev/poll fd can close. 9137c478bd9Sstevel@tonic-gate */ 9147c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 9157c478bd9Sstevel@tonic-gate static int 9167c478bd9Sstevel@tonic-gate dpclose(dev_t dev, int flag, int otyp, cred_t *credp) 9177c478bd9Sstevel@tonic-gate { 9187c478bd9Sstevel@tonic-gate minor_t minor; 9197c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 9207c478bd9Sstevel@tonic-gate pollcache_t *pcp; 9217c478bd9Sstevel@tonic-gate int i; 9227c478bd9Sstevel@tonic-gate polldat_t **hashtbl; 9237c478bd9Sstevel@tonic-gate polldat_t *pdp; 9247c478bd9Sstevel@tonic-gate 9257c478bd9Sstevel@tonic-gate minor = getminor(dev); 9267c478bd9Sstevel@tonic-gate 9277c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 9287c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 9297c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 9307c478bd9Sstevel@tonic-gate devpolltbl[minor] = NULL; 9317c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 9327c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 9337c478bd9Sstevel@tonic-gate ASSERT(pcp != NULL); 9347c478bd9Sstevel@tonic-gate /* 
9357c478bd9Sstevel@tonic-gate * At this point, no other lwp can access this pollcache via the 9367c478bd9Sstevel@tonic-gate * /dev/poll fd. This pollcache is going away, so do the clean 9377c478bd9Sstevel@tonic-gate * up without the pc_lock. 9387c478bd9Sstevel@tonic-gate */ 9397c478bd9Sstevel@tonic-gate hashtbl = pcp->pc_hash; 9407c478bd9Sstevel@tonic-gate for (i = 0; i < pcp->pc_hashsize; i++) { 9417c478bd9Sstevel@tonic-gate for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 9427c478bd9Sstevel@tonic-gate if (pdp->pd_php != NULL) { 9437c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 9447c478bd9Sstevel@tonic-gate pdp->pd_php = NULL; 9457c478bd9Sstevel@tonic-gate pdp->pd_fp = NULL; 9467c478bd9Sstevel@tonic-gate } 9477c478bd9Sstevel@tonic-gate } 9487c478bd9Sstevel@tonic-gate } 9497c478bd9Sstevel@tonic-gate /* 9507c478bd9Sstevel@tonic-gate * pollwakeup() may still interact with this pollcache. Wait until 9517c478bd9Sstevel@tonic-gate * it is done. 9527c478bd9Sstevel@tonic-gate */ 9537c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_no_exit); 9547c478bd9Sstevel@tonic-gate ASSERT(pcp->pc_busy >= 0); 9557c478bd9Sstevel@tonic-gate while (pcp->pc_busy > 0) 9567c478bd9Sstevel@tonic-gate cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit); 9577c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_no_exit); 9587c478bd9Sstevel@tonic-gate pcache_destroy(pcp); 9597c478bd9Sstevel@tonic-gate ASSERT(dpep->dpe_refcnt == 0); 9607c478bd9Sstevel@tonic-gate kmem_free(dpep, sizeof (dp_entry_t)); 9617c478bd9Sstevel@tonic-gate return (0); 9627c478bd9Sstevel@tonic-gate } 963