17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 55f684e24Ssp92102 * Common Development and Distribution License (the "License"). 65f684e24Ssp92102 * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22a85084caSmeem * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 26cd1c8b85SMatthew Ahrens /* 27cd1c8b85SMatthew Ahrens * Copyright (c) 2012 by Delphix. All rights reserved. 28*bf75909aSPatrick Mooney * Copyright 2016 Joyent, Inc. 
29cd1c8b85SMatthew Ahrens */ 307c478bd9Sstevel@tonic-gate 317c478bd9Sstevel@tonic-gate #include <sys/types.h> 327c478bd9Sstevel@tonic-gate #include <sys/devops.h> 337c478bd9Sstevel@tonic-gate #include <sys/conf.h> 347c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 357c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 367c478bd9Sstevel@tonic-gate #include <sys/stat.h> 377c478bd9Sstevel@tonic-gate #include <sys/poll_impl.h> 387c478bd9Sstevel@tonic-gate #include <sys/errno.h> 397c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 407c478bd9Sstevel@tonic-gate #include <sys/mkdev.h> 417c478bd9Sstevel@tonic-gate #include <sys/debug.h> 427c478bd9Sstevel@tonic-gate #include <sys/file.h> 437c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 447c478bd9Sstevel@tonic-gate #include <sys/systm.h> 457c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 467c478bd9Sstevel@tonic-gate #include <sys/devpoll.h> 477c478bd9Sstevel@tonic-gate #include <sys/rctl.h> 487c478bd9Sstevel@tonic-gate #include <sys/resource.h> 49a5eb7107SBryan Cantrill #include <sys/schedctl.h> 50a5eb7107SBryan Cantrill #include <sys/epoll.h> 517c478bd9Sstevel@tonic-gate 527c478bd9Sstevel@tonic-gate #define RESERVED 1 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate /* local data struct */ 557c478bd9Sstevel@tonic-gate static dp_entry_t **devpolltbl; /* dev poll entries */ 567c478bd9Sstevel@tonic-gate static size_t dptblsize; 577c478bd9Sstevel@tonic-gate 587c478bd9Sstevel@tonic-gate static kmutex_t devpoll_lock; /* lock protecting dev tbl */ 597c478bd9Sstevel@tonic-gate int devpoll_init; /* is /dev/poll initialized already */ 607c478bd9Sstevel@tonic-gate 617c478bd9Sstevel@tonic-gate /* device local functions */ 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp); 647c478bd9Sstevel@tonic-gate static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp); 657c478bd9Sstevel@tonic-gate static int dpioctl(dev_t dev, int cmd, intptr_t arg, int 
mode, cred_t *credp, 667c478bd9Sstevel@tonic-gate int *rvalp); 677c478bd9Sstevel@tonic-gate static int dppoll(dev_t dev, short events, int anyyet, short *reventsp, 687c478bd9Sstevel@tonic-gate struct pollhead **phpp); 697c478bd9Sstevel@tonic-gate static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp); 707c478bd9Sstevel@tonic-gate static dev_info_t *dpdevi; 717c478bd9Sstevel@tonic-gate 727c478bd9Sstevel@tonic-gate 737c478bd9Sstevel@tonic-gate static struct cb_ops dp_cb_ops = { 747c478bd9Sstevel@tonic-gate dpopen, /* open */ 757c478bd9Sstevel@tonic-gate dpclose, /* close */ 767c478bd9Sstevel@tonic-gate nodev, /* strategy */ 777c478bd9Sstevel@tonic-gate nodev, /* print */ 787c478bd9Sstevel@tonic-gate nodev, /* dump */ 797c478bd9Sstevel@tonic-gate nodev, /* read */ 807c478bd9Sstevel@tonic-gate dpwrite, /* write */ 817c478bd9Sstevel@tonic-gate dpioctl, /* ioctl */ 827c478bd9Sstevel@tonic-gate nodev, /* devmap */ 837c478bd9Sstevel@tonic-gate nodev, /* mmap */ 847c478bd9Sstevel@tonic-gate nodev, /* segmap */ 857c478bd9Sstevel@tonic-gate dppoll, /* poll */ 86a913d554Scth ddi_prop_op, /* prop_op */ 877c478bd9Sstevel@tonic-gate (struct streamtab *)0, /* streamtab */ 88a913d554Scth D_MP, /* flags */ 89a913d554Scth CB_REV, /* cb_ops revision */ 90a913d554Scth nodev, /* aread */ 91a913d554Scth nodev /* awrite */ 927c478bd9Sstevel@tonic-gate }; 937c478bd9Sstevel@tonic-gate 947c478bd9Sstevel@tonic-gate static int dpattach(dev_info_t *, ddi_attach_cmd_t); 957c478bd9Sstevel@tonic-gate static int dpdetach(dev_info_t *, ddi_detach_cmd_t); 967c478bd9Sstevel@tonic-gate static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 977c478bd9Sstevel@tonic-gate 987c478bd9Sstevel@tonic-gate static struct dev_ops dp_ops = { 997c478bd9Sstevel@tonic-gate DEVO_REV, /* devo_rev */ 1007c478bd9Sstevel@tonic-gate 0, /* refcnt */ 1017c478bd9Sstevel@tonic-gate dpinfo, /* info */ 1027c478bd9Sstevel@tonic-gate nulldev, /* identify */ 1037c478bd9Sstevel@tonic-gate nulldev, /* probe */ 
1047c478bd9Sstevel@tonic-gate dpattach, /* attach */ 1057c478bd9Sstevel@tonic-gate dpdetach, /* detach */ 1067c478bd9Sstevel@tonic-gate nodev, /* reset */ 1077c478bd9Sstevel@tonic-gate &dp_cb_ops, /* driver operations */ 1087c478bd9Sstevel@tonic-gate (struct bus_ops *)NULL, /* bus operations */ 10919397407SSherry Moore nulldev, /* power */ 11019397407SSherry Moore ddi_quiesce_not_needed, /* quiesce */ 1117c478bd9Sstevel@tonic-gate }; 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate 1147c478bd9Sstevel@tonic-gate static struct modldrv modldrv = { 1157c478bd9Sstevel@tonic-gate &mod_driverops, /* type of module - a driver */ 116a85084caSmeem "/dev/poll driver", 1177c478bd9Sstevel@tonic-gate &dp_ops, 1187c478bd9Sstevel@tonic-gate }; 1197c478bd9Sstevel@tonic-gate 1207c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = { 1217c478bd9Sstevel@tonic-gate MODREV_1, 1227c478bd9Sstevel@tonic-gate (void *)&modldrv, 1237c478bd9Sstevel@tonic-gate NULL 1247c478bd9Sstevel@tonic-gate }; 1257c478bd9Sstevel@tonic-gate 126f3bb54f3SPatrick Mooney static void pcachelink_assoc(pollcache_t *, pollcache_t *); 127f3bb54f3SPatrick Mooney static void pcachelink_mark_stale(pollcache_t *); 128f3bb54f3SPatrick Mooney static void pcachelink_purge_stale(pollcache_t *); 129f3bb54f3SPatrick Mooney static void pcachelink_purge_all(pollcache_t *); 130f3bb54f3SPatrick Mooney 131f3bb54f3SPatrick Mooney 1327c478bd9Sstevel@tonic-gate /* 1337c478bd9Sstevel@tonic-gate * Locking Design 1347c478bd9Sstevel@tonic-gate * 1357c478bd9Sstevel@tonic-gate * The /dev/poll driver shares most of its code with poll sys call whose 1367c478bd9Sstevel@tonic-gate * code is in common/syscall/poll.c. In poll(2) design, the pollcache 1377c478bd9Sstevel@tonic-gate * structure is per lwp. An implicit assumption is made there that some 1387c478bd9Sstevel@tonic-gate * portion of pollcache will never be touched by other lwps. 
E.g., in 1397c478bd9Sstevel@tonic-gate * poll(2) design, no lwp will ever need to grow bitmap of other lwp. 1407c478bd9Sstevel@tonic-gate * This assumption is not true for /dev/poll; hence the need for extra 1417c478bd9Sstevel@tonic-gate * locking. 1427c478bd9Sstevel@tonic-gate * 143da6c28aaSamw * To allow more parallelism, each /dev/poll file descriptor (indexed by 1447c478bd9Sstevel@tonic-gate * minor number) has its own lock. Since read (dpioctl) is a much more 1457c478bd9Sstevel@tonic-gate * frequent operation than write, we want to allow multiple reads on same 1467c478bd9Sstevel@tonic-gate * /dev/poll fd. However, we prevent writes from being starved by giving 1477c478bd9Sstevel@tonic-gate * priority to write operation. Theoretically writes can starve reads as 148da6c28aaSamw * well. But in practical sense this is not important because (1) writes 1497c478bd9Sstevel@tonic-gate * happens less often than reads, and (2) write operation defines the 1507c478bd9Sstevel@tonic-gate * content of poll fd a cache set. If writes happens so often that they 1517c478bd9Sstevel@tonic-gate * can starve reads, that means the cached set is very unstable. It may 1527c478bd9Sstevel@tonic-gate * not make sense to read an unstable cache set anyway. Therefore, the 1537c478bd9Sstevel@tonic-gate * writers starving readers case is not handled in this design. 
1547c478bd9Sstevel@tonic-gate */ 1557c478bd9Sstevel@tonic-gate 1567c478bd9Sstevel@tonic-gate int 1577c478bd9Sstevel@tonic-gate _init() 1587c478bd9Sstevel@tonic-gate { 1597c478bd9Sstevel@tonic-gate int error; 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate dptblsize = DEVPOLLSIZE; 1627c478bd9Sstevel@tonic-gate devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 1637c478bd9Sstevel@tonic-gate mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL); 1647c478bd9Sstevel@tonic-gate devpoll_init = 1; 1657c478bd9Sstevel@tonic-gate if ((error = mod_install(&modlinkage)) != 0) { 1667c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1677c478bd9Sstevel@tonic-gate devpoll_init = 0; 1687c478bd9Sstevel@tonic-gate } 1697c478bd9Sstevel@tonic-gate return (error); 1707c478bd9Sstevel@tonic-gate } 1717c478bd9Sstevel@tonic-gate 1727c478bd9Sstevel@tonic-gate int 1737c478bd9Sstevel@tonic-gate _fini() 1747c478bd9Sstevel@tonic-gate { 1757c478bd9Sstevel@tonic-gate int error; 1767c478bd9Sstevel@tonic-gate 1777c478bd9Sstevel@tonic-gate if ((error = mod_remove(&modlinkage)) != 0) { 1787c478bd9Sstevel@tonic-gate return (error); 1797c478bd9Sstevel@tonic-gate } 1807c478bd9Sstevel@tonic-gate mutex_destroy(&devpoll_lock); 1817c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); 1827c478bd9Sstevel@tonic-gate return (0); 1837c478bd9Sstevel@tonic-gate } 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate int 1867c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 1877c478bd9Sstevel@tonic-gate { 1887c478bd9Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 1897c478bd9Sstevel@tonic-gate } 1907c478bd9Sstevel@tonic-gate 1917c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1927c478bd9Sstevel@tonic-gate static int 1937c478bd9Sstevel@tonic-gate dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd) 1947c478bd9Sstevel@tonic-gate { 1957c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, 
NULL) 1967c478bd9Sstevel@tonic-gate == DDI_FAILURE) { 1977c478bd9Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 1987c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 1997c478bd9Sstevel@tonic-gate } 2007c478bd9Sstevel@tonic-gate dpdevi = devi; 2017c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 2027c478bd9Sstevel@tonic-gate } 2037c478bd9Sstevel@tonic-gate 2047c478bd9Sstevel@tonic-gate static int 2057c478bd9Sstevel@tonic-gate dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd) 2067c478bd9Sstevel@tonic-gate { 2077c478bd9Sstevel@tonic-gate if (cmd != DDI_DETACH) 2087c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 2097c478bd9Sstevel@tonic-gate 2107c478bd9Sstevel@tonic-gate ddi_remove_minor_node(devi, NULL); 2117c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 2127c478bd9Sstevel@tonic-gate } 2137c478bd9Sstevel@tonic-gate 2147c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2157c478bd9Sstevel@tonic-gate static int 2167c478bd9Sstevel@tonic-gate dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 2177c478bd9Sstevel@tonic-gate { 2187c478bd9Sstevel@tonic-gate int error; 2197c478bd9Sstevel@tonic-gate 2207c478bd9Sstevel@tonic-gate switch (infocmd) { 2217c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 2227c478bd9Sstevel@tonic-gate *result = (void *)dpdevi; 2237c478bd9Sstevel@tonic-gate error = DDI_SUCCESS; 2247c478bd9Sstevel@tonic-gate break; 2257c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 2267c478bd9Sstevel@tonic-gate *result = (void *)0; 2277c478bd9Sstevel@tonic-gate error = DDI_SUCCESS; 2287c478bd9Sstevel@tonic-gate break; 2297c478bd9Sstevel@tonic-gate default: 2307c478bd9Sstevel@tonic-gate error = DDI_FAILURE; 2317c478bd9Sstevel@tonic-gate } 2327c478bd9Sstevel@tonic-gate return (error); 2337c478bd9Sstevel@tonic-gate } 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate /* 2367c478bd9Sstevel@tonic-gate * dp_pcache_poll has similar logic to pcache_poll() in poll.c. 
The major 2377c478bd9Sstevel@tonic-gate * differences are: (1) /dev/poll requires scanning the bitmap starting at 2387c478bd9Sstevel@tonic-gate * where it was stopped last time, instead of always starting from 0, 2397c478bd9Sstevel@tonic-gate * (2) since user may not have cleaned up the cached fds when they are 2407c478bd9Sstevel@tonic-gate * closed, some polldats in cache may refer to closed or reused fds. We 2417c478bd9Sstevel@tonic-gate * need to check for those cases. 2427c478bd9Sstevel@tonic-gate * 2437c478bd9Sstevel@tonic-gate * NOTE: Upon closing an fd, automatic poll cache cleanup is done for 2447c478bd9Sstevel@tonic-gate * poll(2) caches but NOT for /dev/poll caches. So expect some 2457c478bd9Sstevel@tonic-gate * stale entries! 2467c478bd9Sstevel@tonic-gate */ 2477c478bd9Sstevel@tonic-gate static int 248a5eb7107SBryan Cantrill dp_pcache_poll(dp_entry_t *dpep, void *dpbuf, 249a5eb7107SBryan Cantrill pollcache_t *pcp, nfds_t nfds, int *fdcntp) 2507c478bd9Sstevel@tonic-gate { 2517c478bd9Sstevel@tonic-gate int start, ostart, end; 2527c478bd9Sstevel@tonic-gate int fdcnt, fd; 2537c478bd9Sstevel@tonic-gate boolean_t done; 2547c478bd9Sstevel@tonic-gate file_t *fp; 2557c478bd9Sstevel@tonic-gate short revent; 2567c478bd9Sstevel@tonic-gate boolean_t no_wrap; 2577c478bd9Sstevel@tonic-gate pollhead_t *php; 2587c478bd9Sstevel@tonic-gate polldat_t *pdp; 259a5eb7107SBryan Cantrill pollfd_t *pfdp; 260a5eb7107SBryan Cantrill epoll_event_t *epoll; 2617c478bd9Sstevel@tonic-gate int error = 0; 262a5eb7107SBryan Cantrill short mask = POLLRDHUP | POLLWRBAND; 263f3bb54f3SPatrick Mooney boolean_t is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0; 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pcp->pc_lock)); 2667c478bd9Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 2677c478bd9Sstevel@tonic-gate /* 2687c478bd9Sstevel@tonic-gate * No Need to search because no poll fd 2697c478bd9Sstevel@tonic-gate * has been cached. 
2707c478bd9Sstevel@tonic-gate */ 2717c478bd9Sstevel@tonic-gate return (error); 2727c478bd9Sstevel@tonic-gate } 273a5eb7107SBryan Cantrill 274f3bb54f3SPatrick Mooney if (is_epoll) { 275a5eb7107SBryan Cantrill pfdp = NULL; 276a5eb7107SBryan Cantrill epoll = (epoll_event_t *)dpbuf; 277a5eb7107SBryan Cantrill } else { 278a5eb7107SBryan Cantrill pfdp = (pollfd_t *)dpbuf; 279a5eb7107SBryan Cantrill epoll = NULL; 280a5eb7107SBryan Cantrill } 2817c478bd9Sstevel@tonic-gate retry: 2827c478bd9Sstevel@tonic-gate start = ostart = pcp->pc_mapstart; 2837c478bd9Sstevel@tonic-gate end = pcp->pc_mapend; 2847c478bd9Sstevel@tonic-gate php = NULL; 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate if (start == 0) { 2877c478bd9Sstevel@tonic-gate /* 2887c478bd9Sstevel@tonic-gate * started from every begining, no need to wrap around. 2897c478bd9Sstevel@tonic-gate */ 2907c478bd9Sstevel@tonic-gate no_wrap = B_TRUE; 2917c478bd9Sstevel@tonic-gate } else { 2927c478bd9Sstevel@tonic-gate no_wrap = B_FALSE; 2937c478bd9Sstevel@tonic-gate } 2947c478bd9Sstevel@tonic-gate done = B_FALSE; 2957c478bd9Sstevel@tonic-gate fdcnt = 0; 2967c478bd9Sstevel@tonic-gate while ((fdcnt < nfds) && !done) { 2977c478bd9Sstevel@tonic-gate php = NULL; 2987c478bd9Sstevel@tonic-gate revent = 0; 2997c478bd9Sstevel@tonic-gate /* 3007c478bd9Sstevel@tonic-gate * Examine the bit map in a circular fashion 3017c478bd9Sstevel@tonic-gate * to avoid starvation. Always resume from 3027c478bd9Sstevel@tonic-gate * last stop. Scan till end of the map. Then 3037c478bd9Sstevel@tonic-gate * wrap around. 
3047c478bd9Sstevel@tonic-gate */ 3057c478bd9Sstevel@tonic-gate fd = bt_getlowbit(pcp->pc_bitmap, start, end); 3067c478bd9Sstevel@tonic-gate ASSERT(fd <= end); 3077c478bd9Sstevel@tonic-gate if (fd >= 0) { 3087c478bd9Sstevel@tonic-gate if (fd == end) { 3097c478bd9Sstevel@tonic-gate if (no_wrap) { 3107c478bd9Sstevel@tonic-gate done = B_TRUE; 3117c478bd9Sstevel@tonic-gate } else { 3127c478bd9Sstevel@tonic-gate start = 0; 3137c478bd9Sstevel@tonic-gate end = ostart - 1; 3147c478bd9Sstevel@tonic-gate no_wrap = B_TRUE; 3157c478bd9Sstevel@tonic-gate } 3167c478bd9Sstevel@tonic-gate } else { 3177c478bd9Sstevel@tonic-gate start = fd + 1; 3187c478bd9Sstevel@tonic-gate } 3197c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 320a85084caSmeem repoll: 3217c478bd9Sstevel@tonic-gate ASSERT(pdp != NULL); 3227c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 3237c478bd9Sstevel@tonic-gate if (pdp->pd_fp == NULL) { 3247c478bd9Sstevel@tonic-gate /* 3257c478bd9Sstevel@tonic-gate * The fd is POLLREMOVed. This fd is 3267c478bd9Sstevel@tonic-gate * logically no longer cached. So move 3277c478bd9Sstevel@tonic-gate * on to the next one. 3287c478bd9Sstevel@tonic-gate */ 3297c478bd9Sstevel@tonic-gate continue; 3307c478bd9Sstevel@tonic-gate } 3317c478bd9Sstevel@tonic-gate if ((fp = getf(fd)) == NULL) { 3327c478bd9Sstevel@tonic-gate /* 3337c478bd9Sstevel@tonic-gate * The fd has been closed, but user has not 3347c478bd9Sstevel@tonic-gate * done a POLLREMOVE on this fd yet. Instead 3357c478bd9Sstevel@tonic-gate * of cleaning it here implicitly, we return 3367c478bd9Sstevel@tonic-gate * POLLNVAL. This is consistent with poll(2) 3377c478bd9Sstevel@tonic-gate * polling a closed fd. Hope this will remind 3387c478bd9Sstevel@tonic-gate * user to do a POLLREMOVE. 
3397c478bd9Sstevel@tonic-gate */ 340f3bb54f3SPatrick Mooney if (!is_epoll && pfdp != NULL) { 3417c478bd9Sstevel@tonic-gate pfdp[fdcnt].fd = fd; 3427c478bd9Sstevel@tonic-gate pfdp[fdcnt].revents = POLLNVAL; 3437c478bd9Sstevel@tonic-gate fdcnt++; 3447c478bd9Sstevel@tonic-gate continue; 3457c478bd9Sstevel@tonic-gate } 346a5eb7107SBryan Cantrill 347a5eb7107SBryan Cantrill /* 348a5eb7107SBryan Cantrill * In the epoll compatibility case, we actually 349a5eb7107SBryan Cantrill * perform the implicit removal to remain 350a5eb7107SBryan Cantrill * closer to the epoll semantics. 351a5eb7107SBryan Cantrill */ 352f3bb54f3SPatrick Mooney if (is_epoll) { 353a5eb7107SBryan Cantrill pdp->pd_fp = NULL; 354a5eb7107SBryan Cantrill pdp->pd_events = 0; 355a5eb7107SBryan Cantrill 356a5eb7107SBryan Cantrill if (php != NULL) { 357a5eb7107SBryan Cantrill pollhead_delete(php, pdp); 358a5eb7107SBryan Cantrill pdp->pd_php = NULL; 359a5eb7107SBryan Cantrill } 360a5eb7107SBryan Cantrill 361a5eb7107SBryan Cantrill BT_CLEAR(pcp->pc_bitmap, fd); 362a5eb7107SBryan Cantrill continue; 363a5eb7107SBryan Cantrill } 364f3bb54f3SPatrick Mooney } 365a5eb7107SBryan Cantrill 3667c478bd9Sstevel@tonic-gate if (fp != pdp->pd_fp) { 3677c478bd9Sstevel@tonic-gate /* 3687c478bd9Sstevel@tonic-gate * user is polling on a cached fd which was 3697c478bd9Sstevel@tonic-gate * closed and then reused. Unfortunately 3707c478bd9Sstevel@tonic-gate * there is no good way to inform user. 3717c478bd9Sstevel@tonic-gate * If the file struct is also reused, we 3727c478bd9Sstevel@tonic-gate * may not be able to detect the fd reuse 3737c478bd9Sstevel@tonic-gate * at all. As long as this does not 3747c478bd9Sstevel@tonic-gate * cause system failure and/or memory leak, 3757c478bd9Sstevel@tonic-gate * we will play along. Man page states if 3767c478bd9Sstevel@tonic-gate * user does not clean up closed fds, polling 3777c478bd9Sstevel@tonic-gate * results will be indeterministic. 
3787c478bd9Sstevel@tonic-gate * 3797c478bd9Sstevel@tonic-gate * XXX - perhaps log the detection of fd 3807c478bd9Sstevel@tonic-gate * reuse? 3817c478bd9Sstevel@tonic-gate */ 3827c478bd9Sstevel@tonic-gate pdp->pd_fp = fp; 3837c478bd9Sstevel@tonic-gate } 3847c478bd9Sstevel@tonic-gate /* 3857c478bd9Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 3867c478bd9Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 3877c478bd9Sstevel@tonic-gate * cleaner solution if we could pass pcp as 3887c478bd9Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 3897c478bd9Sstevel@tonic-gate * of implicitly passing it using thread_t 3907c478bd9Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 3917c478bd9Sstevel@tonic-gate * interface will require all driver/file system 3927c478bd9Sstevel@tonic-gate * poll routine to change. May want to revisit 3937c478bd9Sstevel@tonic-gate * the tradeoff later. 3947c478bd9Sstevel@tonic-gate */ 3957c478bd9Sstevel@tonic-gate curthread->t_pollcache = pcp; 3967c478bd9Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, 397da6c28aaSamw &revent, &php, NULL); 3987c478bd9Sstevel@tonic-gate curthread->t_pollcache = NULL; 3997c478bd9Sstevel@tonic-gate releasef(fd); 4007c478bd9Sstevel@tonic-gate if (error != 0) { 4017c478bd9Sstevel@tonic-gate break; 4027c478bd9Sstevel@tonic-gate } 403f3bb54f3SPatrick Mooney 4047c478bd9Sstevel@tonic-gate /* 4057c478bd9Sstevel@tonic-gate * layered devices (e.g. console driver) 4067c478bd9Sstevel@tonic-gate * may change the vnode and thus the pollhead 4077c478bd9Sstevel@tonic-gate * pointer out from underneath us. 
4087c478bd9Sstevel@tonic-gate */ 4097c478bd9Sstevel@tonic-gate if (php != NULL && pdp->pd_php != NULL && 4107c478bd9Sstevel@tonic-gate php != pdp->pd_php) { 4117c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 4127c478bd9Sstevel@tonic-gate pdp->pd_php = php; 4137c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 4147c478bd9Sstevel@tonic-gate /* 4157c478bd9Sstevel@tonic-gate * The bit should still be set. 4167c478bd9Sstevel@tonic-gate */ 4177c478bd9Sstevel@tonic-gate ASSERT(BT_TEST(pcp->pc_bitmap, fd)); 4187c478bd9Sstevel@tonic-gate goto retry; 4197c478bd9Sstevel@tonic-gate } 4207c478bd9Sstevel@tonic-gate 4217c478bd9Sstevel@tonic-gate if (revent != 0) { 422a5eb7107SBryan Cantrill if (pfdp != NULL) { 4237c478bd9Sstevel@tonic-gate pfdp[fdcnt].fd = fd; 4247c478bd9Sstevel@tonic-gate pfdp[fdcnt].events = pdp->pd_events; 4257c478bd9Sstevel@tonic-gate pfdp[fdcnt].revents = revent; 426f3bb54f3SPatrick Mooney } else if (epoll != NULL) { 427a5eb7107SBryan Cantrill epoll_event_t *ep = &epoll[fdcnt]; 428a5eb7107SBryan Cantrill 429a5eb7107SBryan Cantrill ASSERT(epoll != NULL); 430a5eb7107SBryan Cantrill ep->data.u64 = pdp->pd_epolldata; 431a5eb7107SBryan Cantrill 432a5eb7107SBryan Cantrill /* 433a5eb7107SBryan Cantrill * If any of the event bits are set for 434a5eb7107SBryan Cantrill * which poll and epoll representations 435a5eb7107SBryan Cantrill * differ, swizzle in the native epoll 436a5eb7107SBryan Cantrill * values. 437a5eb7107SBryan Cantrill */ 438a5eb7107SBryan Cantrill if (revent & mask) { 439a5eb7107SBryan Cantrill ep->events = (revent & ~mask) | 440a5eb7107SBryan Cantrill ((revent & POLLRDHUP) ? 441a5eb7107SBryan Cantrill EPOLLRDHUP : 0) | 442a5eb7107SBryan Cantrill ((revent & POLLWRBAND) ? 
443a5eb7107SBryan Cantrill EPOLLWRBAND : 0); 444a5eb7107SBryan Cantrill } else { 445a5eb7107SBryan Cantrill ep->events = revent; 446a5eb7107SBryan Cantrill } 447a5eb7107SBryan Cantrill 448a5eb7107SBryan Cantrill /* 449a5eb7107SBryan Cantrill * We define POLLWRNORM to be POLLOUT, 450a5eb7107SBryan Cantrill * but epoll has separate definitions 451a5eb7107SBryan Cantrill * for them; if POLLOUT is set and the 452a5eb7107SBryan Cantrill * user has asked for EPOLLWRNORM, set 453a5eb7107SBryan Cantrill * that as well. 454a5eb7107SBryan Cantrill */ 455a5eb7107SBryan Cantrill if ((revent & POLLOUT) && 456a5eb7107SBryan Cantrill (pdp->pd_events & EPOLLWRNORM)) { 457a5eb7107SBryan Cantrill ep->events |= EPOLLWRNORM; 458a5eb7107SBryan Cantrill } 459f3bb54f3SPatrick Mooney } else { 460f3bb54f3SPatrick Mooney pollstate_t *ps = 461f3bb54f3SPatrick Mooney curthread->t_pollstate; 462f3bb54f3SPatrick Mooney /* 463f3bb54f3SPatrick Mooney * The devpoll handle itself is being 464f3bb54f3SPatrick Mooney * polled. Notify the caller of any 465f3bb54f3SPatrick Mooney * readable event(s), leaving as much 466f3bb54f3SPatrick Mooney * state as possible untouched. 467f3bb54f3SPatrick Mooney */ 468f3bb54f3SPatrick Mooney VERIFY(fdcnt == 0); 469f3bb54f3SPatrick Mooney VERIFY(ps != NULL); 470f3bb54f3SPatrick Mooney 471f3bb54f3SPatrick Mooney /* 472f3bb54f3SPatrick Mooney * If a call to pollunlock() fails 473f3bb54f3SPatrick Mooney * during VOP_POLL, skip over the fd 474f3bb54f3SPatrick Mooney * and continue polling. 475f3bb54f3SPatrick Mooney * 476f3bb54f3SPatrick Mooney * Otherwise, report that there is an 477f3bb54f3SPatrick Mooney * event pending. 
478f3bb54f3SPatrick Mooney */ 479f3bb54f3SPatrick Mooney if ((ps->ps_flags & POLLSTATE_ULFAIL) 480f3bb54f3SPatrick Mooney != 0) { 481f3bb54f3SPatrick Mooney ps->ps_flags &= 482f3bb54f3SPatrick Mooney ~POLLSTATE_ULFAIL; 483f3bb54f3SPatrick Mooney continue; 484f3bb54f3SPatrick Mooney } else { 485f3bb54f3SPatrick Mooney fdcnt++; 486f3bb54f3SPatrick Mooney break; 487f3bb54f3SPatrick Mooney } 488a5eb7107SBryan Cantrill } 489a5eb7107SBryan Cantrill 490a5eb7107SBryan Cantrill /* 491a5eb7107SBryan Cantrill * If POLLET is set, clear the bit in the 492a5eb7107SBryan Cantrill * bitmap -- which effectively latches the 493a5eb7107SBryan Cantrill * edge on a pollwakeup() from the driver. 494a5eb7107SBryan Cantrill */ 495a5eb7107SBryan Cantrill if (pdp->pd_events & POLLET) 496a5eb7107SBryan Cantrill BT_CLEAR(pcp->pc_bitmap, fd); 497a5eb7107SBryan Cantrill 498a5eb7107SBryan Cantrill /* 499a5eb7107SBryan Cantrill * If POLLONESHOT is set, perform the implicit 500a5eb7107SBryan Cantrill * POLLREMOVE. 501a5eb7107SBryan Cantrill */ 502a5eb7107SBryan Cantrill if (pdp->pd_events & POLLONESHOT) { 503a5eb7107SBryan Cantrill pdp->pd_fp = NULL; 504a5eb7107SBryan Cantrill pdp->pd_events = 0; 505a5eb7107SBryan Cantrill 506a5eb7107SBryan Cantrill if (php != NULL) { 507a5eb7107SBryan Cantrill pollhead_delete(php, pdp); 508a5eb7107SBryan Cantrill pdp->pd_php = NULL; 509a5eb7107SBryan Cantrill } 510a5eb7107SBryan Cantrill 511a5eb7107SBryan Cantrill BT_CLEAR(pcp->pc_bitmap, fd); 512a5eb7107SBryan Cantrill } 513a5eb7107SBryan Cantrill 5147c478bd9Sstevel@tonic-gate fdcnt++; 5157c478bd9Sstevel@tonic-gate } else if (php != NULL) { 5167c478bd9Sstevel@tonic-gate /* 5177c478bd9Sstevel@tonic-gate * We clear a bit or cache a poll fd if 5187c478bd9Sstevel@tonic-gate * the driver returns a poll head ptr, 5197c478bd9Sstevel@tonic-gate * which is expected in the case of 0 5207c478bd9Sstevel@tonic-gate * revents. Some buggy driver may return 5217c478bd9Sstevel@tonic-gate * NULL php pointer with 0 revents. 
In 5227c478bd9Sstevel@tonic-gate * this case, we just treat the driver as 5237c478bd9Sstevel@tonic-gate * "noncachable" and not clearing the bit 5247c478bd9Sstevel@tonic-gate * in bitmap. 5257c478bd9Sstevel@tonic-gate */ 5267c478bd9Sstevel@tonic-gate if ((pdp->pd_php != NULL) && 527a5eb7107SBryan Cantrill ((pcp->pc_flag & PC_POLLWAKE) == 0)) { 5287c478bd9Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 5297c478bd9Sstevel@tonic-gate } 5307c478bd9Sstevel@tonic-gate if (pdp->pd_php == NULL) { 5317c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 5327c478bd9Sstevel@tonic-gate pdp->pd_php = php; 533a85084caSmeem /* 534a85084caSmeem * An event of interest may have 535a85084caSmeem * arrived between the VOP_POLL() and 536a85084caSmeem * the pollhead_insert(); check again. 537a85084caSmeem */ 538a85084caSmeem goto repoll; 5397c478bd9Sstevel@tonic-gate } 5407c478bd9Sstevel@tonic-gate } 5417c478bd9Sstevel@tonic-gate } else { 5427c478bd9Sstevel@tonic-gate /* 5437c478bd9Sstevel@tonic-gate * No bit set in the range. Check for wrap around. 
5447c478bd9Sstevel@tonic-gate */ 5457c478bd9Sstevel@tonic-gate if (!no_wrap) { 5467c478bd9Sstevel@tonic-gate start = 0; 5477c478bd9Sstevel@tonic-gate end = ostart - 1; 5487c478bd9Sstevel@tonic-gate no_wrap = B_TRUE; 5497c478bd9Sstevel@tonic-gate } else { 5507c478bd9Sstevel@tonic-gate done = B_TRUE; 5517c478bd9Sstevel@tonic-gate } 5527c478bd9Sstevel@tonic-gate } 5537c478bd9Sstevel@tonic-gate } 5547c478bd9Sstevel@tonic-gate 5557c478bd9Sstevel@tonic-gate if (!done) { 5567c478bd9Sstevel@tonic-gate pcp->pc_mapstart = start; 5577c478bd9Sstevel@tonic-gate } 5587c478bd9Sstevel@tonic-gate ASSERT(*fdcntp == 0); 5597c478bd9Sstevel@tonic-gate *fdcntp = fdcnt; 5607c478bd9Sstevel@tonic-gate return (error); 5617c478bd9Sstevel@tonic-gate } 5627c478bd9Sstevel@tonic-gate 5637c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 5647c478bd9Sstevel@tonic-gate static int 5657c478bd9Sstevel@tonic-gate dpopen(dev_t *devp, int flag, int otyp, cred_t *credp) 5667c478bd9Sstevel@tonic-gate { 5677c478bd9Sstevel@tonic-gate minor_t minordev; 5687c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 5697c478bd9Sstevel@tonic-gate pollcache_t *pcp; 5707c478bd9Sstevel@tonic-gate 5717c478bd9Sstevel@tonic-gate ASSERT(devpoll_init); 5727c478bd9Sstevel@tonic-gate ASSERT(dptblsize <= MAXMIN); 5737c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 5747c478bd9Sstevel@tonic-gate for (minordev = 0; minordev < dptblsize; minordev++) { 5757c478bd9Sstevel@tonic-gate if (devpolltbl[minordev] == NULL) { 5767c478bd9Sstevel@tonic-gate devpolltbl[minordev] = (dp_entry_t *)RESERVED; 5777c478bd9Sstevel@tonic-gate break; 5787c478bd9Sstevel@tonic-gate } 5797c478bd9Sstevel@tonic-gate } 5807c478bd9Sstevel@tonic-gate if (minordev == dptblsize) { 5817c478bd9Sstevel@tonic-gate dp_entry_t **newtbl; 5827c478bd9Sstevel@tonic-gate size_t oldsize; 5837c478bd9Sstevel@tonic-gate 5847c478bd9Sstevel@tonic-gate /* 5857c478bd9Sstevel@tonic-gate * Used up every entry in the existing devpoll table. 
5867c478bd9Sstevel@tonic-gate * Grow the table by DEVPOLLSIZE. 5877c478bd9Sstevel@tonic-gate */ 5887c478bd9Sstevel@tonic-gate if ((oldsize = dptblsize) >= MAXMIN) { 5897c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 5907c478bd9Sstevel@tonic-gate return (ENXIO); 5917c478bd9Sstevel@tonic-gate } 5927c478bd9Sstevel@tonic-gate dptblsize += DEVPOLLSIZE; 5937c478bd9Sstevel@tonic-gate if (dptblsize > MAXMIN) { 5947c478bd9Sstevel@tonic-gate dptblsize = MAXMIN; 5957c478bd9Sstevel@tonic-gate } 5967c478bd9Sstevel@tonic-gate newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP); 5977c478bd9Sstevel@tonic-gate bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize); 5987c478bd9Sstevel@tonic-gate kmem_free(devpolltbl, sizeof (caddr_t) * oldsize); 5997c478bd9Sstevel@tonic-gate devpolltbl = newtbl; 6007c478bd9Sstevel@tonic-gate devpolltbl[minordev] = (dp_entry_t *)RESERVED; 6017c478bd9Sstevel@tonic-gate } 6027c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 6037c478bd9Sstevel@tonic-gate 6047c478bd9Sstevel@tonic-gate dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP); 6057c478bd9Sstevel@tonic-gate /* 6067c478bd9Sstevel@tonic-gate * allocate a pollcache skeleton here. Delay allocating bitmap 6077c478bd9Sstevel@tonic-gate * structures until dpwrite() time, since we don't know the 608a5eb7107SBryan Cantrill * optimal size yet. We also delay setting the pid until either 609a5eb7107SBryan Cantrill * dpwrite() or attempt to poll on the instance, allowing parents 610a5eb7107SBryan Cantrill * to create instances of /dev/poll for their children. (In the 611a5eb7107SBryan Cantrill * epoll compatibility case, this check isn't performed to maintain 612a5eb7107SBryan Cantrill * semantic compatibility.) 
6137c478bd9Sstevel@tonic-gate */ 6147c478bd9Sstevel@tonic-gate pcp = pcache_alloc(); 6157c478bd9Sstevel@tonic-gate dpep->dpe_pcache = pcp; 616a5eb7107SBryan Cantrill pcp->pc_pid = -1; 6177c478bd9Sstevel@tonic-gate *devp = makedevice(getmajor(*devp), minordev); /* clone the driver */ 6187c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 6197c478bd9Sstevel@tonic-gate ASSERT(minordev < dptblsize); 6207c478bd9Sstevel@tonic-gate ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED); 6217c478bd9Sstevel@tonic-gate devpolltbl[minordev] = dpep; 6227c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 6237c478bd9Sstevel@tonic-gate return (0); 6247c478bd9Sstevel@tonic-gate } 6257c478bd9Sstevel@tonic-gate 6267c478bd9Sstevel@tonic-gate /* 6277c478bd9Sstevel@tonic-gate * Write to dev/poll add/remove fd's to/from a cached poll fd set, 6287c478bd9Sstevel@tonic-gate * or change poll events for a watched fd. 6297c478bd9Sstevel@tonic-gate */ 6307c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 6317c478bd9Sstevel@tonic-gate static int 6327c478bd9Sstevel@tonic-gate dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) 6337c478bd9Sstevel@tonic-gate { 6347c478bd9Sstevel@tonic-gate minor_t minor; 6357c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 6367c478bd9Sstevel@tonic-gate pollcache_t *pcp; 6377c478bd9Sstevel@tonic-gate pollfd_t *pollfdp, *pfdp; 638a5eb7107SBryan Cantrill dvpoll_epollfd_t *epfdp; 639a5eb7107SBryan Cantrill uintptr_t limit; 640a5eb7107SBryan Cantrill int error, size; 6417c478bd9Sstevel@tonic-gate ssize_t uiosize; 6427c478bd9Sstevel@tonic-gate nfds_t pollfdnum; 6437c478bd9Sstevel@tonic-gate struct pollhead *php = NULL; 6447c478bd9Sstevel@tonic-gate polldat_t *pdp; 6457c478bd9Sstevel@tonic-gate int fd; 6467c478bd9Sstevel@tonic-gate file_t *fp; 647f3bb54f3SPatrick Mooney boolean_t is_epoll, fds_added = B_FALSE; 6487c478bd9Sstevel@tonic-gate 6497c478bd9Sstevel@tonic-gate minor = getminor(dev); 6507c478bd9Sstevel@tonic-gate 6517c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 
6527c478bd9Sstevel@tonic-gate ASSERT(minor < dptblsize); 6537c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 6547c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 6557c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 656f3bb54f3SPatrick Mooney 657f3bb54f3SPatrick Mooney mutex_enter(&dpep->dpe_lock); 6587c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 659f3bb54f3SPatrick Mooney is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0; 660f3bb54f3SPatrick Mooney size = (is_epoll) ? sizeof (dvpoll_epollfd_t) : sizeof (pollfd_t); 661f3bb54f3SPatrick Mooney mutex_exit(&dpep->dpe_lock); 662a5eb7107SBryan Cantrill 663f3bb54f3SPatrick Mooney if (!is_epoll && curproc->p_pid != pcp->pc_pid) { 664f3bb54f3SPatrick Mooney if (pcp->pc_pid != -1) { 6657c478bd9Sstevel@tonic-gate return (EACCES); 6667c478bd9Sstevel@tonic-gate } 667a5eb7107SBryan Cantrill 668f3bb54f3SPatrick Mooney pcp->pc_pid = curproc->p_pid; 669a5eb7107SBryan Cantrill } 670a5eb7107SBryan Cantrill 6717c478bd9Sstevel@tonic-gate uiosize = uiop->uio_resid; 672a5eb7107SBryan Cantrill pollfdnum = uiosize / size; 6737c478bd9Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 6747c478bd9Sstevel@tonic-gate if (pollfdnum > (uint_t)rctl_enforced_value( 6757c478bd9Sstevel@tonic-gate rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) { 6767c478bd9Sstevel@tonic-gate (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], 6777c478bd9Sstevel@tonic-gate curproc->p_rctls, curproc, RCA_SAFE); 6787c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 679f3bb54f3SPatrick Mooney return (EINVAL); 6807c478bd9Sstevel@tonic-gate } 6817c478bd9Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 6827c478bd9Sstevel@tonic-gate /* 6837c478bd9Sstevel@tonic-gate * Copy in the pollfd array. Walk through the array and add 6847c478bd9Sstevel@tonic-gate * each polled fd to the cached set. 
6857c478bd9Sstevel@tonic-gate */ 6867c478bd9Sstevel@tonic-gate pollfdp = kmem_alloc(uiosize, KM_SLEEP); 687a5eb7107SBryan Cantrill limit = (uintptr_t)pollfdp + (pollfdnum * size); 6887c478bd9Sstevel@tonic-gate 6897c478bd9Sstevel@tonic-gate /* 6907c478bd9Sstevel@tonic-gate * Although /dev/poll uses the write(2) interface to cache fds, it's 6917c478bd9Sstevel@tonic-gate * not supposed to function as a seekable device. To prevent offset 6927c478bd9Sstevel@tonic-gate * from growing and eventually exceed the maximum, reset the offset 6937c478bd9Sstevel@tonic-gate * here for every call. 6947c478bd9Sstevel@tonic-gate */ 6957c478bd9Sstevel@tonic-gate uiop->uio_loffset = 0; 6967c478bd9Sstevel@tonic-gate if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop)) 6977c478bd9Sstevel@tonic-gate != 0) { 6987c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 6997c478bd9Sstevel@tonic-gate return (error); 7007c478bd9Sstevel@tonic-gate } 7017c478bd9Sstevel@tonic-gate /* 7027c478bd9Sstevel@tonic-gate * We are about to enter the core portion of dpwrite(). Make sure this 7037c478bd9Sstevel@tonic-gate * write has exclusive access in this portion of the code, i.e., no 704f3bb54f3SPatrick Mooney * other writers in this code. 705f3bb54f3SPatrick Mooney * 706f3bb54f3SPatrick Mooney * Waiting for all readers to drop their references to the dpe is 707f3bb54f3SPatrick Mooney * unecessary since the pollcache itself is protected by pc_lock. 
7087c478bd9Sstevel@tonic-gate */ 7097c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 7107c478bd9Sstevel@tonic-gate dpep->dpe_writerwait++; 711f3bb54f3SPatrick Mooney while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0) { 712f3bb54f3SPatrick Mooney ASSERT(dpep->dpe_refcnt != 0); 713a5eb7107SBryan Cantrill 7147c478bd9Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 7157c478bd9Sstevel@tonic-gate dpep->dpe_writerwait--; 7167c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7177c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 718f3bb54f3SPatrick Mooney return (EINTR); 7197c478bd9Sstevel@tonic-gate } 7207c478bd9Sstevel@tonic-gate } 7217c478bd9Sstevel@tonic-gate dpep->dpe_writerwait--; 7227c478bd9Sstevel@tonic-gate dpep->dpe_flag |= DP_WRITER_PRESENT; 7237c478bd9Sstevel@tonic-gate dpep->dpe_refcnt++; 724a5eb7107SBryan Cantrill 725f3bb54f3SPatrick Mooney if (!is_epoll && (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0) { 726f3bb54f3SPatrick Mooney /* 727f3bb54f3SPatrick Mooney * The epoll compat mode was enabled while we were waiting to 728f3bb54f3SPatrick Mooney * establish write access. It is not safe to continue since 729f3bb54f3SPatrick Mooney * state was prepared for non-epoll operation. 730f3bb54f3SPatrick Mooney */ 731f3bb54f3SPatrick Mooney error = EBUSY; 732f3bb54f3SPatrick Mooney goto bypass; 733f3bb54f3SPatrick Mooney } 7347c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 7357c478bd9Sstevel@tonic-gate 736f3bb54f3SPatrick Mooney /* 737f3bb54f3SPatrick Mooney * Since the dpwrite() may recursively walk an added /dev/poll handle, 738f3bb54f3SPatrick Mooney * pollstate_enter() deadlock and loop detection must be used. 
739f3bb54f3SPatrick Mooney */ 740f3bb54f3SPatrick Mooney (void) pollstate_create(); 741f3bb54f3SPatrick Mooney VERIFY(pollstate_enter(pcp) == PSE_SUCCESS); 742a5eb7107SBryan Cantrill 7437c478bd9Sstevel@tonic-gate if (pcp->pc_bitmap == NULL) { 7447c478bd9Sstevel@tonic-gate pcache_create(pcp, pollfdnum); 7457c478bd9Sstevel@tonic-gate } 746a5eb7107SBryan Cantrill for (pfdp = pollfdp; (uintptr_t)pfdp < limit; 747a5eb7107SBryan Cantrill pfdp = (pollfd_t *)((uintptr_t)pfdp + size)) { 7487c478bd9Sstevel@tonic-gate fd = pfdp->fd; 749a5eb7107SBryan Cantrill if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) { 750a5eb7107SBryan Cantrill /* 751a5eb7107SBryan Cantrill * epoll semantics demand that we return EBADF if our 752a5eb7107SBryan Cantrill * specified fd is invalid. 753a5eb7107SBryan Cantrill */ 754f3bb54f3SPatrick Mooney if (is_epoll) { 755a5eb7107SBryan Cantrill error = EBADF; 756a5eb7107SBryan Cantrill break; 757a5eb7107SBryan Cantrill } 758a5eb7107SBryan Cantrill 7597c478bd9Sstevel@tonic-gate continue; 760a5eb7107SBryan Cantrill } 761a5eb7107SBryan Cantrill 7627c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, fd); 7637c478bd9Sstevel@tonic-gate if (pfdp->events != POLLREMOVE) { 764a5eb7107SBryan Cantrill 765a5eb7107SBryan Cantrill fp = NULL; 766a5eb7107SBryan Cantrill 7677c478bd9Sstevel@tonic-gate if (pdp == NULL) { 768a5eb7107SBryan Cantrill /* 769a5eb7107SBryan Cantrill * If we're in epoll compatibility mode, check 770a5eb7107SBryan Cantrill * that the fd is valid before allocating 771a5eb7107SBryan Cantrill * anything for it; epoll semantics demand that 772a5eb7107SBryan Cantrill * we return EBADF if our specified fd is 773a5eb7107SBryan Cantrill * invalid. 
774a5eb7107SBryan Cantrill */ 775f3bb54f3SPatrick Mooney if (is_epoll) { 776a5eb7107SBryan Cantrill if ((fp = getf(fd)) == NULL) { 777a5eb7107SBryan Cantrill error = EBADF; 778a5eb7107SBryan Cantrill break; 779a5eb7107SBryan Cantrill } 780a5eb7107SBryan Cantrill } 781a5eb7107SBryan Cantrill 7827c478bd9Sstevel@tonic-gate pdp = pcache_alloc_fd(0); 7837c478bd9Sstevel@tonic-gate pdp->pd_fd = fd; 7847c478bd9Sstevel@tonic-gate pdp->pd_pcache = pcp; 7857c478bd9Sstevel@tonic-gate pcache_insert_fd(pcp, pdp, pollfdnum); 786a5eb7107SBryan Cantrill } else { 787a5eb7107SBryan Cantrill /* 788a5eb7107SBryan Cantrill * epoll semantics demand that we error out if 789a5eb7107SBryan Cantrill * a file descriptor is added twice, which we 790a5eb7107SBryan Cantrill * check (imperfectly) by checking if we both 791a5eb7107SBryan Cantrill * have the file descriptor cached and the 792a5eb7107SBryan Cantrill * file pointer that correponds to the file 793a5eb7107SBryan Cantrill * descriptor matches our cached value. If 794a5eb7107SBryan Cantrill * there is a pointer mismatch, the file 795a5eb7107SBryan Cantrill * descriptor was closed without being removed. 796a5eb7107SBryan Cantrill * The converse is clearly not true, however, 797a5eb7107SBryan Cantrill * so to narrow the window by which a spurious 798a5eb7107SBryan Cantrill * EEXIST may be returned, we also check if 799a5eb7107SBryan Cantrill * this fp has been added to an epoll control 800a5eb7107SBryan Cantrill * descriptor in the past; if it hasn't, we 801a5eb7107SBryan Cantrill * know that this is due to fp reuse -- it's 802a5eb7107SBryan Cantrill * not a true EEXIST case. 
(By performing this 803a5eb7107SBryan Cantrill * additional check, we limit the window of 804a5eb7107SBryan Cantrill * spurious EEXIST to situations where a single 805a5eb7107SBryan Cantrill * file descriptor is being used across two or 806a5eb7107SBryan Cantrill * more epoll control descriptors -- and even 807a5eb7107SBryan Cantrill * then, the file descriptor must be closed and 808a5eb7107SBryan Cantrill * reused in a relatively tight time span.) 809a5eb7107SBryan Cantrill */ 810f3bb54f3SPatrick Mooney if (is_epoll) { 811a5eb7107SBryan Cantrill if (pdp->pd_fp != NULL && 812a5eb7107SBryan Cantrill (fp = getf(fd)) != NULL && 813a5eb7107SBryan Cantrill fp == pdp->pd_fp && 814a5eb7107SBryan Cantrill (fp->f_flag2 & FEPOLLED)) { 815a5eb7107SBryan Cantrill error = EEXIST; 816a5eb7107SBryan Cantrill releasef(fd); 817a5eb7107SBryan Cantrill break; 8187c478bd9Sstevel@tonic-gate } 819a5eb7107SBryan Cantrill 820a5eb7107SBryan Cantrill /* 821a5eb7107SBryan Cantrill * We have decided that the cached 822a5eb7107SBryan Cantrill * information was stale: it either 823a5eb7107SBryan Cantrill * didn't match, or the fp had never 824a5eb7107SBryan Cantrill * actually been epoll()'d on before. 825a5eb7107SBryan Cantrill * We need to now clear our pd_events 826a5eb7107SBryan Cantrill * to assure that we don't mistakenly 827a5eb7107SBryan Cantrill * operate on cached event disposition. 
828a5eb7107SBryan Cantrill */ 829a5eb7107SBryan Cantrill pdp->pd_events = 0; 830a5eb7107SBryan Cantrill } 831a5eb7107SBryan Cantrill } 832a5eb7107SBryan Cantrill 833f3bb54f3SPatrick Mooney if (is_epoll) { 834a5eb7107SBryan Cantrill epfdp = (dvpoll_epollfd_t *)pfdp; 835a5eb7107SBryan Cantrill pdp->pd_epolldata = epfdp->dpep_data; 836a5eb7107SBryan Cantrill } 837a5eb7107SBryan Cantrill 8387c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 8397c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_pcache == pcp); 8407c478bd9Sstevel@tonic-gate if (fd >= pcp->pc_mapsize) { 8417c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 8427c478bd9Sstevel@tonic-gate pcache_grow_map(pcp, fd); 8437c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 8447c478bd9Sstevel@tonic-gate } 8457c478bd9Sstevel@tonic-gate if (fd > pcp->pc_mapend) { 8467c478bd9Sstevel@tonic-gate pcp->pc_mapend = fd; 8477c478bd9Sstevel@tonic-gate } 848a5eb7107SBryan Cantrill if (fp == NULL && (fp = getf(fd)) == NULL) { 8497c478bd9Sstevel@tonic-gate /* 8507c478bd9Sstevel@tonic-gate * The fd is not valid. Since we can't pass 8517c478bd9Sstevel@tonic-gate * this error back in the write() call, set 8527c478bd9Sstevel@tonic-gate * the bit in bitmap to force DP_POLL ioctl 8537c478bd9Sstevel@tonic-gate * to examine it. 8547c478bd9Sstevel@tonic-gate */ 8557c478bd9Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 8567c478bd9Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 8577c478bd9Sstevel@tonic-gate continue; 8587c478bd9Sstevel@tonic-gate } 859a5eb7107SBryan Cantrill 860a5eb7107SBryan Cantrill /* 861a5eb7107SBryan Cantrill * To (greatly) reduce EEXIST false positives, we 862a5eb7107SBryan Cantrill * denote that this fp has been epoll()'d. We do this 863a5eb7107SBryan Cantrill * regardless of epoll compatibility mode, as the flag 864a5eb7107SBryan Cantrill * is harmless if not in epoll compatibility mode. 
865a5eb7107SBryan Cantrill */ 866a5eb7107SBryan Cantrill fp->f_flag2 |= FEPOLLED; 867a5eb7107SBryan Cantrill 8687c478bd9Sstevel@tonic-gate /* 8697c478bd9Sstevel@tonic-gate * Don't do VOP_POLL for an already cached fd with 8707c478bd9Sstevel@tonic-gate * same poll events. 8717c478bd9Sstevel@tonic-gate */ 8727c478bd9Sstevel@tonic-gate if ((pdp->pd_events == pfdp->events) && 873a5eb7107SBryan Cantrill (pdp->pd_fp == fp)) { 8747c478bd9Sstevel@tonic-gate /* 8757c478bd9Sstevel@tonic-gate * the events are already cached 8767c478bd9Sstevel@tonic-gate */ 8777c478bd9Sstevel@tonic-gate releasef(fd); 8787c478bd9Sstevel@tonic-gate continue; 8797c478bd9Sstevel@tonic-gate } 8807c478bd9Sstevel@tonic-gate 8817c478bd9Sstevel@tonic-gate /* 8827c478bd9Sstevel@tonic-gate * do VOP_POLL and cache this poll fd. 8837c478bd9Sstevel@tonic-gate */ 8847c478bd9Sstevel@tonic-gate /* 8857c478bd9Sstevel@tonic-gate * XXX - pollrelock() logic needs to know which 8867c478bd9Sstevel@tonic-gate * which pollcache lock to grab. It'd be a 8877c478bd9Sstevel@tonic-gate * cleaner solution if we could pass pcp as 8887c478bd9Sstevel@tonic-gate * an arguement in VOP_POLL interface instead 8897c478bd9Sstevel@tonic-gate * of implicitly passing it using thread_t 8907c478bd9Sstevel@tonic-gate * struct. On the other hand, changing VOP_POLL 8917c478bd9Sstevel@tonic-gate * interface will require all driver/file system 8927c478bd9Sstevel@tonic-gate * poll routine to change. May want to revisit 8937c478bd9Sstevel@tonic-gate * the tradeoff later. 8947c478bd9Sstevel@tonic-gate */ 8957c478bd9Sstevel@tonic-gate curthread->t_pollcache = pcp; 8967c478bd9Sstevel@tonic-gate error = VOP_POLL(fp->f_vnode, pfdp->events, 0, 897da6c28aaSamw &pfdp->revents, &php, NULL); 8987c478bd9Sstevel@tonic-gate curthread->t_pollcache = NULL; 8997c478bd9Sstevel@tonic-gate /* 900a85084caSmeem * We always set the bit when this fd is cached; 901a85084caSmeem * this forces the first DP_POLL to poll this fd. 
9027c478bd9Sstevel@tonic-gate * Real performance gain comes from subsequent 903a85084caSmeem * DP_POLL. We also attempt a pollhead_insert(); 904a85084caSmeem * if it's not possible, we'll do it in dpioctl(). 9057c478bd9Sstevel@tonic-gate */ 9067c478bd9Sstevel@tonic-gate BT_SET(pcp->pc_bitmap, fd); 9077c478bd9Sstevel@tonic-gate if (error != 0) { 9087c478bd9Sstevel@tonic-gate releasef(fd); 9097c478bd9Sstevel@tonic-gate break; 9107c478bd9Sstevel@tonic-gate } 9117c478bd9Sstevel@tonic-gate pdp->pd_fp = fp; 9127c478bd9Sstevel@tonic-gate pdp->pd_events |= pfdp->events; 9137c478bd9Sstevel@tonic-gate if (php != NULL) { 9147c478bd9Sstevel@tonic-gate if (pdp->pd_php == NULL) { 9157c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 9167c478bd9Sstevel@tonic-gate pdp->pd_php = php; 9177c478bd9Sstevel@tonic-gate } else { 9187c478bd9Sstevel@tonic-gate if (pdp->pd_php != php) { 9197c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, 9207c478bd9Sstevel@tonic-gate pdp); 9217c478bd9Sstevel@tonic-gate pollhead_insert(php, pdp); 9227c478bd9Sstevel@tonic-gate pdp->pd_php = php; 9237c478bd9Sstevel@tonic-gate } 9247c478bd9Sstevel@tonic-gate } 9257c478bd9Sstevel@tonic-gate } 926f3bb54f3SPatrick Mooney fds_added = B_TRUE; 9277c478bd9Sstevel@tonic-gate releasef(fd); 9287c478bd9Sstevel@tonic-gate } else { 929a5eb7107SBryan Cantrill if (pdp == NULL || pdp->pd_fp == NULL) { 930f3bb54f3SPatrick Mooney if (is_epoll) { 931a5eb7107SBryan Cantrill /* 932a5eb7107SBryan Cantrill * As with the add case (above), epoll 933a5eb7107SBryan Cantrill * semantics demand that we error out 934a5eb7107SBryan Cantrill * in this case. 
935a5eb7107SBryan Cantrill */ 936a5eb7107SBryan Cantrill error = ENOENT; 937a5eb7107SBryan Cantrill break; 938a5eb7107SBryan Cantrill } 939a5eb7107SBryan Cantrill 9407c478bd9Sstevel@tonic-gate continue; 9417c478bd9Sstevel@tonic-gate } 9427c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_fd == fd); 9437c478bd9Sstevel@tonic-gate pdp->pd_fp = NULL; 9447c478bd9Sstevel@tonic-gate pdp->pd_events = 0; 9457c478bd9Sstevel@tonic-gate ASSERT(pdp->pd_thread == NULL); 9467c478bd9Sstevel@tonic-gate if (pdp->pd_php != NULL) { 9477c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 9487c478bd9Sstevel@tonic-gate pdp->pd_php = NULL; 9497c478bd9Sstevel@tonic-gate } 9507c478bd9Sstevel@tonic-gate BT_CLEAR(pcp->pc_bitmap, fd); 9517c478bd9Sstevel@tonic-gate } 9527c478bd9Sstevel@tonic-gate } 953f3bb54f3SPatrick Mooney /* 954*bf75909aSPatrick Mooney * Wake any pollcache waiters so they can check the new descriptors. 955*bf75909aSPatrick Mooney * 956f3bb54f3SPatrick Mooney * Any fds added to an recursive-capable pollcache could themselves be 957f3bb54f3SPatrick Mooney * /dev/poll handles. To ensure that proper event propagation occurs, 958*bf75909aSPatrick Mooney * parent pollcaches are woken too, so that they can create any needed 959f3bb54f3SPatrick Mooney * pollcache links. 
960f3bb54f3SPatrick Mooney */ 961f3bb54f3SPatrick Mooney if (fds_added) { 962*bf75909aSPatrick Mooney cv_broadcast(&pcp->pc_cv); 963f3bb54f3SPatrick Mooney pcache_wake_parents(pcp); 964f3bb54f3SPatrick Mooney } 965f3bb54f3SPatrick Mooney pollstate_exit(pcp); 9667c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 967f3bb54f3SPatrick Mooney bypass: 9687c478bd9Sstevel@tonic-gate dpep->dpe_flag &= ~DP_WRITER_PRESENT; 9697c478bd9Sstevel@tonic-gate dpep->dpe_refcnt--; 9707c478bd9Sstevel@tonic-gate cv_broadcast(&dpep->dpe_cv); 9717c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 9727c478bd9Sstevel@tonic-gate kmem_free(pollfdp, uiosize); 9737c478bd9Sstevel@tonic-gate return (error); 9747c478bd9Sstevel@tonic-gate } 9757c478bd9Sstevel@tonic-gate 976a5eb7107SBryan Cantrill #define DP_SIGMASK_RESTORE(ksetp) { \ 977a5eb7107SBryan Cantrill if (ksetp != NULL) { \ 978a5eb7107SBryan Cantrill mutex_enter(&p->p_lock); \ 979a5eb7107SBryan Cantrill if (lwp->lwp_cursig == 0) { \ 980a5eb7107SBryan Cantrill t->t_hold = lwp->lwp_sigoldmask; \ 981a5eb7107SBryan Cantrill t->t_flag &= ~T_TOMASK; \ 982a5eb7107SBryan Cantrill } \ 983a5eb7107SBryan Cantrill mutex_exit(&p->p_lock); \ 984a5eb7107SBryan Cantrill } \ 985a5eb7107SBryan Cantrill } 986a5eb7107SBryan Cantrill 9877c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 9887c478bd9Sstevel@tonic-gate static int 9897c478bd9Sstevel@tonic-gate dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 9907c478bd9Sstevel@tonic-gate { 9917c478bd9Sstevel@tonic-gate minor_t minor; 9927c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 9937c478bd9Sstevel@tonic-gate pollcache_t *pcp; 994cd1c8b85SMatthew Ahrens hrtime_t now; 9957c478bd9Sstevel@tonic-gate int error = 0; 996f3bb54f3SPatrick Mooney boolean_t is_epoll; 9977c478bd9Sstevel@tonic-gate STRUCT_DECL(dvpoll, dvpoll); 9987c478bd9Sstevel@tonic-gate 999a5eb7107SBryan Cantrill if (cmd == DP_POLL || cmd == DP_PPOLL) { 1000cd1c8b85SMatthew Ahrens /* do this now, before we sleep on 
DP_WRITER_PRESENT */ 1001cd1c8b85SMatthew Ahrens now = gethrtime(); 1002cd1c8b85SMatthew Ahrens } 1003cd1c8b85SMatthew Ahrens 10047c478bd9Sstevel@tonic-gate minor = getminor(dev); 10057c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 10067c478bd9Sstevel@tonic-gate ASSERT(minor < dptblsize); 10077c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 10087c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 10097c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 10107c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 10117c478bd9Sstevel@tonic-gate 10127c478bd9Sstevel@tonic-gate mutex_enter(&dpep->dpe_lock); 1013f3bb54f3SPatrick Mooney is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0; 1014a5eb7107SBryan Cantrill 1015a5eb7107SBryan Cantrill if (cmd == DP_EPOLLCOMPAT) { 1016a5eb7107SBryan Cantrill if (dpep->dpe_refcnt != 0) { 1017a5eb7107SBryan Cantrill /* 1018a5eb7107SBryan Cantrill * We can't turn on epoll compatibility while there 1019a5eb7107SBryan Cantrill * are outstanding operations. 1020a5eb7107SBryan Cantrill */ 1021a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 1022a5eb7107SBryan Cantrill return (EBUSY); 1023a5eb7107SBryan Cantrill } 1024a5eb7107SBryan Cantrill 1025a5eb7107SBryan Cantrill /* 1026a5eb7107SBryan Cantrill * epoll compatibility is a one-way street: there's no way 1027a5eb7107SBryan Cantrill * to turn it off for a particular open. 
1028a5eb7107SBryan Cantrill */ 1029a5eb7107SBryan Cantrill dpep->dpe_flag |= DP_ISEPOLLCOMPAT; 1030a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 1031a5eb7107SBryan Cantrill 1032a5eb7107SBryan Cantrill return (0); 1033a5eb7107SBryan Cantrill } 1034a5eb7107SBryan Cantrill 1035f3bb54f3SPatrick Mooney if (!is_epoll && curproc->p_pid != pcp->pc_pid) { 1036a5eb7107SBryan Cantrill if (pcp->pc_pid != -1) { 1037a5eb7107SBryan Cantrill mutex_exit(&dpep->dpe_lock); 1038a5eb7107SBryan Cantrill return (EACCES); 1039a5eb7107SBryan Cantrill } 1040a5eb7107SBryan Cantrill 1041a5eb7107SBryan Cantrill pcp->pc_pid = curproc->p_pid; 1042a5eb7107SBryan Cantrill } 1043a5eb7107SBryan Cantrill 1044f3bb54f3SPatrick Mooney /* Wait until all writers have cleared the handle before continuing */ 1045f3bb54f3SPatrick Mooney while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0 || 10467c478bd9Sstevel@tonic-gate (dpep->dpe_writerwait != 0)) { 10477c478bd9Sstevel@tonic-gate if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { 10487c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 10497c478bd9Sstevel@tonic-gate return (EINTR); 10507c478bd9Sstevel@tonic-gate } 10517c478bd9Sstevel@tonic-gate } 10527c478bd9Sstevel@tonic-gate dpep->dpe_refcnt++; 10537c478bd9Sstevel@tonic-gate mutex_exit(&dpep->dpe_lock); 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate switch (cmd) { 10567c478bd9Sstevel@tonic-gate case DP_POLL: 1057a5eb7107SBryan Cantrill case DP_PPOLL: 10587c478bd9Sstevel@tonic-gate { 10597c478bd9Sstevel@tonic-gate pollstate_t *ps; 10607c478bd9Sstevel@tonic-gate nfds_t nfds; 10617c478bd9Sstevel@tonic-gate int fdcnt = 0; 1062a5eb7107SBryan Cantrill size_t size, fdsize, dpsize; 1063cd1c8b85SMatthew Ahrens hrtime_t deadline = 0; 1064a5eb7107SBryan Cantrill k_sigset_t *ksetp = NULL; 1065a5eb7107SBryan Cantrill k_sigset_t kset; 1066a5eb7107SBryan Cantrill sigset_t set; 1067a5eb7107SBryan Cantrill kthread_t *t = curthread; 1068a5eb7107SBryan Cantrill klwp_t *lwp = ttolwp(t); 
1069a5eb7107SBryan Cantrill struct proc *p = ttoproc(curthread); 10707c478bd9Sstevel@tonic-gate 10717c478bd9Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 1072a5eb7107SBryan Cantrill 1073a5eb7107SBryan Cantrill /* 1074a5eb7107SBryan Cantrill * The dp_setp member is only required/consumed for DP_PPOLL, 1075a5eb7107SBryan Cantrill * which otherwise uses the same structure as DP_POLL. 1076a5eb7107SBryan Cantrill */ 1077a5eb7107SBryan Cantrill if (cmd == DP_POLL) { 1078a5eb7107SBryan Cantrill dpsize = (uintptr_t)STRUCT_FADDR(dvpoll, dp_setp) - 1079a5eb7107SBryan Cantrill (uintptr_t)STRUCT_FADDR(dvpoll, dp_fds); 1080a5eb7107SBryan Cantrill } else { 1081a5eb7107SBryan Cantrill ASSERT(cmd == DP_PPOLL); 1082a5eb7107SBryan Cantrill dpsize = STRUCT_SIZE(dvpoll); 1083a5eb7107SBryan Cantrill } 1084a5eb7107SBryan Cantrill 1085a5eb7107SBryan Cantrill if ((mode & FKIOCTL) != 0) { 1086a5eb7107SBryan Cantrill /* Kernel-internal ioctl call */ 1087a5eb7107SBryan Cantrill bcopy((caddr_t)arg, STRUCT_BUF(dvpoll), dpsize); 1088a5eb7107SBryan Cantrill error = 0; 1089a5eb7107SBryan Cantrill } else { 10907c478bd9Sstevel@tonic-gate error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), 1091a5eb7107SBryan Cantrill dpsize); 1092a5eb7107SBryan Cantrill } 1093a5eb7107SBryan Cantrill 10947c478bd9Sstevel@tonic-gate if (error) { 10957c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 10967c478bd9Sstevel@tonic-gate return (EFAULT); 10977c478bd9Sstevel@tonic-gate } 10987c478bd9Sstevel@tonic-gate 1099cd1c8b85SMatthew Ahrens deadline = STRUCT_FGET(dvpoll, dp_timeout); 1100cd1c8b85SMatthew Ahrens if (deadline > 0) { 11017c478bd9Sstevel@tonic-gate /* 1102cd1c8b85SMatthew Ahrens * Convert the deadline from relative milliseconds 1103cd1c8b85SMatthew Ahrens * to absolute nanoseconds. They must wait for at 1104cd1c8b85SMatthew Ahrens * least a tick. 
11057c478bd9Sstevel@tonic-gate */ 110619449258SJosef 'Jeff' Sipek deadline = MSEC2NSEC(deadline); 1107cd1c8b85SMatthew Ahrens deadline = MAX(deadline, nsec_per_tick); 1108cd1c8b85SMatthew Ahrens deadline += now; 11097c478bd9Sstevel@tonic-gate } 11107c478bd9Sstevel@tonic-gate 1111a5eb7107SBryan Cantrill if (cmd == DP_PPOLL) { 1112a5eb7107SBryan Cantrill void *setp = STRUCT_FGETP(dvpoll, dp_setp); 1113a5eb7107SBryan Cantrill 1114a5eb7107SBryan Cantrill if (setp != NULL) { 1115a5eb7107SBryan Cantrill if (copyin(setp, &set, sizeof (set))) { 1116a5eb7107SBryan Cantrill DP_REFRELE(dpep); 1117a5eb7107SBryan Cantrill return (EFAULT); 1118a5eb7107SBryan Cantrill } 1119a5eb7107SBryan Cantrill 1120a5eb7107SBryan Cantrill sigutok(&set, &kset); 1121a5eb7107SBryan Cantrill ksetp = &kset; 1122a5eb7107SBryan Cantrill 1123a5eb7107SBryan Cantrill mutex_enter(&p->p_lock); 1124a5eb7107SBryan Cantrill schedctl_finish_sigblock(t); 1125a5eb7107SBryan Cantrill lwp->lwp_sigoldmask = t->t_hold; 1126a5eb7107SBryan Cantrill t->t_hold = *ksetp; 1127a5eb7107SBryan Cantrill t->t_flag |= T_TOMASK; 1128a5eb7107SBryan Cantrill 1129a5eb7107SBryan Cantrill /* 1130a5eb7107SBryan Cantrill * Like ppoll() with a non-NULL sigset, we'll 1131a5eb7107SBryan Cantrill * call cv_reltimedwait_sig() just to check for 1132a5eb7107SBryan Cantrill * signals. This call will return immediately 1133a5eb7107SBryan Cantrill * with either 0 (signalled) or -1 (no signal). 1134a5eb7107SBryan Cantrill * There are some conditions whereby we can 1135a5eb7107SBryan Cantrill * get 0 from cv_reltimedwait_sig() without 1136a5eb7107SBryan Cantrill * a true signal (e.g., a directed stop), so 1137a5eb7107SBryan Cantrill * we restore our signal mask in the unlikely 1138a5eb7107SBryan Cantrill * event that lwp_cursig is 0. 
1139a5eb7107SBryan Cantrill */ 1140a5eb7107SBryan Cantrill if (!cv_reltimedwait_sig(&t->t_delay_cv, 1141a5eb7107SBryan Cantrill &p->p_lock, 0, TR_CLOCK_TICK)) { 1142a5eb7107SBryan Cantrill if (lwp->lwp_cursig == 0) { 1143a5eb7107SBryan Cantrill t->t_hold = lwp->lwp_sigoldmask; 1144a5eb7107SBryan Cantrill t->t_flag &= ~T_TOMASK; 1145a5eb7107SBryan Cantrill } 1146a5eb7107SBryan Cantrill 1147a5eb7107SBryan Cantrill mutex_exit(&p->p_lock); 1148a5eb7107SBryan Cantrill 1149a5eb7107SBryan Cantrill DP_REFRELE(dpep); 1150a5eb7107SBryan Cantrill return (EINTR); 1151a5eb7107SBryan Cantrill } 1152a5eb7107SBryan Cantrill 1153a5eb7107SBryan Cantrill mutex_exit(&p->p_lock); 1154a5eb7107SBryan Cantrill } 1155a5eb7107SBryan Cantrill } 1156a5eb7107SBryan Cantrill 11577c478bd9Sstevel@tonic-gate if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) { 11587c478bd9Sstevel@tonic-gate /* 11597c478bd9Sstevel@tonic-gate * We are just using DP_POLL to sleep, so 11607c478bd9Sstevel@tonic-gate * we don't any of the devpoll apparatus. 11617c478bd9Sstevel@tonic-gate * Do not check for signals if we have a zero timeout. 11627c478bd9Sstevel@tonic-gate */ 11637c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 1164a5eb7107SBryan Cantrill if (deadline == 0) { 1165a5eb7107SBryan Cantrill DP_SIGMASK_RESTORE(ksetp); 11667c478bd9Sstevel@tonic-gate return (0); 1167a5eb7107SBryan Cantrill } 1168a5eb7107SBryan Cantrill 11697c478bd9Sstevel@tonic-gate mutex_enter(&curthread->t_delay_lock); 1170cd1c8b85SMatthew Ahrens while ((error = 1171cd1c8b85SMatthew Ahrens cv_timedwait_sig_hrtime(&curthread->t_delay_cv, 1172cd1c8b85SMatthew Ahrens &curthread->t_delay_lock, deadline)) > 0) 11737c478bd9Sstevel@tonic-gate continue; 11747c478bd9Sstevel@tonic-gate mutex_exit(&curthread->t_delay_lock); 1175a5eb7107SBryan Cantrill 1176a5eb7107SBryan Cantrill DP_SIGMASK_RESTORE(ksetp); 1177a5eb7107SBryan Cantrill 1178cd1c8b85SMatthew Ahrens return (error == 0 ? 
EINTR : 0); 11797c478bd9Sstevel@tonic-gate } 11807c478bd9Sstevel@tonic-gate 1181f3bb54f3SPatrick Mooney if (is_epoll) { 1182a5eb7107SBryan Cantrill size = nfds * (fdsize = sizeof (epoll_event_t)); 1183a5eb7107SBryan Cantrill } else { 1184a5eb7107SBryan Cantrill size = nfds * (fdsize = sizeof (pollfd_t)); 1185a5eb7107SBryan Cantrill } 1186a5eb7107SBryan Cantrill 11877c478bd9Sstevel@tonic-gate /* 1188fe234e7cSMatt Amdur * XXX It would be nice not to have to alloc each time, but it 1189fe234e7cSMatt Amdur * requires another per thread structure hook. This can be 1190fe234e7cSMatt Amdur * implemented later if data suggests that it's necessary. 11917c478bd9Sstevel@tonic-gate */ 1192f3bb54f3SPatrick Mooney ps = pollstate_create(); 1193a5eb7107SBryan Cantrill 1194a5eb7107SBryan Cantrill if (ps->ps_dpbufsize < size) { 11957c478bd9Sstevel@tonic-gate /* 1196a5eb7107SBryan Cantrill * If nfds is larger than twice the current maximum 1197a5eb7107SBryan Cantrill * open file count, we'll silently clamp it. This 1198a5eb7107SBryan Cantrill * only limits our exposure to allocating an 1199a5eb7107SBryan Cantrill * inordinate amount of kernel memory; it doesn't 1200a5eb7107SBryan Cantrill * otherwise affect the semantics. (We have this 1201a5eb7107SBryan Cantrill * check at twice the maximum instead of merely the 1202a5eb7107SBryan Cantrill * maximum because some applications pass an nfds that 1203a5eb7107SBryan Cantrill * is only slightly larger than their limit.) 
12047c478bd9Sstevel@tonic-gate */ 12057c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 1206a5eb7107SBryan Cantrill if ((nfds >> 1) > p->p_fno_ctl) { 1207a5eb7107SBryan Cantrill nfds = p->p_fno_ctl; 1208a5eb7107SBryan Cantrill size = nfds * fdsize; 12097c478bd9Sstevel@tonic-gate } 12107c478bd9Sstevel@tonic-gate mutex_exit(&p->p_lock); 1211a5eb7107SBryan Cantrill 1212a5eb7107SBryan Cantrill if (ps->ps_dpbufsize < size) { 1213a5eb7107SBryan Cantrill kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize); 1214a5eb7107SBryan Cantrill ps->ps_dpbuf = kmem_zalloc(size, KM_SLEEP); 1215a5eb7107SBryan Cantrill ps->ps_dpbufsize = size; 1216a5eb7107SBryan Cantrill } 12177c478bd9Sstevel@tonic-gate } 12187c478bd9Sstevel@tonic-gate 1219f3bb54f3SPatrick Mooney VERIFY(pollstate_enter(pcp) == PSE_SUCCESS); 12207c478bd9Sstevel@tonic-gate for (;;) { 1221a5eb7107SBryan Cantrill pcp->pc_flag &= ~PC_POLLWAKE; 1222a5eb7107SBryan Cantrill 1223f3bb54f3SPatrick Mooney /* 1224f3bb54f3SPatrick Mooney * Mark all child pcachelinks as stale. 1225f3bb54f3SPatrick Mooney * Those which are still part of the tree will be 1226f3bb54f3SPatrick Mooney * marked as valid during the poll. 1227f3bb54f3SPatrick Mooney */ 1228f3bb54f3SPatrick Mooney pcachelink_mark_stale(pcp); 1229f3bb54f3SPatrick Mooney 1230a5eb7107SBryan Cantrill error = dp_pcache_poll(dpep, ps->ps_dpbuf, 1231a5eb7107SBryan Cantrill pcp, nfds, &fdcnt); 12327c478bd9Sstevel@tonic-gate if (fdcnt > 0 || error != 0) 12337c478bd9Sstevel@tonic-gate break; 12347c478bd9Sstevel@tonic-gate 1235f3bb54f3SPatrick Mooney /* Purge still-stale child pcachelinks */ 1236f3bb54f3SPatrick Mooney pcachelink_purge_stale(pcp); 1237f3bb54f3SPatrick Mooney 12387c478bd9Sstevel@tonic-gate /* 12397c478bd9Sstevel@tonic-gate * A pollwake has happened since we polled cache. 
12407c478bd9Sstevel@tonic-gate */ 1241a5eb7107SBryan Cantrill if (pcp->pc_flag & PC_POLLWAKE) 12427c478bd9Sstevel@tonic-gate continue; 12437c478bd9Sstevel@tonic-gate 12447c478bd9Sstevel@tonic-gate /* 1245da6c28aaSamw * Sleep until we are notified, signaled, or timed out. 12467c478bd9Sstevel@tonic-gate */ 1247cd1c8b85SMatthew Ahrens if (deadline == 0) { 1248cd1c8b85SMatthew Ahrens /* immediate timeout; do not check signals */ 12497c478bd9Sstevel@tonic-gate break; 1250cd1c8b85SMatthew Ahrens } 1251a5eb7107SBryan Cantrill 1252cd1c8b85SMatthew Ahrens error = cv_timedwait_sig_hrtime(&pcp->pc_cv, 1253cd1c8b85SMatthew Ahrens &pcp->pc_lock, deadline); 1254a5eb7107SBryan Cantrill 12557c478bd9Sstevel@tonic-gate /* 1256f3bb54f3SPatrick Mooney * If we were awakened by a signal or timeout then 1257f3bb54f3SPatrick Mooney * break the loop, else poll again. 12587c478bd9Sstevel@tonic-gate */ 1259cd1c8b85SMatthew Ahrens if (error <= 0) { 1260cd1c8b85SMatthew Ahrens error = (error == 0) ? EINTR : 0; 12617c478bd9Sstevel@tonic-gate break; 1262cd1c8b85SMatthew Ahrens } else { 1263cd1c8b85SMatthew Ahrens error = 0; 12647c478bd9Sstevel@tonic-gate } 12657c478bd9Sstevel@tonic-gate } 1266f3bb54f3SPatrick Mooney pollstate_exit(pcp); 12677c478bd9Sstevel@tonic-gate 1268a5eb7107SBryan Cantrill DP_SIGMASK_RESTORE(ksetp); 1269a5eb7107SBryan Cantrill 12707c478bd9Sstevel@tonic-gate if (error == 0 && fdcnt > 0) { 1271a5eb7107SBryan Cantrill if (copyout(ps->ps_dpbuf, 1272a5eb7107SBryan Cantrill STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) { 12737c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 12747c478bd9Sstevel@tonic-gate return (EFAULT); 12757c478bd9Sstevel@tonic-gate } 12767c478bd9Sstevel@tonic-gate *rvalp = fdcnt; 12777c478bd9Sstevel@tonic-gate } 12787c478bd9Sstevel@tonic-gate break; 12797c478bd9Sstevel@tonic-gate } 12807c478bd9Sstevel@tonic-gate 12817c478bd9Sstevel@tonic-gate case DP_ISPOLLED: 12827c478bd9Sstevel@tonic-gate { 12837c478bd9Sstevel@tonic-gate pollfd_t pollfd; 
12847c478bd9Sstevel@tonic-gate polldat_t *pdp; 12857c478bd9Sstevel@tonic-gate 12867c478bd9Sstevel@tonic-gate STRUCT_INIT(dvpoll, mode); 12877c478bd9Sstevel@tonic-gate error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t)); 12887c478bd9Sstevel@tonic-gate if (error) { 12897c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 12907c478bd9Sstevel@tonic-gate return (EFAULT); 12917c478bd9Sstevel@tonic-gate } 12927c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_lock); 12937c478bd9Sstevel@tonic-gate if (pcp->pc_hash == NULL) { 12947c478bd9Sstevel@tonic-gate /* 12957c478bd9Sstevel@tonic-gate * No Need to search because no poll fd 12967c478bd9Sstevel@tonic-gate * has been cached. 12977c478bd9Sstevel@tonic-gate */ 12987c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 12997c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 13007c478bd9Sstevel@tonic-gate return (0); 13017c478bd9Sstevel@tonic-gate } 13027c478bd9Sstevel@tonic-gate if (pollfd.fd < 0) { 13037c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 13047c478bd9Sstevel@tonic-gate break; 13057c478bd9Sstevel@tonic-gate } 13067c478bd9Sstevel@tonic-gate pdp = pcache_lookup_fd(pcp, pollfd.fd); 13077c478bd9Sstevel@tonic-gate if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) && 13087c478bd9Sstevel@tonic-gate (pdp->pd_fp != NULL)) { 13097c478bd9Sstevel@tonic-gate pollfd.revents = pdp->pd_events; 13107c478bd9Sstevel@tonic-gate if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) { 13117c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 13127c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 13137c478bd9Sstevel@tonic-gate return (EFAULT); 13147c478bd9Sstevel@tonic-gate } 13157c478bd9Sstevel@tonic-gate *rvalp = 1; 13167c478bd9Sstevel@tonic-gate } 13177c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_lock); 13187c478bd9Sstevel@tonic-gate break; 13197c478bd9Sstevel@tonic-gate } 13207c478bd9Sstevel@tonic-gate 13217c478bd9Sstevel@tonic-gate default: 13227c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 13237c478bd9Sstevel@tonic-gate return (EINVAL); 
13247c478bd9Sstevel@tonic-gate } 13257c478bd9Sstevel@tonic-gate DP_REFRELE(dpep); 13267c478bd9Sstevel@tonic-gate return (error); 13277c478bd9Sstevel@tonic-gate } 13287c478bd9Sstevel@tonic-gate 1329f3bb54f3SPatrick Mooney /* 1330f3bb54f3SPatrick Mooney * Overview of Recursive Polling 1331f3bb54f3SPatrick Mooney * 1332f3bb54f3SPatrick Mooney * It is possible for /dev/poll to poll for events on file descriptors which 1333f3bb54f3SPatrick Mooney * themselves are /dev/poll handles. Pending events in the child handle are 1334f3bb54f3SPatrick Mooney * represented as readable data via the POLLIN flag. To limit surface area, 1335f3bb54f3SPatrick Mooney * this recursion is presently allowed on only /dev/poll handles which have 1336f3bb54f3SPatrick Mooney * been placed in epoll mode via the DP_EPOLLCOMPAT ioctl. Recursion depth is 1337f3bb54f3SPatrick Mooney * limited to 5 in order to be consistent with Linux epoll. 1338f3bb54f3SPatrick Mooney * 1339f3bb54f3SPatrick Mooney * Extending dppoll() for VOP_POLL: 1340f3bb54f3SPatrick Mooney * 1341f3bb54f3SPatrick Mooney * The recursive /dev/poll implementation begins by extending dppoll() to 1342f3bb54f3SPatrick Mooney * report when resources contained in the pollcache have relevant event state. 1343f3bb54f3SPatrick Mooney * At the highest level, it means calling dp_pcache_poll() so it indicates if 1344f3bb54f3SPatrick Mooney * fd events are present without consuming them or altering the pollcache 1345f3bb54f3SPatrick Mooney * bitmap. This ensures that a subsequent DP_POLL operation on the bitmap will 1346f3bb54f3SPatrick Mooney * yield the initiating event. Additionally, the VOP_POLL should return in 1347f3bb54f3SPatrick Mooney * such a way that dp_pcache_poll() does not clear the parent bitmap entry 1348f3bb54f3SPatrick Mooney * which corresponds to the child /dev/poll fd. 
 * This means that child pollcaches will be checked during every poll, which
 * facilitates wake-up behavior detailed below.
 *
 * Pollcache Links and Wake Events:
 *
 * Recursive /dev/poll avoids complicated pollcache locking constraints during
 * pollwakeup events by eschewing the traditional pollhead mechanism in favor
 * of a different approach.  For each pollcache at the root of a recursive
 * /dev/poll "tree", pcachelink_t structures are established to all child
 * /dev/poll pollcaches.  During pollnotify() in a child pollcache, the
 * linked list of pcachelink_t entries is walked, where those marked as valid
 * incur a cv_broadcast to their parent pollcache.  Most notably, these
 * pcachelink_t cv wakeups are performed without acquiring pc_lock on the
 * parent pollcache (which would require careful deadlock avoidance).  This
 * still allows the woken poll on the parent to discover the pertinent events
 * due to the fact that bitmap entries for the child pollcache are always
 * maintained by the dppoll() logic above.
 *
 * Depth Limiting and Loop Prevention:
 *
 * As each pollcache is encountered (either via DP_POLL or dppoll()), depth and
 * loop constraints are enforced via pollstate_enter().
 * The pollcache_t pointer is compared against any existing entries in
 * ps_pc_stack and is added to the end if no match (and therefore loop) is
 * found.  Once poll operations for a given pollcache_t are complete,
 * pollstate_exit() clears the pointer from the list.  The pollstate_enter()
 * and pollstate_exit() functions are responsible for acquiring and releasing
 * pc_lock, respectively.
 *
 * Deadlock Safety:
 *
 * Descending through a tree of recursive /dev/poll handles involves the tricky
 * business of sequentially entering multiple pollcache locks.  This tree
 * topology cannot define a lock acquisition order in such a way that it is
 * immune to deadlocks between threads.  The pollstate_enter() and
 * pollstate_exit() functions provide an interface for recursive /dev/poll
 * operations to safely lock pollcaches while failing gracefully in the face of
 * deadlocking topologies.  (See pollstate_contend() for more detail about how
 * deadlocks are detected and resolved.)
1387f3bb54f3SPatrick Mooney */ 1388f3bb54f3SPatrick Mooney 13897c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 13907c478bd9Sstevel@tonic-gate static int 13917c478bd9Sstevel@tonic-gate dppoll(dev_t dev, short events, int anyyet, short *reventsp, 13927c478bd9Sstevel@tonic-gate struct pollhead **phpp) 13937c478bd9Sstevel@tonic-gate { 1394a5eb7107SBryan Cantrill minor_t minor; 1395a5eb7107SBryan Cantrill dp_entry_t *dpep; 1396f3bb54f3SPatrick Mooney pollcache_t *pcp; 1397f3bb54f3SPatrick Mooney int res, rc = 0; 1398a5eb7107SBryan Cantrill 1399a5eb7107SBryan Cantrill minor = getminor(dev); 1400a5eb7107SBryan Cantrill mutex_enter(&devpoll_lock); 1401f3bb54f3SPatrick Mooney ASSERT(minor < dptblsize); 1402a5eb7107SBryan Cantrill dpep = devpolltbl[minor]; 1403a5eb7107SBryan Cantrill ASSERT(dpep != NULL); 1404a5eb7107SBryan Cantrill mutex_exit(&devpoll_lock); 1405a5eb7107SBryan Cantrill 1406f3bb54f3SPatrick Mooney mutex_enter(&dpep->dpe_lock); 1407f3bb54f3SPatrick Mooney if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) == 0) { 1408f3bb54f3SPatrick Mooney /* Poll recursion is not yet supported for non-epoll handles */ 14097c478bd9Sstevel@tonic-gate *reventsp = POLLERR; 1410f3bb54f3SPatrick Mooney mutex_exit(&dpep->dpe_lock); 14117c478bd9Sstevel@tonic-gate return (0); 1412f3bb54f3SPatrick Mooney } else { 1413f3bb54f3SPatrick Mooney dpep->dpe_refcnt++; 1414f3bb54f3SPatrick Mooney pcp = dpep->dpe_pcache; 1415f3bb54f3SPatrick Mooney mutex_exit(&dpep->dpe_lock); 1416f3bb54f3SPatrick Mooney } 1417f3bb54f3SPatrick Mooney 1418f3bb54f3SPatrick Mooney res = pollstate_enter(pcp); 1419f3bb54f3SPatrick Mooney if (res == PSE_SUCCESS) { 1420f3bb54f3SPatrick Mooney nfds_t nfds = 1; 1421f3bb54f3SPatrick Mooney int fdcnt = 0; 1422f3bb54f3SPatrick Mooney pollstate_t *ps = curthread->t_pollstate; 1423f3bb54f3SPatrick Mooney 1424f3bb54f3SPatrick Mooney rc = dp_pcache_poll(dpep, NULL, pcp, nfds, &fdcnt); 1425f3bb54f3SPatrick Mooney if (rc == 0) { 1426f3bb54f3SPatrick Mooney *reventsp = (fdcnt > 0) ? 
POLLIN : 0; 1427f3bb54f3SPatrick Mooney } 1428f3bb54f3SPatrick Mooney pcachelink_assoc(pcp, ps->ps_pc_stack[0]); 1429f3bb54f3SPatrick Mooney pollstate_exit(pcp); 1430f3bb54f3SPatrick Mooney } else { 1431f3bb54f3SPatrick Mooney switch (res) { 1432f3bb54f3SPatrick Mooney case PSE_FAIL_DEPTH: 1433f3bb54f3SPatrick Mooney rc = EINVAL; 1434f3bb54f3SPatrick Mooney break; 1435f3bb54f3SPatrick Mooney case PSE_FAIL_LOOP: 1436f3bb54f3SPatrick Mooney case PSE_FAIL_DEADLOCK: 1437f3bb54f3SPatrick Mooney rc = ELOOP; 1438f3bb54f3SPatrick Mooney break; 1439f3bb54f3SPatrick Mooney default: 1440f3bb54f3SPatrick Mooney /* 1441f3bb54f3SPatrick Mooney * If anything else has gone awry, such as being polled 1442f3bb54f3SPatrick Mooney * from an unexpected context, fall back to the 1443f3bb54f3SPatrick Mooney * recursion-intolerant response. 1444f3bb54f3SPatrick Mooney */ 1445f3bb54f3SPatrick Mooney *reventsp = POLLERR; 1446f3bb54f3SPatrick Mooney rc = 0; 1447f3bb54f3SPatrick Mooney break; 1448f3bb54f3SPatrick Mooney } 1449f3bb54f3SPatrick Mooney } 1450f3bb54f3SPatrick Mooney 1451f3bb54f3SPatrick Mooney DP_REFRELE(dpep); 1452f3bb54f3SPatrick Mooney return (rc); 14537c478bd9Sstevel@tonic-gate } 14547c478bd9Sstevel@tonic-gate 14557c478bd9Sstevel@tonic-gate /* 14567c478bd9Sstevel@tonic-gate * devpoll close should do enough clean up before the pollcache is deleted, 14577c478bd9Sstevel@tonic-gate * i.e., it should ensure no one still references the pollcache later. 14587c478bd9Sstevel@tonic-gate * There is no "permission" check in here. Any process having the last 14597c478bd9Sstevel@tonic-gate * reference of this /dev/poll fd can close. 
14607c478bd9Sstevel@tonic-gate */ 14617c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 14627c478bd9Sstevel@tonic-gate static int 14637c478bd9Sstevel@tonic-gate dpclose(dev_t dev, int flag, int otyp, cred_t *credp) 14647c478bd9Sstevel@tonic-gate { 14657c478bd9Sstevel@tonic-gate minor_t minor; 14667c478bd9Sstevel@tonic-gate dp_entry_t *dpep; 14677c478bd9Sstevel@tonic-gate pollcache_t *pcp; 14687c478bd9Sstevel@tonic-gate int i; 14697c478bd9Sstevel@tonic-gate polldat_t **hashtbl; 14707c478bd9Sstevel@tonic-gate polldat_t *pdp; 14717c478bd9Sstevel@tonic-gate 14727c478bd9Sstevel@tonic-gate minor = getminor(dev); 14737c478bd9Sstevel@tonic-gate 14747c478bd9Sstevel@tonic-gate mutex_enter(&devpoll_lock); 14757c478bd9Sstevel@tonic-gate dpep = devpolltbl[minor]; 14767c478bd9Sstevel@tonic-gate ASSERT(dpep != NULL); 14777c478bd9Sstevel@tonic-gate devpolltbl[minor] = NULL; 14787c478bd9Sstevel@tonic-gate mutex_exit(&devpoll_lock); 14797c478bd9Sstevel@tonic-gate pcp = dpep->dpe_pcache; 14807c478bd9Sstevel@tonic-gate ASSERT(pcp != NULL); 14817c478bd9Sstevel@tonic-gate /* 14827c478bd9Sstevel@tonic-gate * At this point, no other lwp can access this pollcache via the 14837c478bd9Sstevel@tonic-gate * /dev/poll fd. This pollcache is going away, so do the clean 14847c478bd9Sstevel@tonic-gate * up without the pc_lock. 14857c478bd9Sstevel@tonic-gate */ 14867c478bd9Sstevel@tonic-gate hashtbl = pcp->pc_hash; 14877c478bd9Sstevel@tonic-gate for (i = 0; i < pcp->pc_hashsize; i++) { 14887c478bd9Sstevel@tonic-gate for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 14897c478bd9Sstevel@tonic-gate if (pdp->pd_php != NULL) { 14907c478bd9Sstevel@tonic-gate pollhead_delete(pdp->pd_php, pdp); 14917c478bd9Sstevel@tonic-gate pdp->pd_php = NULL; 14927c478bd9Sstevel@tonic-gate pdp->pd_fp = NULL; 14937c478bd9Sstevel@tonic-gate } 14947c478bd9Sstevel@tonic-gate } 14957c478bd9Sstevel@tonic-gate } 14967c478bd9Sstevel@tonic-gate /* 14977c478bd9Sstevel@tonic-gate * pollwakeup() may still interact with this pollcache. 
Wait until 14987c478bd9Sstevel@tonic-gate * it is done. 14997c478bd9Sstevel@tonic-gate */ 15007c478bd9Sstevel@tonic-gate mutex_enter(&pcp->pc_no_exit); 15017c478bd9Sstevel@tonic-gate ASSERT(pcp->pc_busy >= 0); 15027c478bd9Sstevel@tonic-gate while (pcp->pc_busy > 0) 15037c478bd9Sstevel@tonic-gate cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit); 15047c478bd9Sstevel@tonic-gate mutex_exit(&pcp->pc_no_exit); 1505f3bb54f3SPatrick Mooney 1506f3bb54f3SPatrick Mooney /* Clean up any pollcache links created via recursive /dev/poll */ 1507f3bb54f3SPatrick Mooney if (pcp->pc_parents != NULL || pcp->pc_children != NULL) { 1508f3bb54f3SPatrick Mooney /* 1509f3bb54f3SPatrick Mooney * Because of the locking rules for pcachelink manipulation, 1510f3bb54f3SPatrick Mooney * acquring pc_lock is required for this step. 1511f3bb54f3SPatrick Mooney */ 1512f3bb54f3SPatrick Mooney mutex_enter(&pcp->pc_lock); 1513f3bb54f3SPatrick Mooney pcachelink_purge_all(pcp); 1514f3bb54f3SPatrick Mooney mutex_exit(&pcp->pc_lock); 1515f3bb54f3SPatrick Mooney } 1516f3bb54f3SPatrick Mooney 15177c478bd9Sstevel@tonic-gate pcache_destroy(pcp); 15187c478bd9Sstevel@tonic-gate ASSERT(dpep->dpe_refcnt == 0); 15197c478bd9Sstevel@tonic-gate kmem_free(dpep, sizeof (dp_entry_t)); 15207c478bd9Sstevel@tonic-gate return (0); 15217c478bd9Sstevel@tonic-gate } 1522f3bb54f3SPatrick Mooney 1523f3bb54f3SPatrick Mooney static void 1524f3bb54f3SPatrick Mooney pcachelink_locked_rele(pcachelink_t *pl) 1525f3bb54f3SPatrick Mooney { 1526f3bb54f3SPatrick Mooney ASSERT(MUTEX_HELD(&pl->pcl_lock)); 1527f3bb54f3SPatrick Mooney VERIFY(pl->pcl_refcnt >= 1); 1528f3bb54f3SPatrick Mooney 1529f3bb54f3SPatrick Mooney pl->pcl_refcnt--; 1530f3bb54f3SPatrick Mooney if (pl->pcl_refcnt == 0) { 1531f3bb54f3SPatrick Mooney VERIFY(pl->pcl_state == PCL_INVALID); 1532f3bb54f3SPatrick Mooney ASSERT(pl->pcl_parent_pc == NULL); 1533f3bb54f3SPatrick Mooney ASSERT(pl->pcl_child_pc == NULL); 1534f3bb54f3SPatrick Mooney ASSERT(pl->pcl_parent_next == NULL); 
1535f3bb54f3SPatrick Mooney ASSERT(pl->pcl_child_next == NULL); 1536f3bb54f3SPatrick Mooney 1537f3bb54f3SPatrick Mooney pl->pcl_state = PCL_FREE; 1538f3bb54f3SPatrick Mooney mutex_destroy(&pl->pcl_lock); 1539f3bb54f3SPatrick Mooney kmem_free(pl, sizeof (pcachelink_t)); 1540f3bb54f3SPatrick Mooney } else { 1541f3bb54f3SPatrick Mooney mutex_exit(&pl->pcl_lock); 1542f3bb54f3SPatrick Mooney } 1543f3bb54f3SPatrick Mooney } 1544f3bb54f3SPatrick Mooney 1545f3bb54f3SPatrick Mooney /* 1546f3bb54f3SPatrick Mooney * Associate parent and child pollcaches via a pcachelink_t. If an existing 1547f3bb54f3SPatrick Mooney * link (stale or valid) between the two is found, it will be reused. If a 1548f3bb54f3SPatrick Mooney * suitable link is not found for reuse, a new one will be allocated. 1549f3bb54f3SPatrick Mooney */ 1550f3bb54f3SPatrick Mooney static void 1551f3bb54f3SPatrick Mooney pcachelink_assoc(pollcache_t *child, pollcache_t *parent) 1552f3bb54f3SPatrick Mooney { 1553f3bb54f3SPatrick Mooney pcachelink_t *pl, **plpn; 1554f3bb54f3SPatrick Mooney 1555f3bb54f3SPatrick Mooney ASSERT(MUTEX_HELD(&child->pc_lock)); 1556f3bb54f3SPatrick Mooney ASSERT(MUTEX_HELD(&parent->pc_lock)); 1557f3bb54f3SPatrick Mooney 1558f3bb54f3SPatrick Mooney /* Search for an existing link we can reuse. 
*/ 1559f3bb54f3SPatrick Mooney plpn = &child->pc_parents; 1560f3bb54f3SPatrick Mooney for (pl = child->pc_parents; pl != NULL; pl = *plpn) { 1561f3bb54f3SPatrick Mooney mutex_enter(&pl->pcl_lock); 1562f3bb54f3SPatrick Mooney if (pl->pcl_state == PCL_INVALID) { 1563f3bb54f3SPatrick Mooney /* Clean any invalid links while walking the list */ 1564f3bb54f3SPatrick Mooney *plpn = pl->pcl_parent_next; 1565f3bb54f3SPatrick Mooney pl->pcl_child_pc = NULL; 1566f3bb54f3SPatrick Mooney pl->pcl_parent_next = NULL; 1567f3bb54f3SPatrick Mooney pcachelink_locked_rele(pl); 1568f3bb54f3SPatrick Mooney } else if (pl->pcl_parent_pc == parent) { 1569f3bb54f3SPatrick Mooney /* Successfully found parent link */ 1570f3bb54f3SPatrick Mooney ASSERT(pl->pcl_state == PCL_VALID || 1571f3bb54f3SPatrick Mooney pl->pcl_state == PCL_STALE); 1572f3bb54f3SPatrick Mooney pl->pcl_state = PCL_VALID; 1573f3bb54f3SPatrick Mooney mutex_exit(&pl->pcl_lock); 1574f3bb54f3SPatrick Mooney return; 1575f3bb54f3SPatrick Mooney } else { 1576f3bb54f3SPatrick Mooney plpn = &pl->pcl_parent_next; 1577f3bb54f3SPatrick Mooney mutex_exit(&pl->pcl_lock); 1578f3bb54f3SPatrick Mooney } 1579f3bb54f3SPatrick Mooney } 1580f3bb54f3SPatrick Mooney 1581f3bb54f3SPatrick Mooney /* No existing link to the parent was found. Create a fresh one. 
*/ 1582f3bb54f3SPatrick Mooney pl = kmem_zalloc(sizeof (pcachelink_t), KM_SLEEP); 1583f3bb54f3SPatrick Mooney mutex_init(&pl->pcl_lock, NULL, MUTEX_DEFAULT, NULL); 1584f3bb54f3SPatrick Mooney 1585f3bb54f3SPatrick Mooney pl->pcl_parent_pc = parent; 1586f3bb54f3SPatrick Mooney pl->pcl_child_next = parent->pc_children; 1587f3bb54f3SPatrick Mooney parent->pc_children = pl; 1588f3bb54f3SPatrick Mooney pl->pcl_refcnt++; 1589f3bb54f3SPatrick Mooney 1590f3bb54f3SPatrick Mooney pl->pcl_child_pc = child; 1591f3bb54f3SPatrick Mooney pl->pcl_parent_next = child->pc_parents; 1592f3bb54f3SPatrick Mooney child->pc_parents = pl; 1593f3bb54f3SPatrick Mooney pl->pcl_refcnt++; 1594f3bb54f3SPatrick Mooney 1595f3bb54f3SPatrick Mooney pl->pcl_state = PCL_VALID; 1596f3bb54f3SPatrick Mooney } 1597f3bb54f3SPatrick Mooney 1598f3bb54f3SPatrick Mooney /* 1599f3bb54f3SPatrick Mooney * Mark all child links in a pollcache as stale. Any invalid child links found 1600f3bb54f3SPatrick Mooney * during iteration are purged. 1601f3bb54f3SPatrick Mooney */ 1602f3bb54f3SPatrick Mooney static void 1603f3bb54f3SPatrick Mooney pcachelink_mark_stale(pollcache_t *pcp) 1604f3bb54f3SPatrick Mooney { 1605f3bb54f3SPatrick Mooney pcachelink_t *pl, **plpn; 1606f3bb54f3SPatrick Mooney 1607f3bb54f3SPatrick Mooney ASSERT(MUTEX_HELD(&pcp->pc_lock)); 1608f3bb54f3SPatrick Mooney 1609f3bb54f3SPatrick Mooney plpn = &pcp->pc_children; 1610f3bb54f3SPatrick Mooney for (pl = pcp->pc_children; pl != NULL; pl = *plpn) { 1611f3bb54f3SPatrick Mooney mutex_enter(&pl->pcl_lock); 1612f3bb54f3SPatrick Mooney if (pl->pcl_state == PCL_INVALID) { 1613f3bb54f3SPatrick Mooney /* 1614f3bb54f3SPatrick Mooney * Remove any invalid links while we are going to the 1615f3bb54f3SPatrick Mooney * trouble of walking the list. 
1616f3bb54f3SPatrick Mooney */ 1617f3bb54f3SPatrick Mooney *plpn = pl->pcl_child_next; 1618f3bb54f3SPatrick Mooney pl->pcl_parent_pc = NULL; 1619f3bb54f3SPatrick Mooney pl->pcl_child_next = NULL; 1620f3bb54f3SPatrick Mooney pcachelink_locked_rele(pl); 1621f3bb54f3SPatrick Mooney } else { 1622f3bb54f3SPatrick Mooney pl->pcl_state = PCL_STALE; 1623f3bb54f3SPatrick Mooney plpn = &pl->pcl_child_next; 1624f3bb54f3SPatrick Mooney mutex_exit(&pl->pcl_lock); 1625f3bb54f3SPatrick Mooney } 1626f3bb54f3SPatrick Mooney } 1627f3bb54f3SPatrick Mooney } 1628f3bb54f3SPatrick Mooney 1629f3bb54f3SPatrick Mooney /* 1630f3bb54f3SPatrick Mooney * Purge all stale (or invalid) child links from a pollcache. 1631f3bb54f3SPatrick Mooney */ 1632f3bb54f3SPatrick Mooney static void 1633f3bb54f3SPatrick Mooney pcachelink_purge_stale(pollcache_t *pcp) 1634f3bb54f3SPatrick Mooney { 1635f3bb54f3SPatrick Mooney pcachelink_t *pl, **plpn; 1636f3bb54f3SPatrick Mooney 1637f3bb54f3SPatrick Mooney ASSERT(MUTEX_HELD(&pcp->pc_lock)); 1638f3bb54f3SPatrick Mooney 1639f3bb54f3SPatrick Mooney plpn = &pcp->pc_children; 1640f3bb54f3SPatrick Mooney for (pl = pcp->pc_children; pl != NULL; pl = *plpn) { 1641f3bb54f3SPatrick Mooney mutex_enter(&pl->pcl_lock); 1642f3bb54f3SPatrick Mooney switch (pl->pcl_state) { 1643f3bb54f3SPatrick Mooney case PCL_STALE: 1644f3bb54f3SPatrick Mooney pl->pcl_state = PCL_INVALID; 1645f3bb54f3SPatrick Mooney /* FALLTHROUGH */ 1646f3bb54f3SPatrick Mooney case PCL_INVALID: 1647f3bb54f3SPatrick Mooney *plpn = pl->pcl_child_next; 1648f3bb54f3SPatrick Mooney pl->pcl_parent_pc = NULL; 1649f3bb54f3SPatrick Mooney pl->pcl_child_next = NULL; 1650f3bb54f3SPatrick Mooney pcachelink_locked_rele(pl); 1651f3bb54f3SPatrick Mooney break; 1652f3bb54f3SPatrick Mooney default: 1653f3bb54f3SPatrick Mooney plpn = &pl->pcl_child_next; 1654f3bb54f3SPatrick Mooney mutex_exit(&pl->pcl_lock); 1655f3bb54f3SPatrick Mooney } 1656f3bb54f3SPatrick Mooney } 1657f3bb54f3SPatrick Mooney } 1658f3bb54f3SPatrick Mooney 
1659f3bb54f3SPatrick Mooney /* 1660f3bb54f3SPatrick Mooney * Purge all child and parent links from a pollcache, regardless of status. 1661f3bb54f3SPatrick Mooney */ 1662f3bb54f3SPatrick Mooney static void 1663f3bb54f3SPatrick Mooney pcachelink_purge_all(pollcache_t *pcp) 1664f3bb54f3SPatrick Mooney { 1665f3bb54f3SPatrick Mooney pcachelink_t *pl, **plpn; 1666f3bb54f3SPatrick Mooney 1667f3bb54f3SPatrick Mooney ASSERT(MUTEX_HELD(&pcp->pc_lock)); 1668f3bb54f3SPatrick Mooney 1669f3bb54f3SPatrick Mooney plpn = &pcp->pc_parents; 1670f3bb54f3SPatrick Mooney for (pl = pcp->pc_parents; pl != NULL; pl = *plpn) { 1671f3bb54f3SPatrick Mooney mutex_enter(&pl->pcl_lock); 1672f3bb54f3SPatrick Mooney pl->pcl_state = PCL_INVALID; 1673f3bb54f3SPatrick Mooney *plpn = pl->pcl_parent_next; 1674f3bb54f3SPatrick Mooney pl->pcl_child_pc = NULL; 1675f3bb54f3SPatrick Mooney pl->pcl_parent_next = NULL; 1676f3bb54f3SPatrick Mooney pcachelink_locked_rele(pl); 1677f3bb54f3SPatrick Mooney } 1678f3bb54f3SPatrick Mooney 1679f3bb54f3SPatrick Mooney plpn = &pcp->pc_children; 1680f3bb54f3SPatrick Mooney for (pl = pcp->pc_children; pl != NULL; pl = *plpn) { 1681f3bb54f3SPatrick Mooney mutex_enter(&pl->pcl_lock); 1682f3bb54f3SPatrick Mooney pl->pcl_state = PCL_INVALID; 1683f3bb54f3SPatrick Mooney *plpn = pl->pcl_child_next; 1684f3bb54f3SPatrick Mooney pl->pcl_parent_pc = NULL; 1685f3bb54f3SPatrick Mooney pl->pcl_child_next = NULL; 1686f3bb54f3SPatrick Mooney pcachelink_locked_rele(pl); 1687f3bb54f3SPatrick Mooney } 1688f3bb54f3SPatrick Mooney 1689f3bb54f3SPatrick Mooney ASSERT(pcp->pc_parents == NULL); 1690f3bb54f3SPatrick Mooney ASSERT(pcp->pc_children == NULL); 1691f3bb54f3SPatrick Mooney } 1692