1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Portions of this source code were derived from Berkeley 4.3 BSD 32 * under license from the Regents of the University of California. 33 */ 34 35 #pragma ident "%Z%%M% %I% %E% SMI" 36 37 #include <sys/param.h> 38 #include <sys/isa_defs.h> 39 #include <sys/types.h> 40 #include <sys/sysmacros.h> 41 #include <sys/user.h> 42 #include <sys/systm.h> 43 #include <sys/errno.h> 44 #include <sys/time.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/mode.h> 48 #include <sys/proc.h> 49 #include <sys/uio.h> 50 #include <sys/poll_impl.h> 51 #include <sys/kmem.h> 52 #include <sys/cmn_err.h> 53 #include <sys/debug.h> 54 #include <sys/bitmap.h> 55 #include <sys/kstat.h> 56 #include <sys/rctl.h> 57 #include <sys/port_impl.h> 58 #include <sys/schedctl.h> 59 60 #define NPHLOCKS 64 /* Number of locks; must be power of 2 */ 61 #define PHLOCKADDR(php) &plocks[(((uintptr_t)(php)) >> 8) & (NPHLOCKS - 1)] 62 #define PHLOCK(php) PHLOCKADDR(php).pp_lock 63 #define PH_ENTER(php) mutex_enter(PHLOCK(php)) 64 #define PH_EXIT(php) mutex_exit(PHLOCK(php)) 65 #define VALID_POLL_EVENTS (POLLIN | POLLPRI | POLLOUT | POLLRDNORM \ 66 | POLLRDBAND | POLLWRBAND | POLLHUP | POLLERR | POLLNVAL) 67 68 /* 69 * global counters to collect some stats 70 */ 71 static struct { 72 kstat_named_t polllistmiss; /* failed to find a cached poll list */ 73 kstat_named_t pollcachehit; /* list matched 100% w/ cached one */ 74 kstat_named_t pollcachephit; /* list matched < 100% w/ cached one */ 75 kstat_named_t pollcachemiss; /* every list entry is dif from cache */ 76 } pollstats = { 77 { "polllistmiss", KSTAT_DATA_UINT64 }, 78 { "pollcachehit", KSTAT_DATA_UINT64 }, 79 { "pollcachephit", KSTAT_DATA_UINT64 }, 80 { "pollcachemiss", KSTAT_DATA_UINT64 } 81 }; 82 83 kstat_named_t *pollstats_ptr = (kstat_named_t *)&pollstats; 84 uint_t pollstats_ndata = sizeof (pollstats) / sizeof (kstat_named_t); 85 86 struct pplock { 87 kmutex_t pp_lock; 88 short pp_flag; 89 kcondvar_t pp_wait_cv; 90 int32_t pp_pad; /* to a nice round 16 bytes */ 91 }; 92 93 static struct pplock plocks[NPHLOCKS]; /* Hash array of pollhead locks */ 94 95 #ifdef DEBUG 96 static int pollchecksanity(pollstate_t *, nfds_t); 97 static int pollcheckxref(pollstate_t *, int); 98 static void pollcheckphlist(void); 99 static int pollcheckrevents(pollstate_t *, int, int, int); 100 static void checkpolldat(pollstate_t *); 101 #endif /* DEBUG */ 102 static int plist_chkdupfd(file_t *, polldat_t *, pollstate_t *, pollfd_t *, int, 103 int *); 104 105 /* 106 * Data structure overview: 107 * The per-thread poll state consists of 108 * one pollstate_t 109 * one pollcache_t 110 * one bitmap with one event bit per fd 111 * a (two-dimensional) hashed array of polldat_t structures - one entry 112 * per fd 113 * 114 * This conglomerate of data structures interact with 115 * the pollhead which is used by VOP_POLL and pollwakeup 116 * (protected by the PHLOCK, cached array of plocks), and 117 * the fpollinfo list hanging off the fi_list which is used to notify 118 * poll when a cached fd is closed. This is protected by uf_lock. 119 * 120 * Invariants: 121 * pd_php (pollhead pointer) is set iff (if and only if) the polldat 122 * is on that pollhead. This is modified atomically under pc_lock. 123 * 124 * pd_fp (file_t pointer) is set iff the thread is on the fpollinfo 125 * list for that open file. 126 * This is modified atomically under pc_lock. 127 * 128 * pd_count is the sum (over all values of i) of pd_ref[i].xf_refcnt. 129 * Iff pd_ref[i].xf_refcnt >= 1 then 130 * ps_pcacheset[i].pcs_pollfd[pd_ref[i].xf_position].fd == pd_fd 131 * Iff pd_ref[i].xf_refcnt > 1 then 132 * In ps_pcacheset[i].pcs_pollfd between index 133 * pd_ref[i].xf_position] and the end of the list 134 * there are xf_refcnt entries with .fd == pd_fd 135 * 136 * Locking design: 137 * Whenever possible the design relies on the fact that the poll cache state 138 * is per thread thus for both poll and exit it is self-synchronizing. 139 * Thus the key interactions where other threads access the state are: 140 * pollwakeup (and polltime), and 141 * close cleaning up the cached references to an open file 142 * 143 * The two key locks in poll proper is ps_lock and pc_lock. 144 * 145 * The ps_lock is used for synchronization between poll, (lwp_)exit and close 146 * to ensure that modifications to pollcacheset structure are serialized. 147 * This lock is held through most of poll() except where poll sleeps 148 * since there is little need to handle closes concurrently with the execution 149 * of poll. 150 * The pc_lock protects most of the fields in pollcache structure and polldat 151 * structures (which are accessed by poll, pollwakeup, and polltime) 152 * with the exception of fields that are only modified when only one thread 153 * can access this per-thread state. 154 * Those exceptions occur in poll when first allocating the per-thread state, 155 * when poll grows the number of polldat (never shrinks), and when 156 * exit/pollcleanup has ensured that there are no references from either 157 * pollheads or fpollinfo to the threads poll state. 158 * 159 * Poll(2) system call is the only path which ps_lock and pc_lock are both 160 * held, in that order. It needs ps_lock to synchronize with close and 161 * lwp_exit; and pc_lock with pollwakeup. 162 * 163 * The locking interaction between pc_lock and PHLOCK take into account 164 * that poll acquires these locks in the order of pc_lock and then PHLOCK 165 * while pollwakeup does it in the reverse order. Thus pollwakeup implements 166 * deadlock avoidance by dropping the locks and reacquiring them in the 167 * reverse order. For this to work pollwakeup needs to prevent the thread 168 * from exiting and freeing all of the poll related state. Thus is done 169 * using 170 * the pc_no_exit lock 171 * the pc_busy counter 172 * the pc_busy_cv condition variable 173 * 174 * The locking interaction between pc_lock and uf_lock has similar 175 * issues. Poll holds ps_lock and/or pc_lock across calls to getf/releasef 176 * which acquire uf_lock. The poll cleanup in close needs to hold uf_lock 177 * to prevent poll or exit from doing a delfpollinfo after which the thread 178 * might exit. But the cleanup needs to acquire pc_lock when modifying 179 * the poll cache state. The solution is to use pc_busy and do the close 180 * cleanup in two phases: 181 * First close calls pollblockexit which increments pc_busy. 182 * This prevents the per-thread poll related state from being freed. 183 * Then close drops uf_lock and calls pollcacheclean. 184 * This routine can then acquire pc_lock and remove any references 185 * to the closing fd (as well as recording that it has been closed 186 * so that a POLLNVAL can be generated even if the fd is reused before 187 * poll has been woken up and checked getf() again). 188 * 189 * When removing a polled fd from poll cache, the fd is always removed 190 * from pollhead list first and then from fpollinfo list, i.e., 191 * pollhead_delete() is called before delfpollinfo(). 192 * 193 * 194 * Locking hierarchy: 195 * pc_no_exit is a leaf level lock. 196 * ps_lock is held when acquiring pc_lock (except when pollwakeup 197 * acquires pc_lock). 198 * pc_lock might be held when acquiring PHLOCK (pollhead_insert/ 199 * pollhead_delete) 200 * pc_lock is always held (but this is not required) 201 * when acquiring PHLOCK (in polladd/pollhead_delete and pollwakeup called 202 * from pcache_clean_entry). 203 * pc_lock is held across addfpollinfo/delfpollinfo which acquire 204 * uf_lock. 205 * pc_lock is held across getf/releasef which acquire uf_lock. 206 * ps_lock might be held across getf/releasef which acquire uf_lock. 207 * pollwakeup tries to acquire pc_lock while holding PHLOCK 208 * but drops the locks and reacquire them in reverse order to avoid 209 * deadlock. 210 * 211 * Note also that there is deadlock avoidance support for VOP_POLL routines 212 * and pollwakeup involving a file system or driver lock. 213 * See below. 214 */ 215 216 /* 217 * Deadlock avoidance support for VOP_POLL() routines. This is 218 * sometimes necessary to prevent deadlock between polling threads 219 * (which hold poll locks on entry to xx_poll(), then acquire foo) 220 * and pollwakeup() threads (which hold foo, then acquire poll locks). 221 * 222 * pollunlock(void) releases whatever poll locks the current thread holds, 223 * returning a cookie for use by pollrelock(); 224 * 225 * pollrelock(cookie) reacquires previously dropped poll locks; 226 * 227 * polllock(php, mutex) does the common case: pollunlock(), 228 * acquire the problematic mutex, pollrelock(). 229 */ 230 int 231 pollunlock(void) 232 { 233 pollcache_t *pcp; 234 int lockstate = 0; 235 236 /* 237 * t_pollcache is set by /dev/poll and event ports (port_fd.c). 238 * If the pollrelock/pollunlock is called as a result of poll(2), 239 * the t_pollcache should be NULL. 240 */ 241 if (curthread->t_pollcache == NULL) 242 pcp = curthread->t_pollstate->ps_pcache; 243 else 244 pcp = curthread->t_pollcache; 245 246 if (mutex_owned(&pcp->pc_lock)) { 247 lockstate = 1; 248 mutex_exit(&pcp->pc_lock); 249 } 250 return (lockstate); 251 } 252 253 void 254 pollrelock(int lockstate) 255 { 256 pollcache_t *pcp; 257 258 /* 259 * t_pollcache is set by /dev/poll and event ports (port_fd.c). 260 * If the pollrelock/pollunlock is called as a result of poll(2), 261 * the t_pollcache should be NULL. 262 */ 263 if (curthread->t_pollcache == NULL) 264 pcp = curthread->t_pollstate->ps_pcache; 265 else 266 pcp = curthread->t_pollcache; 267 268 if (lockstate > 0) 269 mutex_enter(&pcp->pc_lock); 270 } 271 272 /* ARGSUSED */ 273 void 274 polllock(pollhead_t *php, kmutex_t *lp) 275 { 276 if (!mutex_tryenter(lp)) { 277 int lockstate = pollunlock(); 278 mutex_enter(lp); 279 pollrelock(lockstate); 280 } 281 } 282 283 static int 284 poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp) 285 { 286 kthread_t *t = curthread; 287 klwp_t *lwp = ttolwp(t); 288 proc_t *p = ttoproc(t); 289 int fdcnt = 0; 290 int rval; 291 int i; 292 timespec_t *rqtp = NULL; 293 int imm_timeout = 0; 294 pollfd_t *pollfdp; 295 pollstate_t *ps; 296 pollcache_t *pcp; 297 int error = 0; 298 nfds_t old_nfds; 299 int cacheindex = 0; /* which cache set is used */ 300 301 /* 302 * Determine the precise future time of the requested timeout, if any. 303 */ 304 if (tsp != NULL) { 305 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 306 imm_timeout = 1; 307 else { 308 timespec_t now; 309 gethrestime(&now); 310 rqtp = tsp; 311 timespecadd(rqtp, &now); 312 } 313 } 314 315 /* 316 * Reset our signal mask, if requested. 317 */ 318 if (ksetp != NULL) { 319 mutex_enter(&p->p_lock); 320 schedctl_finish_sigblock(t); 321 lwp->lwp_sigoldmask = t->t_hold; 322 t->t_hold = *ksetp; 323 t->t_flag |= T_TOMASK; 324 /* 325 * Call cv_timedwait_sig() just to check for signals. 326 * We will return immediately with either 0 or -1. 327 */ 328 if (!cv_timedwait_sig(&t->t_delay_cv, &p->p_lock, lbolt)) { 329 mutex_exit(&p->p_lock); 330 error = EINTR; 331 goto pollout; 332 } 333 mutex_exit(&p->p_lock); 334 } 335 336 /* 337 * Check to see if this guy just wants to use poll() as a timeout. 338 * If yes then bypass all the other stuff and make him sleep. 339 */ 340 if (nfds == 0) { 341 /* 342 * Sleep until we have passed the requested future 343 * time or until interrupted by a signal. 344 * Do not check for signals if we have a zero timeout. 345 */ 346 if (!imm_timeout) { 347 mutex_enter(&t->t_delay_lock); 348 while ((rval = cv_waituntil_sig(&t->t_delay_cv, 349 &t->t_delay_lock, rqtp)) > 0) 350 continue; 351 mutex_exit(&t->t_delay_lock); 352 if (rval == 0) 353 error = EINTR; 354 } 355 goto pollout; 356 } 357 358 if (nfds >= p->p_fno_ctl) { 359 mutex_enter(&p->p_lock); 360 (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], 361 p->p_rctls, p, RCA_SAFE); 362 mutex_exit(&p->p_lock); 363 error = EINVAL; 364 goto pollout; 365 } 366 367 /* 368 * Need to allocate memory for pollstate before anything because 369 * the mutex and cv are created in this space 370 */ 371 if ((ps = t->t_pollstate) == NULL) { 372 t->t_pollstate = pollstate_create(); 373 ps = t->t_pollstate; 374 } 375 376 if (ps->ps_pcache == NULL) 377 ps->ps_pcache = pcache_alloc(); 378 pcp = ps->ps_pcache; 379 380 /* 381 * NOTE: for performance, buffers are saved across poll() calls. 382 * The theory is that if a process polls heavily, it tends to poll 383 * on the same set of descriptors. Therefore, we only reallocate 384 * buffers when nfds changes. There is no hysteresis control, 385 * because there is no data to suggest that this is necessary; 386 * the penalty of reallocating is not *that* great in any event. 387 */ 388 old_nfds = ps->ps_nfds; 389 if (nfds != old_nfds) { 390 391 kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t)); 392 pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP); 393 ps->ps_pollfd = pollfdp; 394 ps->ps_nfds = nfds; 395 } 396 397 pollfdp = ps->ps_pollfd; 398 if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) { 399 error = EFAULT; 400 goto pollout; 401 } 402 403 if (fds == NULL) { 404 /* 405 * If the process has page 0 mapped, then the copyin() above 406 * will succeed even if fds is NULL. However, our cached 407 * poll lists are keyed by the address of the passed-in fds 408 * structure, and we use the value NULL to indicate an unused 409 * poll cache list entry. As such, we elect not to support 410 * NULL as a valid (user) memory address and fail the poll() 411 * call. 412 */ 413 error = EINVAL; 414 goto pollout; 415 } 416 417 /* 418 * If this thread polls for the first time, allocate ALL poll 419 * cache data structures and cache the poll fd list. This 420 * allocation is delayed till now because lwp's polling 0 fd 421 * (i.e. using poll as timeout()) don't need this memory. 422 */ 423 mutex_enter(&ps->ps_lock); 424 pcp = ps->ps_pcache; 425 ASSERT(pcp != NULL); 426 if (pcp->pc_bitmap == NULL) { 427 pcache_create(pcp, nfds); 428 /* 429 * poll and cache this poll fd list in ps_pcacheset[0]. 430 */ 431 error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex); 432 if (fdcnt || error) { 433 mutex_exit(&ps->ps_lock); 434 goto pollout; 435 } 436 } else { 437 pollcacheset_t *pcset = ps->ps_pcacheset; 438 439 /* 440 * Not first time polling. Select a cached poll list by 441 * matching user pollfd list buffer address. 442 */ 443 for (cacheindex = 0; cacheindex < ps->ps_nsets; cacheindex++) { 444 if (pcset[cacheindex].pcs_usradr == (uintptr_t)fds) { 445 if ((++pcset[cacheindex].pcs_count) == 0) { 446 /* 447 * counter is wrapping around. 448 */ 449 pcacheset_reset_count(ps, cacheindex); 450 } 451 /* 452 * examine and resolve possible 453 * difference of the current poll 454 * list and previously cached one. 455 * If there is an error during resolve(), 456 * the callee will guarantee the consistency 457 * of cached poll list and cache content. 458 */ 459 error = pcacheset_resolve(ps, nfds, &fdcnt, 460 cacheindex); 461 if (error) { 462 mutex_exit(&ps->ps_lock); 463 goto pollout; 464 } 465 break; 466 } 467 468 /* 469 * Note that pcs_usradr field of an used entry won't be 470 * NULL because it stores the address of passed-in fds, 471 * and NULL fds will not be cached (Then it is either 472 * the special timeout case when nfds is 0 or it returns 473 * failure directly). 474 */ 475 if (pcset[cacheindex].pcs_usradr == NULL) { 476 /* 477 * found an unused entry. Use it to cache 478 * this poll list. 479 */ 480 error = pcacheset_cache_list(ps, fds, &fdcnt, 481 cacheindex); 482 if (fdcnt || error) { 483 mutex_exit(&ps->ps_lock); 484 goto pollout; 485 } 486 break; 487 } 488 } 489 if (cacheindex == ps->ps_nsets) { 490 /* 491 * We failed to find a matching cached poll fd list. 492 * replace an old list. 493 */ 494 pollstats.polllistmiss.value.ui64++; 495 cacheindex = pcacheset_replace(ps); 496 ASSERT(cacheindex < ps->ps_nsets); 497 pcset[cacheindex].pcs_usradr = (uintptr_t)fds; 498 error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex); 499 if (error) { 500 mutex_exit(&ps->ps_lock); 501 goto pollout; 502 } 503 } 504 } 505 506 /* 507 * Always scan the bitmap with the lock on the pollcache held. 508 * This is to make sure that a wakeup does not come undetected. 509 * If the lock is not held, a pollwakeup could have come for an 510 * fd we already checked but before this thread sleeps, in which 511 * case the wakeup is missed. Now we hold the pcache lock and 512 * check the bitmap again. This will prevent wakeup from happening 513 * while we hold pcache lock since pollwakeup() will also lock 514 * the pcache before updating poll bitmap. 515 */ 516 mutex_enter(&pcp->pc_lock); 517 for (;;) { 518 pcp->pc_flag = 0; 519 error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex); 520 if (fdcnt || error) { 521 mutex_exit(&pcp->pc_lock); 522 mutex_exit(&ps->ps_lock); 523 break; 524 } 525 526 /* 527 * If T_POLLWAKE is set, a pollwakeup() was performed on 528 * one of the file descriptors. This can happen only if 529 * one of the VOP_POLL() functions dropped pcp->pc_lock. 530 * The only current cases of this is in procfs (prpoll()) 531 * and STREAMS (strpoll()). 532 */ 533 if (pcp->pc_flag & T_POLLWAKE) 534 continue; 535 536 /* 537 * If you get here, the poll of fds was unsuccessful. 538 * Wait until some fd becomes readable, writable, or gets 539 * an exception, or until a signal or a timeout occurs. 540 * Do not check for signals if we have a zero timeout. 541 */ 542 mutex_exit(&ps->ps_lock); 543 if (imm_timeout) 544 rval = -1; 545 else 546 rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock, 547 rqtp); 548 mutex_exit(&pcp->pc_lock); 549 /* 550 * If we have received a signal or timed out 551 * then break out and return. 552 */ 553 if (rval <= 0) { 554 if (rval == 0) 555 error = EINTR; 556 break; 557 } 558 /* 559 * We have not received a signal or timed out. 560 * Continue around and poll fds again. 561 */ 562 mutex_enter(&ps->ps_lock); 563 mutex_enter(&pcp->pc_lock); 564 } 565 566 pollout: 567 /* 568 * If we changed the signal mask but we received 569 * no signal then restore the signal mask. 570 * Otherwise psig() will deal with the signal mask. 571 */ 572 if (ksetp != NULL) { 573 mutex_enter(&p->p_lock); 574 if (lwp->lwp_cursig == 0) { 575 t->t_hold = lwp->lwp_sigoldmask; 576 t->t_flag &= ~T_TOMASK; 577 } 578 mutex_exit(&p->p_lock); 579 } 580 581 if (error) 582 return (set_errno(error)); 583 584 /* 585 * Copy out the events and return the fdcnt to the user. 586 */ 587 if (nfds != 0 && 588 copyout(pollfdp, fds, nfds * sizeof (pollfd_t))) 589 return (set_errno(EFAULT)); 590 591 #ifdef DEBUG 592 /* 593 * Another sanity check: 594 */ 595 if (fdcnt) { 596 int reventcnt = 0; 597 598 for (i = 0; i < nfds; i++) { 599 if (pollfdp[i].fd < 0) { 600 ASSERT(pollfdp[i].revents == 0); 601 continue; 602 } 603 if (pollfdp[i].revents) { 604 reventcnt++; 605 } 606 } 607 ASSERT(fdcnt == reventcnt); 608 } else { 609 for (i = 0; i < nfds; i++) { 610 ASSERT(pollfdp[i].revents == 0); 611 } 612 } 613 #endif /* DEBUG */ 614 615 return (fdcnt); 616 } 617 618 /* 619 * This system call trap exists solely for binary compatibility with 620 * old statically-linked applications. It is not called from libc. 621 * It should be removed in the next release. 622 */ 623 int 624 poll(pollfd_t *fds, nfds_t nfds, int time_out) 625 { 626 timespec_t ts; 627 timespec_t *tsp; 628 629 if (time_out < 0) 630 tsp = NULL; 631 else { 632 ts.tv_sec = time_out / MILLISEC; 633 ts.tv_nsec = (time_out % MILLISEC) * MICROSEC; 634 tsp = &ts; 635 } 636 637 return (poll_common(fds, nfds, tsp, NULL)); 638 } 639 640 /* 641 * This is the system call trap that poll(), 642 * select() and pselect() are built upon. 643 * It is a private interface between libc and the kernel. 644 */ 645 int 646 pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp) 647 { 648 timespec_t ts; 649 timespec_t *tsp; 650 sigset_t set; 651 k_sigset_t kset; 652 k_sigset_t *ksetp; 653 model_t datamodel = get_udatamodel(); 654 655 if (timeoutp == NULL) 656 tsp = NULL; 657 else { 658 if (datamodel == DATAMODEL_NATIVE) { 659 if (copyin(timeoutp, &ts, sizeof (ts))) 660 return (set_errno(EFAULT)); 661 } else { 662 timespec32_t ts32; 663 664 if (copyin(timeoutp, &ts32, sizeof (ts32))) 665 return (set_errno(EFAULT)); 666 TIMESPEC32_TO_TIMESPEC(&ts, &ts32) 667 } 668 669 if (itimerspecfix(&ts)) 670 return (set_errno(EINVAL)); 671 tsp = &ts; 672 } 673 674 if (setp == NULL) 675 ksetp = NULL; 676 else { 677 if (copyin(setp, &set, sizeof (set))) 678 return (set_errno(EFAULT)); 679 sigutok(&set, &kset); 680 ksetp = &kset; 681 } 682 683 return (poll_common(fds, nfds, tsp, ksetp)); 684 } 685 686 /* 687 * Clean up any state left around by poll(2). Called when a thread exits. 688 */ 689 void 690 pollcleanup() 691 { 692 pollstate_t *ps = curthread->t_pollstate; 693 pollcache_t *pcp; 694 695 if (ps == NULL) 696 return; 697 pcp = ps->ps_pcache; 698 /* 699 * free up all cached poll fds 700 */ 701 if (pcp == NULL) { 702 /* this pollstate is used by /dev/poll */ 703 goto pollcleanout; 704 } 705 706 if (pcp->pc_bitmap != NULL) { 707 ASSERT(MUTEX_NOT_HELD(&ps->ps_lock)); 708 /* 709 * a close lwp can race with us when cleaning up a polldat 710 * entry. We hold the ps_lock when cleaning hash table. 711 * Since this pollcache is going away anyway, there is no 712 * need to hold the pc_lock. 713 */ 714 mutex_enter(&ps->ps_lock); 715 pcache_clean(pcp); 716 mutex_exit(&ps->ps_lock); 717 #ifdef DEBUG 718 /* 719 * At this point, all fds cached by this lwp should be 720 * cleaned up. There should be no fd in fi_list still 721 * reference this thread. 722 */ 723 checkfpollinfo(); /* sanity check */ 724 pollcheckphlist(); /* sanity check */ 725 #endif /* DEBUG */ 726 } 727 /* 728 * Be sure no one is referencing thread before exiting 729 */ 730 mutex_enter(&pcp->pc_no_exit); 731 ASSERT(pcp->pc_busy >= 0); 732 while (pcp->pc_busy > 0) 733 cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit); 734 mutex_exit(&pcp->pc_no_exit); 735 pollcleanout: 736 pollstate_destroy(ps); 737 curthread->t_pollstate = NULL; 738 } 739 740 /* 741 * pollwakeup() - poke threads waiting in poll() for some event 742 * on a particular object. 743 * 744 * The threads hanging off of the specified pollhead structure are scanned. 745 * If their event mask matches the specified event(s), then pollnotify() is 746 * called to poke the thread. 747 * 748 * Multiple events may be specified. When POLLHUP or POLLERR are specified, 749 * all waiting threads are poked. 750 * 751 * It is important that pollnotify() not drop the lock protecting the list 752 * of threads. 753 */ 754 void 755 pollwakeup(pollhead_t *php, short events_arg) 756 { 757 polldat_t *pdp; 758 int events = (ushort_t)events_arg; 759 struct plist { 760 port_t *pp; 761 int pevents; 762 struct plist *next; 763 }; 764 struct plist *plhead = NULL, *pltail = NULL; 765 766 retry: 767 PH_ENTER(php); 768 769 for (pdp = php->ph_list; pdp; pdp = pdp->pd_next) { 770 if ((pdp->pd_events & events) || 771 (events & (POLLHUP | POLLERR))) { 772 773 pollcache_t *pcp; 774 775 if (pdp->pd_portev != NULL) { 776 port_kevent_t *pkevp = pdp->pd_portev; 777 /* 778 * Object (fd) is associated with an event port, 779 * => send event notification to the port. 780 */ 781 ASSERT(pkevp->portkev_source == PORT_SOURCE_FD); 782 mutex_enter(&pkevp->portkev_lock); 783 if (pkevp->portkev_flags & PORT_KEV_VALID) { 784 int pevents; 785 786 pkevp->portkev_flags &= ~PORT_KEV_VALID; 787 pkevp->portkev_events |= events & 788 (pdp->pd_events | POLLHUP | 789 POLLERR); 790 /* 791 * portkev_lock mutex will be released 792 * by port_send_event(). 793 */ 794 port_send_event(pkevp); 795 796 /* 797 * If we have some thread polling the 798 * port's fd, add it to the list. They 799 * will be notified later. 800 * The port_pollwkup() will flag the 801 * port_t so that it will not disappear 802 * till port_pollwkdone() is called. 803 */ 804 pevents = 805 port_pollwkup(pkevp->portkev_port); 806 if (pevents) { 807 struct plist *t; 808 t = kmem_zalloc( 809 sizeof (struct plist), 810 KM_SLEEP); 811 t->pp = pkevp->portkev_port; 812 t->pevents = pevents; 813 if (plhead == NULL) { 814 plhead = t; 815 } else { 816 pltail->next = t; 817 } 818 pltail = t; 819 } 820 } else { 821 mutex_exit(&pkevp->portkev_lock); 822 } 823 continue; 824 } 825 826 pcp = pdp->pd_pcache; 827 828 /* 829 * Try to grab the lock for this thread. If 830 * we don't get it then we may deadlock so 831 * back out and restart all over again. Note 832 * that the failure rate is very very low. 833 */ 834 if (mutex_tryenter(&pcp->pc_lock)) { 835 pollnotify(pcp, pdp->pd_fd); 836 mutex_exit(&pcp->pc_lock); 837 } else { 838 /* 839 * We are here because: 840 * 1) This thread has been woke up 841 * and is trying to get out of poll(). 842 * 2) Some other thread is also here 843 * but with a different pollhead lock. 844 * 845 * So, we need to drop the lock on pollhead 846 * because of (1) but we want to prevent 847 * that thread from doing lwp_exit() or 848 * devpoll close. We want to ensure that 849 * the pollcache pointer is still invalid. 850 * 851 * Solution: Grab the pcp->pc_no_exit lock, 852 * increment the pc_busy counter, drop every 853 * lock in sight. Get out of the way and wait 854 * for type (2) threads to finish. 855 */ 856 857 mutex_enter(&pcp->pc_no_exit); 858 pcp->pc_busy++; /* prevents exit()'s */ 859 mutex_exit(&pcp->pc_no_exit); 860 861 PH_EXIT(php); 862 mutex_enter(&pcp->pc_lock); 863 mutex_exit(&pcp->pc_lock); 864 mutex_enter(&pcp->pc_no_exit); 865 pcp->pc_busy--; 866 if (pcp->pc_busy == 0) { 867 /* 868 * Wakeup the thread waiting in 869 * thread_exit(). 870 */ 871 cv_signal(&pcp->pc_busy_cv); 872 } 873 mutex_exit(&pcp->pc_no_exit); 874 goto retry; 875 } 876 } 877 } 878 879 880 /* 881 * Event ports - If this php is of the port on the list, 882 * call port_pollwkdone() to release it. The port_pollwkdone() 883 * needs to be called before dropping the PH lock so that any new 884 * thread attempting to poll this port are blocked. There can be 885 * only one thread here in pollwakeup notifying this port's fd. 886 */ 887 if (plhead != NULL && &plhead->pp->port_pollhd == php) { 888 struct plist *t; 889 port_pollwkdone(plhead->pp); 890 t = plhead; 891 plhead = plhead->next; 892 kmem_free(t, sizeof (struct plist)); 893 } 894 PH_EXIT(php); 895 896 /* 897 * Event ports - Notify threads polling the event port's fd. 898 * This is normally done in port_send_event() where it calls 899 * pollwakeup() on the port. But, for PORT_SOURCE_FD source alone, 900 * we do it here in pollwakeup() to avoid a recursive call. 901 */ 902 if (plhead != NULL) { 903 php = &plhead->pp->port_pollhd; 904 events = plhead->pevents; 905 goto retry; 906 } 907 } 908 909 /* 910 * This function is called to inform a thread that 911 * an event being polled for has occurred. 912 * The pollstate lock on the thread should be held on entry. 913 */ 914 void 915 pollnotify(pollcache_t *pcp, int fd) 916 { 917 ASSERT(fd < pcp->pc_mapsize); 918 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 919 BT_SET(pcp->pc_bitmap, fd); 920 pcp->pc_flag |= T_POLLWAKE; 921 cv_signal(&pcp->pc_cv); 922 } 923 924 /* 925 * add a polldat entry to pollhead ph_list. The polldat struct is used 926 * by pollwakeup to wake sleeping pollers when polled events has happened. 927 */ 928 void 929 pollhead_insert(pollhead_t *php, polldat_t *pdp) 930 { 931 PH_ENTER(php); 932 ASSERT(pdp->pd_next == NULL); 933 #ifdef DEBUG 934 { 935 /* 936 * the polldat should not be already on the list 937 */ 938 polldat_t *wp; 939 for (wp = php->ph_list; wp; wp = wp->pd_next) { 940 ASSERT(wp != pdp); 941 } 942 } 943 #endif /* DEBUG */ 944 pdp->pd_next = php->ph_list; 945 php->ph_list = pdp; 946 PH_EXIT(php); 947 } 948 949 /* 950 * Delete the polldat entry from ph_list. 951 */ 952 void 953 pollhead_delete(pollhead_t *php, polldat_t *pdp) 954 { 955 polldat_t *wp; 956 polldat_t **wpp; 957 958 PH_ENTER(php); 959 for (wpp = &php->ph_list; (wp = *wpp) != NULL; wpp = &wp->pd_next) { 960 if (wp == pdp) { 961 *wpp = pdp->pd_next; 962 pdp->pd_next = NULL; 963 break; 964 } 965 } 966 #ifdef DEBUG 967 /* assert that pdp is no longer in the list */ 968 for (wp = *wpp; wp; wp = wp->pd_next) { 969 ASSERT(wp != pdp); 970 } 971 #endif /* DEBUG */ 972 PH_EXIT(php); 973 } 974 975 /* 976 * walk through the poll fd lists to see if they are identical. This is an 977 * expensive operation and should not be done more than once for each poll() 978 * call. 979 * 980 * As an optimization (i.e., not having to go through the lists more than 981 * once), this routine also clear the revents field of pollfd in 'current'. 982 * Zeroing out the revents field of each entry in current poll list is 983 * required by poll man page. 984 * 985 * Since the events field of cached list has illegal poll events filtered 986 * out, the current list applies the same filtering before comparison. 987 * 988 * The routine stops when it detects a meaningful difference, or when it 989 * exhausts the lists. 990 */ 991 int 992 pcacheset_cmp(pollfd_t *current, pollfd_t *cached, pollfd_t *newlist, int n) 993 { 994 int ix; 995 996 for (ix = 0; ix < n; ix++) { 997 if (current[ix].fd == cached[ix].fd) { 998 /* 999 * Filter out invalid poll events while we are in 1000 * inside the loop. 1001 */ 1002 if (current[ix].events & ~VALID_POLL_EVENTS) { 1003 current[ix].events &= VALID_POLL_EVENTS; 1004 if (newlist != NULL) 1005 newlist[ix].events = current[ix].events; 1006 } 1007 if (current[ix].events == cached[ix].events) { 1008 current[ix].revents = 0; 1009 continue; 1010 } 1011 } 1012 if ((current[ix].fd < 0) && (cached[ix].fd < 0)) { 1013 current[ix].revents = 0; 1014 continue; 1015 } 1016 return (ix); 1017 } 1018 return (ix); 1019 } 1020 1021 /* 1022 * This routine returns a pointer to a cached poll fd entry, or NULL if it 1023 * does not find it in the hash table. 1024 */ 1025 polldat_t * 1026 pcache_lookup_fd(pollcache_t *pcp, int fd) 1027 { 1028 int hashindex; 1029 polldat_t *pdp; 1030 1031 hashindex = POLLHASH(pcp->pc_hashsize, fd); 1032 pdp = pcp->pc_hash[hashindex]; 1033 while (pdp != NULL) { 1034 if (pdp->pd_fd == fd) 1035 break; 1036 pdp = pdp->pd_hashnext; 1037 } 1038 return (pdp); 1039 } 1040 1041 polldat_t * 1042 pcache_alloc_fd(int nsets) 1043 { 1044 polldat_t *pdp; 1045 1046 pdp = kmem_zalloc(sizeof (polldat_t), KM_SLEEP); 1047 if (nsets > 0) { 1048 pdp->pd_ref = kmem_zalloc(sizeof (xref_t) * nsets, KM_SLEEP); 1049 pdp->pd_nsets = nsets; 1050 } 1051 return (pdp); 1052 } 1053 1054 /* 1055 * This routine inserts a polldat into the pollcache's hash table. It 1056 * may be necessary to grow the size of the hash table. 1057 */ 1058 void 1059 pcache_insert_fd(pollcache_t *pcp, polldat_t *pdp, nfds_t nfds) 1060 { 1061 int hashindex; 1062 int fd; 1063 1064 if ((pcp->pc_fdcount > pcp->pc_hashsize * POLLHASHTHRESHOLD) || 1065 (nfds > pcp->pc_hashsize * POLLHASHTHRESHOLD)) { 1066 pcache_grow_hashtbl(pcp, nfds); 1067 } 1068 fd = pdp->pd_fd; 1069 hashindex = POLLHASH(pcp->pc_hashsize, fd); 1070 pdp->pd_hashnext = pcp->pc_hash[hashindex]; 1071 pcp->pc_hash[hashindex] = pdp; 1072 pcp->pc_fdcount++; 1073 1074 #ifdef DEBUG 1075 { 1076 /* 1077 * same fd should not appear on a hash list twice 1078 */ 1079 polldat_t *pdp1; 1080 for (pdp1 = pdp->pd_hashnext; pdp1; pdp1 = pdp1->pd_hashnext) { 1081 ASSERT(pdp->pd_fd != pdp1->pd_fd); 1082 } 1083 } 1084 #endif /* DEBUG */ 1085 } 1086 1087 /* 1088 * Grow the hash table -- either double the table size or round it to the 1089 * nearest multiples of POLLHASHCHUNKSZ, whichever is bigger. Rehash all the 1090 * elements on the hash table. 1091 */ 1092 void 1093 pcache_grow_hashtbl(pollcache_t *pcp, nfds_t nfds) 1094 { 1095 int oldsize; 1096 polldat_t **oldtbl; 1097 polldat_t *pdp, *pdp1; 1098 int i; 1099 #ifdef DEBUG 1100 int count = 0; 1101 #endif 1102 1103 ASSERT(pcp->pc_hashsize % POLLHASHCHUNKSZ == 0); 1104 oldsize = pcp->pc_hashsize; 1105 oldtbl = pcp->pc_hash; 1106 if (nfds > pcp->pc_hashsize * POLLHASHINC) { 1107 pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) & 1108 ~(POLLHASHCHUNKSZ - 1); 1109 } else { 1110 pcp->pc_hashsize = pcp->pc_hashsize * POLLHASHINC; 1111 } 1112 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *), 1113 KM_SLEEP); 1114 /* 1115 * rehash existing elements 1116 */ 1117 pcp->pc_fdcount = 0; 1118 for (i = 0; i < oldsize; i++) { 1119 pdp = oldtbl[i]; 1120 while (pdp != NULL) { 1121 pdp1 = pdp->pd_hashnext; 1122 pcache_insert_fd(pcp, pdp, nfds); 1123 pdp = pdp1; 1124 #ifdef DEBUG 1125 count++; 1126 #endif 1127 } 1128 } 1129 kmem_free(oldtbl, oldsize * sizeof (polldat_t *)); 1130 ASSERT(pcp->pc_fdcount == count); 1131 } 1132 1133 void 1134 pcache_grow_map(pollcache_t *pcp, int fd) 1135 { 1136 int newsize; 1137 ulong_t *newmap; 1138 1139 /* 1140 * grow to nearest multiple of POLLMAPCHUNK, assuming POLLMAPCHUNK is 1141 * power of 2. 1142 */ 1143 newsize = (fd + POLLMAPCHUNK) & ~(POLLMAPCHUNK - 1); 1144 newmap = kmem_zalloc((newsize / BT_NBIPUL) * sizeof (ulong_t), 1145 KM_SLEEP); 1146 /* 1147 * don't want pollwakeup to set a bit while growing the bitmap. 1148 */ 1149 ASSERT(mutex_owned(&pcp->pc_lock) == 0); 1150 mutex_enter(&pcp->pc_lock); 1151 bcopy(pcp->pc_bitmap, newmap, 1152 (pcp->pc_mapsize / BT_NBIPUL) * sizeof (ulong_t)); 1153 kmem_free(pcp->pc_bitmap, 1154 (pcp->pc_mapsize /BT_NBIPUL) * sizeof (ulong_t)); 1155 pcp->pc_bitmap = newmap; 1156 pcp->pc_mapsize = newsize; 1157 mutex_exit(&pcp->pc_lock); 1158 } 1159 1160 /* 1161 * remove all the reference from pollhead list and fpollinfo lists. 1162 */ 1163 void 1164 pcache_clean(pollcache_t *pcp) 1165 { 1166 int i; 1167 polldat_t **hashtbl; 1168 polldat_t *pdp; 1169 1170 ASSERT(MUTEX_HELD(&curthread->t_pollstate->ps_lock)); 1171 hashtbl = pcp->pc_hash; 1172 for (i = 0; i < pcp->pc_hashsize; i++) { 1173 for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 1174 if (pdp->pd_php != NULL) { 1175 pollhead_delete(pdp->pd_php, pdp); 1176 pdp->pd_php = NULL; 1177 } 1178 if (pdp->pd_fp != NULL) { 1179 delfpollinfo(pdp->pd_fd); 1180 pdp->pd_fp = NULL; 1181 } 1182 } 1183 } 1184 } 1185 1186 void 1187 pcacheset_invalidate(pollstate_t *ps, polldat_t *pdp) 1188 { 1189 int i; 1190 int fd = pdp->pd_fd; 1191 1192 /* 1193 * we come here because an earlier close() on this cached poll fd. 1194 */ 1195 ASSERT(pdp->pd_fp == NULL); 1196 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1197 pdp->pd_events = 0; 1198 for (i = 0; i < ps->ps_nsets; i++) { 1199 xref_t *refp; 1200 pollcacheset_t *pcsp; 1201 1202 ASSERT(pdp->pd_ref != NULL); 1203 refp = &pdp->pd_ref[i]; 1204 if (refp->xf_refcnt) { 1205 ASSERT(refp->xf_position >= 0); 1206 pcsp = &ps->ps_pcacheset[i]; 1207 if (refp->xf_refcnt == 1) { 1208 pcsp->pcs_pollfd[refp->xf_position].fd = -1; 1209 refp->xf_refcnt = 0; 1210 pdp->pd_count--; 1211 } else if (refp->xf_refcnt > 1) { 1212 int j; 1213 1214 /* 1215 * turn off every appearance in pcs_pollfd list 1216 */ 1217 for (j = refp->xf_position; 1218 j < pcsp->pcs_nfds; j++) { 1219 if (pcsp->pcs_pollfd[j].fd == fd) { 1220 pcsp->pcs_pollfd[j].fd = -1; 1221 refp->xf_refcnt--; 1222 pdp->pd_count--; 1223 } 1224 } 1225 } 1226 ASSERT(refp->xf_refcnt == 0); 1227 refp->xf_position = POLLPOSINVAL; 1228 } 1229 } 1230 ASSERT(pdp->pd_count == 0); 1231 } 1232 1233 /* 1234 * Insert poll fd into the pollcache, and add poll registration. 1235 * This routine is called after getf() and before releasef(). So the vnode 1236 * can not disappear even if we block here. 1237 * If there is an error, the polled fd is not cached. 1238 */ 1239 int 1240 pcache_insert(pollstate_t *ps, file_t *fp, pollfd_t *pollfdp, int *fdcntp, 1241 ssize_t pos, int which) 1242 { 1243 pollcache_t *pcp = ps->ps_pcache; 1244 polldat_t *pdp; 1245 int error; 1246 int fd; 1247 pollhead_t *memphp = NULL; 1248 xref_t *refp; 1249 int newpollfd = 0; 1250 1251 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1252 /* 1253 * The poll caching uses the existing VOP_POLL interface. If there 1254 * is no polled events, we want the polled device to set its "some 1255 * one is sleeping in poll" flag. When the polled events happen 1256 * later, the driver will call pollwakeup(). We achieve this by 1257 * always passing 0 in the third parameter ("anyyet") when calling 1258 * VOP_POLL. This parameter is not looked at by drivers when the 1259 * polled events exist. If a driver chooses to ignore this parameter 1260 * and call pollwakeup whenever the polled events happen, that will 1261 * be OK too. 1262 */ 1263 ASSERT(curthread->t_pollcache == NULL); 1264 error = VOP_POLL(fp->f_vnode, pollfdp->events, 0, &pollfdp->revents, 1265 &memphp); 1266 if (error) { 1267 return (error); 1268 } 1269 if (pollfdp->revents) { 1270 (*fdcntp)++; 1271 } 1272 /* 1273 * polling the underlying device succeeded. Now we can cache it. 1274 * A close can't come in here because we have not done a releasef() 1275 * yet. 1276 */ 1277 fd = pollfdp->fd; 1278 pdp = pcache_lookup_fd(pcp, fd); 1279 if (pdp == NULL) { 1280 ASSERT(ps->ps_nsets > 0); 1281 pdp = pcache_alloc_fd(ps->ps_nsets); 1282 newpollfd = 1; 1283 } 1284 /* 1285 * If this entry was used to cache a poll fd which was closed, and 1286 * this entry has not been cleaned, do it now. 1287 */ 1288 if ((pdp->pd_count > 0) && (pdp->pd_fp == NULL)) { 1289 pcacheset_invalidate(ps, pdp); 1290 ASSERT(pdp->pd_next == NULL); 1291 } 1292 if (pdp->pd_count == 0) { 1293 pdp->pd_fd = fd; 1294 pdp->pd_fp = fp; 1295 addfpollinfo(fd); 1296 pdp->pd_thread = curthread; 1297 pdp->pd_pcache = pcp; 1298 /* 1299 * the entry is never used or cleared by removing a cached 1300 * pollfd (pcache_delete_fd). So all the fields should be clear. 1301 */ 1302 ASSERT(pdp->pd_next == NULL); 1303 } 1304 1305 /* 1306 * A polled fd is considered cached. So there should be a fpollinfo 1307 * entry on uf_fpollinfo list. 1308 */ 1309 ASSERT(infpollinfo(fd)); 1310 /* 1311 * If there is an inconsistency, we want to know it here. 1312 */ 1313 ASSERT(pdp->pd_fp == fp); 1314 1315 /* 1316 * XXX pd_events is a union of all polled events on this fd, possibly 1317 * by different threads. Unless this is a new first poll(), pd_events 1318 * never shrinks. If an event is no longer polled by a process, there 1319 * is no way to cancel that event. In that case, poll degrade to its 1320 * old form -- polling on this fd every time poll() is called. The 1321 * assumption is an app always polls the same type of events. 1322 */ 1323 pdp->pd_events |= pollfdp->events; 1324 1325 pdp->pd_count++; 1326 /* 1327 * There is not much special handling for multiple appearances of 1328 * same fd other than xf_position always recording the first 1329 * appearance in poll list. If this is called from pcacheset_cache_list, 1330 * a VOP_POLL is called on every pollfd entry; therefore each 1331 * revents and fdcnt should be set correctly. If this is called from 1332 * pcacheset_resolve, we don't care about fdcnt here. Pollreadmap will 1333 * pick up the right count and handle revents field of each pollfd 1334 * entry. 1335 */ 1336 ASSERT(pdp->pd_ref != NULL); 1337 refp = &pdp->pd_ref[which]; 1338 if (refp->xf_refcnt == 0) { 1339 refp->xf_position = pos; 1340 } else { 1341 /* 1342 * xf_position records the fd's first appearance in poll list 1343 */ 1344 if (pos < refp->xf_position) { 1345 refp->xf_position = pos; 1346 } 1347 } 1348 ASSERT(pollfdp->fd == ps->ps_pollfd[refp->xf_position].fd); 1349 refp->xf_refcnt++; 1350 if (fd >= pcp->pc_mapsize) { 1351 pcache_grow_map(pcp, fd); 1352 } 1353 if (fd > pcp->pc_mapend) { 1354 pcp->pc_mapend = fd; 1355 } 1356 if (newpollfd != 0) { 1357 pcache_insert_fd(ps->ps_pcache, pdp, ps->ps_nfds); 1358 } 1359 if (memphp) { 1360 if (pdp->pd_php == NULL) { 1361 pollhead_insert(memphp, pdp); 1362 pdp->pd_php = memphp; 1363 } else { 1364 if (memphp != pdp->pd_php) { 1365 /* 1366 * layered devices (e.g. console driver) 1367 * may change the vnode and thus the pollhead 1368 * pointer out from underneath us. 1369 */ 1370 pollhead_delete(pdp->pd_php, pdp); 1371 pollhead_insert(memphp, pdp); 1372 pdp->pd_php = memphp; 1373 } 1374 } 1375 } 1376 /* 1377 * Since there is a considerable window between VOP_POLL and when 1378 * we actually put the polldat struct on the pollhead list, we could 1379 * miss a pollwakeup. In the case of polling additional events, we 1380 * don't update the events until after VOP_POLL. So we could miss 1381 * pollwakeup there too. So we always set the bit here just to be 1382 * safe. The real performance gain is in subsequent pcache_poll. 1383 */ 1384 mutex_enter(&pcp->pc_lock); 1385 BT_SET(pcp->pc_bitmap, fd); 1386 mutex_exit(&pcp->pc_lock); 1387 return (0); 1388 } 1389 1390 /* 1391 * The entry is not really deleted. The fields are cleared so that the 1392 * entry is no longer useful, but it will remain in the hash table for reuse 1393 * later. It will be freed when the polling lwp exits. 1394 */ 1395 int 1396 pcache_delete_fd(pollstate_t *ps, int fd, size_t pos, int which, uint_t cevent) 1397 { 1398 pollcache_t *pcp = ps->ps_pcache; 1399 polldat_t *pdp; 1400 xref_t *refp; 1401 1402 ASSERT(fd < pcp->pc_mapsize); 1403 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1404 1405 pdp = pcache_lookup_fd(pcp, fd); 1406 ASSERT(pdp != NULL); 1407 ASSERT(pdp->pd_count > 0); 1408 ASSERT(pdp->pd_ref != NULL); 1409 refp = &pdp->pd_ref[which]; 1410 if (pdp->pd_count == 1) { 1411 pdp->pd_events = 0; 1412 refp->xf_position = POLLPOSINVAL; 1413 ASSERT(refp->xf_refcnt == 1); 1414 refp->xf_refcnt = 0; 1415 if (pdp->pd_php) { 1416 /* 1417 * It is possible for a wakeup thread to get ahead 1418 * of the following pollhead_delete and set the bit in 1419 * bitmap. It is OK because the bit will be cleared 1420 * here anyway. 1421 */ 1422 pollhead_delete(pdp->pd_php, pdp); 1423 pdp->pd_php = NULL; 1424 } 1425 pdp->pd_count = 0; 1426 if (pdp->pd_fp != NULL) { 1427 pdp->pd_fp = NULL; 1428 delfpollinfo(fd); 1429 } 1430 mutex_enter(&pcp->pc_lock); 1431 BT_CLEAR(pcp->pc_bitmap, fd); 1432 mutex_exit(&pcp->pc_lock); 1433 return (0); 1434 } 1435 if ((cevent & POLLCLOSED) == POLLCLOSED) { 1436 /* 1437 * fd cached here has been closed. This is the first 1438 * pcache_delete_fd called after the close. Clean up the 1439 * entire entry. 1440 */ 1441 pcacheset_invalidate(ps, pdp); 1442 ASSERT(pdp->pd_php == NULL); 1443 mutex_enter(&pcp->pc_lock); 1444 BT_CLEAR(pcp->pc_bitmap, fd); 1445 mutex_exit(&pcp->pc_lock); 1446 return (0); 1447 } 1448 #ifdef DEBUG 1449 if (getf(fd) != NULL) { 1450 ASSERT(infpollinfo(fd)); 1451 releasef(fd); 1452 } 1453 #endif /* DEBUG */ 1454 pdp->pd_count--; 1455 ASSERT(refp->xf_refcnt > 0); 1456 if (--refp->xf_refcnt == 0) { 1457 refp->xf_position = POLLPOSINVAL; 1458 } else { 1459 ASSERT(pos >= refp->xf_position); 1460 if (pos == refp->xf_position) { 1461 /* 1462 * The xref position is no longer valid. 1463 * Reset it to a special value and let 1464 * caller know it needs to updatexref() 1465 * with a new xf_position value. 1466 */ 1467 refp->xf_position = POLLPOSTRANS; 1468 return (1); 1469 } 1470 } 1471 return (0); 1472 } 1473 1474 void 1475 pcache_update_xref(pollcache_t *pcp, int fd, ssize_t pos, int which) 1476 { 1477 polldat_t *pdp; 1478 1479 pdp = pcache_lookup_fd(pcp, fd); 1480 ASSERT(pdp != NULL); 1481 ASSERT(pdp->pd_ref != NULL); 1482 pdp->pd_ref[which].xf_position = pos; 1483 } 1484 1485 #ifdef DEBUG 1486 /* 1487 * For each polled fd, it's either in the bitmap or cached in 1488 * pcache hash table. If this routine returns 0, something is wrong. 1489 */ 1490 static int 1491 pollchecksanity(pollstate_t *ps, nfds_t nfds) 1492 { 1493 int i; 1494 int fd; 1495 pollcache_t *pcp = ps->ps_pcache; 1496 polldat_t *pdp; 1497 pollfd_t *pollfdp = ps->ps_pollfd; 1498 file_t *fp; 1499 1500 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1501 for (i = 0; i < nfds; i++) { 1502 fd = pollfdp[i].fd; 1503 if (fd < 0) { 1504 ASSERT(pollfdp[i].revents == 0); 1505 continue; 1506 } 1507 if (pollfdp[i].revents == POLLNVAL) 1508 continue; 1509 if ((fp = getf(fd)) == NULL) 1510 continue; 1511 pdp = pcache_lookup_fd(pcp, fd); 1512 ASSERT(pdp != NULL); 1513 ASSERT(infpollinfo(fd)); 1514 ASSERT(pdp->pd_fp == fp); 1515 releasef(fd); 1516 if (BT_TEST(pcp->pc_bitmap, fd)) 1517 continue; 1518 if (pdp->pd_php == NULL) 1519 return (0); 1520 } 1521 return (1); 1522 } 1523 #endif /* DEBUG */ 1524 1525 /* 1526 * resolve the difference between the current poll list and a cached one. 1527 */ 1528 int 1529 pcacheset_resolve(pollstate_t *ps, nfds_t nfds, int *fdcntp, int which) 1530 { 1531 int i; 1532 pollcache_t *pcp = ps->ps_pcache; 1533 pollfd_t *newlist = NULL; 1534 pollfd_t *current = ps->ps_pollfd; 1535 pollfd_t *cached; 1536 pollcacheset_t *pcsp; 1537 int common; 1538 int count = 0; 1539 int offset; 1540 int remain; 1541 int fd; 1542 file_t *fp; 1543 int fdcnt = 0; 1544 int cnt = 0; 1545 nfds_t old_nfds; 1546 int error = 0; 1547 int mismatch = 0; 1548 1549 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1550 #ifdef DEBUG 1551 checkpolldat(ps); 1552 #endif 1553 pcsp = &ps->ps_pcacheset[which]; 1554 old_nfds = pcsp->pcs_nfds; 1555 common = (nfds > old_nfds) ? old_nfds : nfds; 1556 if (nfds != old_nfds) { 1557 /* 1558 * the length of poll list has changed. allocate a new 1559 * pollfd list. 1560 */ 1561 newlist = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP); 1562 bcopy(current, newlist, sizeof (pollfd_t) * nfds); 1563 } 1564 /* 1565 * Compare the overlapping part of the current fd list with the 1566 * cached one. Whenever a difference is found, resolve it. 1567 * The comparison is done on the current poll list and the 1568 * cached list. But we may be setting up the newlist to be the 1569 * cached list for next poll. 1570 */ 1571 cached = pcsp->pcs_pollfd; 1572 remain = common; 1573 1574 while (count < common) { 1575 int tmpfd; 1576 pollfd_t *np; 1577 1578 np = (newlist != NULL) ? &newlist[count] : NULL; 1579 offset = pcacheset_cmp(¤t[count], &cached[count], np, 1580 remain); 1581 /* 1582 * Collect stats. If lists are completed the first time, 1583 * it's a hit. Otherwise, it's a partial hit or miss. 1584 */ 1585 if ((count == 0) && (offset == common)) { 1586 pollstats.pollcachehit.value.ui64++; 1587 } else { 1588 mismatch++; 1589 } 1590 count += offset; 1591 if (offset < remain) { 1592 ASSERT(count < common); 1593 ASSERT((current[count].fd != cached[count].fd) || 1594 (current[count].events != cached[count].events)); 1595 /* 1596 * Filter out invalid events. 1597 */ 1598 if (current[count].events & ~VALID_POLL_EVENTS) { 1599 if (newlist != NULL) { 1600 newlist[count].events = 1601 current[count].events &= 1602 VALID_POLL_EVENTS; 1603 } else { 1604 current[count].events &= 1605 VALID_POLL_EVENTS; 1606 } 1607 } 1608 /* 1609 * when resolving a difference, we always remove the 1610 * fd from cache before inserting one into cache. 1611 */ 1612 if (cached[count].fd >= 0) { 1613 tmpfd = cached[count].fd; 1614 if (pcache_delete_fd(ps, tmpfd, count, which, 1615 (uint_t)cached[count].events)) { 1616 /* 1617 * This should be rare but needed for 1618 * correctness. 1619 * 1620 * The first appearance in cached list 1621 * is being "turned off". The same fd 1622 * appear more than once in the cached 1623 * poll list. Find the next one on the 1624 * list and update the cached 1625 * xf_position field. 1626 */ 1627 for (i = count + 1; i < old_nfds; i++) { 1628 if (cached[i].fd == tmpfd) { 1629 pcache_update_xref(pcp, 1630 tmpfd, (ssize_t)i, 1631 which); 1632 break; 1633 } 1634 } 1635 ASSERT(i <= old_nfds); 1636 } 1637 /* 1638 * In case a new cache list is allocated, 1639 * need to keep both cache lists in sync 1640 * b/c the new one can be freed if we have 1641 * an error later. 1642 */ 1643 cached[count].fd = -1; 1644 if (newlist != NULL) { 1645 newlist[count].fd = -1; 1646 } 1647 } 1648 if ((tmpfd = current[count].fd) >= 0) { 1649 /* 1650 * add to the cached fd tbl and bitmap. 1651 */ 1652 if ((fp = getf(tmpfd)) == NULL) { 1653 current[count].revents = POLLNVAL; 1654 if (newlist != NULL) { 1655 newlist[count].fd = -1; 1656 } 1657 cached[count].fd = -1; 1658 fdcnt++; 1659 } else { 1660 /* 1661 * Here we don't care about the 1662 * fdcnt. We will examine the bitmap 1663 * later and pick up the correct 1664 * fdcnt there. So we never bother 1665 * to check value of 'cnt'. 1666 */ 1667 error = pcache_insert(ps, fp, 1668 ¤t[count], &cnt, 1669 (ssize_t)count, which); 1670 /* 1671 * if no error, we want to do releasef 1672 * after we updated cache poll list 1673 * entry so that close() won't race 1674 * us. 1675 */ 1676 if (error) { 1677 /* 1678 * If we encountered an error, 1679 * we have invalidated an 1680 * entry in cached poll list 1681 * (in pcache_delete_fd() above) 1682 * but failed to add one here. 1683 * This is OK b/c what's in the 1684 * cached list is consistent 1685 * with content of cache. 1686 * It will not have any ill 1687 * effect on next poll(). 1688 */ 1689 releasef(tmpfd); 1690 if (newlist != NULL) { 1691 kmem_free(newlist, 1692 nfds * 1693 sizeof (pollfd_t)); 1694 } 1695 return (error); 1696 } 1697 /* 1698 * If we have allocated a new(temp) 1699 * cache list, we need to keep both 1700 * in sync b/c the new one can be freed 1701 * if we have an error later. 1702 */ 1703 if (newlist != NULL) { 1704 newlist[count].fd = 1705 current[count].fd; 1706 newlist[count].events = 1707 current[count].events; 1708 } 1709 cached[count].fd = current[count].fd; 1710 cached[count].events = 1711 current[count].events; 1712 releasef(tmpfd); 1713 } 1714 } else { 1715 current[count].revents = 0; 1716 } 1717 count++; 1718 remain = common - count; 1719 } 1720 } 1721 if (mismatch != 0) { 1722 if (mismatch == common) { 1723 pollstats.pollcachemiss.value.ui64++; 1724 } else { 1725 pollstats.pollcachephit.value.ui64++; 1726 } 1727 } 1728 /* 1729 * take care of the non overlapping part of a list 1730 */ 1731 if (nfds > old_nfds) { 1732 ASSERT(newlist != NULL); 1733 for (i = old_nfds; i < nfds; i++) { 1734 /* filter out invalid events */ 1735 if (current[i].events & ~VALID_POLL_EVENTS) { 1736 newlist[i].events = current[i].events = 1737 current[i].events & VALID_POLL_EVENTS; 1738 } 1739 if ((fd = current[i].fd) < 0) { 1740 current[i].revents = 0; 1741 continue; 1742 } 1743 /* 1744 * add to the cached fd tbl and bitmap. 1745 */ 1746 if ((fp = getf(fd)) == NULL) { 1747 current[i].revents = POLLNVAL; 1748 newlist[i].fd = -1; 1749 fdcnt++; 1750 continue; 1751 } 1752 /* 1753 * Here we don't care about the 1754 * fdcnt. We will examine the bitmap 1755 * later and pick up the correct 1756 * fdcnt there. So we never bother to 1757 * check 'cnt'. 1758 */ 1759 error = pcache_insert(ps, fp, ¤t[i], &cnt, 1760 (ssize_t)i, which); 1761 releasef(fd); 1762 if (error) { 1763 /* 1764 * Here we are half way through adding newly 1765 * polled fd. Undo enough to keep the cache 1766 * list consistent with the cache content. 1767 */ 1768 pcacheset_remove_list(ps, current, old_nfds, 1769 i, which, 0); 1770 kmem_free(newlist, nfds * sizeof (pollfd_t)); 1771 return (error); 1772 } 1773 } 1774 } 1775 if (old_nfds > nfds) { 1776 /* 1777 * remove the fd's which are no longer polled. 1778 */ 1779 pcacheset_remove_list(ps, pcsp->pcs_pollfd, nfds, old_nfds, 1780 which, 1); 1781 } 1782 /* 1783 * set difference resolved. update nfds and cachedlist 1784 * in pollstate struct. 1785 */ 1786 if (newlist != NULL) { 1787 kmem_free(pcsp->pcs_pollfd, old_nfds * sizeof (pollfd_t)); 1788 /* 1789 * By now, the pollfd.revents field should 1790 * all be zeroed. 1791 */ 1792 pcsp->pcs_pollfd = newlist; 1793 pcsp->pcs_nfds = nfds; 1794 } 1795 ASSERT(*fdcntp == 0); 1796 *fdcntp = fdcnt; 1797 /* 1798 * By now for every fd in pollfdp, one of the following should be 1799 * true. Otherwise we will miss a polled event. 1800 * 1801 * 1. the bit corresponding to the fd in bitmap is set. So VOP_POLL 1802 * will be called on this fd in next poll. 1803 * 2. the fd is cached in the pcache (i.e. pd_php is set). So 1804 * pollnotify will happen. 1805 */ 1806 ASSERT(pollchecksanity(ps, nfds)); 1807 /* 1808 * make sure cross reference between cached poll lists and cached 1809 * poll fds are correct. 1810 */ 1811 ASSERT(pollcheckxref(ps, which)); 1812 /* 1813 * ensure each polldat in pollcache reference a polled fd in 1814 * pollcacheset. 1815 */ 1816 #ifdef DEBUG 1817 checkpolldat(ps); 1818 #endif 1819 return (0); 1820 } 1821 1822 #ifdef DEBUG 1823 static int 1824 pollscanrevents(pollcache_t *pcp, pollfd_t *pollfdp, nfds_t nfds) 1825 { 1826 int i; 1827 int reventcnt = 0; 1828 1829 for (i = 0; i < nfds; i++) { 1830 if (pollfdp[i].fd < 0) { 1831 ASSERT(pollfdp[i].revents == 0); 1832 continue; 1833 } 1834 if (pollfdp[i].revents) { 1835 reventcnt++; 1836 } 1837 if (pollfdp[i].revents && (pollfdp[i].revents != POLLNVAL)) { 1838 ASSERT(BT_TEST(pcp->pc_bitmap, pollfdp[i].fd)); 1839 } 1840 } 1841 return (reventcnt); 1842 } 1843 #endif /* DEBUG */ 1844 1845 /* 1846 * read the bitmap and poll on fds corresponding to the '1' bits. The ps_lock 1847 * is held upon entry. 1848 */ 1849 int 1850 pcache_poll(pollfd_t *pollfdp, pollstate_t *ps, nfds_t nfds, int *fdcntp, 1851 int which) 1852 { 1853 int i; 1854 pollcache_t *pcp; 1855 int fd; 1856 int begin, end, done; 1857 pollhead_t *php; 1858 int fdcnt; 1859 int error = 0; 1860 file_t *fp; 1861 polldat_t *pdp; 1862 xref_t *refp; 1863 int entry; 1864 1865 pcp = ps->ps_pcache; 1866 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1867 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 1868 retry: 1869 done = 0; 1870 begin = 0; 1871 fdcnt = 0; 1872 end = pcp->pc_mapend; 1873 while ((fdcnt < nfds) && !done) { 1874 php = NULL; 1875 /* 1876 * only poll fds which may have events 1877 */ 1878 fd = bt_getlowbit(pcp->pc_bitmap, begin, end); 1879 ASSERT(fd <= end); 1880 if (fd >= 0) { 1881 ASSERT(pollcheckrevents(ps, begin, fd, which)); 1882 /* 1883 * adjust map pointers for next round 1884 */ 1885 if (fd == end) { 1886 done = 1; 1887 } else { 1888 begin = fd + 1; 1889 } 1890 /* 1891 * A bitmap caches poll state information of 1892 * multiple poll lists. Call VOP_POLL only if 1893 * the bit corresponds to an fd in this poll 1894 * list. 1895 */ 1896 pdp = pcache_lookup_fd(pcp, fd); 1897 ASSERT(pdp != NULL); 1898 ASSERT(pdp->pd_ref != NULL); 1899 refp = &pdp->pd_ref[which]; 1900 if (refp->xf_refcnt == 0) 1901 continue; 1902 entry = refp->xf_position; 1903 ASSERT((entry >= 0) && (entry < nfds)); 1904 ASSERT(pollfdp[entry].fd == fd); 1905 /* 1906 * we are in this routine implies that we have 1907 * successfully polled this fd in the past. 1908 * Check to see this fd is closed while we are 1909 * blocked in poll. This ensures that we don't 1910 * miss a close on the fd in the case this fd is 1911 * reused. 1912 */ 1913 if (pdp->pd_fp == NULL) { 1914 ASSERT(pdp->pd_count > 0); 1915 pollfdp[entry].revents = POLLNVAL; 1916 fdcnt++; 1917 if (refp->xf_refcnt > 1) { 1918 /* 1919 * this fd appeared multiple time 1920 * in the poll list. Find all of them. 1921 */ 1922 for (i = entry + 1; i < nfds; i++) { 1923 if (pollfdp[i].fd == fd) { 1924 pollfdp[i].revents = 1925 POLLNVAL; 1926 fdcnt++; 1927 } 1928 } 1929 } 1930 pcacheset_invalidate(ps, pdp); 1931 continue; 1932 } 1933 /* 1934 * We can be here polling a device that is being 1935 * closed (i.e. the file pointer is set to NULL, 1936 * but pollcacheclean has not happened yet). 1937 */ 1938 if ((fp = getf(fd)) == NULL) { 1939 pollfdp[entry].revents = POLLNVAL; 1940 fdcnt++; 1941 if (refp->xf_refcnt > 1) { 1942 /* 1943 * this fd appeared multiple time 1944 * in the poll list. Find all of them. 1945 */ 1946 for (i = entry + 1; i < nfds; i++) { 1947 if (pollfdp[i].fd == fd) { 1948 pollfdp[i].revents = 1949 POLLNVAL; 1950 fdcnt++; 1951 } 1952 } 1953 } 1954 continue; 1955 } 1956 ASSERT(pdp->pd_fp == fp); 1957 ASSERT(infpollinfo(fd)); 1958 /* 1959 * Since we no longer hold poll head lock across 1960 * VOP_POLL, pollunlock logic can be simplifed. 1961 */ 1962 ASSERT(pdp->pd_php == NULL || 1963 MUTEX_NOT_HELD(PHLOCK(pdp->pd_php))); 1964 /* 1965 * underlying file systems may set a "pollpending" 1966 * flag when it sees the poll may block. Pollwakeup() 1967 * is called by wakeup thread if pollpending is set. 1968 * Pass a 0 fdcnt so that the underlying file system 1969 * will set the "pollpending" flag set when there is 1970 * no polled events. 1971 * 1972 * Use pollfdp[].events for actual polling because 1973 * the pd_events is union of all cached poll events 1974 * on this fd. The events parameter also affects 1975 * how the polled device sets the "poll pending" 1976 * flag. 1977 */ 1978 ASSERT(curthread->t_pollcache == NULL); 1979 error = VOP_POLL(fp->f_vnode, pollfdp[entry].events, 0, 1980 &pollfdp[entry].revents, &php); 1981 /* 1982 * releasef after completely done with this cached 1983 * poll entry. To prevent close() coming in to clear 1984 * this entry. 1985 */ 1986 if (error) { 1987 releasef(fd); 1988 break; 1989 } 1990 /* 1991 * layered devices (e.g. console driver) 1992 * may change the vnode and thus the pollhead 1993 * pointer out from underneath us. 1994 */ 1995 if (php != NULL && pdp->pd_php != NULL && 1996 php != pdp->pd_php) { 1997 releasef(fd); 1998 pollhead_delete(pdp->pd_php, pdp); 1999 pdp->pd_php = php; 2000 pollhead_insert(php, pdp); 2001 /* 2002 * We could have missed a wakeup on the new 2003 * target device. Make sure the new target 2004 * gets polled once. 2005 */ 2006 BT_SET(pcp->pc_bitmap, fd); 2007 goto retry; 2008 } 2009 2010 if (pollfdp[entry].revents) { 2011 ASSERT(refp->xf_refcnt >= 1); 2012 fdcnt++; 2013 if (refp->xf_refcnt > 1) { 2014 /* 2015 * this fd appeared multiple time 2016 * in the poll list. This is rare but 2017 * we have to look at all of them for 2018 * correctness. 2019 */ 2020 error = plist_chkdupfd(fp, pdp, ps, 2021 pollfdp, entry, &fdcnt); 2022 if (error > 0) { 2023 releasef(fd); 2024 break; 2025 } 2026 if (error < 0) { 2027 goto retry; 2028 } 2029 } 2030 releasef(fd); 2031 } else { 2032 /* 2033 * VOP_POLL didn't return any revents. We can 2034 * clear the bit in bitmap only if we have the 2035 * pollhead ptr cached and no other cached 2036 * entry is polling different events on this fd. 2037 * VOP_POLL may have dropped the ps_lock. Make 2038 * sure pollwakeup has not happened before clear 2039 * the bit. 2040 */ 2041 if ((pdp->pd_php != NULL) && 2042 (pollfdp[entry].events == pdp->pd_events) && 2043 ((pcp->pc_flag & T_POLLWAKE) == 0)) { 2044 BT_CLEAR(pcp->pc_bitmap, fd); 2045 } 2046 /* 2047 * if the fd can be cached now but not before, 2048 * do it now. 2049 */ 2050 if ((pdp->pd_php == NULL) && (php != NULL)) { 2051 pdp->pd_php = php; 2052 pollhead_insert(php, pdp); 2053 /* 2054 * We are inserting a polldat struct for 2055 * the first time. We may have missed a 2056 * wakeup on this device. Re-poll once. 2057 * This should be a rare event. 2058 */ 2059 releasef(fd); 2060 goto retry; 2061 } 2062 if (refp->xf_refcnt > 1) { 2063 /* 2064 * this fd appeared multiple time 2065 * in the poll list. This is rare but 2066 * we have to look at all of them for 2067 * correctness. 2068 */ 2069 error = plist_chkdupfd(fp, pdp, ps, 2070 pollfdp, entry, &fdcnt); 2071 if (error > 0) { 2072 releasef(fd); 2073 break; 2074 } 2075 if (error < 0) { 2076 goto retry; 2077 } 2078 } 2079 releasef(fd); 2080 } 2081 } else { 2082 done = 1; 2083 ASSERT(pollcheckrevents(ps, begin, end + 1, which)); 2084 } 2085 } 2086 if (!error) { 2087 ASSERT(*fdcntp + fdcnt == pollscanrevents(pcp, pollfdp, nfds)); 2088 *fdcntp += fdcnt; 2089 } 2090 return (error); 2091 } 2092 2093 /* 2094 * Going through the poll list without much locking. Poll all fds and 2095 * cache all valid fds in the pollcache. 2096 */ 2097 int 2098 pcacheset_cache_list(pollstate_t *ps, pollfd_t *fds, int *fdcntp, int which) 2099 { 2100 pollfd_t *pollfdp = ps->ps_pollfd; 2101 pollcacheset_t *pcacheset = ps->ps_pcacheset; 2102 pollfd_t *newfdlist; 2103 int i; 2104 int fd; 2105 file_t *fp; 2106 int error = 0; 2107 2108 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2109 ASSERT(which < ps->ps_nsets); 2110 ASSERT(pcacheset != NULL); 2111 ASSERT(pcacheset[which].pcs_pollfd == NULL); 2112 newfdlist = kmem_alloc(ps->ps_nfds * sizeof (pollfd_t), KM_SLEEP); 2113 /* 2114 * cache the new poll list in pollcachset. 2115 */ 2116 bcopy(pollfdp, newfdlist, sizeof (pollfd_t) * ps->ps_nfds); 2117 2118 pcacheset[which].pcs_pollfd = newfdlist; 2119 pcacheset[which].pcs_nfds = ps->ps_nfds; 2120 pcacheset[which].pcs_usradr = (uintptr_t)fds; 2121 2122 /* 2123 * We have saved a copy of current poll fd list in one pollcacheset. 2124 * The 'revents' field of the new list is not yet set to 0. Loop 2125 * through the new list just to do that is expensive. We do that 2126 * while polling the list. 2127 */ 2128 for (i = 0; i < ps->ps_nfds; i++) { 2129 fd = pollfdp[i].fd; 2130 /* 2131 * We also filter out the illegal poll events in the event 2132 * field for the cached poll list/set. 2133 */ 2134 if (pollfdp[i].events & ~VALID_POLL_EVENTS) { 2135 newfdlist[i].events = pollfdp[i].events = 2136 pollfdp[i].events & VALID_POLL_EVENTS; 2137 } 2138 if (fd < 0) { 2139 pollfdp[i].revents = 0; 2140 continue; 2141 } 2142 if ((fp = getf(fd)) == NULL) { 2143 pollfdp[i].revents = POLLNVAL; 2144 /* 2145 * invalidate this cache entry in the cached poll list 2146 */ 2147 newfdlist[i].fd = -1; 2148 (*fdcntp)++; 2149 continue; 2150 } 2151 /* 2152 * cache this fd. 2153 */ 2154 error = pcache_insert(ps, fp, &pollfdp[i], fdcntp, (ssize_t)i, 2155 which); 2156 releasef(fd); 2157 if (error) { 2158 /* 2159 * Here we are half way through caching a new 2160 * poll list. Undo every thing. 2161 */ 2162 pcacheset_remove_list(ps, pollfdp, 0, i, which, 0); 2163 kmem_free(newfdlist, ps->ps_nfds * sizeof (pollfd_t)); 2164 pcacheset[which].pcs_pollfd = NULL; 2165 pcacheset[which].pcs_usradr = NULL; 2166 break; 2167 } 2168 } 2169 return (error); 2170 } 2171 2172 /* 2173 * called by pollcacheclean() to set the fp NULL. It also sets polled events 2174 * in pcacheset entries to a special events 'POLLCLOSED'. Do a pollwakeup to 2175 * wake any sleeping poller, then remove the polldat from the driver. 2176 * The routine is called with ps_pcachelock held. 2177 */ 2178 void 2179 pcache_clean_entry(pollstate_t *ps, int fd) 2180 { 2181 pollcache_t *pcp; 2182 polldat_t *pdp; 2183 int i; 2184 2185 ASSERT(ps != NULL); 2186 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2187 pcp = ps->ps_pcache; 2188 ASSERT(pcp); 2189 pdp = pcache_lookup_fd(pcp, fd); 2190 ASSERT(pdp != NULL); 2191 /* 2192 * the corresponding fpollinfo in fi_list has been removed by 2193 * a close on this fd. Reset the cached fp ptr here. 2194 */ 2195 pdp->pd_fp = NULL; 2196 /* 2197 * XXX - This routine also touches data in pcacheset struct. 2198 * 2199 * set the event in cached poll lists to POLLCLOSED. This invalidate 2200 * the cached poll fd entry in that poll list, which will force a 2201 * removal of this cached entry in next poll(). The cleanup is done 2202 * at the removal time. 2203 */ 2204 ASSERT(pdp->pd_ref != NULL); 2205 for (i = 0; i < ps->ps_nsets; i++) { 2206 xref_t *refp; 2207 pollcacheset_t *pcsp; 2208 2209 refp = &pdp->pd_ref[i]; 2210 if (refp->xf_refcnt) { 2211 ASSERT(refp->xf_position >= 0); 2212 pcsp = &ps->ps_pcacheset[i]; 2213 if (refp->xf_refcnt == 1) { 2214 pcsp->pcs_pollfd[refp->xf_position].events = 2215 (short)POLLCLOSED; 2216 } 2217 if (refp->xf_refcnt > 1) { 2218 int j; 2219 /* 2220 * mark every matching entry in pcs_pollfd 2221 */ 2222 for (j = refp->xf_position; 2223 j < pcsp->pcs_nfds; j++) { 2224 if (pcsp->pcs_pollfd[j].fd == fd) { 2225 pcsp->pcs_pollfd[j].events = 2226 (short)POLLCLOSED; 2227 } 2228 } 2229 } 2230 } 2231 } 2232 if (pdp->pd_php) { 2233 pollwakeup(pdp->pd_php, POLLHUP); 2234 pollhead_delete(pdp->pd_php, pdp); 2235 pdp->pd_php = NULL; 2236 } 2237 } 2238 2239 /* 2240 * This is the first time this thread has ever polled, 2241 * so we have to create its pollstate structure. 2242 * This will persist for the life of the thread, 2243 * until it calls pollcleanup(). 2244 */ 2245 pollstate_t * 2246 pollstate_create(void) 2247 { 2248 pollstate_t *ps; 2249 2250 ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP); 2251 ps->ps_nsets = POLLFDSETS; 2252 ps->ps_pcacheset = pcacheset_create(ps->ps_nsets); 2253 return (ps); 2254 } 2255 2256 void 2257 pollstate_destroy(pollstate_t *ps) 2258 { 2259 if (ps->ps_pollfd != NULL) { 2260 kmem_free(ps->ps_pollfd, ps->ps_nfds * sizeof (pollfd_t)); 2261 ps->ps_pollfd = NULL; 2262 } 2263 if (ps->ps_pcache != NULL) { 2264 pcache_destroy(ps->ps_pcache); 2265 ps->ps_pcache = NULL; 2266 } 2267 pcacheset_destroy(ps->ps_pcacheset, ps->ps_nsets); 2268 ps->ps_pcacheset = NULL; 2269 if (ps->ps_dpbuf != NULL) { 2270 kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize * sizeof (pollfd_t)); 2271 ps->ps_dpbuf = NULL; 2272 } 2273 mutex_destroy(&ps->ps_lock); 2274 kmem_free(ps, sizeof (pollstate_t)); 2275 } 2276 2277 /* 2278 * We are holding the appropriate uf_lock entering this routine. 2279 * Bump up the ps_busy count to prevent the thread from exiting. 2280 */ 2281 void 2282 pollblockexit(fpollinfo_t *fpip) 2283 { 2284 for (; fpip; fpip = fpip->fp_next) { 2285 pollcache_t *pcp = fpip->fp_thread->t_pollstate->ps_pcache; 2286 2287 mutex_enter(&pcp->pc_no_exit); 2288 pcp->pc_busy++; /* prevents exit()'s */ 2289 mutex_exit(&pcp->pc_no_exit); 2290 } 2291 } 2292 2293 /* 2294 * Complete phase 2 of cached poll fd cleanup. Call pcache_clean_entry to mark 2295 * the pcacheset events field POLLCLOSED to force the next poll() to remove 2296 * this cache entry. We can't clean the polldat entry clean up here because 2297 * lwp block in poll() needs the info to return. Wakeup anyone blocked in 2298 * poll and let exiting lwp go. No lock is help upon entry. So it's OK for 2299 * pcache_clean_entry to call pollwakeup(). 2300 */ 2301 void 2302 pollcacheclean(fpollinfo_t *fip, int fd) 2303 { 2304 struct fpollinfo *fpip, *fpip2; 2305 2306 fpip = fip; 2307 while (fpip) { 2308 pollstate_t *ps = fpip->fp_thread->t_pollstate; 2309 pollcache_t *pcp = ps->ps_pcache; 2310 2311 mutex_enter(&ps->ps_lock); 2312 pcache_clean_entry(ps, fd); 2313 mutex_exit(&ps->ps_lock); 2314 mutex_enter(&pcp->pc_no_exit); 2315 pcp->pc_busy--; 2316 if (pcp->pc_busy == 0) { 2317 /* 2318 * Wakeup the thread waiting in 2319 * thread_exit(). 2320 */ 2321 cv_signal(&pcp->pc_busy_cv); 2322 } 2323 mutex_exit(&pcp->pc_no_exit); 2324 2325 fpip2 = fpip; 2326 fpip = fpip->fp_next; 2327 kmem_free(fpip2, sizeof (fpollinfo_t)); 2328 } 2329 } 2330 2331 /* 2332 * one of the cache line's counter is wrapping around. Reset all cache line 2333 * counters to zero except one. This is simplistic, but probably works 2334 * effectively. 2335 */ 2336 void 2337 pcacheset_reset_count(pollstate_t *ps, int index) 2338 { 2339 int i; 2340 2341 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2342 for (i = 0; i < ps->ps_nsets; i++) { 2343 if (ps->ps_pcacheset[i].pcs_pollfd != NULL) { 2344 ps->ps_pcacheset[i].pcs_count = 0; 2345 } 2346 } 2347 ps->ps_pcacheset[index].pcs_count = 1; 2348 } 2349 2350 /* 2351 * this routine implements poll cache list replacement policy. 2352 * It is currently choose the "least used". 2353 */ 2354 int 2355 pcacheset_replace(pollstate_t *ps) 2356 { 2357 int i; 2358 int index = 0; 2359 2360 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2361 for (i = 1; i < ps->ps_nsets; i++) { 2362 if (ps->ps_pcacheset[index].pcs_count > 2363 ps->ps_pcacheset[i].pcs_count) { 2364 index = i; 2365 } 2366 } 2367 ps->ps_pcacheset[index].pcs_count = 0; 2368 return (index); 2369 } 2370 2371 /* 2372 * this routine is called by strclose to remove remaining polldat struct on 2373 * the pollhead list of the device being closed. There are two reasons as why 2374 * the polldat structures still remain on the pollhead list: 2375 * 2376 * (1) The layered device(e.g.the console driver). 2377 * In this case, the existence of a polldat implies that the thread putting 2378 * the polldat on this list has not exited yet. Before the thread exits, it 2379 * will have to hold this pollhead lock to remove the polldat. So holding the 2380 * pollhead lock here effectively prevents the thread which put the polldat 2381 * on this list from exiting. 2382 * 2383 * (2) /dev/poll. 2384 * When a polled fd is cached in /dev/poll, its polldat will remain on the 2385 * pollhead list if the process has not done a POLLREMOVE before closing the 2386 * polled fd. We just unlink it here. 2387 */ 2388 void 2389 pollhead_clean(pollhead_t *php) 2390 { 2391 polldat_t *pdp; 2392 2393 /* 2394 * In case(1), while we must prevent the thread in question from 2395 * exiting, we must also obey the proper locking order, i.e. 2396 * (ps_lock -> phlock). 2397 */ 2398 PH_ENTER(php); 2399 while (php->ph_list != NULL) { 2400 pollstate_t *ps; 2401 pollcache_t *pcp; 2402 2403 pdp = php->ph_list; 2404 ASSERT(pdp->pd_php == php); 2405 if (pdp->pd_thread == NULL) { 2406 /* 2407 * This is case(2). Since the ph_lock is sufficient 2408 * to synchronize this lwp with any other /dev/poll 2409 * lwp, just unlink the polldat. 2410 */ 2411 php->ph_list = pdp->pd_next; 2412 pdp->pd_php = NULL; 2413 pdp->pd_next = NULL; 2414 continue; 2415 } 2416 ps = pdp->pd_thread->t_pollstate; 2417 ASSERT(ps != NULL); 2418 pcp = pdp->pd_pcache; 2419 ASSERT(pcp != NULL); 2420 mutex_enter(&pcp->pc_no_exit); 2421 pcp->pc_busy++; /* prevents exit()'s */ 2422 mutex_exit(&pcp->pc_no_exit); 2423 /* 2424 * Now get the locks in proper order to avoid deadlock. 2425 */ 2426 PH_EXIT(php); 2427 mutex_enter(&ps->ps_lock); 2428 /* 2429 * while we dropped the pollhead lock, the element could be 2430 * taken off the list already. 2431 */ 2432 PH_ENTER(php); 2433 if (pdp->pd_php == php) { 2434 ASSERT(pdp == php->ph_list); 2435 php->ph_list = pdp->pd_next; 2436 pdp->pd_php = NULL; 2437 pdp->pd_next = NULL; 2438 } 2439 PH_EXIT(php); 2440 mutex_exit(&ps->ps_lock); 2441 mutex_enter(&pcp->pc_no_exit); 2442 pcp->pc_busy--; 2443 if (pcp->pc_busy == 0) { 2444 /* 2445 * Wakeup the thread waiting in 2446 * thread_exit(). 2447 */ 2448 cv_signal(&pcp->pc_busy_cv); 2449 } 2450 mutex_exit(&pcp->pc_no_exit); 2451 PH_ENTER(php); 2452 } 2453 PH_EXIT(php); 2454 } 2455 2456 /* 2457 * The remove_list is called to cleanup a partially cached 'current' list or 2458 * to remove a partial list which is no longer cached. The flag value of 1 2459 * indicates the second case. 2460 */ 2461 void 2462 pcacheset_remove_list(pollstate_t *ps, pollfd_t *pollfdp, int start, int end, 2463 int cacheindex, int flag) 2464 { 2465 int i; 2466 2467 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2468 for (i = start; i < end; i++) { 2469 if ((pollfdp[i].fd >= 0) && 2470 (flag || !(pollfdp[i].revents & POLLNVAL))) { 2471 if (pcache_delete_fd(ps, pollfdp[i].fd, i, cacheindex, 2472 (uint_t)pollfdp[i].events)) { 2473 int j; 2474 int fd = pollfdp[i].fd; 2475 2476 for (j = i + 1; j < end; j++) { 2477 if (pollfdp[j].fd == fd) { 2478 pcache_update_xref( 2479 ps->ps_pcache, fd, 2480 (ssize_t)j, cacheindex); 2481 break; 2482 } 2483 } 2484 ASSERT(j <= end); 2485 } 2486 } 2487 } 2488 } 2489 2490 #ifdef DEBUG 2491 2492 #include<sys/strsubr.h> 2493 /* 2494 * make sure curthread is not on anyone's pollhead list any more. 2495 */ 2496 static void 2497 pollcheckphlist() 2498 { 2499 int i; 2500 file_t *fp; 2501 uf_entry_t *ufp; 2502 uf_info_t *fip = P_FINFO(curproc); 2503 struct stdata *stp; 2504 polldat_t *pdp; 2505 2506 mutex_enter(&fip->fi_lock); 2507 for (i = 0; i < fip->fi_nfiles; i++) { 2508 UF_ENTER(ufp, fip, i); 2509 if ((fp = ufp->uf_file) != NULL) { 2510 if ((stp = fp->f_vnode->v_stream) != NULL) { 2511 PH_ENTER(&stp->sd_pollist); 2512 pdp = stp->sd_pollist.ph_list; 2513 while (pdp) { 2514 ASSERT(pdp->pd_thread != curthread); 2515 pdp = pdp->pd_next; 2516 } 2517 PH_EXIT(&stp->sd_pollist); 2518 } 2519 } 2520 UF_EXIT(ufp); 2521 } 2522 mutex_exit(&fip->fi_lock); 2523 } 2524 2525 /* 2526 * for resolved set poll list, the xref info in the pcache should be 2527 * consistent with this poll list. 2528 */ 2529 static int 2530 pollcheckxref(pollstate_t *ps, int cacheindex) 2531 { 2532 pollfd_t *pollfdp = ps->ps_pcacheset[cacheindex].pcs_pollfd; 2533 pollcache_t *pcp = ps->ps_pcache; 2534 polldat_t *pdp; 2535 int i; 2536 xref_t *refp; 2537 2538 for (i = 0; i < ps->ps_pcacheset[cacheindex].pcs_nfds; i++) { 2539 if (pollfdp[i].fd < 0) { 2540 continue; 2541 } 2542 pdp = pcache_lookup_fd(pcp, pollfdp[i].fd); 2543 ASSERT(pdp != NULL); 2544 ASSERT(pdp->pd_ref != NULL); 2545 refp = &pdp->pd_ref[cacheindex]; 2546 if (refp->xf_position >= 0) { 2547 ASSERT(refp->xf_refcnt >= 1); 2548 ASSERT(pollfdp[refp->xf_position].fd == pdp->pd_fd); 2549 if (refp->xf_refcnt > 1) { 2550 int j; 2551 int count = 0; 2552 2553 for (j = refp->xf_position; 2554 j < ps->ps_pcacheset[cacheindex].pcs_nfds; 2555 j++) { 2556 if (pollfdp[j].fd == pdp->pd_fd) { 2557 count++; 2558 } 2559 } 2560 ASSERT(count == refp->xf_refcnt); 2561 } 2562 } 2563 } 2564 return (1); 2565 } 2566 2567 /* 2568 * For every cached pollfd, its polldat struct should be consistent with 2569 * what is in the pcacheset lists. 2570 */ 2571 static void 2572 checkpolldat(pollstate_t *ps) 2573 { 2574 pollcache_t *pcp = ps->ps_pcache; 2575 polldat_t **hashtbl; 2576 int i; 2577 2578 hashtbl = pcp->pc_hash; 2579 for (i = 0; i < pcp->pc_hashsize; i++) { 2580 polldat_t *pdp; 2581 2582 for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 2583 ASSERT(pdp->pd_ref != NULL); 2584 if (pdp->pd_count > 0) { 2585 xref_t *refp; 2586 int j; 2587 pollcacheset_t *pcsp; 2588 pollfd_t *pollfd; 2589 2590 for (j = 0; j < ps->ps_nsets; j++) { 2591 refp = &pdp->pd_ref[j]; 2592 if (refp->xf_refcnt > 0) { 2593 pcsp = &ps->ps_pcacheset[j]; 2594 ASSERT(refp->xf_position < pcsp->pcs_nfds); 2595 pollfd = pcsp->pcs_pollfd; 2596 ASSERT(pdp->pd_fd == pollfd[refp->xf_position].fd); 2597 } 2598 } 2599 } 2600 } 2601 } 2602 } 2603 2604 /* 2605 * every wfd element on ph_list must have a corresponding fpollinfo on the 2606 * uf_fpollinfo list. This is a variation of infpollinfo() w/o holding locks. 2607 */ 2608 void 2609 checkwfdlist(vnode_t *vp, fpollinfo_t *fpip) 2610 { 2611 stdata_t *stp; 2612 polldat_t *pdp; 2613 fpollinfo_t *fpip2; 2614 2615 if ((stp = vp->v_stream) == NULL) { 2616 return; 2617 } 2618 PH_ENTER(&stp->sd_pollist); 2619 for (pdp = stp->sd_pollist.ph_list; pdp; pdp = pdp->pd_next) { 2620 if (pdp->pd_thread->t_procp == curthread->t_procp) { 2621 for (fpip2 = fpip; fpip2; fpip2 = fpip2->fp_next) { 2622 if (pdp->pd_thread == fpip2->fp_thread) { 2623 break; 2624 } 2625 } 2626 ASSERT(fpip2 != NULL); 2627 } 2628 } 2629 PH_EXIT(&stp->sd_pollist); 2630 } 2631 2632 /* 2633 * For each cached fd whose bit is not set in bitmap, its revents field in 2634 * current poll list should be 0. 2635 */ 2636 static int 2637 pollcheckrevents(pollstate_t *ps, int begin, int end, int cacheindex) 2638 { 2639 pollcache_t *pcp = ps->ps_pcache; 2640 pollfd_t *pollfdp = ps->ps_pollfd; 2641 int i; 2642 2643 for (i = begin; i < end; i++) { 2644 polldat_t *pdp; 2645 2646 ASSERT(!BT_TEST(pcp->pc_bitmap, i)); 2647 pdp = pcache_lookup_fd(pcp, i); 2648 if (pdp && pdp->pd_fp != NULL) { 2649 xref_t *refp; 2650 int entry; 2651 2652 ASSERT(pdp->pd_ref != NULL); 2653 refp = &pdp->pd_ref[cacheindex]; 2654 if (refp->xf_refcnt == 0) { 2655 continue; 2656 } 2657 entry = refp->xf_position; 2658 ASSERT(entry >= 0); 2659 ASSERT(pollfdp[entry].revents == 0); 2660 if (refp->xf_refcnt > 1) { 2661 int j; 2662 2663 for (j = entry + 1; j < ps->ps_nfds; j++) { 2664 if (pollfdp[j].fd == i) { 2665 ASSERT(pollfdp[j].revents == 0); 2666 } 2667 } 2668 } 2669 } 2670 } 2671 return (1); 2672 } 2673 2674 #endif /* DEBUG */ 2675 2676 pollcache_t * 2677 pcache_alloc() 2678 { 2679 return (kmem_zalloc(sizeof (pollcache_t), KM_SLEEP)); 2680 } 2681 2682 void 2683 pcache_create(pollcache_t *pcp, nfds_t nfds) 2684 { 2685 size_t mapsize; 2686 2687 /* 2688 * allocate enough bits for the poll fd list 2689 */ 2690 if ((mapsize = POLLMAPCHUNK) <= nfds) { 2691 mapsize = (nfds + POLLMAPCHUNK - 1) & ~(POLLMAPCHUNK - 1); 2692 } 2693 pcp->pc_bitmap = kmem_zalloc((mapsize / BT_NBIPUL) * sizeof (ulong_t), 2694 KM_SLEEP); 2695 pcp->pc_mapsize = mapsize; 2696 /* 2697 * The hash size is at least POLLHASHCHUNKSZ. If user polls a large 2698 * number of fd to start with, allocate a bigger hash table (to the 2699 * nearest multiple of POLLHASHCHUNKSZ) because dynamically growing a 2700 * hash table is expensive. 2701 */ 2702 if (nfds < POLLHASHCHUNKSZ) { 2703 pcp->pc_hashsize = POLLHASHCHUNKSZ; 2704 } else { 2705 pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) & 2706 ~(POLLHASHCHUNKSZ - 1); 2707 } 2708 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *), 2709 KM_SLEEP); 2710 } 2711 2712 void 2713 pcache_destroy(pollcache_t *pcp) 2714 { 2715 polldat_t **hashtbl; 2716 int i; 2717 2718 hashtbl = pcp->pc_hash; 2719 for (i = 0; i < pcp->pc_hashsize; i++) { 2720 if (hashtbl[i] != NULL) { 2721 polldat_t *pdp, *pdp2; 2722 2723 pdp = hashtbl[i]; 2724 while (pdp != NULL) { 2725 pdp2 = pdp->pd_hashnext; 2726 if (pdp->pd_ref != NULL) { 2727 kmem_free(pdp->pd_ref, sizeof (xref_t) * 2728 pdp->pd_nsets); 2729 } 2730 kmem_free(pdp, sizeof (polldat_t)); 2731 pdp = pdp2; 2732 pcp->pc_fdcount--; 2733 } 2734 } 2735 } 2736 ASSERT(pcp->pc_fdcount == 0); 2737 kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize); 2738 kmem_free(pcp->pc_bitmap, 2739 sizeof (ulong_t) * (pcp->pc_mapsize/BT_NBIPUL)); 2740 mutex_destroy(&pcp->pc_no_exit); 2741 mutex_destroy(&pcp->pc_lock); 2742 cv_destroy(&pcp->pc_cv); 2743 cv_destroy(&pcp->pc_busy_cv); 2744 kmem_free(pcp, sizeof (pollcache_t)); 2745 } 2746 2747 pollcacheset_t * 2748 pcacheset_create(int nsets) 2749 { 2750 return (kmem_zalloc(sizeof (pollcacheset_t) * nsets, KM_SLEEP)); 2751 } 2752 2753 void 2754 pcacheset_destroy(pollcacheset_t *pcsp, int nsets) 2755 { 2756 int i; 2757 2758 for (i = 0; i < nsets; i++) { 2759 if (pcsp[i].pcs_pollfd != NULL) { 2760 kmem_free(pcsp[i].pcs_pollfd, pcsp[i].pcs_nfds * 2761 sizeof (pollfd_t)); 2762 } 2763 } 2764 kmem_free(pcsp, sizeof (pollcacheset_t) * nsets); 2765 } 2766 2767 /* 2768 * Check each duplicated poll fd in the poll list. It may be necessary to 2769 * VOP_POLL the same fd again using different poll events. getf() has been 2770 * done by caller. This routine returns 0 if it can sucessfully process the 2771 * entire poll fd list. It returns -1 if underlying vnode has changed during 2772 * a VOP_POLL, in which case the caller has to repoll. It returns a positive 2773 * value if VOP_POLL failed. 2774 */ 2775 static int 2776 plist_chkdupfd(file_t *fp, polldat_t *pdp, pollstate_t *psp, pollfd_t *pollfdp, 2777 int entry, int *fdcntp) 2778 { 2779 int i; 2780 int fd; 2781 nfds_t nfds = psp->ps_nfds; 2782 2783 fd = pollfdp[entry].fd; 2784 for (i = entry + 1; i < nfds; i++) { 2785 if (pollfdp[i].fd == fd) { 2786 if (pollfdp[i].events == pollfdp[entry].events) { 2787 if ((pollfdp[i].revents = 2788 pollfdp[entry].revents) != 0) { 2789 (*fdcntp)++; 2790 } 2791 } else { 2792 2793 int error; 2794 pollhead_t *php; 2795 pollcache_t *pcp = psp->ps_pcache; 2796 2797 /* 2798 * the events are different. VOP_POLL on this 2799 * fd so that we don't miss any revents. 2800 */ 2801 php = NULL; 2802 ASSERT(curthread->t_pollcache == NULL); 2803 error = VOP_POLL(fp->f_vnode, 2804 pollfdp[i].events, 0, 2805 &pollfdp[i].revents, &php); 2806 if (error) { 2807 return (error); 2808 } 2809 /* 2810 * layered devices(e.g. console driver) 2811 * may change the vnode and thus the pollhead 2812 * pointer out from underneath us. 2813 */ 2814 if (php != NULL && pdp->pd_php != NULL && 2815 php != pdp->pd_php) { 2816 pollhead_delete(pdp->pd_php, pdp); 2817 pdp->pd_php = php; 2818 pollhead_insert(php, pdp); 2819 /* 2820 * We could have missed a wakeup on the 2821 * new target device. Make sure the new 2822 * target gets polled once. 2823 */ 2824 BT_SET(pcp->pc_bitmap, fd); 2825 return (-1); 2826 } 2827 if (pollfdp[i].revents) { 2828 (*fdcntp)++; 2829 } 2830 } 2831 } 2832 } 2833 return (0); 2834 } 2835