/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/poll_impl.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/bitmap.h>
#include <sys/kstat.h>
#include <sys/rctl.h>
#include <sys/port_impl.h>
#include <sys/schedctl.h>
#include <sys/cpu.h>

#define	NPHLOCKS	64	/* Number of locks; must be power of 2 */
#define	PHLOCKADDR(php)	&plocks[(((uintptr_t)(php)) >> 8) & (NPHLOCKS - 1)]
#define	PHLOCK(php)	PHLOCKADDR(php).pp_lock
#define	PH_ENTER(php)	mutex_enter(PHLOCK(php))
#define	PH_EXIT(php)	mutex_exit(PHLOCK(php))
#define	VALID_POLL_EVENTS	(POLLIN | POLLPRI | POLLOUT | POLLRDNORM \
	| POLLRDBAND | POLLWRBAND | POLLHUP | POLLERR | POLLNVAL)

/*
 * global counters to collect some stats
 */
static struct {
	kstat_named_t	polllistmiss;	/* failed to find a cached poll list */
	kstat_named_t	pollcachehit;	/* list matched 100% w/ cached one */
	kstat_named_t	pollcachephit;	/* list matched < 100% w/ cached one */
	kstat_named_t	pollcachemiss;	/* every list entry is dif from cache */
} pollstats = {
	{ "polllistmiss",	KSTAT_DATA_UINT64 },
	{ "pollcachehit",	KSTAT_DATA_UINT64 },
	{ "pollcachephit",	KSTAT_DATA_UINT64 },
	{ "pollcachemiss",	KSTAT_DATA_UINT64 }
};

kstat_named_t *pollstats_ptr = (kstat_named_t *)&pollstats;
uint_t pollstats_ndata = sizeof (pollstats) / sizeof (kstat_named_t);

struct pplock	{
	kmutex_t	pp_lock;
	short		pp_flag;
	kcondvar_t	pp_wait_cv;
	int32_t		pp_pad;		/* to a nice round 16 bytes */
};

static struct pplock plocks[NPHLOCKS];	/* Hash array of pollhead locks */

#ifdef DEBUG
static int pollchecksanity(pollstate_t *, nfds_t);
static int pollcheckxref(pollstate_t *, int);
static void pollcheckphlist(void);
static int pollcheckrevents(pollstate_t *, int, int, int);
static void checkpolldat(pollstate_t *);
#endif	/* DEBUG */
static int plist_chkdupfd(file_t *, polldat_t *, pollstate_t *, pollfd_t *, int,
    int *);
/*
 * Data structure overview:
 * The per-thread poll state consists of
 *	one pollstate_t
 *	one pollcache_t
 *	one bitmap with one event bit per fd
 *	a (two-dimensional) hashed array of polldat_t structures - one entry
 *	per fd
 *
 * This conglomerate of data structures interacts with
 *	the pollhead which is used by VOP_POLL and pollwakeup
 *	(protected by the PHLOCK, cached array of plocks), and
 *	the fpollinfo list hanging off the fi_list which is used to notify
 *	poll when a cached fd is closed. This is protected by uf_lock.
 *
 * Invariants:
 *	pd_php (pollhead pointer) is set iff (if and only if) the polldat
 *	is on that pollhead. This is modified atomically under pc_lock.
 *
 *	pd_fp (file_t pointer) is set iff the thread is on the fpollinfo
 *	list for that open file.
 *	This is modified atomically under pc_lock.
 *
 *	pd_count is the sum (over all values of i) of pd_ref[i].xf_refcnt.
 *	Iff pd_ref[i].xf_refcnt >= 1 then
 *		ps_pcacheset[i].pcs_pollfd[pd_ref[i].xf_position].fd == pd_fd
 *	Iff pd_ref[i].xf_refcnt > 1 then
 *		In ps_pcacheset[i].pcs_pollfd between index
 *		pd_ref[i].xf_position and the end of the list
 *		there are xf_refcnt entries with .fd == pd_fd
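 *
 *	A purely illustrative (hypothetical) example of the invariants above:
 *	if cached set 0 holds pcs_pollfd entries for fds 4, 7, 4 (in that
 *	order), then the polldat with pd_fd == 4 has pd_ref[0].xf_refcnt == 2
 *	and pd_ref[0].xf_position == 0 (the first appearance of fd 4), two
 *	entries with .fd == 4 lie between that position and the end of the
 *	list, and pd_count sums the xf_refcnt values over all cache sets.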
 *
 * Locking design:
 * Whenever possible the design relies on the fact that the poll cache state
 * is per thread; thus for both poll and exit it is self-synchronizing.
 * Thus the key interactions where other threads access the state are:
 *	pollwakeup (and polltime), and
 *	close cleaning up the cached references to an open file
 *
 * The two key locks in poll proper are ps_lock and pc_lock.
 *
 * The ps_lock is used for synchronization between poll, (lwp_)exit and close
 * to ensure that modifications to pollcacheset structure are serialized.
 * This lock is held through most of poll() except where poll sleeps
 * since there is little need to handle closes concurrently with the execution
 * of poll.
 * The pc_lock protects most of the fields in pollcache structure and polldat
 * structures (which are accessed by poll, pollwakeup, and polltime)
 * with the exception of fields that are only modified when only one thread
 * can access this per-thread state.
 * Those exceptions occur in poll when first allocating the per-thread state,
 * when poll grows the number of polldat (never shrinks), and when
 * exit/pollcleanup has ensured that there are no references from either
 * pollheads or fpollinfo to the thread's poll state.
 *
 * The poll(2) system call is the only path in which ps_lock and pc_lock are
 * both held, in that order. It needs ps_lock to synchronize with close and
 * lwp_exit; and pc_lock with pollwakeup.
 *
 * The locking interaction between pc_lock and PHLOCK takes into account
 * that poll acquires these locks in the order of pc_lock and then PHLOCK
 * while pollwakeup does it in the reverse order. Thus pollwakeup implements
 * deadlock avoidance by dropping the locks and reacquiring them in the
 * reverse order. For this to work pollwakeup needs to prevent the thread
 * from exiting and freeing all of the poll related state. This is done using
 *	the pc_no_exit lock
 *	the pc_busy counter
 *	the pc_busy_cv condition variable
 *
 * The locking interaction between pc_lock and uf_lock has similar
 * issues. Poll holds ps_lock and/or pc_lock across calls to getf/releasef
 * which acquire uf_lock. The poll cleanup in close needs to hold uf_lock
 * to prevent poll or exit from doing a delfpollinfo after which the thread
 * might exit. But the cleanup needs to acquire pc_lock when modifying
 * the poll cache state. The solution is to use pc_busy and do the close
 * cleanup in two phases:
 *	First close calls pollblockexit which increments pc_busy.
 *	This prevents the per-thread poll related state from being freed.
 *	Then close drops uf_lock and calls pollcacheclean.
 *	This routine can then acquire pc_lock and remove any references
 *	to the closing fd (as well as recording that it has been closed
 *	so that a POLLNVAL can be generated even if the fd is reused before
 *	poll has been woken up and checked getf() again).
 *
 * When removing a polled fd from poll cache, the fd is always removed
 * from pollhead list first and then from fpollinfo list, i.e.,
 * pollhead_delete() is called before delfpollinfo().
 *
 *
 * Locking hierarchy:
 *  pc_no_exit is a leaf level lock.
 *  ps_lock is held when acquiring pc_lock (except when pollwakeup
 *  acquires pc_lock).
 *  pc_lock might be held when acquiring PHLOCK (pollhead_insert/
 *  pollhead_delete)
 *  pc_lock is always held (but this is not required)
 *  when acquiring PHLOCK (in polladd/pollhead_delete and pollwakeup called
 *  from pcache_clean_entry).
 *  pc_lock is held across addfpollinfo/delfpollinfo which acquire
 *  uf_lock.
 *  pc_lock is held across getf/releasef which acquire uf_lock.
 *  ps_lock might be held across getf/releasef which acquire uf_lock.
 *  pollwakeup tries to acquire pc_lock while holding PHLOCK
 *  but drops the locks and reacquires them in reverse order to avoid
 *  deadlock.
 *
 * Note also that there is deadlock avoidance support for VOP_POLL routines
 * and pollwakeup involving a file system or driver lock.
 * See below.
 */

/*
 * Deadlock avoidance support for VOP_POLL() routines.  This is
 * sometimes necessary to prevent deadlock between polling threads
 * (which hold poll locks on entry to xx_poll(), then acquire foo)
 * and pollwakeup() threads (which hold foo, then acquire poll locks).
 *
 * pollunlock(void) releases whatever poll locks the current thread holds,
 *	returning a cookie for use by pollrelock();
 *
 * pollrelock(cookie) reacquires previously dropped poll locks;
 *
 * polllock(php, mutex) does the common case: pollunlock(),
 *	acquire the problematic mutex, pollrelock().
 */
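/*
 * A minimal, purely illustrative sketch of how a driver might use
 * polllock() in its poll entry point.  The names xx_poll, xx_php and
 * xx_lock are made up for this example and are not part of this file:
 *
 *	static struct pollhead xx_php;
 *	static kmutex_t xx_lock;
 *
 *	static int
 *	xx_poll(dev_t dev, short events, int anyyet, short *reventsp,
 *	    struct pollhead **phpp)
 *	{
 *		polllock(&xx_php, &xx_lock);	// may drop/reacquire poll locks
 *		*reventsp = ...;		// compute the ready events
 *		if (*reventsp == 0 && !anyyet)
 *			*phpp = &xx_php;
 *		mutex_exit(&xx_lock);
 *		return (0);
 *	}
 */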
int
pollunlock(void)
{
	pollcache_t *pcp;
	int lockstate = 0;

	/*
	 * t_pollcache is set by /dev/poll and event ports (port_fd.c).
	 * If pollunlock/pollrelock is called as a result of poll(2),
	 * t_pollcache should be NULL.
	 */
	if (curthread->t_pollcache == NULL)
		pcp = curthread->t_pollstate->ps_pcache;
	else
		pcp = curthread->t_pollcache;

	if (mutex_owned(&pcp->pc_lock)) {
		lockstate = 1;
		mutex_exit(&pcp->pc_lock);
	}
	return (lockstate);
}

void
pollrelock(int lockstate)
{
	pollcache_t *pcp;

	/*
	 * t_pollcache is set by /dev/poll and event ports (port_fd.c).
	 * If pollunlock/pollrelock is called as a result of poll(2),
	 * t_pollcache should be NULL.
	 */
	if (curthread->t_pollcache == NULL)
		pcp = curthread->t_pollstate->ps_pcache;
	else
		pcp = curthread->t_pollcache;

	if (lockstate > 0)
		mutex_enter(&pcp->pc_lock);
}

/* ARGSUSED */
void
polllock(pollhead_t *php, kmutex_t *lp)
{
	if (!mutex_tryenter(lp)) {
		int lockstate = pollunlock();
		mutex_enter(lp);
		pollrelock(lockstate);
	}
}

static int
poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	int fdcnt = 0;
	int rval;
	int i;
	timespec_t *rqtp = NULL;
	int timecheck = 0;
	int imm_timeout = 0;
	pollfd_t *pollfdp;
	pollstate_t *ps;
	pollcache_t *pcp;
	int error = 0;
	nfds_t old_nfds;
	int cacheindex = 0;	/* which cache set is used */

	/*
	 * Determine the precise future time of the requested timeout, if any.
	 */
	if (tsp != NULL) {
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			imm_timeout = 1;
		else {
			timespec_t now;
			timecheck = timechanged;
			gethrestime(&now);
			rqtp = tsp;
			timespecadd(rqtp, &now);
		}
	}

	/*
	 * Reset our signal mask, if requested.
	 */
	if (ksetp != NULL) {
		mutex_enter(&p->p_lock);
		schedctl_finish_sigblock(t);
		lwp->lwp_sigoldmask = t->t_hold;
		t->t_hold = *ksetp;
		t->t_flag |= T_TOMASK;
		/*
		 * Call cv_timedwait_sig() just to check for signals.
		 * We will return immediately with either 0 or -1.
		 */
		if (!cv_timedwait_sig(&t->t_delay_cv, &p->p_lock, lbolt)) {
			mutex_exit(&p->p_lock);
			error = EINTR;
			goto pollout;
		}
		mutex_exit(&p->p_lock);
	}

	/*
	 * Check to see if the caller just wants to use poll() as a timeout.
	 * If yes then bypass all the other stuff and make it sleep.
	 */
	if (nfds == 0) {
		/*
		 * Sleep until we have passed the requested future
		 * time or until interrupted by a signal.
		 * Do not check for signals if we have a zero timeout.
		 */
		if (!imm_timeout) {
			mutex_enter(&t->t_delay_lock);
			while ((rval = cv_waituntil_sig(&t->t_delay_cv,
			    &t->t_delay_lock, rqtp, timecheck)) > 0)
				continue;
			mutex_exit(&t->t_delay_lock);
			if (rval == 0)
				error = EINTR;
		}
		goto pollout;
	}

	if (nfds > p->p_fno_ctl) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
		    p->p_rctls, p, RCA_SAFE);
		mutex_exit(&p->p_lock);
		error = EINVAL;
		goto pollout;
	}

	/*
	 * Need to allocate memory for pollstate before anything because
	 * the mutex and cv are created in this space.
	 */
	if ((ps = t->t_pollstate) == NULL) {
		t->t_pollstate = pollstate_create();
		ps = t->t_pollstate;
	}

	if (ps->ps_pcache == NULL)
		ps->ps_pcache = pcache_alloc();
	pcp = ps->ps_pcache;

	/*
	 * NOTE: for performance, buffers are saved across poll() calls.
	 * The theory is that if a process polls heavily, it tends to poll
	 * on the same set of descriptors.  Therefore, we only reallocate
	 * buffers when nfds changes.  There is no hysteresis control,
	 * because there is no data to suggest that this is necessary;
	 * the penalty of reallocating is not *that* great in any event.
	 */
	old_nfds = ps->ps_nfds;
	if (nfds != old_nfds) {

		if (old_nfds != 0)
			kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));

		if ((pollfdp =
		    kmem_alloc(nfds * sizeof (pollfd_t), KM_NOSLEEP)) == NULL) {
			ps->ps_nfds = 0;
			error = EAGAIN;
			goto pollout;
		}

		ps->ps_pollfd = pollfdp;
		ps->ps_nfds = nfds;
	}

	pollfdp = ps->ps_pollfd;
	if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
		error = EFAULT;
		goto pollout;
	}

	if (fds == NULL) {
		/*
		 * If the process has page 0 mapped, then the copyin() above
		 * will succeed even if fds is NULL.  However, our cached
		 * poll lists are keyed by the address of the passed-in fds
		 * structure, and we use the value NULL to indicate an unused
		 * poll cache list entry.  As such, we elect not to support
		 * NULL as a valid (user) memory address and fail the poll()
		 * call.
		 */
		error = EINVAL;
		goto pollout;
	}

	/*
	 * If this thread polls for the first time, allocate ALL poll
	 * cache data structures and cache the poll fd list.  This
	 * allocation is delayed till now because lwps polling 0 fds
	 * (i.e. using poll as timeout()) don't need this memory.
	 */
	mutex_enter(&ps->ps_lock);
	pcp = ps->ps_pcache;
	ASSERT(pcp != NULL);
	if (pcp->pc_bitmap == NULL) {
		pcache_create(pcp, nfds);
		/*
		 * poll and cache this poll fd list in ps_pcacheset[0].
		 */
		error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
		if (fdcnt || error) {
			mutex_exit(&ps->ps_lock);
			goto pollout;
		}
	} else {
		pollcacheset_t	*pcset = ps->ps_pcacheset;

		/*
		 * Not first time polling.  Select a cached poll list by
		 * matching user pollfd list buffer address.
		 */
		for (cacheindex = 0; cacheindex < ps->ps_nsets; cacheindex++) {
			if (pcset[cacheindex].pcs_usradr == (uintptr_t)fds) {
				if ((++pcset[cacheindex].pcs_count) == 0) {
					/*
					 * counter is wrapping around.
					 */
					pcacheset_reset_count(ps, cacheindex);
				}
				/*
				 * examine and resolve possible
				 * difference of the current poll
				 * list and previously cached one.
				 * If there is an error during resolve(),
				 * the callee will guarantee the consistency
				 * of cached poll list and cache content.
				 */
				error = pcacheset_resolve(ps, nfds, &fdcnt,
				    cacheindex);
				if (error) {
					mutex_exit(&ps->ps_lock);
					goto pollout;
				}
				break;
			}

			/*
			 * Note that the pcs_usradr field of a used entry
			 * won't be NULL, because it stores the address of the
			 * passed-in fds and NULL fds are never cached (in
			 * that case it is either the special timeout case
			 * when nfds is 0, or poll() fails directly).
			 */
			if (pcset[cacheindex].pcs_usradr == NULL) {
				/*
				 * found an unused entry.  Use it to cache
				 * this poll list.
				 */
				error = pcacheset_cache_list(ps, fds, &fdcnt,
				    cacheindex);
				if (fdcnt || error) {
					mutex_exit(&ps->ps_lock);
					goto pollout;
				}
				break;
			}
		}
		if (cacheindex == ps->ps_nsets) {
			/*
			 * We failed to find a matching cached poll fd list.
			 * Replace an old list.
			 */
			pollstats.polllistmiss.value.ui64++;
			cacheindex = pcacheset_replace(ps);
			ASSERT(cacheindex < ps->ps_nsets);
			pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
			error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
			if (error) {
				mutex_exit(&ps->ps_lock);
				goto pollout;
			}
		}
	}

	/*
	 * Always scan the bitmap with the lock on the pollcache held.
	 * This is to make sure that a wakeup does not come undetected.
	 * If the lock is not held, a pollwakeup could have come for an
	 * fd we already checked but before this thread sleeps, in which
	 * case the wakeup is missed.  Now we hold the pcache lock and
	 * check the bitmap again.  This will prevent wakeup from happening
	 * while we hold pcache lock since pollwakeup() will also lock
	 * the pcache before updating poll bitmap.
	 */
	mutex_enter(&pcp->pc_lock);
	for (;;) {
		pcp->pc_flag = 0;
		error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
		if (fdcnt || error) {
			mutex_exit(&pcp->pc_lock);
			mutex_exit(&ps->ps_lock);
			break;
		}

		/*
		 * If T_POLLWAKE is set, a pollwakeup() was performed on
		 * one of the file descriptors.  This can happen only if
		 * one of the VOP_POLL() functions dropped pcp->pc_lock.
		 * The only current cases of this are in procfs (prpoll())
		 * and STREAMS (strpoll()).
		 */
		if (pcp->pc_flag & T_POLLWAKE)
			continue;

		/*
		 * If you get here, the poll of fds was unsuccessful.
		 * Wait until some fd becomes readable, writable, or gets
		 * an exception, or until a signal or a timeout occurs.
		 * Do not check for signals if we have a zero timeout.
		 */
		mutex_exit(&ps->ps_lock);
		if (imm_timeout)
			rval = -1;
		else
			rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
			    rqtp, timecheck);
		mutex_exit(&pcp->pc_lock);
		/*
		 * If we have received a signal or timed out
		 * then break out and return.
		 */
		if (rval <= 0) {
			if (rval == 0)
				error = EINTR;
			break;
		}
		/*
		 * We have not received a signal or timed out.
		 * Continue around and poll fds again.
		 */
		mutex_enter(&ps->ps_lock);
		mutex_enter(&pcp->pc_lock);
	}

pollout:
	/*
	 * If we changed the signal mask but we received
	 * no signal then restore the signal mask.
	 * Otherwise psig() will deal with the signal mask.
	 */
	if (ksetp != NULL) {
		mutex_enter(&p->p_lock);
		if (lwp->lwp_cursig == 0) {
			t->t_hold = lwp->lwp_sigoldmask;
			t->t_flag &= ~T_TOMASK;
		}
		mutex_exit(&p->p_lock);
	}

	if (error)
		return (set_errno(error));

	/*
	 * Copy out the events and return the fdcnt to the user.
	 */
	if (nfds != 0 &&
	    copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
		return (set_errno(EFAULT));

#ifdef DEBUG
	/*
	 * Another sanity check:
	 */
	if (fdcnt) {
		int	reventcnt = 0;

		for (i = 0; i < nfds; i++) {
			if (pollfdp[i].fd < 0) {
				ASSERT(pollfdp[i].revents == 0);
				continue;
			}
			if (pollfdp[i].revents) {
				reventcnt++;
			}
		}
		ASSERT(fdcnt == reventcnt);
	} else {
		for (i = 0; i < nfds; i++) {
			ASSERT(pollfdp[i].revents == 0);
		}
	}
#endif	/* DEBUG */

	return (fdcnt);
}

/*
 * This system call trap exists solely for binary compatibility with
 * old statically-linked applications.  It is not called from libc.
 * It should be removed in the next release.
 */
int
poll(pollfd_t *fds, nfds_t nfds, int time_out)
{
	timespec_t ts;
	timespec_t *tsp;

	if (time_out < 0)
		tsp = NULL;
	else {
		ts.tv_sec = time_out / MILLISEC;
		ts.tv_nsec = (time_out % MILLISEC) * MICROSEC;
		tsp = &ts;
	}

	return (poll_common(fds, nfds, tsp, NULL));
}

/*
 * This is the system call trap that poll(),
 * select() and pselect() are built upon.
 * It is a private interface between libc and the kernel.
 */
int
pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
{
	timespec_t ts;
	timespec_t *tsp;
	sigset_t set;
	k_sigset_t kset;
	k_sigset_t *ksetp;
	model_t datamodel = get_udatamodel();

	if (timeoutp == NULL)
		tsp = NULL;
	else {
		if (datamodel == DATAMODEL_NATIVE) {
			if (copyin(timeoutp, &ts, sizeof (ts)))
				return (set_errno(EFAULT));
		} else {
			timespec32_t ts32;

			if (copyin(timeoutp, &ts32, sizeof (ts32)))
				return (set_errno(EFAULT));
			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
		}

		if (itimerspecfix(&ts))
			return (set_errno(EINVAL));
		tsp = &ts;
	}

	if (setp == NULL)
		ksetp = NULL;
	else {
		if (copyin(setp, &set, sizeof (set)))
			return (set_errno(EFAULT));
		sigutok(&set, &kset);
		ksetp = &kset;
	}

	return (poll_common(fds, nfds, tsp, ksetp));
}

/*
 * Clean up any state left around by poll(2). Called when a thread exits.
 */
void
pollcleanup()
{
	pollstate_t *ps = curthread->t_pollstate;
	pollcache_t *pcp;

	if (ps == NULL)
		return;
	pcp = ps->ps_pcache;
	/*
	 * free up all cached poll fds
	 */
	if (pcp == NULL) {
		/* this pollstate is used by /dev/poll */
		goto pollcleanout;
	}

	if (pcp->pc_bitmap != NULL) {
		ASSERT(MUTEX_NOT_HELD(&ps->ps_lock));
		/*
		 * a close lwp can race with us when cleaning up a polldat
		 * entry.  We hold the ps_lock when cleaning the hash table.
		 * Since this pollcache is going away anyway, there is no
		 * need to hold the pc_lock.
		 */
		mutex_enter(&ps->ps_lock);
		pcache_clean(pcp);
		mutex_exit(&ps->ps_lock);
#ifdef DEBUG
		/*
		 * At this point, all fds cached by this lwp should be
		 * cleaned up.  There should be no fd in fi_list still
		 * referencing this thread.
		 */
		checkfpollinfo();	/* sanity check */
		pollcheckphlist();	/* sanity check */
#endif	/* DEBUG */
	}
	/*
	 * Be sure no one is referencing thread before exiting
	 */
	mutex_enter(&pcp->pc_no_exit);
	ASSERT(pcp->pc_busy >= 0);
	while (pcp->pc_busy > 0)
		cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
	mutex_exit(&pcp->pc_no_exit);
pollcleanout:
	pollstate_destroy(ps);
	curthread->t_pollstate = NULL;
}

/*
 * pollwakeup() - poke threads waiting in poll() for some event
 * on a particular object.
 *
 * The threads hanging off of the specified pollhead structure are scanned.
 * If their event mask matches the specified event(s), then pollnotify() is
 * called to poke the thread.
 *
 * Multiple events may be specified.  When POLLHUP or POLLERR are specified,
 * all waiting threads are poked.
 *
 * It is important that pollnotify() not drop the lock protecting the list
 * of threads.
 */
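/*
 * A purely illustrative producer-side sketch: a driver that handed out a
 * pollhead via its poll routine would typically notify sleeping pollers
 * roughly like this after making data available (xx_php and xx_lock are
 * made-up names from the example above, not part of this file):
 *
 *	mutex_enter(&xx_lock);
 *	...enqueue the newly arrived data...
 *	mutex_exit(&xx_lock);
 *	pollwakeup(&xx_php, POLLIN | POLLRDNORM);
 */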
void
pollwakeup(pollhead_t *php, short events_arg)
{
	polldat_t	*pdp;
	int		events = (ushort_t)events_arg;
	struct plist {
		port_t *pp;
		int	pevents;
		struct plist *next;
	};
	struct plist *plhead = NULL, *pltail = NULL;

retry:
	PH_ENTER(php);

	for (pdp = php->ph_list; pdp; pdp = pdp->pd_next) {
		if ((pdp->pd_events & events) ||
		    (events & (POLLHUP | POLLERR))) {

			pollcache_t	*pcp;

			if (pdp->pd_portev != NULL) {
				port_kevent_t	*pkevp = pdp->pd_portev;
				/*
				 * Object (fd) is associated with an event port,
				 * => send event notification to the port.
				 */
				ASSERT(pkevp->portkev_source == PORT_SOURCE_FD);
				mutex_enter(&pkevp->portkev_lock);
				if (pkevp->portkev_flags & PORT_KEV_VALID) {
					int pevents;

					pkevp->portkev_flags &= ~PORT_KEV_VALID;
					pkevp->portkev_events |= events &
					    (pdp->pd_events | POLLHUP |
					    POLLERR);
					/*
					 * portkev_lock mutex will be released
					 * by port_send_event().
					 */
					port_send_event(pkevp);

					/*
					 * If we have some thread polling the
					 * port's fd, add it to the list. They
					 * will be notified later.
					 * The port_pollwkup() will flag the
					 * port_t so that it will not disappear
					 * till port_pollwkdone() is called.
					 */
					pevents =
					    port_pollwkup(pkevp->portkev_port);
					if (pevents) {
						struct plist *t;
						t = kmem_zalloc(
						    sizeof (struct plist),
						    KM_SLEEP);
						t->pp = pkevp->portkev_port;
						t->pevents = pevents;
						if (plhead == NULL) {
							plhead = t;
						} else {
							pltail->next = t;
						}
						pltail = t;
					}
				} else {
					mutex_exit(&pkevp->portkev_lock);
				}
				continue;
			}

			pcp = pdp->pd_pcache;

			/*
			 * Try to grab the lock for this thread. If
			 * we don't get it then we may deadlock so
			 * back out and restart all over again. Note
			 * that the failure rate is very very low.
			 */
			if (mutex_tryenter(&pcp->pc_lock)) {
				pollnotify(pcp, pdp->pd_fd);
				mutex_exit(&pcp->pc_lock);
			} else {
				/*
				 * We are here because:
				 *	1) This thread has been woken up
				 *	   and is trying to get out of poll().
				 *	2) Some other thread is also here
				 *	   but with a different pollhead lock.
				 *
				 * So, we need to drop the lock on pollhead
				 * because of (1) but we want to prevent
				 * that thread from doing lwp_exit() or
				 * devpoll close. We want to ensure that
				 * the pollcache pointer is still valid.
				 *
				 * Solution: Grab the pcp->pc_no_exit lock,
				 * increment the pc_busy counter, drop every
				 * lock in sight. Get out of the way and wait
				 * for type (2) threads to finish.
				 */

				mutex_enter(&pcp->pc_no_exit);
				pcp->pc_busy++;	/* prevents exit()'s */
				mutex_exit(&pcp->pc_no_exit);

				PH_EXIT(php);
				mutex_enter(&pcp->pc_lock);
				mutex_exit(&pcp->pc_lock);
				mutex_enter(&pcp->pc_no_exit);
				pcp->pc_busy--;
				if (pcp->pc_busy == 0) {
					/*
					 * Wakeup the thread waiting in
					 * thread_exit().
					 */
					cv_signal(&pcp->pc_busy_cv);
				}
				mutex_exit(&pcp->pc_no_exit);
				goto retry;
			}
		}
	}


	/*
	 * Event ports - If this php is a port's pollhead on the list,
	 * call port_pollwkdone() to release it. The port_pollwkdone()
	 * needs to be called before dropping the PH lock so that any new
	 * thread attempting to poll this port's fd is blocked. There can be
	 * only one thread here in pollwakeup notifying this port's fd.
	 */
	if (plhead != NULL && &plhead->pp->port_pollhd == php) {
		struct plist *t;
		port_pollwkdone(plhead->pp);
		t = plhead;
		plhead = plhead->next;
		kmem_free(t, sizeof (struct plist));
	}
	PH_EXIT(php);

	/*
	 * Event ports - Notify threads polling the event port's fd.
	 * This is normally done in port_send_event() where it calls
	 * pollwakeup() on the port. But, for PORT_SOURCE_FD source alone,
	 * we do it here in pollwakeup() to avoid a recursive call.
	 */
	if (plhead != NULL) {
		php = &plhead->pp->port_pollhd;
		events = plhead->pevents;
		goto retry;
	}
}

/*
 * This function is called to inform a thread that
 * an event being polled for has occurred.
 * The pollstate lock on the thread should be held on entry.
 */
void
pollnotify(pollcache_t *pcp, int fd)
{
	ASSERT(fd < pcp->pc_mapsize);
	ASSERT(MUTEX_HELD(&pcp->pc_lock));
	BT_SET(pcp->pc_bitmap, fd);
	pcp->pc_flag |= T_POLLWAKE;
	cv_signal(&pcp->pc_cv);
}

/*
 * add a polldat entry to pollhead ph_list. The polldat struct is used
 * by pollwakeup to wake sleeping pollers when polled events have happened.
 */
void
pollhead_insert(pollhead_t *php, polldat_t *pdp)
{
	PH_ENTER(php);
	ASSERT(pdp->pd_next == NULL);
#ifdef DEBUG
	{
		/*
		 * the polldat should not be already on the list
		 */
		polldat_t *wp;
		for (wp = php->ph_list; wp; wp = wp->pd_next) {
			ASSERT(wp != pdp);
		}
	}
#endif	/* DEBUG */
	pdp->pd_next = php->ph_list;
	php->ph_list = pdp;
	PH_EXIT(php);
}

/*
 * Delete the polldat entry from ph_list.
 */
void
pollhead_delete(pollhead_t *php, polldat_t *pdp)
{
	polldat_t *wp;
	polldat_t **wpp;

	PH_ENTER(php);
	for (wpp = &php->ph_list; (wp = *wpp) != NULL; wpp = &wp->pd_next) {
		if (wp == pdp) {
			*wpp = pdp->pd_next;
			pdp->pd_next = NULL;
			break;
		}
	}
#ifdef DEBUG
	/* assert that pdp is no longer in the list */
	for (wp = *wpp; wp; wp = wp->pd_next) {
		ASSERT(wp != pdp);
	}
#endif	/* DEBUG */
	PH_EXIT(php);
}

/*
 * walk through the poll fd lists to see if they are identical. This is an
 * expensive operation and should not be done more than once for each poll()
 * call.
 *
 * As an optimization (i.e., not having to go through the lists more than
 * once), this routine also clears the revents field of pollfd in 'current'.
 * Zeroing out the revents field of each entry in current poll list is
 * required by the poll man page.
 *
 * Since the events field of the cached list has illegal poll events filtered
 * out, the current list applies the same filtering before comparison.
 *
 * The routine stops when it detects a meaningful difference, or when it
 * exhausts the lists.
 */
int
pcacheset_cmp(pollfd_t *current, pollfd_t *cached, pollfd_t *newlist, int n)
{
	int ix;

	for (ix = 0; ix < n; ix++) {
		/* Prefetch 64 bytes worth of 8-byte elements */
		if ((ix & 0x7) == 0) {
			prefetch_write_many((caddr_t)&current[ix + 8]);
			prefetch_write_many((caddr_t)&cached[ix + 8]);
		}
		if (current[ix].fd == cached[ix].fd) {
			/*
			 * Filter out invalid poll events while we are
			 * inside the loop.
			 */
			if (current[ix].events & ~VALID_POLL_EVENTS) {
				current[ix].events &= VALID_POLL_EVENTS;
				if (newlist != NULL)
					newlist[ix].events = current[ix].events;
			}
			if (current[ix].events == cached[ix].events) {
				current[ix].revents = 0;
				continue;
			}
		}
		if ((current[ix].fd < 0) && (cached[ix].fd < 0)) {
			current[ix].revents = 0;
			continue;
		}
		return (ix);
	}
	return (ix);
}

/*
 * This routine returns a pointer to a cached poll fd entry, or NULL if it
 * does not find it in the hash table.
 */
polldat_t *
pcache_lookup_fd(pollcache_t *pcp, int fd)
{
	int hashindex;
	polldat_t *pdp;

	hashindex = POLLHASH(pcp->pc_hashsize, fd);
	pdp = pcp->pc_hash[hashindex];
	while (pdp != NULL) {
		if (pdp->pd_fd == fd)
			break;
		pdp = pdp->pd_hashnext;
	}
	return (pdp);
}

polldat_t *
pcache_alloc_fd(int nsets)
{
	polldat_t *pdp;

	pdp = kmem_zalloc(sizeof (polldat_t), KM_SLEEP);
	if (nsets > 0) {
		pdp->pd_ref = kmem_zalloc(sizeof (xref_t) * nsets, KM_SLEEP);
		pdp->pd_nsets = nsets;
	}
	return (pdp);
}

/*
 * This routine inserts a polldat into the pollcache's hash table. It
 * may be necessary to grow the size of the hash table.
 */
void
pcache_insert_fd(pollcache_t *pcp, polldat_t *pdp, nfds_t nfds)
{
	int hashindex;
	int fd;

	if ((pcp->pc_fdcount > pcp->pc_hashsize * POLLHASHTHRESHOLD) ||
	    (nfds > pcp->pc_hashsize * POLLHASHTHRESHOLD)) {
		pcache_grow_hashtbl(pcp, nfds);
	}
	fd = pdp->pd_fd;
	hashindex = POLLHASH(pcp->pc_hashsize, fd);
	pdp->pd_hashnext = pcp->pc_hash[hashindex];
	pcp->pc_hash[hashindex] = pdp;
	pcp->pc_fdcount++;

#ifdef DEBUG
	{
		/*
		 * same fd should not appear on a hash list twice
		 */
		polldat_t *pdp1;
		for (pdp1 = pdp->pd_hashnext; pdp1; pdp1 = pdp1->pd_hashnext) {
			ASSERT(pdp->pd_fd != pdp1->pd_fd);
		}
	}
#endif	/* DEBUG */
}

/*
 * Grow the hash table -- either double the table size or round it to the
 * nearest multiple of POLLHASHCHUNKSZ, whichever is bigger. Rehash all the
 * elements on the hash table.
 */
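/*
 * Worked example, for illustration only (the real constants live in the
 * poll implementation headers): assuming POLLHASHCHUNKSZ == 256 and
 * POLLHASHINC == 2, a table of size 256 growing for nfds == 300 simply
 * doubles to 512 (since 300 <= 256 * 2), while growing for nfds == 1000
 * jumps to (1000 + 255) & ~255 == 1024, i.e. nfds rounded up to the next
 * multiple of POLLHASHCHUNKSZ (the mask trick requires a power of two).
 */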
void
pcache_grow_hashtbl(pollcache_t *pcp, nfds_t nfds)
{
	int	oldsize;
	polldat_t **oldtbl;
	polldat_t *pdp, *pdp1;
	int	i;
#ifdef DEBUG
	int	count = 0;
#endif

	ASSERT(pcp->pc_hashsize % POLLHASHCHUNKSZ == 0);
	oldsize = pcp->pc_hashsize;
	oldtbl = pcp->pc_hash;
	if (nfds > pcp->pc_hashsize * POLLHASHINC) {
		pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) &
		    ~(POLLHASHCHUNKSZ - 1);
	} else {
		pcp->pc_hashsize = pcp->pc_hashsize * POLLHASHINC;
	}
	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *),
	    KM_SLEEP);
	/*
	 * rehash existing elements
	 */
	pcp->pc_fdcount = 0;
	for (i = 0; i < oldsize; i++) {
		pdp = oldtbl[i];
		while (pdp != NULL) {
			pdp1 = pdp->pd_hashnext;
			pcache_insert_fd(pcp, pdp, nfds);
			pdp = pdp1;
#ifdef DEBUG
			count++;
#endif
		}
	}
	kmem_free(oldtbl, oldsize * sizeof (polldat_t *));
	ASSERT(pcp->pc_fdcount == count);
}

void
pcache_grow_map(pollcache_t *pcp, int fd)
{
	int	newsize;
	ulong_t	*newmap;

	/*
	 * grow to nearest multiple of POLLMAPCHUNK, assuming POLLMAPCHUNK is
	 * a power of 2.
	 */
	newsize = (fd + POLLMAPCHUNK) & ~(POLLMAPCHUNK - 1);
	newmap = kmem_zalloc((newsize / BT_NBIPUL) * sizeof (ulong_t),
	    KM_SLEEP);
	/*
	 * don't want pollwakeup to set a bit while growing the bitmap.
	 */
	ASSERT(mutex_owned(&pcp->pc_lock) == 0);
	mutex_enter(&pcp->pc_lock);
	bcopy(pcp->pc_bitmap, newmap,
	    (pcp->pc_mapsize / BT_NBIPUL) * sizeof (ulong_t));
	kmem_free(pcp->pc_bitmap,
	    (pcp->pc_mapsize / BT_NBIPUL) * sizeof (ulong_t));
	pcp->pc_bitmap = newmap;
	pcp->pc_mapsize = newsize;
	mutex_exit(&pcp->pc_lock);
}

/*
 * remove all the references from the pollhead list and fpollinfo lists.
 */
void
pcache_clean(pollcache_t *pcp)
{
	int i;
	polldat_t **hashtbl;
	polldat_t *pdp;

	ASSERT(MUTEX_HELD(&curthread->t_pollstate->ps_lock));
	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
			}
			if (pdp->pd_fp != NULL) {
				delfpollinfo(pdp->pd_fd);
				pdp->pd_fp = NULL;
			}
		}
	}
}

void
pcacheset_invalidate(pollstate_t *ps, polldat_t *pdp)
{
	int	i;
	int	fd = pdp->pd_fd;

	/*
	 * we come here because of an earlier close() on this cached poll fd.
	 */
	ASSERT(pdp->pd_fp == NULL);
	ASSERT(MUTEX_HELD(&ps->ps_lock));
	pdp->pd_events = 0;
	for (i = 0; i < ps->ps_nsets; i++) {
		xref_t		*refp;
		pollcacheset_t	*pcsp;

		ASSERT(pdp->pd_ref != NULL);
		refp = &pdp->pd_ref[i];
		if (refp->xf_refcnt) {
			ASSERT(refp->xf_position >= 0);
			pcsp = &ps->ps_pcacheset[i];
			if (refp->xf_refcnt == 1) {
				pcsp->pcs_pollfd[refp->xf_position].fd = -1;
				refp->xf_refcnt = 0;
				pdp->pd_count--;
			} else if (refp->xf_refcnt > 1) {
				int	j;

				/*
				 * turn off every appearance in pcs_pollfd list
				 */
				for (j = refp->xf_position;
				    j < pcsp->pcs_nfds; j++) {
					if (pcsp->pcs_pollfd[j].fd == fd) {
						pcsp->pcs_pollfd[j].fd = -1;
						refp->xf_refcnt--;
						pdp->pd_count--;
					}
				}
			}
			ASSERT(refp->xf_refcnt == 0);
			refp->xf_position = POLLPOSINVAL;
		}
	}
	ASSERT(pdp->pd_count == 0);
}

/*
 * Insert poll fd into the pollcache, and add poll registration.
 * This routine is called after getf() and before releasef(). So the vnode
 * can not disappear even if we block here.
 * If there is an error, the polled fd is not cached.
 */
int
pcache_insert(pollstate_t *ps, file_t *fp, pollfd_t *pollfdp, int *fdcntp,
    ssize_t pos, int which)
{
	pollcache_t	*pcp = ps->ps_pcache;
	polldat_t	*pdp;
	int		error;
	int		fd;
	pollhead_t	*memphp = NULL;
	xref_t		*refp;
	int		newpollfd = 0;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	/*
	 * The poll caching uses the existing VOP_POLL interface. If there
	 * are no polled events, we want the polled device to set its
	 * "someone is sleeping in poll" flag. When the polled events happen
	 * later, the driver will call pollwakeup(). We achieve this by
	 * always passing 0 in the third parameter ("anyyet") when calling
	 * VOP_POLL. This parameter is not looked at by drivers when the
	 * polled events exist. If a driver chooses to ignore this parameter
	 * and call pollwakeup whenever the polled events happen, that will
	 * be OK too.
	 */
	ASSERT(curthread->t_pollcache == NULL);
	error = VOP_POLL(fp->f_vnode, pollfdp->events, 0, &pollfdp->revents,
	    &memphp, NULL);
	if (error) {
		return (error);
	}
	if (pollfdp->revents) {
		(*fdcntp)++;
	}
	/*
	 * polling the underlying device succeeded. Now we can cache it.
	 * A close can't come in here because we have not done a releasef()
	 * yet.
	 */
	fd = pollfdp->fd;
	pdp = pcache_lookup_fd(pcp, fd);
	if (pdp == NULL) {
		ASSERT(ps->ps_nsets > 0);
		pdp = pcache_alloc_fd(ps->ps_nsets);
		newpollfd = 1;
	}
	/*
	 * If this entry was used to cache a poll fd which was closed, and
	 * this entry has not been cleaned, do it now.
	 */
	if ((pdp->pd_count > 0) && (pdp->pd_fp == NULL)) {
		pcacheset_invalidate(ps, pdp);
		ASSERT(pdp->pd_next == NULL);
	}
	if (pdp->pd_count == 0) {
		pdp->pd_fd = fd;
		pdp->pd_fp = fp;
		addfpollinfo(fd);
		pdp->pd_thread = curthread;
		pdp->pd_pcache = pcp;
		/*
		 * the entry is never used or cleared by removing a cached
		 * pollfd (pcache_delete_fd). So all the fields should be
		 * clear.
		 */
		ASSERT(pdp->pd_next == NULL);
	}

	/*
	 * A polled fd is considered cached. So there should be a fpollinfo
	 * entry on uf_fpollinfo list.
	 */
	ASSERT(infpollinfo(fd));
	/*
	 * If there is an inconsistency, we want to know it here.
	 */
	ASSERT(pdp->pd_fp == fp);

	/*
	 * XXX pd_events is a union of all polled events on this fd, possibly
	 * by different threads. Unless this is a new first poll(), pd_events
	 * never shrinks. If an event is no longer polled by a process, there
	 * is no way to cancel that event. In that case, poll degrades to its
	 * old form -- polling on this fd every time poll() is called. The
	 * assumption is that an app always polls the same type of events.
	 */
	pdp->pd_events |= pollfdp->events;

	pdp->pd_count++;
	/*
	 * There is not much special handling for multiple appearances of
	 * the same fd other than xf_position always recording the first
	 * appearance in the poll list. If this is called from
	 * pcacheset_cache_list, a VOP_POLL is called on every pollfd entry;
	 * therefore each revents and fdcnt should be set correctly. If this
	 * is called from pcacheset_resolve, we don't care about fdcnt here.
	 * Pollreadmap will pick up the right count and handle the revents
	 * field of each pollfd entry.
	 */
	ASSERT(pdp->pd_ref != NULL);
	refp = &pdp->pd_ref[which];
	if (refp->xf_refcnt == 0) {
		refp->xf_position = pos;
	} else {
		/*
		 * xf_position records the fd's first appearance in poll list
		 */
		if (pos < refp->xf_position) {
			refp->xf_position = pos;
		}
	}
	ASSERT(pollfdp->fd == ps->ps_pollfd[refp->xf_position].fd);
	refp->xf_refcnt++;
	if (fd >= pcp->pc_mapsize) {
		pcache_grow_map(pcp, fd);
	}
	if (fd > pcp->pc_mapend) {
		pcp->pc_mapend = fd;
	}
	if (newpollfd != 0) {
		pcache_insert_fd(ps->ps_pcache, pdp, ps->ps_nfds);
	}
	if (memphp) {
		if (pdp->pd_php == NULL) {
			pollhead_insert(memphp, pdp);
			pdp->pd_php = memphp;
		} else {
			if (memphp != pdp->pd_php) {
				/*
				 * layered devices (e.g. console driver)
				 * may change the vnode and thus the pollhead
				 * pointer out from underneath us.
				 */
				pollhead_delete(pdp->pd_php, pdp);
				pollhead_insert(memphp, pdp);
				pdp->pd_php = memphp;
			}
		}
	}
	/*
	 * Since there is a considerable window between VOP_POLL and when
	 * we actually put the polldat struct on the pollhead list, we could
	 * miss a pollwakeup. In the case of polling additional events, we
	 * don't update the events until after VOP_POLL. So we could miss
	 * pollwakeup there too. So we always set the bit here just to be
	 * safe. The real performance gain is in subsequent pcache_poll.
	 */
	mutex_enter(&pcp->pc_lock);
	BT_SET(pcp->pc_bitmap, fd);
	mutex_exit(&pcp->pc_lock);
	return (0);
}

/*
 * The entry is not really deleted. The fields are cleared so that the
 * entry is no longer useful, but it will remain in the hash table for reuse
 * later. It will be freed when the polling lwp exits.
 */
int
pcache_delete_fd(pollstate_t *ps, int fd, size_t pos, int which, uint_t cevent)
{
	pollcache_t	*pcp = ps->ps_pcache;
	polldat_t	*pdp;
	xref_t		*refp;

	ASSERT(fd < pcp->pc_mapsize);
	ASSERT(MUTEX_HELD(&ps->ps_lock));

	pdp = pcache_lookup_fd(pcp, fd);
	ASSERT(pdp != NULL);
	ASSERT(pdp->pd_count > 0);
	ASSERT(pdp->pd_ref != NULL);
	refp = &pdp->pd_ref[which];
	if (pdp->pd_count == 1) {
		pdp->pd_events = 0;
		refp->xf_position = POLLPOSINVAL;
		ASSERT(refp->xf_refcnt == 1);
		refp->xf_refcnt = 0;
		if (pdp->pd_php) {
			/*
			 * It is possible for a wakeup thread to get ahead
			 * of the following pollhead_delete and set the bit in
			 * bitmap. It is OK because the bit will be cleared
			 * here anyway.
			 */
			pollhead_delete(pdp->pd_php, pdp);
			pdp->pd_php = NULL;
		}
		pdp->pd_count = 0;
		if (pdp->pd_fp != NULL) {
			pdp->pd_fp = NULL;
			delfpollinfo(fd);
		}
		mutex_enter(&pcp->pc_lock);
		BT_CLEAR(pcp->pc_bitmap, fd);
		mutex_exit(&pcp->pc_lock);
		return (0);
	}
	if ((cevent & POLLCLOSED) == POLLCLOSED) {
		/*
		 * fd cached here has been closed. This is the first
		 * pcache_delete_fd called after the close. Clean up the
		 * entire entry.
		 */
		pcacheset_invalidate(ps, pdp);
		ASSERT(pdp->pd_php == NULL);
		mutex_enter(&pcp->pc_lock);
		BT_CLEAR(pcp->pc_bitmap, fd);
		mutex_exit(&pcp->pc_lock);
		return (0);
	}
#ifdef DEBUG
	if (getf(fd) != NULL) {
		ASSERT(infpollinfo(fd));
		releasef(fd);
	}
#endif	/* DEBUG */
	pdp->pd_count--;
	ASSERT(refp->xf_refcnt > 0);
	if (--refp->xf_refcnt == 0) {
		refp->xf_position = POLLPOSINVAL;
	} else {
		ASSERT(pos >= refp->xf_position);
		if (pos == refp->xf_position) {
			/*
			 * The xref position is no longer valid.
			 * Reset it to a special value and let
			 * caller know it needs to updatexref()
			 * with a new xf_position value.
			 */
			refp->xf_position = POLLPOSTRANS;
			return (1);
		}
	}
	return (0);
}

void
pcache_update_xref(pollcache_t *pcp, int fd, ssize_t pos, int which)
{
	polldat_t	*pdp;

	pdp = pcache_lookup_fd(pcp, fd);
	ASSERT(pdp != NULL);
	ASSERT(pdp->pd_ref != NULL);
	pdp->pd_ref[which].xf_position = pos;
}

#ifdef DEBUG
/*
 * For each polled fd, it's either in the bitmap or cached in
 * pcache hash table. If this routine returns 0, something is wrong.
 */
static int
pollchecksanity(pollstate_t *ps, nfds_t nfds)
{
	int		i;
	int		fd;
	pollcache_t	*pcp = ps->ps_pcache;
	polldat_t	*pdp;
	pollfd_t	*pollfdp = ps->ps_pollfd;
	file_t		*fp;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	for (i = 0; i < nfds; i++) {
		fd = pollfdp[i].fd;
		if (fd < 0) {
			ASSERT(pollfdp[i].revents == 0);
			continue;
		}
		if (pollfdp[i].revents == POLLNVAL)
			continue;
		if ((fp = getf(fd)) == NULL)
			continue;
		pdp = pcache_lookup_fd(pcp, fd);
		ASSERT(pdp != NULL);
		ASSERT(infpollinfo(fd));
		ASSERT(pdp->pd_fp == fp);
		releasef(fd);
		if (BT_TEST(pcp->pc_bitmap, fd))
			continue;
		if (pdp->pd_php == NULL)
			return (0);
	}
	return (1);
}
#endif	/* DEBUG */

/*
 * resolve the difference between the current poll list and a cached one.
 */
int
pcacheset_resolve(pollstate_t *ps, nfds_t nfds, int *fdcntp, int which)
{
	int		i;
	pollcache_t	*pcp = ps->ps_pcache;
	pollfd_t	*newlist = NULL;
	pollfd_t	*current = ps->ps_pollfd;
	pollfd_t	*cached;
	pollcacheset_t	*pcsp;
	int		common;
	int		count = 0;
	int		offset;
	int		remain;
	int		fd;
	file_t		*fp;
	int		fdcnt = 0;
	int		cnt = 0;
	nfds_t		old_nfds;
	int		error = 0;
	int		mismatch = 0;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
#ifdef DEBUG
	checkpolldat(ps);
#endif
	pcsp = &ps->ps_pcacheset[which];
	old_nfds = pcsp->pcs_nfds;
	common = (nfds > old_nfds) ? old_nfds : nfds;
	if (nfds != old_nfds) {
		/*
		 * the length of poll list has changed. allocate a new
		 * pollfd list.
		 */
		newlist = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
		bcopy(current, newlist, sizeof (pollfd_t) * nfds);
	}
	/*
	 * Compare the overlapping part of the current fd list with the
	 * cached one. Whenever a difference is found, resolve it.
	 * The comparison is done on the current poll list and the
	 * cached list. But we may be setting up the newlist to be the
	 * cached list for next poll.
	 */
	cached = pcsp->pcs_pollfd;
	remain = common;

	while (count < common) {
		int	tmpfd;
		pollfd_t *np;

		np = (newlist != NULL) ? &newlist[count] : NULL;
		offset = pcacheset_cmp(&current[count], &cached[count], np,
		    remain);
		/*
		 * Collect stats. If the lists match completely on the first
		 * pass, it's a hit. Otherwise, it's a partial hit or a miss.
		 */
		if ((count == 0) && (offset == common)) {
			pollstats.pollcachehit.value.ui64++;
		} else {
			mismatch++;
		}
		count += offset;
		if (offset < remain) {
			ASSERT(count < common);
			ASSERT((current[count].fd != cached[count].fd) ||
			    (current[count].events != cached[count].events));
			/*
			 * Filter out invalid events.
			 */
			if (current[count].events & ~VALID_POLL_EVENTS) {
				if (newlist != NULL) {
					newlist[count].events =
					    current[count].events &=
					    VALID_POLL_EVENTS;
				} else {
					current[count].events &=
					    VALID_POLL_EVENTS;
				}
			}
			/*
			 * when resolving a difference, we always remove the
			 * fd from cache before inserting one into cache.
			 */
			if (cached[count].fd >= 0) {
				tmpfd = cached[count].fd;
				if (pcache_delete_fd(ps, tmpfd, count, which,
				    (uint_t)cached[count].events)) {
					/*
					 * This should be rare but needed for
					 * correctness.
					 *
					 * The first appearance in the cached
					 * list is being "turned off". The
					 * same fd appears more than once in
					 * the cached poll list. Find the next
					 * one on the list and update the
					 * cached xf_position field.
					 */
					for (i = count + 1; i < old_nfds; i++) {
						if (cached[i].fd == tmpfd) {
							pcache_update_xref(pcp,
							    tmpfd, (ssize_t)i,
							    which);
							break;
						}
					}
					ASSERT(i <= old_nfds);
				}
				/*
				 * In case a new cache list is allocated,
				 * need to keep both cache lists in sync
				 * because the new one can be freed if we have
				 * an error later.
				 */
				cached[count].fd = -1;
				if (newlist != NULL) {
					newlist[count].fd = -1;
				}
			}
			if ((tmpfd = current[count].fd) >= 0) {
				/*
				 * add to the cached fd tbl and bitmap.
				 */
				if ((fp = getf(tmpfd)) == NULL) {
					current[count].revents = POLLNVAL;
					if (newlist != NULL) {
						newlist[count].fd = -1;
					}
					cached[count].fd = -1;
					fdcnt++;
				} else {
					/*
					 * Here we don't care about the
					 * fdcnt. We will examine the bitmap
					 * later and pick up the correct
					 * fdcnt there. So we never bother
					 * to check value of 'cnt'.
					 */
					error = pcache_insert(ps, fp,
					    &current[count], &cnt,
					    (ssize_t)count, which);
					/*
					 * if no error, we want to do releasef
					 * after we updated cache poll list
					 * entry so that close() won't race
					 * us.
					 */
					if (error) {
						/*
						 * If we encountered an error,
						 * we have invalidated an
						 * entry in cached poll list
						 * (in pcache_delete_fd() above)
						 * but failed to add one here.
						 * This is OK because what's in
						 * the cached list is consistent
						 * with content of cache.
						 * It will not have any ill
						 * effect on next poll().
						 */
						releasef(tmpfd);
						if (newlist != NULL) {
							kmem_free(newlist,
							    nfds *
							    sizeof (pollfd_t));
						}
						return (error);
					}
					/*
					 * If we have allocated a new (temp)
					 * cache list, we need to keep both
					 * in sync because the new one can be
					 * freed if we have an error later.
					 */
					if (newlist != NULL) {
						newlist[count].fd =
						    current[count].fd;
						newlist[count].events =
						    current[count].events;
					}
					cached[count].fd = current[count].fd;
					cached[count].events =
					    current[count].events;
					releasef(tmpfd);
				}
			} else {
				current[count].revents = 0;
			}
			count++;
			remain = common - count;
		}
	}
	if (mismatch != 0) {
		if (mismatch == common) {
			pollstats.pollcachemiss.value.ui64++;
		} else {
			pollstats.pollcachephit.value.ui64++;
		}
	}
	/*
	 * take care of the non overlapping part of a list
	 */
	if (nfds > old_nfds) {
		ASSERT(newlist != NULL);
		for (i = old_nfds; i < nfds; i++) {
			/* filter out invalid events */
			if (current[i].events & ~VALID_POLL_EVENTS) {
				newlist[i].events = current[i].events =
				    current[i].events & VALID_POLL_EVENTS;
			}
			if ((fd = current[i].fd) < 0) {
				current[i].revents = 0;
				continue;
			}
			/*
			 * add to the cached fd tbl and bitmap.
			 */
			if ((fp = getf(fd)) == NULL) {
				current[i].revents = POLLNVAL;
				newlist[i].fd = -1;
				fdcnt++;
				continue;
			}
			/*
			 * Here we don't care about the
			 * fdcnt. We will examine the bitmap
			 * later and pick up the correct
			 * fdcnt there. So we never bother to
			 * check 'cnt'.
			 */
			error = pcache_insert(ps, fp, &current[i], &cnt,
			    (ssize_t)i, which);
			releasef(fd);
			if (error) {
				/*
				 * Here we are half way through adding newly
				 * polled fd. Undo enough to keep the cache
				 * list consistent with the cache content.
				 */
				pcacheset_remove_list(ps, current, old_nfds,
				    i, which, 0);
				kmem_free(newlist, nfds * sizeof (pollfd_t));
				return (error);
			}
		}
	}
	if (old_nfds > nfds) {
		/*
		 * remove the fd's which are no longer polled.
		 */
		pcacheset_remove_list(ps, pcsp->pcs_pollfd, nfds, old_nfds,
		    which, 1);
	}
	/*
	 * set difference resolved. update nfds and cachedlist
	 * in pollstate struct.
	 */
	if (newlist != NULL) {
		kmem_free(pcsp->pcs_pollfd, old_nfds * sizeof (pollfd_t));
		/*
		 * By now, the pollfd.revents field should
		 * all be zeroed.
		 */
		pcsp->pcs_pollfd = newlist;
		pcsp->pcs_nfds = nfds;
	}
	ASSERT(*fdcntp == 0);
	*fdcntp = fdcnt;
	/*
	 * By now for every fd in pollfdp, one of the following should be
	 * true. Otherwise we will miss a polled event.
	 *
	 * 1. the bit corresponding to the fd in bitmap is set. So VOP_POLL
	 *    will be called on this fd in next poll.
	 * 2. the fd is cached in the pcache (i.e. pd_php is set). So
	 *    pollnotify will happen.
	 */
	ASSERT(pollchecksanity(ps, nfds));
	/*
	 * make sure the cross references between cached poll lists and
	 * cached poll fds are correct.
	 */
	ASSERT(pollcheckxref(ps, which));
	/*
	 * ensure each polldat in pollcache references a polled fd in
	 * pollcacheset.
	 */
#ifdef DEBUG
	checkpolldat(ps);
#endif
	return (0);
}

#ifdef DEBUG
static int
pollscanrevents(pollcache_t *pcp, pollfd_t *pollfdp, nfds_t nfds)
{
	int i;
	int reventcnt = 0;

	for (i = 0; i < nfds; i++) {
		if (pollfdp[i].fd < 0) {
			ASSERT(pollfdp[i].revents == 0);
			continue;
		}
		if (pollfdp[i].revents) {
			reventcnt++;
		}
		if (pollfdp[i].revents && (pollfdp[i].revents != POLLNVAL)) {
			ASSERT(BT_TEST(pcp->pc_bitmap, pollfdp[i].fd));
		}
	}
	return (reventcnt);
}
#endif	/* DEBUG */

/*
 * read the bitmap and poll on fds corresponding to the '1' bits. The ps_lock
 * is held upon entry.
 */
int
pcache_poll(pollfd_t *pollfdp, pollstate_t *ps, nfds_t nfds, int *fdcntp,
    int which)
{
	int		i;
	pollcache_t	*pcp;
	int		fd;
	int		begin, end, done;
	pollhead_t	*php;
	int		fdcnt;
	int		error = 0;
	file_t		*fp;
	polldat_t	*pdp;
	xref_t		*refp;
	int		entry;

	pcp = ps->ps_pcache;
	ASSERT(MUTEX_HELD(&ps->ps_lock));
	ASSERT(MUTEX_HELD(&pcp->pc_lock));
retry:
	done = 0;
	begin = 0;
	fdcnt = 0;
	end = pcp->pc_mapend;
	while ((fdcnt < nfds) && !done) {
		php = NULL;
		/*
		 * only poll fds which may have events
		 */
		fd = bt_getlowbit(pcp->pc_bitmap, begin, end);
		ASSERT(fd <= end);
		if (fd >= 0) {
			ASSERT(pollcheckrevents(ps, begin, fd, which));
			/*
			 * adjust map pointers for next round
			 */
			if (fd == end) {
				done = 1;
			} else {
				begin = fd + 1;
			}
			/*
			 * A bitmap caches poll state information of
			 * multiple poll lists. Call VOP_POLL only if
			 * the bit corresponds to an fd in this poll
			 * list.
			 */
			pdp = pcache_lookup_fd(pcp, fd);
			ASSERT(pdp != NULL);
			ASSERT(pdp->pd_ref != NULL);
			refp = &pdp->pd_ref[which];
			if (refp->xf_refcnt == 0)
				continue;
			entry = refp->xf_position;
			ASSERT((entry >= 0) && (entry < nfds));
			ASSERT(pollfdp[entry].fd == fd);
			/*
			 * Being in this routine implies that we have
			 * successfully polled this fd in the past.
			 * Check to see if this fd was closed while we were
			 * blocked in poll. This ensures that we don't
			 * miss a close on the fd in the case this fd is
			 * reused.
			 */
			if (pdp->pd_fp == NULL) {
				ASSERT(pdp->pd_count > 0);
				pollfdp[entry].revents = POLLNVAL;
				fdcnt++;
				if (refp->xf_refcnt > 1) {
					/*
					 * this fd appeared multiple times
					 * in the poll list. Find all of them.
					 */
					for (i = entry + 1; i < nfds; i++) {
						if (pollfdp[i].fd == fd) {
							pollfdp[i].revents =
							    POLLNVAL;
							fdcnt++;
						}
					}
				}
				pcacheset_invalidate(ps, pdp);
				continue;
			}
			/*
			 * We can be here polling a device that is being
			 * closed (i.e. the file pointer is set to NULL,
			 * but pollcacheclean has not happened yet).
			 */
			if ((fp = getf(fd)) == NULL) {
				pollfdp[entry].revents = POLLNVAL;
				fdcnt++;
				if (refp->xf_refcnt > 1) {
					/*
					 * this fd appeared multiple times
					 * in the poll list. Find all of them.
					 */
					for (i = entry + 1; i < nfds; i++) {
						if (pollfdp[i].fd == fd) {
							pollfdp[i].revents =
							    POLLNVAL;
							fdcnt++;
						}
					}
				}
				continue;
			}
			ASSERT(pdp->pd_fp == fp);
			ASSERT(infpollinfo(fd));
			/*
			 * Since we no longer hold the poll head lock across
			 * VOP_POLL, the pollunlock logic can be simplified.
			 */
			ASSERT(pdp->pd_php == NULL ||
			    MUTEX_NOT_HELD(PHLOCK(pdp->pd_php)));
			/*
			 * underlying file systems may set a "pollpending"
			 * flag when they see that the poll may block.
			 * Pollwakeup() is called by the wakeup thread if
			 * pollpending is set.
			 * Pass a 0 fdcnt so that the underlying file system
			 * will set the "pollpending" flag when there are
			 * no polled events.
			 *
			 * Use pollfdp[].events for actual polling because
			 * pd_events is a union of all cached poll events
			 * on this fd. The events parameter also affects
			 * how the polled device sets the "poll pending"
			 * flag.
			 */
			ASSERT(curthread->t_pollcache == NULL);
			error = VOP_POLL(fp->f_vnode, pollfdp[entry].events, 0,
			    &pollfdp[entry].revents, &php, NULL);
			/*
			 * releasef after completely done with this cached
			 * poll entry. To prevent close() coming in to clear
			 * this entry.
			 */
			if (error) {
				releasef(fd);
				break;
			}
			/*
			 * layered devices (e.g. console driver)
			 * may change the vnode and thus the pollhead
			 * pointer out from underneath us.
			 */
			if (php != NULL && pdp->pd_php != NULL &&
			    php != pdp->pd_php) {
				releasef(fd);
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = php;
				pollhead_insert(php, pdp);
				/*
				 * We could have missed a wakeup on the new
				 * target device. Make sure the new target
				 * gets polled once.
				 */
				BT_SET(pcp->pc_bitmap, fd);
				goto retry;
			}

			if (pollfdp[entry].revents) {
				ASSERT(refp->xf_refcnt >= 1);
				fdcnt++;
				if (refp->xf_refcnt > 1) {
					/*
					 * this fd appeared multiple times
					 * in the poll list. This is rare but
					 * we have to look at all of them for
					 * correctness.
					 */
					error = plist_chkdupfd(fp, pdp, ps,
					    pollfdp, entry, &fdcnt);
					if (error > 0) {
						releasef(fd);
						break;
					}
					if (error < 0) {
						goto retry;
					}
				}
				releasef(fd);
			} else {
				/*
				 * VOP_POLL didn't return any revents. We can
				 * clear the bit in bitmap only if we have the
				 * pollhead ptr cached and no other cached
				 * entry is polling different events on this fd.
				 * VOP_POLL may have dropped the ps_lock. Make
				 * sure pollwakeup has not happened before we
				 * clear the bit.
				 */
				 */
				if ((pdp->pd_php != NULL) &&
				    (pollfdp[entry].events == pdp->pd_events) &&
				    ((pcp->pc_flag & T_POLLWAKE) == 0)) {
					BT_CLEAR(pcp->pc_bitmap, fd);
				}
				/*
				 * If the fd could not be cached before but
				 * can be now, do it now.
				 */
				if ((pdp->pd_php == NULL) && (php != NULL)) {
					pdp->pd_php = php;
					pollhead_insert(php, pdp);
					/*
					 * We are inserting a polldat struct for
					 * the first time. We may have missed a
					 * wakeup on this device. Re-poll once.
					 * This should be a rare event.
					 */
					releasef(fd);
					goto retry;
				}
				if (refp->xf_refcnt > 1) {
					/*
					 * This fd appears multiple times
					 * in the poll list. This is rare but
					 * we have to look at all of them for
					 * correctness.
					 */
					error = plist_chkdupfd(fp, pdp, ps,
					    pollfdp, entry, &fdcnt);
					if (error > 0) {
						releasef(fd);
						break;
					}
					if (error < 0) {
						goto retry;
					}
				}
				releasef(fd);
			}
		} else {
			done = 1;
			ASSERT(pollcheckrevents(ps, begin, end + 1, which));
		}
	}
	if (!error) {
		ASSERT(*fdcntp + fdcnt == pollscanrevents(pcp, pollfdp, nfds));
		*fdcntp += fdcnt;
	}
	return (error);
}

/*
 * Go through the poll list without much locking. Poll all fds and
 * cache all valid fds in the pollcache.
 */
int
pcacheset_cache_list(pollstate_t *ps, pollfd_t *fds, int *fdcntp, int which)
{
	pollfd_t	*pollfdp = ps->ps_pollfd;
	pollcacheset_t	*pcacheset = ps->ps_pcacheset;
	pollfd_t	*newfdlist;
	int		i;
	int		fd;
	file_t		*fp;
	int		error = 0;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	ASSERT(which < ps->ps_nsets);
	ASSERT(pcacheset != NULL);
	ASSERT(pcacheset[which].pcs_pollfd == NULL);
	newfdlist = kmem_alloc(ps->ps_nfds * sizeof (pollfd_t), KM_SLEEP);
	/*
	 * Cache the new poll list in the pollcacheset.
	 */
	bcopy(pollfdp, newfdlist, sizeof (pollfd_t) * ps->ps_nfds);

	pcacheset[which].pcs_pollfd = newfdlist;
	pcacheset[which].pcs_nfds = ps->ps_nfds;
	pcacheset[which].pcs_usradr = (uintptr_t)fds;

	/*
	 * We have saved a copy of the current poll fd list in one
	 * pollcacheset. The 'revents' field of the new list is not yet set
	 * to 0. Looping through the new list just to do that would be
	 * expensive, so we do it while polling the list.
	 */
	for (i = 0; i < ps->ps_nfds; i++) {
		fd = pollfdp[i].fd;
		/*
		 * We also filter out the illegal poll events in the event
		 * field for the cached poll list/set.
		 */
		if (pollfdp[i].events & ~VALID_POLL_EVENTS) {
			newfdlist[i].events = pollfdp[i].events =
			    pollfdp[i].events & VALID_POLL_EVENTS;
		}
		if (fd < 0) {
			pollfdp[i].revents = 0;
			continue;
		}
		if ((fp = getf(fd)) == NULL) {
			pollfdp[i].revents = POLLNVAL;
			/*
			 * Invalidate this entry in the cached poll list.
			 */
			newfdlist[i].fd = -1;
			(*fdcntp)++;
			continue;
		}
		/*
		 * Cache this fd.
		 */
		error = pcache_insert(ps, fp, &pollfdp[i], fdcntp, (ssize_t)i,
		    which);
		releasef(fd);
		if (error) {
			/*
			 * Here we are halfway through caching a new
			 * poll list. Undo everything.
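			 * "Everything" means: drop the entries cached so
			 * far (indices 0 .. i - 1), free the new fd list,
			 * and clear this pollcacheset slot so it reads as
			 * empty again.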
			 */
			pcacheset_remove_list(ps, pollfdp, 0, i, which, 0);
			kmem_free(newfdlist, ps->ps_nfds * sizeof (pollfd_t));
			pcacheset[which].pcs_pollfd = NULL;
			pcacheset[which].pcs_usradr = NULL;
			break;
		}
	}
	return (error);
}

/*
 * Called by pollcacheclean() to set the fp to NULL. It also sets the polled
 * events in the pcacheset entries to the special event POLLCLOSED. Do a
 * pollwakeup to wake any sleeping poller, then remove the polldat from the
 * driver. The routine is called with ps_lock held.
 */
void
pcache_clean_entry(pollstate_t *ps, int fd)
{
	pollcache_t	*pcp;
	polldat_t	*pdp;
	int		i;

	ASSERT(ps != NULL);
	ASSERT(MUTEX_HELD(&ps->ps_lock));
	pcp = ps->ps_pcache;
	ASSERT(pcp);
	pdp = pcache_lookup_fd(pcp, fd);
	ASSERT(pdp != NULL);
	/*
	 * The corresponding fpollinfo in fi_list has been removed by
	 * a close on this fd. Reset the cached fp pointer here.
	 */
	pdp->pd_fp = NULL;
	/*
	 * XXX - This routine also touches data in the pcacheset struct.
	 *
	 * Set the events in the cached poll lists to POLLCLOSED. This
	 * invalidates the cached poll fd entry in that poll list, which
	 * will force a removal of this cached entry on the next poll().
	 * The cleanup is done at removal time.
	 */
	ASSERT(pdp->pd_ref != NULL);
	for (i = 0; i < ps->ps_nsets; i++) {
		xref_t *refp;
		pollcacheset_t *pcsp;

		refp = &pdp->pd_ref[i];
		if (refp->xf_refcnt) {
			ASSERT(refp->xf_position >= 0);
			pcsp = &ps->ps_pcacheset[i];
			if (refp->xf_refcnt == 1) {
				pcsp->pcs_pollfd[refp->xf_position].events =
				    (short)POLLCLOSED;
			}
			if (refp->xf_refcnt > 1) {
				int	j;
				/*
				 * Mark every matching entry in pcs_pollfd.
				 */
				for (j = refp->xf_position;
				    j < pcsp->pcs_nfds; j++) {
					if (pcsp->pcs_pollfd[j].fd == fd) {
						pcsp->pcs_pollfd[j].events =
						    (short)POLLCLOSED;
					}
				}
			}
		}
	}
	if (pdp->pd_php) {
		pollwakeup(pdp->pd_php, POLLHUP);
		pollhead_delete(pdp->pd_php, pdp);
		pdp->pd_php = NULL;
	}
}

/*
 * This is the first time this thread has ever polled, so we have to
 * create its pollstate structure. This will persist for the life of
 * the thread, until it calls pollcleanup().
 */
pollstate_t *
pollstate_create(void)
{
	pollstate_t *ps;

	ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP);
	ps->ps_nsets = POLLFDSETS;
	ps->ps_pcacheset = pcacheset_create(ps->ps_nsets);
	return (ps);
}

void
pollstate_destroy(pollstate_t *ps)
{
	if (ps->ps_pollfd != NULL) {
		kmem_free(ps->ps_pollfd, ps->ps_nfds * sizeof (pollfd_t));
		ps->ps_pollfd = NULL;
	}
	if (ps->ps_pcache != NULL) {
		pcache_destroy(ps->ps_pcache);
		ps->ps_pcache = NULL;
	}
	pcacheset_destroy(ps->ps_pcacheset, ps->ps_nsets);
	ps->ps_pcacheset = NULL;
	if (ps->ps_dpbuf != NULL) {
		kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize * sizeof (pollfd_t));
		ps->ps_dpbuf = NULL;
	}
	mutex_destroy(&ps->ps_lock);
	kmem_free(ps, sizeof (pollstate_t));
}

/*
 * We are holding the appropriate uf_lock entering this routine.
 * Bump up the pc_busy count to prevent the thread from exiting.
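 * The count is dropped again in pollcacheclean(); when it reaches zero
 * there, any waiter blocked in thread_exit() is signalled via pc_busy_cv.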
 */
void
pollblockexit(fpollinfo_t *fpip)
{
	for (; fpip; fpip = fpip->fp_next) {
		pollcache_t *pcp = fpip->fp_thread->t_pollstate->ps_pcache;

		mutex_enter(&pcp->pc_no_exit);
		pcp->pc_busy++;	/* prevents exit()'s */
		mutex_exit(&pcp->pc_no_exit);
	}
}

/*
 * Complete phase 2 of cached poll fd cleanup. Call pcache_clean_entry to mark
 * the pcacheset events field POLLCLOSED, forcing the next poll() to remove
 * this cache entry. We can't clean up the polldat entry here because an lwp
 * blocked in poll() needs the info to return. Wake up anyone blocked in
 * poll() and let the exiting lwp go. No lock is held upon entry, so it is
 * OK for pcache_clean_entry to call pollwakeup().
 */
void
pollcacheclean(fpollinfo_t *fip, int fd)
{
	struct fpollinfo	*fpip, *fpip2;

	fpip = fip;
	while (fpip) {
		pollstate_t *ps = fpip->fp_thread->t_pollstate;
		pollcache_t *pcp = ps->ps_pcache;

		mutex_enter(&ps->ps_lock);
		pcache_clean_entry(ps, fd);
		mutex_exit(&ps->ps_lock);
		mutex_enter(&pcp->pc_no_exit);
		pcp->pc_busy--;
		if (pcp->pc_busy == 0) {
			/*
			 * Wake up the thread waiting in thread_exit().
			 */
			cv_signal(&pcp->pc_busy_cv);
		}
		mutex_exit(&pcp->pc_no_exit);

		fpip2 = fpip;
		fpip = fpip->fp_next;
		kmem_free(fpip2, sizeof (fpollinfo_t));
	}
}

/*
 * One of the cache line counters is wrapping around. Reset all cache line
 * counters to zero except one. This is simplistic, but probably works
 * effectively.
 */
void
pcacheset_reset_count(pollstate_t *ps, int index)
{
	int	i;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	for (i = 0; i < ps->ps_nsets; i++) {
		if (ps->ps_pcacheset[i].pcs_pollfd != NULL) {
			ps->ps_pcacheset[i].pcs_count = 0;
		}
	}
	ps->ps_pcacheset[index].pcs_count = 1;
}

/*
 * This routine implements the poll cache list replacement policy.
 * It currently chooses the "least used" entry.
 */
int
pcacheset_replace(pollstate_t *ps)
{
	int i;
	int index = 0;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	for (i = 1; i < ps->ps_nsets; i++) {
		if (ps->ps_pcacheset[index].pcs_count >
		    ps->ps_pcacheset[i].pcs_count) {
			index = i;
		}
	}
	ps->ps_pcacheset[index].pcs_count = 0;
	return (index);
}

/*
 * This routine is called by strclose to remove the remaining polldat structs
 * on the pollhead list of the device being closed. There are two reasons why
 * the polldat structures may still remain on the pollhead list:
 *
 * (1) The layered device (e.g. the console driver).
 * In this case, the existence of a polldat implies that the thread putting
 * the polldat on this list has not exited yet. Before the thread exits, it
 * will have to hold this pollhead lock to remove the polldat. So holding the
 * pollhead lock here effectively prevents the thread which put the polldat
 * on this list from exiting.
 *
 * (2) /dev/poll.
 * When a polled fd is cached in /dev/poll, its polldat will remain on the
 * pollhead list if the process has not done a POLLREMOVE before closing the
 * polled fd. We just unlink it here.
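 *
 * The two cases are told apart below by pd_thread: a polldat placed on
 * the list by poll() carries its caching thread, while a /dev/poll
 * polldat has pd_thread == NULL.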
 */
void
pollhead_clean(pollhead_t *php)
{
	polldat_t	*pdp;

	/*
	 * In case (1), while we must prevent the thread in question from
	 * exiting, we must also obey the proper locking order, i.e.
	 * (ps_lock -> phlock).
	 */
	PH_ENTER(php);
	while (php->ph_list != NULL) {
		pollstate_t	*ps;
		pollcache_t	*pcp;

		pdp = php->ph_list;
		ASSERT(pdp->pd_php == php);
		if (pdp->pd_thread == NULL) {
			/*
			 * This is case (2). Since the ph_lock is sufficient
			 * to synchronize this lwp with any other /dev/poll
			 * lwp, just unlink the polldat.
			 */
			php->ph_list = pdp->pd_next;
			pdp->pd_php = NULL;
			pdp->pd_next = NULL;
			continue;
		}
		ps = pdp->pd_thread->t_pollstate;
		ASSERT(ps != NULL);
		pcp = pdp->pd_pcache;
		ASSERT(pcp != NULL);
		mutex_enter(&pcp->pc_no_exit);
		pcp->pc_busy++;	/* prevents exit()'s */
		mutex_exit(&pcp->pc_no_exit);
		/*
		 * Now get the locks in the proper order to avoid deadlock.
		 */
		PH_EXIT(php);
		mutex_enter(&ps->ps_lock);
		/*
		 * While we dropped the pollhead lock, the element could
		 * already have been taken off the list.
		 */
		PH_ENTER(php);
		if (pdp->pd_php == php) {
			ASSERT(pdp == php->ph_list);
			php->ph_list = pdp->pd_next;
			pdp->pd_php = NULL;
			pdp->pd_next = NULL;
		}
		PH_EXIT(php);
		mutex_exit(&ps->ps_lock);
		mutex_enter(&pcp->pc_no_exit);
		pcp->pc_busy--;
		if (pcp->pc_busy == 0) {
			/*
			 * Wake up the thread waiting in thread_exit().
			 */
			cv_signal(&pcp->pc_busy_cv);
		}
		mutex_exit(&pcp->pc_no_exit);
		PH_ENTER(php);
	}
	PH_EXIT(php);
}

/*
 * pcacheset_remove_list() is called to clean up a partially cached 'current'
 * list or to remove a partial list which is no longer cached. A flag value
 * of 1 indicates the second case.
 */
void
pcacheset_remove_list(pollstate_t *ps, pollfd_t *pollfdp, int start, int end,
    int cacheindex, int flag)
{
	int i;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	for (i = start; i < end; i++) {
		if ((pollfdp[i].fd >= 0) &&
		    (flag || !(pollfdp[i].revents & POLLNVAL))) {
			if (pcache_delete_fd(ps, pollfdp[i].fd, i, cacheindex,
			    (uint_t)pollfdp[i].events)) {
				int j;
				int fd = pollfdp[i].fd;

				for (j = i + 1; j < end; j++) {
					if (pollfdp[j].fd == fd) {
						pcache_update_xref(
						    ps->ps_pcache, fd,
						    (ssize_t)j, cacheindex);
						break;
					}
				}
				ASSERT(j <= end);
			}
		}
	}
}

#ifdef DEBUG

#include <sys/strsubr.h>
/*
 * Make sure curthread is no longer on anyone's pollhead list.
 */
static void
pollcheckphlist(void)
{
	int i;
	file_t *fp;
	uf_entry_t *ufp;
	uf_info_t *fip = P_FINFO(curproc);
	struct stdata *stp;
	polldat_t *pdp;

	mutex_enter(&fip->fi_lock);
	for (i = 0; i < fip->fi_nfiles; i++) {
		UF_ENTER(ufp, fip, i);
		if ((fp = ufp->uf_file) != NULL) {
			if ((stp = fp->f_vnode->v_stream) != NULL) {
				PH_ENTER(&stp->sd_pollist);
				pdp = stp->sd_pollist.ph_list;
				while (pdp) {
					ASSERT(pdp->pd_thread != curthread);
					pdp = pdp->pd_next;
				}
				PH_EXIT(&stp->sd_pollist);
			}
		}
		UF_EXIT(ufp);
	}
	mutex_exit(&fip->fi_lock);
}

/*
 * For a resolved poll list in a pcacheset, the xref info in the pcache
 * should be consistent with this poll list.
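 * In particular, xf_position must name the first occurrence of the fd in
 * the list, and xf_refcnt must equal the number of occurrences; that is
 * what the walk below verifies.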
 */
static int
pollcheckxref(pollstate_t *ps, int cacheindex)
{
	pollfd_t	*pollfdp = ps->ps_pcacheset[cacheindex].pcs_pollfd;
	pollcache_t	*pcp = ps->ps_pcache;
	polldat_t	*pdp;
	int		i;
	xref_t		*refp;

	for (i = 0; i < ps->ps_pcacheset[cacheindex].pcs_nfds; i++) {
		if (pollfdp[i].fd < 0) {
			continue;
		}
		pdp = pcache_lookup_fd(pcp, pollfdp[i].fd);
		ASSERT(pdp != NULL);
		ASSERT(pdp->pd_ref != NULL);
		refp = &pdp->pd_ref[cacheindex];
		if (refp->xf_position >= 0) {
			ASSERT(refp->xf_refcnt >= 1);
			ASSERT(pollfdp[refp->xf_position].fd == pdp->pd_fd);
			if (refp->xf_refcnt > 1) {
				int	j;
				int	count = 0;

				for (j = refp->xf_position;
				    j < ps->ps_pcacheset[cacheindex].pcs_nfds;
				    j++) {
					if (pollfdp[j].fd == pdp->pd_fd) {
						count++;
					}
				}
				ASSERT(count == refp->xf_refcnt);
			}
		}
	}
	return (1);
}

/*
 * For every cached pollfd, its polldat struct should be consistent with
 * what is in the pcacheset lists.
 */
static void
checkpolldat(pollstate_t *ps)
{
	pollcache_t	*pcp = ps->ps_pcache;
	polldat_t	**hashtbl;
	int		i;

	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		polldat_t	*pdp;

		for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
			ASSERT(pdp->pd_ref != NULL);
			if (pdp->pd_count > 0) {
				xref_t		*refp;
				int		j;
				pollcacheset_t	*pcsp;
				pollfd_t	*pollfd;

				for (j = 0; j < ps->ps_nsets; j++) {
					refp = &pdp->pd_ref[j];
					if (refp->xf_refcnt > 0) {
						pcsp = &ps->ps_pcacheset[j];
						ASSERT(refp->xf_position <
						    pcsp->pcs_nfds);
						pollfd = pcsp->pcs_pollfd;
						ASSERT(pdp->pd_fd ==
						    pollfd[refp->xf_position].fd);
					}
				}
			}
		}
	}
}

/*
 * Every wfd element on the ph_list must have a corresponding fpollinfo on
 * the uf_fpollinfo list. This is a variation of infpollinfo() without
 * holding locks.
 */
void
checkwfdlist(vnode_t *vp, fpollinfo_t *fpip)
{
	stdata_t	*stp;
	polldat_t	*pdp;
	fpollinfo_t	*fpip2;

	if ((stp = vp->v_stream) == NULL) {
		return;
	}
	PH_ENTER(&stp->sd_pollist);
	for (pdp = stp->sd_pollist.ph_list; pdp; pdp = pdp->pd_next) {
		if (pdp->pd_thread != NULL &&
		    pdp->pd_thread->t_procp == curthread->t_procp) {
			for (fpip2 = fpip; fpip2; fpip2 = fpip2->fp_next) {
				if (pdp->pd_thread == fpip2->fp_thread) {
					break;
				}
			}
			ASSERT(fpip2 != NULL);
		}
	}
	PH_EXIT(&stp->sd_pollist);
}

/*
 * For each cached fd whose bit is not set in the bitmap, its revents field
 * in the current poll list should be 0.
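 * A clear bit means no wakeup is pending on that fd, so neither the first
 * occurrence (xf_position) nor any duplicate entry may carry stale revents.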
 */
static int
pollcheckrevents(pollstate_t *ps, int begin, int end, int cacheindex)
{
	pollcache_t	*pcp = ps->ps_pcache;
	pollfd_t	*pollfdp = ps->ps_pollfd;
	int		i;

	for (i = begin; i < end; i++) {
		polldat_t	*pdp;

		ASSERT(!BT_TEST(pcp->pc_bitmap, i));
		pdp = pcache_lookup_fd(pcp, i);
		if (pdp && pdp->pd_fp != NULL) {
			xref_t	*refp;
			int	entry;

			ASSERT(pdp->pd_ref != NULL);
			refp = &pdp->pd_ref[cacheindex];
			if (refp->xf_refcnt == 0) {
				continue;
			}
			entry = refp->xf_position;
			ASSERT(entry >= 0);
			ASSERT(pollfdp[entry].revents == 0);
			if (refp->xf_refcnt > 1) {
				int	j;

				for (j = entry + 1; j < ps->ps_nfds; j++) {
					if (pollfdp[j].fd == i) {
						ASSERT(pollfdp[j].revents ==
						    0);
					}
				}
			}
		}
	}
	return (1);
}

#endif	/* DEBUG */

pollcache_t *
pcache_alloc(void)
{
	return (kmem_zalloc(sizeof (pollcache_t), KM_SLEEP));
}

void
pcache_create(pollcache_t *pcp, nfds_t nfds)
{
	size_t	mapsize;

	/*
	 * Allocate enough bits for the poll fd list.
	 */
	if ((mapsize = POLLMAPCHUNK) <= nfds) {
		mapsize = (nfds + POLLMAPCHUNK - 1) & ~(POLLMAPCHUNK - 1);
	}
	pcp->pc_bitmap = kmem_zalloc((mapsize / BT_NBIPUL) * sizeof (ulong_t),
	    KM_SLEEP);
	pcp->pc_mapsize = mapsize;
	/*
	 * The hash size is at least POLLHASHCHUNKSZ. If the user polls a
	 * large number of fds to start with, allocate a bigger hash table
	 * (to the nearest multiple of POLLHASHCHUNKSZ) because dynamically
	 * growing a hash table is expensive.
	 */
	if (nfds < POLLHASHCHUNKSZ) {
		pcp->pc_hashsize = POLLHASHCHUNKSZ;
	} else {
		pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) &
		    ~(POLLHASHCHUNKSZ - 1);
	}
	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *),
	    KM_SLEEP);
}

void
pcache_destroy(pollcache_t *pcp)
{
	polldat_t	**hashtbl;
	int		i;

	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		if (hashtbl[i] != NULL) {
			polldat_t *pdp, *pdp2;

			pdp = hashtbl[i];
			while (pdp != NULL) {
				pdp2 = pdp->pd_hashnext;
				if (pdp->pd_ref != NULL) {
					kmem_free(pdp->pd_ref,
					    sizeof (xref_t) * pdp->pd_nsets);
				}
				kmem_free(pdp, sizeof (polldat_t));
				pdp = pdp2;
				pcp->pc_fdcount--;
			}
		}
	}
	ASSERT(pcp->pc_fdcount == 0);
	kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize);
	kmem_free(pcp->pc_bitmap,
	    sizeof (ulong_t) * (pcp->pc_mapsize / BT_NBIPUL));
	mutex_destroy(&pcp->pc_no_exit);
	mutex_destroy(&pcp->pc_lock);
	cv_destroy(&pcp->pc_cv);
	cv_destroy(&pcp->pc_busy_cv);
	kmem_free(pcp, sizeof (pollcache_t));
}

pollcacheset_t *
pcacheset_create(int nsets)
{
	return (kmem_zalloc(sizeof (pollcacheset_t) * nsets, KM_SLEEP));
}

void
pcacheset_destroy(pollcacheset_t *pcsp, int nsets)
{
	int i;

	for (i = 0; i < nsets; i++) {
		if (pcsp[i].pcs_pollfd != NULL) {
			kmem_free(pcsp[i].pcs_pollfd, pcsp[i].pcs_nfds *
			    sizeof (pollfd_t));
		}
	}
	kmem_free(pcsp, sizeof (pollcacheset_t) * nsets);
}

/*
 * Check each duplicated poll fd in the poll list. It may be necessary to
 * VOP_POLL the same fd again using different poll events. getf() has been
 * done by the caller.
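 * A duplicate with identical events simply inherits the revents already
 * gathered for the first occurrence; only a duplicate with different
 * events needs its own VOP_POLL.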
 * This routine returns 0 if it can successfully process the entire poll fd
 * list. It returns -1 if the underlying vnode has changed during a VOP_POLL,
 * in which case the caller has to repoll. It returns a positive value if
 * VOP_POLL failed.
 */
static int
plist_chkdupfd(file_t *fp, polldat_t *pdp, pollstate_t *psp, pollfd_t *pollfdp,
    int entry, int *fdcntp)
{
	int	i;
	int	fd;
	nfds_t	nfds = psp->ps_nfds;

	fd = pollfdp[entry].fd;
	for (i = entry + 1; i < nfds; i++) {
		if (pollfdp[i].fd == fd) {
			if (pollfdp[i].events == pollfdp[entry].events) {
				if ((pollfdp[i].revents =
				    pollfdp[entry].revents) != 0) {
					(*fdcntp)++;
				}
			} else {
				int	error;
				pollhead_t *php;
				pollcache_t *pcp = psp->ps_pcache;

				/*
				 * The events are different. VOP_POLL on
				 * this fd so that we don't miss any revents.
				 */
				php = NULL;
				ASSERT(curthread->t_pollcache == NULL);
				error = VOP_POLL(fp->f_vnode,
				    pollfdp[i].events, 0,
				    &pollfdp[i].revents, &php, NULL);
				if (error) {
					return (error);
				}
				/*
				 * Layered devices (e.g. the console driver)
				 * may change the vnode and thus the pollhead
				 * pointer out from underneath us.
				 */
				if (php != NULL && pdp->pd_php != NULL &&
				    php != pdp->pd_php) {
					pollhead_delete(pdp->pd_php, pdp);
					pdp->pd_php = php;
					pollhead_insert(php, pdp);
					/*
					 * We could have missed a wakeup on the
					 * new target device. Make sure the new
					 * target gets polled once.
					 */
					BT_SET(pcp->pc_bitmap, fd);
					return (-1);
				}
				if (pollfdp[i].revents) {
					(*fdcntp)++;
				}
			}
		}
	}
	return (0);
}
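
/*
 * Illustration (a sketch, not part of this file's logic): the duplicate-fd
 * handling above exists because userland may legally list the same fd more
 * than once with different event masks, e.g.:
 *
 *	struct pollfd pfds[2];
 *
 *	pfds[0].fd = fd;		(first occurrence, read events)
 *	pfds[0].events = POLLIN;
 *	pfds[1].fd = fd;		(duplicate, write events)
 *	pfds[1].events = POLLOUT;
 *	(void) poll(pfds, 2, -1);
 *
 * Both entries must come back with correct revents, which is why
 * plist_chkdupfd() re-polls a duplicate whose events differ rather than
 * copying the first occurrence's revents.
 */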