1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Portions of this source code were derived from Berkeley 4.3 BSD 32 * under license from the Regents of the University of California. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/isa_defs.h> 37 #include <sys/types.h> 38 #include <sys/sysmacros.h> 39 #include <sys/user.h> 40 #include <sys/systm.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/vnode.h> 44 #include <sys/file.h> 45 #include <sys/mode.h> 46 #include <sys/proc.h> 47 #include <sys/uio.h> 48 #include <sys/poll_impl.h> 49 #include <sys/kmem.h> 50 #include <sys/cmn_err.h> 51 #include <sys/debug.h> 52 #include <sys/bitmap.h> 53 #include <sys/kstat.h> 54 #include <sys/rctl.h> 55 #include <sys/port_impl.h> 56 #include <sys/schedctl.h> 57 58 #define NPHLOCKS 64 /* Number of locks; must be power of 2 */ 59 #define PHLOCKADDR(php) &plocks[(((uintptr_t)(php)) >> 8) & (NPHLOCKS - 1)] 60 #define PHLOCK(php) PHLOCKADDR(php).pp_lock 61 #define PH_ENTER(php) mutex_enter(PHLOCK(php)) 62 #define PH_EXIT(php) mutex_exit(PHLOCK(php)) 63 #define VALID_POLL_EVENTS (POLLIN | POLLPRI | POLLOUT | POLLRDNORM \ 64 | POLLRDBAND | POLLWRBAND | POLLHUP | POLLERR | POLLNVAL) 65 66 /* 67 * global counters to collect some stats 68 */ 69 static struct { 70 kstat_named_t polllistmiss; /* failed to find a cached poll list */ 71 kstat_named_t pollcachehit; /* list matched 100% w/ cached one */ 72 kstat_named_t pollcachephit; /* list matched < 100% w/ cached one */ 73 kstat_named_t pollcachemiss; /* every list entry is dif from cache */ 74 } pollstats = { 75 { "polllistmiss", KSTAT_DATA_UINT64 }, 76 { "pollcachehit", KSTAT_DATA_UINT64 }, 77 { "pollcachephit", KSTAT_DATA_UINT64 }, 78 { "pollcachemiss", KSTAT_DATA_UINT64 } 79 }; 80 81 kstat_named_t *pollstats_ptr = (kstat_named_t *)&pollstats; 82 uint_t pollstats_ndata = sizeof (pollstats) / sizeof (kstat_named_t); 83 84 struct pplock { 85 kmutex_t pp_lock; 86 short pp_flag; 87 kcondvar_t pp_wait_cv; 88 int32_t pp_pad; /* to a nice round 16 bytes */ 89 }; 90 91 static struct pplock plocks[NPHLOCKS]; /* Hash array of pollhead locks */ 92 93 #ifdef DEBUG 94 static int pollchecksanity(pollstate_t *, nfds_t); 95 static int pollcheckxref(pollstate_t *, int); 96 static void pollcheckphlist(void); 97 static int pollcheckrevents(pollstate_t *, int, int, int); 98 static void checkpolldat(pollstate_t *); 99 #endif /* DEBUG */ 100 static int plist_chkdupfd(file_t *, polldat_t *, pollstate_t *, pollfd_t *, int, 101 int *); 102 103 /* 104 * Data structure overview: 105 * The per-thread poll state consists of 106 * one pollstate_t 107 * one pollcache_t 108 * one bitmap with one event bit per fd 109 * a (two-dimensional) hashed array of polldat_t structures - one entry 110 * per fd 111 * 112 * This conglomerate of data structures interact with 113 * the pollhead which is used by VOP_POLL and pollwakeup 114 * (protected by the PHLOCK, cached array of plocks), and 115 * the fpollinfo list hanging off the fi_list which is used to notify 116 * poll when a cached fd is closed. This is protected by uf_lock. 117 * 118 * Invariants: 119 * pd_php (pollhead pointer) is set iff (if and only if) the polldat 120 * is on that pollhead. This is modified atomically under pc_lock. 121 * 122 * pd_fp (file_t pointer) is set iff the thread is on the fpollinfo 123 * list for that open file. 124 * This is modified atomically under pc_lock. 125 * 126 * pd_count is the sum (over all values of i) of pd_ref[i].xf_refcnt. 127 * Iff pd_ref[i].xf_refcnt >= 1 then 128 * ps_pcacheset[i].pcs_pollfd[pd_ref[i].xf_position].fd == pd_fd 129 * Iff pd_ref[i].xf_refcnt > 1 then 130 * In ps_pcacheset[i].pcs_pollfd between index 131 * pd_ref[i].xf_position] and the end of the list 132 * there are xf_refcnt entries with .fd == pd_fd 133 * 134 * Locking design: 135 * Whenever possible the design relies on the fact that the poll cache state 136 * is per thread thus for both poll and exit it is self-synchronizing. 137 * Thus the key interactions where other threads access the state are: 138 * pollwakeup (and polltime), and 139 * close cleaning up the cached references to an open file 140 * 141 * The two key locks in poll proper is ps_lock and pc_lock. 142 * 143 * The ps_lock is used for synchronization between poll, (lwp_)exit and close 144 * to ensure that modifications to pollcacheset structure are serialized. 145 * This lock is held through most of poll() except where poll sleeps 146 * since there is little need to handle closes concurrently with the execution 147 * of poll. 148 * The pc_lock protects most of the fields in pollcache structure and polldat 149 * structures (which are accessed by poll, pollwakeup, and polltime) 150 * with the exception of fields that are only modified when only one thread 151 * can access this per-thread state. 152 * Those exceptions occur in poll when first allocating the per-thread state, 153 * when poll grows the number of polldat (never shrinks), and when 154 * exit/pollcleanup has ensured that there are no references from either 155 * pollheads or fpollinfo to the threads poll state. 156 * 157 * Poll(2) system call is the only path which ps_lock and pc_lock are both 158 * held, in that order. It needs ps_lock to synchronize with close and 159 * lwp_exit; and pc_lock with pollwakeup. 160 * 161 * The locking interaction between pc_lock and PHLOCK take into account 162 * that poll acquires these locks in the order of pc_lock and then PHLOCK 163 * while pollwakeup does it in the reverse order. Thus pollwakeup implements 164 * deadlock avoidance by dropping the locks and reacquiring them in the 165 * reverse order. For this to work pollwakeup needs to prevent the thread 166 * from exiting and freeing all of the poll related state. Thus is done 167 * using 168 * the pc_no_exit lock 169 * the pc_busy counter 170 * the pc_busy_cv condition variable 171 * 172 * The locking interaction between pc_lock and uf_lock has similar 173 * issues. Poll holds ps_lock and/or pc_lock across calls to getf/releasef 174 * which acquire uf_lock. The poll cleanup in close needs to hold uf_lock 175 * to prevent poll or exit from doing a delfpollinfo after which the thread 176 * might exit. But the cleanup needs to acquire pc_lock when modifying 177 * the poll cache state. The solution is to use pc_busy and do the close 178 * cleanup in two phases: 179 * First close calls pollblockexit which increments pc_busy. 180 * This prevents the per-thread poll related state from being freed. 181 * Then close drops uf_lock and calls pollcacheclean. 182 * This routine can then acquire pc_lock and remove any references 183 * to the closing fd (as well as recording that it has been closed 184 * so that a POLLNVAL can be generated even if the fd is reused before 185 * poll has been woken up and checked getf() again). 186 * 187 * When removing a polled fd from poll cache, the fd is always removed 188 * from pollhead list first and then from fpollinfo list, i.e., 189 * pollhead_delete() is called before delfpollinfo(). 190 * 191 * 192 * Locking hierarchy: 193 * pc_no_exit is a leaf level lock. 194 * ps_lock is held when acquiring pc_lock (except when pollwakeup 195 * acquires pc_lock). 196 * pc_lock might be held when acquiring PHLOCK (pollhead_insert/ 197 * pollhead_delete) 198 * pc_lock is always held (but this is not required) 199 * when acquiring PHLOCK (in polladd/pollhead_delete and pollwakeup called 200 * from pcache_clean_entry). 201 * pc_lock is held across addfpollinfo/delfpollinfo which acquire 202 * uf_lock. 203 * pc_lock is held across getf/releasef which acquire uf_lock. 204 * ps_lock might be held across getf/releasef which acquire uf_lock. 205 * pollwakeup tries to acquire pc_lock while holding PHLOCK 206 * but drops the locks and reacquire them in reverse order to avoid 207 * deadlock. 208 * 209 * Note also that there is deadlock avoidance support for VOP_POLL routines 210 * and pollwakeup involving a file system or driver lock. 211 * See below. 212 */ 213 214 /* 215 * Deadlock avoidance support for VOP_POLL() routines. This is 216 * sometimes necessary to prevent deadlock between polling threads 217 * (which hold poll locks on entry to xx_poll(), then acquire foo) 218 * and pollwakeup() threads (which hold foo, then acquire poll locks). 219 * 220 * pollunlock(void) releases whatever poll locks the current thread holds, 221 * returning a cookie for use by pollrelock(); 222 * 223 * pollrelock(cookie) reacquires previously dropped poll locks; 224 * 225 * polllock(php, mutex) does the common case: pollunlock(), 226 * acquire the problematic mutex, pollrelock(). 227 */ 228 int 229 pollunlock(void) 230 { 231 pollcache_t *pcp; 232 int lockstate = 0; 233 234 /* 235 * t_pollcache is set by /dev/poll and event ports (port_fd.c). 236 * If the pollrelock/pollunlock is called as a result of poll(2), 237 * the t_pollcache should be NULL. 238 */ 239 if (curthread->t_pollcache == NULL) 240 pcp = curthread->t_pollstate->ps_pcache; 241 else 242 pcp = curthread->t_pollcache; 243 244 if (mutex_owned(&pcp->pc_lock)) { 245 lockstate = 1; 246 mutex_exit(&pcp->pc_lock); 247 } 248 return (lockstate); 249 } 250 251 void 252 pollrelock(int lockstate) 253 { 254 pollcache_t *pcp; 255 256 /* 257 * t_pollcache is set by /dev/poll and event ports (port_fd.c). 258 * If the pollrelock/pollunlock is called as a result of poll(2), 259 * the t_pollcache should be NULL. 260 */ 261 if (curthread->t_pollcache == NULL) 262 pcp = curthread->t_pollstate->ps_pcache; 263 else 264 pcp = curthread->t_pollcache; 265 266 if (lockstate > 0) 267 mutex_enter(&pcp->pc_lock); 268 } 269 270 /* ARGSUSED */ 271 void 272 polllock(pollhead_t *php, kmutex_t *lp) 273 { 274 if (!mutex_tryenter(lp)) { 275 int lockstate = pollunlock(); 276 mutex_enter(lp); 277 pollrelock(lockstate); 278 } 279 } 280 281 static int 282 poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp) 283 { 284 kthread_t *t = curthread; 285 klwp_t *lwp = ttolwp(t); 286 proc_t *p = ttoproc(t); 287 int fdcnt = 0; 288 int rval; 289 int i; 290 timespec_t *rqtp = NULL; 291 int timecheck = 0; 292 int imm_timeout = 0; 293 pollfd_t *pollfdp; 294 pollstate_t *ps; 295 pollcache_t *pcp; 296 int error = 0; 297 nfds_t old_nfds; 298 int cacheindex = 0; /* which cache set is used */ 299 300 /* 301 * Determine the precise future time of the requested timeout, if any. 302 */ 303 if (tsp != NULL) { 304 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 305 imm_timeout = 1; 306 else { 307 timespec_t now; 308 timecheck = timechanged; 309 gethrestime(&now); 310 rqtp = tsp; 311 timespecadd(rqtp, &now); 312 } 313 } 314 315 /* 316 * Reset our signal mask, if requested. 317 */ 318 if (ksetp != NULL) { 319 mutex_enter(&p->p_lock); 320 schedctl_finish_sigblock(t); 321 lwp->lwp_sigoldmask = t->t_hold; 322 t->t_hold = *ksetp; 323 t->t_flag |= T_TOMASK; 324 /* 325 * Call cv_timedwait_sig() just to check for signals. 326 * We will return immediately with either 0 or -1. 327 */ 328 if (!cv_timedwait_sig(&t->t_delay_cv, &p->p_lock, lbolt)) { 329 mutex_exit(&p->p_lock); 330 error = EINTR; 331 goto pollout; 332 } 333 mutex_exit(&p->p_lock); 334 } 335 336 /* 337 * Check to see if this guy just wants to use poll() as a timeout. 338 * If yes then bypass all the other stuff and make him sleep. 339 */ 340 if (nfds == 0) { 341 /* 342 * Sleep until we have passed the requested future 343 * time or until interrupted by a signal. 344 * Do not check for signals if we have a zero timeout. 345 */ 346 if (!imm_timeout) { 347 mutex_enter(&t->t_delay_lock); 348 while ((rval = cv_waituntil_sig(&t->t_delay_cv, 349 &t->t_delay_lock, rqtp, timecheck)) > 0) 350 continue; 351 mutex_exit(&t->t_delay_lock); 352 if (rval == 0) 353 error = EINTR; 354 } 355 goto pollout; 356 } 357 358 if (nfds > p->p_fno_ctl) { 359 mutex_enter(&p->p_lock); 360 (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], 361 p->p_rctls, p, RCA_SAFE); 362 mutex_exit(&p->p_lock); 363 error = EINVAL; 364 goto pollout; 365 } 366 367 /* 368 * Need to allocate memory for pollstate before anything because 369 * the mutex and cv are created in this space 370 */ 371 if ((ps = t->t_pollstate) == NULL) { 372 t->t_pollstate = pollstate_create(); 373 ps = t->t_pollstate; 374 } 375 376 if (ps->ps_pcache == NULL) 377 ps->ps_pcache = pcache_alloc(); 378 pcp = ps->ps_pcache; 379 380 /* 381 * NOTE: for performance, buffers are saved across poll() calls. 382 * The theory is that if a process polls heavily, it tends to poll 383 * on the same set of descriptors. Therefore, we only reallocate 384 * buffers when nfds changes. There is no hysteresis control, 385 * because there is no data to suggest that this is necessary; 386 * the penalty of reallocating is not *that* great in any event. 387 */ 388 old_nfds = ps->ps_nfds; 389 if (nfds != old_nfds) { 390 391 kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t)); 392 pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP); 393 ps->ps_pollfd = pollfdp; 394 ps->ps_nfds = nfds; 395 } 396 397 pollfdp = ps->ps_pollfd; 398 if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) { 399 error = EFAULT; 400 goto pollout; 401 } 402 403 if (fds == NULL) { 404 /* 405 * If the process has page 0 mapped, then the copyin() above 406 * will succeed even if fds is NULL. However, our cached 407 * poll lists are keyed by the address of the passed-in fds 408 * structure, and we use the value NULL to indicate an unused 409 * poll cache list entry. As such, we elect not to support 410 * NULL as a valid (user) memory address and fail the poll() 411 * call. 412 */ 413 error = EINVAL; 414 goto pollout; 415 } 416 417 /* 418 * If this thread polls for the first time, allocate ALL poll 419 * cache data structures and cache the poll fd list. This 420 * allocation is delayed till now because lwp's polling 0 fd 421 * (i.e. using poll as timeout()) don't need this memory. 422 */ 423 mutex_enter(&ps->ps_lock); 424 pcp = ps->ps_pcache; 425 ASSERT(pcp != NULL); 426 if (pcp->pc_bitmap == NULL) { 427 pcache_create(pcp, nfds); 428 /* 429 * poll and cache this poll fd list in ps_pcacheset[0]. 430 */ 431 error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex); 432 if (fdcnt || error) { 433 mutex_exit(&ps->ps_lock); 434 goto pollout; 435 } 436 } else { 437 pollcacheset_t *pcset = ps->ps_pcacheset; 438 439 /* 440 * Not first time polling. Select a cached poll list by 441 * matching user pollfd list buffer address. 442 */ 443 for (cacheindex = 0; cacheindex < ps->ps_nsets; cacheindex++) { 444 if (pcset[cacheindex].pcs_usradr == (uintptr_t)fds) { 445 if ((++pcset[cacheindex].pcs_count) == 0) { 446 /* 447 * counter is wrapping around. 448 */ 449 pcacheset_reset_count(ps, cacheindex); 450 } 451 /* 452 * examine and resolve possible 453 * difference of the current poll 454 * list and previously cached one. 455 * If there is an error during resolve(), 456 * the callee will guarantee the consistency 457 * of cached poll list and cache content. 458 */ 459 error = pcacheset_resolve(ps, nfds, &fdcnt, 460 cacheindex); 461 if (error) { 462 mutex_exit(&ps->ps_lock); 463 goto pollout; 464 } 465 break; 466 } 467 468 /* 469 * Note that pcs_usradr field of an used entry won't be 470 * NULL because it stores the address of passed-in fds, 471 * and NULL fds will not be cached (Then it is either 472 * the special timeout case when nfds is 0 or it returns 473 * failure directly). 474 */ 475 if (pcset[cacheindex].pcs_usradr == NULL) { 476 /* 477 * found an unused entry. Use it to cache 478 * this poll list. 479 */ 480 error = pcacheset_cache_list(ps, fds, &fdcnt, 481 cacheindex); 482 if (fdcnt || error) { 483 mutex_exit(&ps->ps_lock); 484 goto pollout; 485 } 486 break; 487 } 488 } 489 if (cacheindex == ps->ps_nsets) { 490 /* 491 * We failed to find a matching cached poll fd list. 492 * replace an old list. 493 */ 494 pollstats.polllistmiss.value.ui64++; 495 cacheindex = pcacheset_replace(ps); 496 ASSERT(cacheindex < ps->ps_nsets); 497 pcset[cacheindex].pcs_usradr = (uintptr_t)fds; 498 error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex); 499 if (error) { 500 mutex_exit(&ps->ps_lock); 501 goto pollout; 502 } 503 } 504 } 505 506 /* 507 * Always scan the bitmap with the lock on the pollcache held. 508 * This is to make sure that a wakeup does not come undetected. 509 * If the lock is not held, a pollwakeup could have come for an 510 * fd we already checked but before this thread sleeps, in which 511 * case the wakeup is missed. Now we hold the pcache lock and 512 * check the bitmap again. This will prevent wakeup from happening 513 * while we hold pcache lock since pollwakeup() will also lock 514 * the pcache before updating poll bitmap. 515 */ 516 mutex_enter(&pcp->pc_lock); 517 for (;;) { 518 pcp->pc_flag = 0; 519 error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex); 520 if (fdcnt || error) { 521 mutex_exit(&pcp->pc_lock); 522 mutex_exit(&ps->ps_lock); 523 break; 524 } 525 526 /* 527 * If T_POLLWAKE is set, a pollwakeup() was performed on 528 * one of the file descriptors. This can happen only if 529 * one of the VOP_POLL() functions dropped pcp->pc_lock. 530 * The only current cases of this is in procfs (prpoll()) 531 * and STREAMS (strpoll()). 532 */ 533 if (pcp->pc_flag & T_POLLWAKE) 534 continue; 535 536 /* 537 * If you get here, the poll of fds was unsuccessful. 538 * Wait until some fd becomes readable, writable, or gets 539 * an exception, or until a signal or a timeout occurs. 540 * Do not check for signals if we have a zero timeout. 541 */ 542 mutex_exit(&ps->ps_lock); 543 if (imm_timeout) 544 rval = -1; 545 else 546 rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock, 547 rqtp, timecheck); 548 mutex_exit(&pcp->pc_lock); 549 /* 550 * If we have received a signal or timed out 551 * then break out and return. 552 */ 553 if (rval <= 0) { 554 if (rval == 0) 555 error = EINTR; 556 break; 557 } 558 /* 559 * We have not received a signal or timed out. 560 * Continue around and poll fds again. 561 */ 562 mutex_enter(&ps->ps_lock); 563 mutex_enter(&pcp->pc_lock); 564 } 565 566 pollout: 567 /* 568 * If we changed the signal mask but we received 569 * no signal then restore the signal mask. 570 * Otherwise psig() will deal with the signal mask. 571 */ 572 if (ksetp != NULL) { 573 mutex_enter(&p->p_lock); 574 if (lwp->lwp_cursig == 0) { 575 t->t_hold = lwp->lwp_sigoldmask; 576 t->t_flag &= ~T_TOMASK; 577 } 578 mutex_exit(&p->p_lock); 579 } 580 581 if (error) 582 return (set_errno(error)); 583 584 /* 585 * Copy out the events and return the fdcnt to the user. 586 */ 587 if (nfds != 0 && 588 copyout(pollfdp, fds, nfds * sizeof (pollfd_t))) 589 return (set_errno(EFAULT)); 590 591 #ifdef DEBUG 592 /* 593 * Another sanity check: 594 */ 595 if (fdcnt) { 596 int reventcnt = 0; 597 598 for (i = 0; i < nfds; i++) { 599 if (pollfdp[i].fd < 0) { 600 ASSERT(pollfdp[i].revents == 0); 601 continue; 602 } 603 if (pollfdp[i].revents) { 604 reventcnt++; 605 } 606 } 607 ASSERT(fdcnt == reventcnt); 608 } else { 609 for (i = 0; i < nfds; i++) { 610 ASSERT(pollfdp[i].revents == 0); 611 } 612 } 613 #endif /* DEBUG */ 614 615 return (fdcnt); 616 } 617 618 /* 619 * This system call trap exists solely for binary compatibility with 620 * old statically-linked applications. It is not called from libc. 621 * It should be removed in the next release. 622 */ 623 int 624 poll(pollfd_t *fds, nfds_t nfds, int time_out) 625 { 626 timespec_t ts; 627 timespec_t *tsp; 628 629 if (time_out < 0) 630 tsp = NULL; 631 else { 632 ts.tv_sec = time_out / MILLISEC; 633 ts.tv_nsec = (time_out % MILLISEC) * MICROSEC; 634 tsp = &ts; 635 } 636 637 return (poll_common(fds, nfds, tsp, NULL)); 638 } 639 640 /* 641 * This is the system call trap that poll(), 642 * select() and pselect() are built upon. 643 * It is a private interface between libc and the kernel. 644 */ 645 int 646 pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp) 647 { 648 timespec_t ts; 649 timespec_t *tsp; 650 sigset_t set; 651 k_sigset_t kset; 652 k_sigset_t *ksetp; 653 model_t datamodel = get_udatamodel(); 654 655 if (timeoutp == NULL) 656 tsp = NULL; 657 else { 658 if (datamodel == DATAMODEL_NATIVE) { 659 if (copyin(timeoutp, &ts, sizeof (ts))) 660 return (set_errno(EFAULT)); 661 } else { 662 timespec32_t ts32; 663 664 if (copyin(timeoutp, &ts32, sizeof (ts32))) 665 return (set_errno(EFAULT)); 666 TIMESPEC32_TO_TIMESPEC(&ts, &ts32) 667 } 668 669 if (itimerspecfix(&ts)) 670 return (set_errno(EINVAL)); 671 tsp = &ts; 672 } 673 674 if (setp == NULL) 675 ksetp = NULL; 676 else { 677 if (copyin(setp, &set, sizeof (set))) 678 return (set_errno(EFAULT)); 679 sigutok(&set, &kset); 680 ksetp = &kset; 681 } 682 683 return (poll_common(fds, nfds, tsp, ksetp)); 684 } 685 686 /* 687 * Clean up any state left around by poll(2). Called when a thread exits. 688 */ 689 void 690 pollcleanup() 691 { 692 pollstate_t *ps = curthread->t_pollstate; 693 pollcache_t *pcp; 694 695 if (ps == NULL) 696 return; 697 pcp = ps->ps_pcache; 698 /* 699 * free up all cached poll fds 700 */ 701 if (pcp == NULL) { 702 /* this pollstate is used by /dev/poll */ 703 goto pollcleanout; 704 } 705 706 if (pcp->pc_bitmap != NULL) { 707 ASSERT(MUTEX_NOT_HELD(&ps->ps_lock)); 708 /* 709 * a close lwp can race with us when cleaning up a polldat 710 * entry. We hold the ps_lock when cleaning hash table. 711 * Since this pollcache is going away anyway, there is no 712 * need to hold the pc_lock. 713 */ 714 mutex_enter(&ps->ps_lock); 715 pcache_clean(pcp); 716 mutex_exit(&ps->ps_lock); 717 #ifdef DEBUG 718 /* 719 * At this point, all fds cached by this lwp should be 720 * cleaned up. There should be no fd in fi_list still 721 * reference this thread. 722 */ 723 checkfpollinfo(); /* sanity check */ 724 pollcheckphlist(); /* sanity check */ 725 #endif /* DEBUG */ 726 } 727 /* 728 * Be sure no one is referencing thread before exiting 729 */ 730 mutex_enter(&pcp->pc_no_exit); 731 ASSERT(pcp->pc_busy >= 0); 732 while (pcp->pc_busy > 0) 733 cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit); 734 mutex_exit(&pcp->pc_no_exit); 735 pollcleanout: 736 pollstate_destroy(ps); 737 curthread->t_pollstate = NULL; 738 } 739 740 /* 741 * pollwakeup() - poke threads waiting in poll() for some event 742 * on a particular object. 743 * 744 * The threads hanging off of the specified pollhead structure are scanned. 745 * If their event mask matches the specified event(s), then pollnotify() is 746 * called to poke the thread. 747 * 748 * Multiple events may be specified. When POLLHUP or POLLERR are specified, 749 * all waiting threads are poked. 750 * 751 * It is important that pollnotify() not drop the lock protecting the list 752 * of threads. 753 */ 754 void 755 pollwakeup(pollhead_t *php, short events_arg) 756 { 757 polldat_t *pdp; 758 int events = (ushort_t)events_arg; 759 struct plist { 760 port_t *pp; 761 int pevents; 762 struct plist *next; 763 }; 764 struct plist *plhead = NULL, *pltail = NULL; 765 766 retry: 767 PH_ENTER(php); 768 769 for (pdp = php->ph_list; pdp; pdp = pdp->pd_next) { 770 if ((pdp->pd_events & events) || 771 (events & (POLLHUP | POLLERR))) { 772 773 pollcache_t *pcp; 774 775 if (pdp->pd_portev != NULL) { 776 port_kevent_t *pkevp = pdp->pd_portev; 777 /* 778 * Object (fd) is associated with an event port, 779 * => send event notification to the port. 780 */ 781 ASSERT(pkevp->portkev_source == PORT_SOURCE_FD); 782 mutex_enter(&pkevp->portkev_lock); 783 if (pkevp->portkev_flags & PORT_KEV_VALID) { 784 int pevents; 785 786 pkevp->portkev_flags &= ~PORT_KEV_VALID; 787 pkevp->portkev_events |= events & 788 (pdp->pd_events | POLLHUP | 789 POLLERR); 790 /* 791 * portkev_lock mutex will be released 792 * by port_send_event(). 793 */ 794 port_send_event(pkevp); 795 796 /* 797 * If we have some thread polling the 798 * port's fd, add it to the list. They 799 * will be notified later. 800 * The port_pollwkup() will flag the 801 * port_t so that it will not disappear 802 * till port_pollwkdone() is called. 803 */ 804 pevents = 805 port_pollwkup(pkevp->portkev_port); 806 if (pevents) { 807 struct plist *t; 808 t = kmem_zalloc( 809 sizeof (struct plist), 810 KM_SLEEP); 811 t->pp = pkevp->portkev_port; 812 t->pevents = pevents; 813 if (plhead == NULL) { 814 plhead = t; 815 } else { 816 pltail->next = t; 817 } 818 pltail = t; 819 } 820 } else { 821 mutex_exit(&pkevp->portkev_lock); 822 } 823 continue; 824 } 825 826 pcp = pdp->pd_pcache; 827 828 /* 829 * Try to grab the lock for this thread. If 830 * we don't get it then we may deadlock so 831 * back out and restart all over again. Note 832 * that the failure rate is very very low. 833 */ 834 if (mutex_tryenter(&pcp->pc_lock)) { 835 pollnotify(pcp, pdp->pd_fd); 836 mutex_exit(&pcp->pc_lock); 837 } else { 838 /* 839 * We are here because: 840 * 1) This thread has been woke up 841 * and is trying to get out of poll(). 842 * 2) Some other thread is also here 843 * but with a different pollhead lock. 844 * 845 * So, we need to drop the lock on pollhead 846 * because of (1) but we want to prevent 847 * that thread from doing lwp_exit() or 848 * devpoll close. We want to ensure that 849 * the pollcache pointer is still invalid. 850 * 851 * Solution: Grab the pcp->pc_no_exit lock, 852 * increment the pc_busy counter, drop every 853 * lock in sight. Get out of the way and wait 854 * for type (2) threads to finish. 855 */ 856 857 mutex_enter(&pcp->pc_no_exit); 858 pcp->pc_busy++; /* prevents exit()'s */ 859 mutex_exit(&pcp->pc_no_exit); 860 861 PH_EXIT(php); 862 mutex_enter(&pcp->pc_lock); 863 mutex_exit(&pcp->pc_lock); 864 mutex_enter(&pcp->pc_no_exit); 865 pcp->pc_busy--; 866 if (pcp->pc_busy == 0) { 867 /* 868 * Wakeup the thread waiting in 869 * thread_exit(). 870 */ 871 cv_signal(&pcp->pc_busy_cv); 872 } 873 mutex_exit(&pcp->pc_no_exit); 874 goto retry; 875 } 876 } 877 } 878 879 880 /* 881 * Event ports - If this php is of the port on the list, 882 * call port_pollwkdone() to release it. The port_pollwkdone() 883 * needs to be called before dropping the PH lock so that any new 884 * thread attempting to poll this port are blocked. There can be 885 * only one thread here in pollwakeup notifying this port's fd. 886 */ 887 if (plhead != NULL && &plhead->pp->port_pollhd == php) { 888 struct plist *t; 889 port_pollwkdone(plhead->pp); 890 t = plhead; 891 plhead = plhead->next; 892 kmem_free(t, sizeof (struct plist)); 893 } 894 PH_EXIT(php); 895 896 /* 897 * Event ports - Notify threads polling the event port's fd. 898 * This is normally done in port_send_event() where it calls 899 * pollwakeup() on the port. But, for PORT_SOURCE_FD source alone, 900 * we do it here in pollwakeup() to avoid a recursive call. 901 */ 902 if (plhead != NULL) { 903 php = &plhead->pp->port_pollhd; 904 events = plhead->pevents; 905 goto retry; 906 } 907 } 908 909 /* 910 * This function is called to inform a thread that 911 * an event being polled for has occurred. 912 * The pollstate lock on the thread should be held on entry. 913 */ 914 void 915 pollnotify(pollcache_t *pcp, int fd) 916 { 917 ASSERT(fd < pcp->pc_mapsize); 918 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 919 BT_SET(pcp->pc_bitmap, fd); 920 pcp->pc_flag |= T_POLLWAKE; 921 cv_signal(&pcp->pc_cv); 922 } 923 924 /* 925 * add a polldat entry to pollhead ph_list. The polldat struct is used 926 * by pollwakeup to wake sleeping pollers when polled events has happened. 927 */ 928 void 929 pollhead_insert(pollhead_t *php, polldat_t *pdp) 930 { 931 PH_ENTER(php); 932 ASSERT(pdp->pd_next == NULL); 933 #ifdef DEBUG 934 { 935 /* 936 * the polldat should not be already on the list 937 */ 938 polldat_t *wp; 939 for (wp = php->ph_list; wp; wp = wp->pd_next) { 940 ASSERT(wp != pdp); 941 } 942 } 943 #endif /* DEBUG */ 944 pdp->pd_next = php->ph_list; 945 php->ph_list = pdp; 946 PH_EXIT(php); 947 } 948 949 /* 950 * Delete the polldat entry from ph_list. 951 */ 952 void 953 pollhead_delete(pollhead_t *php, polldat_t *pdp) 954 { 955 polldat_t *wp; 956 polldat_t **wpp; 957 958 PH_ENTER(php); 959 for (wpp = &php->ph_list; (wp = *wpp) != NULL; wpp = &wp->pd_next) { 960 if (wp == pdp) { 961 *wpp = pdp->pd_next; 962 pdp->pd_next = NULL; 963 break; 964 } 965 } 966 #ifdef DEBUG 967 /* assert that pdp is no longer in the list */ 968 for (wp = *wpp; wp; wp = wp->pd_next) { 969 ASSERT(wp != pdp); 970 } 971 #endif /* DEBUG */ 972 PH_EXIT(php); 973 } 974 975 /* 976 * walk through the poll fd lists to see if they are identical. This is an 977 * expensive operation and should not be done more than once for each poll() 978 * call. 979 * 980 * As an optimization (i.e., not having to go through the lists more than 981 * once), this routine also clear the revents field of pollfd in 'current'. 982 * Zeroing out the revents field of each entry in current poll list is 983 * required by poll man page. 984 * 985 * Since the events field of cached list has illegal poll events filtered 986 * out, the current list applies the same filtering before comparison. 987 * 988 * The routine stops when it detects a meaningful difference, or when it 989 * exhausts the lists. 990 */ 991 int 992 pcacheset_cmp(pollfd_t *current, pollfd_t *cached, pollfd_t *newlist, int n) 993 { 994 int ix; 995 996 for (ix = 0; ix < n; ix++) { 997 /* Prefetch 64 bytes worth of 8-byte elements */ 998 if ((ix & 0x7) == 0) { 999 prefetch64((caddr_t)¤t[ix + 8]); 1000 prefetch64((caddr_t)&cached[ix + 8]); 1001 } 1002 if (current[ix].fd == cached[ix].fd) { 1003 /* 1004 * Filter out invalid poll events while we are in 1005 * inside the loop. 1006 */ 1007 if (current[ix].events & ~VALID_POLL_EVENTS) { 1008 current[ix].events &= VALID_POLL_EVENTS; 1009 if (newlist != NULL) 1010 newlist[ix].events = current[ix].events; 1011 } 1012 if (current[ix].events == cached[ix].events) { 1013 current[ix].revents = 0; 1014 continue; 1015 } 1016 } 1017 if ((current[ix].fd < 0) && (cached[ix].fd < 0)) { 1018 current[ix].revents = 0; 1019 continue; 1020 } 1021 return (ix); 1022 } 1023 return (ix); 1024 } 1025 1026 /* 1027 * This routine returns a pointer to a cached poll fd entry, or NULL if it 1028 * does not find it in the hash table. 1029 */ 1030 polldat_t * 1031 pcache_lookup_fd(pollcache_t *pcp, int fd) 1032 { 1033 int hashindex; 1034 polldat_t *pdp; 1035 1036 hashindex = POLLHASH(pcp->pc_hashsize, fd); 1037 pdp = pcp->pc_hash[hashindex]; 1038 while (pdp != NULL) { 1039 if (pdp->pd_fd == fd) 1040 break; 1041 pdp = pdp->pd_hashnext; 1042 } 1043 return (pdp); 1044 } 1045 1046 polldat_t * 1047 pcache_alloc_fd(int nsets) 1048 { 1049 polldat_t *pdp; 1050 1051 pdp = kmem_zalloc(sizeof (polldat_t), KM_SLEEP); 1052 if (nsets > 0) { 1053 pdp->pd_ref = kmem_zalloc(sizeof (xref_t) * nsets, KM_SLEEP); 1054 pdp->pd_nsets = nsets; 1055 } 1056 return (pdp); 1057 } 1058 1059 /* 1060 * This routine inserts a polldat into the pollcache's hash table. It 1061 * may be necessary to grow the size of the hash table. 1062 */ 1063 void 1064 pcache_insert_fd(pollcache_t *pcp, polldat_t *pdp, nfds_t nfds) 1065 { 1066 int hashindex; 1067 int fd; 1068 1069 if ((pcp->pc_fdcount > pcp->pc_hashsize * POLLHASHTHRESHOLD) || 1070 (nfds > pcp->pc_hashsize * POLLHASHTHRESHOLD)) { 1071 pcache_grow_hashtbl(pcp, nfds); 1072 } 1073 fd = pdp->pd_fd; 1074 hashindex = POLLHASH(pcp->pc_hashsize, fd); 1075 pdp->pd_hashnext = pcp->pc_hash[hashindex]; 1076 pcp->pc_hash[hashindex] = pdp; 1077 pcp->pc_fdcount++; 1078 1079 #ifdef DEBUG 1080 { 1081 /* 1082 * same fd should not appear on a hash list twice 1083 */ 1084 polldat_t *pdp1; 1085 for (pdp1 = pdp->pd_hashnext; pdp1; pdp1 = pdp1->pd_hashnext) { 1086 ASSERT(pdp->pd_fd != pdp1->pd_fd); 1087 } 1088 } 1089 #endif /* DEBUG */ 1090 } 1091 1092 /* 1093 * Grow the hash table -- either double the table size or round it to the 1094 * nearest multiples of POLLHASHCHUNKSZ, whichever is bigger. Rehash all the 1095 * elements on the hash table. 1096 */ 1097 void 1098 pcache_grow_hashtbl(pollcache_t *pcp, nfds_t nfds) 1099 { 1100 int oldsize; 1101 polldat_t **oldtbl; 1102 polldat_t *pdp, *pdp1; 1103 int i; 1104 #ifdef DEBUG 1105 int count = 0; 1106 #endif 1107 1108 ASSERT(pcp->pc_hashsize % POLLHASHCHUNKSZ == 0); 1109 oldsize = pcp->pc_hashsize; 1110 oldtbl = pcp->pc_hash; 1111 if (nfds > pcp->pc_hashsize * POLLHASHINC) { 1112 pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) & 1113 ~(POLLHASHCHUNKSZ - 1); 1114 } else { 1115 pcp->pc_hashsize = pcp->pc_hashsize * POLLHASHINC; 1116 } 1117 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *), 1118 KM_SLEEP); 1119 /* 1120 * rehash existing elements 1121 */ 1122 pcp->pc_fdcount = 0; 1123 for (i = 0; i < oldsize; i++) { 1124 pdp = oldtbl[i]; 1125 while (pdp != NULL) { 1126 pdp1 = pdp->pd_hashnext; 1127 pcache_insert_fd(pcp, pdp, nfds); 1128 pdp = pdp1; 1129 #ifdef DEBUG 1130 count++; 1131 #endif 1132 } 1133 } 1134 kmem_free(oldtbl, oldsize * sizeof (polldat_t *)); 1135 ASSERT(pcp->pc_fdcount == count); 1136 } 1137 1138 void 1139 pcache_grow_map(pollcache_t *pcp, int fd) 1140 { 1141 int newsize; 1142 ulong_t *newmap; 1143 1144 /* 1145 * grow to nearest multiple of POLLMAPCHUNK, assuming POLLMAPCHUNK is 1146 * power of 2. 1147 */ 1148 newsize = (fd + POLLMAPCHUNK) & ~(POLLMAPCHUNK - 1); 1149 newmap = kmem_zalloc((newsize / BT_NBIPUL) * sizeof (ulong_t), 1150 KM_SLEEP); 1151 /* 1152 * don't want pollwakeup to set a bit while growing the bitmap. 1153 */ 1154 ASSERT(mutex_owned(&pcp->pc_lock) == 0); 1155 mutex_enter(&pcp->pc_lock); 1156 bcopy(pcp->pc_bitmap, newmap, 1157 (pcp->pc_mapsize / BT_NBIPUL) * sizeof (ulong_t)); 1158 kmem_free(pcp->pc_bitmap, 1159 (pcp->pc_mapsize /BT_NBIPUL) * sizeof (ulong_t)); 1160 pcp->pc_bitmap = newmap; 1161 pcp->pc_mapsize = newsize; 1162 mutex_exit(&pcp->pc_lock); 1163 } 1164 1165 /* 1166 * remove all the reference from pollhead list and fpollinfo lists. 1167 */ 1168 void 1169 pcache_clean(pollcache_t *pcp) 1170 { 1171 int i; 1172 polldat_t **hashtbl; 1173 polldat_t *pdp; 1174 1175 ASSERT(MUTEX_HELD(&curthread->t_pollstate->ps_lock)); 1176 hashtbl = pcp->pc_hash; 1177 for (i = 0; i < pcp->pc_hashsize; i++) { 1178 for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 1179 if (pdp->pd_php != NULL) { 1180 pollhead_delete(pdp->pd_php, pdp); 1181 pdp->pd_php = NULL; 1182 } 1183 if (pdp->pd_fp != NULL) { 1184 delfpollinfo(pdp->pd_fd); 1185 pdp->pd_fp = NULL; 1186 } 1187 } 1188 } 1189 } 1190 1191 void 1192 pcacheset_invalidate(pollstate_t *ps, polldat_t *pdp) 1193 { 1194 int i; 1195 int fd = pdp->pd_fd; 1196 1197 /* 1198 * we come here because an earlier close() on this cached poll fd. 1199 */ 1200 ASSERT(pdp->pd_fp == NULL); 1201 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1202 pdp->pd_events = 0; 1203 for (i = 0; i < ps->ps_nsets; i++) { 1204 xref_t *refp; 1205 pollcacheset_t *pcsp; 1206 1207 ASSERT(pdp->pd_ref != NULL); 1208 refp = &pdp->pd_ref[i]; 1209 if (refp->xf_refcnt) { 1210 ASSERT(refp->xf_position >= 0); 1211 pcsp = &ps->ps_pcacheset[i]; 1212 if (refp->xf_refcnt == 1) { 1213 pcsp->pcs_pollfd[refp->xf_position].fd = -1; 1214 refp->xf_refcnt = 0; 1215 pdp->pd_count--; 1216 } else if (refp->xf_refcnt > 1) { 1217 int j; 1218 1219 /* 1220 * turn off every appearance in pcs_pollfd list 1221 */ 1222 for (j = refp->xf_position; 1223 j < pcsp->pcs_nfds; j++) { 1224 if (pcsp->pcs_pollfd[j].fd == fd) { 1225 pcsp->pcs_pollfd[j].fd = -1; 1226 refp->xf_refcnt--; 1227 pdp->pd_count--; 1228 } 1229 } 1230 } 1231 ASSERT(refp->xf_refcnt == 0); 1232 refp->xf_position = POLLPOSINVAL; 1233 } 1234 } 1235 ASSERT(pdp->pd_count == 0); 1236 } 1237 1238 /* 1239 * Insert poll fd into the pollcache, and add poll registration. 1240 * This routine is called after getf() and before releasef(). So the vnode 1241 * can not disappear even if we block here. 1242 * If there is an error, the polled fd is not cached. 1243 */ 1244 int 1245 pcache_insert(pollstate_t *ps, file_t *fp, pollfd_t *pollfdp, int *fdcntp, 1246 ssize_t pos, int which) 1247 { 1248 pollcache_t *pcp = ps->ps_pcache; 1249 polldat_t *pdp; 1250 int error; 1251 int fd; 1252 pollhead_t *memphp = NULL; 1253 xref_t *refp; 1254 int newpollfd = 0; 1255 1256 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1257 /* 1258 * The poll caching uses the existing VOP_POLL interface. If there 1259 * is no polled events, we want the polled device to set its "some 1260 * one is sleeping in poll" flag. When the polled events happen 1261 * later, the driver will call pollwakeup(). We achieve this by 1262 * always passing 0 in the third parameter ("anyyet") when calling 1263 * VOP_POLL. This parameter is not looked at by drivers when the 1264 * polled events exist. If a driver chooses to ignore this parameter 1265 * and call pollwakeup whenever the polled events happen, that will 1266 * be OK too. 1267 */ 1268 ASSERT(curthread->t_pollcache == NULL); 1269 error = VOP_POLL(fp->f_vnode, pollfdp->events, 0, &pollfdp->revents, 1270 &memphp, NULL); 1271 if (error) { 1272 return (error); 1273 } 1274 if (pollfdp->revents) { 1275 (*fdcntp)++; 1276 } 1277 /* 1278 * polling the underlying device succeeded. Now we can cache it. 1279 * A close can't come in here because we have not done a releasef() 1280 * yet. 1281 */ 1282 fd = pollfdp->fd; 1283 pdp = pcache_lookup_fd(pcp, fd); 1284 if (pdp == NULL) { 1285 ASSERT(ps->ps_nsets > 0); 1286 pdp = pcache_alloc_fd(ps->ps_nsets); 1287 newpollfd = 1; 1288 } 1289 /* 1290 * If this entry was used to cache a poll fd which was closed, and 1291 * this entry has not been cleaned, do it now. 1292 */ 1293 if ((pdp->pd_count > 0) && (pdp->pd_fp == NULL)) { 1294 pcacheset_invalidate(ps, pdp); 1295 ASSERT(pdp->pd_next == NULL); 1296 } 1297 if (pdp->pd_count == 0) { 1298 pdp->pd_fd = fd; 1299 pdp->pd_fp = fp; 1300 addfpollinfo(fd); 1301 pdp->pd_thread = curthread; 1302 pdp->pd_pcache = pcp; 1303 /* 1304 * the entry is never used or cleared by removing a cached 1305 * pollfd (pcache_delete_fd). So all the fields should be clear. 1306 */ 1307 ASSERT(pdp->pd_next == NULL); 1308 } 1309 1310 /* 1311 * A polled fd is considered cached. So there should be a fpollinfo 1312 * entry on uf_fpollinfo list. 1313 */ 1314 ASSERT(infpollinfo(fd)); 1315 /* 1316 * If there is an inconsistency, we want to know it here. 1317 */ 1318 ASSERT(pdp->pd_fp == fp); 1319 1320 /* 1321 * XXX pd_events is a union of all polled events on this fd, possibly 1322 * by different threads. Unless this is a new first poll(), pd_events 1323 * never shrinks. If an event is no longer polled by a process, there 1324 * is no way to cancel that event. In that case, poll degrade to its 1325 * old form -- polling on this fd every time poll() is called. The 1326 * assumption is an app always polls the same type of events. 1327 */ 1328 pdp->pd_events |= pollfdp->events; 1329 1330 pdp->pd_count++; 1331 /* 1332 * There is not much special handling for multiple appearances of 1333 * same fd other than xf_position always recording the first 1334 * appearance in poll list. If this is called from pcacheset_cache_list, 1335 * a VOP_POLL is called on every pollfd entry; therefore each 1336 * revents and fdcnt should be set correctly. If this is called from 1337 * pcacheset_resolve, we don't care about fdcnt here. Pollreadmap will 1338 * pick up the right count and handle revents field of each pollfd 1339 * entry. 1340 */ 1341 ASSERT(pdp->pd_ref != NULL); 1342 refp = &pdp->pd_ref[which]; 1343 if (refp->xf_refcnt == 0) { 1344 refp->xf_position = pos; 1345 } else { 1346 /* 1347 * xf_position records the fd's first appearance in poll list 1348 */ 1349 if (pos < refp->xf_position) { 1350 refp->xf_position = pos; 1351 } 1352 } 1353 ASSERT(pollfdp->fd == ps->ps_pollfd[refp->xf_position].fd); 1354 refp->xf_refcnt++; 1355 if (fd >= pcp->pc_mapsize) { 1356 pcache_grow_map(pcp, fd); 1357 } 1358 if (fd > pcp->pc_mapend) { 1359 pcp->pc_mapend = fd; 1360 } 1361 if (newpollfd != 0) { 1362 pcache_insert_fd(ps->ps_pcache, pdp, ps->ps_nfds); 1363 } 1364 if (memphp) { 1365 if (pdp->pd_php == NULL) { 1366 pollhead_insert(memphp, pdp); 1367 pdp->pd_php = memphp; 1368 } else { 1369 if (memphp != pdp->pd_php) { 1370 /* 1371 * layered devices (e.g. console driver) 1372 * may change the vnode and thus the pollhead 1373 * pointer out from underneath us. 1374 */ 1375 pollhead_delete(pdp->pd_php, pdp); 1376 pollhead_insert(memphp, pdp); 1377 pdp->pd_php = memphp; 1378 } 1379 } 1380 } 1381 /* 1382 * Since there is a considerable window between VOP_POLL and when 1383 * we actually put the polldat struct on the pollhead list, we could 1384 * miss a pollwakeup. In the case of polling additional events, we 1385 * don't update the events until after VOP_POLL. So we could miss 1386 * pollwakeup there too. So we always set the bit here just to be 1387 * safe. The real performance gain is in subsequent pcache_poll. 1388 */ 1389 mutex_enter(&pcp->pc_lock); 1390 BT_SET(pcp->pc_bitmap, fd); 1391 mutex_exit(&pcp->pc_lock); 1392 return (0); 1393 } 1394 1395 /* 1396 * The entry is not really deleted. The fields are cleared so that the 1397 * entry is no longer useful, but it will remain in the hash table for reuse 1398 * later. It will be freed when the polling lwp exits. 1399 */ 1400 int 1401 pcache_delete_fd(pollstate_t *ps, int fd, size_t pos, int which, uint_t cevent) 1402 { 1403 pollcache_t *pcp = ps->ps_pcache; 1404 polldat_t *pdp; 1405 xref_t *refp; 1406 1407 ASSERT(fd < pcp->pc_mapsize); 1408 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1409 1410 pdp = pcache_lookup_fd(pcp, fd); 1411 ASSERT(pdp != NULL); 1412 ASSERT(pdp->pd_count > 0); 1413 ASSERT(pdp->pd_ref != NULL); 1414 refp = &pdp->pd_ref[which]; 1415 if (pdp->pd_count == 1) { 1416 pdp->pd_events = 0; 1417 refp->xf_position = POLLPOSINVAL; 1418 ASSERT(refp->xf_refcnt == 1); 1419 refp->xf_refcnt = 0; 1420 if (pdp->pd_php) { 1421 /* 1422 * It is possible for a wakeup thread to get ahead 1423 * of the following pollhead_delete and set the bit in 1424 * bitmap. It is OK because the bit will be cleared 1425 * here anyway. 1426 */ 1427 pollhead_delete(pdp->pd_php, pdp); 1428 pdp->pd_php = NULL; 1429 } 1430 pdp->pd_count = 0; 1431 if (pdp->pd_fp != NULL) { 1432 pdp->pd_fp = NULL; 1433 delfpollinfo(fd); 1434 } 1435 mutex_enter(&pcp->pc_lock); 1436 BT_CLEAR(pcp->pc_bitmap, fd); 1437 mutex_exit(&pcp->pc_lock); 1438 return (0); 1439 } 1440 if ((cevent & POLLCLOSED) == POLLCLOSED) { 1441 /* 1442 * fd cached here has been closed. This is the first 1443 * pcache_delete_fd called after the close. Clean up the 1444 * entire entry. 1445 */ 1446 pcacheset_invalidate(ps, pdp); 1447 ASSERT(pdp->pd_php == NULL); 1448 mutex_enter(&pcp->pc_lock); 1449 BT_CLEAR(pcp->pc_bitmap, fd); 1450 mutex_exit(&pcp->pc_lock); 1451 return (0); 1452 } 1453 #ifdef DEBUG 1454 if (getf(fd) != NULL) { 1455 ASSERT(infpollinfo(fd)); 1456 releasef(fd); 1457 } 1458 #endif /* DEBUG */ 1459 pdp->pd_count--; 1460 ASSERT(refp->xf_refcnt > 0); 1461 if (--refp->xf_refcnt == 0) { 1462 refp->xf_position = POLLPOSINVAL; 1463 } else { 1464 ASSERT(pos >= refp->xf_position); 1465 if (pos == refp->xf_position) { 1466 /* 1467 * The xref position is no longer valid. 1468 * Reset it to a special value and let 1469 * caller know it needs to updatexref() 1470 * with a new xf_position value. 1471 */ 1472 refp->xf_position = POLLPOSTRANS; 1473 return (1); 1474 } 1475 } 1476 return (0); 1477 } 1478 1479 void 1480 pcache_update_xref(pollcache_t *pcp, int fd, ssize_t pos, int which) 1481 { 1482 polldat_t *pdp; 1483 1484 pdp = pcache_lookup_fd(pcp, fd); 1485 ASSERT(pdp != NULL); 1486 ASSERT(pdp->pd_ref != NULL); 1487 pdp->pd_ref[which].xf_position = pos; 1488 } 1489 1490 #ifdef DEBUG 1491 /* 1492 * For each polled fd, it's either in the bitmap or cached in 1493 * pcache hash table. If this routine returns 0, something is wrong. 1494 */ 1495 static int 1496 pollchecksanity(pollstate_t *ps, nfds_t nfds) 1497 { 1498 int i; 1499 int fd; 1500 pollcache_t *pcp = ps->ps_pcache; 1501 polldat_t *pdp; 1502 pollfd_t *pollfdp = ps->ps_pollfd; 1503 file_t *fp; 1504 1505 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1506 for (i = 0; i < nfds; i++) { 1507 fd = pollfdp[i].fd; 1508 if (fd < 0) { 1509 ASSERT(pollfdp[i].revents == 0); 1510 continue; 1511 } 1512 if (pollfdp[i].revents == POLLNVAL) 1513 continue; 1514 if ((fp = getf(fd)) == NULL) 1515 continue; 1516 pdp = pcache_lookup_fd(pcp, fd); 1517 ASSERT(pdp != NULL); 1518 ASSERT(infpollinfo(fd)); 1519 ASSERT(pdp->pd_fp == fp); 1520 releasef(fd); 1521 if (BT_TEST(pcp->pc_bitmap, fd)) 1522 continue; 1523 if (pdp->pd_php == NULL) 1524 return (0); 1525 } 1526 return (1); 1527 } 1528 #endif /* DEBUG */ 1529 1530 /* 1531 * resolve the difference between the current poll list and a cached one. 1532 */ 1533 int 1534 pcacheset_resolve(pollstate_t *ps, nfds_t nfds, int *fdcntp, int which) 1535 { 1536 int i; 1537 pollcache_t *pcp = ps->ps_pcache; 1538 pollfd_t *newlist = NULL; 1539 pollfd_t *current = ps->ps_pollfd; 1540 pollfd_t *cached; 1541 pollcacheset_t *pcsp; 1542 int common; 1543 int count = 0; 1544 int offset; 1545 int remain; 1546 int fd; 1547 file_t *fp; 1548 int fdcnt = 0; 1549 int cnt = 0; 1550 nfds_t old_nfds; 1551 int error = 0; 1552 int mismatch = 0; 1553 1554 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1555 #ifdef DEBUG 1556 checkpolldat(ps); 1557 #endif 1558 pcsp = &ps->ps_pcacheset[which]; 1559 old_nfds = pcsp->pcs_nfds; 1560 common = (nfds > old_nfds) ? old_nfds : nfds; 1561 if (nfds != old_nfds) { 1562 /* 1563 * the length of poll list has changed. allocate a new 1564 * pollfd list. 1565 */ 1566 newlist = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP); 1567 bcopy(current, newlist, sizeof (pollfd_t) * nfds); 1568 } 1569 /* 1570 * Compare the overlapping part of the current fd list with the 1571 * cached one. Whenever a difference is found, resolve it. 1572 * The comparison is done on the current poll list and the 1573 * cached list. But we may be setting up the newlist to be the 1574 * cached list for next poll. 1575 */ 1576 cached = pcsp->pcs_pollfd; 1577 remain = common; 1578 1579 while (count < common) { 1580 int tmpfd; 1581 pollfd_t *np; 1582 1583 np = (newlist != NULL) ? &newlist[count] : NULL; 1584 offset = pcacheset_cmp(¤t[count], &cached[count], np, 1585 remain); 1586 /* 1587 * Collect stats. If lists are completed the first time, 1588 * it's a hit. Otherwise, it's a partial hit or miss. 1589 */ 1590 if ((count == 0) && (offset == common)) { 1591 pollstats.pollcachehit.value.ui64++; 1592 } else { 1593 mismatch++; 1594 } 1595 count += offset; 1596 if (offset < remain) { 1597 ASSERT(count < common); 1598 ASSERT((current[count].fd != cached[count].fd) || 1599 (current[count].events != cached[count].events)); 1600 /* 1601 * Filter out invalid events. 1602 */ 1603 if (current[count].events & ~VALID_POLL_EVENTS) { 1604 if (newlist != NULL) { 1605 newlist[count].events = 1606 current[count].events &= 1607 VALID_POLL_EVENTS; 1608 } else { 1609 current[count].events &= 1610 VALID_POLL_EVENTS; 1611 } 1612 } 1613 /* 1614 * when resolving a difference, we always remove the 1615 * fd from cache before inserting one into cache. 1616 */ 1617 if (cached[count].fd >= 0) { 1618 tmpfd = cached[count].fd; 1619 if (pcache_delete_fd(ps, tmpfd, count, which, 1620 (uint_t)cached[count].events)) { 1621 /* 1622 * This should be rare but needed for 1623 * correctness. 1624 * 1625 * The first appearance in cached list 1626 * is being "turned off". The same fd 1627 * appear more than once in the cached 1628 * poll list. Find the next one on the 1629 * list and update the cached 1630 * xf_position field. 1631 */ 1632 for (i = count + 1; i < old_nfds; i++) { 1633 if (cached[i].fd == tmpfd) { 1634 pcache_update_xref(pcp, 1635 tmpfd, (ssize_t)i, 1636 which); 1637 break; 1638 } 1639 } 1640 ASSERT(i <= old_nfds); 1641 } 1642 /* 1643 * In case a new cache list is allocated, 1644 * need to keep both cache lists in sync 1645 * b/c the new one can be freed if we have 1646 * an error later. 1647 */ 1648 cached[count].fd = -1; 1649 if (newlist != NULL) { 1650 newlist[count].fd = -1; 1651 } 1652 } 1653 if ((tmpfd = current[count].fd) >= 0) { 1654 /* 1655 * add to the cached fd tbl and bitmap. 1656 */ 1657 if ((fp = getf(tmpfd)) == NULL) { 1658 current[count].revents = POLLNVAL; 1659 if (newlist != NULL) { 1660 newlist[count].fd = -1; 1661 } 1662 cached[count].fd = -1; 1663 fdcnt++; 1664 } else { 1665 /* 1666 * Here we don't care about the 1667 * fdcnt. We will examine the bitmap 1668 * later and pick up the correct 1669 * fdcnt there. So we never bother 1670 * to check value of 'cnt'. 1671 */ 1672 error = pcache_insert(ps, fp, 1673 ¤t[count], &cnt, 1674 (ssize_t)count, which); 1675 /* 1676 * if no error, we want to do releasef 1677 * after we updated cache poll list 1678 * entry so that close() won't race 1679 * us. 1680 */ 1681 if (error) { 1682 /* 1683 * If we encountered an error, 1684 * we have invalidated an 1685 * entry in cached poll list 1686 * (in pcache_delete_fd() above) 1687 * but failed to add one here. 1688 * This is OK b/c what's in the 1689 * cached list is consistent 1690 * with content of cache. 1691 * It will not have any ill 1692 * effect on next poll(). 1693 */ 1694 releasef(tmpfd); 1695 if (newlist != NULL) { 1696 kmem_free(newlist, 1697 nfds * 1698 sizeof (pollfd_t)); 1699 } 1700 return (error); 1701 } 1702 /* 1703 * If we have allocated a new(temp) 1704 * cache list, we need to keep both 1705 * in sync b/c the new one can be freed 1706 * if we have an error later. 1707 */ 1708 if (newlist != NULL) { 1709 newlist[count].fd = 1710 current[count].fd; 1711 newlist[count].events = 1712 current[count].events; 1713 } 1714 cached[count].fd = current[count].fd; 1715 cached[count].events = 1716 current[count].events; 1717 releasef(tmpfd); 1718 } 1719 } else { 1720 current[count].revents = 0; 1721 } 1722 count++; 1723 remain = common - count; 1724 } 1725 } 1726 if (mismatch != 0) { 1727 if (mismatch == common) { 1728 pollstats.pollcachemiss.value.ui64++; 1729 } else { 1730 pollstats.pollcachephit.value.ui64++; 1731 } 1732 } 1733 /* 1734 * take care of the non overlapping part of a list 1735 */ 1736 if (nfds > old_nfds) { 1737 ASSERT(newlist != NULL); 1738 for (i = old_nfds; i < nfds; i++) { 1739 /* filter out invalid events */ 1740 if (current[i].events & ~VALID_POLL_EVENTS) { 1741 newlist[i].events = current[i].events = 1742 current[i].events & VALID_POLL_EVENTS; 1743 } 1744 if ((fd = current[i].fd) < 0) { 1745 current[i].revents = 0; 1746 continue; 1747 } 1748 /* 1749 * add to the cached fd tbl and bitmap. 1750 */ 1751 if ((fp = getf(fd)) == NULL) { 1752 current[i].revents = POLLNVAL; 1753 newlist[i].fd = -1; 1754 fdcnt++; 1755 continue; 1756 } 1757 /* 1758 * Here we don't care about the 1759 * fdcnt. We will examine the bitmap 1760 * later and pick up the correct 1761 * fdcnt there. So we never bother to 1762 * check 'cnt'. 1763 */ 1764 error = pcache_insert(ps, fp, ¤t[i], &cnt, 1765 (ssize_t)i, which); 1766 releasef(fd); 1767 if (error) { 1768 /* 1769 * Here we are half way through adding newly 1770 * polled fd. Undo enough to keep the cache 1771 * list consistent with the cache content. 1772 */ 1773 pcacheset_remove_list(ps, current, old_nfds, 1774 i, which, 0); 1775 kmem_free(newlist, nfds * sizeof (pollfd_t)); 1776 return (error); 1777 } 1778 } 1779 } 1780 if (old_nfds > nfds) { 1781 /* 1782 * remove the fd's which are no longer polled. 1783 */ 1784 pcacheset_remove_list(ps, pcsp->pcs_pollfd, nfds, old_nfds, 1785 which, 1); 1786 } 1787 /* 1788 * set difference resolved. update nfds and cachedlist 1789 * in pollstate struct. 1790 */ 1791 if (newlist != NULL) { 1792 kmem_free(pcsp->pcs_pollfd, old_nfds * sizeof (pollfd_t)); 1793 /* 1794 * By now, the pollfd.revents field should 1795 * all be zeroed. 1796 */ 1797 pcsp->pcs_pollfd = newlist; 1798 pcsp->pcs_nfds = nfds; 1799 } 1800 ASSERT(*fdcntp == 0); 1801 *fdcntp = fdcnt; 1802 /* 1803 * By now for every fd in pollfdp, one of the following should be 1804 * true. Otherwise we will miss a polled event. 1805 * 1806 * 1. the bit corresponding to the fd in bitmap is set. So VOP_POLL 1807 * will be called on this fd in next poll. 1808 * 2. the fd is cached in the pcache (i.e. pd_php is set). So 1809 * pollnotify will happen. 1810 */ 1811 ASSERT(pollchecksanity(ps, nfds)); 1812 /* 1813 * make sure cross reference between cached poll lists and cached 1814 * poll fds are correct. 1815 */ 1816 ASSERT(pollcheckxref(ps, which)); 1817 /* 1818 * ensure each polldat in pollcache reference a polled fd in 1819 * pollcacheset. 1820 */ 1821 #ifdef DEBUG 1822 checkpolldat(ps); 1823 #endif 1824 return (0); 1825 } 1826 1827 #ifdef DEBUG 1828 static int 1829 pollscanrevents(pollcache_t *pcp, pollfd_t *pollfdp, nfds_t nfds) 1830 { 1831 int i; 1832 int reventcnt = 0; 1833 1834 for (i = 0; i < nfds; i++) { 1835 if (pollfdp[i].fd < 0) { 1836 ASSERT(pollfdp[i].revents == 0); 1837 continue; 1838 } 1839 if (pollfdp[i].revents) { 1840 reventcnt++; 1841 } 1842 if (pollfdp[i].revents && (pollfdp[i].revents != POLLNVAL)) { 1843 ASSERT(BT_TEST(pcp->pc_bitmap, pollfdp[i].fd)); 1844 } 1845 } 1846 return (reventcnt); 1847 } 1848 #endif /* DEBUG */ 1849 1850 /* 1851 * read the bitmap and poll on fds corresponding to the '1' bits. The ps_lock 1852 * is held upon entry. 1853 */ 1854 int 1855 pcache_poll(pollfd_t *pollfdp, pollstate_t *ps, nfds_t nfds, int *fdcntp, 1856 int which) 1857 { 1858 int i; 1859 pollcache_t *pcp; 1860 int fd; 1861 int begin, end, done; 1862 pollhead_t *php; 1863 int fdcnt; 1864 int error = 0; 1865 file_t *fp; 1866 polldat_t *pdp; 1867 xref_t *refp; 1868 int entry; 1869 1870 pcp = ps->ps_pcache; 1871 ASSERT(MUTEX_HELD(&ps->ps_lock)); 1872 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 1873 retry: 1874 done = 0; 1875 begin = 0; 1876 fdcnt = 0; 1877 end = pcp->pc_mapend; 1878 while ((fdcnt < nfds) && !done) { 1879 php = NULL; 1880 /* 1881 * only poll fds which may have events 1882 */ 1883 fd = bt_getlowbit(pcp->pc_bitmap, begin, end); 1884 ASSERT(fd <= end); 1885 if (fd >= 0) { 1886 ASSERT(pollcheckrevents(ps, begin, fd, which)); 1887 /* 1888 * adjust map pointers for next round 1889 */ 1890 if (fd == end) { 1891 done = 1; 1892 } else { 1893 begin = fd + 1; 1894 } 1895 /* 1896 * A bitmap caches poll state information of 1897 * multiple poll lists. Call VOP_POLL only if 1898 * the bit corresponds to an fd in this poll 1899 * list. 1900 */ 1901 pdp = pcache_lookup_fd(pcp, fd); 1902 ASSERT(pdp != NULL); 1903 ASSERT(pdp->pd_ref != NULL); 1904 refp = &pdp->pd_ref[which]; 1905 if (refp->xf_refcnt == 0) 1906 continue; 1907 entry = refp->xf_position; 1908 ASSERT((entry >= 0) && (entry < nfds)); 1909 ASSERT(pollfdp[entry].fd == fd); 1910 /* 1911 * we are in this routine implies that we have 1912 * successfully polled this fd in the past. 1913 * Check to see this fd is closed while we are 1914 * blocked in poll. This ensures that we don't 1915 * miss a close on the fd in the case this fd is 1916 * reused. 1917 */ 1918 if (pdp->pd_fp == NULL) { 1919 ASSERT(pdp->pd_count > 0); 1920 pollfdp[entry].revents = POLLNVAL; 1921 fdcnt++; 1922 if (refp->xf_refcnt > 1) { 1923 /* 1924 * this fd appeared multiple time 1925 * in the poll list. Find all of them. 1926 */ 1927 for (i = entry + 1; i < nfds; i++) { 1928 if (pollfdp[i].fd == fd) { 1929 pollfdp[i].revents = 1930 POLLNVAL; 1931 fdcnt++; 1932 } 1933 } 1934 } 1935 pcacheset_invalidate(ps, pdp); 1936 continue; 1937 } 1938 /* 1939 * We can be here polling a device that is being 1940 * closed (i.e. the file pointer is set to NULL, 1941 * but pollcacheclean has not happened yet). 1942 */ 1943 if ((fp = getf(fd)) == NULL) { 1944 pollfdp[entry].revents = POLLNVAL; 1945 fdcnt++; 1946 if (refp->xf_refcnt > 1) { 1947 /* 1948 * this fd appeared multiple time 1949 * in the poll list. Find all of them. 1950 */ 1951 for (i = entry + 1; i < nfds; i++) { 1952 if (pollfdp[i].fd == fd) { 1953 pollfdp[i].revents = 1954 POLLNVAL; 1955 fdcnt++; 1956 } 1957 } 1958 } 1959 continue; 1960 } 1961 ASSERT(pdp->pd_fp == fp); 1962 ASSERT(infpollinfo(fd)); 1963 /* 1964 * Since we no longer hold poll head lock across 1965 * VOP_POLL, pollunlock logic can be simplifed. 1966 */ 1967 ASSERT(pdp->pd_php == NULL || 1968 MUTEX_NOT_HELD(PHLOCK(pdp->pd_php))); 1969 /* 1970 * underlying file systems may set a "pollpending" 1971 * flag when it sees the poll may block. Pollwakeup() 1972 * is called by wakeup thread if pollpending is set. 1973 * Pass a 0 fdcnt so that the underlying file system 1974 * will set the "pollpending" flag set when there is 1975 * no polled events. 1976 * 1977 * Use pollfdp[].events for actual polling because 1978 * the pd_events is union of all cached poll events 1979 * on this fd. The events parameter also affects 1980 * how the polled device sets the "poll pending" 1981 * flag. 1982 */ 1983 ASSERT(curthread->t_pollcache == NULL); 1984 error = VOP_POLL(fp->f_vnode, pollfdp[entry].events, 0, 1985 &pollfdp[entry].revents, &php, NULL); 1986 /* 1987 * releasef after completely done with this cached 1988 * poll entry. To prevent close() coming in to clear 1989 * this entry. 1990 */ 1991 if (error) { 1992 releasef(fd); 1993 break; 1994 } 1995 /* 1996 * layered devices (e.g. console driver) 1997 * may change the vnode and thus the pollhead 1998 * pointer out from underneath us. 1999 */ 2000 if (php != NULL && pdp->pd_php != NULL && 2001 php != pdp->pd_php) { 2002 releasef(fd); 2003 pollhead_delete(pdp->pd_php, pdp); 2004 pdp->pd_php = php; 2005 pollhead_insert(php, pdp); 2006 /* 2007 * We could have missed a wakeup on the new 2008 * target device. Make sure the new target 2009 * gets polled once. 2010 */ 2011 BT_SET(pcp->pc_bitmap, fd); 2012 goto retry; 2013 } 2014 2015 if (pollfdp[entry].revents) { 2016 ASSERT(refp->xf_refcnt >= 1); 2017 fdcnt++; 2018 if (refp->xf_refcnt > 1) { 2019 /* 2020 * this fd appeared multiple time 2021 * in the poll list. This is rare but 2022 * we have to look at all of them for 2023 * correctness. 2024 */ 2025 error = plist_chkdupfd(fp, pdp, ps, 2026 pollfdp, entry, &fdcnt); 2027 if (error > 0) { 2028 releasef(fd); 2029 break; 2030 } 2031 if (error < 0) { 2032 goto retry; 2033 } 2034 } 2035 releasef(fd); 2036 } else { 2037 /* 2038 * VOP_POLL didn't return any revents. We can 2039 * clear the bit in bitmap only if we have the 2040 * pollhead ptr cached and no other cached 2041 * entry is polling different events on this fd. 2042 * VOP_POLL may have dropped the ps_lock. Make 2043 * sure pollwakeup has not happened before clear 2044 * the bit. 2045 */ 2046 if ((pdp->pd_php != NULL) && 2047 (pollfdp[entry].events == pdp->pd_events) && 2048 ((pcp->pc_flag & T_POLLWAKE) == 0)) { 2049 BT_CLEAR(pcp->pc_bitmap, fd); 2050 } 2051 /* 2052 * if the fd can be cached now but not before, 2053 * do it now. 2054 */ 2055 if ((pdp->pd_php == NULL) && (php != NULL)) { 2056 pdp->pd_php = php; 2057 pollhead_insert(php, pdp); 2058 /* 2059 * We are inserting a polldat struct for 2060 * the first time. We may have missed a 2061 * wakeup on this device. Re-poll once. 2062 * This should be a rare event. 2063 */ 2064 releasef(fd); 2065 goto retry; 2066 } 2067 if (refp->xf_refcnt > 1) { 2068 /* 2069 * this fd appeared multiple time 2070 * in the poll list. This is rare but 2071 * we have to look at all of them for 2072 * correctness. 2073 */ 2074 error = plist_chkdupfd(fp, pdp, ps, 2075 pollfdp, entry, &fdcnt); 2076 if (error > 0) { 2077 releasef(fd); 2078 break; 2079 } 2080 if (error < 0) { 2081 goto retry; 2082 } 2083 } 2084 releasef(fd); 2085 } 2086 } else { 2087 done = 1; 2088 ASSERT(pollcheckrevents(ps, begin, end + 1, which)); 2089 } 2090 } 2091 if (!error) { 2092 ASSERT(*fdcntp + fdcnt == pollscanrevents(pcp, pollfdp, nfds)); 2093 *fdcntp += fdcnt; 2094 } 2095 return (error); 2096 } 2097 2098 /* 2099 * Going through the poll list without much locking. Poll all fds and 2100 * cache all valid fds in the pollcache. 2101 */ 2102 int 2103 pcacheset_cache_list(pollstate_t *ps, pollfd_t *fds, int *fdcntp, int which) 2104 { 2105 pollfd_t *pollfdp = ps->ps_pollfd; 2106 pollcacheset_t *pcacheset = ps->ps_pcacheset; 2107 pollfd_t *newfdlist; 2108 int i; 2109 int fd; 2110 file_t *fp; 2111 int error = 0; 2112 2113 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2114 ASSERT(which < ps->ps_nsets); 2115 ASSERT(pcacheset != NULL); 2116 ASSERT(pcacheset[which].pcs_pollfd == NULL); 2117 newfdlist = kmem_alloc(ps->ps_nfds * sizeof (pollfd_t), KM_SLEEP); 2118 /* 2119 * cache the new poll list in pollcachset. 2120 */ 2121 bcopy(pollfdp, newfdlist, sizeof (pollfd_t) * ps->ps_nfds); 2122 2123 pcacheset[which].pcs_pollfd = newfdlist; 2124 pcacheset[which].pcs_nfds = ps->ps_nfds; 2125 pcacheset[which].pcs_usradr = (uintptr_t)fds; 2126 2127 /* 2128 * We have saved a copy of current poll fd list in one pollcacheset. 2129 * The 'revents' field of the new list is not yet set to 0. Loop 2130 * through the new list just to do that is expensive. We do that 2131 * while polling the list. 2132 */ 2133 for (i = 0; i < ps->ps_nfds; i++) { 2134 fd = pollfdp[i].fd; 2135 /* 2136 * We also filter out the illegal poll events in the event 2137 * field for the cached poll list/set. 2138 */ 2139 if (pollfdp[i].events & ~VALID_POLL_EVENTS) { 2140 newfdlist[i].events = pollfdp[i].events = 2141 pollfdp[i].events & VALID_POLL_EVENTS; 2142 } 2143 if (fd < 0) { 2144 pollfdp[i].revents = 0; 2145 continue; 2146 } 2147 if ((fp = getf(fd)) == NULL) { 2148 pollfdp[i].revents = POLLNVAL; 2149 /* 2150 * invalidate this cache entry in the cached poll list 2151 */ 2152 newfdlist[i].fd = -1; 2153 (*fdcntp)++; 2154 continue; 2155 } 2156 /* 2157 * cache this fd. 2158 */ 2159 error = pcache_insert(ps, fp, &pollfdp[i], fdcntp, (ssize_t)i, 2160 which); 2161 releasef(fd); 2162 if (error) { 2163 /* 2164 * Here we are half way through caching a new 2165 * poll list. Undo every thing. 2166 */ 2167 pcacheset_remove_list(ps, pollfdp, 0, i, which, 0); 2168 kmem_free(newfdlist, ps->ps_nfds * sizeof (pollfd_t)); 2169 pcacheset[which].pcs_pollfd = NULL; 2170 pcacheset[which].pcs_usradr = NULL; 2171 break; 2172 } 2173 } 2174 return (error); 2175 } 2176 2177 /* 2178 * called by pollcacheclean() to set the fp NULL. It also sets polled events 2179 * in pcacheset entries to a special events 'POLLCLOSED'. Do a pollwakeup to 2180 * wake any sleeping poller, then remove the polldat from the driver. 2181 * The routine is called with ps_pcachelock held. 2182 */ 2183 void 2184 pcache_clean_entry(pollstate_t *ps, int fd) 2185 { 2186 pollcache_t *pcp; 2187 polldat_t *pdp; 2188 int i; 2189 2190 ASSERT(ps != NULL); 2191 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2192 pcp = ps->ps_pcache; 2193 ASSERT(pcp); 2194 pdp = pcache_lookup_fd(pcp, fd); 2195 ASSERT(pdp != NULL); 2196 /* 2197 * the corresponding fpollinfo in fi_list has been removed by 2198 * a close on this fd. Reset the cached fp ptr here. 2199 */ 2200 pdp->pd_fp = NULL; 2201 /* 2202 * XXX - This routine also touches data in pcacheset struct. 2203 * 2204 * set the event in cached poll lists to POLLCLOSED. This invalidate 2205 * the cached poll fd entry in that poll list, which will force a 2206 * removal of this cached entry in next poll(). The cleanup is done 2207 * at the removal time. 2208 */ 2209 ASSERT(pdp->pd_ref != NULL); 2210 for (i = 0; i < ps->ps_nsets; i++) { 2211 xref_t *refp; 2212 pollcacheset_t *pcsp; 2213 2214 refp = &pdp->pd_ref[i]; 2215 if (refp->xf_refcnt) { 2216 ASSERT(refp->xf_position >= 0); 2217 pcsp = &ps->ps_pcacheset[i]; 2218 if (refp->xf_refcnt == 1) { 2219 pcsp->pcs_pollfd[refp->xf_position].events = 2220 (short)POLLCLOSED; 2221 } 2222 if (refp->xf_refcnt > 1) { 2223 int j; 2224 /* 2225 * mark every matching entry in pcs_pollfd 2226 */ 2227 for (j = refp->xf_position; 2228 j < pcsp->pcs_nfds; j++) { 2229 if (pcsp->pcs_pollfd[j].fd == fd) { 2230 pcsp->pcs_pollfd[j].events = 2231 (short)POLLCLOSED; 2232 } 2233 } 2234 } 2235 } 2236 } 2237 if (pdp->pd_php) { 2238 pollwakeup(pdp->pd_php, POLLHUP); 2239 pollhead_delete(pdp->pd_php, pdp); 2240 pdp->pd_php = NULL; 2241 } 2242 } 2243 2244 /* 2245 * This is the first time this thread has ever polled, 2246 * so we have to create its pollstate structure. 2247 * This will persist for the life of the thread, 2248 * until it calls pollcleanup(). 2249 */ 2250 pollstate_t * 2251 pollstate_create(void) 2252 { 2253 pollstate_t *ps; 2254 2255 ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP); 2256 ps->ps_nsets = POLLFDSETS; 2257 ps->ps_pcacheset = pcacheset_create(ps->ps_nsets); 2258 return (ps); 2259 } 2260 2261 void 2262 pollstate_destroy(pollstate_t *ps) 2263 { 2264 if (ps->ps_pollfd != NULL) { 2265 kmem_free(ps->ps_pollfd, ps->ps_nfds * sizeof (pollfd_t)); 2266 ps->ps_pollfd = NULL; 2267 } 2268 if (ps->ps_pcache != NULL) { 2269 pcache_destroy(ps->ps_pcache); 2270 ps->ps_pcache = NULL; 2271 } 2272 pcacheset_destroy(ps->ps_pcacheset, ps->ps_nsets); 2273 ps->ps_pcacheset = NULL; 2274 if (ps->ps_dpbuf != NULL) { 2275 kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize * sizeof (pollfd_t)); 2276 ps->ps_dpbuf = NULL; 2277 } 2278 mutex_destroy(&ps->ps_lock); 2279 kmem_free(ps, sizeof (pollstate_t)); 2280 } 2281 2282 /* 2283 * We are holding the appropriate uf_lock entering this routine. 2284 * Bump up the ps_busy count to prevent the thread from exiting. 2285 */ 2286 void 2287 pollblockexit(fpollinfo_t *fpip) 2288 { 2289 for (; fpip; fpip = fpip->fp_next) { 2290 pollcache_t *pcp = fpip->fp_thread->t_pollstate->ps_pcache; 2291 2292 mutex_enter(&pcp->pc_no_exit); 2293 pcp->pc_busy++; /* prevents exit()'s */ 2294 mutex_exit(&pcp->pc_no_exit); 2295 } 2296 } 2297 2298 /* 2299 * Complete phase 2 of cached poll fd cleanup. Call pcache_clean_entry to mark 2300 * the pcacheset events field POLLCLOSED to force the next poll() to remove 2301 * this cache entry. We can't clean the polldat entry clean up here because 2302 * lwp block in poll() needs the info to return. Wakeup anyone blocked in 2303 * poll and let exiting lwp go. No lock is help upon entry. So it's OK for 2304 * pcache_clean_entry to call pollwakeup(). 2305 */ 2306 void 2307 pollcacheclean(fpollinfo_t *fip, int fd) 2308 { 2309 struct fpollinfo *fpip, *fpip2; 2310 2311 fpip = fip; 2312 while (fpip) { 2313 pollstate_t *ps = fpip->fp_thread->t_pollstate; 2314 pollcache_t *pcp = ps->ps_pcache; 2315 2316 mutex_enter(&ps->ps_lock); 2317 pcache_clean_entry(ps, fd); 2318 mutex_exit(&ps->ps_lock); 2319 mutex_enter(&pcp->pc_no_exit); 2320 pcp->pc_busy--; 2321 if (pcp->pc_busy == 0) { 2322 /* 2323 * Wakeup the thread waiting in 2324 * thread_exit(). 2325 */ 2326 cv_signal(&pcp->pc_busy_cv); 2327 } 2328 mutex_exit(&pcp->pc_no_exit); 2329 2330 fpip2 = fpip; 2331 fpip = fpip->fp_next; 2332 kmem_free(fpip2, sizeof (fpollinfo_t)); 2333 } 2334 } 2335 2336 /* 2337 * one of the cache line's counter is wrapping around. Reset all cache line 2338 * counters to zero except one. This is simplistic, but probably works 2339 * effectively. 2340 */ 2341 void 2342 pcacheset_reset_count(pollstate_t *ps, int index) 2343 { 2344 int i; 2345 2346 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2347 for (i = 0; i < ps->ps_nsets; i++) { 2348 if (ps->ps_pcacheset[i].pcs_pollfd != NULL) { 2349 ps->ps_pcacheset[i].pcs_count = 0; 2350 } 2351 } 2352 ps->ps_pcacheset[index].pcs_count = 1; 2353 } 2354 2355 /* 2356 * this routine implements poll cache list replacement policy. 2357 * It is currently choose the "least used". 2358 */ 2359 int 2360 pcacheset_replace(pollstate_t *ps) 2361 { 2362 int i; 2363 int index = 0; 2364 2365 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2366 for (i = 1; i < ps->ps_nsets; i++) { 2367 if (ps->ps_pcacheset[index].pcs_count > 2368 ps->ps_pcacheset[i].pcs_count) { 2369 index = i; 2370 } 2371 } 2372 ps->ps_pcacheset[index].pcs_count = 0; 2373 return (index); 2374 } 2375 2376 /* 2377 * this routine is called by strclose to remove remaining polldat struct on 2378 * the pollhead list of the device being closed. There are two reasons as why 2379 * the polldat structures still remain on the pollhead list: 2380 * 2381 * (1) The layered device(e.g.the console driver). 2382 * In this case, the existence of a polldat implies that the thread putting 2383 * the polldat on this list has not exited yet. Before the thread exits, it 2384 * will have to hold this pollhead lock to remove the polldat. So holding the 2385 * pollhead lock here effectively prevents the thread which put the polldat 2386 * on this list from exiting. 2387 * 2388 * (2) /dev/poll. 2389 * When a polled fd is cached in /dev/poll, its polldat will remain on the 2390 * pollhead list if the process has not done a POLLREMOVE before closing the 2391 * polled fd. We just unlink it here. 2392 */ 2393 void 2394 pollhead_clean(pollhead_t *php) 2395 { 2396 polldat_t *pdp; 2397 2398 /* 2399 * In case(1), while we must prevent the thread in question from 2400 * exiting, we must also obey the proper locking order, i.e. 2401 * (ps_lock -> phlock). 2402 */ 2403 PH_ENTER(php); 2404 while (php->ph_list != NULL) { 2405 pollstate_t *ps; 2406 pollcache_t *pcp; 2407 2408 pdp = php->ph_list; 2409 ASSERT(pdp->pd_php == php); 2410 if (pdp->pd_thread == NULL) { 2411 /* 2412 * This is case(2). Since the ph_lock is sufficient 2413 * to synchronize this lwp with any other /dev/poll 2414 * lwp, just unlink the polldat. 2415 */ 2416 php->ph_list = pdp->pd_next; 2417 pdp->pd_php = NULL; 2418 pdp->pd_next = NULL; 2419 continue; 2420 } 2421 ps = pdp->pd_thread->t_pollstate; 2422 ASSERT(ps != NULL); 2423 pcp = pdp->pd_pcache; 2424 ASSERT(pcp != NULL); 2425 mutex_enter(&pcp->pc_no_exit); 2426 pcp->pc_busy++; /* prevents exit()'s */ 2427 mutex_exit(&pcp->pc_no_exit); 2428 /* 2429 * Now get the locks in proper order to avoid deadlock. 2430 */ 2431 PH_EXIT(php); 2432 mutex_enter(&ps->ps_lock); 2433 /* 2434 * while we dropped the pollhead lock, the element could be 2435 * taken off the list already. 2436 */ 2437 PH_ENTER(php); 2438 if (pdp->pd_php == php) { 2439 ASSERT(pdp == php->ph_list); 2440 php->ph_list = pdp->pd_next; 2441 pdp->pd_php = NULL; 2442 pdp->pd_next = NULL; 2443 } 2444 PH_EXIT(php); 2445 mutex_exit(&ps->ps_lock); 2446 mutex_enter(&pcp->pc_no_exit); 2447 pcp->pc_busy--; 2448 if (pcp->pc_busy == 0) { 2449 /* 2450 * Wakeup the thread waiting in 2451 * thread_exit(). 2452 */ 2453 cv_signal(&pcp->pc_busy_cv); 2454 } 2455 mutex_exit(&pcp->pc_no_exit); 2456 PH_ENTER(php); 2457 } 2458 PH_EXIT(php); 2459 } 2460 2461 /* 2462 * The remove_list is called to cleanup a partially cached 'current' list or 2463 * to remove a partial list which is no longer cached. The flag value of 1 2464 * indicates the second case. 2465 */ 2466 void 2467 pcacheset_remove_list(pollstate_t *ps, pollfd_t *pollfdp, int start, int end, 2468 int cacheindex, int flag) 2469 { 2470 int i; 2471 2472 ASSERT(MUTEX_HELD(&ps->ps_lock)); 2473 for (i = start; i < end; i++) { 2474 if ((pollfdp[i].fd >= 0) && 2475 (flag || !(pollfdp[i].revents & POLLNVAL))) { 2476 if (pcache_delete_fd(ps, pollfdp[i].fd, i, cacheindex, 2477 (uint_t)pollfdp[i].events)) { 2478 int j; 2479 int fd = pollfdp[i].fd; 2480 2481 for (j = i + 1; j < end; j++) { 2482 if (pollfdp[j].fd == fd) { 2483 pcache_update_xref( 2484 ps->ps_pcache, fd, 2485 (ssize_t)j, cacheindex); 2486 break; 2487 } 2488 } 2489 ASSERT(j <= end); 2490 } 2491 } 2492 } 2493 } 2494 2495 #ifdef DEBUG 2496 2497 #include<sys/strsubr.h> 2498 /* 2499 * make sure curthread is not on anyone's pollhead list any more. 2500 */ 2501 static void 2502 pollcheckphlist() 2503 { 2504 int i; 2505 file_t *fp; 2506 uf_entry_t *ufp; 2507 uf_info_t *fip = P_FINFO(curproc); 2508 struct stdata *stp; 2509 polldat_t *pdp; 2510 2511 mutex_enter(&fip->fi_lock); 2512 for (i = 0; i < fip->fi_nfiles; i++) { 2513 UF_ENTER(ufp, fip, i); 2514 if ((fp = ufp->uf_file) != NULL) { 2515 if ((stp = fp->f_vnode->v_stream) != NULL) { 2516 PH_ENTER(&stp->sd_pollist); 2517 pdp = stp->sd_pollist.ph_list; 2518 while (pdp) { 2519 ASSERT(pdp->pd_thread != curthread); 2520 pdp = pdp->pd_next; 2521 } 2522 PH_EXIT(&stp->sd_pollist); 2523 } 2524 } 2525 UF_EXIT(ufp); 2526 } 2527 mutex_exit(&fip->fi_lock); 2528 } 2529 2530 /* 2531 * for resolved set poll list, the xref info in the pcache should be 2532 * consistent with this poll list. 2533 */ 2534 static int 2535 pollcheckxref(pollstate_t *ps, int cacheindex) 2536 { 2537 pollfd_t *pollfdp = ps->ps_pcacheset[cacheindex].pcs_pollfd; 2538 pollcache_t *pcp = ps->ps_pcache; 2539 polldat_t *pdp; 2540 int i; 2541 xref_t *refp; 2542 2543 for (i = 0; i < ps->ps_pcacheset[cacheindex].pcs_nfds; i++) { 2544 if (pollfdp[i].fd < 0) { 2545 continue; 2546 } 2547 pdp = pcache_lookup_fd(pcp, pollfdp[i].fd); 2548 ASSERT(pdp != NULL); 2549 ASSERT(pdp->pd_ref != NULL); 2550 refp = &pdp->pd_ref[cacheindex]; 2551 if (refp->xf_position >= 0) { 2552 ASSERT(refp->xf_refcnt >= 1); 2553 ASSERT(pollfdp[refp->xf_position].fd == pdp->pd_fd); 2554 if (refp->xf_refcnt > 1) { 2555 int j; 2556 int count = 0; 2557 2558 for (j = refp->xf_position; 2559 j < ps->ps_pcacheset[cacheindex].pcs_nfds; 2560 j++) { 2561 if (pollfdp[j].fd == pdp->pd_fd) { 2562 count++; 2563 } 2564 } 2565 ASSERT(count == refp->xf_refcnt); 2566 } 2567 } 2568 } 2569 return (1); 2570 } 2571 2572 /* 2573 * For every cached pollfd, its polldat struct should be consistent with 2574 * what is in the pcacheset lists. 2575 */ 2576 static void 2577 checkpolldat(pollstate_t *ps) 2578 { 2579 pollcache_t *pcp = ps->ps_pcache; 2580 polldat_t **hashtbl; 2581 int i; 2582 2583 hashtbl = pcp->pc_hash; 2584 for (i = 0; i < pcp->pc_hashsize; i++) { 2585 polldat_t *pdp; 2586 2587 for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) { 2588 ASSERT(pdp->pd_ref != NULL); 2589 if (pdp->pd_count > 0) { 2590 xref_t *refp; 2591 int j; 2592 pollcacheset_t *pcsp; 2593 pollfd_t *pollfd; 2594 2595 for (j = 0; j < ps->ps_nsets; j++) { 2596 refp = &pdp->pd_ref[j]; 2597 if (refp->xf_refcnt > 0) { 2598 pcsp = &ps->ps_pcacheset[j]; 2599 ASSERT(refp->xf_position < pcsp->pcs_nfds); 2600 pollfd = pcsp->pcs_pollfd; 2601 ASSERT(pdp->pd_fd == pollfd[refp->xf_position].fd); 2602 } 2603 } 2604 } 2605 } 2606 } 2607 } 2608 2609 /* 2610 * every wfd element on ph_list must have a corresponding fpollinfo on the 2611 * uf_fpollinfo list. This is a variation of infpollinfo() w/o holding locks. 2612 */ 2613 void 2614 checkwfdlist(vnode_t *vp, fpollinfo_t *fpip) 2615 { 2616 stdata_t *stp; 2617 polldat_t *pdp; 2618 fpollinfo_t *fpip2; 2619 2620 if ((stp = vp->v_stream) == NULL) { 2621 return; 2622 } 2623 PH_ENTER(&stp->sd_pollist); 2624 for (pdp = stp->sd_pollist.ph_list; pdp; pdp = pdp->pd_next) { 2625 if (pdp->pd_thread->t_procp == curthread->t_procp) { 2626 for (fpip2 = fpip; fpip2; fpip2 = fpip2->fp_next) { 2627 if (pdp->pd_thread == fpip2->fp_thread) { 2628 break; 2629 } 2630 } 2631 ASSERT(fpip2 != NULL); 2632 } 2633 } 2634 PH_EXIT(&stp->sd_pollist); 2635 } 2636 2637 /* 2638 * For each cached fd whose bit is not set in bitmap, its revents field in 2639 * current poll list should be 0. 2640 */ 2641 static int 2642 pollcheckrevents(pollstate_t *ps, int begin, int end, int cacheindex) 2643 { 2644 pollcache_t *pcp = ps->ps_pcache; 2645 pollfd_t *pollfdp = ps->ps_pollfd; 2646 int i; 2647 2648 for (i = begin; i < end; i++) { 2649 polldat_t *pdp; 2650 2651 ASSERT(!BT_TEST(pcp->pc_bitmap, i)); 2652 pdp = pcache_lookup_fd(pcp, i); 2653 if (pdp && pdp->pd_fp != NULL) { 2654 xref_t *refp; 2655 int entry; 2656 2657 ASSERT(pdp->pd_ref != NULL); 2658 refp = &pdp->pd_ref[cacheindex]; 2659 if (refp->xf_refcnt == 0) { 2660 continue; 2661 } 2662 entry = refp->xf_position; 2663 ASSERT(entry >= 0); 2664 ASSERT(pollfdp[entry].revents == 0); 2665 if (refp->xf_refcnt > 1) { 2666 int j; 2667 2668 for (j = entry + 1; j < ps->ps_nfds; j++) { 2669 if (pollfdp[j].fd == i) { 2670 ASSERT(pollfdp[j].revents == 0); 2671 } 2672 } 2673 } 2674 } 2675 } 2676 return (1); 2677 } 2678 2679 #endif /* DEBUG */ 2680 2681 pollcache_t * 2682 pcache_alloc() 2683 { 2684 return (kmem_zalloc(sizeof (pollcache_t), KM_SLEEP)); 2685 } 2686 2687 void 2688 pcache_create(pollcache_t *pcp, nfds_t nfds) 2689 { 2690 size_t mapsize; 2691 2692 /* 2693 * allocate enough bits for the poll fd list 2694 */ 2695 if ((mapsize = POLLMAPCHUNK) <= nfds) { 2696 mapsize = (nfds + POLLMAPCHUNK - 1) & ~(POLLMAPCHUNK - 1); 2697 } 2698 pcp->pc_bitmap = kmem_zalloc((mapsize / BT_NBIPUL) * sizeof (ulong_t), 2699 KM_SLEEP); 2700 pcp->pc_mapsize = mapsize; 2701 /* 2702 * The hash size is at least POLLHASHCHUNKSZ. If user polls a large 2703 * number of fd to start with, allocate a bigger hash table (to the 2704 * nearest multiple of POLLHASHCHUNKSZ) because dynamically growing a 2705 * hash table is expensive. 2706 */ 2707 if (nfds < POLLHASHCHUNKSZ) { 2708 pcp->pc_hashsize = POLLHASHCHUNKSZ; 2709 } else { 2710 pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) & 2711 ~(POLLHASHCHUNKSZ - 1); 2712 } 2713 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *), 2714 KM_SLEEP); 2715 } 2716 2717 void 2718 pcache_destroy(pollcache_t *pcp) 2719 { 2720 polldat_t **hashtbl; 2721 int i; 2722 2723 hashtbl = pcp->pc_hash; 2724 for (i = 0; i < pcp->pc_hashsize; i++) { 2725 if (hashtbl[i] != NULL) { 2726 polldat_t *pdp, *pdp2; 2727 2728 pdp = hashtbl[i]; 2729 while (pdp != NULL) { 2730 pdp2 = pdp->pd_hashnext; 2731 if (pdp->pd_ref != NULL) { 2732 kmem_free(pdp->pd_ref, sizeof (xref_t) * 2733 pdp->pd_nsets); 2734 } 2735 kmem_free(pdp, sizeof (polldat_t)); 2736 pdp = pdp2; 2737 pcp->pc_fdcount--; 2738 } 2739 } 2740 } 2741 ASSERT(pcp->pc_fdcount == 0); 2742 kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize); 2743 kmem_free(pcp->pc_bitmap, 2744 sizeof (ulong_t) * (pcp->pc_mapsize/BT_NBIPUL)); 2745 mutex_destroy(&pcp->pc_no_exit); 2746 mutex_destroy(&pcp->pc_lock); 2747 cv_destroy(&pcp->pc_cv); 2748 cv_destroy(&pcp->pc_busy_cv); 2749 kmem_free(pcp, sizeof (pollcache_t)); 2750 } 2751 2752 pollcacheset_t * 2753 pcacheset_create(int nsets) 2754 { 2755 return (kmem_zalloc(sizeof (pollcacheset_t) * nsets, KM_SLEEP)); 2756 } 2757 2758 void 2759 pcacheset_destroy(pollcacheset_t *pcsp, int nsets) 2760 { 2761 int i; 2762 2763 for (i = 0; i < nsets; i++) { 2764 if (pcsp[i].pcs_pollfd != NULL) { 2765 kmem_free(pcsp[i].pcs_pollfd, pcsp[i].pcs_nfds * 2766 sizeof (pollfd_t)); 2767 } 2768 } 2769 kmem_free(pcsp, sizeof (pollcacheset_t) * nsets); 2770 } 2771 2772 /* 2773 * Check each duplicated poll fd in the poll list. It may be necessary to 2774 * VOP_POLL the same fd again using different poll events. getf() has been 2775 * done by caller. This routine returns 0 if it can sucessfully process the 2776 * entire poll fd list. It returns -1 if underlying vnode has changed during 2777 * a VOP_POLL, in which case the caller has to repoll. It returns a positive 2778 * value if VOP_POLL failed. 2779 */ 2780 static int 2781 plist_chkdupfd(file_t *fp, polldat_t *pdp, pollstate_t *psp, pollfd_t *pollfdp, 2782 int entry, int *fdcntp) 2783 { 2784 int i; 2785 int fd; 2786 nfds_t nfds = psp->ps_nfds; 2787 2788 fd = pollfdp[entry].fd; 2789 for (i = entry + 1; i < nfds; i++) { 2790 if (pollfdp[i].fd == fd) { 2791 if (pollfdp[i].events == pollfdp[entry].events) { 2792 if ((pollfdp[i].revents = 2793 pollfdp[entry].revents) != 0) { 2794 (*fdcntp)++; 2795 } 2796 } else { 2797 2798 int error; 2799 pollhead_t *php; 2800 pollcache_t *pcp = psp->ps_pcache; 2801 2802 /* 2803 * the events are different. VOP_POLL on this 2804 * fd so that we don't miss any revents. 2805 */ 2806 php = NULL; 2807 ASSERT(curthread->t_pollcache == NULL); 2808 error = VOP_POLL(fp->f_vnode, 2809 pollfdp[i].events, 0, 2810 &pollfdp[i].revents, &php, NULL); 2811 if (error) { 2812 return (error); 2813 } 2814 /* 2815 * layered devices(e.g. console driver) 2816 * may change the vnode and thus the pollhead 2817 * pointer out from underneath us. 2818 */ 2819 if (php != NULL && pdp->pd_php != NULL && 2820 php != pdp->pd_php) { 2821 pollhead_delete(pdp->pd_php, pdp); 2822 pdp->pd_php = php; 2823 pollhead_insert(php, pdp); 2824 /* 2825 * We could have missed a wakeup on the 2826 * new target device. Make sure the new 2827 * target gets polled once. 2828 */ 2829 BT_SET(pcp->pc_bitmap, fd); 2830 return (-1); 2831 } 2832 if (pollfdp[i].revents) { 2833 (*fdcntp)++; 2834 } 2835 } 2836 } 2837 } 2838 return (0); 2839 } 2840