/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/devops.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/stat.h>
#include <sys/poll_impl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/bitmap.h>
#include <sys/devpoll.h>
#include <sys/rctl.h>
#include <sys/resource.h>

#define	RESERVED	1

/* local data struct */
static	dp_entry_t	**devpolltbl;	/* dev poll entries */
static	size_t		dptblsize;

static	kmutex_t	devpoll_lock;	/* lock protecting dev tbl */
int			devpoll_init;	/* is /dev/poll initialized already */

/* device local functions */

static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp);
static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp);
static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp);
static int dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);
static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp);
static dev_info_t *dpdevi;


static struct cb_ops dp_cb_ops = {
	dpopen,			/* open */
	dpclose,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	dpwrite,		/* write */
	dpioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	dppoll,			/* poll */
	ddi_prop_op,		/* prop_op */
	(struct streamtab *)0,	/* streamtab */
	D_MP,			/* flags */
	CB_REV,			/* cb_ops revision */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int dpattach(dev_info_t *, ddi_attach_cmd_t);
static int dpdetach(dev_info_t *, ddi_detach_cmd_t);
static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

static struct dev_ops dp_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dpinfo,			/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dpattach,		/* attach */
	dpdetach,		/* detach */
	nodev,			/* reset */
	&dp_cb_ops,		/* driver operations */
	(struct bus_ops *)NULL,	/* bus operations */
	nulldev			/* power */
};


static struct modldrv modldrv = {
	&mod_driverops,		/* type of module - a driver */
	"Dev Poll driver %I%",
	&dp_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Locking Design
 *
 * The /dev/poll driver shares most of its code with the poll(2) system
 * call, whose code is in common/syscall/poll.c. In the poll(2) design,
 * the pollcache structure is per lwp. An implicit assumption made there
 * is that some portion of the pollcache will never be touched by other
 * lwps; e.g., in the poll(2) design, no lwp ever needs to grow another
 * lwp's bitmap. This assumption is not true for /dev/poll; hence the
 * need for extra locking.
 *
 * To allow more parallelism, each /dev/poll file descriptor (indexed by
 * minor number) has its own lock. Since a read (dpioctl) is a much more
 * frequent operation than a write, we want to allow multiple reads on the
 * same /dev/poll fd. However, we prevent writes from being starved by
 * giving priority to write operations. Theoretically writes can starve
 * reads as well, but in practice this is not important because (1) writes
 * happen less often than reads, and (2) a write operation defines the
 * contents of the cached fd set. If writes happened so often that they
 * could starve reads, the cached set would be very unstable, and it would
 * make little sense to read an unstable cached set anyway. Therefore, the
 * writers-starving-readers case is not handled in this design.
 */
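
/*
 * For orientation, a minimal userland sketch of the interface this driver
 * implements (illustrative only, not part of this module; error handling
 * omitted, and sockfd is a hypothetical open descriptor). Fds are
 * registered by writing pollfd_t entries to the driver, and ready fds are
 * then harvested with the DP_POLL ioctl:
 *
 *	int dpfd = open("/dev/poll", O_RDWR);
 *
 *	pollfd_t pfd;
 *	pfd.fd = sockfd;
 *	pfd.events = POLLIN;
 *	pfd.revents = 0;
 *	(void) write(dpfd, &pfd, sizeof (pfd));
 *
 *	pollfd_t rfds[8];
 *	struct dvpoll dvp;
 *	dvp.dp_fds = rfds;		// filled in with ready fds
 *	dvp.dp_nfds = 8;		// report at most this many
 *	dvp.dp_timeout = 1000;		// milliseconds; -1 blocks
 *	int n = ioctl(dpfd, DP_POLL, &dvp);
 */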
int
_init()
{
	int	error;

	dptblsize = DEVPOLLSIZE;
	devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
	mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL);
	devpoll_init = 1;
	if ((error = mod_install(&modlinkage)) != 0) {
		mutex_destroy(&devpoll_lock);
		kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
		devpoll_init = 0;
	}
	return (error);
}

int
_fini()
{
	int error;

	if ((error = mod_remove(&modlinkage)) != 0) {
		return (error);
	}
	mutex_destroy(&devpoll_lock);
	kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*ARGSUSED*/
static int
dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL)
	    == DDI_FAILURE) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}
	dpdevi = devi;
	return (DDI_SUCCESS);
}

static int
dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dpdevi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}
/*
 * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major
 * differences are: (1) /dev/poll requires scanning the bitmap starting
 * where it stopped last time, instead of always starting from 0;
 * (2) since the user may not have cleaned up cached fds when they were
 * closed, some polldats in the cache may refer to closed or reused fds.
 * We need to check for those cases.
 *
 * NOTE: Upon closing an fd, automatic poll cache cleanup is done for
 * poll(2) caches but NOT for /dev/poll caches. So expect some stale
 * entries!
 */
static int
dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
{
	int		start, ostart, end;
	int		fdcnt, fd;
	boolean_t	done;
	file_t		*fp;
	short		revent;
	boolean_t	no_wrap;
	pollhead_t	*php;
	polldat_t	*pdp;
	int		error = 0;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));
	if (pcp->pc_bitmap == NULL) {
		/*
		 * No need to search because no poll fd
		 * has been cached.
		 */
		return (error);
	}
retry:
	start = ostart = pcp->pc_mapstart;
	end = pcp->pc_mapend;
	php = NULL;

	if (start == 0) {
		/*
		 * Started from the very beginning; no need to wrap around.
		 */
		no_wrap = B_TRUE;
	} else {
		no_wrap = B_FALSE;
	}
	done = B_FALSE;
	fdcnt = 0;
	while ((fdcnt < nfds) && !done) {
		php = NULL;
		revent = 0;
		/*
		 * Examine the bitmap in a circular fashion
		 * to avoid starvation. Always resume from
		 * the last stop. Scan till the end of the map,
		 * then wrap around.
		 */
		fd = bt_getlowbit(pcp->pc_bitmap, start, end);
		ASSERT(fd <= end);
		if (fd >= 0) {
			if (fd == end) {
				if (no_wrap) {
					done = B_TRUE;
				} else {
					start = 0;
					end = ostart - 1;
					no_wrap = B_TRUE;
				}
			} else {
				start = fd + 1;
			}
			pdp = pcache_lookup_fd(pcp, fd);
			ASSERT(pdp != NULL);
			ASSERT(pdp->pd_fd == fd);
			if (pdp->pd_fp == NULL) {
				/*
				 * The fd is POLLREMOVEd. This fd is
				 * logically no longer cached. So move
				 * on to the next one.
				 */
				continue;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd has been closed, but the user has
				 * not done a POLLREMOVE on this fd yet.
				 * Instead of cleaning it up here implicitly,
				 * we return POLLNVAL. This is consistent
				 * with poll(2) polling a closed fd.
				 * Hopefully it will remind the user to do
				 * a POLLREMOVE.
				 */
				pfdp[fdcnt].fd = fd;
				pfdp[fdcnt].revents = POLLNVAL;
				fdcnt++;
				continue;
			}
			if (fp != pdp->pd_fp) {
				/*
				 * The user is polling on a cached fd which
				 * was closed and then reused. Unfortunately
				 * there is no good way to inform the user.
				 * If the file struct is also reused, we may
				 * not be able to detect the fd reuse at all.
				 * As long as this does not cause a system
				 * failure and/or a memory leak, we will play
				 * along. The man page states that if the
				 * user does not clean up closed fds, polling
				 * results will be indeterministic.
				 *
				 * XXX - perhaps log the detection of fd
				 * reuse?
				 */
				pdp->pd_fp = fp;
			}
			/*
			 * XXX - pollrelock() logic needs to know which
			 * pollcache lock to grab. It'd be a cleaner
			 * solution if we could pass pcp as an argument
			 * in the VOP_POLL interface instead of implicitly
			 * passing it via the thread_t struct. On the
			 * other hand, changing the VOP_POLL interface
			 * would require every driver/file system poll
			 * routine to change. We may want to revisit the
			 * tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
			    &revent, &php, NULL);
			curthread->t_pollcache = NULL;
			releasef(fd);
			if (error != 0) {
				break;
			}
			/*
			 * Layered devices (e.g. the console driver)
			 * may change the vnode and thus the pollhead
			 * pointer out from underneath us.
			 */
			if (php != NULL && pdp->pd_php != NULL &&
			    php != pdp->pd_php) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = php;
				pollhead_insert(php, pdp);
				/*
				 * The bit should still be set.
				 */
				ASSERT(BT_TEST(pcp->pc_bitmap, fd));
				goto retry;
			}

			if (revent != 0) {
				pfdp[fdcnt].fd = fd;
				pfdp[fdcnt].events = pdp->pd_events;
				pfdp[fdcnt].revents = revent;
				fdcnt++;
			} else if (php != NULL) {
				/*
				 * We clear a bit or cache a poll fd only if
				 * the driver returns a pollhead ptr, which
				 * is expected in the case of 0 revents. A
				 * buggy driver may return a NULL php pointer
				 * with 0 revents. In that case we treat the
				 * driver as "non-cacheable" and do not clear
				 * the bit in the bitmap.
				 */
				if ((pdp->pd_php != NULL) &&
				    ((pcp->pc_flag & T_POLLWAKE) == 0)) {
					BT_CLEAR(pcp->pc_bitmap, fd);
				}
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
				}
			}
		} else {
			/*
			 * No bit set in the range. Check for wrap around.
			 */
			if (!no_wrap) {
				start = 0;
				end = ostart - 1;
				no_wrap = B_TRUE;
			} else {
				done = B_TRUE;
			}
		}
	}

	if (!done) {
		pcp->pc_mapstart = start;
	}
	ASSERT(*fdcntp == 0);
	*fdcntp = fdcnt;
	return (error);
}
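
/*
 * A worked example of the circular scan above (numbers are hypothetical):
 * if pc_mapstart == 5 and pc_mapend == 9, the first pass scans bits 5..9
 * with no_wrap == B_FALSE. When the scan reaches bit 9 (or finds nothing
 * set), start becomes 0, end becomes ostart - 1 == 4, and no_wrap is set,
 * so the loop finishes after bits 0..4 have been examined. Each cached fd
 * is therefore visited at most once per call, and the saved pc_mapstart
 * lets the next call resume where this one stopped, so low-numbered fds
 * cannot starve high-numbered ones.
 */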
/*ARGSUSED*/
static int
dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t		minordev;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;

	ASSERT(devpoll_init);
	ASSERT(dptblsize <= MAXMIN);
	mutex_enter(&devpoll_lock);
	for (minordev = 0; minordev < dptblsize; minordev++) {
		if (devpolltbl[minordev] == NULL) {
			devpolltbl[minordev] = (dp_entry_t *)RESERVED;
			break;
		}
	}
	if (minordev == dptblsize) {
		dp_entry_t	**newtbl;
		size_t		oldsize;

		/*
		 * Used up every entry in the existing devpoll table.
		 * Grow the table by DEVPOLLSIZE.
		 */
		if ((oldsize = dptblsize) >= MAXMIN) {
			mutex_exit(&devpoll_lock);
			return (ENXIO);
		}
		dptblsize += DEVPOLLSIZE;
		if (dptblsize > MAXMIN) {
			dptblsize = MAXMIN;
		}
		newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
		bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize);
		kmem_free(devpolltbl, sizeof (caddr_t) * oldsize);
		devpolltbl = newtbl;
		devpolltbl[minordev] = (dp_entry_t *)RESERVED;
	}
	mutex_exit(&devpoll_lock);

	dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP);
	/*
	 * Allocate a pollcache skeleton here. Delay allocating bitmap
	 * structures until dpwrite() time, since we don't know the
	 * optimal size yet.
	 */
	pcp = pcache_alloc();
	dpep->dpe_pcache = pcp;
	pcp->pc_pid = curproc->p_pid;
	*devp = makedevice(getmajor(*devp), minordev); /* clone the driver */
	mutex_enter(&devpoll_lock);
	ASSERT(minordev < dptblsize);
	ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED);
	devpolltbl[minordev] = dpep;
	mutex_exit(&devpoll_lock);
	return (0);
}
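
/*
 * Note on the cloning above (behavioral sketch; the userland view is
 * hypothetical): because dpopen() rewrites *devp with a freshly reserved
 * minor, each open(2) of /dev/poll gets its own dp_entry_t/pollcache pair:
 *
 *	int a = open("/dev/poll", O_RDWR);
 *	int b = open("/dev/poll", O_RDWR);
 *
 * a and b name independent cached fd sets; fds written through a are
 * never reported by DP_POLL on b.
 */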
/*
 * Write to /dev/poll to add/remove fds to/from the cached poll fd set,
 * or to change the poll events of a watched fd.
 */
/*ARGSUSED*/
static int
dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	pollfd_t	*pollfdp, *pfdp;
	int		error;
	ssize_t		uiosize;
	nfds_t		pollfdnum;
	struct pollhead	*php = NULL;
	polldat_t	*pdp;
	int		fd;
	file_t		*fp;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	if (curproc->p_pid != pcp->pc_pid) {
		return (EACCES);
	}
	uiosize = uiop->uio_resid;
	pollfdnum = uiosize / sizeof (pollfd_t);
	mutex_enter(&curproc->p_lock);
	if (pollfdnum > (uint_t)rctl_enforced_value(
	    rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
		    curproc->p_rctls, curproc, RCA_SAFE);
		mutex_exit(&curproc->p_lock);
		return (set_errno(EINVAL));
	}
	mutex_exit(&curproc->p_lock);
	/*
	 * Copy in the pollfd array. Walk through the array and add
	 * each polled fd to the cached set.
	 */
	pollfdp = kmem_alloc(uiosize, KM_SLEEP);

	/*
	 * Although /dev/poll uses the write(2) interface to cache fds, it's
	 * not supposed to function as a seekable device. To prevent the
	 * offset from growing and eventually exceeding the maximum, reset
	 * the offset here for every call.
	 */
	uiop->uio_loffset = 0;
	if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop))
	    != 0) {
		kmem_free(pollfdp, uiosize);
		return (error);
	}
	/*
	 * We are about to enter the core portion of dpwrite(). Make sure this
	 * write has exclusive access in this portion of the code, i.e., no
	 * other writers in this code and no other readers in dpioctl.
	 */
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_writerwait++;
	while (dpep->dpe_refcnt != 0) {
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			dpep->dpe_writerwait--;
			mutex_exit(&dpep->dpe_lock);
			kmem_free(pollfdp, uiosize);
			return (set_errno(EINTR));
		}
	}
	dpep->dpe_writerwait--;
	dpep->dpe_flag |= DP_WRITER_PRESENT;
	dpep->dpe_refcnt++;
	mutex_exit(&dpep->dpe_lock);

	mutex_enter(&pcp->pc_lock);
	if (pcp->pc_bitmap == NULL) {
		pcache_create(pcp, pollfdnum);
	}
	for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) {
		fd = pfdp->fd;
		if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles)
			continue;
		pdp = pcache_lookup_fd(pcp, fd);
		if (pfdp->events != POLLREMOVE) {
			if (pdp == NULL) {
				pdp = pcache_alloc_fd(0);
				pdp->pd_fd = fd;
				pdp->pd_pcache = pcp;
				pcache_insert_fd(pcp, pdp, pollfdnum);
			}
			ASSERT(pdp->pd_fd == fd);
			ASSERT(pdp->pd_pcache == pcp);
			if (fd >= pcp->pc_mapsize) {
				mutex_exit(&pcp->pc_lock);
				pcache_grow_map(pcp, fd);
				mutex_enter(&pcp->pc_lock);
			}
			if (fd > pcp->pc_mapend) {
				pcp->pc_mapend = fd;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd is not valid. Since we can't pass
				 * this error back in the write() call, set
				 * the bit in the bitmap to force the DP_POLL
				 * ioctl to examine it.
				 */
				BT_SET(pcp->pc_bitmap, fd);
				pdp->pd_events |= pfdp->events;
				continue;
			}
			/*
			 * Don't do VOP_POLL for an already cached fd with
			 * the same poll events.
			 */
			if ((pdp->pd_events == pfdp->events) &&
			    (pdp->pd_fp != NULL)) {
				/*
				 * The events are already cached.
				 */
				releasef(fd);
				continue;
			}

			/*
			 * Do VOP_POLL and cache this poll fd.
			 *
			 * XXX - pollrelock() logic needs to know which
			 * pollcache lock to grab. It'd be a cleaner
			 * solution if we could pass pcp as an argument
			 * in the VOP_POLL interface instead of implicitly
			 * passing it via the thread_t struct. On the
			 * other hand, changing the VOP_POLL interface
			 * would require every driver/file system poll
			 * routine to change. We may want to revisit the
			 * tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
			    &pfdp->revents, &php, NULL);
			curthread->t_pollcache = NULL;
			/*
			 * We always set the bit when this fd is cached,
			 * so we don't have to worry about missing a
			 * pollwakeup between VOP_POLL and pollhead_insert.
			 * This forces the first DP_POLL to poll this fd.
			 * The real performance gain comes from subsequent
			 * DP_POLLs.
			 */
			BT_SET(pcp->pc_bitmap, fd);
			if (error != 0) {
				releasef(fd);
				break;
			}
			pdp->pd_fp = fp;
			pdp->pd_events |= pfdp->events;
			if (php != NULL) {
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
				} else {
					if (pdp->pd_php != php) {
						pollhead_delete(pdp->pd_php,
						    pdp);
						pollhead_insert(php, pdp);
						pdp->pd_php = php;
					}
				}

			}
			releasef(fd);
		} else {
			if (pdp == NULL) {
				continue;
			}
			ASSERT(pdp->pd_fd == fd);
			pdp->pd_fp = NULL;
			pdp->pd_events = 0;
			ASSERT(pdp->pd_thread == NULL);
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
			}
			BT_CLEAR(pcp->pc_bitmap, fd);
		}
	}
	mutex_exit(&pcp->pc_lock);
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_flag &= ~DP_WRITER_PRESENT;
	ASSERT(dpep->dpe_refcnt == 1);
	dpep->dpe_refcnt--;
	cv_broadcast(&dpep->dpe_cv);
	mutex_exit(&dpep->dpe_lock);
	kmem_free(pollfdp, uiosize);
	return (error);
}
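
/*
 * Userland sketch of the POLLREMOVE branch above (illustrative only;
 * dpfd and sockfd are hypothetical): an fd is dropped from the cached
 * set by writing it again with POLLREMOVE as the event mask:
 *
 *	pollfd_t pfd;
 *	pfd.fd = sockfd;
 *	pfd.events = POLLREMOVE;
 *	pfd.revents = 0;
 *	(void) write(dpfd, &pfd, sizeof (pfd));
 *
 * Afterwards pd_fp is NULL and the fd's bit in pc_bitmap is clear, so
 * dp_pcache_poll() skips the entry until the fd is registered again.
 */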
/*ARGSUSED*/
static int
dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	timestruc_t	now;
	timestruc_t	rqtime;
	timestruc_t	*rqtp = NULL;
	int		timecheck = 0;
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	int		error = 0;
	STRUCT_DECL(dvpoll, dvpoll);

	if (cmd == DP_POLL) {
		/* do this now, before we sleep on DP_WRITER_PRESENT below */
		timecheck = timechanged;
		gethrestime(&now);
	}
	minor = getminor(dev);
	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	mutex_exit(&devpoll_lock);
	ASSERT(dpep != NULL);
	pcp = dpep->dpe_pcache;
	if (curproc->p_pid != pcp->pc_pid)
		return (EACCES);

	mutex_enter(&dpep->dpe_lock);
	while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
	    (dpep->dpe_writerwait != 0)) {
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			mutex_exit(&dpep->dpe_lock);
			return (EINTR);
		}
	}
	dpep->dpe_refcnt++;
	mutex_exit(&dpep->dpe_lock);

	switch (cmd) {
	case DP_POLL:
	{
		pollstate_t	*ps;
		nfds_t		nfds;
		int		fdcnt = 0;
		int		time_out;
		int		rval;

		STRUCT_INIT(dvpoll, mode);
		error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
		    STRUCT_SIZE(dvpoll));
		if (error) {
			DP_REFRELE(dpep);
			return (EFAULT);
		}

		time_out = STRUCT_FGET(dvpoll, dp_timeout);
		if (time_out > 0) {
			/*
			 * Determine the future time of the requested timeout.
			 */
			rqtp = &rqtime;
			rqtp->tv_sec = time_out / MILLISEC;
			rqtp->tv_nsec = (time_out % MILLISEC) * MICROSEC;
			timespecadd(rqtp, &now);
		}

		if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
			/*
			 * We are just using DP_POLL to sleep, so we don't
			 * need any of the devpoll apparatus. Do not check
			 * for signals if we have a zero timeout.
			 */
			DP_REFRELE(dpep);
			if (time_out == 0)
				return (0);
			mutex_enter(&curthread->t_delay_lock);
			while ((rval = cv_waituntil_sig(&curthread->t_delay_cv,
			    &curthread->t_delay_lock, rqtp, timecheck)) > 0)
				continue;
			mutex_exit(&curthread->t_delay_lock);
			return ((rval == 0)? EINTR : 0);
		}

		/*
		 * XXX It'd be nice not to have to alloc each time, but it
		 * requires another per-thread structure hook. Do it later
		 * if data suggests it.
		 */
		if ((ps = curthread->t_pollstate) == NULL) {
			curthread->t_pollstate = pollstate_create();
			ps = curthread->t_pollstate;
		}
		if (ps->ps_dpbufsize < nfds) {
			struct proc *p = ttoproc(curthread);
			/*
			 * The maximum size should be no larger than the
			 * current maximum open file count.
			 */
			mutex_enter(&p->p_lock);
			if (nfds > p->p_fno_ctl) {
				mutex_exit(&p->p_lock);
				DP_REFRELE(dpep);
				return (EINVAL);
			}
			mutex_exit(&p->p_lock);
			kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) *
			    ps->ps_dpbufsize);
			ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) *
			    nfds, KM_SLEEP);
			ps->ps_dpbufsize = nfds;
		}

		mutex_enter(&pcp->pc_lock);
		for (;;) {
			pcp->pc_flag = 0;
			error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt);
			if (fdcnt > 0 || error != 0)
				break;

			/*
			 * A pollwake has happened since we polled the cache.
			 */
			if (pcp->pc_flag & T_POLLWAKE)
				continue;

			/*
			 * Sleep until we are notified, signaled, or timed out.
			 * Do not check for signals if we have a zero timeout.
			 */
			if (time_out == 0)	/* immediate timeout */
				break;
			rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
			    rqtp, timecheck);
			/*
			 * If we were awakened by a signal or timeout then
			 * break the loop, else poll again.
			 */
			if (rval <= 0) {
				if (rval == 0)	/* signal */
					error = EINTR;
				break;
			}
		}
		mutex_exit(&pcp->pc_lock);

		if (error == 0 && fdcnt > 0) {
			if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll,
			    dp_fds), sizeof (pollfd_t) * fdcnt)) {
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			*rvalp = fdcnt;
		}
		break;
	}
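
	/*
	 * Worked example of the DP_POLL timeout handling above (the value
	 * is hypothetical): dp_timeout == 1500 ms yields a relative rqtime
	 * of { tv_sec = 1, tv_nsec = 500 * MICROSEC }, which timespecadd()
	 * converts into an absolute deadline of now + 1.5 s for
	 * cv_waituntil_sig(). A timeout of 0 polls the cache once and
	 * returns immediately; a negative timeout leaves rqtp == NULL,
	 * i.e., the caller blocks until a pollwakeup or a signal.
	 */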
	case DP_ISPOLLED:
	{
		pollfd_t	pollfd;
		polldat_t	*pdp;

		STRUCT_INIT(dvpoll, mode);
		error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
		if (error) {
			DP_REFRELE(dpep);
			return (EFAULT);
		}
		mutex_enter(&pcp->pc_lock);
		if (pcp->pc_hash == NULL) {
			/*
			 * No need to search because no poll fd
			 * has been cached.
			 */
			mutex_exit(&pcp->pc_lock);
			DP_REFRELE(dpep);
			return (0);
		}
		if (pollfd.fd < 0) {
			mutex_exit(&pcp->pc_lock);
			break;
		}
		pdp = pcache_lookup_fd(pcp, pollfd.fd);
		if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) &&
		    (pdp->pd_fp != NULL)) {
			pollfd.revents = pdp->pd_events;
			if (copyout(&pollfd, (caddr_t)arg,
			    sizeof (pollfd_t))) {
				mutex_exit(&pcp->pc_lock);
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			*rvalp = 1;
		}
		mutex_exit(&pcp->pc_lock);
		break;
	}

	default:
		DP_REFRELE(dpep);
		return (EINVAL);
	}
	DP_REFRELE(dpep);
	return (error);
}
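
/*
 * Userland sketch of DP_ISPOLLED (illustrative only; dpfd and sockfd are
 * hypothetical): the ioctl reports whether an fd is in the cached set,
 * returning 1 and filling revents with the cached pd_events mask (the
 * events being watched, not the events currently pending):
 *
 *	pollfd_t pfd;
 *	pfd.fd = sockfd;
 *	pfd.events = 0;
 *	pfd.revents = 0;
 *	if (ioctl(dpfd, DP_ISPOLLED, &pfd) == 1)
 *		// pfd.revents now holds the watched event mask
 */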
/*ARGSUSED*/
static int
dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	/*
	 * Polling on a /dev/poll fd is not fully supported yet.
	 */
	*reventsp = POLLERR;
	return (0);
}

/*
 * devpoll close should do enough cleanup before the pollcache is deleted,
 * i.e., it should ensure that no one still references the pollcache
 * afterwards. There is no "permission" check in here. Any process holding
 * the last reference to this /dev/poll fd can close it.
 */
/*ARGSUSED*/
static int
dpclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	int		i;
	polldat_t	**hashtbl;
	polldat_t	*pdp;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	devpolltbl[minor] = NULL;
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	ASSERT(pcp != NULL);
	/*
	 * At this point, no other lwp can access this pollcache via the
	 * /dev/poll fd. This pollcache is going away, so do the clean
	 * up without the pc_lock.
	 */
	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
				pdp->pd_fp = NULL;
			}
		}
	}
	/*
	 * pollwakeup() may still interact with this pollcache. Wait until
	 * it is done.
	 */
	mutex_enter(&pcp->pc_no_exit);
	ASSERT(pcp->pc_busy >= 0);
	while (pcp->pc_busy > 0)
		cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
	mutex_exit(&pcp->pc_no_exit);
	pcache_destroy(pcp);
	ASSERT(dpep->dpe_refcnt == 0);
	kmem_free(dpep, sizeof (dp_entry_t));
	return (0);
}