1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/devops.h> 31 #include <sys/conf.h> 32 #include <sys/modctl.h> 33 #include <sys/sunddi.h> 34 #include <sys/stat.h> 35 #include <sys/poll_impl.h> 36 #include <sys/errno.h> 37 #include <sys/kmem.h> 38 #include <sys/mkdev.h> 39 #include <sys/debug.h> 40 #include <sys/file.h> 41 #include <sys/sysmacros.h> 42 #include <sys/systm.h> 43 #include <sys/bitmap.h> 44 #include <sys/devpoll.h> 45 #include <sys/rctl.h> 46 #include <sys/resource.h> 47 48 #define RESERVED 1 49 50 /* local data struct */ 51 static dp_entry_t **devpolltbl; /* dev poll entries */ 52 static size_t dptblsize; 53 54 static kmutex_t devpoll_lock; /* lock protecting dev tbl */ 55 int devpoll_init; /* is /dev/poll initialized already */ 56 57 /* device local functions */ 58 59 static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp); 60 static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp); 61 static int dpioctl(dev_t dev, int cmd, 
    intptr_t arg, int mode, cred_t *credp,
    int *rvalp);
static int dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);
static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp);

/* dev_info node saved at attach time; handed back by dpinfo() */
static dev_info_t *dpdevi;


/*
 * Character entry points for the /dev/poll pseudo driver.  Only open,
 * close, write, ioctl and poll are meaningful; the rest are nodev.
 * D_MP: the driver does its own locking (see Locking Design below).
 */
static struct cb_ops dp_cb_ops = {
	dpopen,			/* open */
	dpclose,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	dpwrite,		/* write */
	dpioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	dppoll,			/* poll */
	ddi_prop_op,		/* prop_op */
	(struct streamtab *)0,	/* streamtab */
	D_MP,			/* flags */
	CB_REV,			/* cb_ops revision */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int dpattach(dev_info_t *, ddi_attach_cmd_t);
static int dpdetach(dev_info_t *, ddi_detach_cmd_t);
static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

static struct dev_ops dp_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dpinfo,			/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dpattach,		/* attach */
	dpdetach,		/* detach */
	nodev,			/* reset */
	&dp_cb_ops,		/* driver operations */
	(struct bus_ops *)NULL,	/* bus operations */
	nulldev			/* power */
};


static struct modldrv modldrv = {
	&mod_driverops,		/* type of module - a driver */
	"Dev Poll driver %I%",
	&dp_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Locking Design
 *
 * The /dev/poll driver shares most of its code with poll sys call whose
 * code is in common/syscall/poll.c. In poll(2) design, the pollcache
 * structure is per lwp. An implicit assumption is made there that some
 * portion of pollcache will never be touched by other lwps. E.g., in
 * poll(2) design, no lwp will ever need to grow bitmap of other lwp.
 * This assumption is not true for /dev/poll; hence the need for extra
 * locking.
 *
 * To allow more parallelism, each /dev/poll file descriptor (indexed by
 * minor number) has its own lock. Since read (dpioctl) is a much more
 * frequent operation than write, we want to allow multiple reads on same
 * /dev/poll fd. However, we prevent writes from being starved by giving
 * priority to write operation. Theoretically writes can starve reads as
 * well. But in practical sense this is not important because (1) writes
 * happen less often than reads, and (2) write operation defines the
 * content of poll fd a cache set. If writes happen so often that they
 * can starve reads, that means the cached set is very unstable. It may
 * not make sense to read an unstable cache set anyway. Therefore, the
 * writers starving readers case is not handled in this design.
 */

/*
 * Module load: set up the minor-number table and its lock before
 * mod_install() makes the driver reachable; undo both if install fails.
 */
int
_init()
{
	int	error;

	dptblsize = DEVPOLLSIZE;
	devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
	mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL);
	devpoll_init = 1;
	if ((error = mod_install(&modlinkage)) != 0) {
		/* install failed: tear down in reverse order */
		mutex_destroy(&devpoll_lock);
		kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
		devpoll_init = 0;
	}
	return (error);
}

/*
 * Module unload: free driver-global state only after mod_remove()
 * guarantees no new entry into the driver is possible.
 */
int
_fini()
{
	int	error;

	if ((error = mod_remove(&modlinkage)) != 0) {
		return (error);
	}
	mutex_destroy(&devpoll_lock);
	kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Attach: create the single "poll" minor node.  Actual per-open minors
 * are cloned out of devpolltbl by dpopen().
 */
/*ARGSUSED*/
static int
dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL)
	    == DDI_FAILURE) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}
	dpdevi = devi;
	return (DDI_SUCCESS);
}

/*
 * Detach: remove the minor node.  Per-open state lives in devpolltbl
 * entries and is released by dpclose(), not here.
 */
static int
dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/*
 * getinfo(9E): report the dev_info node; instance is always 0 for this
 * single-instance pseudo device.
 */
/* ARGSUSED */
static int
dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dpdevi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*
 * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major
 * differences are: (1) /dev/poll requires scanning the bitmap starting at
 * where it was stopped last time, instead of always starting from 0,
 * (2) since user may not have cleaned up the cached fds when they are
 * closed, some polldats in cache may refer to closed or reused fds. We
 * need to check for those cases.
 *
 * Collects up to nfds ready descriptors into pfdp and returns the count
 * through fdcntp; returns 0 or an error from VOP_POLL.  Caller must hold
 * pcp->pc_lock.
 *
 * NOTE: Upon closing an fd, automatic poll cache cleanup is done for
 *	 poll(2) caches but NOT for /dev/poll caches. So expect some
 *	 stale entries!
 */
static int
dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
{
	int		start, ostart, end;
	int		fdcnt, fd;
	boolean_t	done;
	file_t		*fp;
	short		revent;
	boolean_t	no_wrap;
	pollhead_t	*php;
	polldat_t	*pdp;
	int		error = 0;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));
	if (pcp->pc_bitmap == NULL) {
		/*
		 * No need to search because no poll fd
		 * has been cached.
		 */
		return (error);
	}
retry:
	start = ostart = pcp->pc_mapstart;
	end = pcp->pc_mapend;
	php = NULL;

	if (start == 0) {
		/*
		 * started from the very beginning, no need to wrap around.
		 */
		no_wrap = B_TRUE;
	} else {
		no_wrap = B_FALSE;
	}
	done = B_FALSE;
	fdcnt = 0;
	/* fdcnt never goes negative, so the compare against nfds is safe */
	while ((fdcnt < nfds) && !done) {
		php = NULL;
		revent = 0;
		/*
		 * Examine the bit map in a circular fashion
		 * to avoid starvation. Always resume from
		 * last stop. Scan till end of the map. Then
		 * wrap around.
		 */
		fd = bt_getlowbit(pcp->pc_bitmap, start, end);
		ASSERT(fd <= end);
		if (fd >= 0) {
			if (fd == end) {
				if (no_wrap) {
					done = B_TRUE;
				} else {
					/* wrap: rescan [0, ostart) next */
					start = 0;
					end = ostart - 1;
					no_wrap = B_TRUE;
				}
			} else {
				start = fd + 1;
			}
			pdp = pcache_lookup_fd(pcp, fd);
			ASSERT(pdp != NULL);
			ASSERT(pdp->pd_fd == fd);
			if (pdp->pd_fp == NULL) {
				/*
				 * The fd is POLLREMOVed. This fd is
				 * logically no longer cached. So move
				 * on to the next one.
				 */
				continue;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd has been closed, but user has not
				 * done a POLLREMOVE on this fd yet. Instead
				 * of cleaning it here implicitly, we return
				 * POLLNVAL. This is consistent with poll(2)
				 * polling a closed fd. Hope this will remind
				 * user to do a POLLREMOVE.
				 */
				pfdp[fdcnt].fd = fd;
				pfdp[fdcnt].revents = POLLNVAL;
				fdcnt++;
				continue;
			}
			if (fp != pdp->pd_fp) {
				/*
				 * user is polling on a cached fd which was
				 * closed and then reused. Unfortunately
				 * there is no good way to inform user.
				 * If the file struct is also reused, we
				 * may not be able to detect the fd reuse
				 * at all. As long as this does not
				 * cause system failure and/or memory leak,
				 * we will play along. Man page states if
				 * user does not clean up closed fds, polling
				 * results will be indeterministic.
				 *
				 * XXX - perhaps log the detection of fd
				 *	 reuse?
				 */
				pdp->pd_fp = fp;
			}
			/*
			 * XXX - pollrelock() logic needs to know which
			 * pollcache lock to grab. It'd be a
			 * cleaner solution if we could pass pcp as
			 * an argument in VOP_POLL interface instead
			 * of implicitly passing it using thread_t
			 * struct. On the other hand, changing VOP_POLL
			 * interface will require all driver/file system
			 * poll routine to change. May want to revisit
			 * the tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
			    &revent, &php);
			curthread->t_pollcache = NULL;
			releasef(fd);
			if (error != 0) {
				break;
			}
			/*
			 * layered devices (e.g. console driver)
			 * may change the vnode and thus the pollhead
			 * pointer out from underneath us.
			 */
			if (php != NULL && pdp->pd_php != NULL &&
			    php != pdp->pd_php) {
				/* re-register on the new pollhead, rescan */
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = php;
				pollhead_insert(php, pdp);
				/*
				 * The bit should still be set.
				 */
				ASSERT(BT_TEST(pcp->pc_bitmap, fd));
				goto retry;
			}

			if (revent != 0) {
				pfdp[fdcnt].fd = fd;
				pfdp[fdcnt].events = pdp->pd_events;
				pfdp[fdcnt].revents = revent;
				fdcnt++;
			} else if (php != NULL) {
				/*
				 * We clear a bit or cache a poll fd if
				 * the driver returns a poll head ptr,
				 * which is expected in the case of 0
				 * revents. Some buggy driver may return
				 * NULL php pointer with 0 revents. In
				 * this case, we just treat the driver as
				 * "noncachable" and not clearing the bit
				 * in bitmap.
				 */
				if ((pdp->pd_php != NULL) &&
				    ((pcp->pc_flag & T_POLLWAKE) == 0)) {
					BT_CLEAR(pcp->pc_bitmap, fd);
				}
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
				}
			}
		} else {
			/*
			 * No bit set in the range. Check for wrap around.
			 */
			if (!no_wrap) {
				start = 0;
				end = ostart - 1;
				no_wrap = B_TRUE;
			} else {
				done = B_TRUE;
			}
		}
	}

	if (!done) {
		/* remember where to resume on the next DP_POLL */
		pcp->pc_mapstart = start;
	}
	ASSERT(*fdcntp == 0);
	*fdcntp = fdcnt;
	return (error);
}

/*
 * open(9E): reserve a free minor slot (cloning), then hook up a fresh
 * dp_entry_t/pollcache pair owned by the opening process.
 */
/*ARGSUSED*/
static int
dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t		minordev;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;

	ASSERT(devpoll_init);
	ASSERT(dptblsize <= MAXMIN);
	mutex_enter(&devpoll_lock);
	/*
	 * Find the first free minor.  RESERVED parks the slot so we can
	 * drop devpoll_lock while allocating.
	 */
	for (minordev = 0; minordev < dptblsize; minordev++) {
		if (devpolltbl[minordev] == NULL) {
			devpolltbl[minordev] = (dp_entry_t *)RESERVED;
			break;
		}
	}
	if (minordev == dptblsize) {
		dp_entry_t	**newtbl;
		size_t		oldsize;

		/*
		 * Used up every entry in the existing devpoll table.
		 * Grow the table by DEVPOLLSIZE.
		 */
		if ((oldsize = dptblsize) >= MAXMIN) {
			mutex_exit(&devpoll_lock);
			return (ENXIO);
		}
		dptblsize += DEVPOLLSIZE;
		if (dptblsize > MAXMIN) {
			dptblsize = MAXMIN;
		}
		newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
		bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize);
		kmem_free(devpolltbl, sizeof (caddr_t) * oldsize);
		devpolltbl = newtbl;
		devpolltbl[minordev] = (dp_entry_t *)RESERVED;
	}
	mutex_exit(&devpoll_lock);

	dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP);
	/*
	 * allocate a pollcache skeleton here. Delay allocating bitmap
	 * structures until dpwrite() time, since we don't know the
	 * optimal size yet.
	 */
	pcp = pcache_alloc();
	dpep->dpe_pcache = pcp;
	/* record the owner; dpwrite/dpioctl refuse other processes */
	pcp->pc_pid = curproc->p_pid;
	*devp = makedevice(getmajor(*devp), minordev);	/* clone the driver */
	mutex_enter(&devpoll_lock);
	ASSERT(minordev < dptblsize);
	ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED);
	devpolltbl[minordev] = dpep;
	mutex_exit(&devpoll_lock);
	return (0);
}

/*
 * Write to dev/poll add/remove fd's to/from a cached poll fd set,
 * or change poll events for a watched fd.
 */
/*ARGSUSED*/
static int
dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	pollfd_t	*pollfdp, *pfdp;
	int		error;
	ssize_t		uiosize;
	nfds_t		pollfdnum;
	struct pollhead	*php = NULL;
	polldat_t	*pdp;
	int		fd;
	file_t		*fp;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	if (curproc->p_pid != pcp->pc_pid) {
		/* only the opening process may modify the cached set */
		return (EACCES);
	}
	uiosize = uiop->uio_resid;
	pollfdnum = uiosize / sizeof (pollfd_t);
	mutex_enter(&curproc->p_lock);
	/* bound the request by the process's RLIMIT_NOFILE rctl */
	if (pollfdnum > (uint_t)rctl_enforced_value(
	    rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
		    curproc->p_rctls, curproc, RCA_SAFE);
		mutex_exit(&curproc->p_lock);
		return (set_errno(EINVAL));
	}
	mutex_exit(&curproc->p_lock);
	/*
	 * Copy in the pollfd array. Walk through the array and add
	 * each polled fd to the cached set.
	 */
	pollfdp = kmem_alloc(uiosize, KM_SLEEP);

	/*
	 * Although /dev/poll uses the write(2) interface to cache fds, it's
	 * not supposed to function as a seekable device. To prevent offset
	 * from growing and eventually exceed the maximum, reset the offset
	 * here for every call.
	 */
	uiop->uio_loffset = 0;
	if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop))
	    != 0) {
		kmem_free(pollfdp, uiosize);
		return (error);
	}
	/*
	 * We are about to enter the core portion of dpwrite(). Make sure this
	 * write has exclusive access in this portion of the code, i.e., no
	 * other writers in this code and no other readers in dpioctl.
	 * dpe_writerwait gives writers priority over new readers.
	 */
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_writerwait++;
	while (dpep->dpe_refcnt != 0) {
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			/* interrupted by a signal: undo and bail out */
			dpep->dpe_writerwait--;
			mutex_exit(&dpep->dpe_lock);
			kmem_free(pollfdp, uiosize);
			return (set_errno(EINTR));
		}
	}
	dpep->dpe_writerwait--;
	dpep->dpe_flag |= DP_WRITER_PRESENT;
	dpep->dpe_refcnt++;
	mutex_exit(&dpep->dpe_lock);

	mutex_enter(&pcp->pc_lock);
	if (pcp->pc_bitmap == NULL) {
		/* first write: size the cache to this request */
		pcache_create(pcp, pollfdnum);
	}
	for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) {
		fd = pfdp->fd;
		/* unsigned compare also rejects negative fds */
		if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles)
			continue;
		pdp = pcache_lookup_fd(pcp, fd);
		if (pfdp->events != POLLREMOVE) {
			if (pdp == NULL) {
				pdp = pcache_alloc_fd(0);
				pdp->pd_fd = fd;
				pdp->pd_pcache = pcp;
				pcache_insert_fd(pcp, pdp, pollfdnum);
			}
			ASSERT(pdp->pd_fd == fd);
			ASSERT(pdp->pd_pcache == pcp);
			if (fd >= pcp->pc_mapsize) {
				/* pcache_grow_map may block; drop the lock */
				mutex_exit(&pcp->pc_lock);
				pcache_grow_map(pcp, fd);
				mutex_enter(&pcp->pc_lock);
			}
			if (fd > pcp->pc_mapend) {
				pcp->pc_mapend = fd;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd is not valid. Since we can't pass
				 * this error back in the write() call, set
				 * the bit in bitmap to force DP_POLL ioctl
				 * to examine it.
				 */
				BT_SET(pcp->pc_bitmap, fd);
				pdp->pd_events |= pfdp->events;
				continue;
			}
			/*
			 * Don't do VOP_POLL for an already cached fd with
			 * same poll events.
			 */
			if ((pdp->pd_events == pfdp->events) &&
			    (pdp->pd_fp != NULL)) {
				/*
				 * the events are already cached
				 */
				releasef(fd);
				continue;
			}

			/*
			 * do VOP_POLL and cache this poll fd.
			 */
			/*
			 * XXX - pollrelock() logic needs to know which
			 * pollcache lock to grab. It'd be a
			 * cleaner solution if we could pass pcp as
			 * an argument in VOP_POLL interface instead
			 * of implicitly passing it using thread_t
			 * struct. On the other hand, changing VOP_POLL
			 * interface will require all driver/file system
			 * poll routine to change. May want to revisit
			 * the tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
			    &pfdp->revents, &php);
			curthread->t_pollcache = NULL;
			/*
			 * We always set the bit when this fd is cached.
			 * So we don't have to worry about missing a
			 * pollwakeup between VOP_POLL and pollhead_insert.
			 * This forces the first DP_POLL to poll this fd.
			 * Real performance gain comes from subsequent
			 * DP_POLL.
			 */
			BT_SET(pcp->pc_bitmap, fd);
			if (error != 0) {
				releasef(fd);
				break;
			}
			pdp->pd_fp = fp;
			pdp->pd_events |= pfdp->events;
			if (php != NULL) {
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
				} else {
					if (pdp->pd_php != php) {
						/* vnode's pollhead changed */
						pollhead_delete(pdp->pd_php,
						    pdp);
						pollhead_insert(php, pdp);
						pdp->pd_php = php;
					}
				}

			}
			releasef(fd);
		} else {
			/* POLLREMOVE: detach this fd from the cached set */
			if (pdp == NULL) {
				continue;
			}
			ASSERT(pdp->pd_fd == fd);
			pdp->pd_fp = NULL;
			pdp->pd_events = 0;
			ASSERT(pdp->pd_thread == NULL);
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
			}
			BT_CLEAR(pcp->pc_bitmap, fd);
		}
	}
	mutex_exit(&pcp->pc_lock);
	/* release writer status and wake anyone queued on dpe_cv */
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_flag &= ~DP_WRITER_PRESENT;
	ASSERT(dpep->dpe_refcnt == 1);
	dpep->dpe_refcnt--;
	cv_broadcast(&dpep->dpe_cv);
	mutex_exit(&dpep->dpe_lock);
	kmem_free(pollfdp, uiosize);
	return (error);
}

/*
 * ioctl(9E): DP_POLL retrieves ready fds from the cached set, optionally
 * blocking until the dvpoll timeout expires; DP_ISPOLLED reports whether
 * a given fd is currently in the cached set.
 */
/*ARGSUSED*/
static int
dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	timestruc_t	now;
	timestruc_t	rqtime;
	timestruc_t	*rqtp = NULL;
	int		timecheck = 0;
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	int		error = 0;
	STRUCT_DECL(dvpoll, dvpoll);

	if (cmd == DP_POLL) {
		/* do this now, before we sleep on DP_WRITER_PRESENT below */
		timecheck = timechanged;
		gethrestime(&now);
	}
	minor = getminor(dev);
	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	mutex_exit(&devpoll_lock);
	ASSERT(dpep != NULL);
	pcp = dpep->dpe_pcache;
	if (curproc->p_pid != pcp->pc_pid)
		return (EACCES);

	/* block while a writer is active or waiting (writer priority) */
	mutex_enter(&dpep->dpe_lock);
	while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
	    (dpep->dpe_writerwait != 0)) {
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			mutex_exit(&dpep->dpe_lock);
			return (EINTR);
		}
	}
	/* reader reference; dropped via DP_REFRELE on every exit path */
	dpep->dpe_refcnt++;
	mutex_exit(&dpep->dpe_lock);

	switch (cmd) {
	case DP_POLL:
	{
		pollstate_t	*ps;
		nfds_t		nfds;
		int		fdcnt = 0;
		int		time_out;
		int		rval;

		STRUCT_INIT(dvpoll, mode);
		error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
		    STRUCT_SIZE(dvpoll));
		if (error) {
			DP_REFRELE(dpep);
			return (EFAULT);
		}

		time_out = STRUCT_FGET(dvpoll, dp_timeout);
		if (time_out > 0) {
			/*
			 * Determine the future time of the requested timeout.
			 */
			rqtp = &rqtime;
			rqtp->tv_sec = time_out / MILLISEC;
			rqtp->tv_nsec = (time_out % MILLISEC) * MICROSEC;
			timespecadd(rqtp, &now);
		}

		if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
			/*
			 * We are just using DP_POLL to sleep, so
			 * we don't need any of the devpoll apparatus.
			 * Do not check for signals if we have a zero timeout.
			 */
			DP_REFRELE(dpep);
			if (time_out == 0)	/* no sleep at all */
				return (0);
			mutex_enter(&curthread->t_delay_lock);
			while ((rval = cv_waituntil_sig(&curthread->t_delay_cv,
			    &curthread->t_delay_lock, rqtp, timecheck)) > 0)
				continue;
			mutex_exit(&curthread->t_delay_lock);
			/* rval == 0 means a signal woke us up */
			return ((rval == 0)? EINTR : 0);
		}

		/*
		 * XXX It'd be nice not to have to alloc each time.
		 * But it requires another per thread structure hook.
		 * Do it later if there is data suggesting that.
		 */
		if ((ps = curthread->t_pollstate) == NULL) {
			curthread->t_pollstate = pollstate_create();
			ps = curthread->t_pollstate;
		}
		if (ps->ps_dpbufsize < nfds) {
			struct proc *p = ttoproc(curthread);
			/*
			 * The maximum size should be no larger than
			 * current maximum open file count.
			 */
			mutex_enter(&p->p_lock);
			if (nfds >= p->p_fno_ctl) {
				mutex_exit(&p->p_lock);
				DP_REFRELE(dpep);
				return (EINVAL);
			}
			mutex_exit(&p->p_lock);
			/* grow the per-thread result buffer to nfds */
			kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) *
			    ps->ps_dpbufsize);
			ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) *
			    nfds, KM_SLEEP);
			ps->ps_dpbufsize = nfds;
		}

		mutex_enter(&pcp->pc_lock);
		for (;;) {
			pcp->pc_flag = 0;
			error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt);
			if (fdcnt > 0 || error != 0)
				break;

			/*
			 * A pollwake has happened since we polled cache.
			 * Rescan rather than sleep so we don't miss it.
			 */
			if (pcp->pc_flag & T_POLLWAKE)
				continue;

			/*
			 * Sleep until we are notified, signalled, or timed out.
			 * Do not check for signals if we have a zero timeout.
			 */
			if (time_out == 0)	/* immediate timeout */
				break;
			rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
			    rqtp, timecheck);
			/*
			 * If we were awakened by a signal or timeout
			 * then break the loop, else poll again.
			 */
			if (rval <= 0) {
				if (rval == 0)	/* signal */
					error = EINTR;
				break;
			}
		}
		mutex_exit(&pcp->pc_lock);

		if (error == 0 && fdcnt > 0) {
			if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll,
			    dp_fds), sizeof (pollfd_t) * fdcnt)) {
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			/* number of ready pollfds is the ioctl result */
			*rvalp = fdcnt;
		}
		break;
	}

	case DP_ISPOLLED:
	{
		pollfd_t	pollfd;
		polldat_t	*pdp;

		STRUCT_INIT(dvpoll, mode);
		error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
		if (error) {
			DP_REFRELE(dpep);
			return (EFAULT);
		}
		mutex_enter(&pcp->pc_lock);
		if (pcp->pc_hash == NULL) {
			/*
			 * No need to search because no poll fd
			 * has been cached.
			 */
			mutex_exit(&pcp->pc_lock);
			DP_REFRELE(dpep);
			return (0);
		}
		if (pollfd.fd < 0) {
			mutex_exit(&pcp->pc_lock);
			break;
		}
		pdp = pcache_lookup_fd(pcp, pollfd.fd);
		if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) &&
		    (pdp->pd_fp != NULL)) {
			/* report the cached events back in revents */
			pollfd.revents = pdp->pd_events;
			if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) {
				mutex_exit(&pcp->pc_lock);
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			*rvalp = 1;
		}
		mutex_exit(&pcp->pc_lock);
		break;
	}

	default:
		DP_REFRELE(dpep);
		return (EINVAL);
	}
	DP_REFRELE(dpep);
	return (error);
}

/*ARGSUSED*/
static int
dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	/*
	 * Polling on a /dev/poll fd is not fully supported yet.
	 */
	*reventsp = POLLERR;
	return (0);
}

/*
 * devpoll close should do enough clean up before the pollcache is deleted,
 * i.e., it should ensure no one still references the pollcache later.
 * There is no "permission" check in here. Any process having the last
 * reference of this /dev/poll fd can close.
 */
/*ARGSUSED*/
static int
dpclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	int		i;
	polldat_t	**hashtbl;
	polldat_t	*pdp;

	minor = getminor(dev);

	/* unhook from the minor table so no new access can start */
	mutex_enter(&devpoll_lock);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	devpolltbl[minor] = NULL;
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	ASSERT(pcp != NULL);
	/*
	 * At this point, no other lwp can access this pollcache via the
	 * /dev/poll fd. This pollcache is going away, so do the clean
	 * up without the pc_lock.
	 */
	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
				pdp->pd_fp = NULL;
			}
		}
	}
	/*
	 * pollwakeup() may still interact with this pollcache. Wait until
	 * it is done.
	 */
	mutex_enter(&pcp->pc_no_exit);
	ASSERT(pcp->pc_busy >= 0);
	while (pcp->pc_busy > 0)
		cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
	mutex_exit(&pcp->pc_no_exit);
	pcache_destroy(pcp);
	ASSERT(dpep->dpe_refcnt == 0);
	kmem_free(dpep, sizeof (dp_entry_t));
	return (0);
}