/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <sys/types.h>
#include <sys/devops.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/stat.h>
#include <sys/poll_impl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/bitmap.h>
#include <sys/devpoll.h>
#include <sys/rctl.h>
#include <sys/resource.h>

#define	RESERVED	1

/* local data struct */
static	dp_entry_t	**devpolltbl;	/* dev poll entries */
static	size_t		dptblsize;

static	kmutex_t	devpoll_lock;	/* lock protecting dev tbl */
int			devpoll_init;	/* is /dev/poll initialized already */

/* device local functions */

static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp);
static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp);
static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp);
static int dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);
static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp);
static dev_info_t *dpdevi;


static struct cb_ops dp_cb_ops = {
	dpopen,			/* open */
	dpclose,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	dpwrite,		/* write */
	dpioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	dppoll,			/* poll */
	ddi_prop_op,		/* prop_op */
	(struct streamtab *)0,	/* streamtab */
	D_MP,			/* flags */
	CB_REV,			/* cb_ops revision */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int dpattach(dev_info_t *, ddi_attach_cmd_t);
static int dpdetach(dev_info_t *, ddi_detach_cmd_t);
static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

static struct dev_ops dp_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dpinfo,			/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dpattach,		/* attach */
	dpdetach,		/* detach */
	nodev,			/* reset */
	&dp_cb_ops,		/* driver operations */
	(struct bus_ops *)NULL,	/* bus operations */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};


static struct modldrv modldrv = {
	&mod_driverops,		/* type of module - a driver */
	"/dev/poll driver",
	&dp_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Locking Design
 *
 * The /dev/poll driver shares most of its code with the poll system call,
 * whose code is in common/syscall/poll.c. In the poll(2) design, the
 * pollcache structure is per lwp. An implicit assumption is made there
 * that some portion of the pollcache will never be touched by other lwps.
 * E.g., in the poll(2) design, no lwp will ever need to grow the bitmap
 * of another lwp. This assumption is not true for /dev/poll; hence the
 * need for extra locking.
 *
 * To allow more parallelism, each /dev/poll file descriptor (indexed by
 * minor number) has its own lock. Since read (dpioctl) is a much more
 * frequent operation than write, we want to allow multiple reads on the
 * same /dev/poll fd. However, we prevent writes from being starved by
 * giving priority to write operations. Theoretically writes can starve
 * reads as well, but in practice this is not important because (1) writes
 * happen less often than reads, and (2) the write operation defines the
 * contents of the cached poll fd set. If writes happen so often that they
 * can starve reads, the cached set is very unstable, and it may not make
 * sense to read an unstable cache set anyway. Therefore, the
 * writers-starving-readers case is not handled in this design.
 */
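/*
 * A minimal sketch (not compiled; illustrative only) of the
 * writer-priority protocol described above, as it is realized by
 * dpwrite() and dpioctl() below:
 *
 *	writer entry (dpwrite):
 *		dpe_writerwait++;
 *		while (dpe_refcnt != 0)
 *			cv_wait_sig_swap(&dpe_cv, &dpe_lock);
 *		dpe_writerwait--;
 *		dpe_flag |= DP_WRITER_PRESENT;
 *		dpe_refcnt++;
 *
 *	reader entry (dpioctl):
 *		while ((dpe_flag & DP_WRITER_PRESENT) ||
 *		    dpe_writerwait != 0)
 *			cv_wait_sig_swap(&dpe_cv, &dpe_lock);
 *		dpe_refcnt++;
 *
 * Because readers also wait while a writer is merely *waiting*
 * (dpe_writerwait != 0), writers take priority over readers; both sides
 * drop their reference via DP_REFRELE() on exit.
 */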
int
_init()
{
	int	error;

	dptblsize = DEVPOLLSIZE;
	devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
	mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL);
	devpoll_init = 1;
	if ((error = mod_install(&modlinkage)) != 0) {
		mutex_destroy(&devpoll_lock);
		kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
		devpoll_init = 0;
	}
	return (error);
}

int
_fini()
{
	int error;

	if ((error = mod_remove(&modlinkage)) != 0) {
		return (error);
	}
	mutex_destroy(&devpoll_lock);
	kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*ARGSUSED*/
static int
dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, NULL)
	    == DDI_FAILURE) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}
	dpdevi = devi;
	return (DDI_SUCCESS);
}

static int
dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dpdevi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}
/*
 * dp_pcache_poll has similar logic to pcache_poll() in poll.c. The major
 * differences are: (1) /dev/poll requires scanning the bitmap starting at
 * where it was stopped last time, instead of always starting from 0;
 * (2) since the user may not have cleaned up the cached fds when they
 * were closed, some polldats in the cache may refer to closed or reused
 * fds. We need to check for those cases.
 *
 * NOTE: Upon closing an fd, automatic poll cache cleanup is done for
 *	 poll(2) caches but NOT for /dev/poll caches. So expect some
 *	 stale entries!
 */
static int
dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp)
{
	int		start, ostart, end;
	int		fdcnt, fd;
	boolean_t	done;
	file_t		*fp;
	short		revent;
	boolean_t	no_wrap;
	pollhead_t	*php;
	polldat_t	*pdp;
	int		error = 0;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));
	if (pcp->pc_bitmap == NULL) {
		/*
		 * No need to search because no poll fd
		 * has been cached.
		 */
		return (error);
	}
retry:
	start = ostart = pcp->pc_mapstart;
	end = pcp->pc_mapend;
	php = NULL;

	if (start == 0) {
		/*
		 * Started from the very beginning; no need to wrap around.
		 */
		no_wrap = B_TRUE;
	} else {
		no_wrap = B_FALSE;
	}
	done = B_FALSE;
	fdcnt = 0;
	while ((fdcnt < nfds) && !done) {
		php = NULL;
		revent = 0;
		/*
		 * Examine the bitmap in a circular fashion
		 * to avoid starvation. Always resume from
		 * the last stop. Scan to the end of the map,
		 * then wrap around.
		 */
		fd = bt_getlowbit(pcp->pc_bitmap, start, end);
		ASSERT(fd <= end);
		if (fd >= 0) {
			if (fd == end) {
				if (no_wrap) {
					done = B_TRUE;
				} else {
					start = 0;
					end = ostart - 1;
					no_wrap = B_TRUE;
				}
			} else {
				start = fd + 1;
			}
			pdp = pcache_lookup_fd(pcp, fd);
repoll:
			ASSERT(pdp != NULL);
			ASSERT(pdp->pd_fd == fd);
			if (pdp->pd_fp == NULL) {
				/*
				 * The fd is POLLREMOVed. This fd is
				 * logically no longer cached. So move
				 * on to the next one.
				 */
				continue;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd has been closed, but the user has
				 * not done a POLLREMOVE on this fd yet.
				 * Instead of cleaning it up here implicitly,
				 * we return POLLNVAL. This is consistent
				 * with poll(2) polling a closed fd.
				 * Hopefully this will remind the user to do
				 * a POLLREMOVE.
				 */
				pfdp[fdcnt].fd = fd;
				pfdp[fdcnt].revents = POLLNVAL;
				fdcnt++;
				continue;
			}
			if (fp != pdp->pd_fp) {
				/*
				 * The user is polling on a cached fd which
				 * was closed and then reused. Unfortunately
				 * there is no good way to inform the user.
				 * If the file struct is also reused, we may
				 * not be able to detect the fd reuse at all.
				 * As long as this does not cause a system
				 * failure and/or a memory leak, we will play
				 * along. The man page states that if the
				 * user does not clean up closed fds, polling
				 * results are indeterminate.
				 *
				 * XXX - perhaps log the detection of fd
				 * reuse?
				 */
				pdp->pd_fp = fp;
			}
			/*
			 * XXX - pollrelock() logic needs to know which
			 * pollcache lock to grab. It'd be a cleaner
			 * solution if we could pass pcp as an argument
			 * in the VOP_POLL interface instead of implicitly
			 * passing it via the thread_t struct. On the
			 * other hand, changing the VOP_POLL interface
			 * would require every driver/file system poll
			 * routine to change. May want to revisit the
			 * tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
			    &revent, &php, NULL);
			curthread->t_pollcache = NULL;
			releasef(fd);
			if (error != 0) {
				break;
			}
			/*
			 * Layered devices (e.g. the console driver)
			 * may change the vnode and thus the pollhead
			 * pointer out from underneath us.
			 */
			if (php != NULL && pdp->pd_php != NULL &&
			    php != pdp->pd_php) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = php;
				pollhead_insert(php, pdp);
				/*
				 * The bit should still be set.
				 */
				ASSERT(BT_TEST(pcp->pc_bitmap, fd));
				goto retry;
			}

			if (revent != 0) {
				pfdp[fdcnt].fd = fd;
				pfdp[fdcnt].events = pdp->pd_events;
				pfdp[fdcnt].revents = revent;
				fdcnt++;
			} else if (php != NULL) {
				/*
				 * We clear a bit or cache a poll fd if
				 * the driver returns a poll head ptr,
				 * which is expected in the case of 0
				 * revents. Some buggy drivers may return
				 * a NULL php pointer with 0 revents. In
				 * that case, we just treat the driver as
				 * "noncachable" and do not clear the bit
				 * in the bitmap.
				 */
				if ((pdp->pd_php != NULL) &&
				    ((pcp->pc_flag & T_POLLWAKE) == 0)) {
					BT_CLEAR(pcp->pc_bitmap, fd);
				}
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
					/*
					 * An event of interest may have
					 * arrived between the VOP_POLL() and
					 * the pollhead_insert(); check again.
					 */
					goto repoll;
				}
			}
		} else {
			/*
			 * No bit set in the range. Check for wrap around.
			 */
			if (!no_wrap) {
				start = 0;
				end = ostart - 1;
				no_wrap = B_TRUE;
			} else {
				done = B_TRUE;
			}
		}
	}

	if (!done) {
		pcp->pc_mapstart = start;
	}
	ASSERT(*fdcntp == 0);
	*fdcntp = fdcnt;
	return (error);
}
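/*
 * From user level, the stale-entry behavior described above looks like
 * the following hedged sketch (dpfd is assumed to be a descriptor
 * returned by open("/dev/poll", O_RDWR), and fd a cached descriptor):
 *
 *	(void) close(fd);
 *	n = ioctl(dpfd, DP_POLL, &dvp);	// fd comes back with
 *					// POLLNVAL in revents
 *	pfd.fd = fd;
 *	pfd.events = POLLREMOVE;	// explicit cleanup
 *	(void) write(dpfd, &pfd, sizeof (pfd));
 */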
/*ARGSUSED*/
static int
dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t		minordev;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;

	ASSERT(devpoll_init);
	ASSERT(dptblsize <= MAXMIN);
	mutex_enter(&devpoll_lock);
	for (minordev = 0; minordev < dptblsize; minordev++) {
		if (devpolltbl[minordev] == NULL) {
			devpolltbl[minordev] = (dp_entry_t *)RESERVED;
			break;
		}
	}
	if (minordev == dptblsize) {
		dp_entry_t	**newtbl;
		size_t		oldsize;

		/*
		 * Used up every entry in the existing devpoll table.
		 * Grow the table by DEVPOLLSIZE.
		 */
		if ((oldsize = dptblsize) >= MAXMIN) {
			mutex_exit(&devpoll_lock);
			return (ENXIO);
		}
		dptblsize += DEVPOLLSIZE;
		if (dptblsize > MAXMIN) {
			dptblsize = MAXMIN;
		}
		newtbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
		bcopy(devpolltbl, newtbl, sizeof (caddr_t) * oldsize);
		kmem_free(devpolltbl, sizeof (caddr_t) * oldsize);
		devpolltbl = newtbl;
		devpolltbl[minordev] = (dp_entry_t *)RESERVED;
	}
	mutex_exit(&devpoll_lock);

	dpep = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP);
	/*
	 * Allocate a pollcache skeleton here. Delay allocating bitmap
	 * structures until dpwrite() time, since we don't know the
	 * optimal size yet.
	 */
	pcp = pcache_alloc();
	dpep->dpe_pcache = pcp;
	pcp->pc_pid = curproc->p_pid;
	*devp = makedevice(getmajor(*devp), minordev); /* clone the driver */
	mutex_enter(&devpoll_lock);
	ASSERT(minordev < dptblsize);
	ASSERT(devpolltbl[minordev] == (dp_entry_t *)RESERVED);
	devpolltbl[minordev] = dpep;
	mutex_exit(&devpoll_lock);
	return (0);
}
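/*
 * User-level usage sketch (illustrative only): each open(2) of /dev/poll
 * clones a fresh minor and therefore gets a private pollcache, so a
 * process can maintain several independent cached fd sets:
 *
 *	int dpfd = open("/dev/poll", O_RDWR);
 *	if (dpfd < 0)
 *		err(1, "open /dev/poll");
 */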
/*
 * A write to /dev/poll adds fds to or removes fds from the cached poll
 * fd set, or changes the poll events for a watched fd.
 */
/*ARGSUSED*/
static int
dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	pollfd_t	*pollfdp, *pfdp;
	int		error;
	ssize_t		uiosize;
	nfds_t		pollfdnum;
	struct pollhead	*php = NULL;
	polldat_t	*pdp;
	int		fd;
	file_t		*fp;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	if (curproc->p_pid != pcp->pc_pid) {
		return (EACCES);
	}
	uiosize = uiop->uio_resid;
	pollfdnum = uiosize / sizeof (pollfd_t);
	mutex_enter(&curproc->p_lock);
	if (pollfdnum > (uint_t)rctl_enforced_value(
	    rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
		    curproc->p_rctls, curproc, RCA_SAFE);
		mutex_exit(&curproc->p_lock);
		return (set_errno(EINVAL));
	}
	mutex_exit(&curproc->p_lock);
	/*
	 * Copy in the pollfd array. Walk through the array and add
	 * each polled fd to the cached set.
	 */
	pollfdp = kmem_alloc(uiosize, KM_SLEEP);

	/*
	 * Although /dev/poll uses the write(2) interface to cache fds, it's
	 * not supposed to function as a seekable device. To prevent the
	 * offset from growing and eventually exceeding the maximum, reset
	 * the offset here for every call.
	 */
	uiop->uio_loffset = 0;
	if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop))
	    != 0) {
		kmem_free(pollfdp, uiosize);
		return (error);
	}
	/*
	 * We are about to enter the core portion of dpwrite(). Make sure this
	 * write has exclusive access in this portion of the code, i.e., no
	 * other writers in this code and no other readers in dpioctl.
	 */
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_writerwait++;
	while (dpep->dpe_refcnt != 0) {
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			dpep->dpe_writerwait--;
			mutex_exit(&dpep->dpe_lock);
			kmem_free(pollfdp, uiosize);
			return (set_errno(EINTR));
		}
	}
	dpep->dpe_writerwait--;
	dpep->dpe_flag |= DP_WRITER_PRESENT;
	dpep->dpe_refcnt++;
	mutex_exit(&dpep->dpe_lock);

	mutex_enter(&pcp->pc_lock);
	if (pcp->pc_bitmap == NULL) {
		pcache_create(pcp, pollfdnum);
	}
	for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) {
		fd = pfdp->fd;
		if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles)
			continue;
		pdp = pcache_lookup_fd(pcp, fd);
		if (pfdp->events != POLLREMOVE) {
			if (pdp == NULL) {
				pdp = pcache_alloc_fd(0);
				pdp->pd_fd = fd;
				pdp->pd_pcache = pcp;
				pcache_insert_fd(pcp, pdp, pollfdnum);
			}
			ASSERT(pdp->pd_fd == fd);
			ASSERT(pdp->pd_pcache == pcp);
			if (fd >= pcp->pc_mapsize) {
				mutex_exit(&pcp->pc_lock);
				pcache_grow_map(pcp, fd);
				mutex_enter(&pcp->pc_lock);
			}
			if (fd > pcp->pc_mapend) {
				pcp->pc_mapend = fd;
			}
			if ((fp = getf(fd)) == NULL) {
				/*
				 * The fd is not valid. Since we can't pass
				 * this error back in the write() call, set
				 * the bit in the bitmap to force the DP_POLL
				 * ioctl to examine it.
				 */
				BT_SET(pcp->pc_bitmap, fd);
				pdp->pd_events |= pfdp->events;
				continue;
			}
			/*
			 * Don't do VOP_POLL for an already cached fd with
			 * the same poll events.
			 */
			if ((pdp->pd_events == pfdp->events) &&
			    (pdp->pd_fp != NULL)) {
				/*
				 * The events are already cached.
				 */
				releasef(fd);
				continue;
			}

			/*
			 * Do VOP_POLL and cache this poll fd.
			 */
			/*
			 * XXX - pollrelock() logic needs to know which
			 * pollcache lock to grab. It'd be a cleaner
			 * solution if we could pass pcp as an argument
			 * in the VOP_POLL interface instead of implicitly
			 * passing it via the thread_t struct. On the
			 * other hand, changing the VOP_POLL interface
			 * would require every driver/file system poll
			 * routine to change. May want to revisit the
			 * tradeoff later.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
			    &pfdp->revents, &php, NULL);
			curthread->t_pollcache = NULL;
			/*
			 * We always set the bit when this fd is cached;
			 * this forces the first DP_POLL to poll this fd.
			 * Real performance gain comes from subsequent
			 * DP_POLL. We also attempt a pollhead_insert();
			 * if it's not possible, we'll do it in dpioctl().
			 */
			BT_SET(pcp->pc_bitmap, fd);
			if (error != 0) {
				releasef(fd);
				break;
			}
			pdp->pd_fp = fp;
			pdp->pd_events |= pfdp->events;
			if (php != NULL) {
				if (pdp->pd_php == NULL) {
					pollhead_insert(php, pdp);
					pdp->pd_php = php;
				} else {
					if (pdp->pd_php != php) {
						pollhead_delete(pdp->pd_php,
						    pdp);
						pollhead_insert(php, pdp);
						pdp->pd_php = php;
					}
				}

			}
			releasef(fd);
		} else {
			if (pdp == NULL) {
				continue;
			}
			ASSERT(pdp->pd_fd == fd);
			pdp->pd_fp = NULL;
			pdp->pd_events = 0;
			ASSERT(pdp->pd_thread == NULL);
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
			}
			BT_CLEAR(pcp->pc_bitmap, fd);
		}
	}
	mutex_exit(&pcp->pc_lock);
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_flag &= ~DP_WRITER_PRESENT;
	ASSERT(dpep->dpe_refcnt == 1);
	dpep->dpe_refcnt--;
	cv_broadcast(&dpep->dpe_cv);
	mutex_exit(&dpep->dpe_lock);
	kmem_free(pollfdp, uiosize);
	return (error);
}
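/*
 * User-level usage sketch (illustrative; sock and oldfd are assumed to
 * be open descriptors): fds are added, re-armed, or removed by writing
 * an array of pollfd structures to the /dev/poll descriptor:
 *
 *	pollfd_t pfd[2];
 *
 *	pfd[0].fd = sock;		// add sock, or update its events
 *	pfd[0].events = POLLIN;
 *	pfd[1].fd = oldfd;		// drop oldfd from the cached set
 *	pfd[1].events = POLLREMOVE;
 *	if (write(dpfd, pfd, sizeof (pfd)) != sizeof (pfd))
 *		err(1, "write /dev/poll");
 */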
/*ARGSUSED*/
static int
dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	hrtime_t	now;
	int		error = 0;
	STRUCT_DECL(dvpoll, dvpoll);

	if (cmd == DP_POLL) {
		/* do this now, before we sleep on DP_WRITER_PRESENT */
		now = gethrtime();
	}

	minor = getminor(dev);
	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	mutex_exit(&devpoll_lock);
	ASSERT(dpep != NULL);
	pcp = dpep->dpe_pcache;
	if (curproc->p_pid != pcp->pc_pid)
		return (EACCES);

	mutex_enter(&dpep->dpe_lock);
	while ((dpep->dpe_flag & DP_WRITER_PRESENT) ||
	    (dpep->dpe_writerwait != 0)) {
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			mutex_exit(&dpep->dpe_lock);
			return (EINTR);
		}
	}
	dpep->dpe_refcnt++;
	mutex_exit(&dpep->dpe_lock);

	switch (cmd) {
	case DP_POLL:
	{
		pollstate_t	*ps;
		nfds_t		nfds;
		int		fdcnt = 0;
		hrtime_t	deadline = 0;

		STRUCT_INIT(dvpoll, mode);
		error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
		    STRUCT_SIZE(dvpoll));
		if (error) {
			DP_REFRELE(dpep);
			return (EFAULT);
		}

		deadline = STRUCT_FGET(dvpoll, dp_timeout);
		if (deadline > 0) {
			/*
			 * Convert the deadline from relative milliseconds
			 * to absolute nanoseconds. The caller must wait
			 * for at least a tick.
			 */
			deadline = deadline * NANOSEC / MILLISEC;
			deadline = MAX(deadline, nsec_per_tick);
			deadline += now;
		}

		if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
			/*
			 * We are just using DP_POLL to sleep, so we don't
			 * need any of the devpoll apparatus.
			 * Do not check for signals if we have a zero timeout.
			 */
			DP_REFRELE(dpep);
			if (deadline == 0)
				return (0);
			mutex_enter(&curthread->t_delay_lock);
			while ((error =
			    cv_timedwait_sig_hrtime(&curthread->t_delay_cv,
			    &curthread->t_delay_lock, deadline)) > 0)
				continue;
			mutex_exit(&curthread->t_delay_lock);
			return (error == 0 ? EINTR : 0);
		}

		/*
		 * XXX It would be nice not to have to alloc each time, but it
		 * requires another per thread structure hook. This can be
		 * implemented later if data suggests that it's necessary.
		 */
		if ((ps = curthread->t_pollstate) == NULL) {
			curthread->t_pollstate = pollstate_create();
			ps = curthread->t_pollstate;
		}
		if (ps->ps_dpbufsize < nfds) {
			struct proc *p = ttoproc(curthread);
			/*
			 * The maximum size should be no larger than the
			 * current maximum open file count.
			 */
			mutex_enter(&p->p_lock);
			if (nfds > p->p_fno_ctl) {
				mutex_exit(&p->p_lock);
				DP_REFRELE(dpep);
				return (EINVAL);
			}
			mutex_exit(&p->p_lock);
			kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) *
			    ps->ps_dpbufsize);
			ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) *
			    nfds, KM_SLEEP);
			ps->ps_dpbufsize = nfds;
		}

		mutex_enter(&pcp->pc_lock);
		for (;;) {
			pcp->pc_flag = 0;
			error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds,
			    &fdcnt);
			if (fdcnt > 0 || error != 0)
				break;

			/*
			 * A pollwake has happened since we last polled
			 * the cache; poll again.
			 */
			if (pcp->pc_flag & T_POLLWAKE)
				continue;

			/*
			 * Sleep until we are notified, signaled, or timed out.
			 */
			if (deadline == 0) {
				/* immediate timeout; do not check signals */
				break;
			}
			error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
			    &pcp->pc_lock, deadline);
			/*
			 * If we were awakened by a signal or timeout then
			 * break the loop, else poll again.
			 */
			if (error <= 0) {
				error = (error == 0) ? EINTR : 0;
				break;
			} else {
				error = 0;
			}
		}
		mutex_exit(&pcp->pc_lock);

		if (error == 0 && fdcnt > 0) {
			if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll,
			    dp_fds), sizeof (pollfd_t) * fdcnt)) {
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			*rvalp = fdcnt;
		}
		break;
	}
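	/*
	 * Worked example of the deadline conversion above (illustrative):
	 * dp_timeout = 10 (ms) becomes 10 * NANOSEC / MILLISEC =
	 * 10,000,000 ns, is raised to nsec_per_tick if that is larger,
	 * and is then biased by `now' to yield an absolute
	 * gethrtime()-based deadline.
	 */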
	case DP_ISPOLLED:
	{
		pollfd_t	pollfd;
		polldat_t	*pdp;

		STRUCT_INIT(dvpoll, mode);
		error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
		if (error) {
			DP_REFRELE(dpep);
			return (EFAULT);
		}
		mutex_enter(&pcp->pc_lock);
		if (pcp->pc_hash == NULL) {
			/*
			 * No need to search because no poll fd
			 * has been cached.
			 */
			mutex_exit(&pcp->pc_lock);
			DP_REFRELE(dpep);
			return (0);
		}
		if (pollfd.fd < 0) {
			mutex_exit(&pcp->pc_lock);
			break;
		}
		pdp = pcache_lookup_fd(pcp, pollfd.fd);
		if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) &&
		    (pdp->pd_fp != NULL)) {
			pollfd.revents = pdp->pd_events;
			if (copyout(&pollfd, (caddr_t)arg,
			    sizeof (pollfd_t))) {
				mutex_exit(&pcp->pc_lock);
				DP_REFRELE(dpep);
				return (EFAULT);
			}
			*rvalp = 1;
		}
		mutex_exit(&pcp->pc_lock);
		break;
	}

	default:
		DP_REFRELE(dpep);
		return (EINVAL);
	}
	DP_REFRELE(dpep);
	return (error);
}
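/*
 * User-level usage sketch (illustrative; MAXEVENTS is a caller-chosen
 * constant and handle() a placeholder): waiting for events with the
 * DP_POLL ioctl. dp_timeout is in milliseconds (-1 waits indefinitely),
 * and the ioctl's return value is the number of pollfd entries copied
 * out to dp_fds:
 *
 *	pollfd_t results[MAXEVENTS];
 *	struct dvpoll dvp;
 *	int i, n;
 *
 *	dvp.dp_fds = results;
 *	dvp.dp_nfds = MAXEVENTS;
 *	dvp.dp_timeout = 1000;		// one second
 *	if ((n = ioctl(dpfd, DP_POLL, &dvp)) < 0)
 *		err(1, "DP_POLL");
 *	for (i = 0; i < n; i++)
 *		handle(results[i].fd, results[i].revents);
 */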
/*ARGSUSED*/
static int
dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	/*
	 * Polling on a /dev/poll fd is not fully supported yet.
	 */
	*reventsp = POLLERR;
	return (0);
}

/*
 * The devpoll close routine must clean up enough state before the
 * pollcache is deleted, i.e., it must ensure that nothing still
 * references the pollcache afterwards. There is no "permission" check
 * here; whichever process holds the last reference to this /dev/poll fd
 * may close it.
 */
/*ARGSUSED*/
static int
dpclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	minor_t		minor;
	dp_entry_t	*dpep;
	pollcache_t	*pcp;
	int		i;
	polldat_t	**hashtbl;
	polldat_t	*pdp;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	devpolltbl[minor] = NULL;
	mutex_exit(&devpoll_lock);
	pcp = dpep->dpe_pcache;
	ASSERT(pcp != NULL);
	/*
	 * At this point, no other lwp can access this pollcache via the
	 * /dev/poll fd. This pollcache is going away, so do the clean
	 * up without the pc_lock.
	 */
	hashtbl = pcp->pc_hash;
	for (i = 0; i < pcp->pc_hashsize; i++) {
		for (pdp = hashtbl[i]; pdp; pdp = pdp->pd_hashnext) {
			if (pdp->pd_php != NULL) {
				pollhead_delete(pdp->pd_php, pdp);
				pdp->pd_php = NULL;
				pdp->pd_fp = NULL;
			}
		}
	}
	/*
	 * pollwakeup() may still interact with this pollcache. Wait until
	 * it is done.
	 */
	mutex_enter(&pcp->pc_no_exit);
	ASSERT(pcp->pc_busy >= 0);
	while (pcp->pc_busy > 0)
		cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
	mutex_exit(&pcp->pc_no_exit);
	pcache_destroy(pcp);
	ASSERT(dpep->dpe_refcnt == 0);
	kmem_free(dpep, sizeof (dp_entry_t));
	return (0);
}