1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stat.h> 32 #include <sys/errno.h> 33 #include <sys/kmem.h> 34 #include <sys/sysmacros.h> 35 #include <sys/debug.h> 36 #include <sys/poll_impl.h> 37 #include <sys/port_impl.h> 38 39 #define PORTHASH_START 256 /* start cache space for events */ 40 #define PORTHASH_MULT 2 /* growth threshold and factor */ 41 42 /* local functions */ 43 static int port_fd_callback(void *, int *, pid_t, int, void *); 44 static int port_bind_pollhead(pollhead_t **, polldat_t *, short *); 45 static void port_remove_fd_local(portfd_t *, port_fdcache_t *); 46 static void port_close_sourcefd(void *, int, pid_t, int); 47 static void port_cache_insert_fd(port_fdcache_t *, polldat_t *); 48 49 /* 50 * port_fd_callback() 51 * The event port framework uses callback functions to notify associated 52 * event sources about actions on source specific objects. 53 * The source itself defines the "arg" required to identify the object with 54 * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t 55 * structure. The portfd_t structure is specific for PORT_SOURCE_FD source. 56 * The port_fd_callback() function is notified in three cases: 57 * - PORT_CALLBACK_DEFAULT 58 * The object (fd) will be delivered to the application. 59 * - PORT_CALLBACK_DISSOCIATE 60 * The object (fd) will be dissociated from the port. 61 * - PORT_CALLBACK_CLOSE 62 * The object (fd) will be dissociated from the port because the port 63 * is being closed. 64 * A fd is shareable between processes only when 65 * - processes have the same fd id and 66 * - processes have the same fp. 67 * A fd becomes shareable: 68 * - on fork() across parent and child process and 69 * - when I_SENDFD is used to pass file descriptors between parent and child 70 * immediately after fork() (the sender and receiver must get the same 71 * file descriptor id). 72 * If a fd is shared between processes, all involved processes will get 73 * the same rights related to re-association of the fd with the port and 74 * retrieve of events from that fd. 75 * The process which associated the fd with a port for the first time 76 * becomes also the owner of the association. Only the owner of the 77 * association is allowed to dissociate the fd from the port. 78 */ 79 /* ARGSUSED */ 80 static int 81 port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 82 { 83 portfd_t *pfd = (portfd_t *)arg; 84 polldat_t *pdp = PFTOD(pfd); 85 port_fdcache_t *pcp; 86 file_t *fp; 87 int error; 88 89 ASSERT((pdp != NULL) && (events != NULL)); 90 switch (flag) { 91 case PORT_CALLBACK_DEFAULT: 92 if (curproc->p_pid != pid) { 93 /* 94 * Check if current process is allowed to retrieve 95 * events from this fd. 96 */ 97 fp = getf(pdp->pd_fd); 98 if (fp == NULL) { 99 error = EACCES; /* deny delivery of events */ 100 break; 101 } 102 releasef(pdp->pd_fd); 103 if (fp != pdp->pd_fp) { 104 error = EACCES; /* deny delivery of events */ 105 break; 106 } 107 } 108 *events = pdp->pd_portev->portkev_events; /* update events */ 109 error = 0; 110 break; 111 case PORT_CALLBACK_DISSOCIATE: 112 error = 0; 113 break; 114 case PORT_CALLBACK_CLOSE: 115 /* remove polldat/portfd struct */ 116 pdp->pd_portev = NULL; 117 pcp = (port_fdcache_t *)pdp->pd_pcache; 118 mutex_enter(&pcp->pc_lock); 119 pdp->pd_fp = NULL; 120 pdp->pd_events = 0; 121 if (pdp->pd_php != NULL) { 122 pollhead_delete(pdp->pd_php, pdp); 123 pdp->pd_php = NULL; 124 } 125 port_pcache_remove_fd(pcp, pfd); 126 mutex_exit(&pcp->pc_lock); 127 error = 0; 128 break; 129 default: 130 error = EINVAL; 131 break; 132 } 133 return (error); 134 } 135 136 /* 137 * This routine returns a pointer to a cached poll fd entry, or NULL if it 138 * does not find it in the hash table. 139 * The fd is used as index. 140 * The fd and the fp are used to detect a valid entry. 141 * This function returns a pointer to a valid portfd_t structure only when 142 * the fd and the fp in the args match the entries in polldat_t. 143 */ 144 portfd_t * 145 port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp) 146 { 147 polldat_t *pdp; 148 portfd_t **bucket; 149 150 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 151 bucket = PORT_FD_BUCKET(pcp, fd); 152 pdp = PFTOD(*bucket); 153 while (pdp != NULL) { 154 if (pdp->pd_fd == fd && pdp->pd_fp == fp) 155 break; 156 pdp = pdp->pd_hashnext; 157 } 158 return (PDTOF(pdp)); 159 } 160 161 /* 162 * port_associate_fd() 163 * This function associates new file descriptors with a port or 164 * reactivate already associated file descriptors. 165 * The reactivation also updates the events types to be checked and the 166 * attached user pointer. 167 * Per port a cache is used to store associated file descriptors. 168 * Internally the VOP_POLL interface is used to poll for existing events. 169 * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure 170 * which is used to enqueue polldat_t structures with pending events. 171 * If VOP_POLL immediately returns valid events (revents) then those events 172 * will be submitted to the event port with port_send_event(). 173 * Otherwise VOP_POLL does not return events but it delivers a pointer to a 174 * pollhead_t structure. In such a case the corresponding file system behind 175 * VOP_POLL will use the pollwakeup() function to notify about exisiting 176 * events. 177 */ 178 int 179 port_associate_fd(port_t *pp, int source, uintptr_t object, int events, 180 void *user) 181 { 182 port_fdcache_t *pcp; 183 int fd; 184 struct pollhead *php = NULL; 185 portfd_t *pfd; 186 polldat_t *pdp; 187 file_t *fp; 188 port_kevent_t *pkevp; 189 short revents; 190 int error = 0; 191 192 pcp = pp->port_queue.portq_pcp; 193 if (object > (uintptr_t)INT_MAX) 194 return (EBADFD); 195 196 fd = object; 197 198 if ((fp = getf(fd)) == NULL) 199 return (EBADFD); 200 201 mutex_enter(&pcp->pc_lock); 202 if (pcp->pc_hash == NULL) { 203 /* 204 * This is the first time that a fd is being associated with 205 * the current port: 206 * - create PORT_SOURCE_FD cache 207 * - associate PORT_SOURCE_FD source with the port 208 */ 209 error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD, 210 NULL, port_close_sourcefd, pp, NULL); 211 if (error) { 212 mutex_exit(&pcp->pc_lock); 213 releasef(fd); 214 return (error); 215 } 216 217 /* create polldat cache */ 218 pcp->pc_hashsize = PORTHASH_START; 219 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * 220 sizeof (portfd_t *), KM_SLEEP); 221 pfd = NULL; 222 } else { 223 /* Check if the fd/fp is already associated with the port */ 224 pfd = port_cache_lookup_fp(pcp, fd, fp); 225 } 226 227 if (pfd == NULL) { 228 /* 229 * new entry 230 * Allocate a polldat_t structure per fd 231 * The use of the polldat_t structure to cache file descriptors 232 * is required to be able to share the pollwakeup() function 233 * with poll(2) and devpoll(7d). 234 */ 235 pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP); 236 pdp = PFTOD(pfd); 237 pdp->pd_fd = fd; 238 pdp->pd_fp = fp; 239 pdp->pd_pcache = (void *)pcp; 240 241 /* Allocate a port event structure per fd */ 242 error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED, 243 &pdp->pd_portev); 244 if (error) { 245 kmem_free(pfd, sizeof (portfd_t)); 246 releasef(fd); 247 mutex_exit(&pcp->pc_lock); 248 return (error); 249 } 250 pkevp = pdp->pd_portev; 251 pkevp->portkev_callback = port_fd_callback; 252 pkevp->portkev_arg = pfd; 253 254 /* add portfd_t entry to the cache */ 255 port_cache_insert_fd(pcp, pdp); 256 pkevp->portkev_object = fd; 257 pkevp->portkev_user = user; 258 259 /* 260 * Add current port to the file descriptor interested list 261 * The members of the list are notified when the file descriptor 262 * is closed. 263 */ 264 addfd_port(fd, pfd); 265 } else { 266 /* 267 * The file descriptor is already associated with the port 268 */ 269 pdp = PFTOD(pfd); 270 pkevp = pdp->pd_portev; 271 272 /* 273 * Check if the re-association happens before the last 274 * submitted event of the file descriptor was retrieved. 275 * Clear the PORT_KEV_VALID flag if set. No new events 276 * should get submitted after this flag is cleared. 277 */ 278 mutex_enter(&pkevp->portkev_lock); 279 if (pkevp->portkev_flags & PORT_KEV_VALID) { 280 pkevp->portkev_flags &= ~PORT_KEV_VALID; 281 } 282 if (pkevp->portkev_flags & PORT_KEV_DONEQ) { 283 mutex_exit(&pkevp->portkev_lock); 284 /* 285 * Remove any events that where already fired 286 * for this fd and are still in the port queue. 287 */ 288 port_remove_done_event(pkevp); 289 } else { 290 mutex_exit(&pkevp->portkev_lock); 291 } 292 pkevp->portkev_user = user; 293 } 294 295 mutex_enter(&pkevp->portkev_lock); 296 pkevp->portkev_events = 0; /* no fired events */ 297 pdp->pd_events = events; /* events associated */ 298 /* 299 * allow new events. 300 */ 301 pkevp->portkev_flags |= PORT_KEV_VALID; 302 mutex_exit(&pkevp->portkev_lock); 303 304 /* 305 * do VOP_POLL and cache this poll fd. 306 * 307 * XXX - pollrelock() logic needs to know 308 * which pollcache lock to grab. It'd be a 309 * cleaner solution if we could pass pcp as 310 * an arguement in VOP_POLL interface instead 311 * of implicitly passing it using thread_t 312 * struct. On the other hand, changing VOP_POLL 313 * interface will require all driver/file system 314 * poll routine to change. 315 */ 316 curthread->t_pollcache = (pollcache_t *)pcp; 317 error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php); 318 curthread->t_pollcache = NULL; 319 320 /* 321 * To keep synchronization between VOP_POLL above and 322 * pollhead_insert below, it is necessary to 323 * call VOP_POLL() again (see port_bind_pollhead()). 324 */ 325 if (error) { 326 /* dissociate the fd from the port */ 327 delfd_port(fd, pfd); 328 port_remove_fd_local(pfd, pcp); 329 releasef(fd); 330 mutex_exit(&pcp->pc_lock); 331 return (error); 332 } 333 334 if (php != NULL) { 335 /* 336 * No events delivered yet. 337 * Bind pollhead pointer with current polldat_t structure. 338 * Sub-system will call pollwakeup() later with php as 339 * argument. 340 */ 341 error = port_bind_pollhead(&php, pdp, &revents); 342 if (error) { 343 delfd_port(fd, pfd); 344 port_remove_fd_local(pfd, pcp); 345 releasef(fd); 346 mutex_exit(&pcp->pc_lock); 347 return (error); 348 } 349 } 350 351 /* 352 * Check if new events where detected and no events have been 353 * delivered. The revents was already set after the VOP_POLL 354 * above or it was updated in port_bind_pollhead(). 355 */ 356 mutex_enter(&pkevp->portkev_lock); 357 if (revents && (pkevp->portkev_flags & PORT_KEV_VALID)) { 358 ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0); 359 pkevp->portkev_flags &= ~PORT_KEV_VALID; 360 revents = revents & (pdp->pd_events | POLLHUP | POLLERR); 361 /* send events to the event port */ 362 pkevp->portkev_events = revents; 363 /* 364 * port_send_event will release the portkev_lock mutex. 365 */ 366 port_send_event(pkevp); 367 } else { 368 mutex_exit(&pkevp->portkev_lock); 369 } 370 371 releasef(fd); 372 mutex_exit(&pcp->pc_lock); 373 return (error); 374 } 375 376 /* 377 * The port_dissociate_fd() function dissociates the delivered file 378 * descriptor from the event port and removes already fired events. 379 * If a fd is shared between processes, all involved processes will get 380 * the same rights related to re-association of the fd with the port and 381 * retrieve of events from that fd. 382 * The process which associated the fd with a port for the first time 383 * becomes also the owner of the association. Only the owner of the 384 * association is allowed to dissociate the fd from the port. 385 */ 386 int 387 port_dissociate_fd(port_t *pp, uintptr_t object) 388 { 389 int fd; 390 port_fdcache_t *pcp; 391 portfd_t *pfd; 392 file_t *fp; 393 394 if (object > (uintptr_t)INT_MAX) 395 return (EBADFD); 396 397 fd = object; 398 pcp = pp->port_queue.portq_pcp; 399 400 mutex_enter(&pcp->pc_lock); 401 if (pcp->pc_hash == NULL) { 402 /* no file descriptor cache available */ 403 mutex_exit(&pcp->pc_lock); 404 return (0); 405 } 406 if ((fp = getf(fd)) == NULL) { 407 mutex_exit(&pcp->pc_lock); 408 return (EBADFD); 409 } 410 pfd = port_cache_lookup_fp(pcp, fd, fp); 411 if (pfd == NULL) { 412 releasef(fd); 413 mutex_exit(&pcp->pc_lock); 414 return (0); 415 } 416 /* only association owner is allowed to remove the association */ 417 if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) { 418 releasef(fd); 419 mutex_exit(&pcp->pc_lock); 420 return (EACCES); 421 } 422 423 /* remove port from the file descriptor interested list */ 424 delfd_port(fd, pfd); 425 releasef(fd); 426 427 /* remove polldat & port event structure */ 428 port_remove_fd_object(pfd, pp, pcp); 429 mutex_exit(&pcp->pc_lock); 430 return (0); 431 } 432 433 /* 434 * Remove the fd from the event port cache. 435 */ 436 static void 437 port_remove_fd_local(portfd_t *pfd, port_fdcache_t *pcp) 438 { 439 polldat_t *pdp = PFTOD(pfd); 440 441 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 442 pdp->pd_fp = NULL; 443 if (pdp->pd_php != NULL) { 444 pollhead_delete(pdp->pd_php, pdp); 445 pdp->pd_php = NULL; 446 } 447 port_free_event_local(pdp->pd_portev, 0); 448 /* remove polldat struct */ 449 port_pcache_remove_fd(pcp, pfd); 450 } 451 452 /* 453 * Associate event port polldat_t structure with sub-system pointer to 454 * a polhead_t structure. 455 */ 456 static int 457 port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents) 458 { 459 int error; 460 file_t *fp; 461 462 /* 463 * During re-association of a fd with a port the pd_php pointer 464 * is still the same as at the first association time. 465 */ 466 if (pdp->pd_php == *php) 467 return (0); /* already associated */ 468 469 /* polldat_t associated with another pollhead_t pointer */ 470 if (pdp->pd_php != NULL) 471 pollhead_delete(pdp->pd_php, pdp); 472 473 /* 474 * Before pollhead_insert() pollwakeup() will not detect a polldat 475 * entry in the ph_list and the event notification will disappear. 476 * This happens because polldat_t is still not associated with 477 * the pointer to the pollhead_t structure. 478 */ 479 pollhead_insert(*php, pdp); 480 481 /* 482 * From now on event notification can be detected in pollwakeup(), 483 * Use VOP_POLL() again to check the current status of the event. 484 */ 485 pdp->pd_php = *php; 486 fp = pdp->pd_fp; 487 curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache; 488 error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0, revents, php); 489 curthread->t_pollcache = NULL; 490 return (error); 491 } 492 493 /* 494 * Grow the hash table. Rehash all the elements on the hash table. 495 */ 496 static void 497 port_cache_grow_hashtbl(port_fdcache_t *pcp) 498 { 499 portfd_t **oldtbl; 500 polldat_t *pdp; 501 portfd_t *pfd; 502 polldat_t *pdp1; 503 int oldsize; 504 int i; 505 506 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 507 oldsize = pcp->pc_hashsize; 508 oldtbl = pcp->pc_hash; 509 pcp->pc_hashsize *= PORTHASH_MULT; 510 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *), 511 KM_SLEEP); 512 /* 513 * rehash existing elements 514 */ 515 pcp->pc_fdcount = 0; 516 for (i = 0; i < oldsize; i++) { 517 pfd = oldtbl[i]; 518 pdp = PFTOD(pfd); 519 while (pdp != NULL) { 520 pdp1 = pdp->pd_hashnext; 521 port_cache_insert_fd(pcp, pdp); 522 pdp = pdp1; 523 } 524 } 525 kmem_free(oldtbl, oldsize * sizeof (portfd_t *)); 526 } 527 /* 528 * This routine inserts a polldat into the portcache's hash table. It 529 * may be necessary to grow the size of the hash table. 530 */ 531 static void 532 port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp) 533 { 534 portfd_t **bucket; 535 536 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 537 if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT)) 538 port_cache_grow_hashtbl(pcp); 539 bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd); 540 pdp->pd_hashnext = PFTOD(*bucket); 541 *bucket = PDTOF(pdp); 542 pcp->pc_fdcount++; 543 } 544 545 546 /* 547 * The port_remove_portfd() function dissociates the port from the fd 548 * and vive versa. 549 */ 550 static void 551 port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp) 552 { 553 port_t *pp; 554 file_t *fp; 555 556 ASSERT(MUTEX_HELD(&pcp->pc_lock)); 557 pp = pdp->pd_portev->portkev_port; 558 fp = getf(pdp->pd_fd); 559 /* 560 * If we did not get the fp for pd_fd but its portfd_t 561 * still exist in the cache, it means the pd_fd is being 562 * closed by some other thread which will also free the portfd_t. 563 */ 564 if (fp != NULL) { 565 delfd_port(pdp->pd_fd, PDTOF(pdp)); 566 releasef(pdp->pd_fd); 567 port_remove_fd_object(PDTOF(pdp), pp, pcp); 568 } 569 } 570 571 /* 572 * This function is used by port_close_sourcefd() to destroy the cache 573 * on last close. 574 */ 575 static void 576 port_pcache_destroy(port_fdcache_t *pcp) 577 { 578 ASSERT(pcp->pc_fdcount == 0); 579 kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize); 580 mutex_destroy(&pcp->pc_lock); 581 kmem_free(pcp, sizeof (port_fdcache_t)); 582 } 583 584 /* 585 * port_close() calls this function to request the PORT_SOURCE_FD source 586 * to remove/free all resources allocated and associated with the port. 587 */ 588 /* ARGSUSED */ 589 static void 590 port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose) 591 { 592 port_t *pp = arg; 593 port_fdcache_t *pcp; 594 portfd_t **hashtbl; 595 polldat_t *pdp; 596 polldat_t *pdpnext; 597 int index; 598 599 pcp = pp->port_queue.portq_pcp; 600 if (pcp == NULL) 601 /* no cache available -> nothing to do */ 602 return; 603 604 mutex_enter(&pcp->pc_lock); 605 /* 606 * Scan the cache and free all allocated portfd_t and port_kevent_t 607 * structures. 608 */ 609 hashtbl = pcp->pc_hash; 610 for (index = 0; index < pcp->pc_hashsize; index++) { 611 for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) { 612 pdpnext = pdp->pd_hashnext; 613 if (pid == pdp->pd_portev->portkev_pid) { 614 /* 615 * remove polldat + port_event_t from cache 616 * only when current process did the 617 * association. 618 */ 619 port_remove_portfd(pdp, pcp); 620 } 621 } 622 } 623 if (lastclose) { 624 /* 625 * Wait for all the portfd's to be freed. 626 * The remaining portfd_t's are the once we did not 627 * free in port_remove_portfd since some other thread 628 * is closing the fd. These threads will free the portfd_t's 629 * once we drop the pc_lock mutex. 630 */ 631 while (pcp->pc_fdcount) { 632 (void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock); 633 } 634 /* event port vnode will be destroyed -> remove everything */ 635 pp->port_queue.portq_pcp = NULL; 636 } 637 mutex_exit(&pcp->pc_lock); 638 /* 639 * last close: 640 * pollwakeup() can not further interact with this cache 641 * (all polldat structs are removed from pollhead entries). 642 */ 643 if (lastclose) 644 port_pcache_destroy(pcp); 645 } 646