/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static void aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */

int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int error;
	size_t c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, dev,
	    tnf_offset, blkno, btodt(uio->uio_loffset),
	    tnf_size, size, uio->uio_iov->iov_len,
	    tnf_bioflags, rw, rw);

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
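
/*
 * Usage sketch (illustrative only, not part of this file): a character
 * driver typically reaches aphysio() from its aread(9E)/awrite(9E) entry
 * points, passing its own strategy(9E) routine, anocancel, and a mincnt
 * routine such as minphys(9F).  The xx_aread/xxstrategy names below are
 * hypothetical.
 *
 *	static int
 *	xx_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_READ,
 *		    minphys, aio));
 *	}
 *
 * aphysio() returns as soon as the strategy routine has been issued;
 * completion is reported later through biodone() -> aio_done() below.
 */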

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as a contract private interface.
 */

void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, bp->b_edev,
	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);
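
	/*
	 * Lock ordering note (as observed in this file): where both locks
	 * are needed, aio_portq_mutex is taken before aio_mutex and the
	 * two are dropped in the reverse order (see also aio_close_port()).
	 */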
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an
				 * AST; set an AST on all threads in this
				 * process.
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return;
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
		 */
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;

		/* put request on done queue. */
		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
	} /* portkevent */

	/*
	 * when list IO notification is enabled, a notification or
	 * signal is sent only when all entries in the list are done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
			cv_signal(&head->lio_notify);
			if (head->lio_port >= 0 &&
			    (lio_pkevp = head->lio_portkev) != NULL)
				head->lio_port = -1;
		}
	}

	/*
	 * if AIO_WAITN is set, wake the waiters only when the
	 * required number of I/Os has finished or when all
	 * I/Os are done.
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_portq_mutex);

	/*
	 * Could the cleanup thread be waiting for AIO with locked
	 * resources to finish?
	 * Ideally in that case cleanup thread should block on cleanupcv,
	 * but there is a window where it could miss a new aio
	 * request that sneaked in.
	 */
	mutex_enter(&as->a_contents);
	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
		cv_broadcast(&as->a_cv);
	mutex_exit(&as->a_contents);

	if (sigev)
		aio_sigev_send(p, sigev);
	else if (!use_port && head == NULL) {
		/*
		 * Send a SIGIO signal when the process has a handler enabled.
		 */
		if ((func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
		    func != SIG_IGN)
			psignal(p, SIGIO);
	}
	if (pkevp)
		port_send_event(pkevp);
	if (lio_sigev)
		aio_sigev_send(p, lio_sigev);
	if (lio_pkevp)
		port_send_event(lio_pkevp);
}
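
/*
 * For reference (illustrative, user-level view of kaio): the sigqueue_t
 * sent by aio_sigev_send() below normally originates from an aiocb that
 * was submitted with a SIGEV_SIGNAL sigevent, e.g.
 *
 *	struct aiocb cb;
 *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
 *	cb.aio_sigevent.sigev_signo = SIGUSR1;
 *
 * Requests submitted with no sigevent, no port event and no listio head
 * fall back to the SIGIO path at the end of aio_done() above.
 */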

/*
 * send a queued signal to the specified process. callers only
 * invoke this with a non-NULL sigevent.
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}

/*
 * special case handling for zero length requests. the aio request
 * short circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return value.
 */
void
aio_zerolen(aio_req_t *reqp)
{

	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}

/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}

/*
 * deletes a request's id from the hash table of outstanding io.
 */
static void
aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}
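
/*
 * Note on the hash table used above: outstanding requests are hashed by
 * AIO_HASH() on the user-level aio_result_t address (aio_req_resultp),
 * and each bucket is a singly linked chain threaded through
 * aio_hash_next.  aio_hash_delete() walks one chain and silently
 * returns if the request is no longer present.
 */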

/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL) {
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
		reqp->aio_req_sigqp = NULL;
	}
	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}


/*
 * Verify the integrity of a queue.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
    aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;
	int present = 0;

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}
	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif

/*
 * Put a request onto the tail of a queue.
 */
void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	aio_req_t *head;
	aio_req_t *prev;

	aio_verify_queue(*qhead, NULL, reqp);

	if ((head = *qhead) == NULL) {
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
		*qhead = reqp;
	} else {
		reqp->aio_req_next = head;
		reqp->aio_req_prev = prev = head->aio_req_prev;
		prev->aio_req_next = reqp;
		head->aio_req_prev = reqp;
	}
	reqp->aio_req_flags |= qflg_new;
}

/*
 * Remove a request from its queue.
 */
void
aio_deq(aio_req_t **qhead, aio_req_t *reqp)
{
	aio_verify_queue(*qhead, reqp, NULL);

	if (reqp->aio_req_next == reqp) {
		*qhead = NULL;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
		if (*qhead == reqp)
			*qhead = reqp->aio_req_next;
	}
	reqp->aio_req_next = NULL;
	reqp->aio_req_prev = NULL;
}
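
/*
 * The queues handled by aio_enq()/aio_deq() are circular, doubly linked
 * lists: an empty queue is a NULL head pointer, a single request points
 * to itself, and (*qhead)->aio_req_prev is always the tail.  A typical
 * call site retires a request to the done queue with the matching queue
 * flag, as aio_cleanup_pollq() does:
 *
 *	mutex_enter(&aiop->aio_mutex);
 *	aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
 *	mutex_exit(&aiop->aio_mutex);
 */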

/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue have their queue flag (qflg) cleared from
 * aio_req_flags and are marked AIO_CLEANUPQ.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}

/*
 * cleanup aio requests that are on the per-process cleanup, notify and
 * poll queues and, when called with a non-zero flag, the port queues.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}
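
/*
 * Summary of the helpers used above: aio_cleanup_cleanupq() and
 * aio_cleanup_pollq() unlock each request's pages and move it to the
 * done queue (the poll variant also copies the result out),
 * aio_cleanup_notifyq() does the same and additionally posts any queued
 * sigevents, and aio_cleanup_portq() re-queues or frees requests that
 * use event port notification.  On AIO_CLEANUP_EXIT the requests are
 * freed instead of being moved to the done queue.
 */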

/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	aio_lio_t *liop;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/*
		 * It is not allowed to hold locks during aphysio_unlock().
		 * The aio_done() interrupt function will try to acquire
		 * aio_mutex and aio_portq_mutex. Therefore we disconnect
		 * the portq list from the aiop for the duration of the
		 * aphysio_unlock() loop below.
		 */
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);
		if ((reqp = headp) != NULL) {
			do {
				next = reqp->aio_req_next;
				aphysio_unlock(reqp);
				if (exitflag) {
					mutex_enter(&aiop->aio_mutex);
					aio_req_free(aiop, reqp);
					mutex_exit(&aiop->aio_mutex);
				}
			} while ((reqp = next) != headp);
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the port queue */
			aio_req_t *newq;

			mutex_enter(&aiop->aio_portq_mutex);
			if ((newq = aiop->aio_portq) != NULL) {
				aio_req_t *headprev = headp->aio_req_prev;
				aio_req_t *newqprev = newq->aio_req_prev;

				headp->aio_req_prev = newqprev;
				newq->aio_req_prev = headprev;
				headprev->aio_req_next = newq;
				newqprev->aio_req_next = headp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	if ((reqp = cleanupq) == NULL)
		return;
	do {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(reqp->aio_req_portkev);
			if ((liop = reqp->aio_req_lio) != NULL) {
				int send_event = 0;

				mutex_enter(&aiop->aio_mutex);
				ASSERT(liop->lio_refcnt > 0);
				if (--liop->lio_refcnt == 0) {
					if (liop->lio_port >= 0 &&
					    liop->lio_portkev) {
						liop->lio_port = -1;
						send_event = 1;
					}
				}
				mutex_exit(&aiop->aio_mutex);
				if (send_event)
					port_send_event(liop->lio_portkev);
			}
		}
	} while ((reqp = next) != cleanupq);
}

/*
 * Do cleanup for every element of the cleanupq.
 */
static void
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		ASSERT(reqp->aio_req_portkev == NULL);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg)
			aio_req_free(aiop, reqp);
		else
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
		mutex_exit(&aiop->aio_mutex);
	} while ((reqp = next) != qhead);
}

/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev = NULL;
	int signalled = 0;

	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			sigev = reqp->aio_req_sigqp;
			reqp->aio_req_sigqp = NULL;
			if ((liohead = reqp->aio_req_lio) != NULL) {
				ASSERT(liohead->lio_refcnt > 0);
				if (--liohead->lio_refcnt == 0) {
					cv_signal(&liohead->lio_notify);
					lio_sigev = liohead->lio_sigqp;
					liohead->lio_sigqp = NULL;
				}
			}
			mutex_exit(&aiop->aio_mutex);
			if (sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    sigev);
			}
			if (lio_sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    lio_sigev);
			}
		}
	} while ((reqp = next) != qhead);

	return (signalled);
}

/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			mutex_exit(&aiop->aio_mutex);
		}
	} while ((reqp = next) != qhead);
}

/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		aiop->aio_doneq = NULL;
		reqp = head;
		do {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		} while ((reqp = next) != head);
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	void *resultp;
	int error;
	size_t retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}


void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}
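
/*
 * Both copyout routines above store into the caller's user-level
 * aio_result_t: aio_return receives the number of bytes transferred, or
 * -1 with aio_errno set on error.  Under _SYSCALL32_IMPL the data model
 * of the target process determines whether sulword() or suword32() is
 * used, since a 32-bit process lays the structure out with 32-bit
 * fields (aio_result32_t).
 */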

/*
 * This function is used to remove a request from the port queue of
 * completed requests (aio_portq).
 */
void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	aio_deq(&aiop->aio_portq, reqp);
}

/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t *aiop;
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	int counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every newly
	 * created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	counter = 0;
	if ((headp = aiop->aio_portpending) != NULL) {
		reqp = headp;
		do {
			if (reqp->aio_req_portkev &&
			    reqp->aio_req_port == port) {
				reqp->aio_req_flags |= AIO_CLOSE_PORT;
				counter++;
			}
		} while ((reqp = reqp->aio_req_next) != headp);
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 * check port doneq
	 */
	headp = NULL;
	if ((reqp = aiop->aio_portq) != NULL) {
		do {
			next = reqp->aio_req_next;
			if (reqp->aio_req_port == port) {
				/* dequeue request and discard event */
				aio_req_remove_portq(aiop, reqp);
				port_free_event(reqp->aio_req_portkev);
				/* put request in temporary queue */
				reqp->aio_req_next = headp;
				headp = reqp;
			}
		} while ((reqp = next) != aiop->aio_portq);
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}