/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static void aio_enq(aio_req_t **, aio_req_t *, int);
static void aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_enq_doneq(aio_t *aiop, aio_req_t *reqp);
static void aio_enq_portq(aio_t *, aio_req_t *, int);
static void aio_enq_port_cleanupq(aio_t *, aio_req_t *);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
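
/*
 * A note on the request queues managed below, all rooted in the
 * per-process aio_t: completed requests normally go to aio_doneq;
 * requests with the AIO_POLL flag go to aio_pollq; requests with a
 * sigevent or a listio head go to aio_notifyq; the remaining requests
 * handled by the cleanup thread go to aio_cleanupq. Event port
 * requests move from aio_portpending to aio_portq (or to
 * aio_portcleanupq while cleanup is in progress). aio_free and
 * aio_lio_free are freelists of recycled request and listio structures.
 */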

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */

int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int error;
	size_t c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, dev,
	    tnf_offset, blkno, btodt(uio->uio_loffset),
	    tnf_size, size, uio->uio_iov->iov_len,
	    tnf_bioflags, rw, rw);

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as a contract private interface.
 */

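/*
 * A completed request is handed off along one of several paths below:
 * requests carrying an event port key are queued for port notification
 * (or parked on the port cleanup queue while the cleanup thread is
 * active); otherwise the request is routed to the poll, notify, or
 * cleanup queue when special handling is needed, or placed on the done
 * queue, after which waiters are woken and any sigevent or SIGIO
 * notification is delivered.
 */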
void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head;
	aio_t *aiop;
	sigqueue_t *sigev;
	sigqueue_t *lio_sigev = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();

	p = bp->b_proc;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, bp->b_edev,
	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/*
	 * decrement fd's ref count by one, now that aio request is done.
	 */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	if (reqp->aio_req_portkev) {
		mutex_enter(&aiop->aio_portq_mutex);
		mutex_enter(&aiop->aio_mutex);
		aiop->aio_pending--;
		reqp->aio_req_flags &= ~AIO_PENDING;
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_enq_portq(aiop, reqp, 1);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			(void) port_send_event(reqp->aio_req_portkev);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			as = p->p_as;
			mutex_enter(&as->a_contents);
			aio_enq_port_cleanupq(aiop, reqp);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		aio_enq_portq(aiop, reqp, 1);
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		(void) port_send_event(reqp->aio_req_portkev);
		return;
	}

	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;

	reqp->aio_req_next = NULL;
	/*
	 * when the AIO_CLEANUP flag is enabled for this
	 * process, or when the AIO_POLL bit is set for
	 * this request, special handling is required.
	 * otherwise the request is put onto the doneq.
	 */
	cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
	pollqflag = (reqp->aio_req_flags & AIO_POLL);
	if (cleanupqflag | pollqflag) {

		if (cleanupqflag) {
			as = p->p_as;
			mutex_enter(&as->a_contents);
		}

		/*
		 * requests with their AIO_POLL bit set are put
		 * on the pollq, requests with sigevent structures
		 * or with listio heads are put on the notifyq, and
		 * the remaining requests don't require any special
		 * cleanup handling, so they're put onto the default
		 * cleanupq.
		 */
		if (pollqflag)
			aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
		else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
			aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
		else
			aio_enq(&aiop->aio_cleanupq, reqp, AIO_CLEANUPQ);

		if (cleanupqflag) {
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
		} else {
			ASSERT(pollqflag);
			/* block aio_cleanup_exit until we're done */
			aiop->aio_flags |= AIO_DONE_ACTIVE;
			mutex_exit(&aiop->aio_mutex);
			/*
			 * let the cleanup processing happen from an
			 * AST. set an AST on all threads in this process
			 */
			mutex_enter(&p->p_lock);
			set_proc_ast(p);
			mutex_exit(&p->p_lock);
			mutex_enter(&aiop->aio_mutex);
			/* wakeup anybody waiting in aiowait() */
			cv_broadcast(&aiop->aio_waitcv);

			/* wakeup aio_cleanup_exit if needed */
			if (aiop->aio_flags & AIO_CLEANUP)
				cv_signal(&aiop->aio_cleanupcv);
			aiop->aio_flags &= ~AIO_DONE_ACTIVE;
			mutex_exit(&aiop->aio_mutex);
		}
		return;
	}

	/* put request on done queue. */
	aio_enq_doneq(aiop, reqp);

	/*
	 * save req's sigevent pointer, and check its
	 * value after releasing aio_mutex lock.
	 */
	sigev = reqp->aio_req_sigqp;
	reqp->aio_req_sigqp = NULL;

	/*
	 * when list IO notification is enabled, a signal
	 * is sent only when all entries in the list are
	 * done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			cv_signal(&head->lio_notify);
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex
			 * lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
		}
		mutex_exit(&aiop->aio_mutex);
		if (sigev)
			aio_sigev_send(p, sigev);
		if (lio_sigev)
			aio_sigev_send(p, lio_sigev);
		return;
	}

	/*
	 * if AIO_WAITN is set, wake the waiters only when the
	 * required number of I/Os has finished or when all
	 * I/Os are done.
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	mutex_exit(&aiop->aio_mutex);
	if (sigev)
		aio_sigev_send(p, sigev);
	else {
		/*
		 * send a SIGIO signal when the process
		 * has a handler enabled.
		 */
		if ((func = p->p_user.u_signal[SIGIO - 1]) !=
		    SIG_DFL && (func != SIG_IGN))
			psignal(p, SIGIO);
	}
}

/*
 * send a queued signal to the specified process. the sigev
 * argument must point to a valid sigqueue_t; callers check for
 * a NULL sigevent pointer before calling this function.
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}

/*
 * special case handling for zero length requests. the aio request
 * short circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return value.
 */
void
aio_zerolen(aio_req_t *reqp)
{

	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}

/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}

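/*
 * The per-process hash table (aiop->aio_hash) maps a request's
 * user-level aio_result_t pointer to its aio_req_t: AIO_HASH() selects
 * the bucket and entries are chained through aio_hash_next.
 */
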
/*
 * deletes a request's id from the hash table of outstanding
 * io.
 */
static void
aio_hash_delete(
	aio_t *aiop,
	struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}

/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL)
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
	reqp->aio_req_next = aiop->aio_free;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}


/*
 * Put a completed request onto its appropriate queue.
 */
/*ARGSUSED*/
static void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	if (*qhead == NULL) {
		*qhead = reqp;
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
	} else {
		reqp->aio_req_next = *qhead;
		reqp->aio_req_prev = (*qhead)->aio_req_prev;
		reqp->aio_req_prev->aio_req_next = reqp;
		(*qhead)->aio_req_prev = reqp;
	}

	reqp->aio_req_flags |= qflg_new;
}
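
/*
 * aio_enq() above and aio_enq_doneq() below maintain their queues as
 * circular, doubly linked lists: the head pointer names an arbitrary
 * element, aio_req_next/aio_req_prev form the ring, and a new request
 * is linked in just before the head, i.e. at the tail of the queue.
 */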

/*
 * Put a completed request onto its appropriate done queue.
 */
static void
aio_enq_doneq(aio_t *aiop, aio_req_t *reqp)
{

	if (aiop->aio_doneq == NULL) {
		aiop->aio_doneq = reqp;
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
	} else {
		reqp->aio_req_next = aiop->aio_doneq;
		reqp->aio_req_prev = aiop->aio_doneq->aio_req_prev;
		reqp->aio_req_prev->aio_req_next = reqp;
		aiop->aio_doneq->aio_req_prev = reqp;
	}

	reqp->aio_req_flags |= AIO_DONEQ;
}

#ifdef DEBUG
/* ARGSUSED */
void
aio_check_flag(aio_req_t *reqp, int check, int val, int flag)
{
	int lval;
	if (reqp == NULL)
		return;
	lval = reqp->aio_req_flags & check;
	ASSERT(lval == val);
}

void
aio_checkset_flag(aio_req_t *reqp, int checkdel, int set)
{
	aio_check_flag(reqp, checkdel, checkdel, 0);
	reqp->aio_req_flags &= ~checkdel;
	reqp->aio_req_flags |= set;

	aio_check_flag(reqp->aio_req_next, set, set, 1);
	aio_check_flag(reqp->aio_req_prev, set, set, 2);
}
#endif /* DEBUG */

/*
 * Put a pending request onto the pending port queue.
 */
void
aio_enq_port_pending(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (aiop->aio_portpending != NULL) {
		reqp->aio_req_next = aiop->aio_portpending;
		aiop->aio_portpending->aio_req_prev = reqp;
	} else {
		reqp->aio_req_next = NULL;
	}
	reqp->aio_req_prev = NULL;
	aiop->aio_portpending = reqp;
#ifdef DEBUG
	reqp->aio_req_flags |= AIO_REQ_PEND;
#endif
}

/*
 * Put a completed request onto the port queue.
 */
static void
aio_enq_portq(aio_t *aiop, aio_req_t *reqp, int pending)
{

	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	if (pending) {
#ifdef DEBUG
		aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_PEND);
#endif
		/* first take request out of the pending queue ... */
		if (reqp->aio_req_prev == NULL)
			/* first request */
			aiop->aio_portpending = reqp->aio_req_next;
		else
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		if (reqp->aio_req_next != NULL)
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
	}

	/* ... and insert request into done queue */
	if (aiop->aio_portq != NULL) {
		reqp->aio_req_next = aiop->aio_portq;
		aiop->aio_portq->aio_req_prev = reqp;
	} else {
		reqp->aio_req_next = NULL;
	}
	reqp->aio_req_prev = NULL;
	aiop->aio_portq = reqp;
#ifdef DEBUG
	if (pending)
		aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_PORTQ);
	else
		aio_checkset_flag(reqp, AIO_REQ_CLEAN, AIO_REQ_PORTQ);
#endif
}

/*
 * Put a completed request onto the port cleanup queue.
 */
static void
aio_enq_port_cleanupq(aio_t *aiop, aio_req_t *reqp)
{

#ifdef DEBUG
	aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_PEND);
#endif
	/* first take request out of the pending queue ... */
	if (reqp->aio_req_prev == NULL)
		/* first request */
		aiop->aio_portpending = reqp->aio_req_next;
	else
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;

	if (reqp->aio_req_next != NULL)
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;

	/* ... and insert request into the cleanup queue */
	reqp->aio_req_next = aiop->aio_portcleanupq;
	aiop->aio_portcleanupq = reqp;
#ifdef DEBUG
	reqp->aio_req_prev = NULL;
	aio_checkset_flag(reqp, AIO_REQ_PEND, AIO_REQ_CLEAN);
#endif
}

/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue should have their aio_req_flags field cleared.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;

#ifdef DEBUG
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);
#endif

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}

/*
 * cleanup aio requests that are on the per-process cleanup, notify,
 * poll, and (when requested) port queues.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = curproc->p_user.u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}

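/*
 * The port cleanup below runs in two passes: first, unless that work
 * was already done (AIO_CLEANUP_PORT), the entries on aio_portq are
 * unlocked and, on exit, freed; then the entries on the port cleanup
 * queue are unlocked and either freed (on exit) or moved back to
 * aio_portq with an event sent to the port.
 */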

/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	aio_req_t *tailp;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);

		for (reqp = headp; reqp != NULL; reqp = next) {
			tailp = reqp;
			next = reqp->aio_req_next;
			/*
			 * It is not allowed to hold locks during
			 * aphysio_unlock(). The aio_done() interrupt function
			 * will try to acquire aio_mutex and aio_portq_mutex.
			 */
			aphysio_unlock(reqp);
			if (exitflag) {
				mutex_enter(&aiop->aio_mutex);
				aio_req_free(aiop, reqp);
				mutex_exit(&aiop->aio_mutex);
			}
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the done queue */
			mutex_enter(&aiop->aio_portq_mutex);
			if (aiop->aio_portq != NULL) {
				tailp->aio_req_next = aiop->aio_portq;
				aiop->aio_portq->aio_req_prev = tailp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	for (reqp = cleanupq; reqp != NULL; reqp = next) {
#ifdef DEBUG
		aio_checkset_flag(reqp, AIO_REQ_CLEAN, AIO_REQ_CLEAN);
#endif
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
#ifdef DEBUG
			aio_checkset_flag(reqp, AIO_REQ_CLEAN, AIO_REQ_FREE);
#endif
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq_portq(aiop, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			(void) port_send_event(reqp->aio_req_portkev);
		}
	}
}

/*
 * Do cleanup for every element of the cleanupq.
 */
static void
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */

	qhead->aio_req_prev->aio_req_next = NULL;
	for (reqp = qhead; reqp != NULL; reqp = next) {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg) {
			/*
			 * reqp can't be referenced after it's freed
			 */
			aio_req_free(aiop, reqp);
		} else {
			if (reqp->aio_req_portkev &&
			    ((reqp->aio_req_flags & AIO_DONEQ) == 0)) {
				aio_enq_doneq(aiop, reqp);
				(void) port_send_event(reqp->aio_req_portkev);
			} else {
				aio_enq_doneq(aiop, reqp);
			}
		}
		mutex_exit(&aiop->aio_mutex);
	}
}

/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev = NULL;
	int signalled = 0;

	qhead->aio_req_prev->aio_req_next = NULL;
	for (reqp = qhead; reqp != NULL; reqp = next) {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			/* reqp can't be referenced after it's freed */
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&aiop->aio_mutex);
		aio_enq_doneq(aiop, reqp);
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;
		/* check if list IO completion notification is required */
		if ((liohead = reqp->aio_req_lio) != NULL) {
			ASSERT(liohead->lio_refcnt > 0);
			if (--liohead->lio_refcnt == 0) {
				cv_signal(&liohead->lio_notify);
				lio_sigev = liohead->lio_sigqp;
				liohead->lio_sigqp = NULL;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		if (sigev) {
			signalled++;
			aio_sigev_send(reqp->aio_req_buf.b_proc, sigev);
		}
		if (lio_sigev) {
			signalled++;
			aio_sigev_send(reqp->aio_req_buf.b_proc, lio_sigev);
		}
	}
	return (signalled);
}

/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */

	qhead->aio_req_prev->aio_req_next = NULL;
	for (reqp = qhead; reqp != NULL; reqp = next) {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			/* reqp can't be referenced after it's freed */
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		/* copy out request's result_t. */
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_enq_doneq(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
}

/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		head->aio_req_prev->aio_req_next = NULL;
		for (reqp = head; reqp != NULL; reqp = next) {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		}
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	void *resultp;
	int error;
	size_t retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}


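/*
 * copy out a transaction's return value and error to the user-level
 * result buffer identified by resultp, using the caller-supplied
 * iovec and buf rather than an aio_req_t.
 */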
void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}

/*
 * This function is used to remove a request from the port's done
 * queue (aio_portq).
 */
void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	if (reqp == aiop->aio_portq) {
		/* first request in the queue */
		aiop->aio_portq = reqp->aio_req_next;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		if (reqp->aio_req_next)
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
	}
}

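/*
 * Called when an event port that may carry PORT_SOURCE_AIO events is
 * being closed: mark every pending request bound to this port with
 * AIO_CLOSE_PORT, wait for those transactions to drain, then remove
 * the port's events from the port done queue and free the requests.
 */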
/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t *aiop;
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	int counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every newly
	 * created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	reqp = aiop->aio_portpending;
	for (counter = 0; reqp != NULL; reqp = reqp->aio_req_next) {
		if (reqp->aio_req_portkev && (reqp->aio_req_port == port)) {
			reqp->aio_req_flags |= AIO_CLOSE_PORT;
			counter++;
		}
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 * check port doneq
	 */

	reqp = aiop->aio_portq;
	headp = NULL;
	for (; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		if (reqp->aio_req_port == port) {
			/* discard event */
			aio_req_remove_portq(aiop, reqp);
			port_free_event(reqp->aio_req_portkev);
			/* put request in temporary queue */
			reqp->aio_req_next = headp;
			headp = reqp;
		}
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}