/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static void aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */

int
aphysio(
        int (*strategy)(struct buf *),
        int (*cancel)(struct buf *),
        dev_t dev,
        int rw,
        void (*mincnt)(struct buf *),
        struct aio_req *aio)
{
        struct uio *uio = aio->aio_uio;
        aio_req_t *reqp = (aio_req_t *)aio->aio_private;
        struct buf *bp = &reqp->aio_req_buf;
        struct iovec *iov;
        struct as *as;
        char *a;
        int error;
        size_t c;
        struct page **pplist;
        struct dev_ops *ops = devopsp[getmajor(dev)];

        if (uio->uio_loffset < 0)
                return (EINVAL);
#ifdef  _ILP32
        /*
         * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
         * the maximum size that can be supported by the IO subsystem.
         * XXX this code assumes a D_64BIT driver.
         */
        if (uio->uio_loffset > SPEC_MAXOFFSET_T)
                return (EINVAL);
#endif  /* _ILP32 */

        TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
            tnf_opaque, bp, bp,
            tnf_device, device, dev,
            tnf_offset, blkno, btodt(uio->uio_loffset),
            tnf_size, size, uio->uio_iov->iov_len,
            tnf_bioflags, rw, rw);

        if (rw == B_READ) {
                CPU_STATS_ADD_K(sys, phread, 1);
        } else {
                CPU_STATS_ADD_K(sys, phwrite, 1);
        }

        iov = uio->uio_iov;
        sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
        sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

        bp->b_error = 0;
        bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
        bp->b_edev = dev;
        bp->b_dev = cmpdev(dev);
        bp->b_lblkno = btodt(uio->uio_loffset);
        bp->b_offset = uio->uio_loffset;
        (void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
            (void *)bp->b_edev, (void **)&bp->b_dip);

        /*
         * Clustering: Clustering can set the b_iodone, b_forw and
         * b_proc fields to cluster-specific values.
         */
        if (bp->b_iodone == NULL) {
                bp->b_iodone = (int (*)()) aio_done;
                /* b_forw points at an aio_req_t structure */
                bp->b_forw = (struct buf *)reqp;
                bp->b_proc = curproc;
        }

        a = bp->b_un.b_addr = iov->iov_base;
        c = bp->b_bcount = iov->iov_len;

        (*mincnt)(bp);
        if (bp->b_bcount != iov->iov_len)
                return (ENOTSUP);

        as = bp->b_proc->p_as;

        error = as_pagelock(as, &pplist, a,
            c, rw == B_READ? S_WRITE : S_READ);
        if (error != 0) {
                bp->b_flags |= B_ERROR;
                bp->b_error = error;
                bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
                return (error);
        }
        reqp->aio_req_flags |= AIO_PAGELOCKDONE;
        bp->b_shadow = pplist;
        if (pplist != NULL) {
                bp->b_flags |= B_SHADOW;
        }

        if (cancel != anocancel)
                cmn_err(CE_PANIC,
                    "aphysio: cancellation not supported, use anocancel");

        reqp->aio_req_cancel = cancel;

        DTRACE_IO1(start, struct buf *, bp);

        return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
        return (ENXIO);
}
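
/*
 * Illustrative sketch (not part of this file): aphysio() is the kernel
 * half of a driver's aread(9E)/awrite(9E) entry points.  A hypothetical
 * character driver "xx" would typically wire it up roughly like this,
 * passing anocancel() because request cancellation is not supported:
 *
 *	static int
 *	xxaread(dev_t dev, struct aio_req *aio, cred_t *credp)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_READ,
 *		    minphys, aio));
 *	}
 *
 *	static int
 *	xxawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_WRITE,
 *		    minphys, aio));
 *	}
 *
 * xxstrategy stands for the driver's strategy(9E) routine and minphys()
 * bounds the transfer size; the "xx" names are placeholders.
 */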

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as contract private interface.
 */

void
aio_done(struct buf *bp)
{
        proc_t *p;
        struct as *as;
        aio_req_t *reqp;
        aio_lio_t *head = NULL;
        aio_t *aiop;
        sigqueue_t *sigev = NULL;
        sigqueue_t *lio_sigev = NULL;
        port_kevent_t *pkevp = NULL;
        port_kevent_t *lio_pkevp = NULL;
        int fd;
        int cleanupqflag;
        int pollqflag;
        int portevpend;
        void (*func)();
        int use_port = 0;

        p = bp->b_proc;
        reqp = (aio_req_t *)bp->b_forw;
        fd = reqp->aio_req_fd;

        TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
            tnf_opaque, bp, bp,
            tnf_device, device, bp->b_edev,
            tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
            tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
            tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

        /*
         * mapout earlier so that more kmem is available when aio is
         * heavily used. bug #1262082
         */
        if (bp->b_flags & B_REMAPPED)
                bp_mapout(bp);

        /* decrement fd's ref count by one, now that aio request is done. */
        areleasef(fd, P_FINFO(p));

        aiop = p->p_aio;
        ASSERT(aiop != NULL);

        mutex_enter(&aiop->aio_portq_mutex);
        mutex_enter(&aiop->aio_mutex);
        ASSERT(aiop->aio_pending > 0);
        ASSERT(reqp->aio_req_flags & AIO_PENDING);
        aiop->aio_pending--;
        reqp->aio_req_flags &= ~AIO_PENDING;
        if ((pkevp = reqp->aio_req_portkev) != NULL) {
                /* Event port notification is desired for this transaction */
                if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
                        /*
                         * The port is being closed and it is waiting for
                         * pending asynchronous I/O transactions to complete.
                         */
                        portevpend = --aiop->aio_portpendcnt;
                        aio_deq(&aiop->aio_portpending, reqp);
                        aio_enq(&aiop->aio_portq, reqp, 0);
                        mutex_exit(&aiop->aio_mutex);
                        mutex_exit(&aiop->aio_portq_mutex);
                        port_send_event(pkevp);
                        if (portevpend == 0)
                                cv_broadcast(&aiop->aio_portcv);
                        return;
                }

                if (aiop->aio_flags & AIO_CLEANUP) {
                        /*
                         * aio_cleanup_thread() is waiting for completion of
                         * transactions.
                         */
                        as = p->p_as;
                        mutex_enter(&as->a_contents);
                        aio_deq(&aiop->aio_portpending, reqp);
                        aio_enq(&aiop->aio_portcleanupq, reqp, 0);
                        cv_signal(&aiop->aio_cleanupcv);
                        mutex_exit(&as->a_contents);
                        mutex_exit(&aiop->aio_mutex);
                        mutex_exit(&aiop->aio_portq_mutex);
                        return;
                }

                aio_deq(&aiop->aio_portpending, reqp);
                aio_enq(&aiop->aio_portq, reqp, 0);

                use_port = 1;
        } else {
                /*
                 * when the AIO_CLEANUP flag is enabled for this
                 * process, or when the AIO_POLL bit is set for
                 * this request, special handling is required.
                 * otherwise the request is put onto the doneq.
                 */
                cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
                pollqflag = (reqp->aio_req_flags & AIO_POLL);
                if (cleanupqflag | pollqflag) {

                        if (cleanupqflag) {
                                as = p->p_as;
                                mutex_enter(&as->a_contents);
                        }

                        /*
                         * requests with their AIO_POLL bit set are put
                         * on the pollq, requests with sigevent structures
                         * or with listio heads are put on the notifyq, and
                         * the remaining requests don't require any special
                         * cleanup handling, so they're put onto the default
                         * cleanupq.
                         */
                        if (pollqflag)
                                aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
                        else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
                                aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
                        else
                                aio_enq(&aiop->aio_cleanupq, reqp,
                                    AIO_CLEANUPQ);

                        if (cleanupqflag) {
                                cv_signal(&aiop->aio_cleanupcv);
                                mutex_exit(&as->a_contents);
                                mutex_exit(&aiop->aio_mutex);
                                mutex_exit(&aiop->aio_portq_mutex);
                        } else {
                                ASSERT(pollqflag);
                                /* block aio_cleanup_exit until we're done */
                                aiop->aio_flags |= AIO_DONE_ACTIVE;
                                mutex_exit(&aiop->aio_mutex);
                                mutex_exit(&aiop->aio_portq_mutex);
                                /*
                                 * let the cleanup processing happen from an
                                 * AST. set an AST on all threads in this
                                 * process.
                                 */
                                mutex_enter(&p->p_lock);
                                set_proc_ast(p);
                                mutex_exit(&p->p_lock);
                                mutex_enter(&aiop->aio_mutex);
                                /* wakeup anybody waiting in aiowait() */
                                cv_broadcast(&aiop->aio_waitcv);

                                /* wakeup aio_cleanup_exit if needed */
                                if (aiop->aio_flags & AIO_CLEANUP)
                                        cv_signal(&aiop->aio_cleanupcv);
                                aiop->aio_flags &= ~AIO_DONE_ACTIVE;
                                mutex_exit(&aiop->aio_mutex);
                        }
                        return;
                }

                /*
                 * save req's sigevent pointer, and check its
                 * value after releasing aio_mutex lock.
346 */ 347 sigev = reqp->aio_req_sigqp; 348 reqp->aio_req_sigqp = NULL; 349 350 /* put request on done queue. */ 351 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 352 } /* portkevent */ 353 354 /* 355 * when list IO notification is enabled, a notification or 356 * signal is sent only when all entries in the list are done. 357 */ 358 if ((head = reqp->aio_req_lio) != NULL) { 359 ASSERT(head->lio_refcnt > 0); 360 if (--head->lio_refcnt == 0) { 361 /* 362 * save lio's sigevent pointer, and check 363 * its value after releasing aio_mutex lock. 364 */ 365 lio_sigev = head->lio_sigqp; 366 head->lio_sigqp = NULL; 367 cv_signal(&head->lio_notify); 368 if (head->lio_port >= 0 && 369 (lio_pkevp = head->lio_portkev) != NULL) 370 head->lio_port = -1; 371 } 372 } 373 374 /* 375 * if AIO_WAITN set then 376 * send signal only when we reached the 377 * required amount of IO's finished 378 * or when all IO's are done 379 */ 380 if (aiop->aio_flags & AIO_WAITN) { 381 if (aiop->aio_waitncnt > 0) 382 aiop->aio_waitncnt--; 383 if (aiop->aio_pending == 0 || 384 aiop->aio_waitncnt == 0) 385 cv_broadcast(&aiop->aio_waitcv); 386 } else { 387 cv_broadcast(&aiop->aio_waitcv); 388 } 389 390 mutex_exit(&aiop->aio_mutex); 391 mutex_exit(&aiop->aio_portq_mutex); 392 393 if (sigev) 394 aio_sigev_send(p, sigev); 395 else if (!use_port && head == NULL) { 396 /* 397 * Send a SIGIO signal when the process has a handler enabled. 398 */ 399 if ((func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL && 400 func != SIG_IGN) 401 psignal(p, SIGIO); 402 } 403 if (pkevp) 404 port_send_event(pkevp); 405 if (lio_sigev) 406 aio_sigev_send(p, lio_sigev); 407 if (lio_pkevp) 408 port_send_event(lio_pkevp); 409 } 410 411 /* 412 * send a queued signal to the specified process when 413 * the event signal is non-NULL. A return value of 1 414 * will indicate that a signal is queued, and 0 means that 415 * no signal was specified, nor sent. 416 */ 417 static void 418 aio_sigev_send(proc_t *p, sigqueue_t *sigev) 419 { 420 ASSERT(sigev != NULL); 421 422 mutex_enter(&p->p_lock); 423 sigaddqa(p, NULL, sigev); 424 mutex_exit(&p->p_lock); 425 } 426 427 /* 428 * special case handling for zero length requests. the aio request 429 * short circuits the normal completion path since all that's required 430 * to complete this request is to copyout a zero to the aio request's 431 * return value. 432 */ 433 void 434 aio_zerolen(aio_req_t *reqp) 435 { 436 437 struct buf *bp = &reqp->aio_req_buf; 438 439 reqp->aio_req_flags |= AIO_ZEROLEN; 440 441 bp->b_forw = (struct buf *)reqp; 442 bp->b_proc = curproc; 443 444 bp->b_resid = 0; 445 bp->b_flags = 0; 446 447 aio_done(bp); 448 } 449 450 /* 451 * unlock pages previously locked by as_pagelock 452 */ 453 void 454 aphysio_unlock(aio_req_t *reqp) 455 { 456 struct buf *bp; 457 struct iovec *iov; 458 int flags; 459 460 if (reqp->aio_req_flags & AIO_PHYSIODONE) 461 return; 462 463 reqp->aio_req_flags |= AIO_PHYSIODONE; 464 465 if (reqp->aio_req_flags & AIO_ZEROLEN) 466 return; 467 468 bp = &reqp->aio_req_buf; 469 iov = reqp->aio_req_uio.uio_iov; 470 flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ); 471 if (reqp->aio_req_flags & AIO_PAGELOCKDONE) { 472 as_pageunlock(bp->b_proc->p_as, 473 bp->b_flags & B_SHADOW ? bp->b_shadow : NULL, 474 iov->iov_base, iov->iov_len, flags); 475 reqp->aio_req_flags &= ~AIO_PAGELOCKDONE; 476 } 477 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW); 478 bp->b_flags |= B_DONE; 479 } 480 481 /* 482 * deletes a requests id from the hash table of outstanding io. 
483 */ 484 static void 485 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp) 486 { 487 long index; 488 aio_result_t *resultp = reqp->aio_req_resultp; 489 aio_req_t *current; 490 aio_req_t **nextp; 491 492 index = AIO_HASH(resultp); 493 nextp = (aiop->aio_hash + index); 494 while ((current = *nextp) != NULL) { 495 if (current->aio_req_resultp == resultp) { 496 *nextp = current->aio_hash_next; 497 return; 498 } 499 nextp = ¤t->aio_hash_next; 500 } 501 } 502 503 /* 504 * Put a list head struct onto its free list. 505 */ 506 static void 507 aio_lio_free(aio_t *aiop, aio_lio_t *head) 508 { 509 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 510 511 if (head->lio_sigqp != NULL) 512 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 513 head->lio_next = aiop->aio_lio_free; 514 aiop->aio_lio_free = head; 515 } 516 517 /* 518 * Put a reqp onto the freelist. 519 */ 520 void 521 aio_req_free(aio_t *aiop, aio_req_t *reqp) 522 { 523 aio_lio_t *liop; 524 525 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 526 527 if (reqp->aio_req_portkev) { 528 port_free_event(reqp->aio_req_portkev); 529 reqp->aio_req_portkev = NULL; 530 } 531 532 if ((liop = reqp->aio_req_lio) != NULL) { 533 if (--liop->lio_nent == 0) 534 aio_lio_free(aiop, liop); 535 reqp->aio_req_lio = NULL; 536 } 537 if (reqp->aio_req_sigqp != NULL) { 538 kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t)); 539 reqp->aio_req_sigqp = NULL; 540 } 541 reqp->aio_req_next = aiop->aio_free; 542 reqp->aio_req_prev = NULL; 543 aiop->aio_free = reqp; 544 aiop->aio_outstanding--; 545 if (aiop->aio_outstanding == 0) 546 cv_broadcast(&aiop->aio_waitcv); 547 aio_hash_delete(aiop, reqp); 548 } 549 550 /* 551 * Put a reqp onto the freelist. 552 */ 553 void 554 aio_req_free_port(aio_t *aiop, aio_req_t *reqp) 555 { 556 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 557 558 reqp->aio_req_next = aiop->aio_free; 559 reqp->aio_req_prev = NULL; 560 aiop->aio_free = reqp; 561 aiop->aio_outstanding--; 562 aio_hash_delete(aiop, reqp); 563 } 564 565 566 /* 567 * Verify the integrity of a queue. 568 */ 569 #if defined(DEBUG) 570 static void 571 aio_verify_queue(aio_req_t *head, 572 aio_req_t *entry_present, aio_req_t *entry_missing) 573 { 574 aio_req_t *reqp; 575 int found = 0; 576 int present = 0; 577 578 if ((reqp = head) != NULL) { 579 do { 580 ASSERT(reqp->aio_req_prev->aio_req_next == reqp); 581 ASSERT(reqp->aio_req_next->aio_req_prev == reqp); 582 if (entry_present == reqp) 583 found++; 584 if (entry_missing == reqp) 585 present++; 586 } while ((reqp = reqp->aio_req_next) != head); 587 } 588 ASSERT(entry_present == NULL || found == 1); 589 ASSERT(entry_missing == NULL || present == 0); 590 } 591 #else 592 #define aio_verify_queue(x, y, z) 593 #endif 594 595 /* 596 * Put a request onto the tail of a queue. 597 */ 598 void 599 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new) 600 { 601 aio_req_t *head; 602 aio_req_t *prev; 603 604 aio_verify_queue(*qhead, NULL, reqp); 605 606 if ((head = *qhead) == NULL) { 607 reqp->aio_req_next = reqp; 608 reqp->aio_req_prev = reqp; 609 *qhead = reqp; 610 } else { 611 reqp->aio_req_next = head; 612 reqp->aio_req_prev = prev = head->aio_req_prev; 613 prev->aio_req_next = reqp; 614 head->aio_req_prev = reqp; 615 } 616 reqp->aio_req_flags |= qflg_new; 617 } 618 619 /* 620 * Remove a request from its queue. 
621 */ 622 void 623 aio_deq(aio_req_t **qhead, aio_req_t *reqp) 624 { 625 aio_verify_queue(*qhead, reqp, NULL); 626 627 if (reqp->aio_req_next == reqp) { 628 *qhead = NULL; 629 } else { 630 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 631 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 632 if (*qhead == reqp) 633 *qhead = reqp->aio_req_next; 634 } 635 reqp->aio_req_next = NULL; 636 reqp->aio_req_prev = NULL; 637 } 638 639 /* 640 * concatenate a specified queue with the cleanupq. the specified 641 * queue is put onto the tail of the cleanupq. all elements on the 642 * specified queue should have their aio_req_flags field cleared. 643 */ 644 /*ARGSUSED*/ 645 void 646 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg) 647 { 648 aio_req_t *cleanupqhead, *q2tail; 649 aio_req_t *reqp = q2; 650 651 do { 652 ASSERT(reqp->aio_req_flags & qflg); 653 reqp->aio_req_flags &= ~qflg; 654 reqp->aio_req_flags |= AIO_CLEANUPQ; 655 } while ((reqp = reqp->aio_req_next) != q2); 656 657 cleanupqhead = aiop->aio_cleanupq; 658 if (cleanupqhead == NULL) 659 aiop->aio_cleanupq = q2; 660 else { 661 cleanupqhead->aio_req_prev->aio_req_next = q2; 662 q2tail = q2->aio_req_prev; 663 q2tail->aio_req_next = cleanupqhead; 664 q2->aio_req_prev = cleanupqhead->aio_req_prev; 665 cleanupqhead->aio_req_prev = q2tail; 666 } 667 } 668 669 /* 670 * cleanup aio requests that are on the per-process poll queue. 671 */ 672 void 673 aio_cleanup(int flag) 674 { 675 aio_t *aiop = curproc->p_aio; 676 aio_req_t *pollqhead, *cleanupqhead, *notifyqhead; 677 aio_req_t *cleanupport; 678 aio_req_t *portq = NULL; 679 void (*func)(); 680 int signalled = 0; 681 int qflag = 0; 682 int exitflg; 683 684 ASSERT(aiop != NULL); 685 686 if (flag == AIO_CLEANUP_EXIT) 687 exitflg = AIO_CLEANUP_EXIT; 688 else 689 exitflg = 0; 690 691 /* 692 * We need to get the aio_cleanupq_mutex because we are calling 693 * aio_cleanup_cleanupq() 694 */ 695 mutex_enter(&aiop->aio_cleanupq_mutex); 696 /* 697 * take all the requests off the cleanupq, the notifyq, 698 * and the pollq. 699 */ 700 mutex_enter(&aiop->aio_mutex); 701 if ((cleanupqhead = aiop->aio_cleanupq) != NULL) { 702 aiop->aio_cleanupq = NULL; 703 qflag++; 704 } 705 if ((notifyqhead = aiop->aio_notifyq) != NULL) { 706 aiop->aio_notifyq = NULL; 707 qflag++; 708 } 709 if ((pollqhead = aiop->aio_pollq) != NULL) { 710 aiop->aio_pollq = NULL; 711 qflag++; 712 } 713 if (flag) { 714 if ((portq = aiop->aio_portq) != NULL) 715 qflag++; 716 717 if ((cleanupport = aiop->aio_portcleanupq) != NULL) { 718 aiop->aio_portcleanupq = NULL; 719 qflag++; 720 } 721 } 722 mutex_exit(&aiop->aio_mutex); 723 724 /* 725 * return immediately if cleanupq, pollq, and 726 * notifyq are all empty. someone else must have 727 * emptied them. 728 */ 729 if (!qflag) { 730 mutex_exit(&aiop->aio_cleanupq_mutex); 731 return; 732 } 733 734 /* 735 * do cleanup for the various queues. 736 */ 737 if (cleanupqhead) 738 aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg); 739 mutex_exit(&aiop->aio_cleanupq_mutex); 740 if (notifyqhead) 741 signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg); 742 if (pollqhead) 743 aio_cleanup_pollq(aiop, pollqhead, exitflg); 744 if (flag && (cleanupport || portq)) 745 aio_cleanup_portq(aiop, cleanupport, exitflg); 746 747 if (exitflg) 748 return; 749 750 /* 751 * If we have an active aio_cleanup_thread it's possible for 752 * this routine to push something on to the done queue after 753 * an aiowait/aiosuspend thread has already decided to block. 
         * This being the case, we need a cv_broadcast here to wake
         * these threads up. It is simpler and cleaner to do this
         * broadcast here than in the individual cleanup routines.
         */

        mutex_enter(&aiop->aio_mutex);
        cv_broadcast(&aiop->aio_waitcv);
        mutex_exit(&aiop->aio_mutex);

        /*
         * Only if the process wasn't already signalled,
         * determine if a SIGIO signal should be delivered.
         */
        if (!signalled &&
            (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
            func != SIG_IGN)
                psignal(curproc, SIGIO);
}


/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
        aio_req_t *reqp;
        aio_req_t *next;
        aio_req_t *headp;
        aio_lio_t *liop;

        /* first check the portq */
        if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
                mutex_enter(&aiop->aio_mutex);
                if (aiop->aio_flags & AIO_CLEANUP)
                        aiop->aio_flags |= AIO_CLEANUP_PORT;
                mutex_exit(&aiop->aio_mutex);

                /*
                 * It is not allowed to hold locks during aphysio_unlock().
                 * The aio_done() interrupt function will try to acquire
                 * aio_mutex and aio_portq_mutex. Therefore we disconnect
                 * the portq list from the aiop for the duration of the
                 * aphysio_unlock() loop below.
                 */
                mutex_enter(&aiop->aio_portq_mutex);
                headp = aiop->aio_portq;
                aiop->aio_portq = NULL;
                mutex_exit(&aiop->aio_portq_mutex);
                if ((reqp = headp) != NULL) {
                        do {
                                next = reqp->aio_req_next;
                                aphysio_unlock(reqp);
                                if (exitflag) {
                                        mutex_enter(&aiop->aio_mutex);
                                        aio_req_free(aiop, reqp);
                                        mutex_exit(&aiop->aio_mutex);
                                }
                        } while ((reqp = next) != headp);
                }

                if (headp != NULL && exitflag == 0) {
                        /* move unlocked requests back to the port queue */
                        aio_req_t *newq;

                        mutex_enter(&aiop->aio_portq_mutex);
                        if ((newq = aiop->aio_portq) != NULL) {
                                aio_req_t *headprev = headp->aio_req_prev;
                                aio_req_t *newqprev = newq->aio_req_prev;

                                headp->aio_req_prev = newqprev;
                                newq->aio_req_prev = headprev;
                                headprev->aio_req_next = newq;
                                newqprev->aio_req_next = headp;
                        }
                        aiop->aio_portq = headp;
                        cv_broadcast(&aiop->aio_portcv);
                        mutex_exit(&aiop->aio_portq_mutex);
                }
        }

        /* now check the port cleanup queue */
        if ((reqp = cleanupq) == NULL)
                return;
        do {
                next = reqp->aio_req_next;
                aphysio_unlock(reqp);
                if (exitflag) {
                        mutex_enter(&aiop->aio_mutex);
                        aio_req_free(aiop, reqp);
                        mutex_exit(&aiop->aio_mutex);
                } else {
                        mutex_enter(&aiop->aio_portq_mutex);
                        aio_enq(&aiop->aio_portq, reqp, 0);
                        mutex_exit(&aiop->aio_portq_mutex);
                        port_send_event(reqp->aio_req_portkev);
                        if ((liop = reqp->aio_req_lio) != NULL) {
                                int send_event = 0;

                                mutex_enter(&aiop->aio_mutex);
                                ASSERT(liop->lio_refcnt > 0);
                                if (--liop->lio_refcnt == 0) {
                                        if (liop->lio_port >= 0 &&
                                            liop->lio_portkev) {
                                                liop->lio_port = -1;
                                                send_event = 1;
                                        }
                                }
                                mutex_exit(&aiop->aio_mutex);
                                if (send_event)
                                        port_send_event(liop->lio_portkev);
                        }
                }
        } while ((reqp = next) != cleanupq);
}
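
/*
 * Illustrative sketch (not part of this file): both aio_cleanupq_concat()
 * and the "move unlocked requests back" block above splice two circular
 * doubly-linked rings in O(1) by exchanging the prev/next pointers of the
 * two heads and the two tails.  In terms of the hypothetical node_t used
 * in the sketch after aio_enq():
 *
 *	static void
 *	splice(node_t *q1, node_t *q2)	// append ring q2 after ring q1
 *	{
 *		node_t *t1 = q1->prev;	// tail of first ring
 *		node_t *t2 = q2->prev;	// tail of second ring
 *
 *		t1->next = q2;		// old tail of q1 -> head of q2
 *		q2->prev = t1;
 *		t2->next = q1;		// old tail of q2 wraps to head of q1
 *		q1->prev = t2;
 *	}
 *
 * Both call sites assume the two rings are non-empty and disjoint.
 */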
872 */ 873 static void 874 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg) 875 { 876 aio_req_t *reqp, *next; 877 878 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 879 880 /* 881 * Since aio_req_done() or aio_req_find() use the HASH list to find 882 * the required requests, they could potentially take away elements 883 * if they are already done (AIO_DONEQ is set). 884 * The aio_cleanupq_mutex protects the queue for the duration of the 885 * loop from aio_req_done() and aio_req_find(). 886 */ 887 if ((reqp = qhead) == NULL) 888 return; 889 do { 890 ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ); 891 ASSERT(reqp->aio_req_portkev == NULL); 892 next = reqp->aio_req_next; 893 aphysio_unlock(reqp); 894 mutex_enter(&aiop->aio_mutex); 895 if (exitflg) 896 aio_req_free(aiop, reqp); 897 else 898 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 899 mutex_exit(&aiop->aio_mutex); 900 } while ((reqp = next) != qhead); 901 } 902 903 /* 904 * do cleanup for every element of the notify queue. 905 */ 906 static int 907 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg) 908 { 909 aio_req_t *reqp, *next; 910 aio_lio_t *liohead; 911 sigqueue_t *sigev, *lio_sigev = NULL; 912 int signalled = 0; 913 914 if ((reqp = qhead) == NULL) 915 return (0); 916 do { 917 ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ); 918 next = reqp->aio_req_next; 919 aphysio_unlock(reqp); 920 if (exitflg) { 921 mutex_enter(&aiop->aio_mutex); 922 aio_req_free(aiop, reqp); 923 mutex_exit(&aiop->aio_mutex); 924 } else { 925 mutex_enter(&aiop->aio_mutex); 926 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 927 sigev = reqp->aio_req_sigqp; 928 reqp->aio_req_sigqp = NULL; 929 if ((liohead = reqp->aio_req_lio) != NULL) { 930 ASSERT(liohead->lio_refcnt > 0); 931 if (--liohead->lio_refcnt == 0) { 932 cv_signal(&liohead->lio_notify); 933 lio_sigev = liohead->lio_sigqp; 934 liohead->lio_sigqp = NULL; 935 } 936 } 937 mutex_exit(&aiop->aio_mutex); 938 if (sigev) { 939 signalled++; 940 aio_sigev_send(reqp->aio_req_buf.b_proc, 941 sigev); 942 } 943 if (lio_sigev) { 944 signalled++; 945 aio_sigev_send(reqp->aio_req_buf.b_proc, 946 lio_sigev); 947 } 948 } 949 } while ((reqp = next) != qhead); 950 951 return (signalled); 952 } 953 954 /* 955 * Do cleanup for every element of the poll queue. 956 */ 957 static void 958 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg) 959 { 960 aio_req_t *reqp, *next; 961 962 /* 963 * As no other threads should be accessing the queue at this point, 964 * it isn't necessary to hold aio_mutex while we traverse its elements. 965 */ 966 if ((reqp = qhead) == NULL) 967 return; 968 do { 969 ASSERT(reqp->aio_req_flags & AIO_POLLQ); 970 next = reqp->aio_req_next; 971 aphysio_unlock(reqp); 972 if (exitflg) { 973 mutex_enter(&aiop->aio_mutex); 974 aio_req_free(aiop, reqp); 975 mutex_exit(&aiop->aio_mutex); 976 } else { 977 aio_copyout_result(reqp); 978 mutex_enter(&aiop->aio_mutex); 979 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 980 mutex_exit(&aiop->aio_mutex); 981 } 982 } while ((reqp = next) != qhead); 983 } 984 985 /* 986 * called by exit(). waits for all outstanding kaio to finish 987 * before the kaio resources are freed. 988 */ 989 void 990 aio_cleanup_exit(void) 991 { 992 proc_t *p = curproc; 993 aio_t *aiop = p->p_aio; 994 aio_req_t *reqp, *next, *head; 995 aio_lio_t *nxtlio, *liop; 996 997 /* 998 * wait for all outstanding kaio to complete. process 999 * is now single-threaded; no other kaio requests can 1000 * happen once aio_pending is zero. 
         */
        mutex_enter(&aiop->aio_mutex);
        aiop->aio_flags |= AIO_CLEANUP;
        while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
                cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
        mutex_exit(&aiop->aio_mutex);

        /* cleanup the cleanup-thread queues. */
        aio_cleanup(AIO_CLEANUP_EXIT);

        /*
         * Although this process is now single-threaded, we
         * still need to protect ourselves against a race with
         * aio_cleanup_dr_delete_memory().
         */
        mutex_enter(&p->p_lock);

        /*
         * free up the done queue's resources.
         */
        if ((head = aiop->aio_doneq) != NULL) {
                aiop->aio_doneq = NULL;
                reqp = head;
                do {
                        next = reqp->aio_req_next;
                        aphysio_unlock(reqp);
                        kmem_free(reqp, sizeof (struct aio_req_t));
                } while ((reqp = next) != head);
        }
        /*
         * release aio request freelist.
         */
        for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
                next = reqp->aio_req_next;
                kmem_free(reqp, sizeof (struct aio_req_t));
        }

        /*
         * release io list head freelist.
         */
        for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
                nxtlio = liop->lio_next;
                kmem_free(liop, sizeof (aio_lio_t));
        }

        if (aiop->aio_iocb)
                kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

        mutex_destroy(&aiop->aio_mutex);
        mutex_destroy(&aiop->aio_portq_mutex);
        mutex_destroy(&aiop->aio_cleanupq_mutex);
        p->p_aio = NULL;
        mutex_exit(&p->p_lock);
        kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
        struct buf *bp;
        struct iovec *iov;
        void *resultp;
        int error;
        size_t retval;

        if (reqp->aio_req_flags & AIO_COPYOUTDONE)
                return;

        reqp->aio_req_flags |= AIO_COPYOUTDONE;

        iov = reqp->aio_req_uio.uio_iov;
        bp = &reqp->aio_req_buf;
        /* "resultp" points to user-level result_t buffer */
        resultp = (void *)reqp->aio_req_resultp;
        if (bp->b_flags & B_ERROR) {
                if (bp->b_error)
                        error = bp->b_error;
                else
                        error = EIO;
                retval = (size_t)-1;
        } else {
                error = 0;
                retval = iov->iov_len - bp->b_resid;
        }
#ifdef  _SYSCALL32_IMPL
        if (get_udatamodel() == DATAMODEL_NATIVE) {
                (void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
                (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
        } else {
                (void) suword32(&((aio_result32_t *)resultp)->aio_return,
                    (int)retval);
                (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
        }
#else
        (void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
        (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}
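
/*
 * Illustrative sketch (not part of this file): the suword32()/sulword()
 * stores above fill in the caller's user-level aio_result_t.  Roughly,
 * a consumer of the legacy Solaris aioread()/aiowait() interface sees
 * the values as follows (hypothetical error handling; see aioread(3AIO)
 * and aiowait(3AIO) for the authoritative signatures):
 *
 *	#include <sys/asynch.h>
 *
 *	aio_result_t res;
 *
 *	if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res) == -1)
 *		err(1, "aioread");
 *	(void) aiowait(NULL);			// block until one completes
 *	if (res.aio_return == -1)
 *		errno = res.aio_errno;		// error captured above
 *	else
 *		nbytes = res.aio_return;	// iov_len - b_resid
 */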

void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
        int errno;
        size_t retval;

        if (bp->b_flags & B_ERROR) {
                if (bp->b_error)
                        errno = bp->b_error;
                else
                        errno = EIO;
                retval = (size_t)-1;
        } else {
                errno = 0;
                retval = iov->iov_len - bp->b_resid;
        }
#ifdef  _SYSCALL32_IMPL
        if (get_udatamodel() == DATAMODEL_NATIVE) {
                (void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
                (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
        } else {
                (void) suword32(&((aio_result32_t *)resultp)->aio_return,
                    (int)retval);
                (void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
        }
#else
        (void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
        (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}

/*
 * This function is used to remove a request from the done queue.
 */

void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
        ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
        while (aiop->aio_portq == NULL) {
                /*
                 * aio_portq is set to NULL when aio_cleanup_portq()
                 * is working with the event queue.
                 * The aio_cleanup_thread() uses aio_cleanup_portq()
                 * to unlock all AIO buffers with completed transactions.
                 * Wait here until aio_cleanup_portq() restores the
                 * list of completed transactions in aio_portq.
                 */
                cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
        }
        aio_deq(&aiop->aio_portq, reqp);
}

/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
        aio_t *aiop;
        aio_req_t *reqp;
        aio_req_t *next;
        aio_req_t *headp;
        int counter;

        if (arg == NULL)
                aiop = curproc->p_aio;
        else
                aiop = (aio_t *)arg;

        /*
         * The PORT_SOURCE_AIO source is always associated with every newly
         * created port by default.
         * If no asynchronous I/O transactions were associated with the port
         * then the aiop pointer will still be set to NULL.
         */
        if (aiop == NULL)
                return;

        /*
         * Within a process event ports can be used to collect events other
         * than PORT_SOURCE_AIO events. At the same time the process can submit
         * asynchronous I/O transactions which are not associated with the
         * current port.
         * The current process oriented model of AIO uses a single queue for
         * pending events. On close the pending queue (queue of asynchronous
         * I/O transactions using event port notification) must be scanned
         * to detect and handle pending I/Os using the current port.
         */
        mutex_enter(&aiop->aio_portq_mutex);
        mutex_enter(&aiop->aio_mutex);
        counter = 0;
        if ((headp = aiop->aio_portpending) != NULL) {
                reqp = headp;
                do {
                        if (reqp->aio_req_portkev &&
                            reqp->aio_req_port == port) {
                                reqp->aio_req_flags |= AIO_CLOSE_PORT;
                                counter++;
                        }
                } while ((reqp = reqp->aio_req_next) != headp);
        }
        if (counter == 0) {
                /* no AIOs pending */
                mutex_exit(&aiop->aio_mutex);
                mutex_exit(&aiop->aio_portq_mutex);
                return;
        }
        aiop->aio_portpendcnt += counter;
        mutex_exit(&aiop->aio_mutex);
        while (aiop->aio_portpendcnt)
                cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

        /*
         * all pending AIOs are completed.
         * check port doneq
         */
        headp = NULL;
        if ((reqp = aiop->aio_portq) != NULL) {
                do {
                        next = reqp->aio_req_next;
                        if (reqp->aio_req_port == port) {
                                /* dequeue request and discard event */
                                aio_req_remove_portq(aiop, reqp);
                                port_free_event(reqp->aio_req_portkev);
                                /* put request in temporary queue */
                                reqp->aio_req_next = headp;
                                headp = reqp;
                        }
                } while ((reqp = next) != aiop->aio_portq);
        }
        mutex_exit(&aiop->aio_portq_mutex);

        /* headp points to the list of requests to be discarded */
        for (reqp = headp; reqp != NULL; reqp = next) {
                next = reqp->aio_req_next;
                aphysio_unlock(reqp);
                mutex_enter(&aiop->aio_mutex);
                aio_req_free_port(aiop, reqp);
                mutex_exit(&aiop->aio_mutex);
        }

        if (aiop->aio_flags & AIO_CLEANUP)
                cv_broadcast(&aiop->aio_waitcv);
}

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
        struct aio *aiop = procp->p_aio;
        struct as *as = procp->p_as;
        int ret = 0;

        ASSERT(MUTEX_HELD(&procp->p_lock));

        mutex_enter(&as->a_contents);

        if (aiop != NULL) {
                aiop->aio_rqclnup = 1;
                cv_broadcast(&as->a_cv);
                ret = 1;
        }
        mutex_exit(&as->a_contents);
        return (ret);
}