1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/proc.h> 29 #include <sys/file.h> 30 #include <sys/errno.h> 31 #include <sys/param.h> 32 #include <sys/sysmacros.h> 33 #include <sys/cmn_err.h> 34 #include <sys/systm.h> 35 #include <vm/as.h> 36 #include <vm/page.h> 37 #include <sys/uio.h> 38 #include <sys/kmem.h> 39 #include <sys/debug.h> 40 #include <sys/aio_impl.h> 41 #include <sys/epm.h> 42 #include <sys/fs/snode.h> 43 #include <sys/siginfo.h> 44 #include <sys/cpuvar.h> 45 #include <sys/tnf_probe.h> 46 #include <sys/conf.h> 47 #include <sys/sdt.h> 48 49 int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *); 50 void aio_done(struct buf *); 51 void aphysio_unlock(aio_req_t *); 52 void aio_cleanup(int); 53 void aio_cleanup_exit(void); 54 55 /* 56 * private functions 57 */ 58 static void aio_sigev_send(proc_t *, sigqueue_t *); 59 static void aio_hash_delete(aio_t *, aio_req_t *); 60 static void aio_lio_free(aio_t *, aio_lio_t *); 61 static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int); 62 static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int); 63 static void aio_cleanup_pollq(aio_t *, aio_req_t *, int); 64 static void aio_cleanup_portq(aio_t *, aio_req_t *, int); 65 66 /* 67 * async version of physio() that doesn't wait synchronously 68 * for the driver's strategy routine to complete. 69 */ 70 71 int 72 aphysio( 73 int (*strategy)(struct buf *), 74 int (*cancel)(struct buf *), 75 dev_t dev, 76 int rw, 77 void (*mincnt)(struct buf *), 78 struct aio_req *aio) 79 { 80 struct uio *uio = aio->aio_uio; 81 aio_req_t *reqp = (aio_req_t *)aio->aio_private; 82 struct buf *bp = &reqp->aio_req_buf; 83 struct iovec *iov; 84 struct as *as; 85 char *a; 86 int error; 87 size_t c; 88 struct page **pplist; 89 struct dev_ops *ops = devopsp[getmajor(dev)]; 90 91 if (uio->uio_loffset < 0) 92 return (EINVAL); 93 #ifdef _ILP32 94 /* 95 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents 96 * the maximum size that can be supported by the IO subsystem. 97 * XXX this code assumes a D_64BIT driver. 98 */ 99 if (uio->uio_loffset > SPEC_MAXOFFSET_T) 100 return (EINVAL); 101 #endif /* _ILP32 */ 102 103 TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */, 104 tnf_opaque, bp, bp, 105 tnf_device, device, dev, 106 tnf_offset, blkno, btodt(uio->uio_loffset), 107 tnf_size, size, uio->uio_iov->iov_len, 108 tnf_bioflags, rw, rw); 109 110 if (rw == B_READ) { 111 CPU_STATS_ADD_K(sys, phread, 1); 112 } else { 113 CPU_STATS_ADD_K(sys, phwrite, 1); 114 } 115 116 iov = uio->uio_iov; 117 sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL); 118 sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL); 119 120 bp->b_error = 0; 121 bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw; 122 bp->b_edev = dev; 123 bp->b_dev = cmpdev(dev); 124 bp->b_lblkno = btodt(uio->uio_loffset); 125 bp->b_offset = uio->uio_loffset; 126 (void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO, 127 (void *)bp->b_edev, (void **)&bp->b_dip); 128 129 /* 130 * Clustering: Clustering can set the b_iodone, b_forw and 131 * b_proc fields to cluster-specifc values. 132 */ 133 if (bp->b_iodone == NULL) { 134 bp->b_iodone = (int (*)()) aio_done; 135 /* b_forw points at an aio_req_t structure */ 136 bp->b_forw = (struct buf *)reqp; 137 bp->b_proc = curproc; 138 } 139 140 a = bp->b_un.b_addr = iov->iov_base; 141 c = bp->b_bcount = iov->iov_len; 142 143 (*mincnt)(bp); 144 if (bp->b_bcount != iov->iov_len) 145 return (ENOTSUP); 146 147 as = bp->b_proc->p_as; 148 149 error = as_pagelock(as, &pplist, a, 150 c, rw == B_READ? S_WRITE : S_READ); 151 if (error != 0) { 152 bp->b_flags |= B_ERROR; 153 bp->b_error = error; 154 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW); 155 return (error); 156 } 157 reqp->aio_req_flags |= AIO_PAGELOCKDONE; 158 bp->b_shadow = pplist; 159 if (pplist != NULL) { 160 bp->b_flags |= B_SHADOW; 161 } 162 163 if (cancel != anocancel) 164 cmn_err(CE_PANIC, 165 "aphysio: cancellation not supported, use anocancel"); 166 167 reqp->aio_req_cancel = cancel; 168 169 DTRACE_IO1(start, struct buf *, bp); 170 171 return ((*strategy)(bp)); 172 } 173 174 /*ARGSUSED*/ 175 int 176 anocancel(struct buf *bp) 177 { 178 return (ENXIO); 179 } 180 181 /* 182 * Called from biodone(). 183 * Notify process that a pending AIO has finished. 184 */ 185 186 /* 187 * Clustering: This function is made non-static as it is used 188 * by clustering s/w as contract private interface. 189 */ 190 191 void 192 aio_done(struct buf *bp) 193 { 194 proc_t *p; 195 struct as *as; 196 aio_req_t *reqp; 197 aio_lio_t *head = NULL; 198 aio_t *aiop; 199 sigqueue_t *sigev = NULL; 200 sigqueue_t *lio_sigev = NULL; 201 port_kevent_t *pkevp = NULL; 202 port_kevent_t *lio_pkevp = NULL; 203 int fd; 204 int cleanupqflag; 205 int pollqflag; 206 int portevpend; 207 void (*func)(); 208 int use_port = 0; 209 int reqp_flags = 0; 210 211 p = bp->b_proc; 212 as = p->p_as; 213 reqp = (aio_req_t *)bp->b_forw; 214 fd = reqp->aio_req_fd; 215 216 TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */, 217 tnf_opaque, bp, bp, 218 tnf_device, device, bp->b_edev, 219 tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset), 220 tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len, 221 tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE))); 222 223 /* 224 * mapout earlier so that more kmem is available when aio is 225 * heavily used. bug #1262082 226 */ 227 if (bp->b_flags & B_REMAPPED) 228 bp_mapout(bp); 229 230 /* decrement fd's ref count by one, now that aio request is done. */ 231 areleasef(fd, P_FINFO(p)); 232 233 aiop = p->p_aio; 234 ASSERT(aiop != NULL); 235 236 mutex_enter(&aiop->aio_portq_mutex); 237 mutex_enter(&aiop->aio_mutex); 238 ASSERT(aiop->aio_pending > 0); 239 ASSERT(reqp->aio_req_flags & AIO_PENDING); 240 aiop->aio_pending--; 241 reqp->aio_req_flags &= ~AIO_PENDING; 242 reqp_flags = reqp->aio_req_flags; 243 if ((pkevp = reqp->aio_req_portkev) != NULL) { 244 /* Event port notification is desired for this transaction */ 245 if (reqp->aio_req_flags & AIO_CLOSE_PORT) { 246 /* 247 * The port is being closed and it is waiting for 248 * pending asynchronous I/O transactions to complete. 249 */ 250 portevpend = --aiop->aio_portpendcnt; 251 aio_deq(&aiop->aio_portpending, reqp); 252 aio_enq(&aiop->aio_portq, reqp, 0); 253 mutex_exit(&aiop->aio_mutex); 254 mutex_exit(&aiop->aio_portq_mutex); 255 port_send_event(pkevp); 256 if (portevpend == 0) 257 cv_broadcast(&aiop->aio_portcv); 258 return; 259 } 260 261 if (aiop->aio_flags & AIO_CLEANUP) { 262 /* 263 * aio_cleanup_thread() is waiting for completion of 264 * transactions. 265 */ 266 mutex_enter(&as->a_contents); 267 aio_deq(&aiop->aio_portpending, reqp); 268 aio_enq(&aiop->aio_portcleanupq, reqp, 0); 269 cv_signal(&aiop->aio_cleanupcv); 270 mutex_exit(&as->a_contents); 271 mutex_exit(&aiop->aio_mutex); 272 mutex_exit(&aiop->aio_portq_mutex); 273 return; 274 } 275 276 aio_deq(&aiop->aio_portpending, reqp); 277 aio_enq(&aiop->aio_portq, reqp, 0); 278 279 use_port = 1; 280 } else { 281 /* 282 * when the AIO_CLEANUP flag is enabled for this 283 * process, or when the AIO_POLL bit is set for 284 * this request, special handling is required. 285 * otherwise the request is put onto the doneq. 286 */ 287 cleanupqflag = (aiop->aio_flags & AIO_CLEANUP); 288 pollqflag = (reqp->aio_req_flags & AIO_POLL); 289 if (cleanupqflag | pollqflag) { 290 291 if (cleanupqflag) 292 mutex_enter(&as->a_contents); 293 294 /* 295 * requests with their AIO_POLL bit set are put 296 * on the pollq, requests with sigevent structures 297 * or with listio heads are put on the notifyq, and 298 * the remaining requests don't require any special 299 * cleanup handling, so they're put onto the default 300 * cleanupq. 301 */ 302 if (pollqflag) 303 aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ); 304 else if (reqp->aio_req_sigqp || reqp->aio_req_lio) 305 aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ); 306 else 307 aio_enq(&aiop->aio_cleanupq, reqp, 308 AIO_CLEANUPQ); 309 310 if (cleanupqflag) { 311 cv_signal(&aiop->aio_cleanupcv); 312 mutex_exit(&as->a_contents); 313 mutex_exit(&aiop->aio_mutex); 314 mutex_exit(&aiop->aio_portq_mutex); 315 } else { 316 ASSERT(pollqflag); 317 /* block aio_cleanup_exit until we're done */ 318 aiop->aio_flags |= AIO_DONE_ACTIVE; 319 mutex_exit(&aiop->aio_mutex); 320 mutex_exit(&aiop->aio_portq_mutex); 321 /* 322 * let the cleanup processing happen from an AST 323 * set an AST on all threads in this process 324 */ 325 mutex_enter(&p->p_lock); 326 set_proc_ast(p); 327 mutex_exit(&p->p_lock); 328 mutex_enter(&aiop->aio_mutex); 329 /* wakeup anybody waiting in aiowait() */ 330 cv_broadcast(&aiop->aio_waitcv); 331 332 /* wakeup aio_cleanup_exit if needed */ 333 if (aiop->aio_flags & AIO_CLEANUP) 334 cv_signal(&aiop->aio_cleanupcv); 335 aiop->aio_flags &= ~AIO_DONE_ACTIVE; 336 mutex_exit(&aiop->aio_mutex); 337 } 338 return; 339 } 340 341 /* 342 * save req's sigevent pointer, and check its 343 * value after releasing aio_mutex lock. 344 */ 345 sigev = reqp->aio_req_sigqp; 346 reqp->aio_req_sigqp = NULL; 347 348 /* put request on done queue. */ 349 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 350 } /* portkevent */ 351 352 /* 353 * when list IO notification is enabled, a notification or 354 * signal is sent only when all entries in the list are done. 355 */ 356 if ((head = reqp->aio_req_lio) != NULL) { 357 ASSERT(head->lio_refcnt > 0); 358 if (--head->lio_refcnt == 0) { 359 /* 360 * save lio's sigevent pointer, and check 361 * its value after releasing aio_mutex lock. 362 */ 363 lio_sigev = head->lio_sigqp; 364 head->lio_sigqp = NULL; 365 cv_signal(&head->lio_notify); 366 if (head->lio_port >= 0 && 367 (lio_pkevp = head->lio_portkev) != NULL) 368 head->lio_port = -1; 369 } 370 } 371 372 /* 373 * if AIO_WAITN set then 374 * send signal only when we reached the 375 * required amount of IO's finished 376 * or when all IO's are done 377 */ 378 if (aiop->aio_flags & AIO_WAITN) { 379 if (aiop->aio_waitncnt > 0) 380 aiop->aio_waitncnt--; 381 if (aiop->aio_pending == 0 || 382 aiop->aio_waitncnt == 0) 383 cv_broadcast(&aiop->aio_waitcv); 384 } else { 385 cv_broadcast(&aiop->aio_waitcv); 386 } 387 388 mutex_exit(&aiop->aio_mutex); 389 mutex_exit(&aiop->aio_portq_mutex); 390 391 /* 392 * Could the cleanup thread be waiting for AIO with locked 393 * resources to finish? 394 * Ideally in that case cleanup thread should block on cleanupcv, 395 * but there is a window, where it could miss to see a new aio 396 * request that sneaked in. 397 */ 398 mutex_enter(&as->a_contents); 399 if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as)) 400 cv_broadcast(&as->a_cv); 401 mutex_exit(&as->a_contents); 402 403 if (sigev) 404 aio_sigev_send(p, sigev); 405 else if (!use_port && head == NULL) { 406 /* 407 * Send a SIGIO signal when the process has a handler enabled. 408 */ 409 if ((func = PTOU(p)->u_signal[SIGIO - 1]) != 410 SIG_DFL && (func != SIG_IGN)) { 411 psignal(p, SIGIO); 412 mutex_enter(&aiop->aio_mutex); 413 reqp->aio_req_flags |= AIO_SIGNALLED; 414 mutex_exit(&aiop->aio_mutex); 415 } 416 } 417 if (pkevp) 418 port_send_event(pkevp); 419 if (lio_sigev) 420 aio_sigev_send(p, lio_sigev); 421 if (lio_pkevp) 422 port_send_event(lio_pkevp); 423 } 424 425 /* 426 * send a queued signal to the specified process when 427 * the event signal is non-NULL. A return value of 1 428 * will indicate that a signal is queued, and 0 means that 429 * no signal was specified, nor sent. 430 */ 431 static void 432 aio_sigev_send(proc_t *p, sigqueue_t *sigev) 433 { 434 ASSERT(sigev != NULL); 435 436 mutex_enter(&p->p_lock); 437 sigaddqa(p, NULL, sigev); 438 mutex_exit(&p->p_lock); 439 } 440 441 /* 442 * special case handling for zero length requests. the aio request 443 * short circuits the normal completion path since all that's required 444 * to complete this request is to copyout a zero to the aio request's 445 * return value. 446 */ 447 void 448 aio_zerolen(aio_req_t *reqp) 449 { 450 451 struct buf *bp = &reqp->aio_req_buf; 452 453 reqp->aio_req_flags |= AIO_ZEROLEN; 454 455 bp->b_forw = (struct buf *)reqp; 456 bp->b_proc = curproc; 457 458 bp->b_resid = 0; 459 bp->b_flags = 0; 460 461 aio_done(bp); 462 } 463 464 /* 465 * unlock pages previously locked by as_pagelock 466 */ 467 void 468 aphysio_unlock(aio_req_t *reqp) 469 { 470 struct buf *bp; 471 struct iovec *iov; 472 int flags; 473 474 if (reqp->aio_req_flags & AIO_PHYSIODONE) 475 return; 476 477 reqp->aio_req_flags |= AIO_PHYSIODONE; 478 479 if (reqp->aio_req_flags & AIO_ZEROLEN) 480 return; 481 482 bp = &reqp->aio_req_buf; 483 iov = reqp->aio_req_uio.uio_iov; 484 flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ); 485 if (reqp->aio_req_flags & AIO_PAGELOCKDONE) { 486 as_pageunlock(bp->b_proc->p_as, 487 bp->b_flags & B_SHADOW ? bp->b_shadow : NULL, 488 iov->iov_base, iov->iov_len, flags); 489 reqp->aio_req_flags &= ~AIO_PAGELOCKDONE; 490 } 491 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW); 492 bp->b_flags |= B_DONE; 493 } 494 495 /* 496 * deletes a requests id from the hash table of outstanding io. 497 */ 498 static void 499 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp) 500 { 501 long index; 502 aio_result_t *resultp = reqp->aio_req_resultp; 503 aio_req_t *current; 504 aio_req_t **nextp; 505 506 index = AIO_HASH(resultp); 507 nextp = (aiop->aio_hash + index); 508 while ((current = *nextp) != NULL) { 509 if (current->aio_req_resultp == resultp) { 510 *nextp = current->aio_hash_next; 511 return; 512 } 513 nextp = ¤t->aio_hash_next; 514 } 515 } 516 517 /* 518 * Put a list head struct onto its free list. 519 */ 520 static void 521 aio_lio_free(aio_t *aiop, aio_lio_t *head) 522 { 523 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 524 525 if (head->lio_sigqp != NULL) 526 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 527 head->lio_next = aiop->aio_lio_free; 528 aiop->aio_lio_free = head; 529 } 530 531 /* 532 * Put a reqp onto the freelist. 533 */ 534 void 535 aio_req_free(aio_t *aiop, aio_req_t *reqp) 536 { 537 aio_lio_t *liop; 538 539 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 540 541 if (reqp->aio_req_portkev) { 542 port_free_event(reqp->aio_req_portkev); 543 reqp->aio_req_portkev = NULL; 544 } 545 546 if ((liop = reqp->aio_req_lio) != NULL) { 547 if (--liop->lio_nent == 0) 548 aio_lio_free(aiop, liop); 549 reqp->aio_req_lio = NULL; 550 } 551 if (reqp->aio_req_sigqp != NULL) { 552 kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t)); 553 reqp->aio_req_sigqp = NULL; 554 } 555 reqp->aio_req_next = aiop->aio_free; 556 reqp->aio_req_prev = NULL; 557 aiop->aio_free = reqp; 558 aiop->aio_outstanding--; 559 if (aiop->aio_outstanding == 0) 560 cv_broadcast(&aiop->aio_waitcv); 561 aio_hash_delete(aiop, reqp); 562 } 563 564 /* 565 * Put a reqp onto the freelist. 566 */ 567 void 568 aio_req_free_port(aio_t *aiop, aio_req_t *reqp) 569 { 570 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 571 572 reqp->aio_req_next = aiop->aio_free; 573 reqp->aio_req_prev = NULL; 574 aiop->aio_free = reqp; 575 aiop->aio_outstanding--; 576 aio_hash_delete(aiop, reqp); 577 } 578 579 580 /* 581 * Verify the integrity of a queue. 582 */ 583 #if defined(DEBUG) 584 static void 585 aio_verify_queue(aio_req_t *head, 586 aio_req_t *entry_present, aio_req_t *entry_missing) 587 { 588 aio_req_t *reqp; 589 int found = 0; 590 int present = 0; 591 592 if ((reqp = head) != NULL) { 593 do { 594 ASSERT(reqp->aio_req_prev->aio_req_next == reqp); 595 ASSERT(reqp->aio_req_next->aio_req_prev == reqp); 596 if (entry_present == reqp) 597 found++; 598 if (entry_missing == reqp) 599 present++; 600 } while ((reqp = reqp->aio_req_next) != head); 601 } 602 ASSERT(entry_present == NULL || found == 1); 603 ASSERT(entry_missing == NULL || present == 0); 604 } 605 #else 606 #define aio_verify_queue(x, y, z) 607 #endif 608 609 /* 610 * Put a request onto the tail of a queue. 611 */ 612 void 613 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new) 614 { 615 aio_req_t *head; 616 aio_req_t *prev; 617 618 aio_verify_queue(*qhead, NULL, reqp); 619 620 if ((head = *qhead) == NULL) { 621 reqp->aio_req_next = reqp; 622 reqp->aio_req_prev = reqp; 623 *qhead = reqp; 624 } else { 625 reqp->aio_req_next = head; 626 reqp->aio_req_prev = prev = head->aio_req_prev; 627 prev->aio_req_next = reqp; 628 head->aio_req_prev = reqp; 629 } 630 reqp->aio_req_flags |= qflg_new; 631 } 632 633 /* 634 * Remove a request from its queue. 635 */ 636 void 637 aio_deq(aio_req_t **qhead, aio_req_t *reqp) 638 { 639 aio_verify_queue(*qhead, reqp, NULL); 640 641 if (reqp->aio_req_next == reqp) { 642 *qhead = NULL; 643 } else { 644 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 645 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 646 if (*qhead == reqp) 647 *qhead = reqp->aio_req_next; 648 } 649 reqp->aio_req_next = NULL; 650 reqp->aio_req_prev = NULL; 651 } 652 653 /* 654 * concatenate a specified queue with the cleanupq. the specified 655 * queue is put onto the tail of the cleanupq. all elements on the 656 * specified queue should have their aio_req_flags field cleared. 657 */ 658 /*ARGSUSED*/ 659 void 660 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg) 661 { 662 aio_req_t *cleanupqhead, *q2tail; 663 aio_req_t *reqp = q2; 664 665 do { 666 ASSERT(reqp->aio_req_flags & qflg); 667 reqp->aio_req_flags &= ~qflg; 668 reqp->aio_req_flags |= AIO_CLEANUPQ; 669 } while ((reqp = reqp->aio_req_next) != q2); 670 671 cleanupqhead = aiop->aio_cleanupq; 672 if (cleanupqhead == NULL) 673 aiop->aio_cleanupq = q2; 674 else { 675 cleanupqhead->aio_req_prev->aio_req_next = q2; 676 q2tail = q2->aio_req_prev; 677 q2tail->aio_req_next = cleanupqhead; 678 q2->aio_req_prev = cleanupqhead->aio_req_prev; 679 cleanupqhead->aio_req_prev = q2tail; 680 } 681 } 682 683 /* 684 * cleanup aio requests that are on the per-process poll queue. 685 */ 686 void 687 aio_cleanup(int flag) 688 { 689 aio_t *aiop = curproc->p_aio; 690 aio_req_t *pollqhead, *cleanupqhead, *notifyqhead; 691 aio_req_t *cleanupport; 692 aio_req_t *portq = NULL; 693 void (*func)(); 694 int signalled = 0; 695 int qflag = 0; 696 int exitflg; 697 698 ASSERT(aiop != NULL); 699 700 if (flag == AIO_CLEANUP_EXIT) 701 exitflg = AIO_CLEANUP_EXIT; 702 else 703 exitflg = 0; 704 705 /* 706 * We need to get the aio_cleanupq_mutex because we are calling 707 * aio_cleanup_cleanupq() 708 */ 709 mutex_enter(&aiop->aio_cleanupq_mutex); 710 /* 711 * take all the requests off the cleanupq, the notifyq, 712 * and the pollq. 713 */ 714 mutex_enter(&aiop->aio_mutex); 715 if ((cleanupqhead = aiop->aio_cleanupq) != NULL) { 716 aiop->aio_cleanupq = NULL; 717 qflag++; 718 } 719 if ((notifyqhead = aiop->aio_notifyq) != NULL) { 720 aiop->aio_notifyq = NULL; 721 qflag++; 722 } 723 if ((pollqhead = aiop->aio_pollq) != NULL) { 724 aiop->aio_pollq = NULL; 725 qflag++; 726 } 727 if (flag) { 728 if ((portq = aiop->aio_portq) != NULL) 729 qflag++; 730 731 if ((cleanupport = aiop->aio_portcleanupq) != NULL) { 732 aiop->aio_portcleanupq = NULL; 733 qflag++; 734 } 735 } 736 mutex_exit(&aiop->aio_mutex); 737 738 /* 739 * return immediately if cleanupq, pollq, and 740 * notifyq are all empty. someone else must have 741 * emptied them. 742 */ 743 if (!qflag) { 744 mutex_exit(&aiop->aio_cleanupq_mutex); 745 return; 746 } 747 748 /* 749 * do cleanup for the various queues. 750 */ 751 if (cleanupqhead) 752 signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg); 753 mutex_exit(&aiop->aio_cleanupq_mutex); 754 if (notifyqhead) 755 signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg); 756 if (pollqhead) 757 aio_cleanup_pollq(aiop, pollqhead, exitflg); 758 if (flag && (cleanupport || portq)) 759 aio_cleanup_portq(aiop, cleanupport, exitflg); 760 761 if (exitflg) 762 return; 763 764 /* 765 * If we have an active aio_cleanup_thread it's possible for 766 * this routine to push something on to the done queue after 767 * an aiowait/aiosuspend thread has already decided to block. 768 * This being the case, we need a cv_broadcast here to wake 769 * these threads up. It is simpler and cleaner to do this 770 * broadcast here than in the individual cleanup routines. 771 */ 772 773 mutex_enter(&aiop->aio_mutex); 774 cv_broadcast(&aiop->aio_waitcv); 775 mutex_exit(&aiop->aio_mutex); 776 777 /* 778 * Only if the process wasn't already signalled, 779 * determine if a SIGIO signal should be delievered. 780 */ 781 if (!signalled && 782 (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL && 783 func != SIG_IGN) 784 psignal(curproc, SIGIO); 785 } 786 787 788 /* 789 * Do cleanup for every element of the port cleanup queue. 790 */ 791 static void 792 aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag) 793 { 794 aio_req_t *reqp; 795 aio_req_t *next; 796 aio_req_t *headp; 797 aio_lio_t *liop; 798 799 /* first check the portq */ 800 if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) { 801 mutex_enter(&aiop->aio_mutex); 802 if (aiop->aio_flags & AIO_CLEANUP) 803 aiop->aio_flags |= AIO_CLEANUP_PORT; 804 mutex_exit(&aiop->aio_mutex); 805 806 /* 807 * It is not allowed to hold locks during aphysio_unlock(). 808 * The aio_done() interrupt function will try to acquire 809 * aio_mutex and aio_portq_mutex. Therefore we disconnect 810 * the portq list from the aiop for the duration of the 811 * aphysio_unlock() loop below. 812 */ 813 mutex_enter(&aiop->aio_portq_mutex); 814 headp = aiop->aio_portq; 815 aiop->aio_portq = NULL; 816 mutex_exit(&aiop->aio_portq_mutex); 817 if ((reqp = headp) != NULL) { 818 do { 819 next = reqp->aio_req_next; 820 aphysio_unlock(reqp); 821 if (exitflag) { 822 mutex_enter(&aiop->aio_mutex); 823 aio_req_free(aiop, reqp); 824 mutex_exit(&aiop->aio_mutex); 825 } 826 } while ((reqp = next) != headp); 827 } 828 829 if (headp != NULL && exitflag == 0) { 830 /* move unlocked requests back to the port queue */ 831 aio_req_t *newq; 832 833 mutex_enter(&aiop->aio_portq_mutex); 834 if ((newq = aiop->aio_portq) != NULL) { 835 aio_req_t *headprev = headp->aio_req_prev; 836 aio_req_t *newqprev = newq->aio_req_prev; 837 838 headp->aio_req_prev = newqprev; 839 newq->aio_req_prev = headprev; 840 headprev->aio_req_next = newq; 841 newqprev->aio_req_next = headp; 842 } 843 aiop->aio_portq = headp; 844 cv_broadcast(&aiop->aio_portcv); 845 mutex_exit(&aiop->aio_portq_mutex); 846 } 847 } 848 849 /* now check the port cleanup queue */ 850 if ((reqp = cleanupq) == NULL) 851 return; 852 do { 853 next = reqp->aio_req_next; 854 aphysio_unlock(reqp); 855 if (exitflag) { 856 mutex_enter(&aiop->aio_mutex); 857 aio_req_free(aiop, reqp); 858 mutex_exit(&aiop->aio_mutex); 859 } else { 860 mutex_enter(&aiop->aio_portq_mutex); 861 aio_enq(&aiop->aio_portq, reqp, 0); 862 mutex_exit(&aiop->aio_portq_mutex); 863 port_send_event(reqp->aio_req_portkev); 864 if ((liop = reqp->aio_req_lio) != NULL) { 865 int send_event = 0; 866 867 mutex_enter(&aiop->aio_mutex); 868 ASSERT(liop->lio_refcnt > 0); 869 if (--liop->lio_refcnt == 0) { 870 if (liop->lio_port >= 0 && 871 liop->lio_portkev) { 872 liop->lio_port = -1; 873 send_event = 1; 874 } 875 } 876 mutex_exit(&aiop->aio_mutex); 877 if (send_event) 878 port_send_event(liop->lio_portkev); 879 } 880 } 881 } while ((reqp = next) != cleanupq); 882 } 883 884 /* 885 * Do cleanup for every element of the cleanupq. 886 */ 887 static int 888 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg) 889 { 890 aio_req_t *reqp, *next; 891 int signalled = 0; 892 893 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 894 895 /* 896 * Since aio_req_done() or aio_req_find() use the HASH list to find 897 * the required requests, they could potentially take away elements 898 * if they are already done (AIO_DONEQ is set). 899 * The aio_cleanupq_mutex protects the queue for the duration of the 900 * loop from aio_req_done() and aio_req_find(). 901 */ 902 if ((reqp = qhead) == NULL) 903 return (0); 904 do { 905 ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ); 906 ASSERT(reqp->aio_req_portkev == NULL); 907 next = reqp->aio_req_next; 908 aphysio_unlock(reqp); 909 mutex_enter(&aiop->aio_mutex); 910 if (exitflg) 911 aio_req_free(aiop, reqp); 912 else 913 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 914 if (!exitflg && reqp->aio_req_flags & AIO_SIGNALLED) 915 signalled++; 916 mutex_exit(&aiop->aio_mutex); 917 } while ((reqp = next) != qhead); 918 return (signalled); 919 } 920 921 /* 922 * do cleanup for every element of the notify queue. 923 */ 924 static int 925 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg) 926 { 927 aio_req_t *reqp, *next; 928 aio_lio_t *liohead; 929 sigqueue_t *sigev, *lio_sigev = NULL; 930 int signalled = 0; 931 932 if ((reqp = qhead) == NULL) 933 return (0); 934 do { 935 ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ); 936 next = reqp->aio_req_next; 937 aphysio_unlock(reqp); 938 if (exitflg) { 939 mutex_enter(&aiop->aio_mutex); 940 aio_req_free(aiop, reqp); 941 mutex_exit(&aiop->aio_mutex); 942 } else { 943 mutex_enter(&aiop->aio_mutex); 944 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 945 sigev = reqp->aio_req_sigqp; 946 reqp->aio_req_sigqp = NULL; 947 if ((liohead = reqp->aio_req_lio) != NULL) { 948 ASSERT(liohead->lio_refcnt > 0); 949 if (--liohead->lio_refcnt == 0) { 950 cv_signal(&liohead->lio_notify); 951 lio_sigev = liohead->lio_sigqp; 952 liohead->lio_sigqp = NULL; 953 } 954 } 955 mutex_exit(&aiop->aio_mutex); 956 if (sigev) { 957 signalled++; 958 aio_sigev_send(reqp->aio_req_buf.b_proc, 959 sigev); 960 } 961 if (lio_sigev) { 962 signalled++; 963 aio_sigev_send(reqp->aio_req_buf.b_proc, 964 lio_sigev); 965 } 966 } 967 } while ((reqp = next) != qhead); 968 969 return (signalled); 970 } 971 972 /* 973 * Do cleanup for every element of the poll queue. 974 */ 975 static void 976 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg) 977 { 978 aio_req_t *reqp, *next; 979 980 /* 981 * As no other threads should be accessing the queue at this point, 982 * it isn't necessary to hold aio_mutex while we traverse its elements. 983 */ 984 if ((reqp = qhead) == NULL) 985 return; 986 do { 987 ASSERT(reqp->aio_req_flags & AIO_POLLQ); 988 next = reqp->aio_req_next; 989 aphysio_unlock(reqp); 990 if (exitflg) { 991 mutex_enter(&aiop->aio_mutex); 992 aio_req_free(aiop, reqp); 993 mutex_exit(&aiop->aio_mutex); 994 } else { 995 aio_copyout_result(reqp); 996 mutex_enter(&aiop->aio_mutex); 997 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 998 mutex_exit(&aiop->aio_mutex); 999 } 1000 } while ((reqp = next) != qhead); 1001 } 1002 1003 /* 1004 * called by exit(). waits for all outstanding kaio to finish 1005 * before the kaio resources are freed. 1006 */ 1007 void 1008 aio_cleanup_exit(void) 1009 { 1010 proc_t *p = curproc; 1011 aio_t *aiop = p->p_aio; 1012 aio_req_t *reqp, *next, *head; 1013 aio_lio_t *nxtlio, *liop; 1014 1015 /* 1016 * wait for all outstanding kaio to complete. process 1017 * is now single-threaded; no other kaio requests can 1018 * happen once aio_pending is zero. 1019 */ 1020 mutex_enter(&aiop->aio_mutex); 1021 aiop->aio_flags |= AIO_CLEANUP; 1022 while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE)) 1023 cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex); 1024 mutex_exit(&aiop->aio_mutex); 1025 1026 /* cleanup the cleanup-thread queues. */ 1027 aio_cleanup(AIO_CLEANUP_EXIT); 1028 1029 /* 1030 * Although this process is now single-threaded, we 1031 * still need to protect ourselves against a race with 1032 * aio_cleanup_dr_delete_memory(). 1033 */ 1034 mutex_enter(&p->p_lock); 1035 1036 /* 1037 * free up the done queue's resources. 1038 */ 1039 if ((head = aiop->aio_doneq) != NULL) { 1040 aiop->aio_doneq = NULL; 1041 reqp = head; 1042 do { 1043 next = reqp->aio_req_next; 1044 aphysio_unlock(reqp); 1045 kmem_free(reqp, sizeof (struct aio_req_t)); 1046 } while ((reqp = next) != head); 1047 } 1048 /* 1049 * release aio request freelist. 1050 */ 1051 for (reqp = aiop->aio_free; reqp != NULL; reqp = next) { 1052 next = reqp->aio_req_next; 1053 kmem_free(reqp, sizeof (struct aio_req_t)); 1054 } 1055 1056 /* 1057 * release io list head freelist. 1058 */ 1059 for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) { 1060 nxtlio = liop->lio_next; 1061 kmem_free(liop, sizeof (aio_lio_t)); 1062 } 1063 1064 if (aiop->aio_iocb) 1065 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 1066 1067 mutex_destroy(&aiop->aio_mutex); 1068 mutex_destroy(&aiop->aio_portq_mutex); 1069 mutex_destroy(&aiop->aio_cleanupq_mutex); 1070 p->p_aio = NULL; 1071 mutex_exit(&p->p_lock); 1072 kmem_free(aiop, sizeof (struct aio)); 1073 } 1074 1075 /* 1076 * copy out aio request's result to a user-level result_t buffer. 1077 */ 1078 void 1079 aio_copyout_result(aio_req_t *reqp) 1080 { 1081 struct buf *bp; 1082 struct iovec *iov; 1083 void *resultp; 1084 int error; 1085 size_t retval; 1086 1087 if (reqp->aio_req_flags & AIO_COPYOUTDONE) 1088 return; 1089 1090 reqp->aio_req_flags |= AIO_COPYOUTDONE; 1091 1092 iov = reqp->aio_req_uio.uio_iov; 1093 bp = &reqp->aio_req_buf; 1094 /* "resultp" points to user-level result_t buffer */ 1095 resultp = (void *)reqp->aio_req_resultp; 1096 if (bp->b_flags & B_ERROR) { 1097 if (bp->b_error) 1098 error = bp->b_error; 1099 else 1100 error = EIO; 1101 retval = (size_t)-1; 1102 } else { 1103 error = 0; 1104 retval = iov->iov_len - bp->b_resid; 1105 } 1106 #ifdef _SYSCALL32_IMPL 1107 if (get_udatamodel() == DATAMODEL_NATIVE) { 1108 (void) sulword(&((aio_result_t *)resultp)->aio_return, retval); 1109 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1110 } else { 1111 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1112 (int)retval); 1113 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1114 } 1115 #else 1116 (void) suword32(&((aio_result_t *)resultp)->aio_return, retval); 1117 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1118 #endif 1119 } 1120 1121 1122 void 1123 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp) 1124 { 1125 int errno; 1126 size_t retval; 1127 1128 if (bp->b_flags & B_ERROR) { 1129 if (bp->b_error) 1130 errno = bp->b_error; 1131 else 1132 errno = EIO; 1133 retval = (size_t)-1; 1134 } else { 1135 errno = 0; 1136 retval = iov->iov_len - bp->b_resid; 1137 } 1138 #ifdef _SYSCALL32_IMPL 1139 if (get_udatamodel() == DATAMODEL_NATIVE) { 1140 (void) sulword(&((aio_result_t *)resultp)->aio_return, retval); 1141 (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno); 1142 } else { 1143 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1144 (int)retval); 1145 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno); 1146 } 1147 #else 1148 (void) suword32(&((aio_result_t *)resultp)->aio_return, retval); 1149 (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno); 1150 #endif 1151 } 1152 1153 /* 1154 * This function is used to remove a request from the done queue. 1155 */ 1156 1157 void 1158 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp) 1159 { 1160 ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex)); 1161 while (aiop->aio_portq == NULL) { 1162 /* 1163 * aio_portq is set to NULL when aio_cleanup_portq() 1164 * is working with the event queue. 1165 * The aio_cleanup_thread() uses aio_cleanup_portq() 1166 * to unlock all AIO buffers with completed transactions. 1167 * Wait here until aio_cleanup_portq() restores the 1168 * list of completed transactions in aio_portq. 1169 */ 1170 cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex); 1171 } 1172 aio_deq(&aiop->aio_portq, reqp); 1173 } 1174 1175 /* ARGSUSED */ 1176 void 1177 aio_close_port(void *arg, int port, pid_t pid, int lastclose) 1178 { 1179 aio_t *aiop; 1180 aio_req_t *reqp; 1181 aio_req_t *next; 1182 aio_req_t *headp; 1183 int counter; 1184 1185 if (arg == NULL) 1186 aiop = curproc->p_aio; 1187 else 1188 aiop = (aio_t *)arg; 1189 1190 /* 1191 * The PORT_SOURCE_AIO source is always associated with every new 1192 * created port by default. 1193 * If no asynchronous I/O transactions were associated with the port 1194 * then the aiop pointer will still be set to NULL. 1195 */ 1196 if (aiop == NULL) 1197 return; 1198 1199 /* 1200 * Within a process event ports can be used to collect events other 1201 * than PORT_SOURCE_AIO events. At the same time the process can submit 1202 * asynchronous I/Os transactions which are not associated with the 1203 * current port. 1204 * The current process oriented model of AIO uses a sigle queue for 1205 * pending events. On close the pending queue (queue of asynchronous 1206 * I/O transactions using event port notification) must be scanned 1207 * to detect and handle pending I/Os using the current port. 1208 */ 1209 mutex_enter(&aiop->aio_portq_mutex); 1210 mutex_enter(&aiop->aio_mutex); 1211 counter = 0; 1212 if ((headp = aiop->aio_portpending) != NULL) { 1213 reqp = headp; 1214 do { 1215 if (reqp->aio_req_portkev && 1216 reqp->aio_req_port == port) { 1217 reqp->aio_req_flags |= AIO_CLOSE_PORT; 1218 counter++; 1219 } 1220 } while ((reqp = reqp->aio_req_next) != headp); 1221 } 1222 if (counter == 0) { 1223 /* no AIOs pending */ 1224 mutex_exit(&aiop->aio_mutex); 1225 mutex_exit(&aiop->aio_portq_mutex); 1226 return; 1227 } 1228 aiop->aio_portpendcnt += counter; 1229 mutex_exit(&aiop->aio_mutex); 1230 while (aiop->aio_portpendcnt) 1231 cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex); 1232 1233 /* 1234 * all pending AIOs are completed. 1235 * check port doneq 1236 */ 1237 headp = NULL; 1238 if ((reqp = aiop->aio_portq) != NULL) { 1239 do { 1240 next = reqp->aio_req_next; 1241 if (reqp->aio_req_port == port) { 1242 /* dequeue request and discard event */ 1243 aio_req_remove_portq(aiop, reqp); 1244 port_free_event(reqp->aio_req_portkev); 1245 /* put request in temporary queue */ 1246 reqp->aio_req_next = headp; 1247 headp = reqp; 1248 } 1249 } while ((reqp = next) != aiop->aio_portq); 1250 } 1251 mutex_exit(&aiop->aio_portq_mutex); 1252 1253 /* headp points to the list of requests to be discarded */ 1254 for (reqp = headp; reqp != NULL; reqp = next) { 1255 next = reqp->aio_req_next; 1256 aphysio_unlock(reqp); 1257 mutex_enter(&aiop->aio_mutex); 1258 aio_req_free_port(aiop, reqp); 1259 mutex_exit(&aiop->aio_mutex); 1260 } 1261 1262 if (aiop->aio_flags & AIO_CLEANUP) 1263 cv_broadcast(&aiop->aio_waitcv); 1264 } 1265 1266 /* 1267 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread 1268 * to kick start the aio_cleanup_thread for the give process to do the 1269 * necessary cleanup. 1270 * This is needed so that delete_memory_thread can obtain writer locks 1271 * on pages that need to be relocated during a dr memory delete operation, 1272 * otherwise a deadly embrace may occur. 1273 */ 1274 int 1275 aio_cleanup_dr_delete_memory(proc_t *procp) 1276 { 1277 struct aio *aiop = procp->p_aio; 1278 struct as *as = procp->p_as; 1279 int ret = 0; 1280 1281 ASSERT(MUTEX_HELD(&procp->p_lock)); 1282 1283 mutex_enter(&as->a_contents); 1284 1285 if (aiop != NULL) { 1286 aiop->aio_rqclnup = 1; 1287 cv_broadcast(&as->a_cv); 1288 ret = 1; 1289 } 1290 mutex_exit(&as->a_contents); 1291 return (ret); 1292 } 1293