/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
int aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */

int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int error;
	size_t c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, dev,
	    tnf_offset, blkno, btodt(uio->uio_loffset),
	    tnf_size, size, uio->uio_iov->iov_len,
	    tnf_bioflags, rw, rw);

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as contract private interface.
 */

int
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;
	int send_signal = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, bp->b_edev,
	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/*
	 * decrement fd's ref count by one, now that aio request is done.
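	 * The hold being dropped here pairs with the one taken on the fd
	 * when the request was submitted (in the aio submit path), so the
	 * descriptor is only pinned for the duration of the I/O itself
	 * rather than until the request structure is recycled.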
	 */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return (0);
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return (0);
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an
				 * AST by setting an AST on all threads in
				 * this process.
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return (0);
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
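		 * The signal itself is queued later, after both aio_mutex
		 * and aio_portq_mutex have been dropped, since
		 * aio_sigev_send() needs to take p_lock.  In sketch form:
		 *
		 *	save sigev under aio_mutex
		 *	drop aio_mutex and aio_portq_mutex
		 *	if (sigev != NULL)
		 *		aio_sigev_send(p, sigev);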
345 */ 346 sigev = reqp->aio_req_sigqp; 347 reqp->aio_req_sigqp = NULL; 348 349 /* put request on done queue. */ 350 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 351 } /* portkevent */ 352 353 /* 354 * when list IO notification is enabled, a notification or 355 * signal is sent only when all entries in the list are done. 356 */ 357 if ((head = reqp->aio_req_lio) != NULL) { 358 ASSERT(head->lio_refcnt > 0); 359 if (--head->lio_refcnt == 0) { 360 /* 361 * save lio's sigevent pointer, and check 362 * its value after releasing aio_mutex lock. 363 */ 364 lio_sigev = head->lio_sigqp; 365 head->lio_sigqp = NULL; 366 cv_signal(&head->lio_notify); 367 if (head->lio_port >= 0 && 368 (lio_pkevp = head->lio_portkev) != NULL) 369 head->lio_port = -1; 370 } 371 } 372 373 /* 374 * if AIO_WAITN set then 375 * send signal only when we reached the 376 * required amount of IO's finished 377 * or when all IO's are done 378 */ 379 if (aiop->aio_flags & AIO_WAITN) { 380 if (aiop->aio_waitncnt > 0) 381 aiop->aio_waitncnt--; 382 if (aiop->aio_pending == 0 || 383 aiop->aio_waitncnt == 0) 384 cv_broadcast(&aiop->aio_waitcv); 385 } else { 386 cv_broadcast(&aiop->aio_waitcv); 387 } 388 389 /* 390 * No need to set this flag for pollq, portq, lio requests. 391 * If this is an old Solaris aio request, and the process has 392 * a SIGIO signal handler enabled, then send a SIGIO signal. 393 */ 394 if (!sigev && !use_port && head == NULL && 395 (reqp->aio_req_flags & AIO_SOLARIS) && 396 (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL && 397 (func != SIG_IGN)) { 398 send_signal = 1; 399 reqp->aio_req_flags |= AIO_SIGNALLED; 400 } 401 402 mutex_exit(&aiop->aio_mutex); 403 mutex_exit(&aiop->aio_portq_mutex); 404 405 /* 406 * Could the cleanup thread be waiting for AIO with locked 407 * resources to finish? 408 * Ideally in that case cleanup thread should block on cleanupcv, 409 * but there is a window, where it could miss to see a new aio 410 * request that sneaked in. 411 */ 412 mutex_enter(&as->a_contents); 413 if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as)) 414 cv_broadcast(&as->a_cv); 415 mutex_exit(&as->a_contents); 416 417 if (sigev) 418 aio_sigev_send(p, sigev); 419 else if (send_signal) 420 psignal(p, SIGIO); 421 422 if (pkevp) 423 port_send_event(pkevp); 424 if (lio_sigev) 425 aio_sigev_send(p, lio_sigev); 426 if (lio_pkevp) 427 port_send_event(lio_pkevp); 428 429 return (0); 430 } 431 432 /* 433 * send a queued signal to the specified process when 434 * the event signal is non-NULL. A return value of 1 435 * will indicate that a signal is queued, and 0 means that 436 * no signal was specified, nor sent. 437 */ 438 static void 439 aio_sigev_send(proc_t *p, sigqueue_t *sigev) 440 { 441 ASSERT(sigev != NULL); 442 443 mutex_enter(&p->p_lock); 444 sigaddqa(p, NULL, sigev); 445 mutex_exit(&p->p_lock); 446 } 447 448 /* 449 * special case handling for zero length requests. the aio request 450 * short circuits the normal completion path since all that's required 451 * to complete this request is to copyout a zero to the aio request's 452 * return value. 
453 */ 454 void 455 aio_zerolen(aio_req_t *reqp) 456 { 457 458 struct buf *bp = &reqp->aio_req_buf; 459 460 reqp->aio_req_flags |= AIO_ZEROLEN; 461 462 bp->b_forw = (struct buf *)reqp; 463 bp->b_proc = curproc; 464 465 bp->b_resid = 0; 466 bp->b_flags = 0; 467 468 aio_done(bp); 469 } 470 471 /* 472 * unlock pages previously locked by as_pagelock 473 */ 474 void 475 aphysio_unlock(aio_req_t *reqp) 476 { 477 struct buf *bp; 478 struct iovec *iov; 479 int flags; 480 481 if (reqp->aio_req_flags & AIO_PHYSIODONE) 482 return; 483 484 reqp->aio_req_flags |= AIO_PHYSIODONE; 485 486 if (reqp->aio_req_flags & AIO_ZEROLEN) 487 return; 488 489 bp = &reqp->aio_req_buf; 490 iov = reqp->aio_req_uio.uio_iov; 491 flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ); 492 if (reqp->aio_req_flags & AIO_PAGELOCKDONE) { 493 as_pageunlock(bp->b_proc->p_as, 494 bp->b_flags & B_SHADOW ? bp->b_shadow : NULL, 495 iov->iov_base, iov->iov_len, flags); 496 reqp->aio_req_flags &= ~AIO_PAGELOCKDONE; 497 } 498 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW); 499 bp->b_flags |= B_DONE; 500 } 501 502 /* 503 * deletes a requests id from the hash table of outstanding io. 504 */ 505 static void 506 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp) 507 { 508 long index; 509 aio_result_t *resultp = reqp->aio_req_resultp; 510 aio_req_t *current; 511 aio_req_t **nextp; 512 513 index = AIO_HASH(resultp); 514 nextp = (aiop->aio_hash + index); 515 while ((current = *nextp) != NULL) { 516 if (current->aio_req_resultp == resultp) { 517 *nextp = current->aio_hash_next; 518 return; 519 } 520 nextp = ¤t->aio_hash_next; 521 } 522 } 523 524 /* 525 * Put a list head struct onto its free list. 526 */ 527 static void 528 aio_lio_free(aio_t *aiop, aio_lio_t *head) 529 { 530 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 531 532 if (head->lio_sigqp != NULL) 533 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 534 head->lio_next = aiop->aio_lio_free; 535 aiop->aio_lio_free = head; 536 } 537 538 /* 539 * Put a reqp onto the freelist. 540 */ 541 void 542 aio_req_free(aio_t *aiop, aio_req_t *reqp) 543 { 544 aio_lio_t *liop; 545 546 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 547 548 if (reqp->aio_req_portkev) { 549 port_free_event(reqp->aio_req_portkev); 550 reqp->aio_req_portkev = NULL; 551 } 552 553 if ((liop = reqp->aio_req_lio) != NULL) { 554 if (--liop->lio_nent == 0) 555 aio_lio_free(aiop, liop); 556 reqp->aio_req_lio = NULL; 557 } 558 if (reqp->aio_req_sigqp != NULL) { 559 kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t)); 560 reqp->aio_req_sigqp = NULL; 561 } 562 reqp->aio_req_next = aiop->aio_free; 563 reqp->aio_req_prev = NULL; 564 aiop->aio_free = reqp; 565 aiop->aio_outstanding--; 566 if (aiop->aio_outstanding == 0) 567 cv_broadcast(&aiop->aio_waitcv); 568 aio_hash_delete(aiop, reqp); 569 } 570 571 /* 572 * Put a reqp onto the freelist. 573 */ 574 void 575 aio_req_free_port(aio_t *aiop, aio_req_t *reqp) 576 { 577 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 578 579 reqp->aio_req_next = aiop->aio_free; 580 reqp->aio_req_prev = NULL; 581 aiop->aio_free = reqp; 582 aiop->aio_outstanding--; 583 aio_hash_delete(aiop, reqp); 584 } 585 586 587 /* 588 * Verify the integrity of a queue. 
589 */ 590 #if defined(DEBUG) 591 static void 592 aio_verify_queue(aio_req_t *head, 593 aio_req_t *entry_present, aio_req_t *entry_missing) 594 { 595 aio_req_t *reqp; 596 int found = 0; 597 int present = 0; 598 599 if ((reqp = head) != NULL) { 600 do { 601 ASSERT(reqp->aio_req_prev->aio_req_next == reqp); 602 ASSERT(reqp->aio_req_next->aio_req_prev == reqp); 603 if (entry_present == reqp) 604 found++; 605 if (entry_missing == reqp) 606 present++; 607 } while ((reqp = reqp->aio_req_next) != head); 608 } 609 ASSERT(entry_present == NULL || found == 1); 610 ASSERT(entry_missing == NULL || present == 0); 611 } 612 #else 613 #define aio_verify_queue(x, y, z) 614 #endif 615 616 /* 617 * Put a request onto the tail of a queue. 618 */ 619 void 620 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new) 621 { 622 aio_req_t *head; 623 aio_req_t *prev; 624 625 aio_verify_queue(*qhead, NULL, reqp); 626 627 if ((head = *qhead) == NULL) { 628 reqp->aio_req_next = reqp; 629 reqp->aio_req_prev = reqp; 630 *qhead = reqp; 631 } else { 632 reqp->aio_req_next = head; 633 reqp->aio_req_prev = prev = head->aio_req_prev; 634 prev->aio_req_next = reqp; 635 head->aio_req_prev = reqp; 636 } 637 reqp->aio_req_flags |= qflg_new; 638 } 639 640 /* 641 * Remove a request from its queue. 642 */ 643 void 644 aio_deq(aio_req_t **qhead, aio_req_t *reqp) 645 { 646 aio_verify_queue(*qhead, reqp, NULL); 647 648 if (reqp->aio_req_next == reqp) { 649 *qhead = NULL; 650 } else { 651 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 652 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 653 if (*qhead == reqp) 654 *qhead = reqp->aio_req_next; 655 } 656 reqp->aio_req_next = NULL; 657 reqp->aio_req_prev = NULL; 658 } 659 660 /* 661 * concatenate a specified queue with the cleanupq. the specified 662 * queue is put onto the tail of the cleanupq. all elements on the 663 * specified queue should have their aio_req_flags field cleared. 664 */ 665 /*ARGSUSED*/ 666 void 667 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg) 668 { 669 aio_req_t *cleanupqhead, *q2tail; 670 aio_req_t *reqp = q2; 671 672 do { 673 ASSERT(reqp->aio_req_flags & qflg); 674 reqp->aio_req_flags &= ~qflg; 675 reqp->aio_req_flags |= AIO_CLEANUPQ; 676 } while ((reqp = reqp->aio_req_next) != q2); 677 678 cleanupqhead = aiop->aio_cleanupq; 679 if (cleanupqhead == NULL) 680 aiop->aio_cleanupq = q2; 681 else { 682 cleanupqhead->aio_req_prev->aio_req_next = q2; 683 q2tail = q2->aio_req_prev; 684 q2tail->aio_req_next = cleanupqhead; 685 q2->aio_req_prev = cleanupqhead->aio_req_prev; 686 cleanupqhead->aio_req_prev = q2tail; 687 } 688 } 689 690 /* 691 * cleanup aio requests that are on the per-process poll queue. 692 */ 693 void 694 aio_cleanup(int flag) 695 { 696 aio_t *aiop = curproc->p_aio; 697 aio_req_t *pollqhead, *cleanupqhead, *notifyqhead; 698 aio_req_t *cleanupport; 699 aio_req_t *portq = NULL; 700 void (*func)(); 701 int signalled = 0; 702 int qflag = 0; 703 int exitflg; 704 705 ASSERT(aiop != NULL); 706 707 if (flag == AIO_CLEANUP_EXIT) 708 exitflg = AIO_CLEANUP_EXIT; 709 else 710 exitflg = 0; 711 712 /* 713 * We need to get the aio_cleanupq_mutex because we are calling 714 * aio_cleanup_cleanupq() 715 */ 716 mutex_enter(&aiop->aio_cleanupq_mutex); 717 /* 718 * take all the requests off the cleanupq, the notifyq, 719 * and the pollq. 
720 */ 721 mutex_enter(&aiop->aio_mutex); 722 if ((cleanupqhead = aiop->aio_cleanupq) != NULL) { 723 aiop->aio_cleanupq = NULL; 724 qflag++; 725 } 726 if ((notifyqhead = aiop->aio_notifyq) != NULL) { 727 aiop->aio_notifyq = NULL; 728 qflag++; 729 } 730 if ((pollqhead = aiop->aio_pollq) != NULL) { 731 aiop->aio_pollq = NULL; 732 qflag++; 733 } 734 if (flag) { 735 if ((portq = aiop->aio_portq) != NULL) 736 qflag++; 737 738 if ((cleanupport = aiop->aio_portcleanupq) != NULL) { 739 aiop->aio_portcleanupq = NULL; 740 qflag++; 741 } 742 } 743 mutex_exit(&aiop->aio_mutex); 744 745 /* 746 * return immediately if cleanupq, pollq, and 747 * notifyq are all empty. someone else must have 748 * emptied them. 749 */ 750 if (!qflag) { 751 mutex_exit(&aiop->aio_cleanupq_mutex); 752 return; 753 } 754 755 /* 756 * do cleanup for the various queues. 757 */ 758 if (cleanupqhead) 759 signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg); 760 mutex_exit(&aiop->aio_cleanupq_mutex); 761 if (notifyqhead) 762 signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg); 763 if (pollqhead) 764 aio_cleanup_pollq(aiop, pollqhead, exitflg); 765 if (flag && (cleanupport || portq)) 766 aio_cleanup_portq(aiop, cleanupport, exitflg); 767 768 if (exitflg) 769 return; 770 771 /* 772 * If we have an active aio_cleanup_thread it's possible for 773 * this routine to push something on to the done queue after 774 * an aiowait/aiosuspend thread has already decided to block. 775 * This being the case, we need a cv_broadcast here to wake 776 * these threads up. It is simpler and cleaner to do this 777 * broadcast here than in the individual cleanup routines. 778 */ 779 780 mutex_enter(&aiop->aio_mutex); 781 /* 782 * If there has never been an old solaris aio request 783 * issued by this process, then do not send a SIGIO signal. 784 */ 785 if (!(aiop->aio_flags & AIO_SOLARIS_REQ)) 786 signalled = 1; 787 cv_broadcast(&aiop->aio_waitcv); 788 mutex_exit(&aiop->aio_mutex); 789 790 /* 791 * Only if the process wasn't already signalled, 792 * determine if a SIGIO signal should be delievered. 793 */ 794 if (!signalled && 795 (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL && 796 func != SIG_IGN) 797 psignal(curproc, SIGIO); 798 } 799 800 801 /* 802 * Do cleanup for every element of the port cleanup queue. 803 */ 804 static void 805 aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag) 806 { 807 aio_req_t *reqp; 808 aio_req_t *next; 809 aio_req_t *headp; 810 aio_lio_t *liop; 811 812 /* first check the portq */ 813 if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) { 814 mutex_enter(&aiop->aio_mutex); 815 if (aiop->aio_flags & AIO_CLEANUP) 816 aiop->aio_flags |= AIO_CLEANUP_PORT; 817 mutex_exit(&aiop->aio_mutex); 818 819 /* 820 * It is not allowed to hold locks during aphysio_unlock(). 821 * The aio_done() interrupt function will try to acquire 822 * aio_mutex and aio_portq_mutex. Therefore we disconnect 823 * the portq list from the aiop for the duration of the 824 * aphysio_unlock() loop below. 
825 */ 826 mutex_enter(&aiop->aio_portq_mutex); 827 headp = aiop->aio_portq; 828 aiop->aio_portq = NULL; 829 mutex_exit(&aiop->aio_portq_mutex); 830 if ((reqp = headp) != NULL) { 831 do { 832 next = reqp->aio_req_next; 833 aphysio_unlock(reqp); 834 if (exitflag) { 835 mutex_enter(&aiop->aio_mutex); 836 aio_req_free(aiop, reqp); 837 mutex_exit(&aiop->aio_mutex); 838 } 839 } while ((reqp = next) != headp); 840 } 841 842 if (headp != NULL && exitflag == 0) { 843 /* move unlocked requests back to the port queue */ 844 aio_req_t *newq; 845 846 mutex_enter(&aiop->aio_portq_mutex); 847 if ((newq = aiop->aio_portq) != NULL) { 848 aio_req_t *headprev = headp->aio_req_prev; 849 aio_req_t *newqprev = newq->aio_req_prev; 850 851 headp->aio_req_prev = newqprev; 852 newq->aio_req_prev = headprev; 853 headprev->aio_req_next = newq; 854 newqprev->aio_req_next = headp; 855 } 856 aiop->aio_portq = headp; 857 cv_broadcast(&aiop->aio_portcv); 858 mutex_exit(&aiop->aio_portq_mutex); 859 } 860 } 861 862 /* now check the port cleanup queue */ 863 if ((reqp = cleanupq) == NULL) 864 return; 865 do { 866 next = reqp->aio_req_next; 867 aphysio_unlock(reqp); 868 if (exitflag) { 869 mutex_enter(&aiop->aio_mutex); 870 aio_req_free(aiop, reqp); 871 mutex_exit(&aiop->aio_mutex); 872 } else { 873 mutex_enter(&aiop->aio_portq_mutex); 874 aio_enq(&aiop->aio_portq, reqp, 0); 875 mutex_exit(&aiop->aio_portq_mutex); 876 port_send_event(reqp->aio_req_portkev); 877 if ((liop = reqp->aio_req_lio) != NULL) { 878 int send_event = 0; 879 880 mutex_enter(&aiop->aio_mutex); 881 ASSERT(liop->lio_refcnt > 0); 882 if (--liop->lio_refcnt == 0) { 883 if (liop->lio_port >= 0 && 884 liop->lio_portkev) { 885 liop->lio_port = -1; 886 send_event = 1; 887 } 888 } 889 mutex_exit(&aiop->aio_mutex); 890 if (send_event) 891 port_send_event(liop->lio_portkev); 892 } 893 } 894 } while ((reqp = next) != cleanupq); 895 } 896 897 /* 898 * Do cleanup for every element of the cleanupq. 899 */ 900 static int 901 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg) 902 { 903 aio_req_t *reqp, *next; 904 int signalled = 0; 905 906 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 907 908 /* 909 * Since aio_req_done() or aio_req_find() use the HASH list to find 910 * the required requests, they could potentially take away elements 911 * if they are already done (AIO_DONEQ is set). 912 * The aio_cleanupq_mutex protects the queue for the duration of the 913 * loop from aio_req_done() and aio_req_find(). 914 */ 915 if ((reqp = qhead) == NULL) 916 return (0); 917 do { 918 ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ); 919 ASSERT(reqp->aio_req_portkev == NULL); 920 next = reqp->aio_req_next; 921 aphysio_unlock(reqp); 922 mutex_enter(&aiop->aio_mutex); 923 if (exitflg) 924 aio_req_free(aiop, reqp); 925 else 926 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 927 if (!exitflg) { 928 if (reqp->aio_req_flags & AIO_SIGNALLED) 929 signalled++; 930 else 931 reqp->aio_req_flags |= AIO_SIGNALLED; 932 } 933 mutex_exit(&aiop->aio_mutex); 934 } while ((reqp = next) != qhead); 935 return (signalled); 936 } 937 938 /* 939 * do cleanup for every element of the notify queue. 
940 */ 941 static int 942 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg) 943 { 944 aio_req_t *reqp, *next; 945 aio_lio_t *liohead; 946 sigqueue_t *sigev, *lio_sigev = NULL; 947 int signalled = 0; 948 949 if ((reqp = qhead) == NULL) 950 return (0); 951 do { 952 ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ); 953 next = reqp->aio_req_next; 954 aphysio_unlock(reqp); 955 if (exitflg) { 956 mutex_enter(&aiop->aio_mutex); 957 aio_req_free(aiop, reqp); 958 mutex_exit(&aiop->aio_mutex); 959 } else { 960 mutex_enter(&aiop->aio_mutex); 961 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 962 sigev = reqp->aio_req_sigqp; 963 reqp->aio_req_sigqp = NULL; 964 if ((liohead = reqp->aio_req_lio) != NULL) { 965 ASSERT(liohead->lio_refcnt > 0); 966 if (--liohead->lio_refcnt == 0) { 967 cv_signal(&liohead->lio_notify); 968 lio_sigev = liohead->lio_sigqp; 969 liohead->lio_sigqp = NULL; 970 } 971 } 972 mutex_exit(&aiop->aio_mutex); 973 if (sigev) { 974 signalled++; 975 aio_sigev_send(reqp->aio_req_buf.b_proc, 976 sigev); 977 } 978 if (lio_sigev) { 979 signalled++; 980 aio_sigev_send(reqp->aio_req_buf.b_proc, 981 lio_sigev); 982 } 983 } 984 } while ((reqp = next) != qhead); 985 986 return (signalled); 987 } 988 989 /* 990 * Do cleanup for every element of the poll queue. 991 */ 992 static void 993 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg) 994 { 995 aio_req_t *reqp, *next; 996 997 /* 998 * As no other threads should be accessing the queue at this point, 999 * it isn't necessary to hold aio_mutex while we traverse its elements. 1000 */ 1001 if ((reqp = qhead) == NULL) 1002 return; 1003 do { 1004 ASSERT(reqp->aio_req_flags & AIO_POLLQ); 1005 next = reqp->aio_req_next; 1006 aphysio_unlock(reqp); 1007 if (exitflg) { 1008 mutex_enter(&aiop->aio_mutex); 1009 aio_req_free(aiop, reqp); 1010 mutex_exit(&aiop->aio_mutex); 1011 } else { 1012 aio_copyout_result(reqp); 1013 mutex_enter(&aiop->aio_mutex); 1014 aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ); 1015 mutex_exit(&aiop->aio_mutex); 1016 } 1017 } while ((reqp = next) != qhead); 1018 } 1019 1020 /* 1021 * called by exit(). waits for all outstanding kaio to finish 1022 * before the kaio resources are freed. 1023 */ 1024 void 1025 aio_cleanup_exit(void) 1026 { 1027 proc_t *p = curproc; 1028 aio_t *aiop = p->p_aio; 1029 aio_req_t *reqp, *next, *head; 1030 aio_lio_t *nxtlio, *liop; 1031 1032 /* 1033 * wait for all outstanding kaio to complete. process 1034 * is now single-threaded; no other kaio requests can 1035 * happen once aio_pending is zero. 1036 */ 1037 mutex_enter(&aiop->aio_mutex); 1038 aiop->aio_flags |= AIO_CLEANUP; 1039 while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE)) 1040 cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex); 1041 mutex_exit(&aiop->aio_mutex); 1042 1043 /* cleanup the cleanup-thread queues. */ 1044 aio_cleanup(AIO_CLEANUP_EXIT); 1045 1046 /* 1047 * Although this process is now single-threaded, we 1048 * still need to protect ourselves against a race with 1049 * aio_cleanup_dr_delete_memory(). 1050 */ 1051 mutex_enter(&p->p_lock); 1052 1053 /* 1054 * free up the done queue's resources. 1055 */ 1056 if ((head = aiop->aio_doneq) != NULL) { 1057 aiop->aio_doneq = NULL; 1058 reqp = head; 1059 do { 1060 next = reqp->aio_req_next; 1061 aphysio_unlock(reqp); 1062 kmem_free(reqp, sizeof (struct aio_req_t)); 1063 } while ((reqp = next) != head); 1064 } 1065 /* 1066 * release aio request freelist. 
1067 */ 1068 for (reqp = aiop->aio_free; reqp != NULL; reqp = next) { 1069 next = reqp->aio_req_next; 1070 kmem_free(reqp, sizeof (struct aio_req_t)); 1071 } 1072 1073 /* 1074 * release io list head freelist. 1075 */ 1076 for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) { 1077 nxtlio = liop->lio_next; 1078 kmem_free(liop, sizeof (aio_lio_t)); 1079 } 1080 1081 if (aiop->aio_iocb) 1082 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz); 1083 1084 mutex_destroy(&aiop->aio_mutex); 1085 mutex_destroy(&aiop->aio_portq_mutex); 1086 mutex_destroy(&aiop->aio_cleanupq_mutex); 1087 p->p_aio = NULL; 1088 mutex_exit(&p->p_lock); 1089 kmem_free(aiop, sizeof (struct aio)); 1090 } 1091 1092 /* 1093 * copy out aio request's result to a user-level result_t buffer. 1094 */ 1095 void 1096 aio_copyout_result(aio_req_t *reqp) 1097 { 1098 struct buf *bp; 1099 struct iovec *iov; 1100 void *resultp; 1101 int error; 1102 size_t retval; 1103 1104 if (reqp->aio_req_flags & AIO_COPYOUTDONE) 1105 return; 1106 1107 reqp->aio_req_flags |= AIO_COPYOUTDONE; 1108 1109 iov = reqp->aio_req_uio.uio_iov; 1110 bp = &reqp->aio_req_buf; 1111 /* "resultp" points to user-level result_t buffer */ 1112 resultp = (void *)reqp->aio_req_resultp; 1113 if (bp->b_flags & B_ERROR) { 1114 if (bp->b_error) 1115 error = bp->b_error; 1116 else 1117 error = EIO; 1118 retval = (size_t)-1; 1119 } else { 1120 error = 0; 1121 retval = iov->iov_len - bp->b_resid; 1122 } 1123 #ifdef _SYSCALL32_IMPL 1124 if (get_udatamodel() == DATAMODEL_NATIVE) { 1125 (void) sulword(&((aio_result_t *)resultp)->aio_return, retval); 1126 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1127 } else { 1128 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1129 (int)retval); 1130 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 1131 } 1132 #else 1133 (void) suword32(&((aio_result_t *)resultp)->aio_return, retval); 1134 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 1135 #endif 1136 } 1137 1138 1139 void 1140 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp) 1141 { 1142 int errno; 1143 size_t retval; 1144 1145 if (bp->b_flags & B_ERROR) { 1146 if (bp->b_error) 1147 errno = bp->b_error; 1148 else 1149 errno = EIO; 1150 retval = (size_t)-1; 1151 } else { 1152 errno = 0; 1153 retval = iov->iov_len - bp->b_resid; 1154 } 1155 #ifdef _SYSCALL32_IMPL 1156 if (get_udatamodel() == DATAMODEL_NATIVE) { 1157 (void) sulword(&((aio_result_t *)resultp)->aio_return, retval); 1158 (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno); 1159 } else { 1160 (void) suword32(&((aio_result32_t *)resultp)->aio_return, 1161 (int)retval); 1162 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno); 1163 } 1164 #else 1165 (void) suword32(&((aio_result_t *)resultp)->aio_return, retval); 1166 (void) suword32(&((aio_result_t *)resultp)->aio_errno, errno); 1167 #endif 1168 } 1169 1170 /* 1171 * This function is used to remove a request from the done queue. 1172 */ 1173 1174 void 1175 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp) 1176 { 1177 ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex)); 1178 while (aiop->aio_portq == NULL) { 1179 /* 1180 * aio_portq is set to NULL when aio_cleanup_portq() 1181 * is working with the event queue. 1182 * The aio_cleanup_thread() uses aio_cleanup_portq() 1183 * to unlock all AIO buffers with completed transactions. 1184 * Wait here until aio_cleanup_portq() restores the 1185 * list of completed transactions in aio_portq. 
1186 */ 1187 cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex); 1188 } 1189 aio_deq(&aiop->aio_portq, reqp); 1190 } 1191 1192 /* ARGSUSED */ 1193 void 1194 aio_close_port(void *arg, int port, pid_t pid, int lastclose) 1195 { 1196 aio_t *aiop; 1197 aio_req_t *reqp; 1198 aio_req_t *next; 1199 aio_req_t *headp; 1200 int counter; 1201 1202 if (arg == NULL) 1203 aiop = curproc->p_aio; 1204 else 1205 aiop = (aio_t *)arg; 1206 1207 /* 1208 * The PORT_SOURCE_AIO source is always associated with every new 1209 * created port by default. 1210 * If no asynchronous I/O transactions were associated with the port 1211 * then the aiop pointer will still be set to NULL. 1212 */ 1213 if (aiop == NULL) 1214 return; 1215 1216 /* 1217 * Within a process event ports can be used to collect events other 1218 * than PORT_SOURCE_AIO events. At the same time the process can submit 1219 * asynchronous I/Os transactions which are not associated with the 1220 * current port. 1221 * The current process oriented model of AIO uses a sigle queue for 1222 * pending events. On close the pending queue (queue of asynchronous 1223 * I/O transactions using event port notification) must be scanned 1224 * to detect and handle pending I/Os using the current port. 1225 */ 1226 mutex_enter(&aiop->aio_portq_mutex); 1227 mutex_enter(&aiop->aio_mutex); 1228 counter = 0; 1229 if ((headp = aiop->aio_portpending) != NULL) { 1230 reqp = headp; 1231 do { 1232 if (reqp->aio_req_portkev && 1233 reqp->aio_req_port == port) { 1234 reqp->aio_req_flags |= AIO_CLOSE_PORT; 1235 counter++; 1236 } 1237 } while ((reqp = reqp->aio_req_next) != headp); 1238 } 1239 if (counter == 0) { 1240 /* no AIOs pending */ 1241 mutex_exit(&aiop->aio_mutex); 1242 mutex_exit(&aiop->aio_portq_mutex); 1243 return; 1244 } 1245 aiop->aio_portpendcnt += counter; 1246 mutex_exit(&aiop->aio_mutex); 1247 while (aiop->aio_portpendcnt) 1248 cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex); 1249 1250 /* 1251 * all pending AIOs are completed. 1252 * check port doneq 1253 */ 1254 headp = NULL; 1255 if ((reqp = aiop->aio_portq) != NULL) { 1256 do { 1257 next = reqp->aio_req_next; 1258 if (reqp->aio_req_port == port) { 1259 /* dequeue request and discard event */ 1260 aio_req_remove_portq(aiop, reqp); 1261 port_free_event(reqp->aio_req_portkev); 1262 /* put request in temporary queue */ 1263 reqp->aio_req_next = headp; 1264 headp = reqp; 1265 } 1266 } while ((reqp = next) != aiop->aio_portq); 1267 } 1268 mutex_exit(&aiop->aio_portq_mutex); 1269 1270 /* headp points to the list of requests to be discarded */ 1271 for (reqp = headp; reqp != NULL; reqp = next) { 1272 next = reqp->aio_req_next; 1273 aphysio_unlock(reqp); 1274 mutex_enter(&aiop->aio_mutex); 1275 aio_req_free_port(aiop, reqp); 1276 mutex_exit(&aiop->aio_mutex); 1277 } 1278 1279 if (aiop->aio_flags & AIO_CLEANUP) 1280 cv_broadcast(&aiop->aio_waitcv); 1281 } 1282 1283 /* 1284 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread 1285 * to kick start the aio_cleanup_thread for the give process to do the 1286 * necessary cleanup. 1287 * This is needed so that delete_memory_thread can obtain writer locks 1288 * on pages that need to be relocated during a dr memory delete operation, 1289 * otherwise a deadly embrace may occur. 
1290 */ 1291 int 1292 aio_cleanup_dr_delete_memory(proc_t *procp) 1293 { 1294 struct aio *aiop = procp->p_aio; 1295 struct as *as = procp->p_as; 1296 int ret = 0; 1297 1298 ASSERT(MUTEX_HELD(&procp->p_lock)); 1299 1300 mutex_enter(&as->a_contents); 1301 1302 if (aiop != NULL) { 1303 aiop->aio_rqclnup = 1; 1304 cv_broadcast(&as->a_cv); 1305 ret = 1; 1306 } 1307 mutex_exit(&as->a_contents); 1308 return (ret); 1309 } 1310