/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
int aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */

int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int error;
	size_t c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as a contract private interface.
 */

int
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;
	int send_signal = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
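			/*
			 * Move the request from the pending list to the
			 * port queue, deliver the event, and wake
			 * aio_close_port() once the last port-pending
			 * request has drained.
			 */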
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return (0);
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return (0);
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an
				 * AST by setting an AST on all threads in
				 * this process.
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return (0);
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
		 */
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;

		/* put request on done queue. */
		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
	} /* portkevent */

	/*
	 * when list IO notification is enabled, a notification or
	 * signal is sent only when all entries in the list are done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
			cv_signal(&head->lio_notify);
			if (head->lio_port >= 0 &&
			    (lio_pkevp = head->lio_portkev) != NULL)
				head->lio_port = -1;
		}
	}

	/*
	 * if AIO_WAITN is set, wake the waiters only when the
	 * required number of I/Os has finished or when all
	 * I/Os are done.
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	/*
	 * No need to set this flag for pollq, portq, lio requests.
	 * If this is an old Solaris aio request, and the process has
	 * a SIGIO signal handler enabled, then send a SIGIO signal.
	 */
	if (!sigev && !use_port && head == NULL &&
	    (reqp->aio_req_flags & AIO_SOLARIS) &&
	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    (func != SIG_IGN)) {
		send_signal = 1;
		reqp->aio_req_flags |= AIO_SIGNALLED;
	}

	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_portq_mutex);

	/*
	 * Could the cleanup thread be waiting for AIO with locked
	 * resources to finish?
	 * Ideally, in that case, the cleanup thread should block on
	 * cleanupcv, but there is a window where it could miss a new
	 * aio request that sneaked in.
	 */
	mutex_enter(&as->a_contents);
	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
		cv_broadcast(&as->a_cv);
	mutex_exit(&as->a_contents);

	if (sigev)
		aio_sigev_send(p, sigev);
	else if (send_signal)
		psignal(p, SIGIO);

	if (pkevp)
		port_send_event(pkevp);
	if (lio_sigev)
		aio_sigev_send(p, lio_sigev);
	if (lio_pkevp)
		port_send_event(lio_pkevp);

	return (0);
}

/*
 * send a queued signal to the specified process. the sigev
 * argument must be non-NULL; callers only invoke this function
 * after checking that a sigevent was actually specified.
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}

/*
 * special case handling for zero length requests. the aio request
 * short circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return value.
 */
void
aio_zerolen(aio_req_t *reqp)
{
	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}

/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}

/*
 * deletes a request's id from the hash table of outstanding io.
 */
static void
aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}

/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL) {
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
		reqp->aio_req_sigqp = NULL;
	}
	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}


/*
 * Verify the integrity of a queue.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
    aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;
	int present = 0;

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}
	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif
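
/*
 * The request queues managed by aio_enq() and aio_deq() below are circular,
 * doubly linked lists threaded through aio_req_next/aio_req_prev, with the
 * queue head pointer naming the first element (or NULL when the queue is
 * empty).  A minimal sketch of walking such a queue, assuming the caller
 * already holds the appropriate lock (illustrative only, not part of the
 * build):
 *
 *	aio_req_t *reqp;
 *	if ((reqp = aiop->aio_doneq) != NULL) {
 *		do {
 *			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
 *		} while ((reqp = reqp->aio_req_next) != aiop->aio_doneq);
 *	}
 */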
/*
 * Put a request onto the tail of a queue.
 */
void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	aio_req_t *head;
	aio_req_t *prev;

	aio_verify_queue(*qhead, NULL, reqp);

	if ((head = *qhead) == NULL) {
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
		*qhead = reqp;
	} else {
		reqp->aio_req_next = head;
		reqp->aio_req_prev = prev = head->aio_req_prev;
		prev->aio_req_next = reqp;
		head->aio_req_prev = reqp;
	}
	reqp->aio_req_flags |= qflg_new;
}

/*
 * Remove a request from its queue.
 */
void
aio_deq(aio_req_t **qhead, aio_req_t *reqp)
{
	aio_verify_queue(*qhead, reqp, NULL);

	if (reqp->aio_req_next == reqp) {
		*qhead = NULL;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
		if (*qhead == reqp)
			*qhead = reqp->aio_req_next;
	}
	reqp->aio_req_next = NULL;
	reqp->aio_req_prev = NULL;
}

/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue should have their aio_req_flags field cleared.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}

/*
 * cleanup aio requests that are on the per-process poll queue.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
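	/*
	 * aio_cleanup_cleanupq() must be called while aio_cleanupq_mutex
	 * is still held; the remaining queues are processed after that
	 * mutex has been dropped.
	 */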
	if (cleanupqhead)
		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	/*
	 * If there has never been an old solaris aio request
	 * issued by this process, then do not send a SIGIO signal.
	 */
	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
		signalled = 1;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}


/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	aio_lio_t *liop;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/*
		 * It is not allowed to hold locks during aphysio_unlock().
		 * The aio_done() interrupt function will try to acquire
		 * aio_mutex and aio_portq_mutex. Therefore we disconnect
		 * the portq list from the aiop for the duration of the
		 * aphysio_unlock() loop below.
		 */
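		/*
		 * While aio_portq is NULL, aio_req_remove_portq() blocks
		 * on aio_portcv until the list is reattached below.
		 */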
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);
		if ((reqp = headp) != NULL) {
			do {
				next = reqp->aio_req_next;
				aphysio_unlock(reqp);
				if (exitflag) {
					mutex_enter(&aiop->aio_mutex);
					aio_req_free(aiop, reqp);
					mutex_exit(&aiop->aio_mutex);
				}
			} while ((reqp = next) != headp);
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the port queue */
			aio_req_t *newq;

			mutex_enter(&aiop->aio_portq_mutex);
			if ((newq = aiop->aio_portq) != NULL) {
				aio_req_t *headprev = headp->aio_req_prev;
				aio_req_t *newqprev = newq->aio_req_prev;

				headp->aio_req_prev = newqprev;
				newq->aio_req_prev = headprev;
				headprev->aio_req_next = newq;
				newqprev->aio_req_next = headp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	if ((reqp = cleanupq) == NULL)
		return;
	do {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(reqp->aio_req_portkev);
			if ((liop = reqp->aio_req_lio) != NULL) {
				int send_event = 0;

				mutex_enter(&aiop->aio_mutex);
				ASSERT(liop->lio_refcnt > 0);
				if (--liop->lio_refcnt == 0) {
					if (liop->lio_port >= 0 &&
					    liop->lio_portkev) {
						liop->lio_port = -1;
						send_event = 1;
					}
				}
				mutex_exit(&aiop->aio_mutex);
				if (send_event)
					port_send_event(liop->lio_portkev);
			}
		}
	} while ((reqp = next) != cleanupq);
}

/*
 * Do cleanup for every element of the cleanupq.
 */
static int
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	int signalled = 0;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */
	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		ASSERT(reqp->aio_req_portkev == NULL);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg)
			aio_req_free(aiop, reqp);
		else
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
		if (!exitflg) {
			if (reqp->aio_req_flags & AIO_SIGNALLED)
				signalled++;
			else
				reqp->aio_req_flags |= AIO_SIGNALLED;
		}
		mutex_exit(&aiop->aio_mutex);
	} while ((reqp = next) != qhead);
	return (signalled);
}

/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev = NULL;
	int signalled = 0;

	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			sigev = reqp->aio_req_sigqp;
			reqp->aio_req_sigqp = NULL;
			if ((liohead = reqp->aio_req_lio) != NULL) {
				ASSERT(liohead->lio_refcnt > 0);
				if (--liohead->lio_refcnt == 0) {
					cv_signal(&liohead->lio_notify);
					lio_sigev = liohead->lio_sigqp;
					liohead->lio_sigqp = NULL;
				}
			}
			mutex_exit(&aiop->aio_mutex);
			if (sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    sigev);
			}
			if (lio_sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    lio_sigev);
			}
		}
	} while ((reqp = next) != qhead);

	return (signalled);
}

/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			mutex_exit(&aiop->aio_mutex);
		}
	} while ((reqp = next) != qhead);
}

/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		aiop->aio_doneq = NULL;
		reqp = head;
		do {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		} while ((reqp = next) != head);
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	void *resultp;
	int error;
	size_t retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}


void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}

/*
 * This function is used to remove a request from the done queue.
 */

void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	aio_deq(&aiop->aio_portq, reqp);
}

/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t *aiop;
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	int counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every newly
	 * created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process-oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	counter = 0;
	if ((headp = aiop->aio_portpending) != NULL) {
		reqp = headp;
		do {
			if (reqp->aio_req_portkev &&
			    reqp->aio_req_port == port) {
				reqp->aio_req_flags |= AIO_CLOSE_PORT;
				counter++;
			}
		} while ((reqp = reqp->aio_req_next) != headp);
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 * check port doneq
	 */
	headp = NULL;
	if ((reqp = aiop->aio_portq) != NULL) {
		do {
			next = reqp->aio_req_next;
			if (reqp->aio_req_port == port) {
				/* dequeue request and discard event */
				aio_req_remove_portq(aiop, reqp);
				port_free_event(reqp->aio_req_portkev);
				/* put request in temporary queue */
				reqp->aio_req_next = headp;
				headp = reqp;
			}
		} while ((reqp = next) != aiop->aio_portq);
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}
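
/*
 * A minimal sketch of how a caller such as dr's delete_memory_thread is
 * expected to invoke aio_cleanup_dr_delete_memory(); illustrative only,
 * the real caller lives in the dr code, not in this file:
 *
 *	mutex_enter(&procp->p_lock);
 *	(void) aio_cleanup_dr_delete_memory(procp);
 *	mutex_exit(&procp->p_lock);
 */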