/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
	int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb_t		*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}
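
/*
 * Illustrative sketch (hypothetical application code, not part of this
 * library): submitting a batch of requests through lio_listio() above.
 * "fd0" and "fd1" stand for descriptors the application already has open;
 * error handling is abbreviated.
 *
 *	aiocb_t cbs[2];
 *	aiocb_t *batch[2] = { &cbs[0], &cbs[1] };
 *	char inbuf[512], outbuf[512];
 *	int i;
 *
 *	(void) memset(cbs, 0, sizeof (cbs));
 *	cbs[0].aio_fildes = fd0;
 *	cbs[0].aio_buf = inbuf;
 *	cbs[0].aio_nbytes = sizeof (inbuf);
 *	cbs[0].aio_lio_opcode = LIO_READ;
 *	cbs[1].aio_fildes = fd1;
 *	cbs[1].aio_buf = outbuf;
 *	cbs[1].aio_nbytes = sizeof (outbuf);
 *	cbs[1].aio_lio_opcode = LIO_WRITE;
 *
 *	With LIO_WAIT the call returns only after both requests finish;
 *	with LIO_NOWAIT it returns immediately and completion is observed
 *	via aio_error()/aio_return() or a sigevent notification.
 *
 *	if (lio_listio(LIO_WAIT, batch, 2, NULL) != 0)
 *		perror("lio_listio");
 *	for (i = 0; i < 2; i++) {
 *		if (aio_error(batch[i]) == 0)
 *			(void) aio_return(batch[i]);
 *	}
 */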

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int		cv_err;	/* error code from cond_xxx() */
	int		kerr;	/* error code from _kaio(AIOSUSPEND) */
	int		i;
	timespec_t	twait;	/* copy of timo for internal calculations */
	timespec_t	*wait = NULL;
	int		timedwait;
	int		req_outstanding;
	aiocb_t		**listp;
	aiocb_t		*aiocbp;
#if !defined(_LP64)
	aiocb64_t	**listp64;
	aiocb64_t	*aiocbp64;
#endif
	hrtime_t	hrtstart;
	hrtime_t	hrtend;
	hrtime_t	hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if" case is required to accelerate access to
	 * completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *  0:  everything ok, completed request found
		 * -1:  error
		 *  1:  no error : _aiodone awakened the _kaio(AIOSUSPEND,,)
		 *	system call using _kaio(AIONOTIFY).  It means that some
		 *	non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there are no outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * have also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}
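
/*
 * Illustrative sketch (hypothetical application code, not part of this
 * library): the common submit / suspend / reap sequence built from
 * aio_read() and aio_suspend() above and aio_error()/aio_return() below.
 * "fd" is assumed to be an open file descriptor.
 *
 *	aiocb_t cb;
 *	const aiocb_t *waitlist[1];
 *	char buf[4096];
 *	ssize_t nread;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	waitlist[0] = &cb;
 *
 *	if (aio_read(&cb) != 0)
 *		perror("aio_read");
 *	else {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			(void) aio_suspend(waitlist, 1, NULL);
 *		if (aio_error(&cb) == 0)
 *			nread = aio_return(&cb);
 *	}
 */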

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that aio_return()
	 * has already been called or that it should not have been
	 * called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC  - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}
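
/*
 * Illustrative sketch (hypothetical application code, not part of this
 * library): using the fsync barrier scheme above to flush a descriptor
 * and then polling the aiocb for the notification side-effect.
 *
 *	aiocb_t fcb;
 *
 *	(void) memset(&fcb, 0, sizeof (fcb));
 *	fcb.aio_fildes = fd;
 *	if (aio_fsync(O_DSYNC, &fcb) != 0)
 *		perror("aio_fsync");
 *	else {
 *		while (aio_error(&fcb) == EINPROGRESS)
 *			;	(poll, sleep, or use a sigevent instead)
 *		if (aio_return(&fcb) == -1)
 *			perror("aio_fsync completion");
 *	}
 */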

int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

/*
 * __aio_waitn() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn can be used to reap the results of several I/O operations that
 * were submitted asynchronously.  The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list".  The
 * maximum size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait".  aio_waitn may also return early if the
 * timeout expires.  Additionally, aio_waitn returns 0 if successful or -1
 * if an error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int	error = 0;
	uint_t	dnwait = 0;	/* requests in the waitn-done list */
	uint_t	kwaitcnt;	/* expected "done" requests from kernel */
	uint_t	knentcnt;	/* max. expected "done" requests from kernel */
	int	uerrno = 0;
	int	kerrno = 0;	/* save errno from _kaio() call */
	int	timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t	*reqp;
	timespec_t	end;
	timespec_t	twait;	/* copy of utimo for internal calculations */
	timespec_t	*wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process is allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t	sum_reqs;

		/*
		 * Calculate the sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* minimum requested number of I/Os completed */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * I/Os has completed.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}
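
/*
 * Illustrative sketch (hypothetical application code, not part of this
 * library): reaping a batch of previously submitted requests with
 * aio_waitn().  On return "nwait" holds the number of aiocbs actually
 * placed into "done".
 *
 *	aiocb_t *done[8];
 *	uint_t nwait = 3;	(wake up after at least 3 completions)
 *	struct timespec ts;
 *	uint_t i;
 *
 *	ts.tv_sec = 5;		(but give up after 5 seconds)
 *	ts.tv_nsec = 0;
 *	if (aio_waitn(done, 8, &nwait, &ts) == 0) {
 *		for (i = 0; i < nwait; i++) {
 *			if (aio_error(done[i]) == 0)
 *				(void) aio_return(done[i]);
 *		}
 *	}
 */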

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values:
 * AIO_TIMEOUT_POLL  : polling
 * AIO_TIMEOUT_WAIT  : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;		/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
	int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb64_t	*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that aio_return()
	 * has already been called or that it should not have been
	 * called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC  - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */