/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb_t		*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int		cv_err;	/* error code from cond_xxx() */
	int		kerr;	/* error code from _kaio(AIOSUSPEND) */
	int		i;
	timespec_t	twait;	/* copy of timo for internal calculations */
	timespec_t	*wait = NULL;
	int		timedwait;
	int		req_outstanding;
	aiocb_t		**listp;
	aiocb_t		*aiocbp;
#if !defined(_LP64)
	aiocb64_t	**listp64;
	aiocb64_t	*aiocbp64;
#endif
	hrtime_t	hrtstart;
	hrtime_t	hrtend;
	hrtime_t	hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if" case is required to accelerate access
	 * to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *  0: everything ok, completed request found
		 * -1: error
		 *  1: no error : _aiodone awakened the _kaio(AIOSUSPEND,,)
		 *     system call using _kaio(AIONOTIFY).  This means that
		 *     some non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

/*
 * __aio_waitn() cancellation handler.
 */
static void
_aio_waitn_cleanup(void *arg __unused)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn can be used to reap the results of several I/O operations that
 * were submitted asynchronously.  The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list".  The
 * maximum size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait".  aio_waitn may also return early if
 * the timeout expires.  aio_waitn returns 0 if successful or -1 if an
 * error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t	sum_reqs;

		/*
		 * Calculate the sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested number of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * completed I/Os is reached.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL : polling
 * AIO_TIMEOUT_WAIT : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb64_t	*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */
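
/*
 * Illustrative usage sketch (editorial addition, not part of the library):
 * a minimal consumer of the interfaces implemented above, showing the
 * common submit/wait/reap pattern -- aio_read(), aio_suspend(), then
 * aio_error()/aio_return().  The file name and buffer size are arbitrary
 * example values.
 *
 *	#include <aio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int
 *	main(void)
 *	{
 *		static char buf[4096];
 *		struct aiocb cb;
 *		const struct aiocb *list[1];
 *		int fd = open("/etc/passwd", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (1);
 *		(void) memset(&cb, 0, sizeof (cb));
 *		cb.aio_fildes = fd;
 *		cb.aio_buf = buf;
 *		cb.aio_nbytes = sizeof (buf);
 *		cb.aio_offset = 0;
 *		if (aio_read(&cb) != 0)		// submit the request
 *			return (1);
 *		list[0] = &cb;
 *		if (aio_suspend(list, 1, NULL) != 0)	// wait indefinitely
 *			return (1);
 *		if (aio_error(&cb) != 0)	// completion status
 *			return (1);
 *		(void) printf("read %zd bytes\n", aio_return(&cb));
 *		return (0);
 *	}
 */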