/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "libc.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}
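
/*
 * Illustrative usage sketch (not part of this implementation): a caller
 * normally submits a request with aio_read(), polls aio_error() until the
 * request leaves EINPROGRESS (or blocks in aio_suspend() instead), and then
 * reaps the result exactly once with aio_return().  The path, buffer size,
 * and error handling below are arbitrary examples.
 *
 *	char buf[4096];
 *	aiocb_t cb = { 0 };
 *	cb.aio_fildes = open("/tmp/example", O_RDONLY);
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) == -1)
 *		perror("aio_read");
 *	const aiocb_t * const wl[] = { &cb };
 *	while (aio_error(&cb) == EINPROGRESS)
 *		(void) aio_suspend(wl, 1, NULL);
 *	ssize_t n = aio_return(&cb);
 */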

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}
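
/*
 * Illustrative usage sketch (not part of this implementation): lio_listio()
 * submits a batch of requests described by an array of aiocb pointers.  With
 * LIO_WAIT the call returns only after every listed request has finished;
 * per-request status is then available through aio_error() and aio_return().
 * The descriptors in_fd and out_fd and the buffers inbuf and outbuf are
 * assumed to exist and are arbitrary examples.
 *
 *	aiocb_t rd = { 0 }, wr = { 0 };
 *	aiocb_t *batch[2] = { &rd, &wr };
 *	rd.aio_fildes = in_fd;
 *	rd.aio_buf = inbuf;
 *	rd.aio_nbytes = sizeof (inbuf);
 *	rd.aio_lio_opcode = LIO_READ;
 *	wr.aio_fildes = out_fd;
 *	wr.aio_buf = outbuf;
 *	wr.aio_nbytes = sizeof (outbuf);
 *	wr.aio_lio_opcode = LIO_WRITE;
 *	if (lio_listio(LIO_WAIT, batch, 2, NULL) == -1)
 *		perror("lio_listio");
 */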

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int		cv_err;		/* error code from cond_xxx() */
	int		kerr;		/* error code from _kaio(AIOSUSPEND) */
	int		i;
	timespec_t	twait;		/* copy of timo for internal calculations */
	timespec_t	*wait = NULL;
	int		timedwait;
	int		req_outstanding;
	aiocb_t		**listp;
	aiocb_t		*aiocbp;
#if !defined(_LP64)
	aiocb64_t	**listp64;
	aiocb64_t	*aiocbp64;
#endif
	hrtime_t	hrtstart;
	hrtime_t	hrtend;
	hrtime_t	hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if" case is required to accelerate the
	 * access to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *  0: everything ok, completed request found
		 * -1: error
		 *  1: no error : _aiodone woke up the _kaio(AIOSUSPEND,,)
		 *     system call using _kaio(AIONOTIFY).  This means that
		 *     some non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_DATA));
		else
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_FILE));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}
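
/*
 * Illustrative usage sketch (not part of this implementation): aio_cancel()
 * reports one of three outcomes for a request: AIO_CANCELED (canceled in
 * time), AIO_NOTCANCELED (still in progress) or AIO_ALLDONE (had already
 * completed).  Whatever the outcome, a submitted request must still be
 * reaped exactly once with aio_return().  The control block cb is assumed
 * to have been submitted earlier with aio_read() or aio_write().
 *
 *	switch (aio_cancel(cb.aio_fildes, &cb)) {
 *	case AIO_NOTCANCELED:
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;
 *		break;
 *	case AIO_CANCELED:
 *	case AIO_ALLDONE:
 *	default:
 *		break;
 *	}
 *	(void) aio_return(&cb);
 */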

/*
 * __aio_waitn() cancellation handler.
 */
static void
_aio_waitn_cleanup(void *arg __unused)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn can be used to reap the results of several I/O operations that
 * were submitted asynchronously.  The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list".  The
 * maximum size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait".  aio_waitn may also return early if the
 * timeout expires.  It returns 0 if successful or -1 if an error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process is allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t	sum_reqs;

		/*
		 * Calculate sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested number of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function could
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL : polling
 * AIO_TIMEOUT_WAIT : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb64_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_DATA));
		else
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_FILE));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */
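
/*
 * Illustrative usage sketch (not part of this implementation): aio_waitn()
 * blocks until at least *nwait of the previously submitted requests have
 * completed and fills "done" with pointers to their control blocks; each
 * returned request is then reaped with aio_return().  The list size and
 * wait count below are arbitrary examples.
 *
 *	aiocb_t *done[8];
 *	uint_t cnt = 3;
 *	if (aio_waitn(done, 8, &cnt, NULL) == 0) {
 *		for (uint_t j = 0; j < cnt; j++)
 *			(void) aio_return(done[j]);
 *	}
 */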