/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

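/*
 * Illustrative, non-normative usage sketch (added editorially, not part of
 * the original code): a caller typically fills in an aiocb, submits it with
 * aio_read() or aio_write(), polls aio_error() until the request leaves
 * EINPROGRESS, and then collects the byte count with aio_return().  The
 * file name, buffer size, and minimal error handling below are assumptions
 * made for the example only.
 *
 *	#include <aio.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	char buf[512];
 *	struct aiocb cb;
 *	ssize_t n;
 *	int fd = open("/etc/motd", O_RDONLY);
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *
 *	if (aio_read(&cb) == 0) {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;	(poll, or do other useful work)
 *		n = aio_return(&cb);	(bytes read, or -1 on error)
 *	}
 */
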
/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
	int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}

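/*
 * Illustrative, non-normative usage sketch for lio_listio() (added
 * editorially, not part of the original code): with LIO_WAIT the call
 * submits every non-LIO_NOP entry in the list and blocks until all of them
 * have completed; per-request status is still collected with
 * aio_error()/aio_return() on each aiocb.  The descriptors, buffers, and
 * two-entry list below are assumptions made for the example only.
 *
 *	struct aiocb rd, wr;
 *	struct aiocb *list[2] = { &rd, &wr };
 *
 *	(void) memset(&rd, 0, sizeof (rd));
 *	rd.aio_fildes = infd;
 *	rd.aio_buf = inbuf;
 *	rd.aio_nbytes = sizeof (inbuf);
 *	rd.aio_lio_opcode = LIO_READ;
 *
 *	(void) memset(&wr, 0, sizeof (wr));
 *	wr.aio_fildes = outfd;
 *	wr.aio_buf = outbuf;
 *	wr.aio_nbytes = sizeof (outbuf);
 *	wr.aio_lio_opcode = LIO_WRITE;
 *
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) != 0)
 *		(inspect aio_error(&rd) and aio_error(&wr) individually)
 */
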
/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int cv_err;	/* error code from cond_xxx() */
	int kerr;	/* error code from _kaio(AIOSUSPEND) */
	int i;
	timespec_t twait;	/* copy of timo for internal calculations */
	timespec_t *wait = NULL;
	int timedwait;
	int req_outstanding;
	aiocb_t **listp;
	aiocb_t *aiocbp;
#if !defined(_LP64)
	aiocb64_t **listp64;
	aiocb64_t *aiocbp64;
#endif
	hrtime_t hrtstart;
	hrtime_t hrtend;
	hrtime_t hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if" case is required to accelerate access
	 * to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 * 0:  everything ok, completed request found
		 * -1: error
		 * 1:  no error : _aiodone woke up the _kaio(AIOSUSPEND,,)
		 *	system call using _kaio(AIONOTIFY).  It means that
		 *	some non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile?
		    AIOSUSPEND64 : AIOSUSPEND, list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int error;
		int inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile?
			    AIOSUSPEND64 : AIOSUSPEND, list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
	const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}

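/*
 * Illustrative, non-normative usage sketch for aio_suspend() (added
 * editorially, not part of the original code): block until at least one of
 * the listed requests completes or the relative timeout expires, in which
 * case -1 is returned with errno set to EAGAIN.  The single-entry list and
 * one-second timeout are assumptions made for the example only.
 *
 *	const struct aiocb *wl[1] = { &cb };
 *	struct timespec ts;
 *
 *	ts.tv_sec = 1;
 *	ts.tv_nsec = 0;
 *	if (aio_suspend(wl, 1, &ts) == -1 && errno == EAGAIN)
 *		(timed out; the request is still in progress)
 */
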
int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that aio_return()
	 * has already been called or that it should not be called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as having been returned so
	 * that later calls to aio_return() will report that the result
	 * has already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

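/*
 * Illustrative, non-normative usage sketch for aio_fsync() (added
 * editorially, not part of the original code): the call only queues a sync
 * of the operations already queued for aiocbp->aio_fildes; completion is
 * observed through aio_error()/aio_return() on the same aiocb (or through
 * the requested sigevent notification).  The descriptor below is an
 * assumption made for the example only.
 *
 *	struct aiocb sc;
 *
 *	(void) memset(&sc, 0, sizeof (sc));
 *	sc.aio_fildes = fd;
 *	if (aio_fsync(O_DSYNC, &sc) == 0) {
 *		while (aio_error(&sc) == EINPROGRESS)
 *			;	(wait for the sync to be reported done)
 *	}
 */
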
int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

/*
 * __aio_waitn() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn can be used to reap the results of several I/O operations that
 * were submitted asynchronously.  The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list".  The
 * maximum size of this list is given by "nent" and the actual number of
 * I/Os completed is returned in "nwait".  aio_waitn may also return early
 * if the timeout expires.  It returns 0 on success or -1 if an error
 * occurred.
 */
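/*
 * Illustrative, non-normative usage sketch for aio_waitn() (added
 * editorially, not part of the original code): wait until at least "nwait"
 * of the outstanding requests have completed and collect pointers to the
 * completed control blocks.  The list size and the count of two are
 * assumptions made for the example only.
 *
 *	struct aiocb *done[8];
 *	uint_t nwait = 2;
 *	uint_t i;
 *
 *	if (aio_waitn(done, 8, &nwait, NULL) == 0) {
 *		for (i = 0; i < nwait; i++)
 *			(void) aio_return(done[i]);
 *	}
 */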
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* amount of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t sum_reqs;

		/*
		 * Calculate sum of active non RAW-IO requests (sum_reqs).
		 * If the expected amount of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested amount of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required amount of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
	const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL : polling
 * AIO_TIMEOUT_WAIT : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
	int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb64_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
	const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that aio_return()
	 * has already been called or that it should not be called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as having been returned so
	 * that later calls to aio_return() will report that the result
	 * has already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
	const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */