/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * posix_aio.c implements the following POSIX asynchronous I/O functions:
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O support
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}
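
/*
 * Illustrative usage sketch (not part of the library): submit a read
 * with aio_read() and wait for it with aio_suspend().  The descriptor
 * "fd" and the buffer are hypothetical.
 *
 *	aiocb_t cb = { 0 };
 *	char buf[512];
 *	const aiocb_t *wl[1];
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) != 0)
 *		perror("aio_read");
 *	wl[0] = &cb;
 *	(void) aio_suspend(wl, 1, NULL);	// wait indefinitely
 *	if (aio_error(&cb) == 0)
 *		(void) printf("read %zd bytes\n", aio_return(&cb));
 */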

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
	int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * Find the requests the kernel rejected (aio_errno is
		 * ENOTSUP or EBADFD) and resubmit them to the user-level
		 * (UFS) workers.
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * Submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw().
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * Call kaio(AIOLIOWAIT) to wait for all outstanding
		 * kernel AIO requests.
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}
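
/*
 * Illustrative usage sketch (not part of the library): submit a read
 * and a write as one batch with lio_listio() in LIO_WAIT mode.  The
 * descriptors and buffers are hypothetical.
 *
 *	aiocb_t rd = { 0 }, wr = { 0 };
 *	aiocb_t *batch[2] = { &rd, &wr };
 *
 *	rd.aio_fildes = infd;
 *	rd.aio_buf = inbuf;
 *	rd.aio_nbytes = sizeof (inbuf);
 *	rd.aio_lio_opcode = LIO_READ;
 *
 *	wr.aio_fildes = outfd;
 *	wr.aio_buf = outbuf;
 *	wr.aio_nbytes = outlen;
 *	wr.aio_lio_opcode = LIO_WRITE;
 *
 *	// LIO_WAIT blocks until both complete; -1/EIO means at least
 *	// one request failed and must be checked with aio_error().
 *	if (lio_listio(LIO_WAIT, batch, 2, NULL) != 0)
 *		perror("lio_listio");
 */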

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int cv_err;		/* error code from cond_xxx() */
	int kerr;		/* error code from _kaio(AIOSUSPEND) */
	int i;
	timespec_t twait;	/* copy of timo for internal calculations */
	timespec_t *wait = NULL;
	int timedwait;
	int req_outstanding;
	aiocb_t **listp;
	aiocb_t *aiocbp;
#if !defined(_LP64)
	aiocb64_t **listp64;
	aiocb64_t *aiocbp64;
#endif
	hrtime_t hrtstart;
	hrtime_t hrtend;
	hrtime_t hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The following special case accelerates access to completed
	 * RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio),
		 * further asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *  0: everything ok, completed request found
		 * -1: error
		 *  1: no error : _aiodone woke up the _kaio(AIOSUSPEND,,)
		 *     system call using _kaio(AIONOTIFY).  It means that some
		 *     non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int error;
		int inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
	const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}
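
/*
 * Illustrative usage sketch (not part of the library): the usual
 * completion-check idiom built on aio_error() and aio_return().
 * aio_return() must be called exactly once per completed request;
 * afterwards the result is reclaimed and a second call fails with
 * EINVAL.
 *
 *	int error;
 *	ssize_t nbytes;
 *
 *	while ((error = aio_error(&cb)) == EINPROGRESS)
 *		;			// or do other work, then poll again
 *	if (error != 0) {
 *		errno = error;
 *		perror("aio request");
 *	} else {
 *		nbytes = aio_return(&cb);	// reap the result once
 *	}
 */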

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
	int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC  - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}
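
/*
 * Illustrative usage sketch (not part of the library): cancel every
 * outstanding request on a hypothetical descriptor "fd" and interpret
 * the aio_cancel() return value.
 *
 *	switch (aio_cancel(fd, NULL)) {
 *	case AIO_CANCELED:
 *		// all outstanding requests on fd were canceled
 *		break;
 *	case AIO_NOTCANCELED:
 *		// some requests were already in progress;
 *		// poll them with aio_error()
 *		break;
 *	case AIO_ALLDONE:
 *		// nothing was outstanding
 *		break;
 *	default:
 *		perror("aio_cancel");
 *	}
 */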

/*
 * __aio_waitn() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn() can be used to reap the results of several I/O operations that
 * were submitted asynchronously.  The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio(), aio_write() or aio_read().
 * aio_waitn() waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list".  The
 * maximum size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait".  aio_waitn() may also return early when
 * the timeout expires.  It returns 0 on success or -1 if an error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process is allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t sum_reqs;

		/*
		 * Calculate the sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested number of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
	const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}
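
/*
 * Illustrative usage sketch (not part of the library): reap at least
 * one of several previously submitted requests with aio_waitn().  The
 * submitted aiocbs are hypothetical; "done" is filled in by the call.
 *
 *	aiocb_t *done[8];
 *	uint_t nwait = 1;		// return as soon as one completes
 *	timespec_t ts = { 5, 0 };	// but give up after five seconds
 *	uint_t i;
 *
 *	if (aio_waitn(done, 8, &nwait, &ts) == 0) {
 *		for (i = 0; i < nwait; i++)
 *			(void) aio_return(done[i]);
 *	}
 */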

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored in "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL  : polling
 * AIO_TIMEOUT_WAIT  : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
	int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb64_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * Find the requests the kernel rejected (aio_errno is
		 * ENOTSUP or EBADFD) and resubmit them to the user-level
		 * (UFS) workers.
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * Submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw().
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * Call kaio(AIOLIOWAIT) to wait for all outstanding
		 * kernel AIO requests.
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
	const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
	int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC  - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
	const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */