/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */

#include "synonyms.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

extern int __fdsync(int, int);

cond_t _aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}
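
/*
 * Usage sketch (illustrative only; "fd" is an assumed open descriptor
 * and error handling is minimal): a caller typically submits a request
 * with aio_read(), polls completion with aio_error() and reaps the
 * result with aio_return().
 *
 *	aiocb_t cb;
 *	char buf[512];
 *	ssize_t n;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *
 *	if (aio_read(&cb) == -1) {
 *		perror("aio_read");
 *		exit(1);
 *	}
 *	while (aio_error(&cb) == EINPROGRESS)
 *		;			// or do other work
 *	n = aio_return(&cb);		// bytes read, or -1
 */
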
/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}
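
/*
 * Usage sketch (illustrative only; "fd", "inbuf" and "outbuf" are
 * assumed to exist): submitting a two-element batch with lio_listio()
 * in LIO_WAIT mode, then checking each element on EIO.
 *
 *	aiocb_t rd, wr;
 *	aiocb_t *batch[2] = { &rd, &wr };
 *	int i;
 *
 *	(void) memset(&rd, 0, sizeof (rd));
 *	rd.aio_fildes = fd;
 *	rd.aio_buf = inbuf;
 *	rd.aio_nbytes = sizeof (inbuf);
 *	rd.aio_lio_opcode = LIO_READ;
 *
 *	(void) memset(&wr, 0, sizeof (wr));
 *	wr.aio_fildes = fd;
 *	wr.aio_buf = outbuf;
 *	wr.aio_nbytes = sizeof (outbuf);
 *	wr.aio_offset = 4096;
 *	wr.aio_lio_opcode = LIO_WRITE;
 *
 *	if (lio_listio(LIO_WAIT, batch, 2, NULL) == -1 && errno == EIO) {
 *		for (i = 0; i < 2; i++)
 *			if (aio_error(batch[i]) != 0)
 *				(void) aio_return(batch[i]);	// reap failure
 *	}
 */
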
/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int cv_err;		/* error code from cond_xxx() */
	int kerr;		/* error code from _kaio(AIOSUSPEND) */
	int i;
	timespec_t twait;	/* copy of timo for internal calculations */
	timespec_t *wait = NULL;
	int timedwait;
	int req_outstanding;
	aiocb_t **listp;
	aiocb_t *aiocbp;
#if !defined(_LP64)
	aiocb64_t **listp64;
	aiocb64_t *aiocbp64;
#endif
	hrtime_t hrtstart;
	hrtime_t hrtend;
	hrtime_t hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next if-case is required to accelerate access
	 * to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *  0: everything ok, completed request found
		 * -1: error
		 *  1: no error : _aiodone woke up the _kaio(AIOSUSPEND,,)
		 *    system call using _kaio(AIONOTIFY).  This means that
		 *    some non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);		/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int error;
		int inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}
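
/*
 * Usage sketch (illustrative only; "cb" is a previously submitted
 * aiocb): blocking on one outstanding request with aio_suspend() and
 * a five second relative timeout.
 *
 *	const aiocb_t *wlist[1] = { &cb };
 *	timespec_t ts;
 *
 *	ts.tv_sec = 5;
 *	ts.tv_nsec = 0;
 *	if (aio_suspend(wlist, 1, &ts) == -1 && errno == EAGAIN)
 *		(void) printf("request still in progress\n");
 *	// on success, aio_error(&cb) no longer returns EINPROGRESS
 */
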
int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}
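
/*
 * Usage sketch (illustrative only; "fd" is an assumed open
 * descriptor): queueing an asynchronous fdatasync() and polling for
 * its completion.
 *
 *	aiocb_t sync_cb;
 *
 *	(void) memset(&sync_cb, 0, sizeof (sync_cb));
 *	sync_cb.aio_fildes = fd;
 *
 *	if (aio_fsync(O_DSYNC, &sync_cb) == -1) {
 *		perror("aio_fsync");
 *		exit(1);
 *	}
 *	while (aio_error(&sync_cb) == EINPROGRESS)
 *		;		// poll, or request sigevent notification
 *	(void) aio_return(&sync_cb);
 */
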
int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}
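
/*
 * Usage sketch (illustrative only; "cb" is a previously submitted
 * aiocb): cancelling a single request and interpreting the three
 * possible outcomes.
 *
 *	switch (aio_cancel(cb.aio_fildes, &cb)) {
 *	case AIO_CANCELED:
 *		// removed from the queue; aio_return() gives -1/ECANCELED
 *		break;
 *	case AIO_NOTCANCELED:
 *		// already in progress; keep polling aio_error()
 *		break;
 *	case AIO_ALLDONE:
 *		// already completed; reap it with aio_return()
 *		break;
 *	}
 */
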
/*
 * __aio_waitn() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_waitn_cleanup(void *arg)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn can be used to reap the results of several I/O operations that
 * were submitted asynchronously.  The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list".  The
 * maximum size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait".  Otherwise aio_waitn might also
 * return if the timeout expires.  Additionally, aio_waitn returns 0 if
 * successful or -1 if an error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (*nwait >= AIO_WAITN_MAXIOCBS) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t sum_reqs;

		/*
		 * Calculate the sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested number of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}
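
/*
 * Usage sketch (illustrative only; several requests are assumed to be
 * outstanding already): reaping at least two completions with
 * aio_waitn().  On return, "done" holds pointers to the completed
 * aiocbs and "nwait" holds their number.
 *
 *	aiocb_t *done[8];
 *	uint_t nwait = 2;
 *	uint_t j;
 *
 *	if (aio_waitn(done, 8, &nwait, NULL) == -1) {
 *		perror("aio_waitn");
 *		exit(1);
 *	}
 *	for (j = 0; j < nwait; j++)
 *		(void) aio_return(done[j]);
 */
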
void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL  : polling
 * AIO_TIMEOUT_WAIT  : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb64_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourself.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC));
		else
			return (__fdsync(aiocbp->aio_fildes, FSYNC));
	}

	/*
	 * Re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */