/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */
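
/*
 * Illustrative usage sketch (not part of the implementation): a caller
 * typically submits a request with aio_read()/aio_write(), polls or waits
 * for completion with aio_error(), and finally collects the result with
 * aio_return().  Error handling is omitted; "fd" and "buf" are placeholders.
 *
 *	aiocb_t cb;
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) == 0) {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;		(busy-wait until the request completes)
 *		ssize_t n = aio_return(&cb);
 *	}
 */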

#include "lint.h"
#include "thr_uberdata.h"
#include "libc.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2
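
/*
 * Meaning of the timedwait values (derived from their use below):
 * a NULL timeout pointer waits indefinitely (AIO_TIMEOUT_INDEF), a zero
 * timeout polls once (AIO_TIMEOUT_POLL), a positive timeout waits with a
 * deadline (AIO_TIMEOUT_WAIT), and AIO_TIMEOUT_UNDEF marks a timeout that
 * has not been examined yet (see __aio_waitn()).
 */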

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

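/*
 * lio_listio(): submit a list of read/write requests in one call.
 * With LIO_WAIT the call does not return until every request in the list
 * has completed; with LIO_NOWAIT it returns once the requests have been
 * queued and optional completion notification is delivered via the
 * supplied sigevent.
 */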
int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

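	/*
	 * First try to hand the whole list to the kernel (kaio).  If the
	 * kernel cannot handle some or all of the requests (ENOTSUP), the
	 * affected requests are resubmitted below to the user-level
	 * worker threads.
	 */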
	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * Find the requests the kernel could not handle (aio_errno
		 * set to ENOTSUP or EBADFD) and resubmit them to the
		 * user-level workers.
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;	/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int cv_err;	/* error code from cond_xxx() */
	int kerr;	/* error code from _kaio(AIOSUSPEND) */
	int i;
	timespec_t twait;	/* copy of timo for internal calculations */
	timespec_t *wait = NULL;
	int timedwait;
	int req_outstanding;
	aiocb_t **listp;
	aiocb_t *aiocbp;
#if !defined(_LP64)
	aiocb64_t **listp64;
	aiocb64_t *aiocbp64;
#endif
	hrtime_t hrtstart;
	hrtime_t hrtend;
	hrtime_t hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The following if-clause is a fast path for the case where only
	 * kernel (RAW-IO) requests are outstanding: go straight to the
	 * kernel instead of scanning the user-level queues first.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *	0:  everything OK, a completed request was found
		 *	-1: error
		 *	1:  no error: _aiodone() woke up the _kaio(AIOSUSPEND)
		 *	    system call via _kaio(AIONOTIFY), which means that
		 *	    some non RAW-IOs completed in the meantime.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

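	/*
	 * Main wait loop: first scan the list for an already completed
	 * request, then either ask the kernel for completed RAW-IO
	 * requests or sleep on _aio_iowait_cv until _aiodone() reports
	 * a completion or the timeout expires.
	 */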
	for (;;) {
		int error;
		int inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
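		/*
		 * The request was handled by the user-level workers and has
		 * completed.  Drop the library's bookkeeping for it; the
		 * result itself remains in aiocbp->aio_resultp so that a
		 * subsequent aio_return() can retrieve it.
		 */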
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * we use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

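/*
 * _lio_remove(): detach a request from its lio_listio() list head and
 * free the head once the last request has been removed from it.
 */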
void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_DATA));
		else
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_FILE));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

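/*
 * aio_cancel(): attempt to cancel the request described by aiocbp, or,
 * when aiocbp is NULL, all outstanding requests for the given file
 * descriptor.  Returns AIO_CANCELED, AIO_NOTCANCELED or AIO_ALLDONE.
 */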
int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

/*
 * __aio_waitn() cancellation handler.
 */
static void
_aio_waitn_cleanup(void *arg __unused)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn() reaps the results of several I/O operations that were
 * submitted asynchronously through the existing POSIX interfaces
 * (lio_listio, aio_write or aio_read).  It waits until "nwait" I/Os
 * have completed and returns pointers to their control blocks in
 * "list".  The maximum size of the list is given by "nent" and the
 * actual number of completed I/Os is returned in "nwait".  aio_waitn()
 * may also return early when the timeout expires.  It returns 0 on
 * success or -1 if an error occurred.
 */
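
/*
 * Illustrative usage sketch (not part of the implementation): wait for at
 * least two of four previously submitted requests, with a one second
 * timeout.  Error handling is omitted and the requests are assumed to have
 * been submitted earlier with aio_read()/aio_write() or lio_listio().
 *
 *	aiocb_t *done[4];
 *	uint_t nwait = 2;
 *	timespec_t ts = { 1, 0 };
 *	if (aio_waitn(done, 4, &nwait, &ts) == 0) {
 *		for (uint_t j = 0; j < nwait; j++)
 *			(void) aio_return(done[j]);
 *	}
 */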
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* amount of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

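	/*
	 * Mixed case: requests serviced by the user-level workers are
	 * outstanding and possibly kernel requests as well.  Alternate
	 * between asking the kernel for completed RAW-IO requests and
	 * draining the library's "done" queue until *nwait completions
	 * have been collected or the timeout expires.
	 */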
	for (;;) {
		uint_t sum_reqs;

		/*
		 * Calculate sum of active non RAW-IO requests (sum_reqs).
		 * If the expected amount of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0;	/* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested amount of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required amount of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);	/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

void
_aio_waitn_wakeup(void)
{
	/*
	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
	 * it is waiting for completed I/Os.  The number of required
	 * completed I/Os is stored into "_aio_waitncnt".
	 * aio_waitn() is woken up when
	 * - there are no further outstanding I/Os
	 *   (_aio_outstand_cnt == 0) or
	 * - the expected number of I/Os has completed.
	 * Only one __aio_waitn() function waits for completed I/Os at
	 * a time.
	 *
	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
	 * _aiodone() that at least one __aio_suspend() call is
	 * waiting for completed I/Os.
	 * There could be more than one __aio_suspend() function
	 * waiting for completed I/Os.  Because every function should
	 * be waiting for different I/Os, _aiodone() has to wake up all
	 * __aio_suspend() functions each time.
	 * Every __aio_suspend() function will compare the recently
	 * completed I/O with its own list.
	 */
	ASSERT(MUTEX_HELD(&__aio_mutex));
	if (_aio_flags & AIO_IO_WAITING) {
		if (_aio_waitncnt > 0)
			_aio_waitncnt--;
		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
		    _aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	} else {
		/* Wake up waiting aio_suspend calls */
		if (_aio_suscv_cnt > 0)
			(void) cond_broadcast(&_aio_iowait_cv);
	}
}

/*
 * timedwait values :
 * AIO_TIMEOUT_POLL : polling
 * AIO_TIMEOUT_WAIT : timeout
 * AIO_TIMEOUT_INDEF : wait indefinitely
 */
static int
_aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
{
	struct timeval curtime;

	if (utimo) {
		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
		    utimo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
			(void) gettimeofday(&curtime, NULL);
			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
			if (end->tv_nsec >= NANOSEC) {
				end->tv_nsec -= NANOSEC;
				end->tv_sec += 1;
			}
			*timedwait = AIO_TIMEOUT_WAIT;
		} else {
			/* polling */
			*timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		*timedwait = AIO_TIMEOUT_INDEF;	/* wait indefinitely */
	}
	return (0);
}

#if !defined(_LP64)

int
aio_read64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write64(aiocb64_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int aio_ufs = 0;
	int oerrno = 0;
	aio_lio_t *head = NULL;
	aiocb64_t *aiocbp;
	int state = 0;
	int EIOflg = 0;
	int rw;
	int do_kaio = 0;
	int error;
	int i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread64(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 1;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO64;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * Find the requests the kernel could not handle (aio_errno
		 * set to ENOTSUP or EBADFD) and resubmit them to the
		 * user-level workers.
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD64;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE64;
				break;
			}
			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}

int
aio_suspend64(const aiocb64_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 1));
}

int
aio_error64(const aiocb64_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb64_t *)aiocbp)->aio_state = CHECK;
		}
	}
	return (error);
}

ssize_t
aio_return64(aiocb64_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * we use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}

static int
__aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync64(int op, aiocb64_t *aiocbp)
{
	aio_lio_t *head;
	struct stat64 statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread64(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3RT) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_DATA));
		else
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_FILE));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 1;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *       will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}

int
aio_cancel64(int fd, aiocb64_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat64 buf;

	if (fstat64(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}

int
aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}

#endif	/* !defined(_LP64) */