1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * posix_aio.c implements the POSIX async. I/O functions.
29 *
30 * aio_read
31 * aio_write
32 * aio_error
33 * aio_return
34 * aio_suspend
35 * lio_listio
36 * aio_fsync
37 * aio_cancel
38 */
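/*
 * Illustrative usage sketch (not part of this file; "fd" and "buf" are
 * hypothetical caller variables):
 *
 *	aiocb_t cb;
 *	const aiocb_t *wlist[1];
 *	ssize_t nbytes;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) != 0)
 *		return (-1);
 *	wlist[0] = &cb;
 *	while (aio_error(&cb) == EINPROGRESS)
 *		(void) aio_suspend(wlist, 1, NULL);
 *	nbytes = aio_return(&cb);
 */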
39
40 #include "lint.h"
41 #include "thr_uberdata.h"
42 #include "asyncio.h"
43 #include <atomic.h>
44 #include <sys/file.h>
45 #include <sys/port.h>
46
47 extern int __fdsync(int, int);
48
49 cond_t _aio_waitn_cv = DEFAULTCV; /* wait for end of aio_waitn */
50
51 static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);
52
53 /* defines for timedwait in __aio_waitn() and __aio_suspend() */
54 #define AIO_TIMEOUT_INDEF -1
55 #define AIO_TIMEOUT_POLL 0
56 #define AIO_TIMEOUT_WAIT 1
57 #define AIO_TIMEOUT_UNDEF 2
58
59 /*
60 * List I/O stuff
61 */
62 static void _lio_list_decr(aio_lio_t *);
63 static long aio_list_max = 0;
64
65 int
66 aio_read(aiocb_t *aiocbp)
67 {
68 if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
69 errno = EINVAL;
70 return (-1);
71 }
72 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
73 errno = EBUSY;
74 return (-1);
75 }
76 if (_aio_sigev_thread(aiocbp) != 0)
77 return (-1);
78 aiocbp->aio_lio_opcode = LIO_READ;
79 return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
80 (AIO_KAIO | AIO_NO_DUPS)));
81 }
82
83 int
84 aio_write(aiocb_t *aiocbp)
85 {
86 if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
87 errno = EINVAL;
88 return (-1);
89 }
90 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
91 errno = EBUSY;
92 return (-1);
93 }
94 if (_aio_sigev_thread(aiocbp) != 0)
95 return (-1);
96 aiocbp->aio_lio_opcode = LIO_WRITE;
97 return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
98 (AIO_KAIO | AIO_NO_DUPS)));
99 }
100
101 /*
102 * __lio_listio() cancellation handler.
103 */
104 /* ARGSUSED */
105 static void
106 _lio_listio_cleanup(aio_lio_t *head)
107 {
108 int freeit = 0;
109
110 ASSERT(MUTEX_HELD(&head->lio_mutex));
111 if (head->lio_refcnt == 0) {
112 ASSERT(head->lio_nent == 0);
113 freeit = 1;
114 }
115 head->lio_waiting = 0;
116 sig_mutex_unlock(&head->lio_mutex);
117 if (freeit)
118 _aio_lio_free(head);
119 }
120
121 int
122 lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
123 int nent, struct sigevent *_RESTRICT_KYWD sigevp)
124 {
125 int aio_ufs = 0;
126 int oerrno = 0;
127 aio_lio_t *head = NULL;
128 aiocb_t *aiocbp;
129 int state = 0;
130 int EIOflg = 0;
131 int rw;
132 int do_kaio = 0;
133 int error;
134 int i;
135
136 if (!_kaio_ok)
137 _kaio_init();
138
139 if (aio_list_max == 0)
140 aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
141
142 if (nent <= 0 || nent > aio_list_max) {
143 errno = EINVAL;
144 return (-1);
145 }
146
147 switch (mode) {
148 case LIO_WAIT:
149 state = NOCHECK;
150 break;
151 case LIO_NOWAIT:
152 state = CHECK;
153 break;
154 default:
155 errno = EINVAL;
156 return (-1);
157 }
158
159 for (i = 0; i < nent; i++) {
160 if ((aiocbp = list[i]) == NULL)
161 continue;
162 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
163 errno = EBUSY;
164 return (-1);
165 }
166 if (_aio_sigev_thread(aiocbp) != 0)
167 return (-1);
168 if (aiocbp->aio_lio_opcode == LIO_NOP)
169 aiocbp->aio_state = NOCHECK;
170 else {
171 aiocbp->aio_state = state;
172 if (KAIO_SUPPORTED(aiocbp->aio_fildes))
173 do_kaio++;
174 else
175 aiocbp->aio_resultp.aio_errno = ENOTSUP;
176 }
177 }
178 if (_aio_sigev_thread_init(sigevp) != 0)
179 return (-1);
180
181 if (do_kaio) {
182 error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
183 if (error == 0)
184 return (0);
185 oerrno = errno;
186 } else {
187 oerrno = errno = ENOTSUP;
188 error = -1;
189 }
190
191 if (error == -1 && errno == ENOTSUP) {
192 error = errno = 0;
193 /*
194 * If LIO_WAIT, or notification required, allocate a list head.
195 */
196 if (mode == LIO_WAIT ||
197 (sigevp != NULL &&
198 (sigevp->sigev_notify == SIGEV_SIGNAL ||
199 sigevp->sigev_notify == SIGEV_THREAD ||
200 sigevp->sigev_notify == SIGEV_PORT)))
201 head = _aio_lio_alloc();
202 if (head) {
203 sig_mutex_lock(&head->lio_mutex);
204 head->lio_mode = mode;
205 head->lio_largefile = 0;
206 if (mode == LIO_NOWAIT && sigevp != NULL) {
207 if (sigevp->sigev_notify == SIGEV_THREAD) {
208 head->lio_port = sigevp->sigev_signo;
209 head->lio_event = AIOLIO;
210 head->lio_sigevent = sigevp;
211 head->lio_sigval.sival_ptr =
212 sigevp->sigev_value.sival_ptr;
213 } else if (sigevp->sigev_notify == SIGEV_PORT) {
214 port_notify_t *pn =
215 sigevp->sigev_value.sival_ptr;
216 head->lio_port = pn->portnfy_port;
217 head->lio_event = AIOLIO;
218 head->lio_sigevent = sigevp;
219 head->lio_sigval.sival_ptr =
220 pn->portnfy_user;
221 } else { /* SIGEV_SIGNAL */
222 head->lio_signo = sigevp->sigev_signo;
223 head->lio_sigval.sival_ptr =
224 sigevp->sigev_value.sival_ptr;
225 }
226 }
227 head->lio_nent = head->lio_refcnt = nent;
228 sig_mutex_unlock(&head->lio_mutex);
229 }
230 /*
231 * find UFS requests, errno == ENOTSUP/EBADFD,
232 */
233 for (i = 0; i < nent; i++) {
234 if ((aiocbp = list[i]) == NULL ||
235 aiocbp->aio_lio_opcode == LIO_NOP ||
236 (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
237 aiocbp->aio_resultp.aio_errno != EBADFD)) {
238 if (head)
239 _lio_list_decr(head);
240 continue;
241 }
242 if (aiocbp->aio_resultp.aio_errno == EBADFD)
243 SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
244 if (aiocbp->aio_reqprio != 0) {
245 aiocbp->aio_resultp.aio_errno = EINVAL;
246 aiocbp->aio_resultp.aio_return = -1;
247 EIOflg = 1;
248 if (head)
249 _lio_list_decr(head);
250 continue;
251 }
252 /*
253 * submit an AIO request with flags AIO_NO_KAIO
254 * to avoid the kaio() syscall in _aio_rw()
255 */
256 switch (aiocbp->aio_lio_opcode) {
257 case LIO_READ:
258 rw = AIOAREAD;
259 break;
260 case LIO_WRITE:
261 rw = AIOAWRITE;
262 break;
263 }
264 error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
265 (AIO_NO_KAIO | AIO_NO_DUPS));
266 if (error == 0)
267 aio_ufs++;
268 else {
269 if (head)
270 _lio_list_decr(head);
271 aiocbp->aio_resultp.aio_errno = error;
272 EIOflg = 1;
273 }
274 }
275 }
276 if (EIOflg) {
277 errno = EIO;
278 return (-1);
279 }
280 if (mode == LIO_WAIT && oerrno == ENOTSUP) {
281 /*
282 * call kaio(AIOLIOWAIT) to get all outstanding
283 * kernel AIO requests
284 */
285 if ((nent - aio_ufs) > 0)
286 (void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
287 if (head != NULL && head->lio_nent > 0) {
288 sig_mutex_lock(&head->lio_mutex);
289 while (head->lio_refcnt > 0) {
290 int err;
291 head->lio_waiting = 1;
292 pthread_cleanup_push(_lio_listio_cleanup, head);
293 err = sig_cond_wait(&head->lio_cond_cv,
294 &head->lio_mutex);
295 pthread_cleanup_pop(0);
296 head->lio_waiting = 0;
297 if (err && head->lio_nent > 0) {
298 sig_mutex_unlock(&head->lio_mutex);
299 errno = err;
300 return (-1);
301 }
302 }
303 sig_mutex_unlock(&head->lio_mutex);
304 ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
305 _aio_lio_free(head);
306 for (i = 0; i < nent; i++) {
307 if ((aiocbp = list[i]) != NULL &&
308 aiocbp->aio_resultp.aio_errno) {
309 errno = EIO;
310 return (-1);
311 }
312 }
313 }
314 return (0);
315 }
316 return (error);
317 }
318
319 static void
320 _lio_list_decr(aio_lio_t *head)
321 {
322 sig_mutex_lock(&head->lio_mutex);
323 head->lio_nent--;
324 head->lio_refcnt--;
325 sig_mutex_unlock(&head->lio_mutex);
326 }
327
328 /*
329 * __aio_suspend() cancellation handler.
330 */
331 /* ARGSUSED */
332 static void
333 _aio_suspend_cleanup(int *counter)
334 {
335 ASSERT(MUTEX_HELD(&__aio_mutex));
336 (*counter)--; /* _aio_kernel_suspend or _aio_suscv_cnt */
337 sig_mutex_unlock(&__aio_mutex);
338 }
339
340 static int
341 __aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
342 {
343 int cv_err; /* error code from cond_xxx() */
344 int kerr; /* error code from _kaio(AIOSUSPEND) */
345 int i;
346 timespec_t twait; /* copy of timo for internal calculations */
347 timespec_t *wait = NULL;
348 int timedwait;
349 int req_outstanding;
350 aiocb_t **listp;
351 aiocb_t *aiocbp;
352 #if !defined(_LP64)
353 aiocb64_t **listp64;
354 aiocb64_t *aiocbp64;
355 #endif
356 hrtime_t hrtstart;
357 hrtime_t hrtend;
358 hrtime_t hrtres;
359
360 #if defined(_LP64)
361 if (largefile)
362 aio_panic("__aio_suspend: largefile set when _LP64 defined");
363 #endif
364
365 if (nent <= 0) {
366 errno = EINVAL;
367 return (-1);
368 }
369
370 if (timo) {
371 if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
372 timo->tv_nsec >= NANOSEC) {
373 errno = EINVAL;
374 return (-1);
375 }
376 /* Initialize start time if time monitoring desired */
377 if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
378 timedwait = AIO_TIMEOUT_WAIT;
379 hrtstart = gethrtime();
380 } else {
381 /* content of timeout = 0 : polling */
382 timedwait = AIO_TIMEOUT_POLL;
383 }
384 } else {
385 /* timeout pointer = NULL : wait indefinitely */
386 timedwait = AIO_TIMEOUT_INDEF;
387 }
388
389 #if !defined(_LP64)
390 if (largefile) {
391 listp64 = (aiocb64_t **)list;
392 for (i = 0; i < nent; i++) {
393 if ((aiocbp64 = listp64[i]) != NULL &&
394 aiocbp64->aio_state == CHECK)
395 aiocbp64->aio_state = CHECKED;
396 }
397 } else
398 #endif /* !_LP64 */
399 {
400 listp = (aiocb_t **)list;
401 for (i = 0; i < nent; i++) {
402 if ((aiocbp = listp[i]) != NULL &&
403 aiocbp->aio_state == CHECK)
404 aiocbp->aio_state = CHECKED;
405 }
406 }
407
408 sig_mutex_lock(&__aio_mutex);
409
410 /*
411 	 * The next "if" case is required to accelerate
412 	 * access to completed RAW-IO requests.
413 */
414 if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
415 /* Only kernel requests pending */
416
417 /*
418 * _aio_kernel_suspend is used to detect completed non RAW-IO
419 * requests.
420 * As long as this thread resides in the kernel (_kaio) further
421 * asynchronous non RAW-IO requests could be submitted.
422 */
423 _aio_kernel_suspend++;
424
425 /*
426 * Always do the kaio() call without using the KAIO_SUPPORTED()
427 * checks because it is not mandatory to have a valid fd
428 * set in the list entries, only the resultp must be set.
429 *
430 * _kaio(AIOSUSPEND ...) return values :
431 	 * 0: everything OK, completed request found
432 	 * -1: error
433 	 * 1: no error : _aiodone woke up the _kaio(AIOSUSPEND,,)
434 	 *    system call using _kaio(AIONOTIFY).  This means that some
435 	 *    non RAW-IOs completed in between.
436 */
437
438 pthread_cleanup_push(_aio_suspend_cleanup,
439 &_aio_kernel_suspend);
440 pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
441 sig_mutex_unlock(&__aio_mutex);
442 _cancel_prologue();
443 kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
444 list, nent, timo, -1);
445 _cancel_epilogue();
446 pthread_cleanup_pop(1); /* sig_mutex_lock(&__aio_mutex) */
447 pthread_cleanup_pop(0);
448
449 _aio_kernel_suspend--;
450
451 if (!kerr) {
452 sig_mutex_unlock(&__aio_mutex);
453 return (0);
454 }
455 } else {
456 kerr = 1; /* simulation: _kaio detected AIONOTIFY */
457 }
458
459 /*
460 * Return kernel error code if no other IOs are outstanding.
461 */
462 req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;
463
464 sig_mutex_unlock(&__aio_mutex);
465
466 if (req_outstanding == 0) {
467 /* no IOs outstanding in the thread pool */
468 if (kerr == 1)
469 /* return "no IOs completed" */
470 errno = EAGAIN;
471 return (-1);
472 }
473
474 /*
475 * IOs using the thread pool are outstanding.
476 */
477 if (timedwait == AIO_TIMEOUT_WAIT) {
478 /* time monitoring */
479 hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
480 (hrtime_t)timo->tv_nsec;
481 hrtres = hrtend - gethrtime();
482 if (hrtres <= 0)
483 hrtres = 1;
484 twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
485 twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
486 wait = &twait;
487 } else if (timedwait == AIO_TIMEOUT_POLL) {
488 twait = *timo; /* content of timo = 0 : polling */
489 wait = &twait;
490 }
491
492 for (;;) {
493 int error;
494 int inprogress;
495
496 /* first scan file system requests */
497 inprogress = 0;
498 for (i = 0; i < nent; i++) {
499 #if !defined(_LP64)
500 if (largefile) {
501 if ((aiocbp64 = listp64[i]) == NULL)
502 continue;
503 error = aiocbp64->aio_resultp.aio_errno;
504 } else
505 #endif
506 {
507 if ((aiocbp = listp[i]) == NULL)
508 continue;
509 error = aiocbp->aio_resultp.aio_errno;
510 }
511 if (error == EINPROGRESS)
512 inprogress = 1;
513 else if (error != ECANCELED) {
514 errno = 0;
515 return (0);
516 }
517 }
518
519 sig_mutex_lock(&__aio_mutex);
520
521 /*
522 		 * If there are no outstanding I/Os in the thread pool then
523 * we have to return here, provided that all kernel RAW-IOs
524 * also completed.
525 * If the kernel was notified to return, then we have to check
526 * possible pending RAW-IOs.
527 */
528 if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
529 sig_mutex_unlock(&__aio_mutex);
530 errno = EAGAIN;
531 break;
532 }
533
534 /*
535 * There are outstanding IOs in the thread pool or the kernel
536 * was notified to return.
537 * Check pending RAW-IOs first.
538 */
539 if (kerr == 1) {
540 /*
541 * _aiodone just notified the kernel about
542 * completed non RAW-IOs (AIONOTIFY was detected).
543 */
544 if (timedwait == AIO_TIMEOUT_WAIT) {
545 /* Update remaining timeout for the kernel */
546 hrtres = hrtend - gethrtime();
547 if (hrtres <= 0) {
548 /* timer expired */
549 sig_mutex_unlock(&__aio_mutex);
550 errno = EAGAIN;
551 break;
552 }
553 wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
554 wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
555 }
556 _aio_kernel_suspend++;
557
558 pthread_cleanup_push(_aio_suspend_cleanup,
559 &_aio_kernel_suspend);
560 pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
561 sig_mutex_unlock(&__aio_mutex);
562 _cancel_prologue();
563 kerr = (int)_kaio(largefile? AIOSUSPEND64 : AIOSUSPEND,
564 list, nent, wait, -1);
565 _cancel_epilogue();
566 pthread_cleanup_pop(1);
567 pthread_cleanup_pop(0);
568
569 _aio_kernel_suspend--;
570
571 if (!kerr) {
572 sig_mutex_unlock(&__aio_mutex);
573 return (0);
574 }
575 }
576
577 if (timedwait == AIO_TIMEOUT_POLL) {
578 sig_mutex_unlock(&__aio_mutex);
579 errno = EAGAIN;
580 break;
581 }
582
583 if (timedwait == AIO_TIMEOUT_WAIT) {
584 /* Update remaining timeout */
585 hrtres = hrtend - gethrtime();
586 if (hrtres <= 0) {
587 /* timer expired */
588 sig_mutex_unlock(&__aio_mutex);
589 errno = EAGAIN;
590 break;
591 }
592 wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
593 wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
594 }
595
596 if (_aio_outstand_cnt == 0) {
597 sig_mutex_unlock(&__aio_mutex);
598 continue;
599 }
600
601 _aio_suscv_cnt++; /* ID for _aiodone (wake up) */
602
603 pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
604 if (timedwait == AIO_TIMEOUT_WAIT) {
605 cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
606 &__aio_mutex, wait);
607 if (cv_err == ETIME)
608 cv_err = EAGAIN;
609 } else {
610 /* wait indefinitely */
611 cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
612 }
613 /* this decrements _aio_suscv_cnt and drops __aio_mutex */
614 pthread_cleanup_pop(1);
615
616 if (cv_err) {
617 errno = cv_err;
618 break;
619 }
620 }
621 return (-1);
622 }
623
624 int
625 aio_suspend(const aiocb_t * const list[], int nent,
626 const timespec_t *timeout)
627 {
628 return (__aio_suspend((void **)list, nent, timeout, 0));
629 }
630
631 int
632 aio_error(const aiocb_t *aiocbp)
633 {
634 const aio_result_t *resultp = &aiocbp->aio_resultp;
635 aio_req_t *reqp;
636 int error;
637
638 if ((error = resultp->aio_errno) == EINPROGRESS) {
639 if (aiocbp->aio_state == CHECK) {
640 /*
641 * Always do the kaio() call without using the
642 * KAIO_SUPPORTED() checks because it is not
643 * mandatory to have a valid fd set in the
644 * aiocb, only the resultp must be set.
645 */
646 if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
647 errno = EINVAL;
648 return (-1);
649 }
650 error = resultp->aio_errno;
651 } else if (aiocbp->aio_state == CHECKED) {
652 ((aiocb_t *)aiocbp)->aio_state = CHECK;
653 }
654 } else if (aiocbp->aio_state == USERAIO) {
655 sig_mutex_lock(&__aio_mutex);
656 if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
657 sig_mutex_unlock(&__aio_mutex);
658 ((aiocb_t *)aiocbp)->aio_state = CHECKED;
659 } else {
660 ((aiocb_t *)aiocbp)->aio_state = NOCHECK;
661 ASSERT(reqp->req_head == NULL);
662 (void) _aio_req_remove(reqp);
663 sig_mutex_unlock(&__aio_mutex);
664 _aio_req_free(reqp);
665 }
666 }
667 return (error);
668 }
669
670 ssize_t
671 aio_return(aiocb_t *aiocbp)
672 {
673 aio_result_t *resultp = &aiocbp->aio_resultp;
674 aio_req_t *reqp;
675 int error;
676 ssize_t retval;
677
678 /*
679 * The _aiodone() function stores resultp->aio_return before
680 	 * storing resultp->aio_errno (with a membar_producer() in
681 	 * between).  We use membar_consumer() below to ensure proper
682 	 * memory ordering between _aiodone() and ourselves.
683 */
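	/*
	 * Conceptual pairing (the _aiodone() column is a paraphrased
	 * sketch, not its literal code):
	 *
	 *	_aiodone()			aio_return()
	 *	----------			------------
	 *	resultp->aio_return = ...;	error = resultp->aio_errno;
	 *	membar_producer();		membar_consumer();
	 *	resultp->aio_errno = ...;	retval = resultp->aio_return;
	 */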
684 error = resultp->aio_errno;
685 membar_consumer();
686 retval = resultp->aio_return;
687
688 /*
689 	 * We use this condition to indicate that either
690 	 * aio_return() has already been called or it should
691 	 * not be called yet.
692 */
693 if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
694 errno = error;
695 return (-1);
696 }
697
698 /*
699 * Before we return, mark the result as being returned so that later
700 * calls to aio_return() will return the fact that the result has
701 * already been returned.
702 */
703 sig_mutex_lock(&__aio_mutex);
704 /* retest, in case more than one thread actually got in here */
705 if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
706 sig_mutex_unlock(&__aio_mutex);
707 errno = EINVAL;
708 return (-1);
709 }
710 resultp->aio_return = -1;
711 resultp->aio_errno = EINVAL;
712 if ((reqp = _aio_hash_del(resultp)) == NULL)
713 sig_mutex_unlock(&__aio_mutex);
714 else {
715 aiocbp->aio_state = NOCHECK;
716 ASSERT(reqp->req_head == NULL);
717 (void) _aio_req_remove(reqp);
718 sig_mutex_unlock(&__aio_mutex);
719 _aio_req_free(reqp);
720 }
721
722 if (retval == -1)
723 errno = error;
724 return (retval);
725 }
726
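/*
 * Detach a request from its aio_lio_t list head and free the head
 * when the last entry has been removed.
 */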
727 void
728 _lio_remove(aio_req_t *reqp)
729 {
730 aio_lio_t *head;
731 int refcnt;
732
733 if ((head = reqp->req_head) != NULL) {
734 sig_mutex_lock(&head->lio_mutex);
735 ASSERT(head->lio_refcnt == head->lio_nent);
736 refcnt = --head->lio_nent;
737 head->lio_refcnt--;
738 sig_mutex_unlock(&head->lio_mutex);
739 if (refcnt == 0)
740 _aio_lio_free(head);
741 reqp->req_head = NULL;
742 }
743 }
744
745 /*
746 * This function returns the number of asynchronous I/O requests submitted.
747 */
748 static int
749 __aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
750 int workerscnt)
751 {
752 int i;
753 int error;
754 aio_worker_t *next = aiowp;
755
756 for (i = 0; i < workerscnt; i++) {
757 error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
758 if (error != 0) {
759 sig_mutex_lock(&head->lio_mutex);
760 head->lio_mode = LIO_DESTROY; /* ignore fsync */
761 head->lio_nent -= workerscnt - i;
762 head->lio_refcnt -= workerscnt - i;
763 sig_mutex_unlock(&head->lio_mutex);
764 errno = EAGAIN;
765 return (i);
766 }
767 next = next->work_forw;
768 }
769 return (i);
770 }
771
772 int
773 aio_fsync(int op, aiocb_t *aiocbp)
774 {
775 aio_lio_t *head;
776 struct stat statb;
777 int fret;
778
779 if (aiocbp == NULL)
780 return (0);
781 if (op != O_DSYNC && op != O_SYNC) {
782 errno = EINVAL;
783 return (-1);
784 }
785 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
786 errno = EBUSY;
787 return (-1);
788 }
789 if (fstat(aiocbp->aio_fildes, &statb) < 0)
790 return (-1);
791 if (_aio_sigev_thread(aiocbp) != 0)
792 return (-1);
793
794 /*
795 * Kernel aio_fsync() is not supported.
796 * We force user-level aio_fsync() just
797 * for the notification side-effect.
798 */
799 if (!__uaio_ok && __uaio_init() == -1)
800 return (-1);
801
802 /*
803 * The first asynchronous I/O request in the current process will
804 * create a bunch of workers (via __uaio_init()). If the number
805 * of workers is zero then the number of pending asynchronous I/O
806 * requests is zero. In such a case only execute the standard
807 * fsync(3C) or fdatasync(3RT) as appropriate.
808 */
809 if (__rw_workerscnt == 0) {
810 if (op == O_DSYNC)
811 return (__fdsync(aiocbp->aio_fildes, FDSYNC));
812 else
813 return (__fdsync(aiocbp->aio_fildes, FSYNC));
814 }
815
816 /*
817 * re-use aio_offset as the op field.
818 * O_DSYNC - fdatasync()
819 * O_SYNC - fsync()
820 */
821 aiocbp->aio_offset = op;
822 aiocbp->aio_lio_opcode = AIOFSYNC;
823
824 /*
825 * Create a list of fsync requests. The worker that
826 * gets the last request will do the fsync request.
827 */
828 head = _aio_lio_alloc();
829 if (head == NULL) {
830 errno = EAGAIN;
831 return (-1);
832 }
833 head->lio_mode = LIO_FSYNC;
834 head->lio_nent = head->lio_refcnt = __rw_workerscnt;
835 head->lio_largefile = 0;
836
837 /*
838 * Insert an fsync request on every worker's queue.
839 */
840 fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
841 if (fret != __rw_workerscnt) {
842 /*
843 * Fewer fsync requests than workers means that it was
844 * not possible to submit fsync requests to all workers.
845 * Actions:
846 * a) number of fsync requests submitted is 0:
847 * => free allocated memory (aio_lio_t).
848 * b) number of fsync requests submitted is > 0:
849 * => the last worker executing the fsync request
850 * will free the aio_lio_t struct.
851 */
852 if (fret == 0)
853 _aio_lio_free(head);
854 return (-1);
855 }
856 return (0);
857 }
858
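/*
 * Cancel a single request (aiocbp != NULL) or all requests queued on
 * "fd" (aiocbp == NULL).  Returns AIO_CANCELED, AIO_NOTCANCELED or
 * AIO_ALLDONE, or -1 with errno set on error.
 */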
859 int
860 aio_cancel(int fd, aiocb_t *aiocbp)
861 {
862 aio_req_t *reqp;
863 aio_worker_t *aiowp;
864 int done = 0;
865 int canceled = 0;
866 struct stat buf;
867
868 if (fstat(fd, &buf) < 0)
869 return (-1);
870
871 if (aiocbp != NULL) {
872 if (fd != aiocbp->aio_fildes) {
873 errno = EINVAL;
874 return (-1);
875 }
876 if (aiocbp->aio_state == USERAIO) {
877 sig_mutex_lock(&__aio_mutex);
878 reqp = _aio_hash_find(&aiocbp->aio_resultp);
879 if (reqp == NULL) {
880 sig_mutex_unlock(&__aio_mutex);
881 return (AIO_ALLDONE);
882 }
883 aiowp = reqp->req_worker;
884 sig_mutex_lock(&aiowp->work_qlock1);
885 (void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
886 sig_mutex_unlock(&aiowp->work_qlock1);
887 sig_mutex_unlock(&__aio_mutex);
888 if (done)
889 return (AIO_ALLDONE);
890 if (canceled)
891 return (AIO_CANCELED);
892 return (AIO_NOTCANCELED);
893 }
894 if (aiocbp->aio_state == USERAIO_DONE)
895 return (AIO_ALLDONE);
896 return ((int)_kaio(AIOCANCEL, fd, aiocbp));
897 }
898
899 return (aiocancel_all(fd));
900 }
901
902 /*
903 * __aio_waitn() cancellation handler.
904 */
905 /* ARGSUSED */
906 static void
907 _aio_waitn_cleanup(void *arg)
908 {
909 ASSERT(MUTEX_HELD(&__aio_mutex));
910
911 /* check for pending aio_waitn() calls */
912 _aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
913 if (_aio_flags & AIO_LIB_WAITN_PENDING) {
914 _aio_flags &= ~AIO_LIB_WAITN_PENDING;
915 (void) cond_signal(&_aio_waitn_cv);
916 }
917
918 sig_mutex_unlock(&__aio_mutex);
919 }
920
921 /*
922 * aio_waitn can be used to reap the results of several I/O operations that
923 * were submitted asynchronously. The submission of I/Os can be done using
924 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
925 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
926 * completed and it returns the descriptors for these I/Os in "list". The
927 * maximum size of this list is given by "nent" and the actual number of I/Os
928 	 * completed is returned in "nwait".  aio_waitn may also return early
929 	 * if the timeout expires.  aio_waitn returns 0 if successful or -1
930 	 * if an error occurred.
931 */
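/*
 * Illustrative sketch of a caller (not part of this file; NREQS and the
 * submission step are hypothetical):
 *
 *	aiocb_t *done[NREQS];
 *	uint_t nwait = NREQS;
 *	uint_t i;
 *
 *	... submit NREQS requests with aio_read()/aio_write()/lio_listio() ...
 *
 *	if (aio_waitn(done, NREQS, &nwait, NULL) == 0) {
 *		for (i = 0; i < nwait; i++)
 *			(void) aio_return(done[i]);
 *	}
 */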
932 static int
933 __aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
934 {
935 int error = 0;
936 	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
937 uint_t kwaitcnt; /* expected "done" requests from kernel */
938 uint_t knentcnt; /* max. expected "done" requests from kernel */
939 int uerrno = 0;
940 int kerrno = 0; /* save errno from _kaio() call */
941 int timedwait = AIO_TIMEOUT_UNDEF;
942 aio_req_t *reqp;
943 timespec_t end;
944 timespec_t twait; /* copy of utimo for internal calculations */
945 timespec_t *wait = NULL;
946
947 if (nent == 0 || *nwait == 0 || *nwait > nent) {
948 errno = EINVAL;
949 return (-1);
950 }
951
952 /*
953 	 * Only one running aio_waitn() call per process is allowed.
954 * Further calls will be blocked here until the running
955 * call finishes.
956 */
957
958 sig_mutex_lock(&__aio_mutex);
959
960 while (_aio_flags & AIO_LIB_WAITN) {
961 if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
962 sig_mutex_unlock(&__aio_mutex);
963 *nwait = 0;
964 return (0);
965 }
966 _aio_flags |= AIO_LIB_WAITN_PENDING;
967 pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
968 error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
969 pthread_cleanup_pop(0);
970 if (error != 0) {
971 sig_mutex_unlock(&__aio_mutex);
972 *nwait = 0;
973 errno = error;
974 return (-1);
975 }
976 }
977
978 pthread_cleanup_push(_aio_waitn_cleanup, NULL);
979
980 _aio_flags |= AIO_LIB_WAITN;
981
982 if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
983 error = -1;
984 dnwait = 0;
985 goto out;
986 }
987 if (timedwait != AIO_TIMEOUT_INDEF) {
988 twait = *utimo;
989 wait = &twait;
990 }
991
992 /*
993 * If both counters are still set to zero, then only
994 * kernel requests are currently outstanding (raw-I/Os).
995 */
996 if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
997 for (;;) {
998 kwaitcnt = *nwait - dnwait;
999 knentcnt = nent - dnwait;
1000 if (knentcnt > AIO_WAITN_MAXIOCBS)
1001 knentcnt = AIO_WAITN_MAXIOCBS;
1002 kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;
1003
1004 pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
1005 sig_mutex_unlock(&__aio_mutex);
1006 _cancel_prologue();
1007 error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
1008 &kwaitcnt, wait);
1009 _cancel_epilogue();
1010 pthread_cleanup_pop(1);
1011
1012 if (error == 0) {
1013 dnwait += kwaitcnt;
1014 if (dnwait >= *nwait ||
1015 *nwait < AIO_WAITN_MAXIOCBS)
1016 break;
1017 if (timedwait == AIO_TIMEOUT_WAIT) {
1018 error = _aio_get_timedelta(&end, wait);
1019 if (error == -1) {
1020 /* timer expired */
1021 errno = ETIME;
1022 break;
1023 }
1024 }
1025 continue;
1026 }
1027 if (errno == EAGAIN) {
1028 if (dnwait > 0)
1029 error = 0;
1030 break;
1031 }
1032 if (errno == ETIME || errno == EINTR) {
1033 dnwait += kwaitcnt;
1034 break;
1035 }
1036 /* fatal error */
1037 break;
1038 }
1039
1040 goto out;
1041 }
1042
1043 /* File system I/Os outstanding ... */
1044
1045 if (timedwait == AIO_TIMEOUT_UNDEF) {
1046 if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
1047 error = -1;
1048 dnwait = 0;
1049 goto out;
1050 }
1051 if (timedwait != AIO_TIMEOUT_INDEF) {
1052 twait = *utimo;
1053 wait = &twait;
1054 }
1055 }
1056
1057 for (;;) {
1058 uint_t sum_reqs;
1059
1060 /*
1061 * Calculate sum of active non RAW-IO requests (sum_reqs).
1062 		 * If the expected number of completed requests (*nwait) is
1063 * greater than the calculated sum (sum_reqs) then
1064 * use _kaio to check pending RAW-IO requests.
1065 */
1066 sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
1067 kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;
1068
1069 if (kwaitcnt != 0) {
1070 /* possibly some kernel I/Os outstanding */
1071 knentcnt = nent - dnwait;
1072 if (knentcnt > AIO_WAITN_MAXIOCBS)
1073 knentcnt = AIO_WAITN_MAXIOCBS;
1074 kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;
1075
1076 _aio_flags |= AIO_WAIT_INPROGRESS;
1077
1078 pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
1079 sig_mutex_unlock(&__aio_mutex);
1080 _cancel_prologue();
1081 error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
1082 &kwaitcnt, wait);
1083 _cancel_epilogue();
1084 pthread_cleanup_pop(1);
1085
1086 _aio_flags &= ~AIO_WAIT_INPROGRESS;
1087
1088 if (error == 0) {
1089 dnwait += kwaitcnt;
1090 } else {
1091 switch (errno) {
1092 case EINVAL:
1093 case EAGAIN:
1094 /* don't wait for kernel I/Os */
1095 kerrno = 0; /* ignore _kaio() errno */
1096 *nwait = _aio_doneq_cnt +
1097 _aio_outstand_cnt + dnwait;
1098 error = 0;
1099 break;
1100 case EINTR:
1101 case ETIME:
1102 /* just scan for completed LIB I/Os */
1103 dnwait += kwaitcnt;
1104 timedwait = AIO_TIMEOUT_POLL;
1105 kerrno = errno; /* save _kaio() errno */
1106 error = 0;
1107 break;
1108 default:
1109 kerrno = errno; /* save _kaio() errno */
1110 break;
1111 }
1112 }
1113 if (error)
1114 break; /* fatal kernel error */
1115 }
1116
1117 /* check completed FS requests in the "done" queue */
1118
1119 while (_aio_doneq_cnt && dnwait < nent) {
1120 /* get done requests */
1121 if ((reqp = _aio_req_remove(NULL)) != NULL) {
1122 (void) _aio_hash_del(reqp->req_resultp);
1123 list[dnwait++] = reqp->req_aiocbp;
1124 _aio_req_mark_done(reqp);
1125 _lio_remove(reqp);
1126 _aio_req_free(reqp);
1127 }
1128 }
1129
1130 if (dnwait >= *nwait) {
1131 			/* minimum requested number of completed I/Os satisfied */
1132 break;
1133 }
1134 if (timedwait == AIO_TIMEOUT_WAIT &&
1135 (error = _aio_get_timedelta(&end, wait)) == -1) {
1136 /* timer expired */
1137 uerrno = ETIME;
1138 break;
1139 }
1140
1141 /*
1142 * If some I/Os are outstanding and we have to wait for them,
1143 * then sleep here. _aiodone() will call _aio_waitn_wakeup()
1144 * to wakeup this thread as soon as the required amount of
1145 * completed I/Os is done.
1146 */
1147 if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
1148 /*
1149 * _aio_waitn_wakeup() will wake up this thread when:
1150 * - _aio_waitncnt requests are completed or
1151 * - _aio_outstand_cnt becomes zero.
1152 * sig_cond_reltimedwait() could also return with
1153 * a timeout error (ETIME).
1154 */
1155 if (*nwait < _aio_outstand_cnt)
1156 _aio_waitncnt = *nwait;
1157 else
1158 _aio_waitncnt = _aio_outstand_cnt;
1159
1160 _aio_flags |= AIO_IO_WAITING;
1161
1162 if (wait)
1163 uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
1164 &__aio_mutex, wait);
1165 else
1166 uerrno = sig_cond_wait(&_aio_iowait_cv,
1167 &__aio_mutex);
1168
1169 _aio_flags &= ~AIO_IO_WAITING;
1170
1171 if (uerrno == ETIME) {
1172 timedwait = AIO_TIMEOUT_POLL;
1173 continue;
1174 }
1175 if (uerrno != 0)
1176 timedwait = AIO_TIMEOUT_POLL;
1177 }
1178
1179 if (timedwait == AIO_TIMEOUT_POLL) {
1180 /* polling or timer expired */
1181 break;
1182 }
1183 }
1184
1185 errno = uerrno == 0 ? kerrno : uerrno;
1186 if (errno)
1187 error = -1;
1188 else
1189 error = 0;
1190
1191 out:
1192 *nwait = dnwait;
1193
1194 pthread_cleanup_pop(1); /* drops __aio_mutex */
1195
1196 return (error);
1197 }
1198
1199 int
1200 aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
1201 const timespec_t *timeout)
1202 {
1203 return (__aio_waitn((void **)list, nent, nwait, timeout));
1204 }
1205
1206 void
1207 _aio_waitn_wakeup(void)
1208 {
1209 /*
1210 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
1211 * it is waiting for completed I/Os. The number of required
1212 	 * completed I/Os is stored in "_aio_waitncnt".
1213 * aio_waitn() is woken up when
1214 * - there are no further outstanding I/Os
1215 * (_aio_outstand_cnt == 0) or
1216 * - the expected number of I/Os has completed.
1217 * Only one __aio_waitn() function waits for completed I/Os at
1218 * a time.
1219 *
1220 * __aio_suspend() increments "_aio_suscv_cnt" to notify
1221 * _aiodone() that at least one __aio_suspend() call is
1222 * waiting for completed I/Os.
1223 * There could be more than one __aio_suspend() function
1224 * waiting for completed I/Os. Because every function should
1225 * be waiting for different I/Os, _aiodone() has to wake up all
1226 * __aio_suspend() functions each time.
1227 * Every __aio_suspend() function will compare the recently
1228 * completed I/O with its own list.
1229 */
1230 ASSERT(MUTEX_HELD(&__aio_mutex));
1231 if (_aio_flags & AIO_IO_WAITING) {
1232 if (_aio_waitncnt > 0)
1233 _aio_waitncnt--;
1234 if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
1235 _aio_suscv_cnt > 0)
1236 (void) cond_broadcast(&_aio_iowait_cv);
1237 } else {
1238 /* Wake up waiting aio_suspend calls */
1239 if (_aio_suscv_cnt > 0)
1240 (void) cond_broadcast(&_aio_iowait_cv);
1241 }
1242 }
1243
1244 /*
1245 * timedwait values :
1246 * AIO_TIMEOUT_POLL : polling
1247 * AIO_TIMEOUT_WAIT : timeout
1248 * AIO_TIMEOUT_INDEF : wait indefinitely
1249 */
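/*
 * For example, given the checks below: utimo == NULL yields
 * AIO_TIMEOUT_INDEF, a zero-valued timespec yields AIO_TIMEOUT_POLL, and
 * a positive timespec yields AIO_TIMEOUT_WAIT with *end set to the
 * absolute expiry time (now + *utimo).
 */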
1250 static int
1251 _aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
1252 {
1253 struct timeval curtime;
1254
1255 if (utimo) {
1256 if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
1257 utimo->tv_nsec >= NANOSEC) {
1258 errno = EINVAL;
1259 return (-1);
1260 }
1261 if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
1262 (void) gettimeofday(&curtime, NULL);
1263 end->tv_sec = utimo->tv_sec + curtime.tv_sec;
1264 end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
1265 if (end->tv_nsec >= NANOSEC) {
1266 end->tv_nsec -= NANOSEC;
1267 end->tv_sec += 1;
1268 }
1269 *timedwait = AIO_TIMEOUT_WAIT;
1270 } else {
1271 /* polling */
1272 *timedwait = AIO_TIMEOUT_POLL;
1273 }
1274 } else {
1275 *timedwait = AIO_TIMEOUT_INDEF; /* wait indefinitely */
1276 }
1277 return (0);
1278 }
1279
1280 #if !defined(_LP64)
1281
1282 int
1283 aio_read64(aiocb64_t *aiocbp)
1284 {
1285 if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
1286 errno = EINVAL;
1287 return (-1);
1288 }
1289 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1290 errno = EBUSY;
1291 return (-1);
1292 }
1293 if (_aio_sigev_thread64(aiocbp) != 0)
1294 return (-1);
1295 aiocbp->aio_lio_opcode = LIO_READ;
1296 return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
1297 (AIO_KAIO | AIO_NO_DUPS)));
1298 }
1299
1300 int
1301 aio_write64(aiocb64_t *aiocbp)
1302 {
1303 if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
1304 errno = EINVAL;
1305 return (-1);
1306 }
1307 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1308 errno = EBUSY;
1309 return (-1);
1310 }
1311 if (_aio_sigev_thread64(aiocbp) != 0)
1312 return (-1);
1313 aiocbp->aio_lio_opcode = LIO_WRITE;
1314 return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
1315 (AIO_KAIO | AIO_NO_DUPS)));
1316 }
1317
1318 int
1319 lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
1320 int nent, struct sigevent *_RESTRICT_KYWD sigevp)
1321 {
1322 int aio_ufs = 0;
1323 int oerrno = 0;
1324 aio_lio_t *head = NULL;
1325 aiocb64_t *aiocbp;
1326 int state = 0;
1327 int EIOflg = 0;
1328 int rw;
1329 int do_kaio = 0;
1330 int error;
1331 int i;
1332
1333 if (!_kaio_ok)
1334 _kaio_init();
1335
1336 if (aio_list_max == 0)
1337 aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
1338
1339 if (nent <= 0 || nent > aio_list_max) {
1340 errno = EINVAL;
1341 return (-1);
1342 }
1343
1344 switch (mode) {
1345 case LIO_WAIT:
1346 state = NOCHECK;
1347 break;
1348 case LIO_NOWAIT:
1349 state = CHECK;
1350 break;
1351 default:
1352 errno = EINVAL;
1353 return (-1);
1354 }
1355
1356 for (i = 0; i < nent; i++) {
1357 if ((aiocbp = list[i]) == NULL)
1358 continue;
1359 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1360 errno = EBUSY;
1361 return (-1);
1362 }
1363 if (_aio_sigev_thread64(aiocbp) != 0)
1364 return (-1);
1365 if (aiocbp->aio_lio_opcode == LIO_NOP)
1366 aiocbp->aio_state = NOCHECK;
1367 else {
1368 aiocbp->aio_state = state;
1369 if (KAIO_SUPPORTED(aiocbp->aio_fildes))
1370 do_kaio++;
1371 else
1372 aiocbp->aio_resultp.aio_errno = ENOTSUP;
1373 }
1374 }
1375 if (_aio_sigev_thread_init(sigevp) != 0)
1376 return (-1);
1377
1378 if (do_kaio) {
1379 error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
1380 if (error == 0)
1381 return (0);
1382 oerrno = errno;
1383 } else {
1384 oerrno = errno = ENOTSUP;
1385 error = -1;
1386 }
1387
1388 if (error == -1 && errno == ENOTSUP) {
1389 error = errno = 0;
1390 /*
1391 * If LIO_WAIT, or notification required, allocate a list head.
1392 */
1393 if (mode == LIO_WAIT ||
1394 (sigevp != NULL &&
1395 (sigevp->sigev_notify == SIGEV_SIGNAL ||
1396 sigevp->sigev_notify == SIGEV_THREAD ||
1397 sigevp->sigev_notify == SIGEV_PORT)))
1398 head = _aio_lio_alloc();
1399 if (head) {
1400 sig_mutex_lock(&head->lio_mutex);
1401 head->lio_mode = mode;
1402 head->lio_largefile = 1;
1403 if (mode == LIO_NOWAIT && sigevp != NULL) {
1404 if (sigevp->sigev_notify == SIGEV_THREAD) {
1405 head->lio_port = sigevp->sigev_signo;
1406 head->lio_event = AIOLIO64;
1407 head->lio_sigevent = sigevp;
1408 head->lio_sigval.sival_ptr =
1409 sigevp->sigev_value.sival_ptr;
1410 } else if (sigevp->sigev_notify == SIGEV_PORT) {
1411 port_notify_t *pn =
1412 sigevp->sigev_value.sival_ptr;
1413 head->lio_port = pn->portnfy_port;
1414 head->lio_event = AIOLIO64;
1415 head->lio_sigevent = sigevp;
1416 head->lio_sigval.sival_ptr =
1417 pn->portnfy_user;
1418 } else { /* SIGEV_SIGNAL */
1419 head->lio_signo = sigevp->sigev_signo;
1420 head->lio_sigval.sival_ptr =
1421 sigevp->sigev_value.sival_ptr;
1422 }
1423 }
1424 head->lio_nent = head->lio_refcnt = nent;
1425 sig_mutex_unlock(&head->lio_mutex);
1426 }
1427 /*
1428 * find UFS requests, errno == ENOTSUP/EBADFD,
1429 */
1430 for (i = 0; i < nent; i++) {
1431 if ((aiocbp = list[i]) == NULL ||
1432 aiocbp->aio_lio_opcode == LIO_NOP ||
1433 (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
1434 aiocbp->aio_resultp.aio_errno != EBADFD)) {
1435 if (head)
1436 _lio_list_decr(head);
1437 continue;
1438 }
1439 if (aiocbp->aio_resultp.aio_errno == EBADFD)
1440 SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
1441 if (aiocbp->aio_reqprio != 0) {
1442 aiocbp->aio_resultp.aio_errno = EINVAL;
1443 aiocbp->aio_resultp.aio_return = -1;
1444 EIOflg = 1;
1445 if (head)
1446 _lio_list_decr(head);
1447 continue;
1448 }
1449 /*
1450 * submit an AIO request with flags AIO_NO_KAIO
1451 * to avoid the kaio() syscall in _aio_rw()
1452 */
1453 switch (aiocbp->aio_lio_opcode) {
1454 case LIO_READ:
1455 rw = AIOAREAD64;
1456 break;
1457 case LIO_WRITE:
1458 rw = AIOAWRITE64;
1459 break;
1460 }
1461 error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
1462 (AIO_NO_KAIO | AIO_NO_DUPS));
1463 if (error == 0)
1464 aio_ufs++;
1465 else {
1466 if (head)
1467 _lio_list_decr(head);
1468 aiocbp->aio_resultp.aio_errno = error;
1469 EIOflg = 1;
1470 }
1471 }
1472 }
1473 if (EIOflg) {
1474 errno = EIO;
1475 return (-1);
1476 }
1477 if (mode == LIO_WAIT && oerrno == ENOTSUP) {
1478 /*
1479 * call kaio(AIOLIOWAIT) to get all outstanding
1480 * kernel AIO requests
1481 */
1482 if ((nent - aio_ufs) > 0)
1483 (void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
1484 if (head != NULL && head->lio_nent > 0) {
1485 sig_mutex_lock(&head->lio_mutex);
1486 while (head->lio_refcnt > 0) {
1487 int err;
1488 head->lio_waiting = 1;
1489 pthread_cleanup_push(_lio_listio_cleanup, head);
1490 err = sig_cond_wait(&head->lio_cond_cv,
1491 &head->lio_mutex);
1492 pthread_cleanup_pop(0);
1493 head->lio_waiting = 0;
1494 if (err && head->lio_nent > 0) {
1495 sig_mutex_unlock(&head->lio_mutex);
1496 errno = err;
1497 return (-1);
1498 }
1499 }
1500 sig_mutex_unlock(&head->lio_mutex);
1501 ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
1502 _aio_lio_free(head);
1503 for (i = 0; i < nent; i++) {
1504 if ((aiocbp = list[i]) != NULL &&
1505 aiocbp->aio_resultp.aio_errno) {
1506 errno = EIO;
1507 return (-1);
1508 }
1509 }
1510 }
1511 return (0);
1512 }
1513 return (error);
1514 }
1515
1516 int
1517 aio_suspend64(const aiocb64_t * const list[], int nent,
1518 const timespec_t *timeout)
1519 {
1520 return (__aio_suspend((void **)list, nent, timeout, 1));
1521 }
1522
1523 int
1524 aio_error64(const aiocb64_t *aiocbp)
1525 {
1526 const aio_result_t *resultp = &aiocbp->aio_resultp;
1527 int error;
1528
1529 if ((error = resultp->aio_errno) == EINPROGRESS) {
1530 if (aiocbp->aio_state == CHECK) {
1531 /*
1532 * Always do the kaio() call without using the
1533 * KAIO_SUPPORTED() checks because it is not
1534 * mandatory to have a valid fd set in the
1535 * aiocb, only the resultp must be set.
1536 */
1537 if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
1538 errno = EINVAL;
1539 return (-1);
1540 }
1541 error = resultp->aio_errno;
1542 } else if (aiocbp->aio_state == CHECKED) {
1543 ((aiocb64_t *)aiocbp)->aio_state = CHECK;
1544 }
1545 }
1546 return (error);
1547 }
1548
1549 ssize_t
1550 aio_return64(aiocb64_t *aiocbp)
1551 {
1552 aio_result_t *resultp = &aiocbp->aio_resultp;
1553 aio_req_t *reqp;
1554 int error;
1555 ssize_t retval;
1556
1557 /*
1558 * The _aiodone() function stores resultp->aio_return before
1559 	 * storing resultp->aio_errno (with a membar_producer() in
1560 	 * between).  We use membar_consumer() below to ensure proper
1561 	 * memory ordering between _aiodone() and ourselves.
1562 */
1563 error = resultp->aio_errno;
1564 membar_consumer();
1565 retval = resultp->aio_return;
1566
1567 /*
1568 	 * We use this condition to indicate that either
1569 	 * aio_return() has already been called or it should
1570 	 * not be called yet.
1571 */
1572 if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
1573 errno = error;
1574 return (-1);
1575 }
1576
1577 /*
1578 * Before we return, mark the result as being returned so that later
1579 * calls to aio_return() will return the fact that the result has
1580 * already been returned.
1581 */
1582 sig_mutex_lock(&__aio_mutex);
1583 /* retest, in case more than one thread actually got in here */
1584 if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
1585 sig_mutex_unlock(&__aio_mutex);
1586 errno = EINVAL;
1587 return (-1);
1588 }
1589 resultp->aio_return = -1;
1590 resultp->aio_errno = EINVAL;
1591 if ((reqp = _aio_hash_del(resultp)) == NULL)
1592 sig_mutex_unlock(&__aio_mutex);
1593 else {
1594 aiocbp->aio_state = NOCHECK;
1595 ASSERT(reqp->req_head == NULL);
1596 (void) _aio_req_remove(reqp);
1597 sig_mutex_unlock(&__aio_mutex);
1598 _aio_req_free(reqp);
1599 }
1600
1601 if (retval == -1)
1602 errno = error;
1603 return (retval);
1604 }
1605
1606 static int
1607 __aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
1608 int workerscnt)
1609 {
1610 int i;
1611 int error;
1612 aio_worker_t *next = aiowp;
1613
1614 for (i = 0; i < workerscnt; i++) {
1615 error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
1616 if (error != 0) {
1617 sig_mutex_lock(&head->lio_mutex);
1618 head->lio_mode = LIO_DESTROY; /* ignore fsync */
1619 head->lio_nent -= workerscnt - i;
1620 head->lio_refcnt -= workerscnt - i;
1621 sig_mutex_unlock(&head->lio_mutex);
1622 errno = EAGAIN;
1623 return (i);
1624 }
1625 next = next->work_forw;
1626 }
1627 return (i);
1628 }
1629
1630 int
1631 aio_fsync64(int op, aiocb64_t *aiocbp)
1632 {
1633 aio_lio_t *head;
1634 struct stat64 statb;
1635 int fret;
1636
1637 if (aiocbp == NULL)
1638 return (0);
1639 if (op != O_DSYNC && op != O_SYNC) {
1640 errno = EINVAL;
1641 return (-1);
1642 }
1643 if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1644 errno = EBUSY;
1645 return (-1);
1646 }
1647 if (fstat64(aiocbp->aio_fildes, &statb) < 0)
1648 return (-1);
1649 if (_aio_sigev_thread64(aiocbp) != 0)
1650 return (-1);
1651
1652 /*
1653 * Kernel aio_fsync() is not supported.
1654 * We force user-level aio_fsync() just
1655 * for the notification side-effect.
1656 */
1657 if (!__uaio_ok && __uaio_init() == -1)
1658 return (-1);
1659
1660 /*
1661 * The first asynchronous I/O request in the current process will
1662 * create a bunch of workers (via __uaio_init()). If the number
1663 * of workers is zero then the number of pending asynchronous I/O
1664 * requests is zero. In such a case only execute the standard
1665 * fsync(3C) or fdatasync(3RT) as appropriate.
1666 */
1667 if (__rw_workerscnt == 0) {
1668 if (op == O_DSYNC)
1669 return (__fdsync(aiocbp->aio_fildes, FDSYNC));
1670 else
1671 return (__fdsync(aiocbp->aio_fildes, FSYNC));
1672 }
1673
1674 /*
1675 * re-use aio_offset as the op field.
1676 * O_DSYNC - fdatasync()
1677 * O_SYNC - fsync()
1678 */
1679 aiocbp->aio_offset = op;
1680 aiocbp->aio_lio_opcode = AIOFSYNC;
1681
1682 /*
1683 * Create a list of fsync requests. The worker that
1684 * gets the last request will do the fsync request.
1685 */
1686 head = _aio_lio_alloc();
1687 if (head == NULL) {
1688 errno = EAGAIN;
1689 return (-1);
1690 }
1691 head->lio_mode = LIO_FSYNC;
1692 head->lio_nent = head->lio_refcnt = __rw_workerscnt;
1693 head->lio_largefile = 1;
1694
1695 /*
1696 * Insert an fsync request on every worker's queue.
1697 */
1698 fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
1699 if (fret != __rw_workerscnt) {
1700 /*
1701 * Fewer fsync requests than workers means that it was
1702 * not possible to submit fsync requests to all workers.
1703 * Actions:
1704 * a) number of fsync requests submitted is 0:
1705 * => free allocated memory (aio_lio_t).
1706 * b) number of fsync requests submitted is > 0:
1707 * => the last worker executing the fsync request
1708 * will free the aio_lio_t struct.
1709 */
1710 if (fret == 0)
1711 _aio_lio_free(head);
1712 return (-1);
1713 }
1714 return (0);
1715 }
1716
1717 int
1718 aio_cancel64(int fd, aiocb64_t *aiocbp)
1719 {
1720 aio_req_t *reqp;
1721 aio_worker_t *aiowp;
1722 int done = 0;
1723 int canceled = 0;
1724 struct stat64 buf;
1725
1726 if (fstat64(fd, &buf) < 0)
1727 return (-1);
1728
1729 if (aiocbp != NULL) {
1730 if (fd != aiocbp->aio_fildes) {
1731 errno = EINVAL;
1732 return (-1);
1733 }
1734 if (aiocbp->aio_state == USERAIO) {
1735 sig_mutex_lock(&__aio_mutex);
1736 reqp = _aio_hash_find(&aiocbp->aio_resultp);
1737 if (reqp == NULL) {
1738 sig_mutex_unlock(&__aio_mutex);
1739 return (AIO_ALLDONE);
1740 }
1741 aiowp = reqp->req_worker;
1742 sig_mutex_lock(&aiowp->work_qlock1);
1743 (void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
1744 sig_mutex_unlock(&aiowp->work_qlock1);
1745 sig_mutex_unlock(&__aio_mutex);
1746 if (done)
1747 return (AIO_ALLDONE);
1748 if (canceled)
1749 return (AIO_CANCELED);
1750 return (AIO_NOTCANCELED);
1751 }
1752 if (aiocbp->aio_state == USERAIO_DONE)
1753 return (AIO_ALLDONE);
1754 return ((int)_kaio(AIOCANCEL, fd, aiocbp));
1755 }
1756
1757 return (aiocancel_all(fd));
1758 }
1759
1760 int
1761 aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
1762 const timespec_t *timeout)
1763 {
1764 return (__aio_waitn((void **)list, nent, nwait, timeout));
1765 }
1766
1767 #endif /* !defined(_LP64) */
1768