/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/param.h>
#include <sys/file.h>
#include <sys/port.h>

static int _aio_hash_insert(aio_result_t *, aio_req_t *);
static aio_req_t *_aio_req_get(aio_worker_t *);
static void _aio_req_add(aio_req_t *, aio_worker_t **, int);
static void _aio_req_del(aio_worker_t *, aio_req_t *, int);
static void _aio_work_done(aio_worker_t *);
static void _aio_enq_doneq(aio_req_t *);

extern void _aio_lio_free(aio_lio_t *);

extern int __fdsync(int, int);
extern int __fcntl(int, int, ...);
extern int _port_dispatch(int, int, int, int, uintptr_t, void *);

static int _aio_fsync_del(aio_worker_t *, aio_req_t *);
static void _aiodone(aio_req_t *, ssize_t, int);
static void _aio_cancel_work(aio_worker_t *, int, int *, int *);
static void _aio_finish_request(aio_worker_t *, ssize_t, int);

/*
 * switch for kernel async I/O
 */
int _kaio_ok = 0;		/* 0 = disabled, 1 = on, -1 = error */

/*
 * Key for thread-specific data
 */
pthread_key_t _aio_key;

/*
 * Array for determining whether or not a file supports kaio.
 * Initialized in _kaio_init().
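 * (Consulted via the KAIO_SUPPORTED() and SET_KAIO_NOT_SUPPORTED()
 * macros used below; _aio_close() clears a descriptor's entry with
 * CLEAR_KAIO_SUPPORTED().)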
 */
uint32_t *_kaio_supported = NULL;

/*
 * workers for read/write requests
 * (__aio_mutex lock protects circular linked list of workers)
 */
aio_worker_t *__workers_rw;	/* circular list of AIO workers */
aio_worker_t *__nextworker_rw;	/* next worker in list of workers */
int __rw_workerscnt;		/* number of read/write workers */

/*
 * worker for notification requests.
 */
aio_worker_t *__workers_no;	/* circular list of AIO workers */
aio_worker_t *__nextworker_no;	/* next worker in list of workers */
int __no_workerscnt;		/* number of notification workers */

aio_req_t *_aio_done_tail;	/* list of done requests */
aio_req_t *_aio_done_head;

mutex_t __aio_initlock = DEFAULTMUTEX;	/* makes aio initialization atomic */
cond_t __aio_initcv = DEFAULTCV;
int __aio_initbusy = 0;

mutex_t __aio_mutex = DEFAULTMUTEX;	/* protects counts, and linked lists */
cond_t _aio_iowait_cv = DEFAULTCV;	/* wait for userland I/Os */

pid_t __pid = (pid_t)-1;	/* initialize as invalid pid */
int _sigio_enabled = 0;		/* when set, send SIGIO signal */

aio_hash_t *_aio_hash;

aio_req_t *_aio_doneq;		/* doubly linked done queue list */

int _aio_donecnt = 0;
int _aio_waitncnt = 0;		/* # of requests for aio_waitn */
int _aio_doneq_cnt = 0;
int _aio_outstand_cnt = 0;	/* # of outstanding requests */
int _kaio_outstand_cnt = 0;	/* # of outstanding kaio requests */
int _aio_req_done_cnt = 0;	/* req. done but not in "done queue" */
int _aio_kernel_suspend = 0;	/* active kernel kaio calls */
int _aio_suscv_cnt = 0;		/* aio_suspend calls waiting on cv's */

int _max_workers = 256;		/* max number of workers permitted */
int _min_workers = 4;		/* min number of workers */
int _minworkload = 2;		/* min number of requests in q */
int _aio_worker_cnt = 0;	/* number of workers to do requests */
int __uaio_ok = 0;		/* AIO has been enabled */
sigset_t _worker_set;		/* worker's signal mask */

int _aiowait_flag = 0;		/* when set, aiowait() is in progress */
int _aio_flags = 0;		/* see asyncio.h defines for */

aio_worker_t *_kaiowp = NULL;	/* points to kaio cleanup thread */

int hz;				/* clock ticks per second */

static int
_kaio_supported_init(void)
{
	void *ptr;
	size_t size;

	if (_kaio_supported != NULL)	/* already initialized */
		return (0);

	size = MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t);
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
	if (ptr == MAP_FAILED)
		return (-1);
	_kaio_supported = ptr;
	return (0);
}

/*
 * The aio subsystem is initialized when an AIO request is made.
 * Constants are initialized, such as the maximum number of workers
 * that the subsystem can create and the minimum number of workers
 * permitted before imposing some restrictions.  Also, some
 * workers are created.
 */
int
__uaio_init(void)
{
	int ret = -1;
	int i;
	int cancel_state;

	lmutex_lock(&__aio_initlock);
	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
	while (__aio_initbusy)
		(void) cond_wait(&__aio_initcv, &__aio_initlock);
	(void) pthread_setcancelstate(cancel_state, NULL);
	if (__uaio_ok) {	/* already initialized */
		lmutex_unlock(&__aio_initlock);
		return (0);
	}
	__aio_initbusy = 1;
	lmutex_unlock(&__aio_initlock);

	hz = (int)sysconf(_SC_CLK_TCK);
	__pid = getpid();

	setup_cancelsig(SIGAIOCANCEL);

	if (_kaio_supported_init() != 0)
		goto out;

	/*
	 * Allocate and initialize the hash table.
	 * Do this only once, even if __uaio_init() is called twice.
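	 * (The table has HASHSZ buckets, each protected by its own
	 * hash_lock mutex, as initialized below.)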
	 */
	if (_aio_hash == NULL) {
		/* LINTED pointer cast */
		_aio_hash = (aio_hash_t *)mmap(NULL,
		    HASHSZ * sizeof (aio_hash_t), PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
		if ((void *)_aio_hash == MAP_FAILED) {
			_aio_hash = NULL;
			goto out;
		}
		for (i = 0; i < HASHSZ; i++)
			(void) mutex_init(&_aio_hash[i].hash_lock,
			    USYNC_THREAD, NULL);
	}

	/*
	 * Initialize worker's signal mask to only catch SIGAIOCANCEL.
	 */
	(void) sigfillset(&_worker_set);
	(void) sigdelset(&_worker_set, SIGAIOCANCEL);

	/*
	 * Create one worker to send asynchronous notifications.
	 * Do this only once, even if __uaio_init() is called twice.
	 */
	if (__no_workerscnt == 0 &&
	    (_aio_create_worker(NULL, AIONOTIFY) != 0)) {
		errno = EAGAIN;
		goto out;
	}

	/*
	 * Create the minimum number of read/write workers.
	 * Then check whether at least one worker was created;
	 * lwp_create() calls could fail because of segkp exhaustion.
	 */
	for (i = 0; i < _min_workers; i++)
		(void) _aio_create_worker(NULL, AIOREAD);
	if (__rw_workerscnt == 0) {
		errno = EAGAIN;
		goto out;
	}

	ret = 0;
out:
	lmutex_lock(&__aio_initlock);
	if (ret == 0)
		__uaio_ok = 1;
	__aio_initbusy = 0;
	(void) cond_broadcast(&__aio_initcv);
	lmutex_unlock(&__aio_initlock);
	return (ret);
}

/*
 * Called from close() before actually performing the real _close().
 */
void
_aio_close(int fd)
{
	if (fd < 0)	/* avoid cancelling everything */
		return;
	/*
	 * Cancel all outstanding aio requests for this file descriptor.
	 */
	if (__uaio_ok)
		(void) aiocancel_all(fd);
	/*
	 * If we have allocated the bit array, clear the bit for this file.
	 * The next open may re-use this file descriptor and the new file
	 * may have different kaio() behaviour.
	 */
	if (_kaio_supported != NULL)
		CLEAR_KAIO_SUPPORTED(fd);
}

/*
 * The special kaio cleanup thread sits in a loop in the
 * kernel waiting for pending kaio requests to complete.
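 * It is created by _kaio_init() and enters the kernel via the
 * _kaio(AIOSTART) call below.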
 */
void *
_kaio_cleanup_thread(void *arg)
{
	if (pthread_setspecific(_aio_key, arg) != 0)
		aio_panic("_kaio_cleanup_thread, pthread_setspecific()");
	(void) _kaio(AIOSTART);
	return (arg);
}

/*
 * initialize kaio.
 */
void
_kaio_init()
{
	int error;
	sigset_t oset;
	int cancel_state;

	lmutex_lock(&__aio_initlock);
	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
	while (__aio_initbusy)
		(void) cond_wait(&__aio_initcv, &__aio_initlock);
	(void) pthread_setcancelstate(cancel_state, NULL);
	if (_kaio_ok) {		/* already initialized */
		lmutex_unlock(&__aio_initlock);
		return;
	}
	__aio_initbusy = 1;
	lmutex_unlock(&__aio_initlock);

	if (_kaio_supported_init() != 0)
		error = ENOMEM;
	else if ((_kaiowp = _aio_worker_alloc()) == NULL)
		error = ENOMEM;
	else if ((error = (int)_kaio(AIOINIT)) == 0) {
		(void) pthread_sigmask(SIG_SETMASK, &maskset, &oset);
		error = thr_create(NULL, AIOSTKSIZE, _kaio_cleanup_thread,
		    _kaiowp, THR_DAEMON, &_kaiowp->work_tid);
		(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
	}
	if (error && _kaiowp != NULL) {
		_aio_worker_free(_kaiowp);
		_kaiowp = NULL;
	}

	lmutex_lock(&__aio_initlock);
	if (error)
		_kaio_ok = -1;
	else
		_kaio_ok = 1;
	__aio_initbusy = 0;
	(void) cond_broadcast(&__aio_initcv);
	lmutex_unlock(&__aio_initlock);
}

int
aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOREAD));
}

int
aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOWRITE));
}

#if !defined(_LP64)
int
aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAREAD64));
}

int
aiowrite64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAWRITE64));
}
#endif	/* !defined(_LP64) */
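
/*
 * _aiorw() is the common back end for aioread() and aiowrite() above
 * (and for their 64-bit variants): it resolves the file offset, tries
 * kernel async I/O first, and otherwise queues the request to a
 * user-level worker thread.  A hypothetical caller of the public
 * interface (fd, buf and the sizes here are illustrative only) looks
 * roughly like this:
 *
 *	aio_result_t result;
 *	char buf[BUFSIZ];
 *
 *	if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &result) == 0)
 *		(void) aiowait(NULL);
 *
 * where aiowait() returns &result once the I/O has completed.
 */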

int
_aiorw(int fd, caddr_t buf, int bufsz, offset_t offset, int whence,
    aio_result_t *resultp, int mode)
{
	aio_req_t *reqp;
	aio_args_t *ap;
	offset_t loffset;
	struct stat64 stat64;
	int error = 0;
	int kerr;
	int umode;

	switch (whence) {

	case SEEK_SET:
		loffset = offset;
		break;
	case SEEK_CUR:
		if ((loffset = llseek(fd, 0, SEEK_CUR)) == -1)
			error = -1;
		else
			loffset += offset;
		break;
	case SEEK_END:
		if (fstat64(fd, &stat64) == -1)
			error = -1;
		else
			loffset = offset + stat64.st_size;
		break;
	default:
		errno = EINVAL;
		error = -1;
	}

	if (error)
		return (error);

	/* initialize kaio */
	if (!_kaio_ok)
		_kaio_init();

	/*
	 * _aio_do_request() needs the original request code (mode) to be able
	 * to choose the appropriate 32/64 bit function.  All other functions
	 * only require the difference between READ and WRITE (umode).
	 */
	if (mode == AIOAREAD64 || mode == AIOAWRITE64)
		umode = mode - AIOAREAD64;
	else
		umode = mode;

	/*
	 * Try kernel aio first.
	 * If errno is ENOTSUP/EBADFD, fall back to the thread implementation.
	 */
	if (_kaio_ok > 0 && KAIO_SUPPORTED(fd)) {
		resultp->aio_errno = 0;
		sig_mutex_lock(&__aio_mutex);
		_kaio_outstand_cnt++;
		sig_mutex_unlock(&__aio_mutex);
		kerr = (int)_kaio(((resultp->aio_return == AIO_INPROGRESS) ?
		    (umode | AIO_POLL_BIT) : umode),
		    fd, buf, bufsz, loffset, resultp);
		if (kerr == 0) {
			return (0);
		}
		sig_mutex_lock(&__aio_mutex);
		_kaio_outstand_cnt--;
		sig_mutex_unlock(&__aio_mutex);
		if (errno != ENOTSUP && errno != EBADFD)
			return (-1);
		if (errno == EBADFD)
			SET_KAIO_NOT_SUPPORTED(fd);
	}

	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	if ((reqp = _aio_req_alloc()) == NULL) {
		errno = EAGAIN;
		return (-1);
	}

	/*
	 * _aio_do_request() checks reqp->req_op to differentiate
	 * between 32 and 64 bit access.
	 */
	reqp->req_op = mode;
	reqp->req_resultp = resultp;
	ap = &reqp->req_args;
	ap->fd = fd;
	ap->buf = buf;
	ap->bufsz = bufsz;
	ap->offset = loffset;

	if (_aio_hash_insert(resultp, reqp) != 0) {
		_aio_req_free(reqp);
		errno = EINVAL;
		return (-1);
	}
	/*
	 * _aio_req_add() only needs the difference between READ and
	 * WRITE to choose the right worker queue.
	 */
	_aio_req_add(reqp, &__nextworker_rw, umode);
	return (0);
}

int
aiocancel(aio_result_t *resultp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int ret;
	int done = 0;
	int canceled = 0;

	if (!__uaio_ok) {
		errno = EINVAL;
		return (-1);
	}

	sig_mutex_lock(&__aio_mutex);
	reqp = _aio_hash_find(resultp);
	if (reqp == NULL) {
		if (_aio_outstand_cnt == _aio_req_done_cnt)
			errno = EINVAL;
		else
			errno = EACCES;
		ret = -1;
	} else {
		aiowp = reqp->req_worker;
		sig_mutex_lock(&aiowp->work_qlock1);
		(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
		sig_mutex_unlock(&aiowp->work_qlock1);

		if (canceled) {
			ret = 0;
		} else {
			if (_aio_outstand_cnt == 0 ||
			    _aio_outstand_cnt == _aio_req_done_cnt)
				errno = EINVAL;
			else
				errno = EACCES;
			ret = -1;
		}
	}
	sig_mutex_unlock(&__aio_mutex);
	return (ret);
}

/* ARGSUSED */
static void
_aiowait_cleanup(void *arg)
{
	sig_mutex_lock(&__aio_mutex);
	_aiowait_flag--;
	sig_mutex_unlock(&__aio_mutex);
}

/*
 * This must be asynch safe and cancel safe
 */
aio_result_t *
aiowait(struct timeval *uwait)
{
	aio_result_t *uresultp;
	aio_result_t *kresultp;
	aio_result_t *resultp;
	int dontblock;
	int timedwait = 0;
	int kaio_errno = 0;
	struct timeval twait;
	struct timeval *wait = NULL;
	hrtime_t hrtend;
	hrtime_t hres;

	if (uwait) {
		/*
		 * Check for a valid specified wait time.
		 * If it is invalid, fail the call right away.
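		 * (Both tv_sec and tv_usec must be non-negative and
		 * tv_usec must be less than MICROSEC, as checked below.)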
		 */
		if (uwait->tv_sec < 0 || uwait->tv_usec < 0 ||
		    uwait->tv_usec >= MICROSEC) {
			errno = EINVAL;
			return ((aio_result_t *)-1);
		}

		if (uwait->tv_sec > 0 || uwait->tv_usec > 0) {
			hrtend = gethrtime() +
			    (hrtime_t)uwait->tv_sec * NANOSEC +
			    (hrtime_t)uwait->tv_usec * (NANOSEC / MICROSEC);
			twait = *uwait;
			wait = &twait;
			timedwait++;
		} else {
			/* polling */
			sig_mutex_lock(&__aio_mutex);
			if (_kaio_outstand_cnt == 0) {
				kresultp = (aio_result_t *)-1;
			} else {
				kresultp = (aio_result_t *)_kaio(AIOWAIT,
				    (struct timeval *)-1, 1);
				if (kresultp != (aio_result_t *)-1 &&
				    kresultp != NULL &&
				    kresultp != (aio_result_t *)1) {
					_kaio_outstand_cnt--;
					sig_mutex_unlock(&__aio_mutex);
					return (kresultp);
				}
			}
			uresultp = _aio_req_done();
			sig_mutex_unlock(&__aio_mutex);
			if (uresultp != NULL &&
			    uresultp != (aio_result_t *)-1) {
				return (uresultp);
			}
			if (uresultp == (aio_result_t *)-1 &&
			    kresultp == (aio_result_t *)-1) {
				errno = EINVAL;
				return ((aio_result_t *)-1);
			} else {
				return (NULL);
			}
		}
	}

	for (;;) {
		sig_mutex_lock(&__aio_mutex);
		uresultp = _aio_req_done();
		if (uresultp != NULL && uresultp != (aio_result_t *)-1) {
			sig_mutex_unlock(&__aio_mutex);
			resultp = uresultp;
			break;
		}
		_aiowait_flag++;
		dontblock = (uresultp == (aio_result_t *)-1);
		if (dontblock && _kaio_outstand_cnt == 0) {
			kresultp = (aio_result_t *)-1;
			kaio_errno = EINVAL;
		} else {
			sig_mutex_unlock(&__aio_mutex);
			pthread_cleanup_push(_aiowait_cleanup, NULL);
			_cancel_prologue();
			kresultp = (aio_result_t *)_kaio(AIOWAIT,
			    wait, dontblock);
			_cancel_epilogue();
			pthread_cleanup_pop(0);
			sig_mutex_lock(&__aio_mutex);
			kaio_errno = errno;
		}
		_aiowait_flag--;
		sig_mutex_unlock(&__aio_mutex);
		if (kresultp == (aio_result_t *)1) {
			/* aiowait() awakened by an aionotify() */
			continue;
		} else if (kresultp != NULL &&
		    kresultp != (aio_result_t *)-1) {
			resultp = kresultp;
			sig_mutex_lock(&__aio_mutex);
			_kaio_outstand_cnt--;
			sig_mutex_unlock(&__aio_mutex);
			break;
		} else if (kresultp == (aio_result_t *)-1 &&
		    kaio_errno == EINVAL &&
		    uresultp == (aio_result_t *)-1) {
			errno = kaio_errno;
			resultp = (aio_result_t *)-1;
			break;
		} else if (kresultp == (aio_result_t *)-1 &&
		    kaio_errno == EINTR) {
			errno = kaio_errno;
			resultp = (aio_result_t *)-1;
			break;
		} else if (timedwait) {
			hres = hrtend - gethrtime();
			if (hres <= 0) {
				/* time is up; return */
				resultp = NULL;
				break;
			} else {
				/*
				 * Some time left.  Round up the remaining time
				 * in nanoseconds to microsec.  Retry the call.
				 */
				hres += (NANOSEC / MICROSEC) - 1;
				wait->tv_sec = hres / NANOSEC;
				wait->tv_usec =
				    (hres % NANOSEC) / (NANOSEC / MICROSEC);
			}
		} else {
			ASSERT(kresultp == NULL && uresultp == NULL);
			resultp = NULL;
			continue;
		}
	}
	return (resultp);
}

/*
 * _aio_get_timedelta calculates the remaining time and stores the result
 * into timespec_t *wait.
 */

int
_aio_get_timedelta(timespec_t *end, timespec_t *wait)
{
	int ret = 0;
	struct timeval cur;
	timespec_t curtime;

	(void) gettimeofday(&cur, NULL);
	curtime.tv_sec = cur.tv_sec;
	curtime.tv_nsec = cur.tv_usec * 1000;	/* convert us to ns */

	if (end->tv_sec >= curtime.tv_sec) {
		wait->tv_sec = end->tv_sec - curtime.tv_sec;
		if (end->tv_nsec >= curtime.tv_nsec) {
			wait->tv_nsec = end->tv_nsec - curtime.tv_nsec;
			if (wait->tv_sec == 0 && wait->tv_nsec == 0)
				ret = -1;	/* timer expired */
		} else {
			if (end->tv_sec > curtime.tv_sec) {
				wait->tv_sec -= 1;
				wait->tv_nsec = NANOSEC -
				    (curtime.tv_nsec - end->tv_nsec);
			} else {
				ret = -1;	/* timer expired */
			}
		}
	} else {
		ret = -1;
	}
	return (ret);
}

/*
 * If closing by file descriptor: we will simply cancel all the outstanding
 * aio's and return.  Those aio's in question will have either noticed the
 * cancellation notice before, during, or after initiating I/O.
 */
int
aiocancel_all(int fd)
{
	aio_req_t *reqp;
	aio_req_t **reqpp, *last;
	aio_worker_t *first;
	aio_worker_t *next;
	int canceled = 0;
	int done = 0;
	int cancelall = 0;

	sig_mutex_lock(&__aio_mutex);

	if (_aio_outstand_cnt == 0) {
		sig_mutex_unlock(&__aio_mutex);
		return (AIO_ALLDONE);
	}

	/*
	 * Cancel requests from the read/write workers' queues.
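	 * (A negative fd cancels requests on every file descriptor;
	 * see _aio_cancel_work() below.)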
	 */
	first = __nextworker_rw;
	next = first;
	do {
		_aio_cancel_work(next, fd, &canceled, &done);
	} while ((next = next->work_forw) != first);

	/*
	 * finally, check if there are requests on the done queue that
	 * should be canceled.
	 */
	if (fd < 0)
		cancelall = 1;
	reqpp = &_aio_done_tail;
	last = _aio_done_tail;
	while ((reqp = *reqpp) != NULL) {
		if (cancelall || reqp->req_args.fd == fd) {
			*reqpp = reqp->req_next;
			if (last == reqp) {
				last = reqp->req_next;
			}
			if (_aio_done_head == reqp) {
				/* this should be the last req in list */
				_aio_done_head = last;
			}
			_aio_donecnt--;
			_aio_set_result(reqp, -1, ECANCELED);
			(void) _aio_hash_del(reqp->req_resultp);
			_aio_req_free(reqp);
		} else {
			reqpp = &reqp->req_next;
			last = reqp;
		}
	}

	if (cancelall) {
		ASSERT(_aio_donecnt == 0);
		_aio_done_head = NULL;
	}
	sig_mutex_unlock(&__aio_mutex);

	if (canceled && done == 0)
		return (AIO_CANCELED);
	else if (done && canceled == 0)
		return (AIO_ALLDONE);
	else if ((canceled + done == 0) && KAIO_SUPPORTED(fd))
		return ((int)_kaio(AIOCANCEL, fd, NULL));
	return (AIO_NOTCANCELED);
}

/*
 * Cancel requests from a given work queue.  If the file descriptor
 * parameter, fd, is non-negative, then only cancel those requests
 * in this queue that are to this file descriptor.  If the fd
 * parameter is -1, then cancel all requests.
 */
static void
_aio_cancel_work(aio_worker_t *aiowp, int fd, int *canceled, int *done)
{
	aio_req_t *reqp;

	sig_mutex_lock(&aiowp->work_qlock1);
	/*
	 * cancel queued requests first.
	 */
	reqp = aiowp->work_tail1;
	while (reqp != NULL) {
		if (fd < 0 || reqp->req_args.fd == fd) {
			if (_aio_cancel_req(aiowp, reqp, canceled, done)) {
				/*
				 * Caller's locks were dropped.
				 * reqp is invalid; start traversing
				 * the list from the beginning again.
				 */
				reqp = aiowp->work_tail1;
				continue;
			}
		}
		reqp = reqp->req_next;
	}
	/*
	 * Since the queued requests have been canceled, there can
	 * only be one in-progress request that should be canceled.
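	 * (That request, if any, is the one referenced by aiowp->work_req.)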
	 */
	if ((reqp = aiowp->work_req) != NULL &&
	    (fd < 0 || reqp->req_args.fd == fd))
		(void) _aio_cancel_req(aiowp, reqp, canceled, done);
	sig_mutex_unlock(&aiowp->work_qlock1);
}

/*
 * Cancel a request.  Return 1 if the caller's locks were temporarily
 * dropped, otherwise return 0.
 */
int
_aio_cancel_req(aio_worker_t *aiowp, aio_req_t *reqp, int *canceled, int *done)
{
	int ostate = reqp->req_state;

	ASSERT(MUTEX_HELD(&__aio_mutex));
	ASSERT(MUTEX_HELD(&aiowp->work_qlock1));
	if (ostate == AIO_REQ_CANCELED)
		return (0);
	if (ostate == AIO_REQ_DONE && !POSIX_AIO(reqp) &&
	    aiowp->work_prev1 == reqp) {
		ASSERT(aiowp->work_done1 != 0);
		/*
		 * If not on the done queue yet, just mark it CANCELED;
		 * _aio_work_done() will do the necessary clean up.
		 * This is required to ensure that aiocancel_all() cancels
		 * all the outstanding requests, including this one which
		 * is not yet on the done queue but has been marked done.
		 */
		_aio_set_result(reqp, -1, ECANCELED);
		(void) _aio_hash_del(reqp->req_resultp);
		reqp->req_state = AIO_REQ_CANCELED;
		(*canceled)++;
		return (0);
	}

	if (ostate == AIO_REQ_DONE || ostate == AIO_REQ_DONEQ) {
		(*done)++;
		return (0);
	}
	if (reqp->req_op == AIOFSYNC && reqp != aiowp->work_req) {
		ASSERT(POSIX_AIO(reqp));
		/* Cancel the queued aio_fsync() request */
		if (!reqp->req_head->lio_canned) {
			reqp->req_head->lio_canned = 1;
			_aio_outstand_cnt--;
			(*canceled)++;
		}
		return (0);
	}
	reqp->req_state = AIO_REQ_CANCELED;
	_aio_req_del(aiowp, reqp, ostate);
	(void) _aio_hash_del(reqp->req_resultp);
	(*canceled)++;
	if (reqp == aiowp->work_req) {
		ASSERT(ostate == AIO_REQ_INPROGRESS);
		/*
		 * Set the result values now, before _aiodone() is called.
		 * We do this because the application can expect aio_return
		 * and aio_errno to be set to -1 and ECANCELED, respectively,
		 * immediately after a successful return from aiocancel()
		 * or aio_cancel().
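		 * The worker is then interrupted with SIGAIOCANCEL (the
		 * thr_kill() below) so that it abandons the in-progress
		 * request.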
		 */
		_aio_set_result(reqp, -1, ECANCELED);
		(void) thr_kill(aiowp->work_tid, SIGAIOCANCEL);
		return (0);
	}
	if (!POSIX_AIO(reqp)) {
		_aio_outstand_cnt--;
		_aio_set_result(reqp, -1, ECANCELED);
		_aio_req_free(reqp);
		return (0);
	}
	sig_mutex_unlock(&aiowp->work_qlock1);
	sig_mutex_unlock(&__aio_mutex);
	_aiodone(reqp, -1, ECANCELED);
	sig_mutex_lock(&__aio_mutex);
	sig_mutex_lock(&aiowp->work_qlock1);
	return (1);
}

int
_aio_create_worker(aio_req_t *reqp, int mode)
{
	aio_worker_t *aiowp, **workers, **nextworker;
	int *aio_workerscnt;
	void *(*func)(void *);
	sigset_t oset;
	int error;

	/*
	 * Put the new worker thread in the right queue.
	 */
	switch (mode) {
	case AIOREAD:
	case AIOWRITE:
	case AIOAREAD:
	case AIOAWRITE:
#if !defined(_LP64)
	case AIOAREAD64:
	case AIOAWRITE64:
#endif
		workers = &__workers_rw;
		nextworker = &__nextworker_rw;
		aio_workerscnt = &__rw_workerscnt;
		func = _aio_do_request;
		break;
	case AIONOTIFY:
		workers = &__workers_no;
		nextworker = &__nextworker_no;
		func = _aio_do_notify;
		aio_workerscnt = &__no_workerscnt;
		break;
	default:
		aio_panic("_aio_create_worker: invalid mode");
		break;
	}

	if ((aiowp = _aio_worker_alloc()) == NULL)
		return (-1);

	if (reqp) {
		reqp->req_state = AIO_REQ_QUEUED;
		reqp->req_worker = aiowp;
		aiowp->work_head1 = reqp;
		aiowp->work_tail1 = reqp;
		aiowp->work_next1 = reqp;
		aiowp->work_count1 = 1;
		aiowp->work_minload1 = 1;
	}

	(void) pthread_sigmask(SIG_SETMASK, &maskset, &oset);
	error = thr_create(NULL, AIOSTKSIZE, func, aiowp,
	    THR_DAEMON | THR_SUSPENDED, &aiowp->work_tid);
	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
	if (error) {
		if (reqp) {
			reqp->req_state = 0;
			reqp->req_worker = NULL;
		}
		_aio_worker_free(aiowp);
		return (-1);
	}

	lmutex_lock(&__aio_mutex);
	(*aio_workerscnt)++;
	if (*workers == NULL) {
		aiowp->work_forw = aiowp;
		aiowp->work_backw = aiowp;
		*nextworker = aiowp;
		*workers = aiowp;
	} else {
		aiowp->work_backw = (*workers)->work_backw;
		aiowp->work_forw = (*workers);
		(*workers)->work_backw->work_forw = aiowp;
		(*workers)->work_backw = aiowp;
	}
	_aio_worker_cnt++;
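	/* the worker list and the counts above are all updated under __aio_mutex */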
	lmutex_unlock(&__aio_mutex);

	(void) thr_continue(aiowp->work_tid);

	return (0);
}

/*
 * This is the worker's main routine.
 * The task of this function is to execute all queued requests;
 * once the last pending request is executed this function will block
 * in _aio_idle().  A new incoming request must wake up this thread to
 * restart the work.
 * Every worker has its own work queue.  The queue lock is required
 * to synchronize the addition of new requests for this worker or
 * cancellation of pending/running requests.
 *
 * Cancellation scenarios:
 * The cancellation of a request is being done asynchronously using
 * _aio_cancel_req() from another thread context.
 * A queued request can be cancelled in different manners:
 * a) request is queued but not "in progress" or "done" (AIO_REQ_QUEUED):
 *	- lock the queue -> remove the request -> unlock the queue
 *	- this function/thread does not detect this cancellation process
 * b) request is in progress (AIO_REQ_INPROGRESS):
 *	- this function first allows the cancellation of the running
 *	  request with the flag "work_cancel_flg=1"
 *		see _aio_req_get() -> _aio_cancel_on()
 *	  During this phase, it is allowed to interrupt the worker
 *	  thread running the request (this thread) using the SIGAIOCANCEL
 *	  signal.
 *	  Once this thread returns from the kernel (because the request
 *	  is just done), then it must disable a possible cancellation
 *	  and proceed to finish the request.  To disable the cancellation
 *	  this thread must use _aio_cancel_off() to set "work_cancel_flg=0".
 * c) request is already done (AIO_REQ_DONE || AIO_REQ_DONEQ):
 *	same procedure as in a)
 *
 * To b)
 *	This thread uses sigsetjmp() to define the position in the code where
 *	it wishes to continue working in the case that a SIGAIOCANCEL signal
 *	is detected.
 *	Normally this thread should get the cancellation signal during the
 *	kernel phase (reading or writing).  In that case the signal handler
 *	aiosigcancelhndlr() is activated using the worker thread context,
 *	which again will use the siglongjmp() function to break the standard
 *	code flow and jump to the "sigsetjmp" position, provided that
 *	"work_cancel_flg" is set to "1".
 *	Because the "work_cancel_flg" is only manipulated by this worker
 *	thread and it can only run on one CPU at a given time, it is not
 *	necessary to protect that flag with the queue lock.
 *	Returning from the kernel (read or write system call) we must
 *	first disable the use of the SIGAIOCANCEL signal and accordingly
 *	the use of the siglongjmp() function to prevent a possible deadlock:
 *	- It can happen that this worker thread returns from the kernel and
 *	  blocks in "work_qlock1",
 *	- then a second thread cancels the apparently "in progress" request
 *	  and sends the SIGAIOCANCEL signal to the worker thread,
 *	- the worker thread gets assigned the "work_qlock1" and will return
 *	  from the kernel,
 *	- the kernel detects the pending signal and activates the signal
 *	  handler instead,
 *	- if the "work_cancel_flg" is still set then the signal handler
 *	  should use siglongjmp() to cancel the "in progress" request and
 *	  it would try to acquire the same work_qlock1 in _aio_req_get()
 *	  for a second time => deadlock.
 *	To avoid that situation we disable the cancellation of the request
 *	in progress BEFORE we try to acquire the work_qlock1.
 *	In that case the signal handler will not call siglongjmp() and the
 *	worker thread will continue running the standard code flow.
 *	Then this thread must check the AIO_REQ_CANCELED flag to emulate
 *	the siglongjmp() that would otherwise have been required, freeing
 *	the work_qlock1 and avoiding a deadlock.
 */
void *
_aio_do_request(void *arglist)
{
	aio_worker_t *aiowp = (aio_worker_t *)arglist;
	ulwp_t *self = curthread;
	struct aio_args *arg;
	aio_req_t *reqp;		/* current AIO request */
	ssize_t retval;
	int append;
	int error;

	if (pthread_setspecific(_aio_key, aiowp) != 0)
		aio_panic("_aio_do_request, pthread_setspecific()");
	(void) pthread_sigmask(SIG_SETMASK, &_worker_set, NULL);
	ASSERT(aiowp->work_req == NULL);

	/*
	 * We resume here when an operation is cancelled.
	 * On first entry, aiowp->work_req == NULL, so all
	 * we do is block SIGAIOCANCEL.
	 */
	(void) sigsetjmp(aiowp->work_jmp_buf, 0);
	ASSERT(self->ul_sigdefer == 0);

	sigoff(self);	/* block SIGAIOCANCEL */
	if (aiowp->work_req != NULL)
		_aio_finish_request(aiowp, -1, ECANCELED);

	for (;;) {
		/*
		 * Put completed requests on aio_done_list.  This has
		 * to be done as part of the main loop to ensure that
		 * we don't artificially starve any aiowait'ers.
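		 * (_aio_work_done() below moves them to the done queue.)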
		 */
		if (aiowp->work_done1)
			_aio_work_done(aiowp);

top:
		/* consume any deferred SIGAIOCANCEL signal here */
		sigon(self);
		sigoff(self);

		while ((reqp = _aio_req_get(aiowp)) == NULL) {
			if (_aio_idle(aiowp) != 0)
				goto top;
		}
		arg = &reqp->req_args;
		ASSERT(reqp->req_state == AIO_REQ_INPROGRESS ||
		    reqp->req_state == AIO_REQ_CANCELED);
		error = 0;

		switch (reqp->req_op) {
		case AIOREAD:
		case AIOAREAD:
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = pread(arg->fd, arg->buf,
			    arg->bufsz, arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = read(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
		case AIOWRITE:
		case AIOAWRITE:
			/*
			 * The SUSv3 POSIX spec for aio_write() states:
			 *	If O_APPEND is set for the file descriptor,
			 *	write operations append to the file in the
			 *	same order as the calls were made.
			 * but, somewhat inconsistently, it requires pwrite()
			 * to ignore the O_APPEND setting.  So we have to use
			 * fcntl() to get the open modes and call write() for
			 * the O_APPEND case.
			 */
			append = (__fcntl(arg->fd, F_GETFL) & O_APPEND);
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = append?
			    write(arg->fd, arg->buf, arg->bufsz) :
			    pwrite(arg->fd, arg->buf, arg->bufsz,
			    arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = write(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
#if !defined(_LP64)
		case AIOAREAD64:
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = pread64(arg->fd, arg->buf,
			    arg->bufsz, arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = read(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
		case AIOAWRITE64:
			/*
			 * The SUSv3 POSIX spec for aio_write() states:
			 *	If O_APPEND is set for the file descriptor,
			 *	write operations append to the file in the
			 *	same order as the calls were made.
			 * but, somewhat inconsistently, it requires pwrite()
			 * to ignore the O_APPEND setting.  So we have to use
			 * fcntl() to get the open modes and call write() for
			 * the O_APPEND case.
			 */
			append = (__fcntl(arg->fd, F_GETFL) & O_APPEND);
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = append?
			    write(arg->fd, arg->buf, arg->bufsz) :
			    pwrite64(arg->fd, arg->buf, arg->bufsz,
			    arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = write(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
#endif	/* !defined(_LP64) */
		case AIOFSYNC:
			if (_aio_fsync_del(aiowp, reqp))
				goto top;
			ASSERT(reqp->req_head == NULL);
			/*
			 * All writes for this fsync request are now
			 * acknowledged.  Now make these writes visible
			 * and put the final request into the hash table.
			 */
			if (reqp->req_state == AIO_REQ_CANCELED) {
				/* EMPTY */;
			} else if (arg->offset == O_SYNC) {
				if ((retval = __fdsync(arg->fd, FSYNC)) == -1)
					error = errno;
			} else {
				if ((retval = __fdsync(arg->fd, FDSYNC)) == -1)
					error = errno;
			}
			if (_aio_hash_insert(reqp->req_resultp, reqp) != 0)
				aio_panic("_aio_do_request(): AIOFSYNC: "
				    "request already in hash table");
			break;
		default:
			aio_panic("_aio_do_request, bad op");
		}

		_aio_finish_request(aiowp, retval, error);
	}
	/* NOTREACHED */
	return (NULL);
}

/*
 * Perform the tail processing for _aio_do_request().
 * The in-progress request may or may not have been cancelled.
 */
static void
_aio_finish_request(aio_worker_t *aiowp, ssize_t retval, int error)
{
	aio_req_t *reqp;

	sig_mutex_lock(&aiowp->work_qlock1);
	if ((reqp = aiowp->work_req) == NULL)
		sig_mutex_unlock(&aiowp->work_qlock1);
	else {
		aiowp->work_req = NULL;
		if (reqp->req_state == AIO_REQ_CANCELED) {
			retval = -1;
			error = ECANCELED;
		}
		if (!POSIX_AIO(reqp)) {
			int notify;
			if (reqp->req_state == AIO_REQ_INPROGRESS) {
				reqp->req_state = AIO_REQ_DONE;
				_aio_set_result(reqp, retval, error);
			}
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_lock(&__aio_mutex);
			/*
			 * If it was canceled, this request will not be
			 * added to the done list.  Just free it.
			 */
			if (error == ECANCELED) {
				_aio_outstand_cnt--;
				_aio_req_free(reqp);
			} else {
				_aio_req_done_cnt++;
			}
			/*
			 * Notify any thread that may have blocked
			 * because it saw an outstanding request.
			 */
			notify = 0;
			if (_aio_outstand_cnt == 0 && _aiowait_flag) {
				notify = 1;
			}
			sig_mutex_unlock(&__aio_mutex);
			if (notify) {
				(void) _kaio(AIONOTIFY);
			}
		} else {
			if (reqp->req_state == AIO_REQ_INPROGRESS)
				reqp->req_state = AIO_REQ_DONE;
			sig_mutex_unlock(&aiowp->work_qlock1);
			_aiodone(reqp, retval, error);
		}
	}
}

void
_aio_req_mark_done(aio_req_t *reqp)
{
#if !defined(_LP64)
	if (reqp->req_largefile)
		((aiocb64_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE;
	else
#endif
		((aiocb_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE;
}

/*
 * Sleep for 'ticks' clock ticks to give somebody else a chance to run,
 * hopefully to consume one of our queued signals.
 */
static void
_aio_delay(int ticks)
{
	(void) usleep(ticks * (MICROSEC / hz));
}

/*
 * Actually send the notifications.
 * We could block indefinitely here if the application
 * is not listening for the signal or port notifications.
 */
static void
send_notification(notif_param_t *npp)
{
	extern int __sigqueue(pid_t pid, int signo,
	    /* const union sigval */ void *value, int si_code, int block);

	if (npp->np_signo)
		(void) __sigqueue(__pid, npp->np_signo, npp->np_user,
		    SI_ASYNCIO, 1);
	else if (npp->np_port >= 0)
		(void) _port_dispatch(npp->np_port, 0, PORT_SOURCE_AIO,
		    npp->np_event, npp->np_object, npp->np_user);

	if (npp->np_lio_signo)
		(void) __sigqueue(__pid, npp->np_lio_signo, npp->np_lio_user,
		    SI_ASYNCIO, 1);
	else if (npp->np_lio_port >= 0)
		(void) _port_dispatch(npp->np_lio_port, 0, PORT_SOURCE_AIO,
		    npp->np_lio_event, npp->np_lio_object, npp->np_lio_user);
}

/*
 * Asynchronous notification worker.
 */
void *
_aio_do_notify(void *arg)
{
	aio_worker_t *aiowp = (aio_worker_t *)arg;
	aio_req_t *reqp;

	/*
	 * This isn't really necessary.  All signals are blocked.
	 */
	if (pthread_setspecific(_aio_key, aiowp) != 0)
		aio_panic("_aio_do_notify, pthread_setspecific()");

	/*
	 * Notifications are never cancelled.
	 * All signals remain blocked, forever.
	 */
	for (;;) {
		while ((reqp = _aio_req_get(aiowp)) == NULL) {
			if (_aio_idle(aiowp) != 0)
				aio_panic("_aio_do_notify: _aio_idle() failed");
		}
		send_notification(&reqp->req_notify);
		_aio_req_free(reqp);
	}

	/* NOTREACHED */
	return (NULL);
}

/*
 * Do the completion semantics for a request that was either canceled
 * by _aio_cancel_req() or was completed by _aio_do_request().
 */
static void
_aiodone(aio_req_t *reqp, ssize_t retval, int error)
{
	aio_result_t *resultp = reqp->req_resultp;
	int notify = 0;
	aio_lio_t *head;
	int sigev_none;
	int sigev_signal;
	int sigev_thread;
	int sigev_port;
	notif_param_t np;

	/*
	 * We call _aiodone() only for Posix I/O.
	 */
	ASSERT(POSIX_AIO(reqp));

	sigev_none = 0;
	sigev_signal = 0;
	sigev_thread = 0;
	sigev_port = 0;
	np.np_signo = 0;
	np.np_port = -1;
	np.np_lio_signo = 0;
	np.np_lio_port = -1;

	switch (reqp->req_sigevent.sigev_notify) {
	case SIGEV_NONE:
		sigev_none = 1;
		break;
	case SIGEV_SIGNAL:
		sigev_signal = 1;
		break;
	case SIGEV_THREAD:
		sigev_thread = 1;
		break;
	case SIGEV_PORT:
		sigev_port = 1;
		break;
	default:
		aio_panic("_aiodone: improper sigev_notify");
		break;
	}

	/*
	 * Figure out the notification parameters while holding __aio_mutex.
	 * Actually perform the notifications after dropping __aio_mutex.
	 * This allows us to sleep for a long time (if the notifications
	 * incur delays) without impeding other async I/O operations.
1391f841f6adSraf */ 1392f841f6adSraf 1393f841f6adSraf sig_mutex_lock(&__aio_mutex); 1394f841f6adSraf 1395f841f6adSraf if (sigev_signal) { 1396f841f6adSraf if ((np.np_signo = reqp->req_sigevent.sigev_signo) != 0) 1397f841f6adSraf notify = 1; 1398f841f6adSraf np.np_user = reqp->req_sigevent.sigev_value.sival_ptr; 1399f841f6adSraf } else if (sigev_thread | sigev_port) { 1400f841f6adSraf if ((np.np_port = reqp->req_sigevent.sigev_signo) >= 0) 1401f841f6adSraf notify = 1; 1402f841f6adSraf np.np_event = reqp->req_op; 1403f841f6adSraf if (np.np_event == AIOFSYNC && reqp->req_largefile) 1404f841f6adSraf np.np_event = AIOFSYNC64; 1405f841f6adSraf np.np_object = (uintptr_t)reqp->req_aiocbp; 1406f841f6adSraf np.np_user = reqp->req_sigevent.sigev_value.sival_ptr; 1407f841f6adSraf } 1408f841f6adSraf 1409f841f6adSraf if (resultp->aio_errno == EINPROGRESS) 1410f841f6adSraf _aio_set_result(reqp, retval, error); 1411f841f6adSraf 1412f841f6adSraf _aio_outstand_cnt--; 1413f841f6adSraf 1414f841f6adSraf head = reqp->req_head; 1415f841f6adSraf reqp->req_head = NULL; 1416f841f6adSraf 1417f841f6adSraf if (sigev_none) { 1418f841f6adSraf _aio_enq_doneq(reqp); 1419f841f6adSraf reqp = NULL; 1420f841f6adSraf } else { 1421f841f6adSraf (void) _aio_hash_del(resultp); 1422f841f6adSraf _aio_req_mark_done(reqp); 1423f841f6adSraf } 1424f841f6adSraf 1425f841f6adSraf _aio_waitn_wakeup(); 1426f841f6adSraf 1427f841f6adSraf /* 1428f841f6adSraf * __aio_waitn() sets AIO_WAIT_INPROGRESS and 1429f841f6adSraf * __aio_suspend() increments "_aio_kernel_suspend" 1430f841f6adSraf * when they are waiting in the kernel for completed I/Os. 1431f841f6adSraf * 1432f841f6adSraf * _kaio(AIONOTIFY) awakes the corresponding function 1433f841f6adSraf * in the kernel; then the corresponding __aio_waitn() or 1434f841f6adSraf * __aio_suspend() function could reap the recently 1435f841f6adSraf * completed I/Os (_aiodone()). 1436f841f6adSraf */ 1437f841f6adSraf if ((_aio_flags & AIO_WAIT_INPROGRESS) || _aio_kernel_suspend > 0) 1438f841f6adSraf (void) _kaio(AIONOTIFY); 1439f841f6adSraf 1440f841f6adSraf sig_mutex_unlock(&__aio_mutex); 1441f841f6adSraf 1442f841f6adSraf if (head != NULL) { 1443f841f6adSraf /* 1444f841f6adSraf * If all the lio requests have completed, 1445f841f6adSraf * prepare to notify the waiting thread. 
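 * (head->lio_refcnt == 1 here means that this request is the last
 * outstanding member of the list.)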
1446f841f6adSraf */
1447f841f6adSraf sig_mutex_lock(&head->lio_mutex);
1448f841f6adSraf ASSERT(head->lio_refcnt == head->lio_nent);
1449f841f6adSraf if (head->lio_refcnt == 1) {
1450f841f6adSraf int waiting = 0;
1451f841f6adSraf if (head->lio_mode == LIO_WAIT) {
1452f841f6adSraf if ((waiting = head->lio_waiting) != 0)
1453f841f6adSraf (void) cond_signal(&head->lio_cond_cv);
1454f841f6adSraf } else if (head->lio_port < 0) { /* none or signal */
1455f841f6adSraf if ((np.np_lio_signo = head->lio_signo) != 0)
1456f841f6adSraf notify = 1;
1457f841f6adSraf np.np_lio_user = head->lio_sigval.sival_ptr;
1458f841f6adSraf } else { /* thread or port */
1459f841f6adSraf notify = 1;
1460f841f6adSraf np.np_lio_port = head->lio_port;
1461f841f6adSraf np.np_lio_event = head->lio_event;
1462f841f6adSraf np.np_lio_object =
1463f841f6adSraf (uintptr_t)head->lio_sigevent;
1464f841f6adSraf np.np_lio_user = head->lio_sigval.sival_ptr;
1465f841f6adSraf }
1466f841f6adSraf head->lio_nent = head->lio_refcnt = 0;
1467f841f6adSraf sig_mutex_unlock(&head->lio_mutex);
1468f841f6adSraf if (waiting == 0)
1469f841f6adSraf _aio_lio_free(head);
1470f841f6adSraf } else {
1471f841f6adSraf head->lio_nent--;
1472f841f6adSraf head->lio_refcnt--;
1473f841f6adSraf sig_mutex_unlock(&head->lio_mutex);
1474f841f6adSraf }
1475f841f6adSraf }
1476f841f6adSraf 
1477f841f6adSraf /*
1478f841f6adSraf * The request is completed; now perform the notifications.
1479f841f6adSraf */
1480f841f6adSraf if (notify) {
1481f841f6adSraf if (reqp != NULL) {
1482f841f6adSraf /*
1483f841f6adSraf * We usually put the request on the notification
1484f841f6adSraf * queue because we don't want to block and delay
1485f841f6adSraf * other operations behind us in the work queue.
1486f841f6adSraf * Also we must never block on a cancel notification
1487f841f6adSraf * because we are being called from an application
1488f841f6adSraf * thread in this case and that could lead to deadlock
1489f841f6adSraf * if no other thread is receiving notifications.
1490f841f6adSraf */
1491f841f6adSraf reqp->req_notify = np;
1492f841f6adSraf reqp->req_op = AIONOTIFY;
1493f841f6adSraf _aio_req_add(reqp, &__workers_no, AIONOTIFY);
1494f841f6adSraf reqp = NULL;
1495f841f6adSraf } else {
1496f841f6adSraf /*
1497f841f6adSraf * We already put the request on the done queue,
1498f841f6adSraf * so we can't queue it to the notification queue.
1499f841f6adSraf * Just do the notification directly.
1500f841f6adSraf */
1501f841f6adSraf send_notification(&np);
1502f841f6adSraf }
1503f841f6adSraf }
1504f841f6adSraf 
1505f841f6adSraf if (reqp != NULL)
1506f841f6adSraf _aio_req_free(reqp);
1507f841f6adSraf }
1508f841f6adSraf 
1509f841f6adSraf /*
1510f841f6adSraf * Delete fsync requests from list head until there is
1511f841f6adSraf * only one left. Return 0 when there is only one,
1512f841f6adSraf * otherwise return a non-zero value.
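 * While other requests remain on the list, the current request simply
 * drops its list reference and is freed here.  The last request on the
 * list also frees the list head; the request itself is freed at this
 * point only in the LIO_DESTROY case.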
1513f841f6adSraf */ 1514f841f6adSraf static int 1515f841f6adSraf _aio_fsync_del(aio_worker_t *aiowp, aio_req_t *reqp) 1516f841f6adSraf { 1517f841f6adSraf aio_lio_t *head = reqp->req_head; 1518f841f6adSraf int rval = 0; 1519f841f6adSraf 1520f841f6adSraf ASSERT(reqp == aiowp->work_req); 1521f841f6adSraf sig_mutex_lock(&aiowp->work_qlock1); 1522f841f6adSraf sig_mutex_lock(&head->lio_mutex); 1523f841f6adSraf if (head->lio_refcnt > 1) { 1524f841f6adSraf head->lio_refcnt--; 1525f841f6adSraf head->lio_nent--; 1526f841f6adSraf aiowp->work_req = NULL; 1527f841f6adSraf sig_mutex_unlock(&head->lio_mutex); 1528f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1); 1529f841f6adSraf sig_mutex_lock(&__aio_mutex); 1530f841f6adSraf _aio_outstand_cnt--; 1531f841f6adSraf _aio_waitn_wakeup(); 1532f841f6adSraf sig_mutex_unlock(&__aio_mutex); 1533f841f6adSraf _aio_req_free(reqp); 1534f841f6adSraf return (1); 1535f841f6adSraf } 1536f841f6adSraf ASSERT(head->lio_nent == 1 && head->lio_refcnt == 1); 1537f841f6adSraf reqp->req_head = NULL; 1538f841f6adSraf if (head->lio_canned) 1539f841f6adSraf reqp->req_state = AIO_REQ_CANCELED; 1540f841f6adSraf if (head->lio_mode == LIO_DESTROY) { 1541f841f6adSraf aiowp->work_req = NULL; 1542f841f6adSraf rval = 1; 1543f841f6adSraf } 1544f841f6adSraf sig_mutex_unlock(&head->lio_mutex); 1545f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1); 1546f841f6adSraf head->lio_refcnt--; 1547f841f6adSraf head->lio_nent--; 1548f841f6adSraf _aio_lio_free(head); 1549f841f6adSraf if (rval != 0) 1550f841f6adSraf _aio_req_free(reqp); 1551f841f6adSraf return (rval); 1552f841f6adSraf } 1553f841f6adSraf 1554f841f6adSraf /* 1555f841f6adSraf * A worker is set idle when its work queue is empty. 1556f841f6adSraf * The worker checks again that it has no more work 1557f841f6adSraf * and then goes to sleep waiting for more work. 1558f841f6adSraf */ 1559f841f6adSraf int 1560f841f6adSraf _aio_idle(aio_worker_t *aiowp) 1561f841f6adSraf { 1562f841f6adSraf int error = 0; 1563f841f6adSraf 1564f841f6adSraf sig_mutex_lock(&aiowp->work_qlock1); 1565f841f6adSraf if (aiowp->work_count1 == 0) { 1566f841f6adSraf ASSERT(aiowp->work_minload1 == 0); 1567f841f6adSraf aiowp->work_idleflg = 1; 1568f841f6adSraf /* 1569f841f6adSraf * A cancellation handler is not needed here. 1570f841f6adSraf * aio worker threads are never cancelled via pthread_cancel(). 1571f841f6adSraf */ 1572f841f6adSraf error = sig_cond_wait(&aiowp->work_idle_cv, 1573f841f6adSraf &aiowp->work_qlock1); 1574f841f6adSraf /* 1575f841f6adSraf * The idle flag is normally cleared before worker is awakened 1576f841f6adSraf * by aio_req_add(). On error (EINTR), we clear it ourself. 1577f841f6adSraf */ 1578f841f6adSraf if (error) 1579f841f6adSraf aiowp->work_idleflg = 0; 1580f841f6adSraf } 1581f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1); 1582f841f6adSraf return (error); 1583f841f6adSraf } 1584f841f6adSraf 1585f841f6adSraf /* 1586f841f6adSraf * A worker's completed AIO requests are placed onto a global 1587f841f6adSraf * done queue. The application is only sent a SIGIO signal if 1588f841f6adSraf * the process has a handler enabled and it is not waiting via 1589f841f6adSraf * aiowait(). 
1590f841f6adSraf */ 1591f841f6adSraf static void 1592f841f6adSraf _aio_work_done(aio_worker_t *aiowp) 1593f841f6adSraf { 1594f841f6adSraf aio_req_t *reqp; 1595f841f6adSraf 1596*bced1f33Spraks sig_mutex_lock(&__aio_mutex); 1597f841f6adSraf sig_mutex_lock(&aiowp->work_qlock1); 1598f841f6adSraf reqp = aiowp->work_prev1; 1599f841f6adSraf reqp->req_next = NULL; 1600f841f6adSraf aiowp->work_done1 = 0; 1601f841f6adSraf aiowp->work_tail1 = aiowp->work_next1; 1602f841f6adSraf if (aiowp->work_tail1 == NULL) 1603f841f6adSraf aiowp->work_head1 = NULL; 1604f841f6adSraf aiowp->work_prev1 = NULL; 1605f841f6adSraf _aio_outstand_cnt--; 1606f841f6adSraf _aio_req_done_cnt--; 1607*bced1f33Spraks if (reqp->req_state == AIO_REQ_CANCELED) { 1608*bced1f33Spraks /* 1609*bced1f33Spraks * Request got cancelled after it was marked done. This can 1610*bced1f33Spraks * happen because _aio_finish_request() marks it AIO_REQ_DONE 1611*bced1f33Spraks * and drops all locks. Don't add the request to the done 1612*bced1f33Spraks * queue and just discard it. 1613*bced1f33Spraks */ 1614*bced1f33Spraks sig_mutex_unlock(&aiowp->work_qlock1); 1615*bced1f33Spraks _aio_req_free(reqp); 1616*bced1f33Spraks if (_aio_outstand_cnt == 0 && _aiowait_flag) { 1617*bced1f33Spraks sig_mutex_unlock(&__aio_mutex); 1618*bced1f33Spraks (void) _kaio(AIONOTIFY); 1619*bced1f33Spraks } else { 1620*bced1f33Spraks sig_mutex_unlock(&__aio_mutex); 1621*bced1f33Spraks } 1622*bced1f33Spraks return; 1623*bced1f33Spraks } 1624*bced1f33Spraks sig_mutex_unlock(&aiowp->work_qlock1); 1625*bced1f33Spraks _aio_donecnt++; 1626f841f6adSraf ASSERT(_aio_donecnt > 0 && 1627f841f6adSraf _aio_outstand_cnt >= 0 && 1628f841f6adSraf _aio_req_done_cnt >= 0); 1629f841f6adSraf ASSERT(reqp != NULL); 1630f841f6adSraf 1631f841f6adSraf if (_aio_done_tail == NULL) { 1632f841f6adSraf _aio_done_head = _aio_done_tail = reqp; 1633f841f6adSraf } else { 1634f841f6adSraf _aio_done_head->req_next = reqp; 1635f841f6adSraf _aio_done_head = reqp; 1636f841f6adSraf } 1637f841f6adSraf 1638f841f6adSraf if (_aiowait_flag) { 1639f841f6adSraf sig_mutex_unlock(&__aio_mutex); 1640f841f6adSraf (void) _kaio(AIONOTIFY); 1641f841f6adSraf } else { 1642f841f6adSraf sig_mutex_unlock(&__aio_mutex); 1643f841f6adSraf if (_sigio_enabled) 1644f841f6adSraf (void) kill(__pid, SIGIO); 1645f841f6adSraf } 1646f841f6adSraf } 1647f841f6adSraf 1648f841f6adSraf /* 1649f841f6adSraf * The done queue consists of AIO requests that are in either the 1650f841f6adSraf * AIO_REQ_DONE or AIO_REQ_CANCELED state. Requests that were cancelled 1651f841f6adSraf * are discarded. If the done queue is empty then NULL is returned. 1652f841f6adSraf * Otherwise the address of a done aio_result_t is returned. 1653f841f6adSraf */ 1654f841f6adSraf aio_result_t * 1655f841f6adSraf _aio_req_done(void) 1656f841f6adSraf { 1657f841f6adSraf aio_req_t *reqp; 1658f841f6adSraf aio_result_t *resultp; 1659f841f6adSraf 1660f841f6adSraf ASSERT(MUTEX_HELD(&__aio_mutex)); 1661f841f6adSraf 1662f841f6adSraf if ((reqp = _aio_done_tail) != NULL) { 1663f841f6adSraf if ((_aio_done_tail = reqp->req_next) == NULL) 1664f841f6adSraf _aio_done_head = NULL; 1665f841f6adSraf ASSERT(_aio_donecnt > 0); 1666f841f6adSraf _aio_donecnt--; 1667f841f6adSraf (void) _aio_hash_del(reqp->req_resultp); 1668f841f6adSraf resultp = reqp->req_resultp; 1669f841f6adSraf ASSERT(reqp->req_state == AIO_REQ_DONE); 1670f841f6adSraf _aio_req_free(reqp); 1671f841f6adSraf return (resultp); 1672f841f6adSraf } 1673f841f6adSraf /* is queue empty? 
*/ 1674f841f6adSraf if (reqp == NULL && _aio_outstand_cnt == 0) { 1675f841f6adSraf return ((aio_result_t *)-1); 1676f841f6adSraf } 1677f841f6adSraf return (NULL); 1678f841f6adSraf } 1679f841f6adSraf 1680f841f6adSraf /* 1681f841f6adSraf * Set the return and errno values for the application's use. 1682f841f6adSraf * 1683f841f6adSraf * For the Posix interfaces, we must set the return value first followed 1684f841f6adSraf * by the errno value because the Posix interfaces allow for a change 1685f841f6adSraf * in the errno value from EINPROGRESS to something else to signal 1686f841f6adSraf * the completion of the asynchronous request. 1687f841f6adSraf * 1688f841f6adSraf * The opposite is true for the Solaris interfaces. These allow for 1689f841f6adSraf * a change in the return value from AIO_INPROGRESS to something else 1690f841f6adSraf * to signal the completion of the asynchronous request. 1691f841f6adSraf */ 1692f841f6adSraf void 1693f841f6adSraf _aio_set_result(aio_req_t *reqp, ssize_t retval, int error) 1694f841f6adSraf { 1695f841f6adSraf aio_result_t *resultp = reqp->req_resultp; 1696f841f6adSraf 1697f841f6adSraf if (POSIX_AIO(reqp)) { 1698f841f6adSraf resultp->aio_return = retval; 1699f841f6adSraf membar_producer(); 1700f841f6adSraf resultp->aio_errno = error; 1701f841f6adSraf } else { 1702f841f6adSraf resultp->aio_errno = error; 1703f841f6adSraf membar_producer(); 1704f841f6adSraf resultp->aio_return = retval; 1705f841f6adSraf } 1706f841f6adSraf } 1707f841f6adSraf 1708f841f6adSraf /* 1709f841f6adSraf * Add an AIO request onto the next work queue. 1710f841f6adSraf * A circular list of workers is used to choose the next worker. 1711f841f6adSraf */ 1712f841f6adSraf void 1713f841f6adSraf _aio_req_add(aio_req_t *reqp, aio_worker_t **nextworker, int mode) 1714f841f6adSraf { 1715f841f6adSraf ulwp_t *self = curthread; 1716f841f6adSraf aio_worker_t *aiowp; 1717f841f6adSraf aio_worker_t *first; 1718f841f6adSraf int load_bal_flg = 1; 1719f841f6adSraf int found; 1720f841f6adSraf 1721f841f6adSraf ASSERT(reqp->req_state != AIO_REQ_DONEQ); 1722f841f6adSraf reqp->req_next = NULL; 1723f841f6adSraf /* 1724f841f6adSraf * Try to acquire the next worker's work queue. If it is locked, 1725f841f6adSraf * then search the list of workers until a queue is found unlocked, 1726f841f6adSraf * or until the list is completely traversed at which point another 1727f841f6adSraf * worker will be created. 
1728f841f6adSraf */ 1729f841f6adSraf sigoff(self); /* defer SIGIO */ 1730f841f6adSraf sig_mutex_lock(&__aio_mutex); 1731f841f6adSraf first = aiowp = *nextworker; 1732f841f6adSraf if (mode != AIONOTIFY) 1733f841f6adSraf _aio_outstand_cnt++; 1734f841f6adSraf sig_mutex_unlock(&__aio_mutex); 1735f841f6adSraf 1736f841f6adSraf switch (mode) { 1737f841f6adSraf case AIOREAD: 1738f841f6adSraf case AIOWRITE: 1739f841f6adSraf case AIOAREAD: 1740f841f6adSraf case AIOAWRITE: 1741f841f6adSraf #if !defined(_LP64) 1742f841f6adSraf case AIOAREAD64: 1743f841f6adSraf case AIOAWRITE64: 1744f841f6adSraf #endif 1745f841f6adSraf /* try to find an idle worker */ 1746f841f6adSraf found = 0; 1747f841f6adSraf do { 1748f841f6adSraf if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) { 1749f841f6adSraf if (aiowp->work_idleflg) { 1750f841f6adSraf found = 1; 1751f841f6adSraf break; 1752f841f6adSraf } 1753f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1); 1754f841f6adSraf } 1755f841f6adSraf } while ((aiowp = aiowp->work_forw) != first); 1756f841f6adSraf 1757f841f6adSraf if (found) { 1758f841f6adSraf aiowp->work_minload1++; 1759f841f6adSraf break; 1760f841f6adSraf } 1761f841f6adSraf 1762f841f6adSraf /* try to acquire some worker's queue lock */ 1763f841f6adSraf do { 1764f841f6adSraf if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) { 1765f841f6adSraf found = 1; 1766f841f6adSraf break; 1767f841f6adSraf } 1768f841f6adSraf } while ((aiowp = aiowp->work_forw) != first); 1769f841f6adSraf 1770f841f6adSraf /* 1771f841f6adSraf * Create more workers when the workers appear overloaded. 1772f841f6adSraf * Either all the workers are busy draining their queues 1773f841f6adSraf * or no worker's queue lock could be acquired. 1774f841f6adSraf */ 1775f841f6adSraf if (!found) { 1776f841f6adSraf if (_aio_worker_cnt < _max_workers) { 1777f841f6adSraf if (_aio_create_worker(reqp, mode)) 1778f841f6adSraf aio_panic("_aio_req_add: add worker"); 1779f841f6adSraf sigon(self); /* reenable SIGIO */ 1780f841f6adSraf return; 1781f841f6adSraf } 1782f841f6adSraf 1783f841f6adSraf /* 1784f841f6adSraf * No worker available and we have created 1785f841f6adSraf * _max_workers, keep going through the 1786f841f6adSraf * list slowly until we get a lock 1787f841f6adSraf */ 1788f841f6adSraf while (sig_mutex_trylock(&aiowp->work_qlock1) != 0) { 1789f841f6adSraf /* 1790f841f6adSraf * give someone else a chance 1791f841f6adSraf */ 1792f841f6adSraf _aio_delay(1); 1793f841f6adSraf aiowp = aiowp->work_forw; 1794f841f6adSraf } 1795f841f6adSraf } 1796f841f6adSraf 1797f841f6adSraf ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); 1798f841f6adSraf if (_aio_worker_cnt < _max_workers && 1799f841f6adSraf aiowp->work_minload1 >= _minworkload) { 1800f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1); 1801f841f6adSraf sig_mutex_lock(&__aio_mutex); 1802f841f6adSraf *nextworker = aiowp->work_forw; 1803f841f6adSraf sig_mutex_unlock(&__aio_mutex); 1804f841f6adSraf if (_aio_create_worker(reqp, mode)) 1805f841f6adSraf aio_panic("aio_req_add: add worker"); 1806f841f6adSraf sigon(self); /* reenable SIGIO */ 1807f841f6adSraf return; 1808f841f6adSraf } 1809f841f6adSraf aiowp->work_minload1++; 1810f841f6adSraf break; 1811f841f6adSraf case AIOFSYNC: 1812f841f6adSraf case AIONOTIFY: 1813f841f6adSraf load_bal_flg = 0; 1814f841f6adSraf sig_mutex_lock(&aiowp->work_qlock1); 1815f841f6adSraf break; 1816f841f6adSraf default: 1817f841f6adSraf aio_panic("_aio_req_add: invalid mode"); 1818f841f6adSraf break; 1819f841f6adSraf } 1820f841f6adSraf /* 1821f841f6adSraf * Put request onto worker's work queue. 
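 * work_tail1 is the oldest request on the queue, work_head1 is the most
 * recently added one, and work_next1 is the next request the worker
 * will pick up.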
1822f841f6adSraf */
1823f841f6adSraf if (aiowp->work_tail1 == NULL) {
1824f841f6adSraf ASSERT(aiowp->work_count1 == 0);
1825f841f6adSraf aiowp->work_tail1 = reqp;
1826f841f6adSraf aiowp->work_next1 = reqp;
1827f841f6adSraf } else {
1828f841f6adSraf aiowp->work_head1->req_next = reqp;
1829f841f6adSraf if (aiowp->work_next1 == NULL)
1830f841f6adSraf aiowp->work_next1 = reqp;
1831f841f6adSraf }
1832f841f6adSraf reqp->req_state = AIO_REQ_QUEUED;
1833f841f6adSraf reqp->req_worker = aiowp;
1834f841f6adSraf aiowp->work_head1 = reqp;
1835f841f6adSraf /*
1836f841f6adSraf * Awaken the worker if it is not currently active.
1837f841f6adSraf */
1838f841f6adSraf if (aiowp->work_count1++ == 0 && aiowp->work_idleflg) {
1839f841f6adSraf aiowp->work_idleflg = 0;
1840f841f6adSraf (void) cond_signal(&aiowp->work_idle_cv);
1841f841f6adSraf }
1842f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1);
1843f841f6adSraf 
1844f841f6adSraf if (load_bal_flg) {
1845f841f6adSraf sig_mutex_lock(&__aio_mutex);
1846f841f6adSraf *nextworker = aiowp->work_forw;
1847f841f6adSraf sig_mutex_unlock(&__aio_mutex);
1848f841f6adSraf }
1849f841f6adSraf sigon(self); /* reenable SIGIO */
1850f841f6adSraf }
1851f841f6adSraf 
1852f841f6adSraf /*
1853f841f6adSraf * Get an AIO request for a specified worker.
1854f841f6adSraf * If the work queue is empty, return NULL.
1855f841f6adSraf */
1856f841f6adSraf aio_req_t *
1857f841f6adSraf _aio_req_get(aio_worker_t *aiowp)
1858f841f6adSraf {
1859f841f6adSraf aio_req_t *reqp;
1860f841f6adSraf 
1861f841f6adSraf sig_mutex_lock(&aiowp->work_qlock1);
1862f841f6adSraf if ((reqp = aiowp->work_next1) != NULL) {
1863f841f6adSraf /*
1864f841f6adSraf * Remove a POSIX request from the queue; the
1865f841f6adSraf * request queue is a singly linked list
1866f841f6adSraf * with a previous pointer. The request is
1867f841f6adSraf * removed by updating the previous pointer.
1868f841f6adSraf *
1869f841f6adSraf * Non-POSIX requests are left on the queue
1870f841f6adSraf * to eventually be placed on the done queue.
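 *
 * For non-POSIX requests, work_prev1 remembers the request now in
 * progress and work_done1 is nonzero while a completed request is
 * waiting to be moved to the done queue by _aio_work_done().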
1871f841f6adSraf */ 1872f841f6adSraf 1873f841f6adSraf if (POSIX_AIO(reqp)) { 1874f841f6adSraf if (aiowp->work_prev1 == NULL) { 1875f841f6adSraf aiowp->work_tail1 = reqp->req_next; 1876f841f6adSraf if (aiowp->work_tail1 == NULL) 1877f841f6adSraf aiowp->work_head1 = NULL; 1878f841f6adSraf } else { 1879f841f6adSraf aiowp->work_prev1->req_next = reqp->req_next; 1880f841f6adSraf if (aiowp->work_head1 == reqp) 1881f841f6adSraf aiowp->work_head1 = reqp->req_next; 1882f841f6adSraf } 1883f841f6adSraf 1884f841f6adSraf } else { 1885f841f6adSraf aiowp->work_prev1 = reqp; 1886f841f6adSraf ASSERT(aiowp->work_done1 >= 0); 1887f841f6adSraf aiowp->work_done1++; 1888f841f6adSraf } 1889f841f6adSraf ASSERT(reqp != reqp->req_next); 1890f841f6adSraf aiowp->work_next1 = reqp->req_next; 1891f841f6adSraf ASSERT(aiowp->work_count1 >= 1); 1892f841f6adSraf aiowp->work_count1--; 1893f841f6adSraf switch (reqp->req_op) { 1894f841f6adSraf case AIOREAD: 1895f841f6adSraf case AIOWRITE: 1896f841f6adSraf case AIOAREAD: 1897f841f6adSraf case AIOAWRITE: 1898f841f6adSraf #if !defined(_LP64) 1899f841f6adSraf case AIOAREAD64: 1900f841f6adSraf case AIOAWRITE64: 1901f841f6adSraf #endif 1902f841f6adSraf ASSERT(aiowp->work_minload1 > 0); 1903f841f6adSraf aiowp->work_minload1--; 1904f841f6adSraf break; 1905f841f6adSraf } 1906f841f6adSraf reqp->req_state = AIO_REQ_INPROGRESS; 1907f841f6adSraf } 1908f841f6adSraf aiowp->work_req = reqp; 1909f841f6adSraf ASSERT(reqp != NULL || aiowp->work_count1 == 0); 1910f841f6adSraf sig_mutex_unlock(&aiowp->work_qlock1); 1911f841f6adSraf return (reqp); 1912f841f6adSraf } 1913f841f6adSraf 1914f841f6adSraf static void 1915f841f6adSraf _aio_req_del(aio_worker_t *aiowp, aio_req_t *reqp, int ostate) 1916f841f6adSraf { 1917f841f6adSraf aio_req_t **last; 1918f841f6adSraf aio_req_t *lastrp; 1919f841f6adSraf aio_req_t *next; 1920f841f6adSraf 1921f841f6adSraf ASSERT(aiowp != NULL); 1922f841f6adSraf ASSERT(MUTEX_HELD(&aiowp->work_qlock1)); 1923f841f6adSraf if (POSIX_AIO(reqp)) { 1924f841f6adSraf if (ostate != AIO_REQ_QUEUED) 1925f841f6adSraf return; 1926f841f6adSraf } 1927f841f6adSraf last = &aiowp->work_tail1; 1928f841f6adSraf lastrp = aiowp->work_tail1; 1929f841f6adSraf ASSERT(ostate == AIO_REQ_QUEUED || ostate == AIO_REQ_INPROGRESS); 1930f841f6adSraf while ((next = *last) != NULL) { 1931f841f6adSraf if (next == reqp) { 1932f841f6adSraf *last = next->req_next; 1933f841f6adSraf if (aiowp->work_next1 == next) 1934f841f6adSraf aiowp->work_next1 = next->req_next; 1935f841f6adSraf 1936*bced1f33Spraks /* 1937*bced1f33Spraks * if this is the first request on the queue, move 1938*bced1f33Spraks * the lastrp pointer forward. 1939*bced1f33Spraks */ 1940*bced1f33Spraks if (lastrp == next) 1941*bced1f33Spraks lastrp = next->req_next; 1942*bced1f33Spraks 1943*bced1f33Spraks /* 1944*bced1f33Spraks * if this request is pointed by work_head1, then 1945*bced1f33Spraks * make work_head1 point to the last request that is 1946*bced1f33Spraks * present on the queue. 1947*bced1f33Spraks */ 1948f841f6adSraf if (aiowp->work_head1 == next) 1949f841f6adSraf aiowp->work_head1 = lastrp; 1950*bced1f33Spraks 1951*bced1f33Spraks /* 1952*bced1f33Spraks * work_prev1 is used only in non posix case and it 1953*bced1f33Spraks * points to the current AIO_REQ_INPROGRESS request. 1954*bced1f33Spraks * If work_prev1 points to this request which is being 1955*bced1f33Spraks * deleted, make work_prev1 NULL and set work_done1 1956*bced1f33Spraks * to 0. 
1957*bced1f33Spraks * 1958*bced1f33Spraks * A worker thread can be processing only one request 1959*bced1f33Spraks * at a time. 1960*bced1f33Spraks */ 1961*bced1f33Spraks if (aiowp->work_prev1 == next) { 1962*bced1f33Spraks ASSERT(ostate == AIO_REQ_INPROGRESS && 1963*bced1f33Spraks !POSIX_AIO(reqp) && aiowp->work_done1 > 0); 1964*bced1f33Spraks aiowp->work_prev1 = NULL; 1965*bced1f33Spraks aiowp->work_done1--; 1966f841f6adSraf } 1967f841f6adSraf 1968f841f6adSraf if (ostate == AIO_REQ_QUEUED) { 1969f841f6adSraf ASSERT(aiowp->work_count1 >= 1); 1970f841f6adSraf aiowp->work_count1--; 1971f841f6adSraf ASSERT(aiowp->work_minload1 >= 1); 1972f841f6adSraf aiowp->work_minload1--; 1973f841f6adSraf } 1974f841f6adSraf return; 1975f841f6adSraf } 1976f841f6adSraf last = &next->req_next; 1977f841f6adSraf lastrp = next; 1978f841f6adSraf } 1979f841f6adSraf /* NOTREACHED */ 1980f841f6adSraf } 1981f841f6adSraf 1982f841f6adSraf static void 1983f841f6adSraf _aio_enq_doneq(aio_req_t *reqp) 1984f841f6adSraf { 1985f841f6adSraf if (_aio_doneq == NULL) { 1986f841f6adSraf _aio_doneq = reqp; 1987f841f6adSraf reqp->req_next = reqp->req_prev = reqp; 1988f841f6adSraf } else { 1989f841f6adSraf reqp->req_next = _aio_doneq; 1990f841f6adSraf reqp->req_prev = _aio_doneq->req_prev; 1991f841f6adSraf _aio_doneq->req_prev->req_next = reqp; 1992f841f6adSraf _aio_doneq->req_prev = reqp; 1993f841f6adSraf } 1994f841f6adSraf reqp->req_state = AIO_REQ_DONEQ; 1995f841f6adSraf _aio_doneq_cnt++; 1996f841f6adSraf } 1997f841f6adSraf 1998f841f6adSraf /* 1999f841f6adSraf * caller owns the _aio_mutex 2000f841f6adSraf */ 2001f841f6adSraf aio_req_t * 2002f841f6adSraf _aio_req_remove(aio_req_t *reqp) 2003f841f6adSraf { 2004f841f6adSraf if (reqp && reqp->req_state != AIO_REQ_DONEQ) 2005f841f6adSraf return (NULL); 2006f841f6adSraf 2007f841f6adSraf if (reqp) { 2008f841f6adSraf /* request in done queue */ 2009f841f6adSraf if (_aio_doneq == reqp) 2010f841f6adSraf _aio_doneq = reqp->req_next; 2011f841f6adSraf if (_aio_doneq == reqp) { 2012f841f6adSraf /* only one request on queue */ 2013f841f6adSraf _aio_doneq = NULL; 2014f841f6adSraf } else { 2015f841f6adSraf aio_req_t *tmp = reqp->req_next; 2016f841f6adSraf reqp->req_prev->req_next = tmp; 2017f841f6adSraf tmp->req_prev = reqp->req_prev; 2018f841f6adSraf } 2019f841f6adSraf } else if ((reqp = _aio_doneq) != NULL) { 2020f841f6adSraf if (reqp == reqp->req_next) { 2021f841f6adSraf /* only one request on queue */ 2022f841f6adSraf _aio_doneq = NULL; 2023f841f6adSraf } else { 2024f841f6adSraf reqp->req_prev->req_next = _aio_doneq = reqp->req_next; 2025f841f6adSraf _aio_doneq->req_prev = reqp->req_prev; 2026f841f6adSraf } 2027f841f6adSraf } 2028f841f6adSraf if (reqp) { 2029f841f6adSraf _aio_doneq_cnt--; 2030f841f6adSraf reqp->req_next = reqp->req_prev = reqp; 2031f841f6adSraf reqp->req_state = AIO_REQ_DONE; 2032f841f6adSraf } 2033f841f6adSraf return (reqp); 2034f841f6adSraf } 2035f841f6adSraf 2036f841f6adSraf /* 2037f841f6adSraf * An AIO request is identified by an aio_result_t pointer. The library 2038f841f6adSraf * maps this aio_result_t pointer to its internal representation using a 2039f841f6adSraf * hash table. This function adds an aio_result_t pointer to the hash table. 
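 * Returns 0 on success, or -1 without inserting anything when the
 * aio_result_t is already present in the table (a duplicate request).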
2040f841f6adSraf */ 2041f841f6adSraf static int 2042f841f6adSraf _aio_hash_insert(aio_result_t *resultp, aio_req_t *reqp) 2043f841f6adSraf { 2044f841f6adSraf aio_hash_t *hashp; 2045f841f6adSraf aio_req_t **prev; 2046f841f6adSraf aio_req_t *next; 2047f841f6adSraf 2048f841f6adSraf hashp = _aio_hash + AIOHASH(resultp); 2049f841f6adSraf lmutex_lock(&hashp->hash_lock); 2050f841f6adSraf prev = &hashp->hash_ptr; 2051f841f6adSraf while ((next = *prev) != NULL) { 2052f841f6adSraf if (resultp == next->req_resultp) { 2053f841f6adSraf lmutex_unlock(&hashp->hash_lock); 2054f841f6adSraf return (-1); 2055f841f6adSraf } 2056f841f6adSraf prev = &next->req_link; 2057f841f6adSraf } 2058f841f6adSraf *prev = reqp; 2059f841f6adSraf ASSERT(reqp->req_link == NULL); 2060f841f6adSraf lmutex_unlock(&hashp->hash_lock); 2061f841f6adSraf return (0); 2062f841f6adSraf } 2063f841f6adSraf 2064f841f6adSraf /* 2065f841f6adSraf * Remove an entry from the hash table. 2066f841f6adSraf */ 2067f841f6adSraf aio_req_t * 2068f841f6adSraf _aio_hash_del(aio_result_t *resultp) 2069f841f6adSraf { 2070f841f6adSraf aio_hash_t *hashp; 2071f841f6adSraf aio_req_t **prev; 2072f841f6adSraf aio_req_t *next = NULL; 2073f841f6adSraf 2074f841f6adSraf if (_aio_hash != NULL) { 2075f841f6adSraf hashp = _aio_hash + AIOHASH(resultp); 2076f841f6adSraf lmutex_lock(&hashp->hash_lock); 2077f841f6adSraf prev = &hashp->hash_ptr; 2078f841f6adSraf while ((next = *prev) != NULL) { 2079f841f6adSraf if (resultp == next->req_resultp) { 2080f841f6adSraf *prev = next->req_link; 2081f841f6adSraf next->req_link = NULL; 2082f841f6adSraf break; 2083f841f6adSraf } 2084f841f6adSraf prev = &next->req_link; 2085f841f6adSraf } 2086f841f6adSraf lmutex_unlock(&hashp->hash_lock); 2087f841f6adSraf } 2088f841f6adSraf return (next); 2089f841f6adSraf } 2090f841f6adSraf 2091f841f6adSraf /* 2092f841f6adSraf * find an entry in the hash table 2093f841f6adSraf */ 2094f841f6adSraf aio_req_t * 2095f841f6adSraf _aio_hash_find(aio_result_t *resultp) 2096f841f6adSraf { 2097f841f6adSraf aio_hash_t *hashp; 2098f841f6adSraf aio_req_t **prev; 2099f841f6adSraf aio_req_t *next = NULL; 2100f841f6adSraf 2101f841f6adSraf if (_aio_hash != NULL) { 2102f841f6adSraf hashp = _aio_hash + AIOHASH(resultp); 2103f841f6adSraf lmutex_lock(&hashp->hash_lock); 2104f841f6adSraf prev = &hashp->hash_ptr; 2105f841f6adSraf while ((next = *prev) != NULL) { 2106f841f6adSraf if (resultp == next->req_resultp) 2107f841f6adSraf break; 2108f841f6adSraf prev = &next->req_link; 2109f841f6adSraf } 2110f841f6adSraf lmutex_unlock(&hashp->hash_lock); 2111f841f6adSraf } 2112f841f6adSraf return (next); 2113f841f6adSraf } 2114f841f6adSraf 2115f841f6adSraf /* 2116f841f6adSraf * AIO interface for POSIX 2117f841f6adSraf */ 2118f841f6adSraf int 2119f841f6adSraf _aio_rw(aiocb_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker, 2120f841f6adSraf int mode, int flg) 2121f841f6adSraf { 2122f841f6adSraf aio_req_t *reqp; 2123f841f6adSraf aio_args_t *ap; 2124f841f6adSraf int kerr; 2125f841f6adSraf 2126f841f6adSraf if (aiocbp == NULL) { 2127f841f6adSraf errno = EINVAL; 2128f841f6adSraf return (-1); 2129f841f6adSraf } 2130f841f6adSraf 2131f841f6adSraf /* initialize kaio */ 2132f841f6adSraf if (!_kaio_ok) 2133f841f6adSraf _kaio_init(); 2134f841f6adSraf 2135f841f6adSraf aiocbp->aio_state = NOCHECK; 2136f841f6adSraf 2137f841f6adSraf /* 2138f841f6adSraf * If we have been called because a list I/O 2139f841f6adSraf * kaio() failed, we dont want to repeat the 2140f841f6adSraf * system call 2141f841f6adSraf */ 2142f841f6adSraf 2143f841f6adSraf if 
(flg & AIO_KAIO) { 2144f841f6adSraf /* 2145f841f6adSraf * Try kernel aio first. 2146f841f6adSraf * If errno is ENOTSUP/EBADFD, 2147f841f6adSraf * fall back to the thread implementation. 2148f841f6adSraf */ 2149f841f6adSraf if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) { 2150f841f6adSraf aiocbp->aio_resultp.aio_errno = EINPROGRESS; 2151f841f6adSraf aiocbp->aio_state = CHECK; 2152f841f6adSraf kerr = (int)_kaio(mode, aiocbp); 2153f841f6adSraf if (kerr == 0) 2154f841f6adSraf return (0); 2155f841f6adSraf if (errno != ENOTSUP && errno != EBADFD) { 2156f841f6adSraf aiocbp->aio_resultp.aio_errno = errno; 2157f841f6adSraf aiocbp->aio_resultp.aio_return = -1; 2158f841f6adSraf aiocbp->aio_state = NOCHECK; 2159f841f6adSraf return (-1); 2160f841f6adSraf } 2161f841f6adSraf if (errno == EBADFD) 2162f841f6adSraf SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); 2163f841f6adSraf } 2164f841f6adSraf } 2165f841f6adSraf 2166f841f6adSraf aiocbp->aio_resultp.aio_errno = EINPROGRESS; 2167f841f6adSraf aiocbp->aio_state = USERAIO; 2168f841f6adSraf 2169f841f6adSraf if (!__uaio_ok && __uaio_init() == -1) 2170f841f6adSraf return (-1); 2171f841f6adSraf 2172f841f6adSraf if ((reqp = _aio_req_alloc()) == NULL) { 2173f841f6adSraf errno = EAGAIN; 2174f841f6adSraf return (-1); 2175f841f6adSraf } 2176f841f6adSraf 2177f841f6adSraf /* 2178f841f6adSraf * If an LIO request, add the list head to the aio request 2179f841f6adSraf */ 2180f841f6adSraf reqp->req_head = lio_head; 2181f841f6adSraf reqp->req_type = AIO_POSIX_REQ; 2182f841f6adSraf reqp->req_op = mode; 2183f841f6adSraf reqp->req_largefile = 0; 2184f841f6adSraf 2185f841f6adSraf if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) { 2186f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_NONE; 2187f841f6adSraf } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { 2188f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL; 2189f841f6adSraf reqp->req_sigevent.sigev_signo = 2190f841f6adSraf aiocbp->aio_sigevent.sigev_signo; 2191f841f6adSraf reqp->req_sigevent.sigev_value.sival_ptr = 2192f841f6adSraf aiocbp->aio_sigevent.sigev_value.sival_ptr; 2193f841f6adSraf } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) { 2194f841f6adSraf port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr; 2195f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_PORT; 2196f841f6adSraf /* 2197f841f6adSraf * Reuse the sigevent structure to contain the port number 2198f841f6adSraf * and the user value. Same for SIGEV_THREAD, below. 2199f841f6adSraf */ 2200f841f6adSraf reqp->req_sigevent.sigev_signo = 2201f841f6adSraf pn->portnfy_port; 2202f841f6adSraf reqp->req_sigevent.sigev_value.sival_ptr = 2203f841f6adSraf pn->portnfy_user; 2204f841f6adSraf } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) { 2205f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_THREAD; 2206f841f6adSraf /* 2207f841f6adSraf * The sigevent structure contains the port number 2208f841f6adSraf * and the user value. Same for SIGEV_PORT, above. 
2209f841f6adSraf */ 2210f841f6adSraf reqp->req_sigevent.sigev_signo = 2211f841f6adSraf aiocbp->aio_sigevent.sigev_signo; 2212f841f6adSraf reqp->req_sigevent.sigev_value.sival_ptr = 2213f841f6adSraf aiocbp->aio_sigevent.sigev_value.sival_ptr; 2214f841f6adSraf } 2215f841f6adSraf 2216f841f6adSraf reqp->req_resultp = &aiocbp->aio_resultp; 2217f841f6adSraf reqp->req_aiocbp = aiocbp; 2218f841f6adSraf ap = &reqp->req_args; 2219f841f6adSraf ap->fd = aiocbp->aio_fildes; 2220f841f6adSraf ap->buf = (caddr_t)aiocbp->aio_buf; 2221f841f6adSraf ap->bufsz = aiocbp->aio_nbytes; 2222f841f6adSraf ap->offset = aiocbp->aio_offset; 2223f841f6adSraf 2224f841f6adSraf if ((flg & AIO_NO_DUPS) && 2225f841f6adSraf _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) { 2226f841f6adSraf aio_panic("_aio_rw(): request already in hash table"); 2227f841f6adSraf _aio_req_free(reqp); 2228f841f6adSraf errno = EINVAL; 2229f841f6adSraf return (-1); 2230f841f6adSraf } 2231f841f6adSraf _aio_req_add(reqp, nextworker, mode); 2232f841f6adSraf return (0); 2233f841f6adSraf } 2234f841f6adSraf 2235f841f6adSraf #if !defined(_LP64) 2236f841f6adSraf /* 2237f841f6adSraf * 64-bit AIO interface for POSIX 2238f841f6adSraf */ 2239f841f6adSraf int 2240f841f6adSraf _aio_rw64(aiocb64_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker, 2241f841f6adSraf int mode, int flg) 2242f841f6adSraf { 2243f841f6adSraf aio_req_t *reqp; 2244f841f6adSraf aio_args_t *ap; 2245f841f6adSraf int kerr; 2246f841f6adSraf 2247f841f6adSraf if (aiocbp == NULL) { 2248f841f6adSraf errno = EINVAL; 2249f841f6adSraf return (-1); 2250f841f6adSraf } 2251f841f6adSraf 2252f841f6adSraf /* initialize kaio */ 2253f841f6adSraf if (!_kaio_ok) 2254f841f6adSraf _kaio_init(); 2255f841f6adSraf 2256f841f6adSraf aiocbp->aio_state = NOCHECK; 2257f841f6adSraf 2258f841f6adSraf /* 2259f841f6adSraf * If we have been called because a list I/O 2260f841f6adSraf * kaio() failed, we dont want to repeat the 2261f841f6adSraf * system call 2262f841f6adSraf */ 2263f841f6adSraf 2264f841f6adSraf if (flg & AIO_KAIO) { 2265f841f6adSraf /* 2266f841f6adSraf * Try kernel aio first. 2267f841f6adSraf * If errno is ENOTSUP/EBADFD, 2268f841f6adSraf * fall back to the thread implementation. 
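 * EBADFD also marks the descriptor as not supporting kaio
 * (SET_KAIO_NOT_SUPPORTED), so later requests skip the kernel attempt.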
2269f841f6adSraf */ 2270f841f6adSraf if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) { 2271f841f6adSraf aiocbp->aio_resultp.aio_errno = EINPROGRESS; 2272f841f6adSraf aiocbp->aio_state = CHECK; 2273f841f6adSraf kerr = (int)_kaio(mode, aiocbp); 2274f841f6adSraf if (kerr == 0) 2275f841f6adSraf return (0); 2276f841f6adSraf if (errno != ENOTSUP && errno != EBADFD) { 2277f841f6adSraf aiocbp->aio_resultp.aio_errno = errno; 2278f841f6adSraf aiocbp->aio_resultp.aio_return = -1; 2279f841f6adSraf aiocbp->aio_state = NOCHECK; 2280f841f6adSraf return (-1); 2281f841f6adSraf } 2282f841f6adSraf if (errno == EBADFD) 2283f841f6adSraf SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes); 2284f841f6adSraf } 2285f841f6adSraf } 2286f841f6adSraf 2287f841f6adSraf aiocbp->aio_resultp.aio_errno = EINPROGRESS; 2288f841f6adSraf aiocbp->aio_state = USERAIO; 2289f841f6adSraf 2290f841f6adSraf if (!__uaio_ok && __uaio_init() == -1) 2291f841f6adSraf return (-1); 2292f841f6adSraf 2293f841f6adSraf if ((reqp = _aio_req_alloc()) == NULL) { 2294f841f6adSraf errno = EAGAIN; 2295f841f6adSraf return (-1); 2296f841f6adSraf } 2297f841f6adSraf 2298f841f6adSraf /* 2299f841f6adSraf * If an LIO request, add the list head to the aio request 2300f841f6adSraf */ 2301f841f6adSraf reqp->req_head = lio_head; 2302f841f6adSraf reqp->req_type = AIO_POSIX_REQ; 2303f841f6adSraf reqp->req_op = mode; 2304f841f6adSraf reqp->req_largefile = 1; 2305f841f6adSraf 2306f841f6adSraf if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) { 2307f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_NONE; 2308f841f6adSraf } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { 2309f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL; 2310f841f6adSraf reqp->req_sigevent.sigev_signo = 2311f841f6adSraf aiocbp->aio_sigevent.sigev_signo; 2312f841f6adSraf reqp->req_sigevent.sigev_value.sival_ptr = 2313f841f6adSraf aiocbp->aio_sigevent.sigev_value.sival_ptr; 2314f841f6adSraf } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) { 2315f841f6adSraf port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr; 2316f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_PORT; 2317f841f6adSraf reqp->req_sigevent.sigev_signo = 2318f841f6adSraf pn->portnfy_port; 2319f841f6adSraf reqp->req_sigevent.sigev_value.sival_ptr = 2320f841f6adSraf pn->portnfy_user; 2321f841f6adSraf } else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) { 2322f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_THREAD; 2323f841f6adSraf reqp->req_sigevent.sigev_signo = 2324f841f6adSraf aiocbp->aio_sigevent.sigev_signo; 2325f841f6adSraf reqp->req_sigevent.sigev_value.sival_ptr = 2326f841f6adSraf aiocbp->aio_sigevent.sigev_value.sival_ptr; 2327f841f6adSraf } 2328f841f6adSraf 2329f841f6adSraf reqp->req_resultp = &aiocbp->aio_resultp; 2330f841f6adSraf reqp->req_aiocbp = aiocbp; 2331f841f6adSraf ap = &reqp->req_args; 2332f841f6adSraf ap->fd = aiocbp->aio_fildes; 2333f841f6adSraf ap->buf = (caddr_t)aiocbp->aio_buf; 2334f841f6adSraf ap->bufsz = aiocbp->aio_nbytes; 2335f841f6adSraf ap->offset = aiocbp->aio_offset; 2336f841f6adSraf 2337f841f6adSraf if ((flg & AIO_NO_DUPS) && 2338f841f6adSraf _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) { 2339f841f6adSraf aio_panic("_aio_rw64(): request already in hash table"); 2340f841f6adSraf _aio_req_free(reqp); 2341f841f6adSraf errno = EINVAL; 2342f841f6adSraf return (-1); 2343f841f6adSraf } 2344f841f6adSraf _aio_req_add(reqp, nextworker, mode); 2345f841f6adSraf return (0); 2346f841f6adSraf } 2347f841f6adSraf #endif /* !defined(_LP64) */ 2348
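/*
 * Illustrative sketch (not part of this file): one way an application
 * might exercise the SIGEV_PORT path handled by _aio_rw() above, using
 * the Solaris event ports API.  The descriptor fd, the buffer buf, its
 * length buflen, and the user cookie my_tag are assumed to be supplied
 * by the application.
 *
 *	int port = port_create();
 *	port_notify_t pn;
 *	struct aiocb cb;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	pn.portnfy_port = port;
 *	pn.portnfy_user = my_tag;
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = buflen;
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *
 *	if (aio_read(&cb) == 0) {
 *		port_event_t pe;
 *		(void) port_get(port, &pe, NULL);
 *	}
 *
 * The completion is delivered as a PORT_SOURCE_AIO event whose
 * portev_user field carries my_tag.
 */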