1*f841f6adSraf /* 2*f841f6adSraf * CDDL HEADER START 3*f841f6adSraf * 4*f841f6adSraf * The contents of this file are subject to the terms of the 5*f841f6adSraf * Common Development and Distribution License (the "License"). 6*f841f6adSraf * You may not use this file except in compliance with the License. 7*f841f6adSraf * 8*f841f6adSraf * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*f841f6adSraf * or http://www.opensolaris.org/os/licensing. 10*f841f6adSraf * See the License for the specific language governing permissions 11*f841f6adSraf * and limitations under the License. 12*f841f6adSraf * 13*f841f6adSraf * When distributing Covered Code, include this CDDL HEADER in each 14*f841f6adSraf * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*f841f6adSraf * If applicable, add the following below this CDDL HEADER, with the 16*f841f6adSraf * fields enclosed by brackets "[]" replaced with your own identifying 17*f841f6adSraf * information: Portions Copyright [yyyy] [name of copyright owner] 18*f841f6adSraf * 19*f841f6adSraf * CDDL HEADER END 20*f841f6adSraf */ 21*f841f6adSraf 22*f841f6adSraf /* 23*f841f6adSraf * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24*f841f6adSraf * Use is subject to license terms. 25*f841f6adSraf */ 26*f841f6adSraf 27*f841f6adSraf #ifndef _ASYNCIO_H 28*f841f6adSraf #define _ASYNCIO_H 29*f841f6adSraf 30*f841f6adSraf #pragma ident "%Z%%M% %I% %E% SMI" 31*f841f6adSraf 32*f841f6adSraf #ifdef __cplusplus 33*f841f6adSraf extern "C" { 34*f841f6adSraf #endif 35*f841f6adSraf 36*f841f6adSraf #include <stdio.h> 37*f841f6adSraf #include <stdlib.h> 38*f841f6adSraf #include <unistd.h> 39*f841f6adSraf #include <string.h> 40*f841f6adSraf #include <errno.h> 41*f841f6adSraf #include <sys/types.h> 42*f841f6adSraf #include <sys/stat.h> 43*f841f6adSraf #include <thread.h> 44*f841f6adSraf #include <pthread.h> 45*f841f6adSraf #include <setjmp.h> 46*f841f6adSraf #include <signal.h> 47*f841f6adSraf #include <siginfo.h> 48*f841f6adSraf #include <aio.h> 49*f841f6adSraf #include <limits.h> 50*f841f6adSraf #include <ucontext.h> 51*f841f6adSraf #include <sys/asynch.h> 52*f841f6adSraf #include <sys/mman.h> 53*f841f6adSraf 54*f841f6adSraf #if !defined(_LP64) 55*f841f6adSraf #define AIOSTKSIZE (64 * 1024) 56*f841f6adSraf #else 57*f841f6adSraf #define AIOSTKSIZE (128 * 1024) 58*f841f6adSraf #endif 59*f841f6adSraf 60*f841f6adSraf #define SIGAIOCANCEL SIGLWP /* special aio cancelation signal */ 61*f841f6adSraf 62*f841f6adSraf #define AIO_WAITN_MAXIOCBS 32768 /* max. iocbs per system call */ 63*f841f6adSraf 64*f841f6adSraf /* 65*f841f6adSraf * Declare structure types. The structures themselves are defined below. 66*f841f6adSraf */ 67*f841f6adSraf typedef struct aio_args aio_args_t; 68*f841f6adSraf typedef struct aio_lio aio_lio_t; 69*f841f6adSraf typedef struct notif_param notif_param_t; 70*f841f6adSraf typedef struct aio_req aio_req_t; 71*f841f6adSraf typedef struct aio_worker aio_worker_t; 72*f841f6adSraf typedef struct aio_hash aio_hash_t; 73*f841f6adSraf 74*f841f6adSraf struct aio_args { 75*f841f6adSraf int fd; 76*f841f6adSraf caddr_t buf; 77*f841f6adSraf size_t bufsz; 78*f841f6adSraf offset_t offset; 79*f841f6adSraf }; 80*f841f6adSraf 81*f841f6adSraf /* 82*f841f6adSraf * list head for UFS list I/O 83*f841f6adSraf */ 84*f841f6adSraf struct aio_lio { 85*f841f6adSraf mutex_t lio_mutex; /* list mutex */ 86*f841f6adSraf cond_t lio_cond_cv; /* list notification for I/O done */ 87*f841f6adSraf aio_lio_t *lio_next; /* pointer to next on freelist */ 88*f841f6adSraf char lio_mode; /* LIO_WAIT/LIO_NOWAIT */ 89*f841f6adSraf char lio_canned; /* lio was canceled */ 90*f841f6adSraf char lio_largefile; /* largefile operation */ 91*f841f6adSraf char lio_waiting; /* waiting in __lio_listio() */ 92*f841f6adSraf int lio_nent; /* Number of list I/O's */ 93*f841f6adSraf int lio_refcnt; /* outstanding I/O's */ 94*f841f6adSraf int lio_event; /* Event number for notification */ 95*f841f6adSraf int lio_port; /* Port number for notification */ 96*f841f6adSraf int lio_signo; /* Signal number for notification */ 97*f841f6adSraf union sigval lio_sigval; /* Signal parameter */ 98*f841f6adSraf uintptr_t lio_object; /* for SIGEV_THREAD or SIGEV_PORT */ 99*f841f6adSraf struct sigevent *lio_sigevent; /* Notification function and attr. */ 100*f841f6adSraf }; 101*f841f6adSraf 102*f841f6adSraf /* 103*f841f6adSraf * Notification parameters 104*f841f6adSraf */ 105*f841f6adSraf struct notif_param { 106*f841f6adSraf int np_signo; /* SIGEV_SIGNAL */ 107*f841f6adSraf int np_port; /* SIGEV_THREAD or SIGEV_PORT */ 108*f841f6adSraf void *np_user; 109*f841f6adSraf int np_event; 110*f841f6adSraf uintptr_t np_object; 111*f841f6adSraf int np_lio_signo; /* listio: SIGEV_SIGNAL */ 112*f841f6adSraf int np_lio_port; /* listio: SIGEV_THREAD or SIGEV_PORT */ 113*f841f6adSraf void *np_lio_user; 114*f841f6adSraf int np_lio_event; 115*f841f6adSraf uintptr_t np_lio_object; 116*f841f6adSraf }; 117*f841f6adSraf 118*f841f6adSraf struct aio_req { 119*f841f6adSraf /* 120*f841f6adSraf * fields protected by _aio_mutex lock. 121*f841f6adSraf */ 122*f841f6adSraf aio_req_t *req_link; /* hash/freelist chain link */ 123*f841f6adSraf /* 124*f841f6adSraf * when req is on the doneq, then req_next is protected by 125*f841f6adSraf * the _aio_mutex lock. when the req is on a work q, then 126*f841f6adSraf * req_next is protected by a worker's work_qlock1 lock. 127*f841f6adSraf */ 128*f841f6adSraf aio_req_t *req_next; /* request/done queue link */ 129*f841f6adSraf aio_req_t *req_prev; /* double linked list */ 130*f841f6adSraf /* 131*f841f6adSraf * fields protected by a worker's work_qlock1 lock. 132*f841f6adSraf */ 133*f841f6adSraf char req_state; /* AIO_REQ_QUEUED, ... */ 134*f841f6adSraf /* 135*f841f6adSraf * fields require no locking. 136*f841f6adSraf */ 137*f841f6adSraf char req_type; /* AIO_POSIX_REQ or not */ 138*f841f6adSraf char req_largefile; /* largefile operation */ 139*f841f6adSraf char req_op; /* AIOREAD, etc. */ 140*f841f6adSraf aio_worker_t *req_worker; /* associate request with worker */ 141*f841f6adSraf aio_result_t *req_resultp; /* address of result buffer */ 142*f841f6adSraf aio_args_t req_args; /* arglist */ 143*f841f6adSraf aio_lio_t *req_head; /* list head for LIO */ 144*f841f6adSraf struct sigevent req_sigevent; 145*f841f6adSraf void *req_aiocbp; /* ptr to aiocb or aiocb64 */ 146*f841f6adSraf notif_param_t req_notify; /* notification parameters */ 147*f841f6adSraf }; 148*f841f6adSraf 149*f841f6adSraf /* special lio type that destroys itself when lio refcnt becomes zero */ 150*f841f6adSraf #define LIO_FSYNC LIO_WAIT+1 151*f841f6adSraf #define LIO_DESTROY LIO_FSYNC+1 152*f841f6adSraf 153*f841f6adSraf /* lio flags */ 154*f841f6adSraf #define LIO_FSYNC_CANCELED 0x1 155*f841f6adSraf 156*f841f6adSraf /* values for aio_state */ 157*f841f6adSraf 158*f841f6adSraf #define AIO_REQ_QUEUED 1 159*f841f6adSraf #define AIO_REQ_INPROGRESS 2 160*f841f6adSraf #define AIO_REQ_CANCELED 3 161*f841f6adSraf #define AIO_REQ_DONE 4 162*f841f6adSraf #define AIO_REQ_FREE 5 163*f841f6adSraf #define AIO_REQ_DONEQ 6 164*f841f6adSraf 165*f841f6adSraf /* use KAIO in _aio_rw() */ 166*f841f6adSraf #define AIO_NO_KAIO 0x0 167*f841f6adSraf #define AIO_KAIO 0x1 168*f841f6adSraf #define AIO_NO_DUPS 0x2 169*f841f6adSraf 170*f841f6adSraf #define AIO_POSIX_REQ 0x1 171*f841f6adSraf 172*f841f6adSraf #define CHECK 1 173*f841f6adSraf #define NOCHECK 2 174*f841f6adSraf #define CHECKED 3 175*f841f6adSraf #define USERAIO 4 176*f841f6adSraf #define USERAIO_DONE 5 177*f841f6adSraf 178*f841f6adSraf /* values for _aio_flags */ 179*f841f6adSraf 180*f841f6adSraf /* if set, _aiodone() notifies aio_waitn about done requests */ 181*f841f6adSraf #define AIO_WAIT_INPROGRESS 0x1 182*f841f6adSraf /* if set, _aiodone() wakes up functions waiting for completed I/Os */ 183*f841f6adSraf #define AIO_IO_WAITING 0x2 184*f841f6adSraf #define AIO_LIB_WAITN 0x4 /* aio_waitn in progress */ 185*f841f6adSraf #define AIO_LIB_WAITN_PENDING 0x8 /* aio_waitn requests pending */ 186*f841f6adSraf 187*f841f6adSraf /* 188*f841f6adSraf * Before a kaio() system call, the fd will be checked 189*f841f6adSraf * to ensure that kernel async. I/O is supported for this file. 190*f841f6adSraf * The only way to find out is if a kaio() call returns ENOTSUP, 191*f841f6adSraf * so the default will always be to try the kaio() call. Only in 192*f841f6adSraf * the specific instance of a kaio() call returning ENOTSUP 193*f841f6adSraf * will we stop submitting kaio() calls for that fd. 194*f841f6adSraf * If the fd is outside the array bounds, we will allow the kaio() 195*f841f6adSraf * call. 196*f841f6adSraf * 197*f841f6adSraf * The only way that an fd entry can go from ENOTSUP to supported 198*f841f6adSraf * is if that fd is freed up by a close(), and close will clear 199*f841f6adSraf * the entry for that fd. 200*f841f6adSraf * 201*f841f6adSraf * Each fd gets a bit in the array _kaio_supported[]. 202*f841f6adSraf * 203*f841f6adSraf * uint32_t _kaio_supported[MAX_KAIO_FDARRAY_SIZE]; 204*f841f6adSraf * 205*f841f6adSraf * Array is MAX_KAIO_ARRAY_SIZE of 32-bit elements, for 8kb. 206*f841f6adSraf * If more than (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) 207*f841f6adSraf * files are open, this can be expanded. 208*f841f6adSraf */ 209*f841f6adSraf 210*f841f6adSraf #define MAX_KAIO_FDARRAY_SIZE 2048 211*f841f6adSraf #define KAIO_FDARRAY_ELEM_SIZE WORD_BIT /* uint32_t */ 212*f841f6adSraf 213*f841f6adSraf #define MAX_KAIO_FDS (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) 214*f841f6adSraf 215*f841f6adSraf #define VALID_FD(fdes) ((fdes) >= 0 && (fdes) < MAX_KAIO_FDS) 216*f841f6adSraf 217*f841f6adSraf #define KAIO_SUPPORTED(fdes) \ 218*f841f6adSraf (!VALID_FD(fdes) || \ 219*f841f6adSraf ((_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] & \ 220*f841f6adSraf (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))) == 0)) 221*f841f6adSraf 222*f841f6adSraf #define SET_KAIO_NOT_SUPPORTED(fdes) \ 223*f841f6adSraf if (VALID_FD(fdes)) \ 224*f841f6adSraf _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] |= \ 225*f841f6adSraf (uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) 226*f841f6adSraf 227*f841f6adSraf #define CLEAR_KAIO_SUPPORTED(fdes) \ 228*f841f6adSraf if (VALID_FD(fdes)) \ 229*f841f6adSraf _kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &= \ 230*f841f6adSraf ~(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE)) 231*f841f6adSraf 232*f841f6adSraf struct aio_worker { 233*f841f6adSraf aio_worker_t *work_forw; /* forward link in list of workers */ 234*f841f6adSraf aio_worker_t *work_backw; /* backwards link in list of workers */ 235*f841f6adSraf mutex_t work_qlock1; /* lock for work queue 1 */ 236*f841f6adSraf cond_t work_idle_cv; /* place to sleep when idle */ 237*f841f6adSraf aio_req_t *work_head1; /* head of work request queue 1 */ 238*f841f6adSraf aio_req_t *work_tail1; /* tail of work request queue 1 */ 239*f841f6adSraf aio_req_t *work_next1; /* work queue one's next pointer */ 240*f841f6adSraf aio_req_t *work_prev1; /* last request done from queue 1 */ 241*f841f6adSraf aio_req_t *work_req; /* active work request */ 242*f841f6adSraf thread_t work_tid; /* worker's thread-id */ 243*f841f6adSraf int work_count1; /* length of work queue one */ 244*f841f6adSraf int work_done1; /* number of requests done */ 245*f841f6adSraf int work_minload1; /* min length of queue */ 246*f841f6adSraf int work_idleflg; /* when set, worker is idle */ 247*f841f6adSraf sigjmp_buf work_jmp_buf; /* cancellation point */ 248*f841f6adSraf }; 249*f841f6adSraf 250*f841f6adSraf struct aio_hash { /* resultp hash table */ 251*f841f6adSraf mutex_t hash_lock; 252*f841f6adSraf aio_req_t *hash_ptr; 253*f841f6adSraf #if !defined(_LP64) 254*f841f6adSraf void *hash_pad; /* ensure sizeof (aio_hash_t) == 32 */ 255*f841f6adSraf #endif 256*f841f6adSraf }; 257*f841f6adSraf 258*f841f6adSraf extern aio_hash_t *_aio_hash; 259*f841f6adSraf 260*f841f6adSraf #define HASHSZ 2048 /* power of 2 */ 261*f841f6adSraf #define AIOHASH(resultp) ((((uintptr_t)(resultp) >> 17) ^ \ 262*f841f6adSraf ((uintptr_t)(resultp) >> 2)) & (HASHSZ - 1)) 263*f841f6adSraf #define POSIX_AIO(x) ((x)->req_type == AIO_POSIX_REQ) 264*f841f6adSraf 265*f841f6adSraf extern int __uaio_init(void); 266*f841f6adSraf extern void _kaio_init(void); 267*f841f6adSraf extern intptr_t _kaio(int, ...); 268*f841f6adSraf extern int _aiorw(int, caddr_t, int, offset_t, int, aio_result_t *, int); 269*f841f6adSraf extern int _aio_rw(aiocb_t *, aio_lio_t *, aio_worker_t **, int, int); 270*f841f6adSraf #if !defined(_LP64) 271*f841f6adSraf extern int _aio_rw64(aiocb64_t *, aio_lio_t *, aio_worker_t **, int, int); 272*f841f6adSraf #endif 273*f841f6adSraf extern int _aio_create_worker(aio_req_t *, int); 274*f841f6adSraf extern int _aio_cancel_req(aio_worker_t *, aio_req_t *, int *, int *); 275*f841f6adSraf extern int aiocancel_all(int); 276*f841f6adSraf extern void aio_panic(const char *); 277*f841f6adSraf extern aio_req_t *_aio_hash_find(aio_result_t *); 278*f841f6adSraf extern aio_req_t *_aio_hash_del(aio_result_t *); 279*f841f6adSraf extern void _aio_req_mark_done(aio_req_t *); 280*f841f6adSraf extern void _aio_waitn_wakeup(void); 281*f841f6adSraf extern aio_worker_t *_aio_worker_alloc(void); 282*f841f6adSraf extern void _aio_worker_free(void *); 283*f841f6adSraf extern aio_req_t *_aio_req_alloc(void); 284*f841f6adSraf extern void _aio_req_free(aio_req_t *); 285*f841f6adSraf extern aio_lio_t *_aio_lio_alloc(void); 286*f841f6adSraf extern void _aio_lio_free(aio_lio_t *); 287*f841f6adSraf extern int _aio_idle(aio_worker_t *); 288*f841f6adSraf extern void *_aio_do_request(void *); 289*f841f6adSraf extern void *_aio_do_notify(void *); 290*f841f6adSraf extern void _lio_remove(aio_req_t *); 291*f841f6adSraf extern aio_req_t *_aio_req_remove(aio_req_t *); 292*f841f6adSraf extern int _aio_get_timedelta(timespec_t *, timespec_t *); 293*f841f6adSraf extern aio_result_t *_aio_req_done(void); 294*f841f6adSraf extern void _aio_set_result(aio_req_t *, ssize_t, int); 295*f841f6adSraf extern int _aio_sigev_thread_init(struct sigevent *); 296*f841f6adSraf extern int _aio_sigev_thread(aiocb_t *); 297*f841f6adSraf #if !defined(_LP64) 298*f841f6adSraf extern int _aio_sigev_thread64(aiocb64_t *); 299*f841f6adSraf #endif 300*f841f6adSraf 301*f841f6adSraf extern aio_worker_t *_kaiowp; /* points to kaio cleanup thread */ 302*f841f6adSraf extern aio_worker_t *__workers_rw; /* list of all rw workers */ 303*f841f6adSraf extern aio_worker_t *__nextworker_rw; /* worker chosen for next rw request */ 304*f841f6adSraf extern int __rw_workerscnt; /* number of rw workers */ 305*f841f6adSraf extern aio_worker_t *__workers_no; /* list of all notification workers */ 306*f841f6adSraf extern aio_worker_t *__nextworker_no; /* worker chosen, next notification */ 307*f841f6adSraf extern int __no_workerscnt; /* number of notification workers */ 308*f841f6adSraf extern mutex_t __aio_initlock; /* makes aio initialization atomic */ 309*f841f6adSraf extern cond_t __aio_initcv; 310*f841f6adSraf extern int __aio_initbusy; 311*f841f6adSraf extern mutex_t __aio_mutex; /* global aio lock */ 312*f841f6adSraf extern cond_t _aio_iowait_cv; /* wait for userland I/Os */ 313*f841f6adSraf extern cond_t _aio_waitn_cv; /* wait for end of aio_waitn */ 314*f841f6adSraf extern int _max_workers; /* max number of workers permitted */ 315*f841f6adSraf extern int _min_workers; /* min number of workers */ 316*f841f6adSraf extern sigset_t _worker_set; /* worker's signal mask */ 317*f841f6adSraf extern int _aio_worker_cnt; /* number of AIO workers */ 318*f841f6adSraf extern int _sigio_enabled; /* when set, send SIGIO signal */ 319*f841f6adSraf extern pid_t __pid; /* process's PID */ 320*f841f6adSraf extern int __uaio_ok; /* indicates if aio is initialized */ 321*f841f6adSraf extern int _kaio_ok; /* indicates if kaio is initialized */ 322*f841f6adSraf extern pthread_key_t _aio_key; /* for thread-specific data */ 323*f841f6adSraf extern aio_req_t *_aio_done_tail; /* list of done requests */ 324*f841f6adSraf extern aio_req_t *_aio_done_head; 325*f841f6adSraf extern aio_req_t *_aio_doneq; 326*f841f6adSraf extern int _aio_freelist_cnt; 327*f841f6adSraf extern int _aio_allocated_cnt; 328*f841f6adSraf extern int _aio_donecnt; 329*f841f6adSraf extern int _aio_doneq_cnt; 330*f841f6adSraf extern int _aio_waitncnt; /* # of requests for aio_waitn */ 331*f841f6adSraf extern int _aio_outstand_cnt; /* # of outstanding requests */ 332*f841f6adSraf extern int _kaio_outstand_cnt; /* # of outstanding kaio requests */ 333*f841f6adSraf extern int _aio_req_done_cnt; /* req. done but not in "done queue" */ 334*f841f6adSraf extern int _aio_kernel_suspend; /* active kernel kaio calls */ 335*f841f6adSraf extern int _aio_suscv_cnt; /* aio_suspend calls waiting on cv's */ 336*f841f6adSraf extern int _aiowait_flag; /* when set, aiowait() is inprogress */ 337*f841f6adSraf extern int _aio_flags; /* see defines, above */ 338*f841f6adSraf extern uint32_t *_kaio_supported; 339*f841f6adSraf 340*f841f6adSraf extern const sigset_t maskset; /* all maskable signals */ 341*f841f6adSraf 342*f841f6adSraf #ifdef __cplusplus 343*f841f6adSraf } 344*f841f6adSraf #endif 345*f841f6adSraf 346*f841f6adSraf #endif /* _ASYNCIO_H */ 347