xref: /titanic_44/usr/src/lib/libc/inc/asyncio.h (revision f841f6ad96ea6675d6c6b35c749eaac601799fdf)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_ASYNCIO_H
#define	_ASYNCIO_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <thread.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <siginfo.h>
#include <aio.h>
#include <limits.h>
#include <ucontext.h>
#include <sys/asynch.h>
#include <sys/mman.h>

#if !defined(_LP64)
#define	AIOSTKSIZE	(64 * 1024)
#else
#define	AIOSTKSIZE	(128 * 1024)
#endif

#define	SIGAIOCANCEL		SIGLWP	/* special aio cancelation signal */

#define	AIO_WAITN_MAXIOCBS	32768	/* max. iocbs per system call */
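
/*
 * Illustrative sketch only, not a statement of what the implementation
 * actually does: AIOSTKSIZE sizes the stack of each worker thread and
 * SIGAIOCANCEL is the signal used to interrupt a worker so a request it
 * is servicing can be cancelled.  _aio_worker_alloc() and
 * _aio_do_request() are declared later in this header; the thr_create()
 * flags shown here are an assumption.
 *
 *	aio_worker_t *aiowp = _aio_worker_alloc();
 *
 *	(void) thr_create(NULL, AIOSTKSIZE, _aio_do_request, aiowp,
 *	    THR_DAEMON, &aiowp->work_tid);
 *	...
 *	(void) thr_kill(aiowp->work_tid, SIGAIOCANCEL);
 */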

/*
 * Declare structure types.  The structures themselves are defined below.
 */
typedef struct aio_args		aio_args_t;
typedef struct aio_lio		aio_lio_t;
typedef struct notif_param	notif_param_t;
typedef struct aio_req		aio_req_t;
typedef struct aio_worker	aio_worker_t;
typedef struct aio_hash		aio_hash_t;

struct aio_args {
	int		fd;
	caddr_t		buf;
	size_t		bufsz;
	offset_t	offset;
};

/*
 * list head for UFS list I/O
 */
struct aio_lio {
	mutex_t		lio_mutex;	/* list mutex */
	cond_t		lio_cond_cv;	/* list notification for I/O done */
	aio_lio_t	*lio_next;	/* pointer to next on freelist */
	char		lio_mode;	/* LIO_WAIT/LIO_NOWAIT */
	char		lio_canned;	/* lio was canceled */
	char		lio_largefile;	/* largefile operation */
	char		lio_waiting;	/* waiting in __lio_listio() */
	int		lio_nent;	/* number of list I/Os */
	int		lio_refcnt;	/* number of outstanding I/Os */
	int		lio_event;	/* Event number for notification */
	int		lio_port;	/* Port number for notification */
	int		lio_signo;	/* Signal number for notification */
	union sigval	lio_sigval;	/* Signal parameter */
	uintptr_t	lio_object;	/* for SIGEV_THREAD or SIGEV_PORT */
	struct sigevent	*lio_sigevent;	/* Notification function and attr. */
};
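
/*
 * Illustrative sketch only (an assumption about how the fields above fit
 * together, not code lifted from the implementation): when one request
 * belonging to a list finishes, the completion path would drop the list
 * head's reference count under lio_mutex and wake a thread blocked in
 * lio_listio(LIO_WAIT, ...) once the whole list has drained.
 *
 *	(void) mutex_lock(&head->lio_mutex);
 *	if (--head->lio_refcnt == 0 && head->lio_waiting)
 *		(void) cond_signal(&head->lio_cond_cv);
 *	(void) mutex_unlock(&head->lio_mutex);
 */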

/*
 * Notification parameters
 */
struct notif_param {
	int		np_signo;	/* SIGEV_SIGNAL */
	int		np_port;	/* SIGEV_THREAD or SIGEV_PORT */
	void		*np_user;
	int		np_event;
	uintptr_t	np_object;
	int		np_lio_signo;	/* listio: SIGEV_SIGNAL */
	int		np_lio_port;	/* listio: SIGEV_THREAD or SIGEV_PORT */
	void		*np_lio_user;
	int		np_lio_event;
	uintptr_t	np_lio_object;
};

struct aio_req {
	/*
	 * fields protected by the __aio_mutex lock.
	 */
	aio_req_t *req_link;		/* hash/freelist chain link */
	/*
	 * When the request is on the done queue, req_next is protected
	 * by the __aio_mutex lock; when it is on a work queue, req_next
	 * is protected by that worker's work_qlock1 lock.
	 */
	aio_req_t *req_next;		/* request/done queue link */
	aio_req_t *req_prev;		/* double linked list */
	/*
	 * fields protected by a worker's work_qlock1 lock.
	 */
	char		req_state;	/* AIO_REQ_QUEUED, ... */
	/*
	 * fields require no locking.
	 */
	char		req_type;	/* AIO_POSIX_REQ or not */
	char		req_largefile;	/* largefile operation */
	char		req_op;		/* AIOREAD, etc. */
	aio_worker_t	*req_worker;	/* associate request with worker */
	aio_result_t	*req_resultp;	/* address of result buffer */
	aio_args_t	req_args;	/* arglist */
	aio_lio_t	*req_head;	/* list head for LIO */
	struct sigevent	req_sigevent;
	void		*req_aiocbp;	/* ptr to aiocb or aiocb64 */
	notif_param_t	req_notify;	/* notification parameters */
};
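
/*
 * Illustrative sketch only (assumed usage; compare _aio_set_result()
 * below): when the I/O finishes, the worker records the return value
 * and error of the operation in the caller-visible aio_result_t that
 * req_resultp points at.  The names retval and error are hypothetical.
 *
 *	aio_result_t *resultp = reqp->req_resultp;
 *
 *	resultp->aio_return = retval;
 *	resultp->aio_errno = error;
 */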

/* special lio type that destroys itself when lio refcnt becomes zero */
#define	LIO_FSYNC	(LIO_WAIT+1)
#define	LIO_DESTROY	(LIO_FSYNC+1)

/* lio flags */
#define	LIO_FSYNC_CANCELED	0x1

/* values for req_state */

#define	AIO_REQ_QUEUED		1
#define	AIO_REQ_INPROGRESS	2
#define	AIO_REQ_CANCELED	3
#define	AIO_REQ_DONE		4
#define	AIO_REQ_FREE		5
#define	AIO_REQ_DONEQ		6
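
/*
 * Presumed lifecycle (an inference from the names above, not taken from
 * the implementation): a request starts out AIO_REQ_QUEUED on a worker's
 * queue, becomes AIO_REQ_INPROGRESS while the worker performs the I/O,
 * and then reaches one of the completed states (AIO_REQ_DONE, or
 * AIO_REQ_DONEQ while sitting on the done queue) unless it was
 * AIO_REQ_CANCELED first; a recycled request is AIO_REQ_FREE.
 */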

/* use KAIO in _aio_rw() */
#define	AIO_NO_KAIO		0x0
#define	AIO_KAIO		0x1
#define	AIO_NO_DUPS		0x2

#define	AIO_POSIX_REQ		0x1

#define	CHECK			1
#define	NOCHECK			2
#define	CHECKED			3
#define	USERAIO			4
#define	USERAIO_DONE		5

/* values for _aio_flags */

/* if set, _aiodone() notifies aio_waitn about done requests */
#define	AIO_WAIT_INPROGRESS	0x1
/* if set, _aiodone() wakes up functions waiting for completed I/Os */
#define	AIO_IO_WAITING		0x2
#define	AIO_LIB_WAITN		0x4	/* aio_waitn in progress */
#define	AIO_LIB_WAITN_PENDING	0x8	/* aio_waitn requests pending */

/*
 * Before a kaio() system call, the fd is checked to ensure that
 * kernel asynchronous I/O is supported for this file.
 * The only way to find out is if a kaio() call returns ENOTSUP,
 * so the default is always to try the kaio() call.  Only in
 * the specific instance of a kaio() call returning ENOTSUP
 * do we stop submitting kaio() calls for that fd.
 * If the fd is outside the array bounds, we always allow the kaio()
 * call.
 *
 * The only way that an fd entry can go from ENOTSUP to supported
 * is if that fd is freed up by a close(), and close() will clear
 * the entry for that fd.
 *
 * Each fd gets a bit in the array _kaio_supported[]:
 *
 *	uint32_t	_kaio_supported[MAX_KAIO_FDARRAY_SIZE];
 *
 * The array has MAX_KAIO_FDARRAY_SIZE 32-bit elements, occupying 8 KB.
 * If more than (MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE) file
 * descriptors are in use, this can be expanded.
 */

#define	MAX_KAIO_FDARRAY_SIZE		2048
#define	KAIO_FDARRAY_ELEM_SIZE		WORD_BIT	/* uint32_t */

#define	MAX_KAIO_FDS	(MAX_KAIO_FDARRAY_SIZE * KAIO_FDARRAY_ELEM_SIZE)

#define	VALID_FD(fdes)		((fdes) >= 0 && (fdes) < MAX_KAIO_FDS)

#define	KAIO_SUPPORTED(fdes)						\
	(!VALID_FD(fdes) ||						\
		((_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &	\
		(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))) == 0))

#define	SET_KAIO_NOT_SUPPORTED(fdes)					\
	if (VALID_FD(fdes))						\
		_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] |=	\
		(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))

#define	CLEAR_KAIO_SUPPORTED(fdes)					\
	if (VALID_FD(fdes))						\
		_kaio_supported[(fdes) / KAIO_FDARRAY_ELEM_SIZE] &=	\
		~(uint32_t)(1 << ((fdes) % KAIO_FDARRAY_ELEM_SIZE))
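
/*
 * Illustrative sketch only (hypothetical call site; the argument list
 * passed to _kaio() here is an assumption): a submission path would
 * consult KAIO_SUPPORTED() before trying the kernel, latch an ENOTSUP
 * answer with SET_KAIO_NOT_SUPPORTED() so the fd is not offered to
 * kaio() again, and close() would use CLEAR_KAIO_SUPPORTED() when the
 * fd is recycled.
 *
 *	if (_kaio_ok > 0 && KAIO_SUPPORTED(fd)) {
 *		if ((int)_kaio(AIOREAD, fd, buf, bufsz, offset,
 *		    resultp) == -1 && errno == ENOTSUP)
 *			SET_KAIO_NOT_SUPPORTED(fd);
 *	}
 */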

struct aio_worker {
	aio_worker_t *work_forw;	/* forward link in list of workers */
	aio_worker_t *work_backw;	/* backwards link in list of workers */
	mutex_t work_qlock1;		/* lock for work queue 1 */
	cond_t work_idle_cv;		/* place to sleep when idle */
	aio_req_t *work_head1;		/* head of work request queue 1 */
	aio_req_t *work_tail1;		/* tail of work request queue 1 */
	aio_req_t *work_next1;		/* work queue one's next pointer */
	aio_req_t *work_prev1;		/* last request done from queue 1 */
	aio_req_t *work_req;		/* active work request */
	thread_t work_tid;		/* worker's thread-id */
	int work_count1;		/* length of work queue one */
	int work_done1;			/* number of requests done */
	int work_minload1;		/* min length of queue */
	int work_idleflg;		/* when set, worker is idle */
	sigjmp_buf work_jmp_buf;	/* cancellation point */
};
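
/*
 * Illustrative sketch only (assumed usage, not a copy of the
 * implementation): appending a request to a worker shows how the
 * queue-1 fields and work_qlock1 are meant to be used together: the
 * links and counters are touched only while holding work_qlock1, and
 * an idle worker sleeping on work_idle_cv is woken after the append.
 *
 *	(void) mutex_lock(&aiowp->work_qlock1);
 *	reqp->req_worker = aiowp;
 *	reqp->req_state = AIO_REQ_QUEUED;
 *	reqp->req_next = NULL;
 *	reqp->req_prev = aiowp->work_tail1;
 *	if (aiowp->work_tail1 != NULL)
 *		aiowp->work_tail1->req_next = reqp;
 *	else
 *		aiowp->work_head1 = reqp;
 *	aiowp->work_tail1 = reqp;
 *	aiowp->work_count1++;
 *	if (aiowp->work_idleflg) {
 *		aiowp->work_idleflg = 0;
 *		(void) cond_signal(&aiowp->work_idle_cv);
 *	}
 *	(void) mutex_unlock(&aiowp->work_qlock1);
 */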

struct aio_hash {			/* resultp hash table */
	mutex_t		hash_lock;
	aio_req_t	*hash_ptr;
#if !defined(_LP64)
	void		*hash_pad;	/* ensure sizeof (aio_hash_t) == 32 */
#endif
};

extern aio_hash_t *_aio_hash;

#define	HASHSZ			2048	/* power of 2 */
#define	AIOHASH(resultp)	((((uintptr_t)(resultp) >> 17) ^ \
				((uintptr_t)(resultp) >> 2)) & (HASHSZ - 1))
#define	POSIX_AIO(x)		((x)->req_type == AIO_POSIX_REQ)
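
/*
 * Illustrative sketch only (an assumption about how the table is meant
 * to be searched; compare _aio_hash_find() below): the result-buffer
 * address selects a bucket via AIOHASH(), and the bucket's chain of
 * requests, linked through req_link, is walked under hash_lock.
 *
 *	aio_hash_t *hashp = _aio_hash + AIOHASH(resultp);
 *	aio_req_t *reqp;
 *
 *	(void) mutex_lock(&hashp->hash_lock);
 *	for (reqp = hashp->hash_ptr; reqp != NULL; reqp = reqp->req_link)
 *		if (reqp->req_resultp == resultp)
 *			break;
 *	(void) mutex_unlock(&hashp->hash_lock);
 */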

extern int __uaio_init(void);
extern void _kaio_init(void);
extern intptr_t _kaio(int, ...);
extern int _aiorw(int, caddr_t, int, offset_t, int, aio_result_t *, int);
extern int _aio_rw(aiocb_t *, aio_lio_t *, aio_worker_t **, int, int);
#if !defined(_LP64)
extern int _aio_rw64(aiocb64_t *, aio_lio_t *, aio_worker_t **, int, int);
#endif
extern int _aio_create_worker(aio_req_t *, int);
extern int _aio_cancel_req(aio_worker_t *, aio_req_t *, int *, int *);
extern int aiocancel_all(int);
extern void aio_panic(const char *);
extern aio_req_t *_aio_hash_find(aio_result_t *);
extern aio_req_t *_aio_hash_del(aio_result_t *);
extern void _aio_req_mark_done(aio_req_t *);
extern void _aio_waitn_wakeup(void);
extern aio_worker_t *_aio_worker_alloc(void);
extern void _aio_worker_free(void *);
extern aio_req_t *_aio_req_alloc(void);
extern void _aio_req_free(aio_req_t *);
extern aio_lio_t *_aio_lio_alloc(void);
extern void _aio_lio_free(aio_lio_t *);
extern int _aio_idle(aio_worker_t *);
extern void *_aio_do_request(void *);
extern void *_aio_do_notify(void *);
extern void _lio_remove(aio_req_t *);
extern aio_req_t *_aio_req_remove(aio_req_t *);
extern int _aio_get_timedelta(timespec_t *, timespec_t *);
extern aio_result_t *_aio_req_done(void);
extern void _aio_set_result(aio_req_t *, ssize_t, int);
extern int _aio_sigev_thread_init(struct sigevent *);
extern int _aio_sigev_thread(aiocb_t *);
#if !defined(_LP64)
extern int _aio_sigev_thread64(aiocb64_t *);
#endif

extern aio_worker_t *_kaiowp;		/* points to kaio cleanup thread */
extern aio_worker_t *__workers_rw;	/* list of all rw workers */
extern aio_worker_t *__nextworker_rw;	/* worker chosen for next rw request */
extern int __rw_workerscnt;		/* number of rw workers */
extern aio_worker_t *__workers_no;	/* list of all notification workers */
extern aio_worker_t *__nextworker_no;	/* worker chosen, next notification */
extern int __no_workerscnt;		/* number of notification workers */
extern mutex_t __aio_initlock;		/* makes aio initialization atomic */
extern cond_t __aio_initcv;
extern int __aio_initbusy;
extern mutex_t __aio_mutex;		/* global aio lock */
extern cond_t _aio_iowait_cv;		/* wait for userland I/Os */
extern cond_t _aio_waitn_cv;		/* wait for end of aio_waitn */
extern int _max_workers;		/* max number of workers permitted */
extern int _min_workers;		/* min number of workers */
extern sigset_t _worker_set;		/* worker's signal mask */
extern int _aio_worker_cnt;		/* number of AIO workers */
extern int _sigio_enabled;		/* when set, send SIGIO signal */
extern pid_t __pid;			/* process's PID */
extern int __uaio_ok;			/* indicates if aio is initialized */
extern int _kaio_ok;			/* indicates if kaio is initialized */
extern pthread_key_t _aio_key;		/* for thread-specific data */
extern aio_req_t *_aio_done_tail;	/* list of done requests */
extern aio_req_t *_aio_done_head;
extern aio_req_t *_aio_doneq;
extern int _aio_freelist_cnt;
extern int _aio_allocated_cnt;
extern int _aio_donecnt;
extern int _aio_doneq_cnt;
extern int _aio_waitncnt;		/* # of requests for aio_waitn */
extern int _aio_outstand_cnt;		/* # of outstanding requests */
extern int _kaio_outstand_cnt;		/* # of outstanding kaio requests */
extern int _aio_req_done_cnt;		/* req. done but not in "done queue" */
extern int _aio_kernel_suspend;		/* active kernel kaio calls */
extern int _aio_suscv_cnt;		/* aio_suspend calls waiting on cv's */
extern int _aiowait_flag;		/* when set, aiowait() is in progress */
extern int _aio_flags;			/* see defines, above */
extern uint32_t *_kaio_supported;

extern const sigset_t maskset;		/* all maskable signals */

#ifdef	__cplusplus
}
#endif

#endif	/* _ASYNCIO_H */