1f841f6adSraf /*
2f841f6adSraf * CDDL HEADER START
3f841f6adSraf *
4f841f6adSraf * The contents of this file are subject to the terms of the
5f841f6adSraf * Common Development and Distribution License (the "License").
6f841f6adSraf * You may not use this file except in compliance with the License.
7f841f6adSraf *
8f841f6adSraf * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9f841f6adSraf * or http://www.opensolaris.org/os/licensing.
10f841f6adSraf * See the License for the specific language governing permissions
11f841f6adSraf * and limitations under the License.
12f841f6adSraf *
13f841f6adSraf * When distributing Covered Code, include this CDDL HEADER in each
14f841f6adSraf * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15f841f6adSraf * If applicable, add the following below this CDDL HEADER, with the
16f841f6adSraf * fields enclosed by brackets "[]" replaced with your own identifying
17f841f6adSraf * information: Portions Copyright [yyyy] [name of copyright owner]
18f841f6adSraf *
19f841f6adSraf * CDDL HEADER END
20f841f6adSraf */
21f841f6adSraf
22f841f6adSraf /*
23*7257d1b4Sraf * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24f841f6adSraf * Use is subject to license terms.
25f841f6adSraf */
26f841f6adSraf
27*7257d1b4Sraf #include "lint.h"
28f841f6adSraf #include "thr_uberdata.h"
29f841f6adSraf #include "asyncio.h"
30f841f6adSraf
31f841f6adSraf /*
32f841f6adSraf * The aio subsystem memory allocation strategy:
33f841f6adSraf *
34f841f6adSraf * For each of the structure types we wish to allocate/free
35f841f6adSraf * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
36f841f6adSraf * chunks of memory which are then subdivided into individual
37f841f6adSraf * elements which are put into a free list from which allocations
38f841f6adSraf * are made and to which frees are returned.
39f841f6adSraf *
40f841f6adSraf * Chunks start small (8 Kbytes) and get larger (size doubling)
41f841f6adSraf * as more chunks are needed. This keeps memory usage small for
42f841f6adSraf * light use and fragmentation small for heavy use.
43f841f6adSraf *
44f841f6adSraf * Chunks are never unmapped except as an aftermath of fork()
45f841f6adSraf * in the child process, when they are all unmapped (because
46f841f6adSraf * all of the worker threads disappear in the child).
47f841f6adSraf */
48f841f6adSraf
/*
 * Size in bytes of the first chunk mapped for each structure type.
 * Each allocator below doubles its previous chunk size whenever it
 * needs another chunk.
 */
#define INITIAL_CHUNKSIZE (8 * 1024)
50f841f6adSraf
/*
 * The header structure for each chunk.
 * It sits at the very start of the mapped region; the elements carved
 * out of the chunk begin immediately after it (at chp + 1).
 * A pointer and a size_t ensure proper alignment for whatever follows.
 */
typedef struct chunk {
	struct chunk *chunk_next; /* linked list */
	size_t chunk_size; /* size of this chunk */
} chunk_t;
59f841f6adSraf
/*
 * chunk_list is the head of the singly linked list of every chunk mapped
 * by chunk_alloc(); postfork1_child_aio() walks it to unmap them all.
 * chunk_lock is held while a new chunk is linked onto the list.
 */
chunk_t *chunk_list = NULL; /* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;
62f841f6adSraf
63f841f6adSraf chunk_t *
chunk_alloc(size_t size)64f841f6adSraf chunk_alloc(size_t size)
65f841f6adSraf {
66f841f6adSraf chunk_t *chp = NULL;
67f841f6adSraf void *ptr;
68f841f6adSraf
69f841f6adSraf ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
70f841f6adSraf MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
71f841f6adSraf if (ptr != MAP_FAILED) {
72f841f6adSraf lmutex_lock(&chunk_lock);
73f841f6adSraf chp = ptr;
74f841f6adSraf chp->chunk_next = chunk_list;
75f841f6adSraf chunk_list = chp;
76f841f6adSraf chp->chunk_size = size;
77f841f6adSraf lmutex_unlock(&chunk_lock);
78f841f6adSraf }
79f841f6adSraf
80f841f6adSraf return (chp);
81f841f6adSraf }
82f841f6adSraf
/*
 * Free list of aio_worker_t structures, all protected by worker_lock.
 * Allocations are taken from worker_freelist (the head); freed blocks
 * are appended at worker_freelast (the tail).  worker_chunksize is the
 * size of the most recently allocated worker chunk, 0 before the first.
 */
aio_worker_t *worker_freelist = NULL; /* free list of worker structures */
aio_worker_t *worker_freelast = NULL;
size_t worker_chunksize = 0;
mutex_t worker_lock = DEFAULTMUTEX;
87f841f6adSraf
88f841f6adSraf /*
89f841f6adSraf * Allocate a worker control block.
90f841f6adSraf */
91f841f6adSraf aio_worker_t *
_aio_worker_alloc(void)92f841f6adSraf _aio_worker_alloc(void)
93f841f6adSraf {
94f841f6adSraf aio_worker_t *aiowp;
95f841f6adSraf chunk_t *chp;
96f841f6adSraf size_t chunksize;
97f841f6adSraf int nelem;
98f841f6adSraf int i;
99f841f6adSraf
100f841f6adSraf lmutex_lock(&worker_lock);
101f841f6adSraf if ((aiowp = worker_freelist) == NULL) {
102f841f6adSraf if ((chunksize = 2 * worker_chunksize) == 0)
103f841f6adSraf chunksize = INITIAL_CHUNKSIZE;
104f841f6adSraf if ((chp = chunk_alloc(chunksize)) == NULL) {
105f841f6adSraf lmutex_unlock(&worker_lock);
106f841f6adSraf return (NULL);
107f841f6adSraf }
108f841f6adSraf worker_chunksize = chunksize;
109f841f6adSraf worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
110f841f6adSraf nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
111f841f6adSraf for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
112f841f6adSraf aiowp->work_forw = aiowp + 1;
113f841f6adSraf worker_freelast = aiowp - 1;
114f841f6adSraf worker_freelast->work_forw = NULL;
115f841f6adSraf aiowp = worker_freelist;
116f841f6adSraf }
117f841f6adSraf if ((worker_freelist = aiowp->work_forw) == NULL)
118f841f6adSraf worker_freelast = NULL;
119f841f6adSraf lmutex_unlock(&worker_lock);
120f841f6adSraf
121f841f6adSraf aiowp->work_forw = NULL;
122f841f6adSraf (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
123f841f6adSraf (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);
124f841f6adSraf
125f841f6adSraf return (aiowp);
126f841f6adSraf }
127f841f6adSraf
128f841f6adSraf /*
129f841f6adSraf * Free a worker control block.
130f841f6adSraf * Declared with void *arg so it can be a pthread_key_create() destructor.
131f841f6adSraf */
132f841f6adSraf void
_aio_worker_free(void * arg)133f841f6adSraf _aio_worker_free(void *arg)
134f841f6adSraf {
135f841f6adSraf aio_worker_t *aiowp = arg;
136f841f6adSraf
137f841f6adSraf (void) mutex_destroy(&aiowp->work_qlock1);
138f841f6adSraf (void) cond_destroy(&aiowp->work_idle_cv);
139f841f6adSraf (void) memset(aiowp, 0, sizeof (*aiowp));
140f841f6adSraf
141f841f6adSraf lmutex_lock(&worker_lock);
142f841f6adSraf if (worker_freelast == NULL) {
143f841f6adSraf worker_freelist = worker_freelast = aiowp;
144f841f6adSraf } else {
145f841f6adSraf worker_freelast->work_forw = aiowp;
146f841f6adSraf worker_freelast = aiowp;
147f841f6adSraf }
148f841f6adSraf lmutex_unlock(&worker_lock);
149f841f6adSraf }
150f841f6adSraf
/*
 * Free list of aio_req_t structures, all protected by __aio_cache_lock.
 * Allocations are taken from _aio_freelist (the head); freed requests
 * are appended at _aio_freelast (the tail).  request_chunksize is the
 * size of the most recently allocated request chunk, 0 before the first.
 * _aio_freelist_cnt counts the elements currently on the free list and
 * _aio_allocated_cnt counts those handed out and not yet freed.
 */
aio_req_t *_aio_freelist = NULL; /* free list of request structures */
aio_req_t *_aio_freelast = NULL;
size_t request_chunksize = 0;
int _aio_freelist_cnt = 0;
int _aio_allocated_cnt = 0;
mutex_t __aio_cache_lock = DEFAULTMUTEX;
157f841f6adSraf
158f841f6adSraf /*
159f841f6adSraf * Allocate an aio request structure.
160f841f6adSraf */
161f841f6adSraf aio_req_t *
_aio_req_alloc(void)162f841f6adSraf _aio_req_alloc(void)
163f841f6adSraf {
164f841f6adSraf aio_req_t *reqp;
165f841f6adSraf chunk_t *chp;
166f841f6adSraf size_t chunksize;
167f841f6adSraf int nelem;
168f841f6adSraf int i;
169f841f6adSraf
170f841f6adSraf lmutex_lock(&__aio_cache_lock);
171f841f6adSraf if ((reqp = _aio_freelist) == NULL) {
172f841f6adSraf if ((chunksize = 2 * request_chunksize) == 0)
173f841f6adSraf chunksize = INITIAL_CHUNKSIZE;
174f841f6adSraf if ((chp = chunk_alloc(chunksize)) == NULL) {
175f841f6adSraf lmutex_unlock(&__aio_cache_lock);
176f841f6adSraf return (NULL);
177f841f6adSraf }
178f841f6adSraf request_chunksize = chunksize;
179f841f6adSraf _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
180f841f6adSraf nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
181f841f6adSraf for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
182f841f6adSraf reqp->req_state = AIO_REQ_FREE;
183f841f6adSraf reqp->req_link = reqp + 1;
184f841f6adSraf }
185f841f6adSraf _aio_freelast = reqp - 1;
186f841f6adSraf _aio_freelast->req_link = NULL;
187f841f6adSraf _aio_freelist_cnt = nelem;
188f841f6adSraf reqp = _aio_freelist;
189f841f6adSraf }
190f841f6adSraf if ((_aio_freelist = reqp->req_link) == NULL)
191f841f6adSraf _aio_freelast = NULL;
192f841f6adSraf _aio_freelist_cnt--;
193f841f6adSraf _aio_allocated_cnt++;
194f841f6adSraf lmutex_unlock(&__aio_cache_lock);
195f841f6adSraf
196f841f6adSraf ASSERT(reqp->req_state == AIO_REQ_FREE);
197f841f6adSraf reqp->req_state = 0;
198f841f6adSraf reqp->req_link = NULL;
199f841f6adSraf reqp->req_sigevent.sigev_notify = SIGEV_NONE;
200f841f6adSraf
201f841f6adSraf return (reqp);
202f841f6adSraf }
203f841f6adSraf
204f841f6adSraf /*
205f841f6adSraf * Free an aio request structure.
206f841f6adSraf */
207f841f6adSraf void
_aio_req_free(aio_req_t * reqp)208f841f6adSraf _aio_req_free(aio_req_t *reqp)
209f841f6adSraf {
210f841f6adSraf ASSERT(reqp->req_state != AIO_REQ_FREE &&
211f841f6adSraf reqp->req_state != AIO_REQ_DONEQ);
212f841f6adSraf (void) memset(reqp, 0, sizeof (*reqp));
213f841f6adSraf reqp->req_state = AIO_REQ_FREE;
214f841f6adSraf
215f841f6adSraf lmutex_lock(&__aio_cache_lock);
216f841f6adSraf if (_aio_freelast == NULL) {
217f841f6adSraf _aio_freelist = _aio_freelast = reqp;
218f841f6adSraf } else {
219f841f6adSraf _aio_freelast->req_link = reqp;
220f841f6adSraf _aio_freelast = reqp;
221f841f6adSraf }
222f841f6adSraf _aio_freelist_cnt++;
223f841f6adSraf _aio_allocated_cnt--;
224f841f6adSraf lmutex_unlock(&__aio_cache_lock);
225f841f6adSraf }
226f841f6adSraf
/*
 * Free list of aio_lio_t structures, all protected by __lio_mutex.
 * Allocations are taken from _lio_head_freelist (the head); freed heads
 * are appended at _lio_head_freelast (the tail).  lio_head_chunksize is
 * the size of the most recently allocated listio chunk, 0 before the
 * first.  _lio_alloc accumulates the number of elements carved out of
 * chunks and _lio_free counts the elements currently on the free list.
 */
aio_lio_t *_lio_head_freelist = NULL; /* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;
size_t lio_head_chunksize = 0;
int _lio_alloc = 0;
int _lio_free = 0;
mutex_t __lio_mutex = DEFAULTMUTEX;
233f841f6adSraf
234f841f6adSraf /*
235f841f6adSraf * Allocate a listio head structure.
236f841f6adSraf */
237f841f6adSraf aio_lio_t *
_aio_lio_alloc(void)238f841f6adSraf _aio_lio_alloc(void)
239f841f6adSraf {
240f841f6adSraf aio_lio_t *head;
241f841f6adSraf chunk_t *chp;
242f841f6adSraf size_t chunksize;
243f841f6adSraf int nelem;
244f841f6adSraf int i;
245f841f6adSraf
246f841f6adSraf lmutex_lock(&__lio_mutex);
247f841f6adSraf if ((head = _lio_head_freelist) == NULL) {
248f841f6adSraf if ((chunksize = 2 * lio_head_chunksize) == 0)
249f841f6adSraf chunksize = INITIAL_CHUNKSIZE;
250f841f6adSraf if ((chp = chunk_alloc(chunksize)) == NULL) {
251f841f6adSraf lmutex_unlock(&__lio_mutex);
252f841f6adSraf return (NULL);
253f841f6adSraf }
254f841f6adSraf lio_head_chunksize = chunksize;
255f841f6adSraf _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
256f841f6adSraf nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
257f841f6adSraf for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
258f841f6adSraf head->lio_next = head + 1;
259f841f6adSraf _lio_head_freelast = head - 1;
260f841f6adSraf _lio_head_freelast->lio_next = NULL;
261f841f6adSraf _lio_alloc += nelem;
262f841f6adSraf _lio_free = nelem;
263f841f6adSraf head = _lio_head_freelist;
264f841f6adSraf }
265f841f6adSraf if ((_lio_head_freelist = head->lio_next) == NULL)
266f841f6adSraf _lio_head_freelast = NULL;
267f841f6adSraf _lio_free--;
268f841f6adSraf lmutex_unlock(&__lio_mutex);
269f841f6adSraf
270f841f6adSraf ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
271f841f6adSraf head->lio_next = NULL;
272f841f6adSraf head->lio_port = -1;
273f841f6adSraf (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
274f841f6adSraf (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);
275f841f6adSraf
276f841f6adSraf return (head);
277f841f6adSraf }
278f841f6adSraf
279f841f6adSraf /*
280f841f6adSraf * Free a listio head structure.
281f841f6adSraf */
282f841f6adSraf void
_aio_lio_free(aio_lio_t * head)283f841f6adSraf _aio_lio_free(aio_lio_t *head)
284f841f6adSraf {
285f841f6adSraf ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
286f841f6adSraf (void) mutex_destroy(&head->lio_mutex);
287f841f6adSraf (void) cond_destroy(&head->lio_cond_cv);
288f841f6adSraf (void) memset(head, 0, sizeof (*head));
289f841f6adSraf
290f841f6adSraf lmutex_lock(&__lio_mutex);
291f841f6adSraf if (_lio_head_freelast == NULL) {
292f841f6adSraf _lio_head_freelist = _lio_head_freelast = head;
293f841f6adSraf } else {
294f841f6adSraf _lio_head_freelast->lio_next = head;
295f841f6adSraf _lio_head_freelast = head;
296f841f6adSraf }
297f841f6adSraf _lio_free++;
298f841f6adSraf lmutex_unlock(&__lio_mutex);
299f841f6adSraf }
300f841f6adSraf
301f841f6adSraf void
postfork1_child_aio(void)302f841f6adSraf postfork1_child_aio(void)
303f841f6adSraf {
304f841f6adSraf chunk_t *chp;
305f841f6adSraf
306f841f6adSraf /*
307f841f6adSraf * All of the workers are gone; free their structures.
308f841f6adSraf */
309f841f6adSraf if (_kaio_supported != NULL) {
310f841f6adSraf (void) munmap((void *)_kaio_supported,
311f841f6adSraf MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
312f841f6adSraf _kaio_supported = NULL;
313f841f6adSraf }
314f841f6adSraf if (_aio_hash != NULL) {
315f841f6adSraf (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
316f841f6adSraf _aio_hash = NULL;
317f841f6adSraf }
318f841f6adSraf for (chp = chunk_list; chp != NULL; chp = chunk_list) {
319f841f6adSraf chunk_list = chp->chunk_next;
320f841f6adSraf (void) munmap((void *)chp, chp->chunk_size);
321f841f6adSraf }
322f841f6adSraf
323f841f6adSraf /*
324f841f6adSraf * Reinitialize global variables
325f841f6adSraf */
326f841f6adSraf
327f841f6adSraf worker_freelist = NULL;
328f841f6adSraf worker_freelast = NULL;
329f841f6adSraf worker_chunksize = 0;
330f841f6adSraf (void) mutex_init(&worker_lock, USYNC_THREAD, NULL);
331f841f6adSraf
332f841f6adSraf _aio_freelist = NULL;
333f841f6adSraf _aio_freelast = NULL;
334f841f6adSraf request_chunksize = 0;
335f841f6adSraf _aio_freelist_cnt = 0;
336f841f6adSraf _aio_allocated_cnt = 0;
337f841f6adSraf (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);
338f841f6adSraf
339f841f6adSraf _lio_head_freelist = NULL;
340f841f6adSraf _lio_head_freelast = NULL;
341f841f6adSraf lio_head_chunksize = 0;
342f841f6adSraf _lio_alloc = 0;
343f841f6adSraf _lio_free = 0;
344f841f6adSraf (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);
345f841f6adSraf
346f841f6adSraf (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
347f841f6adSraf (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
348f841f6adSraf __aio_initbusy = 0;
349f841f6adSraf
350f841f6adSraf (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
351f841f6adSraf (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
352f841f6adSraf (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);
353f841f6adSraf
354f841f6adSraf _kaio_ok = 0;
355f841f6adSraf __uaio_ok = 0;
356f841f6adSraf
357f841f6adSraf _kaiowp = NULL;
358f841f6adSraf
359f841f6adSraf __workers_rw = NULL;
360f841f6adSraf __nextworker_rw = NULL;
361f841f6adSraf __rw_workerscnt = 0;
362f841f6adSraf
363f841f6adSraf __workers_no = NULL;
364f841f6adSraf __nextworker_no = NULL;
365f841f6adSraf __no_workerscnt = 0;
366f841f6adSraf
367f841f6adSraf _aio_worker_cnt = 0;
368f841f6adSraf
369f841f6adSraf _aio_done_head = NULL;
370f841f6adSraf _aio_done_tail = NULL;
371f841f6adSraf _aio_donecnt = 0;
372f841f6adSraf
373f841f6adSraf _aio_doneq = NULL;
374f841f6adSraf _aio_doneq_cnt = 0;
375f841f6adSraf
376f841f6adSraf _aio_waitncnt = 0;
377f841f6adSraf _aio_outstand_cnt = 0;
378f841f6adSraf _kaio_outstand_cnt = 0;
379f841f6adSraf _aio_req_done_cnt = 0;
380f841f6adSraf _aio_kernel_suspend = 0;
381f841f6adSraf _aio_suscv_cnt = 0;
382f841f6adSraf
383f841f6adSraf _aiowait_flag = 0;
384f841f6adSraf _aio_flags = 0;
385f841f6adSraf }
386f841f6adSraf
387f841f6adSraf #define DISPLAY(var) \
388f841f6adSraf (void) fprintf(stderr, #var "\t= %d\n", var)
389f841f6adSraf
390f841f6adSraf static void
_aio_exit_info(void)391f841f6adSraf _aio_exit_info(void)
392f841f6adSraf {
393f841f6adSraf if ((_kaio_ok | __uaio_ok) == 0)
394f841f6adSraf return;
395f841f6adSraf (void) fprintf(stderr, "\n");
396f841f6adSraf DISPLAY(_aio_freelist_cnt);
397f841f6adSraf DISPLAY(_aio_allocated_cnt);
398f841f6adSraf DISPLAY(_lio_alloc);
399f841f6adSraf DISPLAY(_lio_free);
400f841f6adSraf DISPLAY(__rw_workerscnt);
401f841f6adSraf DISPLAY(__no_workerscnt);
402f841f6adSraf DISPLAY(_aio_worker_cnt);
403f841f6adSraf DISPLAY(_aio_donecnt);
404f841f6adSraf DISPLAY(_aio_doneq_cnt);
405f841f6adSraf DISPLAY(_aio_waitncnt);
406f841f6adSraf DISPLAY(_aio_outstand_cnt);
407f841f6adSraf DISPLAY(_kaio_outstand_cnt);
408f841f6adSraf DISPLAY(_aio_req_done_cnt);
409f841f6adSraf DISPLAY(_aio_kernel_suspend);
410f841f6adSraf DISPLAY(_aio_suscv_cnt);
411f841f6adSraf DISPLAY(_aiowait_flag);
412f841f6adSraf DISPLAY(_aio_flags);
413f841f6adSraf }
414f841f6adSraf
415f841f6adSraf void
init_aio(void)416f841f6adSraf init_aio(void)
417f841f6adSraf {
418f841f6adSraf char *str;
419f841f6adSraf
420f841f6adSraf (void) pthread_key_create(&_aio_key, _aio_worker_free);
421f841f6adSraf if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
422f841f6adSraf if ((_min_workers = atoi(str)) <= 0)
423f841f6adSraf _min_workers = 4;
424f841f6adSraf }
425f841f6adSraf if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
426f841f6adSraf if ((_max_workers = atoi(str)) <= 0)
427f841f6adSraf _max_workers = 256;
428f841f6adSraf if (_max_workers < _min_workers + 1)
429f841f6adSraf _max_workers = _min_workers + 1;
430f841f6adSraf }
431f841f6adSraf if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
432f841f6adSraf (void) atexit(_aio_exit_info);
433f841f6adSraf }
434