1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include "asyncio.h"
32
33 /*
34 * The aio subsystem memory allocation strategy:
35 *
36 * For each of the structure types we wish to allocate/free
37 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
38 * chunks of memory which are then subdivided into individual
39 * elements which are put into a free list from which allocations
40 * are made and to which frees are returned.
41 *
42 * Chunks start small (8 Kbytes) and get larger (size doubling)
43 * as more chunks are needed. This keeps memory usage small for
44 * light use and fragmentation small for heavy use.
45 *
46 * Chunks are never unmapped except as an aftermath of fork()
47 * in the child process, when they are all unmapped (because
48 * all of the worker threads disappear in the child).
49 */
50
51 #define INITIAL_CHUNKSIZE (8 * 1024)
52
/*
 * The header structure for each chunk.
 * It sits at the very start of the mmap()ed region; the elements carved
 * out of the chunk begin immediately after it.
 * A pointer and a size_t ensures proper alignment for whatever follows.
 */
typedef struct chunk {
	struct chunk	*chunk_next;	/* linked list */
	size_t		chunk_size;	/* size of this chunk, header included */
} chunk_t;
61
/*
 * chunk_alloc() pushes every chunk it maps onto chunk_list while holding
 * chunk_lock; postfork1_child_aio() walks the list to unmap the chunks.
 */
chunk_t *chunk_list = NULL;		/* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;	/* held while chunk_list is updated */
64
65 chunk_t *
chunk_alloc(size_t size)66 chunk_alloc(size_t size)
67 {
68 chunk_t *chp = NULL;
69 void *ptr;
70
71 ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
72 MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
73 if (ptr != MAP_FAILED) {
74 lmutex_lock(&chunk_lock);
75 chp = ptr;
76 chp->chunk_next = chunk_list;
77 chunk_list = chp;
78 chp->chunk_size = size;
79 lmutex_unlock(&chunk_lock);
80 }
81
82 return (chp);
83 }
84
/*
 * Cache of worker control blocks.  _aio_worker_alloc() and
 * _aio_worker_free() update these variables while holding worker_lock.
 */
aio_worker_t *worker_freelist = NULL;	/* free list of worker structures */
aio_worker_t *worker_freelast = NULL;	/* last element of the free list */
size_t worker_chunksize = 0;		/* size of the newest worker chunk */
mutex_t worker_lock = DEFAULTMUTEX;
89
90 /*
91 * Allocate a worker control block.
92 */
93 aio_worker_t *
_aio_worker_alloc(void)94 _aio_worker_alloc(void)
95 {
96 aio_worker_t *aiowp;
97 chunk_t *chp;
98 size_t chunksize;
99 int nelem;
100 int i;
101
102 lmutex_lock(&worker_lock);
103 if ((aiowp = worker_freelist) == NULL) {
104 if ((chunksize = 2 * worker_chunksize) == 0)
105 chunksize = INITIAL_CHUNKSIZE;
106 if ((chp = chunk_alloc(chunksize)) == NULL) {
107 lmutex_unlock(&worker_lock);
108 return (NULL);
109 }
110 worker_chunksize = chunksize;
111 worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
112 nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
113 for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
114 aiowp->work_forw = aiowp + 1;
115 worker_freelast = aiowp - 1;
116 worker_freelast->work_forw = NULL;
117 aiowp = worker_freelist;
118 }
119 if ((worker_freelist = aiowp->work_forw) == NULL)
120 worker_freelast = NULL;
121 lmutex_unlock(&worker_lock);
122
123 aiowp->work_forw = NULL;
124 (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
125 (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);
126
127 return (aiowp);
128 }
129
130 /*
131 * Free a worker control block.
132 * Declared with void *arg so it can be a pthread_key_create() destructor.
133 */
134 void
_aio_worker_free(void * arg)135 _aio_worker_free(void *arg)
136 {
137 aio_worker_t *aiowp = arg;
138
139 (void) mutex_destroy(&aiowp->work_qlock1);
140 (void) cond_destroy(&aiowp->work_idle_cv);
141 (void) memset(aiowp, 0, sizeof (*aiowp));
142
143 lmutex_lock(&worker_lock);
144 if (worker_freelast == NULL) {
145 worker_freelist = worker_freelast = aiowp;
146 } else {
147 worker_freelast->work_forw = aiowp;
148 worker_freelast = aiowp;
149 }
150 lmutex_unlock(&worker_lock);
151 }
152
/*
 * Cache of aio request structures.  _aio_req_alloc() and _aio_req_free()
 * update these variables while holding __aio_cache_lock.
 */
aio_req_t *_aio_freelist = NULL;	/* free list of request structures */
aio_req_t *_aio_freelast = NULL;	/* last element of the free list */
size_t request_chunksize = 0;		/* size of the newest request chunk */
int _aio_freelist_cnt = 0;		/* requests currently on the free list */
int _aio_allocated_cnt = 0;		/* requests currently handed out */
mutex_t __aio_cache_lock = DEFAULTMUTEX;
159
160 /*
161 * Allocate an aio request structure.
162 */
163 aio_req_t *
_aio_req_alloc(void)164 _aio_req_alloc(void)
165 {
166 aio_req_t *reqp;
167 chunk_t *chp;
168 size_t chunksize;
169 int nelem;
170 int i;
171
172 lmutex_lock(&__aio_cache_lock);
173 if ((reqp = _aio_freelist) == NULL) {
174 if ((chunksize = 2 * request_chunksize) == 0)
175 chunksize = INITIAL_CHUNKSIZE;
176 if ((chp = chunk_alloc(chunksize)) == NULL) {
177 lmutex_unlock(&__aio_cache_lock);
178 return (NULL);
179 }
180 request_chunksize = chunksize;
181 _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
182 nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
183 for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
184 reqp->req_state = AIO_REQ_FREE;
185 reqp->req_link = reqp + 1;
186 }
187 _aio_freelast = reqp - 1;
188 _aio_freelast->req_link = NULL;
189 _aio_freelist_cnt = nelem;
190 reqp = _aio_freelist;
191 }
192 if ((_aio_freelist = reqp->req_link) == NULL)
193 _aio_freelast = NULL;
194 _aio_freelist_cnt--;
195 _aio_allocated_cnt++;
196 lmutex_unlock(&__aio_cache_lock);
197
198 ASSERT(reqp->req_state == AIO_REQ_FREE);
199 reqp->req_state = 0;
200 reqp->req_link = NULL;
201 reqp->req_sigevent.sigev_notify = SIGEV_NONE;
202
203 return (reqp);
204 }
205
206 /*
207 * Free an aio request structure.
208 */
209 void
_aio_req_free(aio_req_t * reqp)210 _aio_req_free(aio_req_t *reqp)
211 {
212 ASSERT(reqp->req_state != AIO_REQ_FREE &&
213 reqp->req_state != AIO_REQ_DONEQ);
214 (void) memset(reqp, 0, sizeof (*reqp));
215 reqp->req_state = AIO_REQ_FREE;
216
217 lmutex_lock(&__aio_cache_lock);
218 if (_aio_freelast == NULL) {
219 _aio_freelist = _aio_freelast = reqp;
220 } else {
221 _aio_freelast->req_link = reqp;
222 _aio_freelast = reqp;
223 }
224 _aio_freelist_cnt++;
225 _aio_allocated_cnt--;
226 lmutex_unlock(&__aio_cache_lock);
227 }
228
/*
 * Cache of listio head structures.  _aio_lio_alloc() and _aio_lio_free()
 * update these variables while holding __lio_mutex.
 */
aio_lio_t *_lio_head_freelist = NULL;	/* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;	/* last element of the free list */
size_t lio_head_chunksize = 0;		/* size of the newest lio head chunk */
int _lio_alloc = 0;			/* lio heads carved out of chunks so far */
int _lio_free = 0;			/* lio heads currently on the free list */
mutex_t __lio_mutex = DEFAULTMUTEX;
235
236 /*
237 * Allocate a listio head structure.
238 */
239 aio_lio_t *
_aio_lio_alloc(void)240 _aio_lio_alloc(void)
241 {
242 aio_lio_t *head;
243 chunk_t *chp;
244 size_t chunksize;
245 int nelem;
246 int i;
247
248 lmutex_lock(&__lio_mutex);
249 if ((head = _lio_head_freelist) == NULL) {
250 if ((chunksize = 2 * lio_head_chunksize) == 0)
251 chunksize = INITIAL_CHUNKSIZE;
252 if ((chp = chunk_alloc(chunksize)) == NULL) {
253 lmutex_unlock(&__lio_mutex);
254 return (NULL);
255 }
256 lio_head_chunksize = chunksize;
257 _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
258 nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
259 for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
260 head->lio_next = head + 1;
261 _lio_head_freelast = head - 1;
262 _lio_head_freelast->lio_next = NULL;
263 _lio_alloc += nelem;
264 _lio_free = nelem;
265 head = _lio_head_freelist;
266 }
267 if ((_lio_head_freelist = head->lio_next) == NULL)
268 _lio_head_freelast = NULL;
269 _lio_free--;
270 lmutex_unlock(&__lio_mutex);
271
272 ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
273 head->lio_next = NULL;
274 head->lio_port = -1;
275 (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
276 (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);
277
278 return (head);
279 }
280
281 /*
282 * Free a listio head structure.
283 */
284 void
_aio_lio_free(aio_lio_t * head)285 _aio_lio_free(aio_lio_t *head)
286 {
287 ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
288 (void) mutex_destroy(&head->lio_mutex);
289 (void) cond_destroy(&head->lio_cond_cv);
290 (void) memset(head, 0, sizeof (*head));
291
292 lmutex_lock(&__lio_mutex);
293 if (_lio_head_freelast == NULL) {
294 _lio_head_freelist = _lio_head_freelast = head;
295 } else {
296 _lio_head_freelast->lio_next = head;
297 _lio_head_freelast = head;
298 }
299 _lio_free++;
300 lmutex_unlock(&__lio_mutex);
301 }
302
303 void
postfork1_child_aio(void)304 postfork1_child_aio(void)
305 {
306 chunk_t *chp;
307
308 /*
309 * All of the workers are gone; free their structures.
310 */
311 if (_kaio_supported != NULL) {
312 (void) munmap((void *)_kaio_supported,
313 MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
314 _kaio_supported = NULL;
315 }
316 if (_aio_hash != NULL) {
317 (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
318 _aio_hash = NULL;
319 }
320 for (chp = chunk_list; chp != NULL; chp = chunk_list) {
321 chunk_list = chp->chunk_next;
322 (void) munmap((void *)chp, chp->chunk_size);
323 }
324
325 /*
326 * Reinitialize global variables
327 */
328
329 worker_freelist = NULL;
330 worker_freelast = NULL;
331 worker_chunksize = 0;
332 (void) mutex_init(&worker_lock, USYNC_THREAD, NULL);
333
334 _aio_freelist = NULL;
335 _aio_freelast = NULL;
336 request_chunksize = 0;
337 _aio_freelist_cnt = 0;
338 _aio_allocated_cnt = 0;
339 (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);
340
341 _lio_head_freelist = NULL;
342 _lio_head_freelast = NULL;
343 lio_head_chunksize = 0;
344 _lio_alloc = 0;
345 _lio_free = 0;
346 (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);
347
348 (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
349 (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
350 __aio_initbusy = 0;
351
352 (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
353 (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
354 (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);
355
356 _kaio_ok = 0;
357 __uaio_ok = 0;
358
359 _kaiowp = NULL;
360
361 __workers_rw = NULL;
362 __nextworker_rw = NULL;
363 __rw_workerscnt = 0;
364
365 __workers_no = NULL;
366 __nextworker_no = NULL;
367 __no_workerscnt = 0;
368
369 _aio_worker_cnt = 0;
370
371 _aio_done_head = NULL;
372 _aio_done_tail = NULL;
373 _aio_donecnt = 0;
374
375 _aio_doneq = NULL;
376 _aio_doneq_cnt = 0;
377
378 _aio_waitncnt = 0;
379 _aio_outstand_cnt = 0;
380 _kaio_outstand_cnt = 0;
381 _aio_req_done_cnt = 0;
382 _aio_kernel_suspend = 0;
383 _aio_suscv_cnt = 0;
384
385 _aiowait_flag = 0;
386 _aio_flags = 0;
387 }
388
/*
 * Print one line of the form "<name>\t= <value>" on stderr, where <name>
 * is the spelling of 'var' and <value> is formatted with %d (so 'var' is
 * expected to be an int).
 */
#define	DISPLAY(var)	\
	(void) fprintf(stderr, #var "\t= %d\n", var)
391
392 static void
_aio_exit_info(void)393 _aio_exit_info(void)
394 {
395 if ((_kaio_ok | __uaio_ok) == 0)
396 return;
397 (void) fprintf(stderr, "\n");
398 DISPLAY(_aio_freelist_cnt);
399 DISPLAY(_aio_allocated_cnt);
400 DISPLAY(_lio_alloc);
401 DISPLAY(_lio_free);
402 DISPLAY(__rw_workerscnt);
403 DISPLAY(__no_workerscnt);
404 DISPLAY(_aio_worker_cnt);
405 DISPLAY(_aio_donecnt);
406 DISPLAY(_aio_doneq_cnt);
407 DISPLAY(_aio_waitncnt);
408 DISPLAY(_aio_outstand_cnt);
409 DISPLAY(_kaio_outstand_cnt);
410 DISPLAY(_aio_req_done_cnt);
411 DISPLAY(_aio_kernel_suspend);
412 DISPLAY(_aio_suscv_cnt);
413 DISPLAY(_aiowait_flag);
414 DISPLAY(_aio_flags);
415 }
416
417 void
init_aio(void)418 init_aio(void)
419 {
420 char *str;
421
422 (void) pthread_key_create(&_aio_key, _aio_worker_free);
423 if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
424 if ((_min_workers = atoi(str)) <= 0)
425 _min_workers = 4;
426 }
427 if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
428 if ((_max_workers = atoi(str)) <= 0)
429 _max_workers = 256;
430 if (_max_workers < _min_workers + 1)
431 _max_workers = _min_workers + 1;
432 }
433 if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
434 (void) atexit(_aio_exit_info);
435 }
436