/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"

/*
 * The aio subsystem memory allocation strategy:
 *
 * For each of the structure types we wish to allocate/free
 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
 * chunks of memory which are then subdivided into individual
 * elements that are put into a free list from which allocations
 * are made and to which frees are returned.
 *
 * Chunks start small (8 Kbytes) and get larger (size doubling)
 * as more chunks are needed.  This keeps memory usage small for
 * light use and fragmentation small for heavy use.
 *
 * Chunks are never unmapped except as an aftermath of fork()
 * in the child process, when they are all unmapped (because
 * all of the worker threads disappear in the child).
 */
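
/*
 * An illustration of the doubling policy: successive chunks are
 * mapped at 8K, 16K, 32K, ..., so n chunk allocations provide
 * 8K + 16K + ... + 2^(n-1) * 8K = (2^n - 1) * 8K of memory.
 * A free list holding thousands of elements therefore costs only
 * a handful of mmap() calls.
 */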

#define	INITIAL_CHUNKSIZE	(8 * 1024)

/*
 * The header structure for each chunk.
 * A pointer and a size_t ensure proper alignment for whatever follows.
 */
typedef struct chunk {
        struct chunk    *chunk_next;    /* linked list */
        size_t          chunk_size;     /* size of this chunk */
} chunk_t;
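
/*
 * Resulting chunk layout (sketch; aio_req_t shown as the element
 * type, but aio_worker_t and aio_lio_t chunks look the same):
 *
 *      +---------+-----------+-----------+-- ... --+-----------+
 *      | chunk_t | aio_req_t | aio_req_t |         | aio_req_t |
 *      +---------+-----------+-----------+-- ... --+-----------+
 *
 * The elements begin at (chp + 1) and there are
 * (chunk_size - sizeof (chunk_t)) / sizeof (element) of them.
 */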

chunk_t *chunk_list = NULL;		/* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;

chunk_t *
chunk_alloc(size_t size)
{
        chunk_t *chp = NULL;
        void *ptr;

        /* Anonymous private mapping: zero-filled and not file-backed. */
        ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
        if (ptr != MAP_FAILED) {
                lmutex_lock(&chunk_lock);
                chp = ptr;
                chp->chunk_next = chunk_list;
                chunk_list = chp;
                chp->chunk_size = size;
                lmutex_unlock(&chunk_lock);
        }

        return (chp);
}
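
/*
 * Typical use, mirroring the allocators below: the new chunk is
 * threaded onto chunk_list as a side effect and the caller carves
 * up the payload that follows the header:
 *
 *	if ((chp = chunk_alloc(chunksize)) == NULL)
 *		return (NULL);
 *	freelist = (aio_req_t *)(uintptr_t)(chp + 1);
 */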

aio_worker_t *worker_freelist = NULL;	/* free list of worker structures */
aio_worker_t *worker_freelast = NULL;
size_t worker_chunksize = 0;
mutex_t worker_lock = DEFAULTMUTEX;

/*
 * Allocate a worker control block.
 */
aio_worker_t *
_aio_worker_alloc(void)
{
        aio_worker_t *aiowp;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&worker_lock);
        if ((aiowp = worker_freelist) == NULL) {
                /* Free list is empty; grab a new chunk, twice as large. */
                if ((chunksize = 2 * worker_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&worker_lock);
                        return (NULL);
                }
                worker_chunksize = chunksize;
                /* Carve the chunk payload into a linked free list. */
                worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
                for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
                        aiowp->work_forw = aiowp + 1;
                worker_freelast = aiowp - 1;
                worker_freelast->work_forw = NULL;
                aiowp = worker_freelist;
        }
        if ((worker_freelist = aiowp->work_forw) == NULL)
                worker_freelast = NULL;
        lmutex_unlock(&worker_lock);

        aiowp->work_forw = NULL;
        (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
        (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);

        return (aiowp);
}

/*
 * Free a worker control block.
 * Declared with void *arg so it can be a pthread_key_create() destructor.
 */
void
_aio_worker_free(void *arg)
{
        aio_worker_t *aiowp = arg;

        (void) mutex_destroy(&aiowp->work_qlock1);
        (void) cond_destroy(&aiowp->work_idle_cv);
        (void) memset(aiowp, 0, sizeof (*aiowp));

        lmutex_lock(&worker_lock);
        if (worker_freelast == NULL) {
                worker_freelist = worker_freelast = aiowp;
        } else {
                worker_freelast->work_forw = aiowp;
                worker_freelast = aiowp;
        }
        lmutex_unlock(&worker_lock);
}
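
/*
 * init_aio() below registers this function as the destructor for
 * _aio_key:
 *
 *	(void) pthread_key_create(&_aio_key, _aio_worker_free);
 *
 * so a worker's control block (presumably stored with
 * pthread_setspecific()) is recycled when its thread exits.
 */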

aio_req_t *_aio_freelist = NULL;	/* free list of request structures */
aio_req_t *_aio_freelast = NULL;
size_t request_chunksize = 0;
int _aio_freelist_cnt = 0;
int _aio_allocated_cnt = 0;
mutex_t __aio_cache_lock = DEFAULTMUTEX;

/*
 * Allocate an aio request structure.
 */
aio_req_t *
_aio_req_alloc(void)
{
        aio_req_t *reqp;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&__aio_cache_lock);
        if ((reqp = _aio_freelist) == NULL) {
                /* Free list is empty; grab a new chunk, twice as large. */
                if ((chunksize = 2 * request_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&__aio_cache_lock);
                        return (NULL);
                }
                request_chunksize = chunksize;
                /* Carve the chunk payload into a linked free list. */
                _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
                for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
                        reqp->req_state = AIO_REQ_FREE;
                        reqp->req_link = reqp + 1;
                }
                _aio_freelast = reqp - 1;
                _aio_freelast->req_link = NULL;
                _aio_freelist_cnt = nelem;
                reqp = _aio_freelist;
        }
        if ((_aio_freelist = reqp->req_link) == NULL)
                _aio_freelast = NULL;
        _aio_freelist_cnt--;
        _aio_allocated_cnt++;
        lmutex_unlock(&__aio_cache_lock);

        ASSERT(reqp->req_state == AIO_REQ_FREE);
        reqp->req_state = 0;
        reqp->req_link = NULL;
        reqp->req_sigevent.sigev_notify = SIGEV_NONE;

        return (reqp);
}

/*
 * Free an aio request structure.
 */
void
_aio_req_free(aio_req_t *reqp)
{
        ASSERT(reqp->req_state != AIO_REQ_FREE &&
            reqp->req_state != AIO_REQ_DONEQ);
        (void) memset(reqp, 0, sizeof (*reqp));
        reqp->req_state = AIO_REQ_FREE;

        lmutex_lock(&__aio_cache_lock);
        if (_aio_freelast == NULL) {
                _aio_freelist = _aio_freelast = reqp;
        } else {
                _aio_freelast->req_link = reqp;
                _aio_freelast = reqp;
        }
        _aio_freelist_cnt++;
        _aio_allocated_cnt--;
        lmutex_unlock(&__aio_cache_lock);
}
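
/*
 * Request lifecycle, as implied by the ASSERTs in this file:
 *
 *	reqp = _aio_req_alloc();
 *		... req_state is 0, sigev_notify is SIGEV_NONE ...
 *	the request is queued, serviced, and retired, changing req_state
 *	_aio_req_free(reqp);
 *		... legal only if req_state is neither AIO_REQ_FREE
 *		nor AIO_REQ_DONEQ ...
 */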

aio_lio_t *_lio_head_freelist = NULL;	/* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;
size_t lio_head_chunksize = 0;
int _lio_alloc = 0;
int _lio_free = 0;
mutex_t __lio_mutex = DEFAULTMUTEX;

/*
 * Allocate a listio head structure.
 */
aio_lio_t *
_aio_lio_alloc(void)
{
        aio_lio_t *head;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&__lio_mutex);
        if ((head = _lio_head_freelist) == NULL) {
                /* Free list is empty; grab a new chunk, twice as large. */
                if ((chunksize = 2 * lio_head_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&__lio_mutex);
                        return (NULL);
                }
                lio_head_chunksize = chunksize;
                /* Carve the chunk payload into a linked free list. */
                _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
                for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
                        head->lio_next = head + 1;
                _lio_head_freelast = head - 1;
                _lio_head_freelast->lio_next = NULL;
                _lio_alloc += nelem;
                _lio_free = nelem;
                head = _lio_head_freelist;
        }
        if ((_lio_head_freelist = head->lio_next) == NULL)
                _lio_head_freelast = NULL;
        _lio_free--;
        lmutex_unlock(&__lio_mutex);

        ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
        head->lio_next = NULL;
        head->lio_port = -1;
        (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
        (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);

        return (head);
}

/*
 * Free a listio head structure.
 */
void
_aio_lio_free(aio_lio_t *head)
{
        ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
        (void) mutex_destroy(&head->lio_mutex);
        (void) cond_destroy(&head->lio_cond_cv);
        (void) memset(head, 0, sizeof (*head));

        lmutex_lock(&__lio_mutex);
        if (_lio_head_freelast == NULL) {
                _lio_head_freelist = _lio_head_freelast = head;
        } else {
                _lio_head_freelast->lio_next = head;
                _lio_head_freelast = head;
        }
        _lio_free++;
        lmutex_unlock(&__lio_mutex);
}

void
postfork1_child_aio(void)
{
        chunk_t *chp;

        /*
         * All of the workers are gone; free their structures.
         */
        if (_kaio_supported != NULL) {
                (void) munmap((void *)_kaio_supported,
                    MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
                _kaio_supported = NULL;
        }
        if (_aio_hash != NULL) {
                (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
                _aio_hash = NULL;
        }
        /* Unmap every chunk; the free lists are reset to empty below. */
        for (chp = chunk_list; chp != NULL; chp = chunk_list) {
                chunk_list = chp->chunk_next;
                (void) munmap((void *)chp, chp->chunk_size);
        }

        /*
         * Reinitialize global variables.
         */

        worker_freelist = NULL;
        worker_freelast = NULL;
        worker_chunksize = 0;
        (void) mutex_init(&worker_lock, USYNC_THREAD, NULL);

        _aio_freelist = NULL;
        _aio_freelast = NULL;
        request_chunksize = 0;
        _aio_freelist_cnt = 0;
        _aio_allocated_cnt = 0;
        (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);

        _lio_head_freelist = NULL;
        _lio_head_freelast = NULL;
        lio_head_chunksize = 0;
        _lio_alloc = 0;
        _lio_free = 0;
        (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);

        (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
        (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
        __aio_initbusy = 0;

        (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
        (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
        (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);

        _kaio_ok = 0;
        __uaio_ok = 0;

        _kaiowp = NULL;

        __workers_rw = NULL;
        __nextworker_rw = NULL;
        __rw_workerscnt = 0;

        __workers_no = NULL;
        __nextworker_no = NULL;
        __no_workerscnt = 0;

        _aio_worker_cnt = 0;

        _aio_done_head = NULL;
        _aio_done_tail = NULL;
        _aio_donecnt = 0;

        _aio_doneq = NULL;
        _aio_doneq_cnt = 0;

        _aio_waitncnt = 0;
        _aio_outstand_cnt = 0;
        _kaio_outstand_cnt = 0;
        _aio_req_done_cnt = 0;
        _aio_kernel_suspend = 0;
        _aio_suscv_cnt = 0;

        _aiowait_flag = 0;
        _aio_flags = 0;
}

#define	DISPLAY(var)	\
        (void) fprintf(stderr, #var "\t= %d\n", var)
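
/*
 * The # operator stringizes the macro argument and adjacent string
 * literals are concatenated, so, for example, DISPLAY(_lio_free)
 * expands to:
 *
 *	(void) fprintf(stderr, "_lio_free" "\t= %d\n", _lio_free);
 *
 * printing the variable's name alongside its value.
 */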

static void
_aio_exit_info(void)
{
        if ((_kaio_ok | __uaio_ok) == 0)
                return;
        (void) fprintf(stderr, "\n");
        DISPLAY(_aio_freelist_cnt);
        DISPLAY(_aio_allocated_cnt);
        DISPLAY(_lio_alloc);
        DISPLAY(_lio_free);
        DISPLAY(__rw_workerscnt);
        DISPLAY(__no_workerscnt);
        DISPLAY(_aio_worker_cnt);
        DISPLAY(_aio_donecnt);
        DISPLAY(_aio_doneq_cnt);
        DISPLAY(_aio_waitncnt);
        DISPLAY(_aio_outstand_cnt);
        DISPLAY(_kaio_outstand_cnt);
        DISPLAY(_aio_req_done_cnt);
        DISPLAY(_aio_kernel_suspend);
        DISPLAY(_aio_suscv_cnt);
        DISPLAY(_aiowait_flag);
        DISPLAY(_aio_flags);
}

void
init_aio(void)
{
        char *str;

        (void) pthread_key_create(&_aio_key, _aio_worker_free);
        /* Tunables; fall back to the defaults if a value is not positive. */
        if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
                if ((_min_workers = atoi(str)) <= 0)
                        _min_workers = 4;
        }
        if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
                if ((_max_workers = atoi(str)) <= 0)
                        _max_workers = 256;
                /* Always allow at least one worker above the minimum. */
                if (_max_workers < _min_workers + 1)
                        _max_workers = _min_workers + 1;
        }
        if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
                (void) atexit(_aio_exit_info);
}
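
/*
 * Example (hypothetical command line, using the environment
 * variables recognized above):
 *
 *	_AIO_MIN_WORKERS=8 _AIO_MAX_WORKERS=64 _AIO_EXIT_INFO=1 ./app
 *
 * This would bound the worker pool between 8 and 64 threads and
 * dump the aio counters to stderr when the program exits.
 */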