/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"

/*
 * The aio subsystem memory allocation strategy:
 *
 * For each of the structure types we wish to allocate/free
 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
 * chunks of memory which are then subdivided into individual
 * elements which are put into a free list from which allocations
 * are made and to which frees are returned.
 *
 * Chunks start small (8 Kbytes) and get larger (size doubling)
 * as more chunks are needed.  This keeps memory usage small for
 * light use and fragmentation small for heavy use.
 *
 * Chunks are never unmapped except as an aftermath of fork()
 * in the child process, when they are all unmapped (because
 * all of the worker threads disappear in the child).
 */

#define	INITIAL_CHUNKSIZE	(8 * 1024)

/*
 * The header structure for each chunk.
 * A pointer and a size_t ensures proper alignment for whatever follows.
 */
typedef struct chunk {
	struct chunk	*chunk_next;	/* linked list */
	size_t		chunk_size;	/* size of this chunk */
} chunk_t;

chunk_t *chunk_list = NULL;		/* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;

chunk_t *
chunk_alloc(size_t size)
{
	chunk_t *chp = NULL;
	void *ptr;

	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
	if (ptr != MAP_FAILED) {
		lmutex_lock(&chunk_lock);
		chp = ptr;
		chp->chunk_next = chunk_list;
		chunk_list = chp;
		chp->chunk_size = size;
		lmutex_unlock(&chunk_lock);
	}

	return (chp);
}
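/*
 * Illustrative arithmetic only (not used by the code): because each
 * structure type's chunk size doubles on every refill, after N chunks
 * a type has mapped (2^N - 1) * 8 Kbytes in total; e.g. five chunks
 * amount to 8K + 16K + 32K + 64K + 128K = 248 Kbytes.
 */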
aio_worker_t *worker_freelist = NULL;	/* free list of worker structures */
aio_worker_t *worker_freelast = NULL;
size_t worker_chunksize = 0;
mutex_t worker_lock = DEFAULTMUTEX;

/*
 * Allocate a worker control block.
 */
aio_worker_t *
_aio_worker_alloc(void)
{
	aio_worker_t *aiowp;
	chunk_t *chp;
	size_t chunksize;
	int nelem;
	int i;

	lmutex_lock(&worker_lock);
	if ((aiowp = worker_freelist) == NULL) {
		if ((chunksize = 2 * worker_chunksize) == 0)
			chunksize = INITIAL_CHUNKSIZE;
		if ((chp = chunk_alloc(chunksize)) == NULL) {
			lmutex_unlock(&worker_lock);
			return (NULL);
		}
		worker_chunksize = chunksize;
		worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
		for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
			aiowp->work_forw = aiowp + 1;
		worker_freelast = aiowp - 1;
		worker_freelast->work_forw = NULL;
		aiowp = worker_freelist;
	}
	if ((worker_freelist = aiowp->work_forw) == NULL)
		worker_freelast = NULL;
	lmutex_unlock(&worker_lock);

	aiowp->work_forw = NULL;
	(void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
	(void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);

	return (aiowp);
}

/*
 * Free a worker control block.
 * Declared with void *arg so it can be a pthread_key_create() destructor.
 */
void
_aio_worker_free(void *arg)
{
	aio_worker_t *aiowp = arg;

	(void) mutex_destroy(&aiowp->work_qlock1);
	(void) cond_destroy(&aiowp->work_idle_cv);
	(void) memset(aiowp, 0, sizeof (*aiowp));

	lmutex_lock(&worker_lock);
	if (worker_freelast == NULL) {
		worker_freelist = worker_freelast = aiowp;
	} else {
		worker_freelast->work_forw = aiowp;
		worker_freelast = aiowp;
	}
	lmutex_unlock(&worker_lock);
}

aio_req_t *_aio_freelist = NULL;	/* free list of request structures */
aio_req_t *_aio_freelast = NULL;
size_t request_chunksize = 0;
int _aio_freelist_cnt = 0;
int _aio_allocated_cnt = 0;
mutex_t __aio_cache_lock = DEFAULTMUTEX;

/*
 * Allocate an aio request structure.
 */
aio_req_t *
_aio_req_alloc(void)
{
	aio_req_t *reqp;
	chunk_t *chp;
	size_t chunksize;
	int nelem;
	int i;

	lmutex_lock(&__aio_cache_lock);
	if ((reqp = _aio_freelist) == NULL) {
		if ((chunksize = 2 * request_chunksize) == 0)
			chunksize = INITIAL_CHUNKSIZE;
		if ((chp = chunk_alloc(chunksize)) == NULL) {
			lmutex_unlock(&__aio_cache_lock);
			return (NULL);
		}
		request_chunksize = chunksize;
		_aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
		for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
			reqp->req_state = AIO_REQ_FREE;
			reqp->req_link = reqp + 1;
		}
		_aio_freelast = reqp - 1;
		_aio_freelast->req_link = NULL;
		_aio_freelist_cnt = nelem;
		reqp = _aio_freelist;
	}
	if ((_aio_freelist = reqp->req_link) == NULL)
		_aio_freelast = NULL;
	_aio_freelist_cnt--;
	_aio_allocated_cnt++;
	lmutex_unlock(&__aio_cache_lock);

	ASSERT(reqp->req_state == AIO_REQ_FREE);
	reqp->req_state = 0;
	reqp->req_link = NULL;
	reqp->req_sigevent.sigev_notify = SIGEV_NONE;

	return (reqp);
}
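/*
 * A worked example of the carve-up arithmetic above, with assumed
 * sizes for illustration only: on a 64-bit build sizeof (chunk_t)
 * is 16 bytes, and if sizeof (aio_req_t) were 256 bytes the initial
 * 8 Kbyte chunk would yield (8192 - 16) / 256 = 31 requests, with
 * the 240-byte remainder left unused at the end of the chunk.
 */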
/*
 * Free an aio request structure.
 */
void
_aio_req_free(aio_req_t *reqp)
{
	ASSERT(reqp->req_state != AIO_REQ_FREE &&
	    reqp->req_state != AIO_REQ_DONEQ);
	(void) memset(reqp, 0, sizeof (*reqp));
	reqp->req_state = AIO_REQ_FREE;

	lmutex_lock(&__aio_cache_lock);
	if (_aio_freelast == NULL) {
		_aio_freelist = _aio_freelast = reqp;
	} else {
		_aio_freelast->req_link = reqp;
		_aio_freelast = reqp;
	}
	_aio_freelist_cnt++;
	_aio_allocated_cnt--;
	lmutex_unlock(&__aio_cache_lock);
}

aio_lio_t *_lio_head_freelist = NULL;	/* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;
size_t lio_head_chunksize = 0;
int _lio_alloc = 0;
int _lio_free = 0;
mutex_t __lio_mutex = DEFAULTMUTEX;

/*
 * Allocate a listio head structure.
 */
aio_lio_t *
_aio_lio_alloc(void)
{
	aio_lio_t *head;
	chunk_t *chp;
	size_t chunksize;
	int nelem;
	int i;

	lmutex_lock(&__lio_mutex);
	if ((head = _lio_head_freelist) == NULL) {
		if ((chunksize = 2 * lio_head_chunksize) == 0)
			chunksize = INITIAL_CHUNKSIZE;
		if ((chp = chunk_alloc(chunksize)) == NULL) {
			lmutex_unlock(&__lio_mutex);
			return (NULL);
		}
		lio_head_chunksize = chunksize;
		_lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
		for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
			head->lio_next = head + 1;
		_lio_head_freelast = head - 1;
		_lio_head_freelast->lio_next = NULL;
		_lio_alloc += nelem;
		_lio_free = nelem;
		head = _lio_head_freelist;
	}
	if ((_lio_head_freelist = head->lio_next) == NULL)
		_lio_head_freelast = NULL;
	_lio_free--;
	lmutex_unlock(&__lio_mutex);

	ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
	head->lio_next = NULL;
	head->lio_port = -1;
	(void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
	(void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);

	return (head);
}

/*
 * Free a listio head structure.
 */
void
_aio_lio_free(aio_lio_t *head)
{
	ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
	(void) mutex_destroy(&head->lio_mutex);
	(void) cond_destroy(&head->lio_cond_cv);
	(void) memset(head, 0, sizeof (*head));

	lmutex_lock(&__lio_mutex);
	if (_lio_head_freelast == NULL) {
		_lio_head_freelist = _lio_head_freelast = head;
	} else {
		_lio_head_freelast->lio_next = head;
		_lio_head_freelast = head;
	}
	_lio_free++;
	lmutex_unlock(&__lio_mutex);
}
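/*
 * Note on the counters above: _lio_alloc only ever grows (it counts
 * every lio head carved out of a chunk), while _lio_free tracks the
 * heads currently sitting on the free list (a new chunk is carved
 * only when the free list is empty, so the plain assignment
 * _lio_free = nelem is equivalent to an increment).  At any instant,
 * _lio_alloc - _lio_free is the number of lio heads in use.
 */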
void
postfork1_child_aio(void)
{
	chunk_t *chp;

	/*
	 * All of the workers are gone; free their structures.
	 */
	if (_kaio_supported != NULL) {
		(void) munmap((void *)_kaio_supported,
		    MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
		_kaio_supported = NULL;
	}
	if (_aio_hash != NULL) {
		(void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
		_aio_hash = NULL;
	}
	for (chp = chunk_list; chp != NULL; chp = chunk_list) {
		chunk_list = chp->chunk_next;
		(void) munmap((void *)chp, chp->chunk_size);
	}

	/*
	 * Reinitialize global variables
	 */

	worker_freelist = NULL;
	worker_freelast = NULL;
	worker_chunksize = 0;
	(void) mutex_init(&worker_lock, USYNC_THREAD, NULL);

	_aio_freelist = NULL;
	_aio_freelast = NULL;
	request_chunksize = 0;
	_aio_freelist_cnt = 0;
	_aio_allocated_cnt = 0;
	(void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);

	_lio_head_freelist = NULL;
	_lio_head_freelast = NULL;
	lio_head_chunksize = 0;
	_lio_alloc = 0;
	_lio_free = 0;
	(void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);

	(void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
	(void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
	__aio_initbusy = 0;

	(void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
	(void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
	(void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);

	_kaio_ok = 0;
	__uaio_ok = 0;

	_kaiowp = NULL;

	__workers_rw = NULL;
	__nextworker_rw = NULL;
	__rw_workerscnt = 0;

	__workers_no = NULL;
	__nextworker_no = NULL;
	__no_workerscnt = 0;

	_aio_worker_cnt = 0;

	_aio_done_head = NULL;
	_aio_done_tail = NULL;
	_aio_donecnt = 0;

	_aio_doneq = NULL;
	_aio_doneq_cnt = 0;

	_aio_waitncnt = 0;
	_aio_outstand_cnt = 0;
	_kaio_outstand_cnt = 0;
	_aio_req_done_cnt = 0;
	_aio_kernel_suspend = 0;
	_aio_suscv_cnt = 0;

	_aiowait_flag = 0;
	_aio_flags = 0;
}

#define	DISPLAY(var)	\
	(void) fprintf(stderr, #var "\t= %d\n", var)

static void
_aio_exit_info(void)
{
	if ((_kaio_ok | __uaio_ok) == 0)
		return;
	(void) fprintf(stderr, "\n");
	DISPLAY(_aio_freelist_cnt);
	DISPLAY(_aio_allocated_cnt);
	DISPLAY(_lio_alloc);
	DISPLAY(_lio_free);
	DISPLAY(__rw_workerscnt);
	DISPLAY(__no_workerscnt);
	DISPLAY(_aio_worker_cnt);
	DISPLAY(_aio_donecnt);
	DISPLAY(_aio_doneq_cnt);
	DISPLAY(_aio_waitncnt);
	DISPLAY(_aio_outstand_cnt);
	DISPLAY(_kaio_outstand_cnt);
	DISPLAY(_aio_req_done_cnt);
	DISPLAY(_aio_kernel_suspend);
	DISPLAY(_aio_suscv_cnt);
	DISPLAY(_aiowait_flag);
	DISPLAY(_aio_flags);
}

void
init_aio(void)
{
	char *str;

	(void) pthread_key_create(&_aio_key, _aio_worker_free);
	if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
		if ((_min_workers = atoi(str)) <= 0)
			_min_workers = 4;
	}
	if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
		if ((_max_workers = atoi(str)) <= 0)
			_max_workers = 256;
		if (_max_workers < _min_workers + 1)
			_max_workers = _min_workers + 1;
	}
	if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
		(void) atexit(_aio_exit_info);
}
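/*
 * Example usage (illustrative, from the shell): the environment
 * variables consumed by init_aio() above tune the worker pool and
 * arrange for the statistics counters to be dumped at exit, e.g.
 *
 *	$ _AIO_MIN_WORKERS=8 _AIO_MAX_WORKERS=64 _AIO_EXIT_INFO=1 ./app
 */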