/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"

/*
 * The aio subsystem memory allocation strategy:
 *
 * For each of the structure types we wish to allocate/free
 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
 * chunks of memory which are then subdivided into individual
 * elements which are put into a free list from which allocations
 * are made and to which frees are returned.
 *
 * Chunks start small (8 Kbytes) and get larger (size doubling)
 * as more chunks are needed.  This keeps memory usage small for
 * light use and fragmentation small for heavy use.
 *
 * Chunks are never unmapped except as an aftermath of fork()
 * in the child process, when they are all unmapped (because
 * all of the worker threads disappear in the child).
 */

#define	INITIAL_CHUNKSIZE	(8 * 1024)

/*
 * The header structure for each chunk.
 * A pointer and a size_t ensure proper alignment for whatever follows.
 */
typedef struct chunk {
	struct chunk	*chunk_next;	/* linked list */
	size_t		chunk_size;	/* size of this chunk */
} chunk_t;

chunk_t *chunk_list = NULL;		/* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;

chunk_t *
chunk_alloc(size_t size)
{
	chunk_t *chp = NULL;
	void *ptr;

	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
	if (ptr != MAP_FAILED) {
		lmutex_lock(&chunk_lock);
		chp = ptr;
		chp->chunk_next = chunk_list;
		chunk_list = chp;
		chp->chunk_size = size;
		lmutex_unlock(&chunk_lock);
	}

	return (chp);
}
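
/*
 * Illustration (not part of the library logic): each typed allocator
 * below layers a free list on top of chunk_alloc().  The usable space
 * begins immediately after the chunk_t header, so a caller carves
 * elements roughly like this:
 *
 *	chunk_t *chp = chunk_alloc(INITIAL_CHUNKSIZE);
 *	if (chp != NULL) {
 *		element_t *first = (element_t *)(uintptr_t)(chp + 1);
 *		int nelem = (chp->chunk_size - sizeof (chunk_t)) /
 *		    sizeof (element_t);
 *		... link the nelem elements onto a free list ...
 *	}
 *
 * (element_t is a stand-in for aio_worker_t, aio_req_t, or aio_lio_t.)
 */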

aio_worker_t *worker_freelist = NULL;	/* free list of worker structures */
aio_worker_t *worker_freelast = NULL;
size_t worker_chunksize = 0;
mutex_t worker_lock = DEFAULTMUTEX;

/*
 * Allocate a worker control block.
 */
aio_worker_t *
_aio_worker_alloc(void)
{
	aio_worker_t *aiowp;
	chunk_t *chp;
	size_t chunksize;
	int nelem;
	int i;

	lmutex_lock(&worker_lock);
	if ((aiowp = worker_freelist) == NULL) {
		if ((chunksize = 2 * worker_chunksize) == 0)
			chunksize = INITIAL_CHUNKSIZE;
		if ((chp = chunk_alloc(chunksize)) == NULL) {
			lmutex_unlock(&worker_lock);
			return (NULL);
		}
		worker_chunksize = chunksize;
		worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
		for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
			aiowp->work_forw = aiowp + 1;
		worker_freelast = aiowp - 1;
		worker_freelast->work_forw = NULL;
		aiowp = worker_freelist;
	}
	if ((worker_freelist = aiowp->work_forw) == NULL)
		worker_freelast = NULL;
	lmutex_unlock(&worker_lock);

	aiowp->work_forw = NULL;
	(void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
	(void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);

	return (aiowp);
}

/*
 * Free a worker control block.
 * Declared with void *arg so it can be a pthread_key_create() destructor.
 */
void
_aio_worker_free(void *arg)
{
	aio_worker_t *aiowp = arg;

	(void) mutex_destroy(&aiowp->work_qlock1);
	(void) cond_destroy(&aiowp->work_idle_cv);
	(void) memset(aiowp, 0, sizeof (*aiowp));

	lmutex_lock(&worker_lock);
	if (worker_freelast == NULL) {
		worker_freelist = worker_freelast = aiowp;
	} else {
		worker_freelast->work_forw = aiowp;
		worker_freelast = aiowp;
	}
	lmutex_unlock(&worker_lock);
}

aio_req_t *_aio_freelist = NULL;	/* free list of request structures */
aio_req_t *_aio_freelast = NULL;
size_t request_chunksize = 0;
int _aio_freelist_cnt = 0;
int _aio_allocated_cnt = 0;
mutex_t __aio_cache_lock = DEFAULTMUTEX;

/*
 * Allocate an aio request structure.
 */
aio_req_t *
_aio_req_alloc(void)
{
	aio_req_t *reqp;
	chunk_t *chp;
	size_t chunksize;
	int nelem;
	int i;

	lmutex_lock(&__aio_cache_lock);
	if ((reqp = _aio_freelist) == NULL) {
		if ((chunksize = 2 * request_chunksize) == 0)
			chunksize = INITIAL_CHUNKSIZE;
		if ((chp = chunk_alloc(chunksize)) == NULL) {
			lmutex_unlock(&__aio_cache_lock);
			return (NULL);
		}
		request_chunksize = chunksize;
		_aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
		for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
			reqp->req_state = AIO_REQ_FREE;
			reqp->req_link = reqp + 1;
		}
		_aio_freelast = reqp - 1;
		_aio_freelast->req_link = NULL;
		_aio_freelist_cnt = nelem;
		reqp = _aio_freelist;
	}
	if ((_aio_freelist = reqp->req_link) == NULL)
		_aio_freelast = NULL;
	_aio_freelist_cnt--;
	_aio_allocated_cnt++;
	lmutex_unlock(&__aio_cache_lock);

	ASSERT(reqp->req_state == AIO_REQ_FREE);
	reqp->req_state = 0;
	reqp->req_link = NULL;
	reqp->req_sigevent.sigev_notify = SIGEV_NONE;

	return (reqp);
}
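
/*
 * Accounting note: under __aio_cache_lock, the sum of _aio_freelist_cnt
 * and _aio_allocated_cnt stays equal to the total number of request
 * structures carved from chunks so far; allocation and free merely move
 * a request from one count to the other.
 */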

/*
 * Free an aio request structure.
 */
void
_aio_req_free(aio_req_t *reqp)
{
	ASSERT(reqp->req_state != AIO_REQ_FREE &&
	    reqp->req_state != AIO_REQ_DONEQ);
	(void) memset(reqp, 0, sizeof (*reqp));
	reqp->req_state = AIO_REQ_FREE;

	lmutex_lock(&__aio_cache_lock);
	if (_aio_freelast == NULL) {
		_aio_freelist = _aio_freelast = reqp;
	} else {
		_aio_freelast->req_link = reqp;
		_aio_freelast = reqp;
	}
	_aio_freelist_cnt++;
	_aio_allocated_cnt--;
	lmutex_unlock(&__aio_cache_lock);
}

aio_lio_t *_lio_head_freelist = NULL;	/* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;
size_t lio_head_chunksize = 0;
int _lio_alloc = 0;
int _lio_free = 0;
mutex_t __lio_mutex = DEFAULTMUTEX;

/*
 * Allocate a listio head structure.
 */
aio_lio_t *
_aio_lio_alloc(void)
{
	aio_lio_t *head;
	chunk_t *chp;
	size_t chunksize;
	int nelem;
	int i;

	lmutex_lock(&__lio_mutex);
	if ((head = _lio_head_freelist) == NULL) {
		if ((chunksize = 2 * lio_head_chunksize) == 0)
			chunksize = INITIAL_CHUNKSIZE;
		if ((chp = chunk_alloc(chunksize)) == NULL) {
			lmutex_unlock(&__lio_mutex);
			return (NULL);
		}
		lio_head_chunksize = chunksize;
		_lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
		nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
		for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
			head->lio_next = head + 1;
		_lio_head_freelast = head - 1;
		_lio_head_freelast->lio_next = NULL;
		_lio_alloc += nelem;
		_lio_free = nelem;
		head = _lio_head_freelist;
	}
	if ((_lio_head_freelist = head->lio_next) == NULL)
		_lio_head_freelast = NULL;
	_lio_free--;
	lmutex_unlock(&__lio_mutex);

	ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
	head->lio_next = NULL;
	head->lio_port = -1;
	(void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
	(void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);

	return (head);
}

/*
 * Free a listio head structure.
 */
void
_aio_lio_free(aio_lio_t *head)
{
	ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
	(void) mutex_destroy(&head->lio_mutex);
	(void) cond_destroy(&head->lio_cond_cv);
	(void) memset(head, 0, sizeof (*head));

	lmutex_lock(&__lio_mutex);
	if (_lio_head_freelast == NULL) {
		_lio_head_freelist = _lio_head_freelast = head;
	} else {
		_lio_head_freelast->lio_next = head;
		_lio_head_freelast = head;
	}
	_lio_free++;
	lmutex_unlock(&__lio_mutex);
}
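
/*
 * fork() cleanup: the worker threads do not exist in the child of a
 * fork(), so every structure handed out above is useless there.  The
 * child runs postfork1_child_aio() below to unmap all chunks and reset
 * the aio globals to their pristine, never-initialized state.
 */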

void
postfork1_child_aio(void)
{
	chunk_t *chp;

	/*
	 * All of the workers are gone; free their structures.
	 */
	if (_kaio_supported != NULL) {
		(void) munmap((void *)_kaio_supported,
		    MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
		_kaio_supported = NULL;
	}
	if (_aio_hash != NULL) {
		(void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
		_aio_hash = NULL;
	}
	for (chp = chunk_list; chp != NULL; chp = chunk_list) {
		chunk_list = chp->chunk_next;
		(void) munmap((void *)chp, chp->chunk_size);
	}

	/*
	 * Reinitialize global variables.
	 */

	worker_freelist = NULL;
	worker_freelast = NULL;
	worker_chunksize = 0;
	(void) mutex_init(&worker_lock, USYNC_THREAD, NULL);

	_aio_freelist = NULL;
	_aio_freelast = NULL;
	request_chunksize = 0;
	_aio_freelist_cnt = 0;
	_aio_allocated_cnt = 0;
	(void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);

	_lio_head_freelist = NULL;
	_lio_head_freelast = NULL;
	lio_head_chunksize = 0;
	_lio_alloc = 0;
	_lio_free = 0;
	(void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);

	(void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
	(void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
	__aio_initbusy = 0;

	(void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
	(void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
	(void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);

	_kaio_ok = 0;
	__uaio_ok = 0;

	_kaiowp = NULL;

	__workers_rw = NULL;
	__nextworker_rw = NULL;
	__rw_workerscnt = 0;

	__workers_no = NULL;
	__nextworker_no = NULL;
	__no_workerscnt = 0;

	_aio_worker_cnt = 0;

	_aio_done_head = NULL;
	_aio_done_tail = NULL;
	_aio_donecnt = 0;

	_aio_doneq = NULL;
	_aio_doneq_cnt = 0;

	_aio_waitncnt = 0;
	_aio_outstand_cnt = 0;
	_kaio_outstand_cnt = 0;
	_aio_req_done_cnt = 0;
	_aio_kernel_suspend = 0;
	_aio_suscv_cnt = 0;

	_aiowait_flag = 0;
	_aio_flags = 0;
}

#define	DISPLAY(var)	\
	(void) fprintf(stderr, #var "\t= %d\n", var)

static void
_aio_exit_info(void)
{
	if ((_kaio_ok | __uaio_ok) == 0)
		return;
	(void) fprintf(stderr, "\n");
	DISPLAY(_aio_freelist_cnt);
	DISPLAY(_aio_allocated_cnt);
	DISPLAY(_lio_alloc);
	DISPLAY(_lio_free);
	DISPLAY(__rw_workerscnt);
	DISPLAY(__no_workerscnt);
	DISPLAY(_aio_worker_cnt);
	DISPLAY(_aio_donecnt);
	DISPLAY(_aio_doneq_cnt);
	DISPLAY(_aio_waitncnt);
	DISPLAY(_aio_outstand_cnt);
	DISPLAY(_kaio_outstand_cnt);
	DISPLAY(_aio_req_done_cnt);
	DISPLAY(_aio_kernel_suspend);
	DISPLAY(_aio_suscv_cnt);
	DISPLAY(_aiowait_flag);
	DISPLAY(_aio_flags);
}

void
init_aio(void)
{
	char *str;

	(void) pthread_key_create(&_aio_key, _aio_worker_free);
	if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
		if ((_min_workers = atoi(str)) <= 0)
			_min_workers = 4;
	}
	if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
		if ((_max_workers = atoi(str)) <= 0)
			_max_workers = 256;
		if (_max_workers < _min_workers + 1)
			_max_workers = _min_workers + 1;
	}
	if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
		(void) atexit(_aio_exit_info);
}
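
/*
 * Usage illustration: the tunables read by init_aio() come from the
 * environment, e.g. (shell syntax, values chosen arbitrarily):
 *
 *	_AIO_MIN_WORKERS=8 _AIO_MAX_WORKERS=64 _AIO_EXIT_INFO=1 a.out
 *
 * A nonzero _AIO_EXIT_INFO registers _aio_exit_info() with atexit(),
 * dumping the counters above to stderr when the process exits.
 */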