1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "lint.h" 28 #include "thr_uberdata.h" 29 #include <stddef.h> 30 31 /* 32 * These symbols should not be exported from libc, but 33 * /lib/libm.so.2 references them. libm needs to be fixed. 34 * Also, some older versions of the Studio compiler/debugger 35 * components reference them. These need to be fixed, too. 36 */ 37 #pragma weak _thr_getspecific = thr_getspecific 38 #pragma weak _thr_keycreate = thr_keycreate 39 #pragma weak _thr_setspecific = thr_setspecific 40 41 /* 42 * 128 million keys should be enough for anyone. 43 * This allocates half a gigabyte of memory for the keys themselves and 44 * half a gigabyte of memory for each thread that uses the largest key. 45 */ 46 #define MAX_KEYS 0x08000000U 47 48 int 49 thr_keycreate(thread_key_t *pkey, void (*destructor)(void *)) 50 { 51 tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata; 52 void (**old_data)(void *) = NULL; 53 void (**new_data)(void *); 54 uint_t old_nkeys; 55 uint_t new_nkeys; 56 57 lmutex_lock(&tsdm->tsdm_lock); 58 59 /* 60 * Unfortunately, pthread_getspecific() specifies that a 61 * pthread_getspecific() on an allocated key upon which the 62 * calling thread has not performed a pthread_setspecifc() 63 * must return NULL. Consider the following sequence: 64 * 65 * pthread_key_create(&key); 66 * pthread_setspecific(key, datum); 67 * pthread_key_delete(&key); 68 * pthread_key_create(&key); 69 * val = pthread_getspecific(key); 70 * 71 * According to POSIX, if the deleted key is reused for the new 72 * key returned by the second pthread_key_create(), then the 73 * pthread_getspecific() in the above example must return NULL 74 * (and not the stale datum). The implementation is thus left 75 * with two alternatives: 76 * 77 * (1) Reuse deleted keys. If this is to be implemented optimally, 78 * it requires that pthread_key_create() somehow associate 79 * the value NULL with the new (reused) key for each thread. 80 * Keeping the hot path fast and lock-free induces substantial 81 * complexity on the implementation. 82 * 83 * (2) Never reuse deleted keys. This allows the pthread_getspecific() 84 * implementation to simply perform a check against the number 85 * of keys set by the calling thread, returning NULL if the 86 * specified key is larger than the highest set key. This has 87 * the disadvantage of wasting memory (a program which simply 88 * loops calling pthread_key_create()/pthread_key_delete() 89 * will ultimately run out of memory), but permits an optimal 90 * pthread_getspecific() while allowing for simple key creation 91 * and deletion. 92 * 93 * All Solaris implementations have opted for (2). Given the 94 * ~10 years that this has been in the field, it is safe to assume 95 * that applications don't loop creating and destroying keys; we 96 * stick with (2). 97 */ 98 if (tsdm->tsdm_nused == (old_nkeys = tsdm->tsdm_nkeys)) { 99 /* 100 * We need to allocate or double the number of keys. 101 * tsdm->tsdm_nused must always be a power of two. 102 */ 103 if ((new_nkeys = (old_nkeys << 1)) == 0) 104 new_nkeys = 8; 105 106 if (new_nkeys > MAX_KEYS) { 107 lmutex_unlock(&tsdm->tsdm_lock); 108 return (EAGAIN); 109 } 110 if ((new_data = lmalloc(new_nkeys * sizeof (void *))) == NULL) { 111 lmutex_unlock(&tsdm->tsdm_lock); 112 return (ENOMEM); 113 } 114 if ((old_data = tsdm->tsdm_destro) == NULL) { 115 /* key == 0 is always invalid */ 116 new_data[0] = TSD_UNALLOCATED; 117 tsdm->tsdm_nused = 1; 118 } else { 119 (void) memcpy(new_data, old_data, 120 old_nkeys * sizeof (void *)); 121 } 122 tsdm->tsdm_destro = new_data; 123 tsdm->tsdm_nkeys = new_nkeys; 124 } 125 126 *pkey = tsdm->tsdm_nused; 127 tsdm->tsdm_destro[tsdm->tsdm_nused++] = destructor; 128 lmutex_unlock(&tsdm->tsdm_lock); 129 130 if (old_data != NULL) 131 lfree(old_data, old_nkeys * sizeof (void *)); 132 133 return (0); 134 } 135 136 #pragma weak _pthread_key_create = pthread_key_create 137 int 138 pthread_key_create(pthread_key_t *pkey, void (*destructor)(void *)) 139 { 140 return (thr_keycreate(pkey, destructor)); 141 } 142 143 /* 144 * Same as thr_keycreate(), above, except that the key creation 145 * is performed only once. This relies upon the fact that a key 146 * value of THR_ONCE_KEY is invalid, and requires that the key be 147 * allocated with a value of THR_ONCE_KEY before calling here. 148 * THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h> 149 * and <pthread.h> respectively, must have the same value. 150 * Example: 151 * 152 * static pthread_key_t key = PTHREAD_ONCE_KEY_NP; 153 * ... 154 * pthread_key_create_once_np(&key, destructor); 155 */ 156 #pragma weak pthread_key_create_once_np = thr_keycreate_once 157 int 158 thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *)) 159 { 160 static mutex_t key_lock = DEFAULTMUTEX; 161 thread_key_t key; 162 int error; 163 164 if (*keyp == THR_ONCE_KEY) { 165 lmutex_lock(&key_lock); 166 if (*keyp == THR_ONCE_KEY) { 167 error = thr_keycreate(&key, destructor); 168 if (error) { 169 lmutex_unlock(&key_lock); 170 return (error); 171 } 172 membar_producer(); 173 *keyp = key; 174 } 175 lmutex_unlock(&key_lock); 176 } 177 membar_consumer(); 178 179 return (0); 180 } 181 182 int 183 pthread_key_delete(pthread_key_t key) 184 { 185 tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata; 186 187 lmutex_lock(&tsdm->tsdm_lock); 188 189 if (key >= tsdm->tsdm_nused || 190 tsdm->tsdm_destro[key] == TSD_UNALLOCATED) { 191 lmutex_unlock(&tsdm->tsdm_lock); 192 return (EINVAL); 193 } 194 195 tsdm->tsdm_destro[key] = TSD_UNALLOCATED; 196 lmutex_unlock(&tsdm->tsdm_lock); 197 198 return (0); 199 } 200 201 /* 202 * Blessedly, the pthread_getspecific() interface is much better than the 203 * thr_getspecific() interface in that it cannot return an error status. 204 * Thus, if the key specified is bogus, pthread_getspecific()'s behavior 205 * is undefined. As an added bonus (and as an artificat of not returning 206 * an error code), the requested datum is returned rather than stored 207 * through a parameter -- thereby avoiding the unnecessary store/load pair 208 * incurred by thr_getspecific(). Every once in a while, the Standards 209 * get it right -- but usually by accident. 210 */ 211 void * 212 pthread_getspecific(pthread_key_t key) 213 { 214 tsd_t *stsd; 215 216 /* 217 * We are cycle-shaving in this function because some 218 * applications make heavy use of it and one machine cycle 219 * can make a measurable difference in performance. This 220 * is why we waste a little memory and allocate a NULL value 221 * for the invalid key == 0 in curthread->ul_ftsd[0] rather 222 * than adjusting the key by subtracting one. 223 */ 224 if (key < TSD_NFAST) 225 return (curthread->ul_ftsd[key]); 226 227 if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) 228 return (stsd->tsd_data[key]); 229 230 return (NULL); 231 } 232 233 int 234 thr_getspecific(thread_key_t key, void **valuep) 235 { 236 tsd_t *stsd; 237 238 /* 239 * Amazingly, some application code (and worse, some particularly 240 * fugly Solaris library code) _relies_ on the fact that 0 is always 241 * an invalid key. To preserve this semantic, 0 is never returned 242 * as a key from thr_/pthread_key_create(); we explicitly check 243 * for it here and return EINVAL. 244 */ 245 if (key == 0) 246 return (EINVAL); 247 248 if (key < TSD_NFAST) 249 *valuep = curthread->ul_ftsd[key]; 250 else if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) 251 *valuep = stsd->tsd_data[key]; 252 else 253 *valuep = NULL; 254 255 return (0); 256 } 257 258 /* 259 * We call thr_setspecific_slow() when the key specified 260 * is beyond the current thread's currently allocated range. 261 * This case is in a separate function because we want 262 * the compiler to optimize for the common case. 263 */ 264 static int 265 thr_setspecific_slow(thread_key_t key, void *value) 266 { 267 ulwp_t *self = curthread; 268 tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata; 269 tsd_t *stsd; 270 tsd_t *ntsd; 271 uint_t nkeys; 272 273 /* 274 * It isn't necessary to grab locks in this path; 275 * tsdm->tsdm_nused can only increase. 276 */ 277 if (key >= tsdm->tsdm_nused) 278 return (EINVAL); 279 280 /* 281 * We would like to test (tsdm->tsdm_destro[key] == TSD_UNALLOCATED) 282 * here but that would require acquiring tsdm->tsdm_lock and we 283 * want to avoid locks in this path. 284 * 285 * We have a key which is (or at least _was_) valid. If this key 286 * is later deleted (or indeed, is deleted before we set the value), 287 * we don't care; such a condition would indicate an application 288 * race for which POSIX thankfully leaves the behavior unspecified. 289 * 290 * First, determine our new size. To avoid allocating more than we 291 * have to, continue doubling our size only until the new key fits. 292 * stsd->tsd_nalloc must always be a power of two. 293 */ 294 nkeys = ((stsd = self->ul_stsd) != NULL)? stsd->tsd_nalloc : 8; 295 for (; key >= nkeys; nkeys <<= 1) 296 continue; 297 298 /* 299 * Allocate the new TSD. 300 */ 301 if ((ntsd = lmalloc(nkeys * sizeof (void *))) == NULL) 302 return (ENOMEM); 303 304 if (stsd != NULL) { 305 /* 306 * Copy the old TSD across to the new. 307 */ 308 (void) memcpy(ntsd, stsd, stsd->tsd_nalloc * sizeof (void *)); 309 lfree(stsd, stsd->tsd_nalloc * sizeof (void *)); 310 } 311 312 ntsd->tsd_nalloc = nkeys; 313 ntsd->tsd_data[key] = value; 314 self->ul_stsd = ntsd; 315 316 return (0); 317 } 318 319 int 320 thr_setspecific(thread_key_t key, void *value) 321 { 322 tsd_t *stsd; 323 int ret; 324 ulwp_t *self = curthread; 325 326 /* 327 * See the comment in thr_getspecific(), above. 328 */ 329 if (key == 0) 330 return (EINVAL); 331 332 if (key < TSD_NFAST) { 333 curthread->ul_ftsd[key] = value; 334 return (0); 335 } 336 337 if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) { 338 stsd->tsd_data[key] = value; 339 return (0); 340 } 341 342 /* 343 * This is a critical region since we are dealing with memory 344 * allocation and free. Similar protection required in tsd_free(). 345 */ 346 enter_critical(self); 347 ret = thr_setspecific_slow(key, value); 348 exit_critical(self); 349 return (ret); 350 } 351 352 int 353 pthread_setspecific(pthread_key_t key, const void *value) 354 { 355 return (thr_setspecific(key, (void *)value)); 356 } 357 358 /* 359 * Contract-private interface for java. See PSARC/2003/159 360 * 361 * If the key falls within the TSD_NFAST range, return a non-negative 362 * offset that can be used by the caller to fetch the TSD data value 363 * directly out of the thread structure using %g7 (sparc) or %gs (x86). 364 * With the advent of TLS, %g7 and %gs are part of the ABI, even though 365 * the definition of the thread structure itself (ulwp_t) is private. 366 * 367 * We guarantee that the offset returned on sparc will fit within 368 * a SIMM13 field (that is, it is less than 2048). 369 * 370 * On failure (key is not in the TSD_NFAST range), return -1. 371 */ 372 ptrdiff_t 373 _thr_slot_offset(thread_key_t key) 374 { 375 if (key != 0 && key < TSD_NFAST) 376 return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key])); 377 return (-1); 378 } 379 380 /* 381 * This is called by _thrp_exit() to apply destructors to the thread's tsd. 382 */ 383 void 384 tsd_exit() 385 { 386 ulwp_t *self = curthread; 387 tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata; 388 thread_key_t key; 389 int recheck; 390 void *val; 391 void (*func)(void *); 392 393 lmutex_lock(&tsdm->tsdm_lock); 394 395 do { 396 recheck = 0; 397 398 for (key = 1; key < TSD_NFAST && 399 key < tsdm->tsdm_nused; key++) { 400 if ((func = tsdm->tsdm_destro[key]) != NULL && 401 func != TSD_UNALLOCATED && 402 (val = self->ul_ftsd[key]) != NULL) { 403 self->ul_ftsd[key] = NULL; 404 lmutex_unlock(&tsdm->tsdm_lock); 405 (*func)(val); 406 lmutex_lock(&tsdm->tsdm_lock); 407 recheck = 1; 408 } 409 } 410 411 if (self->ul_stsd == NULL) 412 continue; 413 414 /* 415 * Any of these destructors could cause us to grow the number 416 * TSD keys in the slow TSD; we cannot cache the slow TSD 417 * pointer through this loop. 418 */ 419 for (; key < self->ul_stsd->tsd_nalloc && 420 key < tsdm->tsdm_nused; key++) { 421 if ((func = tsdm->tsdm_destro[key]) != NULL && 422 func != TSD_UNALLOCATED && 423 (val = self->ul_stsd->tsd_data[key]) != NULL) { 424 self->ul_stsd->tsd_data[key] = NULL; 425 lmutex_unlock(&tsdm->tsdm_lock); 426 (*func)(val); 427 lmutex_lock(&tsdm->tsdm_lock); 428 recheck = 1; 429 } 430 } 431 } while (recheck); 432 433 lmutex_unlock(&tsdm->tsdm_lock); 434 435 /* 436 * We're done; if we have slow TSD, we need to free it. 437 */ 438 tsd_free(self); 439 } 440 441 void 442 tsd_free(ulwp_t *ulwp) 443 { 444 tsd_t *stsd; 445 ulwp_t *self = curthread; 446 447 enter_critical(self); 448 if ((stsd = ulwp->ul_stsd) != NULL) 449 lfree(stsd, stsd->tsd_nalloc * sizeof (void *)); 450 ulwp->ul_stsd = NULL; 451 exit_critical(self); 452 } 453