xref: /illumos-gate/usr/src/lib/libc/port/threads/tsd.c (revision a6e6969cf9cfe2070eae4cd6071f76b0fa4f539f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <stddef.h>
32 
/*
 * 128 million keys should be enough for anyone.
 * With 4-byte pointers this allocates half a gigabyte of memory for the
 * keys themselves and half a gigabyte of memory for each thread that uses
 * the largest key; with 8-byte pointers both figures double.
 */
38 #define	MAX_KEYS	0x08000000U
39 
40 #pragma weak thr_keycreate = _thr_keycreate
41 #pragma weak pthread_key_create = _thr_keycreate
42 #pragma weak _pthread_key_create = _thr_keycreate
43 int
44 _thr_keycreate(thread_key_t *pkey, void (*destructor)(void *))
45 {
46 	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
47 	void (**old_data)(void *) = NULL;
48 	void (**new_data)(void *);
49 	uint_t old_nkeys;
50 	uint_t new_nkeys;
51 
52 	lmutex_lock(&tsdm->tsdm_lock);
53 
54 	/*
55 	 * Unfortunately, pthread_getspecific() specifies that a
56 	 * pthread_getspecific() on an allocated key upon which the
57 	 * calling thread has not performed a pthread_setspecifc()
58 	 * must return NULL.  Consider the following sequence:
59 	 *
60 	 *	pthread_key_create(&key);
61 	 *	pthread_setspecific(key, datum);
62 	 *	pthread_key_delete(&key);
63 	 *	pthread_key_create(&key);
64 	 *	val = pthread_getspecific(key);
65 	 *
66 	 * According to POSIX, if the deleted key is reused for the new
67 	 * key returned by the second pthread_key_create(), then the
68 	 * pthread_getspecific() in the above example must return NULL
69 	 * (and not the stale datum).  The implementation is thus left
70 	 * with two alternatives:
71 	 *
72 	 *  (1)	Reuse deleted keys.  If this is to be implemented optimally,
73 	 *	it requires that pthread_key_create() somehow associate
74 	 *	the value NULL with the new (reused) key for each thread.
75 	 *	Keeping the hot path fast and lock-free induces substantial
76 	 *	complexity on the implementation.
77 	 *
78 	 *  (2)	Never reuse deleted keys. This allows the pthread_getspecific()
79 	 *	implementation to simply perform a check against the number
80 	 *	of keys set by the calling thread, returning NULL if the
81 	 *	specified key is larger than the highest set key.  This has
82 	 *	the disadvantage of wasting memory (a program which simply
83 	 *	loops calling pthread_key_create()/pthread_key_delete()
84 	 *	will ultimately run out of memory), but permits an optimal
85 	 *	pthread_getspecific() while allowing for simple key creation
86 	 *	and deletion.
87 	 *
88 	 * All Solaris implementations have opted for (2).  Given the
89 	 * ~10 years that this has been in the field, it is safe to assume
90 	 * that applications don't loop creating and destroying keys; we
91 	 * stick with (2).
92 	 */
93 	if (tsdm->tsdm_nused == (old_nkeys = tsdm->tsdm_nkeys)) {
94 		/*
95 		 * We need to allocate or double the number of keys.
96 		 * tsdm->tsdm_nused must always be a power of two.
97 		 */
98 		if ((new_nkeys = (old_nkeys << 1)) == 0)
99 			new_nkeys = 8;
100 
101 		if (new_nkeys > MAX_KEYS) {
102 			lmutex_unlock(&tsdm->tsdm_lock);
103 			return (EAGAIN);
104 		}
105 		if ((new_data = lmalloc(new_nkeys * sizeof (void *))) == NULL) {
106 			lmutex_unlock(&tsdm->tsdm_lock);
107 			return (ENOMEM);
108 		}
109 		if ((old_data = tsdm->tsdm_destro) == NULL) {
110 			/* key == 0 is always invalid */
111 			new_data[0] = TSD_UNALLOCATED;
112 			tsdm->tsdm_nused = 1;
113 		} else {
114 			(void) _private_memcpy(new_data, old_data,
115 				old_nkeys * sizeof (void *));
116 		}
117 		tsdm->tsdm_destro = new_data;
118 		tsdm->tsdm_nkeys = new_nkeys;
119 	}
120 
121 	*pkey = tsdm->tsdm_nused;
122 	tsdm->tsdm_destro[tsdm->tsdm_nused++] = destructor;
123 	lmutex_unlock(&tsdm->tsdm_lock);
124 
125 	if (old_data != NULL)
126 		lfree(old_data, old_nkeys * sizeof (void *));
127 
128 	return (0);
129 }
130 
131 /*
132  * Same as _thr_keycreate(), above, except that the key creation
133  * is performed only once.  This relies upon the fact that a key
134  * value of THR_ONCE_KEY is invalid, and requires that the key be
135  * allocated with a value of THR_ONCE_KEY before calling here.
136  * THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h>
137  * and <pthread.h> respectively, must have the same value.
138  * Example:
139  *
140  *	static pthread_key_t key = PTHREAD_ONCE_KEY_NP;
141  *	...
142  *	pthread_key_create_once_np(&key, destructor);
143  */
144 #pragma weak pthread_key_create_once_np = _thr_keycreate_once
145 #pragma weak _pthread_key_create_once_np = _thr_keycreate_once
146 #pragma weak thr_keycreate_once = _thr_keycreate_once
147 int
148 _thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *))
149 {
150 	static mutex_t key_lock = DEFAULTMUTEX;
151 	thread_key_t key;
152 	int error;
153 
154 	if (*keyp == THR_ONCE_KEY) {
155 		lmutex_lock(&key_lock);
156 		if (*keyp == THR_ONCE_KEY) {
157 			error = _thr_keycreate(&key, destructor);
158 			if (error) {
159 				lmutex_unlock(&key_lock);
160 				return (error);
161 			}
162 			_membar_producer();
163 			*keyp = key;
164 		}
165 		lmutex_unlock(&key_lock);
166 	}
167 	_membar_consumer();
168 
169 	return (0);
170 }
171 
172 #pragma weak pthread_key_delete = _thr_key_delete
173 #pragma weak _pthread_key_delete = _thr_key_delete
174 int
175 _thr_key_delete(thread_key_t key)
176 {
177 	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
178 
179 	lmutex_lock(&tsdm->tsdm_lock);
180 
181 	if (key >= tsdm->tsdm_nused ||
182 	    tsdm->tsdm_destro[key] == TSD_UNALLOCATED) {
183 		lmutex_unlock(&tsdm->tsdm_lock);
184 		return (EINVAL);
185 	}
186 
187 	tsdm->tsdm_destro[key] = TSD_UNALLOCATED;
188 	lmutex_unlock(&tsdm->tsdm_lock);
189 
190 	return (0);
191 }
192 
193 /*
194  * Blessedly, the pthread_getspecific() interface is much better than the
195  * thr_getspecific() interface in that it cannot return an error status.
196  * Thus, if the key specified is bogus, pthread_getspecific()'s behavior
197  * is undefined.  As an added bonus (and as an artificat of not returning
198  * an error code), the requested datum is returned rather than stored
199  * through a parameter -- thereby avoiding the unnecessary store/load pair
200  * incurred by thr_getspecific().  Every once in a while, the Standards
201  * get it right -- but usually by accident.
202  */
203 #pragma weak	pthread_getspecific	= _pthread_getspecific
204 void *
205 _pthread_getspecific(pthread_key_t key)
206 {
207 	tsd_t *stsd;
208 
209 	/*
210 	 * We are cycle-shaving in this function because some
211 	 * applications make heavy use of it and one machine cycle
212 	 * can make a measurable difference in performance.  This
213 	 * is why we waste a little memory and allocate a NULL value
214 	 * for the invalid key == 0 in curthread->ul_ftsd[0] rather
215 	 * than adjusting the key by subtracting one.
216 	 */
217 	if (key < TSD_NFAST)
218 		return (curthread->ul_ftsd[key]);
219 
220 	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
221 		return (stsd->tsd_data[key]);
222 
223 	return (NULL);
224 }
225 
226 #pragma weak thr_getspecific = _thr_getspecific
227 int
228 _thr_getspecific(thread_key_t key, void **valuep)
229 {
230 	tsd_t *stsd;
231 
232 	/*
233 	 * Amazingly, some application code (and worse, some particularly
234 	 * fugly Solaris library code) _relies_ on the fact that 0 is always
235 	 * an invalid key.  To preserve this semantic, 0 is never returned
236 	 * as a key from thr_/pthread_key_create(); we explicitly check
237 	 * for it here and return EINVAL.
238 	 */
239 	if (key == 0)
240 		return (EINVAL);
241 
242 	if (key < TSD_NFAST)
243 		*valuep = curthread->ul_ftsd[key];
244 	else if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
245 		*valuep = stsd->tsd_data[key];
246 	else
247 		*valuep = NULL;
248 
249 	return (0);
250 }
251 
252 /*
253  * We call _thr_setspecific_slow() when the key specified
254  * is beyond the current thread's currently allocated range.
255  * This case is in a separate function because we want
256  * the compiler to optimize for the common case.
257  */
258 static int
259 _thr_setspecific_slow(thread_key_t key, void *value)
260 {
261 	ulwp_t *self = curthread;
262 	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
263 	tsd_t *stsd;
264 	tsd_t *ntsd;
265 	uint_t nkeys;
266 
267 	/*
268 	 * It isn't necessary to grab locks in this path;
269 	 * tsdm->tsdm_nused can only increase.
270 	 */
271 	if (key >= tsdm->tsdm_nused)
272 		return (EINVAL);
273 
274 	/*
275 	 * We would like to test (tsdm->tsdm_destro[key] == TSD_UNALLOCATED)
276 	 * here but that would require acquiring tsdm->tsdm_lock and we
277 	 * want to avoid locks in this path.
278 	 *
279 	 * We have a key which is (or at least _was_) valid.  If this key
280 	 * is later deleted (or indeed, is deleted before we set the value),
281 	 * we don't care; such a condition would indicate an application
282 	 * race for which POSIX thankfully leaves the behavior unspecified.
283 	 *
284 	 * First, determine our new size.  To avoid allocating more than we
285 	 * have to, continue doubling our size only until the new key fits.
286 	 * stsd->tsd_nalloc must always be a power of two.
287 	 */
288 	nkeys = ((stsd = self->ul_stsd) != NULL)? stsd->tsd_nalloc : 8;
289 	for (; key >= nkeys; nkeys <<= 1)
290 		continue;
291 
292 	/*
293 	 * Allocate the new TSD.
294 	 */
295 	if ((ntsd = lmalloc(nkeys * sizeof (void *))) == NULL)
296 		return (ENOMEM);
297 
298 	if (stsd != NULL) {
299 		/*
300 		 * Copy the old TSD across to the new.
301 		 */
302 		(void) _private_memcpy(ntsd, stsd,
303 			stsd->tsd_nalloc * sizeof (void *));
304 		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
305 	}
306 
307 	ntsd->tsd_nalloc = nkeys;
308 	ntsd->tsd_data[key] = value;
309 	self->ul_stsd = ntsd;
310 
311 	return (0);
312 }
313 
314 #pragma weak thr_setspecific = _thr_setspecific
315 #pragma weak pthread_setspecific = _thr_setspecific
316 #pragma weak _pthread_setspecific = _thr_setspecific
317 int
318 _thr_setspecific(thread_key_t key, void *value)
319 {
320 	tsd_t *stsd;
321 	int ret;
322 	ulwp_t *self = curthread;
323 
324 	/*
325 	 * See the comment in _thr_getspecific(), above.
326 	 */
327 	if (key == 0)
328 		return (EINVAL);
329 
330 	if (key < TSD_NFAST) {
331 		curthread->ul_ftsd[key] = value;
332 		return (0);
333 	}
334 
335 	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) {
336 		stsd->tsd_data[key] = value;
337 		return (0);
338 	}
339 
340 	/*
341 	 * This is a critical region since we are dealing with memory
342 	 * allocation and free. Similar protection required in tsd_free().
343 	 */
344 	enter_critical(self);
345 	ret = _thr_setspecific_slow(key, value);
346 	exit_critical(self);
347 	return (ret);
348 }
349 
350 /*
351  * Contract-private interface for java.  See PSARC/2003/159
352  *
353  * If the key falls within the TSD_NFAST range, return a non-negative
354  * offset that can be used by the caller to fetch the TSD data value
355  * directly out of the thread structure using %g7 (sparc) or %gs (x86).
356  * With the advent of TLS, %g7 and %gs are part of the ABI, even though
357  * the definition of the thread structure itself (ulwp_t) is private.
358  *
359  * We guarantee that the offset returned on sparc will fit within
360  * a SIMM13 field (that is, it is less than 2048).
361  *
362  * On failure (key is not in the TSD_NFAST range), return -1.
363  */
364 ptrdiff_t
365 _thr_slot_offset(thread_key_t key)
366 {
367 	if (key != 0 && key < TSD_NFAST)
368 		return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key]));
369 	return (-1);
370 }
371 
372 /*
373  * This is called by _thrp_exit() to apply destructors to the thread's tsd.
374  */
375 void
376 tsd_exit()
377 {
378 	ulwp_t *self = curthread;
379 	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
380 	thread_key_t key;
381 	int recheck;
382 	void *val;
383 	void (*func)(void *);
384 
385 	lmutex_lock(&tsdm->tsdm_lock);
386 
387 	do {
388 		recheck = 0;
389 
390 		for (key = 1; key < TSD_NFAST &&
391 		    key < tsdm->tsdm_nused; key++) {
392 			if ((func = tsdm->tsdm_destro[key]) != NULL &&
393 			    func != TSD_UNALLOCATED &&
394 			    (val = self->ul_ftsd[key]) != NULL) {
395 				self->ul_ftsd[key] = NULL;
396 				lmutex_unlock(&tsdm->tsdm_lock);
397 				(*func)(val);
398 				lmutex_lock(&tsdm->tsdm_lock);
399 				recheck = 1;
400 			}
401 		}
402 
403 		if (self->ul_stsd == NULL)
404 			continue;
405 
406 		/*
407 		 * Any of these destructors could cause us to grow the number
408 		 * TSD keys in the slow TSD; we cannot cache the slow TSD
409 		 * pointer through this loop.
410 		 */
411 		for (; key < self->ul_stsd->tsd_nalloc &&
412 		    key < tsdm->tsdm_nused; key++) {
413 			if ((func = tsdm->tsdm_destro[key]) != NULL &&
414 			    func != TSD_UNALLOCATED &&
415 			    (val = self->ul_stsd->tsd_data[key]) != NULL) {
416 				self->ul_stsd->tsd_data[key] = NULL;
417 				lmutex_unlock(&tsdm->tsdm_lock);
418 				(*func)(val);
419 				lmutex_lock(&tsdm->tsdm_lock);
420 				recheck = 1;
421 			}
422 		}
423 	} while (recheck);
424 
425 	lmutex_unlock(&tsdm->tsdm_lock);
426 
427 	/*
428 	 * We're done; if we have slow TSD, we need to free it.
429 	 */
430 	tsd_free(self);
431 }
432 
433 void
434 tsd_free(ulwp_t *ulwp)
435 {
436 	tsd_t *stsd;
437 	ulwp_t *self = curthread;
438 
439 	enter_critical(self);
440 	if ((stsd = ulwp->ul_stsd) != NULL)
441 		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
442 	ulwp->ul_stsd = NULL;
443 	exit_critical(self);
444 }
445