xref: /illumos-gate/usr/src/lib/libc/port/threads/tsd.c (revision 8b80e8cb6855118d46f605e91b5ed4ce83417395)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 #include <stddef.h>
32 
33 /*
34  * These symbols should not be exported from libc, but
35  * /lib/libm.so.2 references them.  libm needs to be fixed.
36  * Also, some older versions of the Studio compiler/debugger
37  * components reference them.  These need to be fixed, too.
38  */
39 #pragma weak _thr_getspecific = thr_getspecific
40 #pragma weak _thr_keycreate = thr_keycreate
41 #pragma weak _thr_setspecific = thr_setspecific
42 
43 /*
44  * 128 million keys should be enough for anyone.
45  * This allocates half a gigabyte of memory for the keys themselves and
46  * half a gigabyte of memory for each thread that uses the largest key.
47  */
48 #define	MAX_KEYS	0x08000000U
49 
50 int
51 thr_keycreate(thread_key_t *pkey, void (*destructor)(void *))
52 {
53 	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
54 	void (**old_data)(void *) = NULL;
55 	void (**new_data)(void *);
56 	uint_t old_nkeys;
57 	uint_t new_nkeys;
58 
59 	lmutex_lock(&tsdm->tsdm_lock);
60 
61 	/*
62 	 * Unfortunately, pthread_getspecific() specifies that a
63 	 * pthread_getspecific() on an allocated key upon which the
64 	 * calling thread has not performed a pthread_setspecifc()
65 	 * must return NULL.  Consider the following sequence:
66 	 *
67 	 *	pthread_key_create(&key);
68 	 *	pthread_setspecific(key, datum);
69 	 *	pthread_key_delete(&key);
70 	 *	pthread_key_create(&key);
71 	 *	val = pthread_getspecific(key);
72 	 *
73 	 * According to POSIX, if the deleted key is reused for the new
74 	 * key returned by the second pthread_key_create(), then the
75 	 * pthread_getspecific() in the above example must return NULL
76 	 * (and not the stale datum).  The implementation is thus left
77 	 * with two alternatives:
78 	 *
79 	 *  (1)	Reuse deleted keys.  If this is to be implemented optimally,
80 	 *	it requires that pthread_key_create() somehow associate
81 	 *	the value NULL with the new (reused) key for each thread.
82 	 *	Keeping the hot path fast and lock-free induces substantial
83 	 *	complexity on the implementation.
84 	 *
85 	 *  (2)	Never reuse deleted keys. This allows the pthread_getspecific()
86 	 *	implementation to simply perform a check against the number
87 	 *	of keys set by the calling thread, returning NULL if the
88 	 *	specified key is larger than the highest set key.  This has
89 	 *	the disadvantage of wasting memory (a program which simply
90 	 *	loops calling pthread_key_create()/pthread_key_delete()
91 	 *	will ultimately run out of memory), but permits an optimal
92 	 *	pthread_getspecific() while allowing for simple key creation
93 	 *	and deletion.
94 	 *
95 	 * All Solaris implementations have opted for (2).  Given the
96 	 * ~10 years that this has been in the field, it is safe to assume
97 	 * that applications don't loop creating and destroying keys; we
98 	 * stick with (2).
99 	 */
100 	if (tsdm->tsdm_nused == (old_nkeys = tsdm->tsdm_nkeys)) {
101 		/*
102 		 * We need to allocate or double the number of keys.
103 		 * tsdm->tsdm_nused must always be a power of two.
104 		 */
105 		if ((new_nkeys = (old_nkeys << 1)) == 0)
106 			new_nkeys = 8;
107 
108 		if (new_nkeys > MAX_KEYS) {
109 			lmutex_unlock(&tsdm->tsdm_lock);
110 			return (EAGAIN);
111 		}
112 		if ((new_data = lmalloc(new_nkeys * sizeof (void *))) == NULL) {
113 			lmutex_unlock(&tsdm->tsdm_lock);
114 			return (ENOMEM);
115 		}
116 		if ((old_data = tsdm->tsdm_destro) == NULL) {
117 			/* key == 0 is always invalid */
118 			new_data[0] = TSD_UNALLOCATED;
119 			tsdm->tsdm_nused = 1;
120 		} else {
121 			(void) memcpy(new_data, old_data,
122 			    old_nkeys * sizeof (void *));
123 		}
124 		tsdm->tsdm_destro = new_data;
125 		tsdm->tsdm_nkeys = new_nkeys;
126 	}
127 
128 	*pkey = tsdm->tsdm_nused;
129 	tsdm->tsdm_destro[tsdm->tsdm_nused++] = destructor;
130 	lmutex_unlock(&tsdm->tsdm_lock);
131 
132 	if (old_data != NULL)
133 		lfree(old_data, old_nkeys * sizeof (void *));
134 
135 	return (0);
136 }
137 
138 #pragma weak _pthread_key_create = pthread_key_create
139 int
140 pthread_key_create(pthread_key_t *pkey, void (*destructor)(void *))
141 {
142 	return (thr_keycreate(pkey, destructor));
143 }
144 
145 /*
146  * Same as thr_keycreate(), above, except that the key creation
147  * is performed only once.  This relies upon the fact that a key
148  * value of THR_ONCE_KEY is invalid, and requires that the key be
149  * allocated with a value of THR_ONCE_KEY before calling here.
150  * THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h>
151  * and <pthread.h> respectively, must have the same value.
152  * Example:
153  *
154  *	static pthread_key_t key = PTHREAD_ONCE_KEY_NP;
155  *	...
156  *	pthread_key_create_once_np(&key, destructor);
157  */
158 #pragma weak pthread_key_create_once_np = thr_keycreate_once
159 int
160 thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *))
161 {
162 	static mutex_t key_lock = DEFAULTMUTEX;
163 	thread_key_t key;
164 	int error;
165 
166 	if (*keyp == THR_ONCE_KEY) {
167 		lmutex_lock(&key_lock);
168 		if (*keyp == THR_ONCE_KEY) {
169 			error = thr_keycreate(&key, destructor);
170 			if (error) {
171 				lmutex_unlock(&key_lock);
172 				return (error);
173 			}
174 			membar_producer();
175 			*keyp = key;
176 		}
177 		lmutex_unlock(&key_lock);
178 	}
179 	membar_consumer();
180 
181 	return (0);
182 }
183 
184 int
185 pthread_key_delete(pthread_key_t key)
186 {
187 	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
188 
189 	lmutex_lock(&tsdm->tsdm_lock);
190 
191 	if (key >= tsdm->tsdm_nused ||
192 	    tsdm->tsdm_destro[key] == TSD_UNALLOCATED) {
193 		lmutex_unlock(&tsdm->tsdm_lock);
194 		return (EINVAL);
195 	}
196 
197 	tsdm->tsdm_destro[key] = TSD_UNALLOCATED;
198 	lmutex_unlock(&tsdm->tsdm_lock);
199 
200 	return (0);
201 }
202 
203 /*
204  * Blessedly, the pthread_getspecific() interface is much better than the
205  * thr_getspecific() interface in that it cannot return an error status.
206  * Thus, if the key specified is bogus, pthread_getspecific()'s behavior
207  * is undefined.  As an added bonus (and as an artificat of not returning
208  * an error code), the requested datum is returned rather than stored
209  * through a parameter -- thereby avoiding the unnecessary store/load pair
210  * incurred by thr_getspecific().  Every once in a while, the Standards
211  * get it right -- but usually by accident.
212  */
213 void *
214 pthread_getspecific(pthread_key_t key)
215 {
216 	tsd_t *stsd;
217 
218 	/*
219 	 * We are cycle-shaving in this function because some
220 	 * applications make heavy use of it and one machine cycle
221 	 * can make a measurable difference in performance.  This
222 	 * is why we waste a little memory and allocate a NULL value
223 	 * for the invalid key == 0 in curthread->ul_ftsd[0] rather
224 	 * than adjusting the key by subtracting one.
225 	 */
226 	if (key < TSD_NFAST)
227 		return (curthread->ul_ftsd[key]);
228 
229 	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
230 		return (stsd->tsd_data[key]);
231 
232 	return (NULL);
233 }
234 
235 int
236 thr_getspecific(thread_key_t key, void **valuep)
237 {
238 	tsd_t *stsd;
239 
240 	/*
241 	 * Amazingly, some application code (and worse, some particularly
242 	 * fugly Solaris library code) _relies_ on the fact that 0 is always
243 	 * an invalid key.  To preserve this semantic, 0 is never returned
244 	 * as a key from thr_/pthread_key_create(); we explicitly check
245 	 * for it here and return EINVAL.
246 	 */
247 	if (key == 0)
248 		return (EINVAL);
249 
250 	if (key < TSD_NFAST)
251 		*valuep = curthread->ul_ftsd[key];
252 	else if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
253 		*valuep = stsd->tsd_data[key];
254 	else
255 		*valuep = NULL;
256 
257 	return (0);
258 }
259 
260 /*
261  * We call thr_setspecific_slow() when the key specified
262  * is beyond the current thread's currently allocated range.
263  * This case is in a separate function because we want
264  * the compiler to optimize for the common case.
265  */
266 static int
267 thr_setspecific_slow(thread_key_t key, void *value)
268 {
269 	ulwp_t *self = curthread;
270 	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
271 	tsd_t *stsd;
272 	tsd_t *ntsd;
273 	uint_t nkeys;
274 
275 	/*
276 	 * It isn't necessary to grab locks in this path;
277 	 * tsdm->tsdm_nused can only increase.
278 	 */
279 	if (key >= tsdm->tsdm_nused)
280 		return (EINVAL);
281 
282 	/*
283 	 * We would like to test (tsdm->tsdm_destro[key] == TSD_UNALLOCATED)
284 	 * here but that would require acquiring tsdm->tsdm_lock and we
285 	 * want to avoid locks in this path.
286 	 *
287 	 * We have a key which is (or at least _was_) valid.  If this key
288 	 * is later deleted (or indeed, is deleted before we set the value),
289 	 * we don't care; such a condition would indicate an application
290 	 * race for which POSIX thankfully leaves the behavior unspecified.
291 	 *
292 	 * First, determine our new size.  To avoid allocating more than we
293 	 * have to, continue doubling our size only until the new key fits.
294 	 * stsd->tsd_nalloc must always be a power of two.
295 	 */
296 	nkeys = ((stsd = self->ul_stsd) != NULL)? stsd->tsd_nalloc : 8;
297 	for (; key >= nkeys; nkeys <<= 1)
298 		continue;
299 
300 	/*
301 	 * Allocate the new TSD.
302 	 */
303 	if ((ntsd = lmalloc(nkeys * sizeof (void *))) == NULL)
304 		return (ENOMEM);
305 
306 	if (stsd != NULL) {
307 		/*
308 		 * Copy the old TSD across to the new.
309 		 */
310 		(void) memcpy(ntsd, stsd, stsd->tsd_nalloc * sizeof (void *));
311 		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
312 	}
313 
314 	ntsd->tsd_nalloc = nkeys;
315 	ntsd->tsd_data[key] = value;
316 	self->ul_stsd = ntsd;
317 
318 	return (0);
319 }
320 
321 int
322 thr_setspecific(thread_key_t key, void *value)
323 {
324 	tsd_t *stsd;
325 	int ret;
326 	ulwp_t *self = curthread;
327 
328 	/*
329 	 * See the comment in thr_getspecific(), above.
330 	 */
331 	if (key == 0)
332 		return (EINVAL);
333 
334 	if (key < TSD_NFAST) {
335 		curthread->ul_ftsd[key] = value;
336 		return (0);
337 	}
338 
339 	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) {
340 		stsd->tsd_data[key] = value;
341 		return (0);
342 	}
343 
344 	/*
345 	 * This is a critical region since we are dealing with memory
346 	 * allocation and free. Similar protection required in tsd_free().
347 	 */
348 	enter_critical(self);
349 	ret = thr_setspecific_slow(key, value);
350 	exit_critical(self);
351 	return (ret);
352 }
353 
354 int
355 pthread_setspecific(pthread_key_t key, const void *value)
356 {
357 	return (thr_setspecific(key, (void *)value));
358 }
359 
360 /*
361  * Contract-private interface for java.  See PSARC/2003/159
362  *
363  * If the key falls within the TSD_NFAST range, return a non-negative
364  * offset that can be used by the caller to fetch the TSD data value
365  * directly out of the thread structure using %g7 (sparc) or %gs (x86).
366  * With the advent of TLS, %g7 and %gs are part of the ABI, even though
367  * the definition of the thread structure itself (ulwp_t) is private.
368  *
369  * We guarantee that the offset returned on sparc will fit within
370  * a SIMM13 field (that is, it is less than 2048).
371  *
372  * On failure (key is not in the TSD_NFAST range), return -1.
373  */
374 ptrdiff_t
375 _thr_slot_offset(thread_key_t key)
376 {
377 	if (key != 0 && key < TSD_NFAST)
378 		return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key]));
379 	return (-1);
380 }
381 
382 /*
383  * This is called by _thrp_exit() to apply destructors to the thread's tsd.
384  */
385 void
386 tsd_exit()
387 {
388 	ulwp_t *self = curthread;
389 	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
390 	thread_key_t key;
391 	int recheck;
392 	void *val;
393 	void (*func)(void *);
394 
395 	lmutex_lock(&tsdm->tsdm_lock);
396 
397 	do {
398 		recheck = 0;
399 
400 		for (key = 1; key < TSD_NFAST &&
401 		    key < tsdm->tsdm_nused; key++) {
402 			if ((func = tsdm->tsdm_destro[key]) != NULL &&
403 			    func != TSD_UNALLOCATED &&
404 			    (val = self->ul_ftsd[key]) != NULL) {
405 				self->ul_ftsd[key] = NULL;
406 				lmutex_unlock(&tsdm->tsdm_lock);
407 				(*func)(val);
408 				lmutex_lock(&tsdm->tsdm_lock);
409 				recheck = 1;
410 			}
411 		}
412 
413 		if (self->ul_stsd == NULL)
414 			continue;
415 
416 		/*
417 		 * Any of these destructors could cause us to grow the number
418 		 * TSD keys in the slow TSD; we cannot cache the slow TSD
419 		 * pointer through this loop.
420 		 */
421 		for (; key < self->ul_stsd->tsd_nalloc &&
422 		    key < tsdm->tsdm_nused; key++) {
423 			if ((func = tsdm->tsdm_destro[key]) != NULL &&
424 			    func != TSD_UNALLOCATED &&
425 			    (val = self->ul_stsd->tsd_data[key]) != NULL) {
426 				self->ul_stsd->tsd_data[key] = NULL;
427 				lmutex_unlock(&tsdm->tsdm_lock);
428 				(*func)(val);
429 				lmutex_lock(&tsdm->tsdm_lock);
430 				recheck = 1;
431 			}
432 		}
433 	} while (recheck);
434 
435 	lmutex_unlock(&tsdm->tsdm_lock);
436 
437 	/*
438 	 * We're done; if we have slow TSD, we need to free it.
439 	 */
440 	tsd_free(self);
441 }
442 
443 void
444 tsd_free(ulwp_t *ulwp)
445 {
446 	tsd_t *stsd;
447 	ulwp_t *self = curthread;
448 
449 	enter_critical(self);
450 	if ((stsd = ulwp->ul_stsd) != NULL)
451 		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
452 	ulwp->ul_stsd = NULL;
453 	exit_critical(self);
454 }
455