xref: /freebsd/libexec/rtld-elf/rtld_lock.c (revision 29332c0dcee1e80c9fb871e06c3160bd5deb1b44)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright 1999, 2000 John D. Polstra.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  *	from: FreeBSD: src/libexec/rtld-elf/sparc64/lockdflt.c,v 1.3 2002/10/09
28  * $FreeBSD$
29  */
30 
31 /*
32  * Thread locking implementation for the dynamic linker.
33  *
34  * We use the "simple, non-scalable reader-preference lock" from:
35  *
36  *   J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
37  *   Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
38  *   Principles and Practice of Parallel Programming, April 1991.
39  *
40  * In this algorithm the lock is a single word.  Its low-order bit is
41  * set when a writer holds the lock.  The remaining high-order bits
42  * contain a count of readers desiring the lock.  The algorithm requires
43  * atomic "compare_and_store" and "add" operations, which we take
44  * from machine/atomic.h.
45  */
46 
47 #include <sys/param.h>
48 #include <sys/signalvar.h>
49 #include <signal.h>
50 #include <stdlib.h>
51 #include <time.h>
52 
53 #include "debug.h"
54 #include "rtld.h"
55 #include "rtld_machdep.h"
56 #include "rtld_libc.h"
57 
/*
 * Prototypes for the entry points defined at the end of this file and
 * marked __exported: the hook used to install a replacement set of
 * locking callbacks, and the hooks run before and after fork.
 */
void _rtld_thread_init(struct RtldLockInfo *) __exported;
void _rtld_atfork_pre(int *) __exported;
void _rtld_atfork_post(int *) __exported;
61 
/*
 * Storage behind the default dlerror callbacks: the message buffer
 * returned by def_dlerror_loc() and the flag whose address is returned by
 * def_dlerror_seen().
 */
static char def_dlerror_msg[512];
static int def_dlerror_seen_val = 1;
64 
65 static char *
66 def_dlerror_loc(void)
67 {
68 	return (def_dlerror_msg);
69 }
70 
71 static int *
72 def_dlerror_seen(void)
73 {
74 	return (&def_dlerror_seen_val);
75 }
76 
#define WAFLAG		0x1	/* A writer holds the lock */
#define RC_INCR		0x2	/* Adjusts count of readers desiring lock */

/*
 * Default lock object.  "lock" is the lock word operated on by the
 * atomic primitives below; "base" is the pointer originally obtained
 * from the allocator, kept so that def_lock_destroy() can free it.
 */
typedef struct Struct_Lock {
	volatile u_int lock;
	void *base;
} Lock;

/*
 * State shared by the default lock implementation:
 *  fullsigmask - filled with sigfillset() in lockdflt_init() and used to
 *                block signals in def_wlock_acquire();
 *  oldsigmask  - mask saved when wnested goes from 0 to 1, restored by
 *                def_lock_release() when wnested returns to 0;
 *  thread_flag - bit mask maintained by def_thread_set_flag() and
 *                def_thread_clr_flag();
 *  wnested     - count of write locks held via the sigprocmask() path;
 *  fsigblock   - word registered with __sys_sigfastblock() and adjusted
 *                in steps of SIGFASTBLOCK_INC.
 */
static sigset_t fullsigmask, oldsigmask;
static int thread_flag, wnested;
static uint32_t fsigblock;
88 
89 static void *
90 def_lock_create(void)
91 {
92     void *base;
93     char *p;
94     uintptr_t r;
95     Lock *l;
96 
97     /*
98      * Arrange for the lock to occupy its own cache line.  First, we
99      * optimistically allocate just a cache line, hoping that malloc
100      * will give us a well-aligned block of memory.  If that doesn't
101      * work, we allocate a larger block and take a well-aligned cache
102      * line from it.
103      */
104     base = xmalloc(CACHE_LINE_SIZE);
105     p = (char *)base;
106     if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
107 	free(base);
108 	base = xmalloc(2 * CACHE_LINE_SIZE);
109 	p = (char *)base;
110 	if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
111 	    p += CACHE_LINE_SIZE - r;
112     }
113     l = (Lock *)p;
114     l->base = base;
115     l->lock = 0;
116     return l;
117 }
118 
119 static void
120 def_lock_destroy(void *lock)
121 {
122     Lock *l = (Lock *)lock;
123 
124     free(l->base);
125 }
126 
127 static void
128 def_rlock_acquire(void *lock)
129 {
130     Lock *l = (Lock *)lock;
131 
132     atomic_add_acq_int(&l->lock, RC_INCR);
133     while (l->lock & WAFLAG)
134 	    ;	/* Spin */
135 }
136 
137 static void
138 sig_fastunblock(void)
139 {
140 	uint32_t oldval;
141 
142 	assert((fsigblock & ~SIGFASTBLOCK_FLAGS) >= SIGFASTBLOCK_INC);
143 	oldval = atomic_fetchadd_32(&fsigblock, -SIGFASTBLOCK_INC);
144 	if (oldval == (SIGFASTBLOCK_PEND | SIGFASTBLOCK_INC))
145 		__sys_sigfastblock(SIGFASTBLOCK_UNBLOCK, NULL);
146 }
147 
/*
 * Acquire a default lock for writing, with signals held off.
 *
 * Each attempt first blocks signals and then tries to atomically change
 * the lock word from 0 (no readers, no writer) to WAFLAG.  When the
 * attempt fails the blocking is undone before retrying, so signals stay
 * blocked only once the lock is actually held.
 *
 * The blocking mechanism is selected by ld_fast_sigblock:
 *  - set:   add SIGFASTBLOCK_INC to fsigblock, undone by
 *           sig_fastunblock();
 *  - clear: sigprocmask() with fullsigmask.  The mask in effect before
 *           the outermost write lock (wnested going from 0 to 1) is kept
 *           in oldsigmask for def_lock_release() to restore.
 */
static void
def_wlock_acquire(void *lock)
{
	Lock *l;
	sigset_t tmp_oldsigmask;

	l = (Lock *)lock;
	if (ld_fast_sigblock) {
		for (;;) {
			atomic_add_32(&fsigblock, SIGFASTBLOCK_INC);
			if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
				break;
			/* Lock is busy: drop the block before trying again. */
			sig_fastunblock();
		}
	} else {
		for (;;) {
			sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
			if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
				break;
			/* Lock is busy: restore the mask before trying again. */
			sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
		}
		/* Only the outermost write lock records the mask to restore. */
		if (atomic_fetchadd_int(&wnested, 1) == 0)
			oldsigmask = tmp_oldsigmask;
	}
}
173 
/*
 * Release a default lock, whether it was taken for reading or writing.
 *
 * The mode is inferred from the lock word.  If WAFLAG is clear the
 * caller is treated as a reader and RC_INCR is subtracted.  Otherwise
 * WAFLAG is cleared and the signal blocking established by
 * def_wlock_acquire() is undone: via sig_fastunblock() when
 * ld_fast_sigblock is set, else by restoring oldsigmask once wnested
 * drops back to zero.
 */
static void
def_lock_release(void *lock)
{
	Lock *l;

	l = (Lock *)lock;
	if ((l->lock & WAFLAG) == 0)
		atomic_add_rel_int(&l->lock, -RC_INCR);
	else {
		atomic_add_rel_int(&l->lock, -WAFLAG);
		if (ld_fast_sigblock)
			sig_fastunblock();
		/* A previous value of 1 means the last nested write lock is gone. */
		else if (atomic_fetchadd_int(&wnested, -1) == 1)
			sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
	}
}
190 
191 static int
192 def_thread_set_flag(int mask)
193 {
194 	int old_val = thread_flag;
195 	thread_flag |= mask;
196 	return (old_val);
197 }
198 
199 static int
200 def_thread_clr_flag(int mask)
201 {
202 	int old_val = thread_flag;
203 	thread_flag &= ~mask;
204 	return (old_val);
205 }
206 
/*
 * Public interface exposed to the rest of the dynamic linker.
 *
 * "lockinfo" is the table of callbacks currently in effect; the wrappers
 * below dispatch through it.  "deflockinfo" holds the built-in default
 * callbacks and is filled in by lockdflt_init().
 */
struct RtldLockInfo lockinfo;
static struct RtldLockInfo deflockinfo;
212 
213 static __inline int
214 thread_mask_set(int mask)
215 {
216 	return lockinfo.thread_set_flag(mask);
217 }
218 
219 static __inline void
220 thread_mask_clear(int mask)
221 {
222 	lockinfo.thread_clr_flag(mask);
223 }
224 
/*
 * The dynamic linker's own locks.  Each entry pairs the opaque handle
 * returned by the active lock_create callback with the bit that
 * represents the lock in the thread flag mask; lockdflt_init() assigns
 * bit (1 << index) to each entry.
 */
#define	RTLD_LOCK_CNT	3
static struct rtld_lock {
	void	*handle;
	int	 mask;
} rtld_locks[RTLD_LOCK_CNT];

/* Named handles for the three table entries. */
rtld_lock_t	rtld_bind_lock = &rtld_locks[0];
rtld_lock_t	rtld_libc_lock = &rtld_locks[1];
rtld_lock_t	rtld_phdr_lock = &rtld_locks[2];
234 
235 void
236 rlock_acquire(rtld_lock_t lock, RtldLockState *lockstate)
237 {
238 
239 	if (lockstate == NULL)
240 		return;
241 
242 	if (thread_mask_set(lock->mask) & lock->mask) {
243 		dbg("rlock_acquire: recursed");
244 		lockstate->lockstate = RTLD_LOCK_UNLOCKED;
245 		return;
246 	}
247 	lockinfo.rlock_acquire(lock->handle);
248 	lockstate->lockstate = RTLD_LOCK_RLOCKED;
249 }
250 
251 void
252 wlock_acquire(rtld_lock_t lock, RtldLockState *lockstate)
253 {
254 
255 	if (lockstate == NULL)
256 		return;
257 
258 	if (thread_mask_set(lock->mask) & lock->mask) {
259 		dbg("wlock_acquire: recursed");
260 		lockstate->lockstate = RTLD_LOCK_UNLOCKED;
261 		return;
262 	}
263 	lockinfo.wlock_acquire(lock->handle);
264 	lockstate->lockstate = RTLD_LOCK_WLOCKED;
265 }
266 
267 void
268 lock_release(rtld_lock_t lock, RtldLockState *lockstate)
269 {
270 
271 	if (lockstate == NULL)
272 		return;
273 
274 	switch (lockstate->lockstate) {
275 	case RTLD_LOCK_UNLOCKED:
276 		break;
277 	case RTLD_LOCK_RLOCKED:
278 	case RTLD_LOCK_WLOCKED:
279 		thread_mask_clear(lock->mask);
280 		lockinfo.lock_release(lock->handle);
281 		break;
282 	default:
283 		assert(0);
284 	}
285 }
286 
287 void
288 lock_upgrade(rtld_lock_t lock, RtldLockState *lockstate)
289 {
290 
291 	if (lockstate == NULL)
292 		return;
293 
294 	lock_release(lock, lockstate);
295 	wlock_acquire(lock, lockstate);
296 }
297 
298 void
299 lock_restart_for_upgrade(RtldLockState *lockstate)
300 {
301 
302 	if (lockstate == NULL)
303 		return;
304 
305 	switch (lockstate->lockstate) {
306 	case RTLD_LOCK_UNLOCKED:
307 	case RTLD_LOCK_WLOCKED:
308 		break;
309 	case RTLD_LOCK_RLOCKED:
310 		siglongjmp(lockstate->env, 1);
311 		break;
312 	default:
313 		assert(0);
314 	}
315 }
316 
317 void
318 dlerror_dflt_init(void)
319 {
320 	lockinfo.dlerror_loc = def_dlerror_loc;
321 	lockinfo.dlerror_loc_sz = sizeof(def_dlerror_msg);
322 	lockinfo.dlerror_seen = def_dlerror_seen;
323 }
324 
325 void
326 lockdflt_init(void)
327 {
328 	int i;
329 
330 	deflockinfo.rtli_version = RTLI_VERSION;
331 	deflockinfo.lock_create = def_lock_create;
332 	deflockinfo.lock_destroy = def_lock_destroy;
333 	deflockinfo.rlock_acquire = def_rlock_acquire;
334 	deflockinfo.wlock_acquire = def_wlock_acquire;
335 	deflockinfo.lock_release = def_lock_release;
336 	deflockinfo.thread_set_flag = def_thread_set_flag;
337 	deflockinfo.thread_clr_flag = def_thread_clr_flag;
338 	deflockinfo.at_fork = NULL;
339 	deflockinfo.dlerror_loc = def_dlerror_loc;
340 	deflockinfo.dlerror_loc_sz = sizeof(def_dlerror_msg);
341 	deflockinfo.dlerror_seen = def_dlerror_seen;
342 
343 	for (i = 0; i < RTLD_LOCK_CNT; i++) {
344 		rtld_locks[i].mask   = (1 << i);
345 		rtld_locks[i].handle = NULL;
346 	}
347 
348 	memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
349 	_rtld_thread_init(NULL);
350 	if (ld_fast_sigblock) {
351 		__sys_sigfastblock(SIGFASTBLOCK_SETPTR, &fsigblock);
352 	} else {
353 		/*
354 		 * Construct a mask to block all signals.  Note that
355 		 * blocked traps mean that the process is terminated
356 		 * if trap occurs while we are in locked section, with
357 		 * the default settings for kern.forcesigexit.
358 		 */
359 		sigfillset(&fullsigmask);
360 	}
361 }
362 
363 /*
364  * Callback function to allow threads implementation to
365  * register their own locking primitives if the default
366  * one is not suitable.
367  * The current context should be the only context
368  * executing at the invocation time.
369  */
370 void
371 _rtld_thread_init(struct RtldLockInfo *pli)
372 {
373 	const Obj_Entry *obj;
374 	SymLook req;
375 	void *locks[RTLD_LOCK_CNT];
376 	int flags, i, res;
377 
378 	if (pli == NULL) {
379 		lockinfo.rtli_version = RTLI_VERSION;
380 	} else {
381 		lockinfo.rtli_version = RTLI_VERSION_ONE;
382 		obj = obj_from_addr(pli->lock_create);
383 		if (obj != NULL) {
384 			symlook_init(&req, "_pli_rtli_version");
385 			res = symlook_obj(&req, obj);
386 			if (res == 0)
387 				lockinfo.rtli_version = pli->rtli_version;
388 		}
389 	}
390 
391 	/* disable all locking while this function is running */
392 	flags =	thread_mask_set(~0);
393 
394 	if (pli == NULL)
395 		pli = &deflockinfo;
396 	else if (ld_fast_sigblock) {
397 		fsigblock = 0;
398 		__sys_sigfastblock(SIGFASTBLOCK_UNSETPTR, NULL);
399 	}
400 
401 	for (i = 0; i < RTLD_LOCK_CNT; i++)
402 		if ((locks[i] = pli->lock_create()) == NULL)
403 			break;
404 
405 	if (i < RTLD_LOCK_CNT) {
406 		while (--i >= 0)
407 			pli->lock_destroy(locks[i]);
408 		abort();
409 	}
410 
411 	for (i = 0; i < RTLD_LOCK_CNT; i++) {
412 		if (rtld_locks[i].handle == NULL)
413 			continue;
414 		if (flags & rtld_locks[i].mask)
415 			lockinfo.lock_release(rtld_locks[i].handle);
416 		lockinfo.lock_destroy(rtld_locks[i].handle);
417 	}
418 
419 	for (i = 0; i < RTLD_LOCK_CNT; i++) {
420 		rtld_locks[i].handle = locks[i];
421 		if (flags & rtld_locks[i].mask)
422 			pli->wlock_acquire(rtld_locks[i].handle);
423 	}
424 
425 	lockinfo.lock_create = pli->lock_create;
426 	lockinfo.lock_destroy = pli->lock_destroy;
427 	lockinfo.rlock_acquire = pli->rlock_acquire;
428 	lockinfo.wlock_acquire = pli->wlock_acquire;
429 	lockinfo.lock_release  = pli->lock_release;
430 	lockinfo.thread_set_flag = pli->thread_set_flag;
431 	lockinfo.thread_clr_flag = pli->thread_clr_flag;
432 	lockinfo.at_fork = pli->at_fork;
433 	if (lockinfo.rtli_version > RTLI_VERSION_ONE && pli != NULL) {
434 		strlcpy(pli->dlerror_loc(), lockinfo.dlerror_loc(),
435 		    lockinfo.dlerror_loc_sz);
436 		lockinfo.dlerror_loc = pli->dlerror_loc;
437 		lockinfo.dlerror_loc_sz = pli->dlerror_loc_sz;
438 		lockinfo.dlerror_seen = pli->dlerror_seen;
439 	}
440 
441 	/* restore thread locking state, this time with new locks */
442 	thread_mask_clear(~0);
443 	thread_mask_set(flags);
444 	dbg("_rtld_thread_init: done");
445 }
446 
447 void
448 _rtld_atfork_pre(int *locks)
449 {
450 	RtldLockState ls[2];
451 
452 	if (locks == NULL)
453 		return;
454 
455 	/*
456 	 * Warning: this did not worked well with the rtld compat
457 	 * locks above, when the thread signal mask was corrupted (set
458 	 * to all signals blocked) if two locks were taken
459 	 * simultaneously in the write mode.  The caller of the
460 	 * _rtld_atfork_pre() must provide the working implementation
461 	 * of the locks anyway, and libthr locks are fine.
462 	 */
463 	wlock_acquire(rtld_phdr_lock, &ls[0]);
464 	wlock_acquire(rtld_bind_lock, &ls[1]);
465 
466 	/* XXXKIB: I am really sorry for this. */
467 	locks[0] = ls[1].lockstate;
468 	locks[2] = ls[0].lockstate;
469 }
470 
471 void
472 _rtld_atfork_post(int *locks)
473 {
474 	RtldLockState ls[2];
475 
476 	if (locks == NULL)
477 		return;
478 
479 	bzero(ls, sizeof(ls));
480 	ls[0].lockstate = locks[2];
481 	ls[1].lockstate = locks[0];
482 	lock_release(rtld_bind_lock, &ls[1]);
483 	lock_release(rtld_phdr_lock, &ls[0]);
484 }
485