xref: /freebsd/libexec/rtld-elf/rtld_lock.c (revision e32fecd0c2c3ee37c47ee100f169e7eb0282a873)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright 1999, 2000 John D. Polstra.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  *	from: FreeBSD: src/libexec/rtld-elf/sparc64/lockdflt.c,v 1.3 2002/10/09
28  * $FreeBSD$
29  */
30 
31 /*
32  * Thread locking implementation for the dynamic linker.
33  *
34  * We use the "simple, non-scalable reader-preference lock" from:
35  *
36  *   J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
37  *   Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
38  *   Principles and Practice of Parallel Programming, April 1991.
39  *
40  * In this algorithm the lock is a single word.  Its low-order bit is
41  * set when a writer holds the lock.  The remaining high-order bits
42  * contain a count of readers desiring the lock.  The algorithm requires
43  * atomic "compare_and_store" and "add" operations, which we take
44  * from machine/atomic.h.
45  */
46 
47 #include <sys/param.h>
48 #include <sys/signalvar.h>
49 #include <signal.h>
50 #include <stdlib.h>
51 #include <time.h>
52 
53 #include "debug.h"
54 #include "rtld.h"
55 #include "rtld_machdep.h"
56 #include "rtld_libc.h"
57 
58 void _rtld_thread_init(struct RtldLockInfo *) __exported;
59 void _rtld_atfork_pre(int *) __exported;
60 void _rtld_atfork_post(int *) __exported;
61 
/*
 * Storage behind the default dlerror callbacks def_dlerror_loc() and
 * def_dlerror_seen().
 */
static char	def_dlerror_msg[512];		/* message buffer */
static int	def_dlerror_seen_val = 1;	/* "seen" flag, initially 1 */
64 
65 static char *
66 def_dlerror_loc(void)
67 {
68 	return (def_dlerror_msg);
69 }
70 
71 static int *
72 def_dlerror_seen(void)
73 {
74 	return (&def_dlerror_seen_val);
75 }
76 
/*
 * Layout of the lock word: bit 0 is the writer flag, the bits above it
 * hold the reader count.
 */
#define WAFLAG		(1 << 0)	/* Set while a writer holds the lock */
#define RC_INCR		(1 << 1)	/* One reader, in reader-count units */
79 
80 typedef struct Struct_Lock {
81 	volatile u_int lock;
82 	void *base;
83 } Lock;
84 
85 static sigset_t fullsigmask, oldsigmask;
86 static int thread_flag, wnested;
87 static uint32_t fsigblock;
88 
89 static void *
90 def_lock_create(void)
91 {
92 	void *base;
93 	char *p;
94 	uintptr_t r;
95 	Lock *l;
96 
97 	/*
98 	 * Arrange for the lock to occupy its own cache line.  First, we
99 	 * optimistically allocate just a cache line, hoping that malloc
100 	 * will give us a well-aligned block of memory.  If that doesn't
101 	 * work, we allocate a larger block and take a well-aligned cache
102 	 * line from it.
103 	 */
104 	base = xmalloc(CACHE_LINE_SIZE);
105 	p = base;
106 	if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
107 		free(base);
108 		base = xmalloc(2 * CACHE_LINE_SIZE);
109 		p = base;
110 		if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
111 			p += CACHE_LINE_SIZE - r;
112 	}
113 	l = (Lock *)p;
114 	l->base = base;
115 	l->lock = 0;
116 	return (l);
117 }
118 
119 static void
120 def_lock_destroy(void *lock)
121 {
122 	Lock *l = lock;
123 
124 	free(l->base);
125 }
126 
127 static void
128 sig_fastunblock(void)
129 {
130 	uint32_t oldval;
131 
132 	assert((fsigblock & ~SIGFASTBLOCK_FLAGS) >= SIGFASTBLOCK_INC);
133 	oldval = atomic_fetchadd_32(&fsigblock, -SIGFASTBLOCK_INC);
134 	if (oldval == (SIGFASTBLOCK_PEND | SIGFASTBLOCK_INC))
135 		__sys_sigfastblock(SIGFASTBLOCK_UNBLOCK, NULL);
136 }
137 
138 static bool
139 def_lock_acquire_set(Lock *l, bool wlock)
140 {
141 	if (wlock) {
142 		if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
143 			return (true);
144 	} else {
145 		atomic_add_acq_int(&l->lock, RC_INCR);
146 		if ((l->lock & WAFLAG) == 0)
147 			return (true);
148 		atomic_add_int(&l->lock, -RC_INCR);
149 	}
150 	return (false);
151 }
152 
153 static void
154 def_lock_acquire(Lock *l, bool wlock)
155 {
156 	sigset_t tmp_oldsigmask;
157 
158 	if (ld_fast_sigblock) {
159 		for (;;) {
160 			atomic_add_32(&fsigblock, SIGFASTBLOCK_INC);
161 			if (def_lock_acquire_set(l, wlock))
162 				break;
163 			sig_fastunblock();
164 		}
165 	} else {
166 		for (;;) {
167 			sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
168 			if (def_lock_acquire_set(l, wlock))
169 				break;
170 			sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
171 		}
172 		if (atomic_fetchadd_int(&wnested, 1) == 0)
173 			oldsigmask = tmp_oldsigmask;
174 	}
175 }
176 
177 static void
178 def_rlock_acquire(void *lock)
179 {
180 	def_lock_acquire(lock, false);
181 }
182 
183 static void
184 def_wlock_acquire(void *lock)
185 {
186 	def_lock_acquire(lock, true);
187 }
188 
189 static void
190 def_lock_release(void *lock)
191 {
192 	Lock *l = lock;
193 
194 	atomic_add_rel_int(&l->lock, -((l->lock & WAFLAG) == 0 ?
195 	    RC_INCR : WAFLAG));
196 	if (ld_fast_sigblock)
197 		sig_fastunblock();
198 	else if (atomic_fetchadd_int(&wnested, -1) == 1)
199 		sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
200 }
201 
202 static int
203 def_thread_set_flag(int mask)
204 {
205 	int old_val = thread_flag;
206 
207 	thread_flag |= mask;
208 	return (old_val);
209 }
210 
211 static int
212 def_thread_clr_flag(int mask)
213 {
214 	int old_val = thread_flag;
215 
216 	thread_flag &= ~mask;
217 	return (old_val);
218 }
219 
220 /*
221  * Public interface exposed to the rest of the dynamic linker.
222  */
223 struct RtldLockInfo lockinfo;
224 static struct RtldLockInfo deflockinfo;
225 
226 static __inline int
227 thread_mask_set(int mask)
228 {
229 	return (lockinfo.thread_set_flag(mask));
230 }
231 
232 static __inline void
233 thread_mask_clear(int mask)
234 {
235 	lockinfo.thread_clr_flag(mask);
236 }
237 
238 #define	RTLD_LOCK_CNT	3
239 static struct rtld_lock {
240 	void	*handle;
241 	int	 mask;
242 } rtld_locks[RTLD_LOCK_CNT];
243 
244 rtld_lock_t	rtld_bind_lock = &rtld_locks[0];
245 rtld_lock_t	rtld_libc_lock = &rtld_locks[1];
246 rtld_lock_t	rtld_phdr_lock = &rtld_locks[2];
247 
248 void
249 rlock_acquire(rtld_lock_t lock, RtldLockState *lockstate)
250 {
251 
252 	if (lockstate == NULL)
253 		return;
254 
255 	if (thread_mask_set(lock->mask) & lock->mask) {
256 		dbg("rlock_acquire: recursed");
257 		lockstate->lockstate = RTLD_LOCK_UNLOCKED;
258 		return;
259 	}
260 	lockinfo.rlock_acquire(lock->handle);
261 	lockstate->lockstate = RTLD_LOCK_RLOCKED;
262 }
263 
264 void
265 wlock_acquire(rtld_lock_t lock, RtldLockState *lockstate)
266 {
267 
268 	if (lockstate == NULL)
269 		return;
270 
271 	if (thread_mask_set(lock->mask) & lock->mask) {
272 		dbg("wlock_acquire: recursed");
273 		lockstate->lockstate = RTLD_LOCK_UNLOCKED;
274 		return;
275 	}
276 	lockinfo.wlock_acquire(lock->handle);
277 	lockstate->lockstate = RTLD_LOCK_WLOCKED;
278 }
279 
280 void
281 lock_release(rtld_lock_t lock, RtldLockState *lockstate)
282 {
283 
284 	if (lockstate == NULL)
285 		return;
286 
287 	switch (lockstate->lockstate) {
288 	case RTLD_LOCK_UNLOCKED:
289 		break;
290 	case RTLD_LOCK_RLOCKED:
291 	case RTLD_LOCK_WLOCKED:
292 		thread_mask_clear(lock->mask);
293 		lockinfo.lock_release(lock->handle);
294 		break;
295 	default:
296 		assert(0);
297 	}
298 }
299 
300 void
301 lock_upgrade(rtld_lock_t lock, RtldLockState *lockstate)
302 {
303 
304 	if (lockstate == NULL)
305 		return;
306 
307 	lock_release(lock, lockstate);
308 	wlock_acquire(lock, lockstate);
309 }
310 
311 void
312 lock_restart_for_upgrade(RtldLockState *lockstate)
313 {
314 
315 	if (lockstate == NULL)
316 		return;
317 
318 	switch (lockstate->lockstate) {
319 	case RTLD_LOCK_UNLOCKED:
320 	case RTLD_LOCK_WLOCKED:
321 		break;
322 	case RTLD_LOCK_RLOCKED:
323 		siglongjmp(lockstate->env, 1);
324 		break;
325 	default:
326 		assert(0);
327 	}
328 }
329 
330 void
331 dlerror_dflt_init(void)
332 {
333 	lockinfo.dlerror_loc = def_dlerror_loc;
334 	lockinfo.dlerror_loc_sz = sizeof(def_dlerror_msg);
335 	lockinfo.dlerror_seen = def_dlerror_seen;
336 }
337 
338 void
339 lockdflt_init(void)
340 {
341 	int i;
342 
343 	deflockinfo.rtli_version = RTLI_VERSION;
344 	deflockinfo.lock_create = def_lock_create;
345 	deflockinfo.lock_destroy = def_lock_destroy;
346 	deflockinfo.rlock_acquire = def_rlock_acquire;
347 	deflockinfo.wlock_acquire = def_wlock_acquire;
348 	deflockinfo.lock_release = def_lock_release;
349 	deflockinfo.thread_set_flag = def_thread_set_flag;
350 	deflockinfo.thread_clr_flag = def_thread_clr_flag;
351 	deflockinfo.at_fork = NULL;
352 	deflockinfo.dlerror_loc = def_dlerror_loc;
353 	deflockinfo.dlerror_loc_sz = sizeof(def_dlerror_msg);
354 	deflockinfo.dlerror_seen = def_dlerror_seen;
355 
356 	for (i = 0; i < RTLD_LOCK_CNT; i++) {
357 		rtld_locks[i].mask   = (1 << i);
358 		rtld_locks[i].handle = NULL;
359 	}
360 
361 	memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
362 	_rtld_thread_init(NULL);
363 	if (ld_fast_sigblock) {
364 		__sys_sigfastblock(SIGFASTBLOCK_SETPTR, &fsigblock);
365 	} else {
366 		/*
367 		 * Construct a mask to block all signals.  Note that
368 		 * blocked traps mean that the process is terminated
369 		 * if trap occurs while we are in locked section, with
370 		 * the default settings for kern.forcesigexit.
371 		 */
372 		sigfillset(&fullsigmask);
373 	}
374 }
375 
376 /*
377  * Callback function to allow threads implementation to
378  * register their own locking primitives if the default
379  * one is not suitable.
380  * The current context should be the only context
381  * executing at the invocation time.
382  */
383 void
384 _rtld_thread_init(struct RtldLockInfo *pli)
385 {
386 	const Obj_Entry *obj;
387 	SymLook req;
388 	void *locks[RTLD_LOCK_CNT];
389 	int flags, i, res;
390 
391 	if (pli == NULL) {
392 		lockinfo.rtli_version = RTLI_VERSION;
393 	} else {
394 		lockinfo.rtli_version = RTLI_VERSION_ONE;
395 		obj = obj_from_addr(pli->lock_create);
396 		if (obj != NULL) {
397 			symlook_init(&req, "_pli_rtli_version");
398 			res = symlook_obj(&req, obj);
399 			if (res == 0)
400 				lockinfo.rtli_version = pli->rtli_version;
401 		}
402 	}
403 
404 	/* disable all locking while this function is running */
405 	flags =	thread_mask_set(~0);
406 
407 	if (pli == NULL)
408 		pli = &deflockinfo;
409 	else if (ld_fast_sigblock) {
410 		fsigblock = 0;
411 		__sys_sigfastblock(SIGFASTBLOCK_UNSETPTR, NULL);
412 	}
413 
414 	for (i = 0; i < RTLD_LOCK_CNT; i++)
415 		if ((locks[i] = pli->lock_create()) == NULL)
416 			break;
417 
418 	if (i < RTLD_LOCK_CNT) {
419 		while (--i >= 0)
420 			pli->lock_destroy(locks[i]);
421 		abort();
422 	}
423 
424 	for (i = 0; i < RTLD_LOCK_CNT; i++) {
425 		if (rtld_locks[i].handle == NULL)
426 			continue;
427 		if (flags & rtld_locks[i].mask)
428 			lockinfo.lock_release(rtld_locks[i].handle);
429 		lockinfo.lock_destroy(rtld_locks[i].handle);
430 	}
431 
432 	for (i = 0; i < RTLD_LOCK_CNT; i++) {
433 		rtld_locks[i].handle = locks[i];
434 		if (flags & rtld_locks[i].mask)
435 			pli->wlock_acquire(rtld_locks[i].handle);
436 	}
437 
438 	lockinfo.lock_create = pli->lock_create;
439 	lockinfo.lock_destroy = pli->lock_destroy;
440 	lockinfo.rlock_acquire = pli->rlock_acquire;
441 	lockinfo.wlock_acquire = pli->wlock_acquire;
442 	lockinfo.lock_release  = pli->lock_release;
443 	lockinfo.thread_set_flag = pli->thread_set_flag;
444 	lockinfo.thread_clr_flag = pli->thread_clr_flag;
445 	lockinfo.at_fork = pli->at_fork;
446 	if (lockinfo.rtli_version > RTLI_VERSION_ONE && pli != NULL) {
447 		strlcpy(pli->dlerror_loc(), lockinfo.dlerror_loc(),
448 		    lockinfo.dlerror_loc_sz);
449 		lockinfo.dlerror_loc = pli->dlerror_loc;
450 		lockinfo.dlerror_loc_sz = pli->dlerror_loc_sz;
451 		lockinfo.dlerror_seen = pli->dlerror_seen;
452 	}
453 
454 	/* restore thread locking state, this time with new locks */
455 	thread_mask_clear(~0);
456 	thread_mask_set(flags);
457 	dbg("_rtld_thread_init: done");
458 }
459 
460 void
461 _rtld_atfork_pre(int *locks)
462 {
463 	RtldLockState ls[2];
464 
465 	if (locks == NULL)
466 		return;
467 
468 	/*
469 	 * Warning: this did not worked well with the rtld compat
470 	 * locks above, when the thread signal mask was corrupted (set
471 	 * to all signals blocked) if two locks were taken
472 	 * simultaneously in the write mode.  The caller of the
473 	 * _rtld_atfork_pre() must provide the working implementation
474 	 * of the locks anyway, and libthr locks are fine.
475 	 */
476 	wlock_acquire(rtld_phdr_lock, &ls[0]);
477 	wlock_acquire(rtld_bind_lock, &ls[1]);
478 
479 	/* XXXKIB: I am really sorry for this. */
480 	locks[0] = ls[1].lockstate;
481 	locks[2] = ls[0].lockstate;
482 }
483 
484 void
485 _rtld_atfork_post(int *locks)
486 {
487 	RtldLockState ls[2];
488 
489 	if (locks == NULL)
490 		return;
491 
492 	bzero(ls, sizeof(ls));
493 	ls[0].lockstate = locks[2];
494 	ls[1].lockstate = locks[0];
495 	lock_release(rtld_bind_lock, &ls[1]);
496 	lock_release(rtld_phdr_lock, &ls[0]);
497 }
498