/*-
 * Copyright 1999, 2000 John D. Polstra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: FreeBSD: src/libexec/rtld-elf/sparc64/lockdflt.c,v 1.3 2002/10/09
 * $FreeBSD$
 */

/*
 * Thread locking implementation for the dynamic linker.
 *
 * We use the "simple, non-scalable reader-preference lock" from:
 *
 * J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
 * Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
 * Principles and Practice of Parallel Programming, April 1991.
 *
 * In this algorithm the lock is a single word.  Its low-order bit is
 * set when a writer holds the lock.  The remaining high-order bits
 * contain a count of readers desiring the lock.  The algorithm requires
 * atomic "compare_and_store" and "add" operations, which we take
 * from machine/atomic.h.
 */

#include <sys/param.h>
#include <signal.h>
#include <stdlib.h>
#include <time.h>

#include "debug.h"
#include "rtld.h"
#include "rtld_machdep.h"

void _rtld_thread_init(struct RtldLockInfo *) __exported;
void _rtld_atfork_pre(int *) __exported;
void _rtld_atfork_post(int *) __exported;

#define	WAFLAG		0x1	/* A writer holds the lock */
#define	RC_INCR		0x2	/* Adjusts count of readers desiring lock */

typedef struct Struct_Lock {
	volatile u_int lock;
	void *base;
} Lock;

static sigset_t fullsigmask, oldsigmask;
static int thread_flag, wnested;

static void *
def_lock_create(void)
{
	void *base;
	char *p;
	uintptr_t r;
	Lock *l;

	/*
	 * Arrange for the lock to occupy its own cache line.  First, we
	 * optimistically allocate just a cache line, hoping that malloc
	 * will give us a well-aligned block of memory.  If that doesn't
	 * work, we allocate a larger block and take a well-aligned cache
	 * line from it.
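	 * Keeping the lock word on its own cache line avoids false sharing:
	 * spinning readers and writers then do not contend with unrelated
	 * data that would otherwise land on the same line.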
	 */
	base = xmalloc(CACHE_LINE_SIZE);
	p = (char *)base;
	if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
		free(base);
		base = xmalloc(2 * CACHE_LINE_SIZE);
		p = (char *)base;
		if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
			p += CACHE_LINE_SIZE - r;
	}
	l = (Lock *)p;
	l->base = base;
	l->lock = 0;
	return l;
}

static void
def_lock_destroy(void *lock)
{
	Lock *l = (Lock *)lock;

	free(l->base);
}

static void
def_rlock_acquire(void *lock)
{
	Lock *l = (Lock *)lock;

	atomic_add_acq_int(&l->lock, RC_INCR);
	while (l->lock & WAFLAG)
		;	/* Spin */
}

static void
def_wlock_acquire(void *lock)
{
	Lock *l;
	sigset_t tmp_oldsigmask;

	l = (Lock *)lock;
	for (;;) {
		/*
		 * Block (almost) all signals before attempting to take the
		 * write lock, so that a signal handler cannot re-enter rtld
		 * and deadlock against the lock we are about to hold.
		 */
		sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
		if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
			break;
		sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
	}
	/* Save the original signal mask only for the outermost write lock. */
	if (atomic_fetchadd_int(&wnested, 1) == 0)
		oldsigmask = tmp_oldsigmask;
}

static void
def_lock_release(void *lock)
{
	Lock *l;

	l = (Lock *)lock;
	if ((l->lock & WAFLAG) == 0)
		atomic_add_rel_int(&l->lock, -RC_INCR);
	else {
		assert(wnested > 0);
		atomic_add_rel_int(&l->lock, -WAFLAG);
		if (atomic_fetchadd_int(&wnested, -1) == 1)
			sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
	}
}

static int
def_thread_set_flag(int mask)
{
	int old_val = thread_flag;

	thread_flag |= mask;
	return (old_val);
}

static int
def_thread_clr_flag(int mask)
{
	int old_val = thread_flag;

	thread_flag &= ~mask;
	return (old_val);
}

/*
 * Public interface exposed to the rest of the dynamic linker.
 */
static struct RtldLockInfo lockinfo;
static struct RtldLockInfo deflockinfo;

static __inline int
thread_mask_set(int mask)
{
	return lockinfo.thread_set_flag(mask);
}

static __inline void
thread_mask_clear(int mask)
{
	lockinfo.thread_clr_flag(mask);
}

#define	RTLD_LOCK_CNT	3
struct rtld_lock {
	void	*handle;
	int	 mask;
} rtld_locks[RTLD_LOCK_CNT];

rtld_lock_t	rtld_bind_lock = &rtld_locks[0];
rtld_lock_t	rtld_libc_lock = &rtld_locks[1];
rtld_lock_t	rtld_phdr_lock = &rtld_locks[2];

void
rlock_acquire(rtld_lock_t lock, RtldLockState *lockstate)
{

	if (lockstate == NULL)
		return;

	if (thread_mask_set(lock->mask) & lock->mask) {
		dbg("rlock_acquire: recursed");
		lockstate->lockstate = RTLD_LOCK_UNLOCKED;
		return;
	}
	lockinfo.rlock_acquire(lock->handle);
	lockstate->lockstate = RTLD_LOCK_RLOCKED;
}

void
wlock_acquire(rtld_lock_t lock, RtldLockState *lockstate)
{

	if (lockstate == NULL)
		return;

	if (thread_mask_set(lock->mask) & lock->mask) {
		dbg("wlock_acquire: recursed");
		lockstate->lockstate = RTLD_LOCK_UNLOCKED;
		return;
	}
	lockinfo.wlock_acquire(lock->handle);
	lockstate->lockstate = RTLD_LOCK_WLOCKED;
}

void
lock_release(rtld_lock_t lock, RtldLockState *lockstate)
{

	if (lockstate == NULL)
		return;

	switch (lockstate->lockstate) {
	case RTLD_LOCK_UNLOCKED:
		break;
	case RTLD_LOCK_RLOCKED:
	case RTLD_LOCK_WLOCKED:
		thread_mask_clear(lock->mask);
		lockinfo.lock_release(lock->handle);
		break;
	default:
		assert(0);
	}
}

void
lock_upgrade(rtld_lock_t lock,
    RtldLockState *lockstate)
{

	if (lockstate == NULL)
		return;

	lock_release(lock, lockstate);
	wlock_acquire(lock, lockstate);
}

void
lock_restart_for_upgrade(RtldLockState *lockstate)
{

	if (lockstate == NULL)
		return;

	switch (lockstate->lockstate) {
	case RTLD_LOCK_UNLOCKED:
	case RTLD_LOCK_WLOCKED:
		break;
	case RTLD_LOCK_RLOCKED:
		siglongjmp(lockstate->env, 1);
		break;
	default:
		assert(0);
	}
}

void
lockdflt_init(void)
{
	int i;

	deflockinfo.rtli_version = RTLI_VERSION;
	deflockinfo.lock_create = def_lock_create;
	deflockinfo.lock_destroy = def_lock_destroy;
	deflockinfo.rlock_acquire = def_rlock_acquire;
	deflockinfo.wlock_acquire = def_wlock_acquire;
	deflockinfo.lock_release = def_lock_release;
	deflockinfo.thread_set_flag = def_thread_set_flag;
	deflockinfo.thread_clr_flag = def_thread_clr_flag;
	deflockinfo.at_fork = NULL;

	for (i = 0; i < RTLD_LOCK_CNT; i++) {
		rtld_locks[i].mask = (1 << i);
		rtld_locks[i].handle = NULL;
	}

	memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
	_rtld_thread_init(NULL);
	/*
	 * Construct a mask to block all signals except traps which might
	 * conceivably be generated within the dynamic linker itself.
	 */
	sigfillset(&fullsigmask);
	sigdelset(&fullsigmask, SIGILL);
	sigdelset(&fullsigmask, SIGTRAP);
	sigdelset(&fullsigmask, SIGABRT);
	sigdelset(&fullsigmask, SIGEMT);
	sigdelset(&fullsigmask, SIGFPE);
	sigdelset(&fullsigmask, SIGBUS);
	sigdelset(&fullsigmask, SIGSEGV);
	sigdelset(&fullsigmask, SIGSYS);
}

/*
 * Callback function that allows the threading implementation to
 * register its own locking primitives if the default ones are not
 * suitable.  The calling context should be the only context
 * executing at invocation time.
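 *
 * Passing a NULL argument (re)installs rtld's built-in default locks;
 * lockdflt_init() uses this to set up single-threaded operation before
 * any threading library has registered itself.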
 */
void
_rtld_thread_init(struct RtldLockInfo *pli)
{
	int flags, i;
	void *locks[RTLD_LOCK_CNT];

	/* disable all locking while this function is running */
	flags = thread_mask_set(~0);

	if (pli == NULL)
		pli = &deflockinfo;

	for (i = 0; i < RTLD_LOCK_CNT; i++)
		if ((locks[i] = pli->lock_create()) == NULL)
			break;

	if (i < RTLD_LOCK_CNT) {
		while (--i >= 0)
			pli->lock_destroy(locks[i]);
		abort();
	}

	for (i = 0; i < RTLD_LOCK_CNT; i++) {
		if (rtld_locks[i].handle == NULL)
			continue;
		if (flags & rtld_locks[i].mask)
			lockinfo.lock_release(rtld_locks[i].handle);
		lockinfo.lock_destroy(rtld_locks[i].handle);
	}

	for (i = 0; i < RTLD_LOCK_CNT; i++) {
		rtld_locks[i].handle = locks[i];
		if (flags & rtld_locks[i].mask)
			pli->wlock_acquire(rtld_locks[i].handle);
	}

	lockinfo.lock_create = pli->lock_create;
	lockinfo.lock_destroy = pli->lock_destroy;
	lockinfo.rlock_acquire = pli->rlock_acquire;
	lockinfo.wlock_acquire = pli->wlock_acquire;
	lockinfo.lock_release = pli->lock_release;
	lockinfo.thread_set_flag = pli->thread_set_flag;
	lockinfo.thread_clr_flag = pli->thread_clr_flag;
	lockinfo.at_fork = pli->at_fork;

	/* restore thread locking state, this time with new locks */
	thread_mask_clear(~0);
	thread_mask_set(flags);
	dbg("_rtld_thread_init: done");
}

void
_rtld_atfork_pre(int *locks)
{
	RtldLockState ls[2];

	if (locks == NULL)
		return;

	/*
	 * Warning: this did not work well with the rtld compat locks
	 * above; the thread signal mask was corrupted (set to block all
	 * signals) if two locks were taken simultaneously in write mode.
	 * The caller of _rtld_atfork_pre() must provide a working
	 * implementation of the locks anyway, and libthr locks are fine.
	 */
	wlock_acquire(rtld_phdr_lock, &ls[0]);
	wlock_acquire(rtld_bind_lock, &ls[1]);

	/* XXXKIB: I am really sorry for this. */
	locks[0] = ls[1].lockstate;
	locks[2] = ls[0].lockstate;
}

void
_rtld_atfork_post(int *locks)
{
	RtldLockState ls[2];

	if (locks == NULL)
		return;

	bzero(ls, sizeof(ls));
	ls[0].lockstate = locks[2];
	ls[1].lockstate = locks[0];
	lock_release(rtld_bind_lock, &ls[1]);
	lock_release(rtld_phdr_lock, &ls[0]);
}
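
/*
 * Illustrative sketch (not part of rtld): how a threading library might
 * hand its own locking primitives to the dynamic linker through
 * _rtld_thread_init().  The thr_rtld_* names below are hypothetical
 * placeholders; a real implementation such as libthr supplies its own
 * callbacks with these signatures.  Only the fields used in this file
 * are shown.
 *
 *	static struct RtldLockInfo thr_lock_info = {
 *		.rtli_version	 = RTLI_VERSION,
 *		.lock_create	 = thr_rtld_lock_create,
 *		.lock_destroy	 = thr_rtld_lock_destroy,
 *		.rlock_acquire	 = thr_rtld_rlock_acquire,
 *		.wlock_acquire	 = thr_rtld_wlock_acquire,
 *		.lock_release	 = thr_rtld_lock_release,
 *		.thread_set_flag = thr_rtld_set_flag,
 *		.thread_clr_flag = thr_rtld_clr_flag,
 *		.at_fork	 = thr_rtld_at_fork,
 *	};
 *
 *	void
 *	thr_rtld_init(void)
 *	{
 *		// Must run while this is the only executing context.
 *		_rtld_thread_init(&thr_lock_info);
 *	}
 */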