1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <sys/param.h> 34 #include <sys/types.h> 35 #include <sys/sysmacros.h> 36 #include <sys/systm.h> 37 #include <sys/cred.h> 38 #include <sys/user.h> 39 #include <sys/errno.h> 40 #include <sys/file.h> 41 #include <sys/proc.h> 42 #include <sys/prsystm.h> 43 #include <sys/kmem.h> 44 #include <sys/sobject.h> 45 #include <sys/fault.h> 46 #include <sys/procfs.h> 47 #include <sys/watchpoint.h> 48 #include <sys/time.h> 49 #include <sys/cmn_err.h> 50 #include <sys/machlock.h> 51 #include <sys/debug.h> 52 #include <sys/synch.h> 53 #include <sys/synch32.h> 54 #include <sys/mman.h> 55 #include <sys/class.h> 56 #include <sys/schedctl.h> 57 #include <sys/sleepq.h> 58 #include <sys/policy.h> 59 #include <sys/tnf_probe.h> 60 #include <sys/lwpchan_impl.h> 61 #include <sys/turnstile.h> 62 #include <sys/atomic.h> 63 #include <sys/lwp_timer_impl.h> 64 #include <sys/lwp_upimutex_impl.h> 65 #include <vm/as.h> 66 #include <sys/sdt.h> 67 68 static kthread_t *lwpsobj_owner(caddr_t); 69 static void lwp_unsleep(kthread_t *t); 70 static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip); 71 static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg); 72 73 extern int lwp_cond_signal(lwp_cond_t *cv); 74 75 /* 76 * Maximum number of user prio inheritance locks that can be held by a thread. 77 * Used to limit kmem for each thread. This is a per-thread limit that 78 * can be administered on a system wide basis (using /etc/system). 79 * 80 * Also, when a limit, say maxlwps is added for numbers of lwps within a 81 * process, the per-thread limit automatically becomes a process-wide limit 82 * of maximum number of held upi locks within a process: 83 * maxheldupimx = maxnestupimx * maxlwps; 84 */ 85 static uint32_t maxnestupimx = 2000; 86 87 /* 88 * The sobj_ops vector exports a set of functions needed when a thread 89 * is asleep on a synchronization object of this type. 
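 *
 * (An aside on the maxnestupimx tunable declared above: since it is an
 * ordinary kernel global, it can be raised system-wide from /etc/system
 * with an entry of the usual form, for example (the value here is purely
 * illustrative, not a recommendation):
 *
 *	set maxnestupimx = 4000
 *
 * No such entry is needed by default; 2000 is simply the built-in limit.)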
90 */ 91 static sobj_ops_t lwp_sobj_ops = { 92 SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri 93 }; 94 95 static kthread_t *lwpsobj_pi_owner(upimutex_t *up); 96 97 static sobj_ops_t lwp_sobj_pi_ops = { 98 SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep, 99 turnstile_change_pri 100 }; 101 102 static sleepq_head_t lwpsleepq[NSLEEPQ]; 103 upib_t upimutextab[UPIMUTEX_TABSIZE]; 104 105 #define LWPCHAN_LOCK_SHIFT 10 /* 1024 locks for each pool */ 106 #define LWPCHAN_LOCK_SIZE (1 << LWPCHAN_LOCK_SHIFT) 107 108 /* 109 * We know that both lc_wchan and lc_wchan0 are addresses that most 110 * likely are 8-byte aligned, so we shift off the low-order 3 bits. 111 * 'pool' is either 0 or 1. 112 */ 113 #define LWPCHAN_LOCK_HASH(X, pool) \ 114 (((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \ 115 (LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0)) 116 117 static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE]; 118 119 /* 120 * Is this a POSIX threads user-level lock requiring priority inheritance? 121 */ 122 #define UPIMUTEX(type) ((type) & LOCK_PRIO_INHERIT) 123 124 static sleepq_head_t * 125 lwpsqhash(lwpchan_t *lwpchan) 126 { 127 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 128 return (&lwpsleepq[SQHASHINDEX(x)]); 129 } 130 131 /* 132 * Lock an lwpchan. 133 * Keep this in sync with lwpchan_unlock(), below. 134 */ 135 static void 136 lwpchan_lock(lwpchan_t *lwpchan, int pool) 137 { 138 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 139 mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 140 } 141 142 /* 143 * Unlock an lwpchan. 144 * Keep this in sync with lwpchan_lock(), above. 145 */ 146 static void 147 lwpchan_unlock(lwpchan_t *lwpchan, int pool) 148 { 149 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 150 mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 151 } 152 153 /* 154 * Delete mappings from the lwpchan cache for pages that are being 155 * unmapped by as_unmap(). Given a range of addresses, "start" to "end", 156 * all mappings within the range are deleted from the lwpchan cache. 157 */ 158 void 159 lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end) 160 { 161 lwpchan_data_t *lcp; 162 lwpchan_hashbucket_t *hashbucket; 163 lwpchan_hashbucket_t *endbucket; 164 lwpchan_entry_t *ent; 165 lwpchan_entry_t **prev; 166 caddr_t addr; 167 168 mutex_enter(&p->p_lcp_lock); 169 lcp = p->p_lcp; 170 hashbucket = lcp->lwpchan_cache; 171 endbucket = hashbucket + lcp->lwpchan_size; 172 for (; hashbucket < endbucket; hashbucket++) { 173 if (hashbucket->lwpchan_chain == NULL) 174 continue; 175 mutex_enter(&hashbucket->lwpchan_lock); 176 prev = &hashbucket->lwpchan_chain; 177 /* check entire chain */ 178 while ((ent = *prev) != NULL) { 179 addr = ent->lwpchan_addr; 180 if (start <= addr && addr < end) { 181 *prev = ent->lwpchan_next; 182 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 183 (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) 184 lwp_mutex_cleanup(ent, LOCK_UNMAPPED); 185 kmem_free(ent, sizeof (*ent)); 186 atomic_add_32(&lcp->lwpchan_entries, -1); 187 } else { 188 prev = &ent->lwpchan_next; 189 } 190 } 191 mutex_exit(&hashbucket->lwpchan_lock); 192 } 193 mutex_exit(&p->p_lcp_lock); 194 } 195 196 /* 197 * Given an lwpchan cache pointer and a process virtual address, 198 * return a pointer to the corresponding lwpchan hash bucket. 
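 *
 * A worked example (the numbers are purely illustrative; lwpchan_bits is
 * whatever the current cache size dictates, not necessarily 4): with
 * lwpchan_bits == 4, i.e. a 16-bucket cache and lwpchan_mask == 0xf,
 * the 8-byte-aligned address 0x2f3e8 indexes its bucket as
 *
 *	addr >>= 3;			0x2f3e8 >> 3 == 0x5e7d
 *	i = (addr ^ (addr >> 4)) & 0xf;	(0x5e7d ^ 0x5e7) & 0xf == 0xa
 *
 * i.e. bucket 10 of lwpchan_cache[].  Doubling the cache changes both
 * the shift and the mask, which is why lwpchan_alloc_cache() below must
 * rehash every existing entry into the new table.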
199 */ 200 static lwpchan_hashbucket_t * 201 lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr) 202 { 203 uint_t i; 204 205 /* 206 * All user-level sync object addresses are 8-byte aligned. 207 * Ignore the lowest 3 bits of the address and use the 208 * higher-order 2*lwpchan_bits bits for the hash index. 209 */ 210 addr >>= 3; 211 i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask; 212 return (lcp->lwpchan_cache + i); 213 } 214 215 /* 216 * (Re)allocate the per-process lwpchan cache. 217 */ 218 static void 219 lwpchan_alloc_cache(proc_t *p, uint_t bits) 220 { 221 lwpchan_data_t *lcp; 222 lwpchan_data_t *old_lcp; 223 lwpchan_hashbucket_t *hashbucket; 224 lwpchan_hashbucket_t *endbucket; 225 lwpchan_hashbucket_t *newbucket; 226 lwpchan_entry_t *ent; 227 lwpchan_entry_t *next; 228 uint_t count; 229 230 ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS); 231 232 lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP); 233 lcp->lwpchan_bits = bits; 234 lcp->lwpchan_size = 1 << lcp->lwpchan_bits; 235 lcp->lwpchan_mask = lcp->lwpchan_size - 1; 236 lcp->lwpchan_entries = 0; 237 lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size * 238 sizeof (lwpchan_hashbucket_t), KM_SLEEP); 239 lcp->lwpchan_next_data = NULL; 240 241 mutex_enter(&p->p_lcp_lock); 242 if ((old_lcp = p->p_lcp) != NULL) { 243 if (old_lcp->lwpchan_bits >= bits) { 244 /* someone beat us to it */ 245 mutex_exit(&p->p_lcp_lock); 246 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 247 sizeof (lwpchan_hashbucket_t)); 248 kmem_free(lcp, sizeof (lwpchan_data_t)); 249 return; 250 } 251 /* 252 * Acquire all of the old hash table locks. 253 */ 254 hashbucket = old_lcp->lwpchan_cache; 255 endbucket = hashbucket + old_lcp->lwpchan_size; 256 for (; hashbucket < endbucket; hashbucket++) 257 mutex_enter(&hashbucket->lwpchan_lock); 258 /* 259 * Move all of the old hash table entries to the 260 * new hash table. The new hash table has not yet 261 * been installed so we don't need any of its locks. 262 */ 263 count = 0; 264 hashbucket = old_lcp->lwpchan_cache; 265 for (; hashbucket < endbucket; hashbucket++) { 266 ent = hashbucket->lwpchan_chain; 267 while (ent != NULL) { 268 next = ent->lwpchan_next; 269 newbucket = lwpchan_bucket(lcp, 270 (uintptr_t)ent->lwpchan_addr); 271 ent->lwpchan_next = newbucket->lwpchan_chain; 272 newbucket->lwpchan_chain = ent; 273 ent = next; 274 count++; 275 } 276 hashbucket->lwpchan_chain = NULL; 277 } 278 lcp->lwpchan_entries = count; 279 } 280 281 /* 282 * Retire the old hash table. We can't actually kmem_free() it 283 * now because someone may still have a pointer to it. Instead, 284 * we link it onto the new hash table's list of retired hash tables. 285 * The new hash table is double the size of the previous one, so 286 * the total size of all retired hash tables is less than the size 287 * of the new one. exit() and exec() free the retired hash tables 288 * (see lwpchan_destroy_cache(), below). 289 */ 290 lcp->lwpchan_next_data = old_lcp; 291 292 /* 293 * As soon as we store the new lcp, future locking operations will 294 * use it. Therefore, we must ensure that all the state we've just 295 * established reaches global visibility before the new lcp does. 296 */ 297 membar_producer(); 298 p->p_lcp = lcp; 299 300 if (old_lcp != NULL) { 301 /* 302 * Release all of the old hash table locks. 
303 */ 304 hashbucket = old_lcp->lwpchan_cache; 305 for (; hashbucket < endbucket; hashbucket++) 306 mutex_exit(&hashbucket->lwpchan_lock); 307 } 308 mutex_exit(&p->p_lcp_lock); 309 } 310 311 /* 312 * Deallocate the lwpchan cache, and any dynamically allocated mappings. 313 * Called when the process exits or execs. All lwps except one have 314 * exited so we need no locks here. 315 */ 316 void 317 lwpchan_destroy_cache(int exec) 318 { 319 proc_t *p = curproc; 320 lwpchan_hashbucket_t *hashbucket; 321 lwpchan_hashbucket_t *endbucket; 322 lwpchan_data_t *lcp; 323 lwpchan_entry_t *ent; 324 lwpchan_entry_t *next; 325 uint16_t lockflg; 326 327 lcp = p->p_lcp; 328 p->p_lcp = NULL; 329 330 lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD; 331 hashbucket = lcp->lwpchan_cache; 332 endbucket = hashbucket + lcp->lwpchan_size; 333 for (; hashbucket < endbucket; hashbucket++) { 334 ent = hashbucket->lwpchan_chain; 335 hashbucket->lwpchan_chain = NULL; 336 while (ent != NULL) { 337 next = ent->lwpchan_next; 338 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 339 (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) 340 lwp_mutex_cleanup(ent, lockflg); 341 kmem_free(ent, sizeof (*ent)); 342 ent = next; 343 } 344 } 345 346 while (lcp != NULL) { 347 lwpchan_data_t *next_lcp = lcp->lwpchan_next_data; 348 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 349 sizeof (lwpchan_hashbucket_t)); 350 kmem_free(lcp, sizeof (lwpchan_data_t)); 351 lcp = next_lcp; 352 } 353 } 354 355 /* 356 * Return zero when there is an entry in the lwpchan cache for the 357 * given process virtual address and non-zero when there is not. 358 * The returned non-zero value is the current length of the 359 * hash chain plus one. The caller holds the hash bucket lock. 360 */ 361 static uint_t 362 lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan, 363 lwpchan_hashbucket_t *hashbucket) 364 { 365 lwpchan_entry_t *ent; 366 uint_t count = 1; 367 368 for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) { 369 if (ent->lwpchan_addr == addr) { 370 if (ent->lwpchan_type != type || 371 ent->lwpchan_pool != pool) { 372 /* 373 * This shouldn't happen, but might if the 374 * process reuses its memory for different 375 * types of sync objects. We test first 376 * to avoid grabbing the memory cache line. 377 */ 378 ent->lwpchan_type = (uint16_t)type; 379 ent->lwpchan_pool = (uint16_t)pool; 380 } 381 *lwpchan = ent->lwpchan_lwpchan; 382 return (0); 383 } 384 count++; 385 } 386 return (count); 387 } 388 389 /* 390 * Return the cached lwpchan mapping if cached, otherwise insert 391 * a virtual address to lwpchan mapping into the cache. 
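 *
 * A condensed sketch of the protocol implemented below (this is only a
 * summary of the code that follows, not an additional interface):
 *
 *	top:
 *		lcp = p->p_lcp;				no lock; may be stale
 *		hashbucket = lwpchan_bucket(lcp, addr);
 *		mutex_enter(&hashbucket->lwpchan_lock);
 *		if (lcp != p->p_lcp)			cache was resized
 *			unlock and goto top;
 *		if the mapping is cached, return it;
 *		unlock, as_getmemid(), kmem_alloc() a new entry;
 *		retake the bucket lock, revalidate lcp, re-search the chain,
 *		then either discard the new entry or link it in.
 *
 * The revalidation of p->p_lcp after taking the bucket lock is what makes
 * the initial lock-free read of p->p_lcp safe against a concurrent
 * lwpchan_alloc_cache().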
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
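	 *
	 * For example (illustrative only): two processes that map the same
	 * USYNC_PROCESS mutex both get the same lwpchan, because
	 * as_getmemid() resolves both mappings to the same underlying
	 * identifier pair, so their lwps block and wake on a common sleep
	 * queue.  The same virtual address used for a USYNC_THREAD mutex in
	 * two different processes yields two distinct lwpchans, (as1, addr)
	 * and (as2, addr), so the processes never disturb each other's
	 * sleepers.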
476 */ 477 if ((type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) == 0) { 478 lwpchan->lc_wchan0 = (caddr_t)as; 479 lwpchan->lc_wchan = addr; 480 return (1); 481 } 482 /* check the lwpchan cache for mapping */ 483 return (lwpchan_get_mapping(as, addr, type, lwpchan, pool)); 484 } 485 486 static void 487 lwp_block(lwpchan_t *lwpchan) 488 { 489 kthread_t *t = curthread; 490 klwp_t *lwp = ttolwp(t); 491 sleepq_head_t *sqh; 492 493 thread_lock(t); 494 t->t_flag |= T_WAKEABLE; 495 t->t_lwpchan = *lwpchan; 496 t->t_sobj_ops = &lwp_sobj_ops; 497 t->t_release = 0; 498 sqh = lwpsqhash(lwpchan); 499 disp_lock_enter_high(&sqh->sq_lock); 500 CL_SLEEP(t); 501 DTRACE_SCHED(sleep); 502 THREAD_SLEEP(t, &sqh->sq_lock); 503 sleepq_insert(&sqh->sq_queue, t); 504 thread_unlock(t); 505 lwp->lwp_asleep = 1; 506 lwp->lwp_sysabort = 0; 507 lwp->lwp_ru.nvcsw++; 508 (void) new_mstate(curthread, LMS_SLEEP); 509 } 510 511 static kthread_t * 512 lwpsobj_pi_owner(upimutex_t *up) 513 { 514 return (up->upi_owner); 515 } 516 517 static struct upimutex * 518 upi_get(upib_t *upibp, lwpchan_t *lcp) 519 { 520 struct upimutex *upip; 521 522 for (upip = upibp->upib_first; upip != NULL; 523 upip = upip->upi_nextchain) { 524 if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 && 525 upip->upi_lwpchan.lc_wchan == lcp->lc_wchan) 526 break; 527 } 528 return (upip); 529 } 530 531 static void 532 upi_chain_add(upib_t *upibp, struct upimutex *upimutex) 533 { 534 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 535 536 /* 537 * Insert upimutex at front of list. Maybe a bit unfair 538 * but assume that not many lwpchans hash to the same 539 * upimutextab bucket, i.e. the list of upimutexes from 540 * upib_first is not too long. 541 */ 542 upimutex->upi_nextchain = upibp->upib_first; 543 upibp->upib_first = upimutex; 544 } 545 546 static void 547 upi_chain_del(upib_t *upibp, struct upimutex *upimutex) 548 { 549 struct upimutex **prev; 550 551 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 552 553 prev = &upibp->upib_first; 554 while (*prev != upimutex) { 555 prev = &(*prev)->upi_nextchain; 556 } 557 *prev = upimutex->upi_nextchain; 558 upimutex->upi_nextchain = NULL; 559 } 560 561 /* 562 * Add upimutex to chain of upimutexes held by curthread. 563 * Returns number of upimutexes held by curthread. 564 */ 565 static uint32_t 566 upi_mylist_add(struct upimutex *upimutex) 567 { 568 kthread_t *t = curthread; 569 570 /* 571 * Insert upimutex at front of list of upimutexes owned by t. This 572 * would match typical LIFO order in which nested locks are acquired 573 * and released. 574 */ 575 upimutex->upi_nextowned = t->t_upimutex; 576 t->t_upimutex = upimutex; 577 t->t_nupinest++; 578 ASSERT(t->t_nupinest > 0); 579 return (t->t_nupinest); 580 } 581 582 /* 583 * Delete upimutex from list of upimutexes owned by curthread. 584 */ 585 static void 586 upi_mylist_del(struct upimutex *upimutex) 587 { 588 kthread_t *t = curthread; 589 struct upimutex **prev; 590 591 /* 592 * Since the order in which nested locks are acquired and released, 593 * is typically LIFO, and typical nesting levels are not too deep, the 594 * following should not be expensive in the general case. 595 */ 596 prev = &t->t_upimutex; 597 while (*prev != upimutex) { 598 prev = &(*prev)->upi_nextowned; 599 } 600 *prev = upimutex->upi_nextowned; 601 upimutex->upi_nextowned = NULL; 602 ASSERT(t->t_nupinest > 0); 603 t->t_nupinest--; 604 } 605 606 /* 607 * Returns true if upimutex is owned. Should be called only when upim points 608 * to kmem which cannot disappear from underneath. 
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	/*
	 * The apparent assumption made in implementing other _lwp_* synch
	 * primitives is that get_lwpchan() does not return a unique cookie
	 * for the case where 2 processes (one forked from the other) point
	 * at the same underlying object, which is typed USYNC_PROCESS, but
	 * mapped MAP_PRIVATE, since the object has not yet been written to
	 * in the child process.
	 *
	 * Since get_lwpchan() has been fixed, it is not necessary to do the
	 * dummy writes to force a COW fault as in other places (which should
	 * be fixed).
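	 *
	 * (For reference, the "dummy write" idiom used by those other
	 * primitives is just a read of a user field followed by a store of
	 * the same value, performed under on_fault() protection, e.g.:
	 *
	 *	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	 *	suword8_noerr(&lp->mutex_type, type);
	 *
	 * as done in lwp_mutex_timedlock() and lwp_mutex_wakeup() below.)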
720 */ 721 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 722 &lwpchan, LWPCHAN_MPPOOL)) { 723 error = EFAULT; 724 goto out; 725 } 726 upibp = &UPI_CHAIN(lwpchan); 727 retry: 728 mutex_enter(&upibp->upib_lock); 729 upimutex = upi_get(upibp, &lwpchan); 730 if (upimutex == NULL) { 731 /* lock available since lwpchan has no upimutex */ 732 upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP); 733 upi_chain_add(upibp, (upimutex_t *)upimutex); 734 upimutex->upi_owner = curthread; /* grab lock */ 735 upimutex->upi_upibp = upibp; 736 upimutex->upi_vaddr = lp; 737 upimutex->upi_lwpchan = lwpchan; 738 mutex_exit(&upibp->upib_lock); 739 nupinest = upi_mylist_add((upimutex_t *)upimutex); 740 upilocked = 1; 741 fuword16_noerr(&lp->mutex_flag, &flag); 742 if (nupinest > maxnestupimx && 743 secpolicy_resource(CRED()) != 0) { 744 upimutex_unlock((upimutex_t *)upimutex, flag); 745 error = ENOMEM; 746 goto out; 747 } 748 if (flag & LOCK_OWNERDEAD) { 749 /* 750 * Return with upimutex held. 751 */ 752 error = EOWNERDEAD; 753 } else if (flag & LOCK_NOTRECOVERABLE) { 754 /* 755 * Since the setting of LOCK_NOTRECOVERABLE 756 * was done under the high-level upi mutex, 757 * in lwp_upimutex_unlock(), this flag needs to 758 * be checked while holding the upi mutex. 759 * If set, this thread should return without 760 * the lock held, and with the right error 761 * code. 762 */ 763 upimutex_unlock((upimutex_t *)upimutex, flag); 764 upilocked = 0; 765 error = ENOTRECOVERABLE; 766 } 767 goto out; 768 } 769 /* 770 * If a upimutex object exists, it must have an owner. 771 * This is due to lock hand-off, and release of upimutex when no 772 * waiters are present at unlock time, 773 */ 774 ASSERT(upimutex->upi_owner != NULL); 775 if (upimutex->upi_owner == curthread) { 776 /* 777 * The user wrapper can check if the mutex type is 778 * ERRORCHECK: if not, it should stall at user-level. 779 * If so, it should return the error code. 780 */ 781 mutex_exit(&upibp->upib_lock); 782 error = EDEADLK; 783 goto out; 784 } 785 if (try == UPIMUTEX_TRY) { 786 mutex_exit(&upibp->upib_lock); 787 error = EBUSY; 788 goto out; 789 } 790 /* 791 * Block for the lock. 792 * Put the lwp in an orderly state for debugging. 793 * Calling prstop() has to be done here, and not in 794 * turnstile_block(), since the preceding call to 795 * turnstile_lookup() raises the PIL to a level 796 * at which calls to prstop() should not be made. 797 */ 798 if ((error = lwptp->lwpt_time_error) != 0) { 799 /* 800 * The SUSV3 Posix spec is very clear that we 801 * should get no error from validating the 802 * timer until we would actually sleep. 803 */ 804 mutex_exit(&upibp->upib_lock); 805 goto out; 806 } 807 prstop(PR_REQUESTED, 0); 808 if (lwptp->lwpt_tsp != NULL) { 809 /* 810 * If we successfully queue the timeout 811 * (lwp_timer_enqueue() returns zero), 812 * then don't drop t_delay_lock until we are 813 * on the sleep queue (in turnstile_block()). 814 * Otherwise we will get an immediate timeout 815 * when we attempt to sleep in turnstile_block(). 816 */ 817 mutex_enter(&curthread->t_delay_lock); 818 if (lwp_timer_enqueue(lwptp) != 0) 819 mutex_exit(&curthread->t_delay_lock); 820 } 821 /* 822 * Now, set the waiter bit and block for the lock in turnstile_block(). 823 * No need to preserve the previous wbit since a lock try is not 824 * attempted after setting the wait bit. Wait bit is set under 825 * the upib_lock, which is not released until the turnstile lock 826 * is acquired. Say, the upimutex is L: 827 * 828 * 1. 
upib_lock is held so the waiter does not have to retry L after 829 * setting the wait bit: since the owner has to grab the upib_lock 830 * to unlock L, it will certainly see the wait bit set. 831 * 2. upib_lock is not released until the turnstile lock is acquired. 832 * This is the key to preventing a missed wake-up. Otherwise, the 833 * owner could acquire the upib_lock, and the tc_lock, to call 834 * turnstile_wakeup(). All this, before the waiter gets tc_lock 835 * to sleep in turnstile_block(). turnstile_wakeup() will then not 836 * find this waiter, resulting in the missed wakeup. 837 * 3. The upib_lock, being a kernel mutex, cannot be released while 838 * holding the tc_lock (since mutex_exit() could need to acquire 839 * the same tc_lock)...and so is held when calling turnstile_block(). 840 * The address of upib_lock is passed to turnstile_block() which 841 * releases it after releasing all turnstile locks, and before going 842 * to sleep in swtch(). 843 * 4. The waiter value cannot be a count of waiters, because a waiter 844 * can be interrupted. The interrupt occurs under the tc_lock, at 845 * which point, the upib_lock cannot be locked, to decrement waiter 846 * count. So, just treat the waiter state as a bit, not a count. 847 */ 848 ts = turnstile_lookup((upimutex_t *)upimutex); 849 upimutex->upi_waiter = 1; 850 error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex, 851 &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp); 852 /* 853 * Hand-off implies that we wakeup holding the lock, except when: 854 * - deadlock is detected 855 * - lock is not recoverable 856 * - we got an interrupt or timeout 857 * If we wake up due to an interrupt or timeout, we may 858 * or may not be holding the lock due to mutex hand-off. 859 * Use lwp_upimutex_owned() to check if we do hold the lock. 860 */ 861 if (error != 0) { 862 if ((error == EINTR || error == ETIME) && 863 (upimutex = lwp_upimutex_owned(lp, type))) { 864 /* 865 * Unlock and return - the re-startable syscall will 866 * try the lock again if we got EINTR. 867 */ 868 (void) upi_mylist_add((upimutex_t *)upimutex); 869 upimutex_unlock((upimutex_t *)upimutex, 0); 870 } 871 /* 872 * The only other possible error is EDEADLK. If so, upimutex 873 * is valid, since its owner is deadlocked with curthread. 874 */ 875 ASSERT(error == EINTR || error == ETIME || 876 (error == EDEADLK && !upi_owned((upimutex_t *)upimutex))); 877 ASSERT(!lwp_upimutex_owned(lp, type)); 878 goto out; 879 } 880 if (lwp_upimutex_owned(lp, type)) { 881 ASSERT(lwp_upimutex_owned(lp, type) == upimutex); 882 nupinest = upi_mylist_add((upimutex_t *)upimutex); 883 upilocked = 1; 884 } 885 /* 886 * Now, need to read the user-level lp->mutex_flag to do the following: 887 * 888 * - if lock is held, check if EOWNERDEAD should be returned 889 * - if lock isn't held, check if ENOTRECOVERABLE should be returned 890 * 891 * Now, either lp->mutex_flag is readable or it's not. If not 892 * readable, the on_fault path will cause a return with EFAULT as 893 * it should. If it is readable, the state of the flag encodes the 894 * robustness state of the lock: 895 * 896 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD setting 897 * will influence the return code appropriately. If the upimutex is 898 * not locked here, this could be due to a spurious wake-up or a 899 * NOTRECOVERABLE event. The flag's setting can be used to distinguish 900 * between these two events. 
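 *
 * To summarize the cases handled below (upilocked says whether the lock
 * was handed off to us):
 *
 *	upilocked, nesting limit exceeded	drop the lock, ENOMEM
 *	upilocked, flag & LOCK_OWNERDEAD	keep the lock, EOWNERDEAD
 *	!upilocked, flag & LOCK_NOTRECOVERABLE	ENOTRECOVERABLE
 *	!upilocked, otherwise			spurious wakeup; retry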
901 */ 902 fuword16_noerr(&lp->mutex_flag, &flag); 903 if (upilocked) { 904 /* 905 * If the thread wakes up from turnstile_block with the lock 906 * held, the flag could not be set to LOCK_NOTRECOVERABLE, 907 * since it would not have been handed-off the lock. 908 * So, no need to check for this case. 909 */ 910 if (nupinest > maxnestupimx && 911 secpolicy_resource(CRED()) != 0) { 912 upimutex_unlock((upimutex_t *)upimutex, flag); 913 upilocked = 0; 914 error = ENOMEM; 915 } else if (flag & LOCK_OWNERDEAD) { 916 error = EOWNERDEAD; 917 } 918 } else { 919 /* 920 * Wake-up without the upimutex held. Either this is a 921 * spurious wake-up (due to signals, forkall(), whatever), or 922 * it is a LOCK_NOTRECOVERABLE robustness event. The setting 923 * of the mutex flag can be used to distinguish between the 924 * two events. 925 */ 926 if (flag & LOCK_NOTRECOVERABLE) { 927 error = ENOTRECOVERABLE; 928 } else { 929 /* 930 * Here, the flag could be set to LOCK_OWNERDEAD or 931 * not. In both cases, this is a spurious wakeup, 932 * since the upi lock is not held, but the thread 933 * has returned from turnstile_block(). 934 * 935 * The user flag could be LOCK_OWNERDEAD if, at the 936 * same time as curthread having been woken up 937 * spuriously, the owner (say Tdead) has died, marked 938 * the mutex flag accordingly, and handed off the lock 939 * to some other waiter (say Tnew). curthread just 940 * happened to read the flag while Tnew has yet to deal 941 * with the owner-dead event. 942 * 943 * In this event, curthread should retry the lock. 944 * If Tnew is able to cleanup the lock, curthread 945 * will eventually get the lock with a zero error code, 946 * If Tnew is unable to cleanup, its eventual call to 947 * unlock the lock will result in the mutex flag being 948 * set to LOCK_NOTRECOVERABLE, and the wake-up of 949 * all waiters, including curthread, which will then 950 * eventually return ENOTRECOVERABLE due to the above 951 * check. 952 * 953 * Of course, if the user-flag is not set with 954 * LOCK_OWNERDEAD, retrying is the thing to do, since 955 * this is definitely a spurious wakeup. 956 */ 957 goto retry; 958 } 959 } 960 961 out: 962 no_fault(); 963 return (error); 964 } 965 966 967 static int 968 lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type) 969 { 970 label_t ljb; 971 int error = 0; 972 lwpchan_t lwpchan; 973 uint16_t flag; 974 upib_t *upibp; 975 volatile struct upimutex *upimutex = NULL; 976 volatile int upilocked = 0; 977 978 if (on_fault(&ljb)) { 979 if (upilocked) 980 upimutex_unlock((upimutex_t *)upimutex, 0); 981 error = EFAULT; 982 goto out; 983 } 984 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 985 &lwpchan, LWPCHAN_MPPOOL)) { 986 error = EFAULT; 987 goto out; 988 } 989 upibp = &UPI_CHAIN(lwpchan); 990 mutex_enter(&upibp->upib_lock); 991 upimutex = upi_get(upibp, &lwpchan); 992 /* 993 * If the lock is not held, or the owner is not curthread, return 994 * error. The user-level wrapper can return this error or stall, 995 * depending on whether mutex is of ERRORCHECK type or not. 996 */ 997 if (upimutex == NULL || upimutex->upi_owner != curthread) { 998 mutex_exit(&upibp->upib_lock); 999 error = EPERM; 1000 goto out; 1001 } 1002 mutex_exit(&upibp->upib_lock); /* release for user memory access */ 1003 upilocked = 1; 1004 fuword16_noerr(&lp->mutex_flag, &flag); 1005 if (flag & LOCK_OWNERDEAD) { 1006 /* 1007 * transition mutex to the LOCK_NOTRECOVERABLE state. 
1008 */ 1009 flag &= ~LOCK_OWNERDEAD; 1010 flag |= LOCK_NOTRECOVERABLE; 1011 suword16_noerr(&lp->mutex_flag, flag); 1012 } 1013 upimutex_unlock((upimutex_t *)upimutex, flag); 1014 upilocked = 0; 1015 out: 1016 no_fault(); 1017 return (error); 1018 } 1019 1020 /* 1021 * Mark user mutex state, corresponding to kernel upimutex, as LOCK_OWNERDEAD. 1022 */ 1023 static int 1024 upi_dead(upimutex_t *upip) 1025 { 1026 label_t ljb; 1027 int error = 0; 1028 lwp_mutex_t *lp; 1029 uint16_t flag; 1030 1031 if (on_fault(&ljb)) { 1032 error = EFAULT; 1033 goto out; 1034 } 1035 1036 lp = upip->upi_vaddr; 1037 fuword16_noerr(&lp->mutex_flag, &flag); 1038 flag |= LOCK_OWNERDEAD; 1039 suword16_noerr(&lp->mutex_flag, flag); 1040 out: 1041 no_fault(); 1042 return (error); 1043 } 1044 1045 /* 1046 * Unlock all upimutexes held by curthread, since curthread is dying. 1047 * For each upimutex, attempt to mark its corresponding user mutex object as 1048 * dead. 1049 */ 1050 void 1051 upimutex_cleanup() 1052 { 1053 kthread_t *t = curthread; 1054 struct upimutex *upip; 1055 1056 while ((upip = t->t_upimutex) != NULL) { 1057 if (upi_dead(upip) != 0) { 1058 /* 1059 * If the user object associated with this upimutex is 1060 * unmapped, unlock upimutex with the 1061 * LOCK_NOTRECOVERABLE flag, so that all waiters are 1062 * woken up. Since user object is unmapped, it could 1063 * not be marked as dead or notrecoverable. 1064 * The waiters will now all wake up and return 1065 * ENOTRECOVERABLE, since they would find that the lock 1066 * has not been handed-off to them. 1067 * See lwp_upimutex_lock(). 1068 */ 1069 upimutex_unlock(upip, LOCK_NOTRECOVERABLE); 1070 } else { 1071 /* 1072 * The user object has been updated as dead. 1073 * Unlock the upimutex: if no waiters, upip kmem will 1074 * be freed. If there is a waiter, the lock will be 1075 * handed off. If exit() is in progress, each existing 1076 * waiter will successively get the lock, as owners 1077 * die, and each new owner will call this routine as 1078 * it dies. The last owner will free kmem, since 1079 * it will find the upimutex has no waiters. So, 1080 * eventually, the kmem is guaranteed to be freed. 1081 */ 1082 upimutex_unlock(upip, 0); 1083 } 1084 /* 1085 * Note that the call to upimutex_unlock() above will delete 1086 * upimutex from the t_upimutexes chain. And so the 1087 * while loop will eventually terminate. 1088 */ 1089 } 1090 } 1091 1092 int 1093 lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) 1094 { 1095 kthread_t *t = curthread; 1096 klwp_t *lwp = ttolwp(t); 1097 proc_t *p = ttoproc(t); 1098 lwp_timer_t lwpt; 1099 caddr_t timedwait; 1100 int error = 0; 1101 int time_error; 1102 clock_t tim = -1; 1103 uchar_t waiters; 1104 volatile int locked = 0; 1105 volatile int watched = 0; 1106 label_t ljb; 1107 volatile uint8_t type = 0; 1108 lwpchan_t lwpchan; 1109 sleepq_head_t *sqh; 1110 static int iswanted(); 1111 uint16_t flag; 1112 int imm_timeout = 0; 1113 1114 if ((caddr_t)lp >= p->p_as->a_userlimit) 1115 return (set_errno(EFAULT)); 1116 1117 timedwait = (caddr_t)tsp; 1118 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1119 lwpt.lwpt_imm_timeout) { 1120 imm_timeout = 1; 1121 timedwait = NULL; 1122 } 1123 1124 /* 1125 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock", 1126 * this micro state is really a run state. If the thread indeed blocks, 1127 * this state becomes valid. If not, the state is converted back to 1128 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just 1129 * when blocking. 
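 *
 * (Time accumulated in LMS_USER_LOCK is what appears in the LCK column
 * of prstat -mL output, so charging it from here keeps that accounting
 * consistent whether or not we actually block.)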
1130 */ 1131 (void) new_mstate(t, LMS_USER_LOCK); 1132 if (on_fault(&ljb)) { 1133 if (locked) 1134 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1135 error = EFAULT; 1136 goto out; 1137 } 1138 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1139 if (UPIMUTEX(type)) { 1140 no_fault(); 1141 error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt); 1142 if ((error == 0 || error == EOWNERDEAD) && 1143 (type & USYNC_PROCESS)) 1144 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 1145 if (tsp && !time_error) /* copyout the residual time left */ 1146 error = lwp_timer_copyout(&lwpt, error); 1147 if (error) 1148 return (set_errno(error)); 1149 return (0); 1150 } 1151 /* 1152 * Force Copy-on-write fault if lwp_mutex_t object is 1153 * defined to be MAP_PRIVATE and it was initialized to 1154 * USYNC_PROCESS. 1155 */ 1156 suword8_noerr(&lp->mutex_type, type); 1157 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1158 &lwpchan, LWPCHAN_MPPOOL)) { 1159 error = EFAULT; 1160 goto out; 1161 } 1162 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1163 locked = 1; 1164 fuword8_noerr(&lp->mutex_waiters, &waiters); 1165 suword8_noerr(&lp->mutex_waiters, 1); 1166 if (type & USYNC_PROCESS_ROBUST) { 1167 fuword16_noerr(&lp->mutex_flag, &flag); 1168 if (flag & LOCK_NOTRECOVERABLE) { 1169 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1170 error = ENOTRECOVERABLE; 1171 goto out; 1172 } 1173 } 1174 1175 /* 1176 * If watchpoints are set, they need to be restored, since 1177 * atomic accesses of memory such as the call to ulock_try() 1178 * below cannot be watched. 1179 */ 1180 1181 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1182 1183 while (!ulock_try(&lp->mutex_lockw)) { 1184 if (time_error) { 1185 /* 1186 * The SUSV3 Posix spec is very clear that we 1187 * should get no error from validating the 1188 * timer until we would actually sleep. 1189 */ 1190 error = time_error; 1191 break; 1192 } 1193 1194 if (watched) { 1195 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1196 watched = 0; 1197 } 1198 1199 /* 1200 * Put the lwp in an orderly state for debugging. 1201 */ 1202 prstop(PR_REQUESTED, 0); 1203 if (timedwait) { 1204 /* 1205 * If we successfully queue the timeout, 1206 * then don't drop t_delay_lock until 1207 * we are on the sleep queue (below). 1208 */ 1209 mutex_enter(&t->t_delay_lock); 1210 if (lwp_timer_enqueue(&lwpt) != 0) { 1211 mutex_exit(&t->t_delay_lock); 1212 imm_timeout = 1; 1213 timedwait = NULL; 1214 } 1215 } 1216 lwp_block(&lwpchan); 1217 /* 1218 * Nothing should happen to cause the lwp to go to 1219 * sleep again until after it returns from swtch(). 1220 */ 1221 if (timedwait) 1222 mutex_exit(&t->t_delay_lock); 1223 locked = 0; 1224 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1225 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 1226 setrun(t); 1227 swtch(); 1228 t->t_flag &= ~T_WAKEABLE; 1229 if (timedwait) 1230 tim = lwp_timer_dequeue(&lwpt); 1231 setallwatch(); 1232 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 1233 error = EINTR; 1234 else if (imm_timeout || (timedwait && tim == -1)) 1235 error = ETIME; 1236 if (error) { 1237 lwp->lwp_asleep = 0; 1238 lwp->lwp_sysabort = 0; 1239 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1240 S_WRITE); 1241 1242 /* 1243 * Need to re-compute waiters bit. The waiters field in 1244 * the lock is not reliable. Either of two things could 1245 * have occurred: no lwp may have called lwp_release() 1246 * for me but I have woken up due to a signal or 1247 * timeout. 
In this case, the waiter bit is incorrect 1248 * since it is still set to 1, set above. 1249 * OR an lwp_release() did occur for some other lwp on 1250 * the same lwpchan. In this case, the waiter bit is 1251 * correct. But which event occurred, one can't tell. 1252 * So, recompute. 1253 */ 1254 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1255 locked = 1; 1256 sqh = lwpsqhash(&lwpchan); 1257 disp_lock_enter(&sqh->sq_lock); 1258 waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan); 1259 disp_lock_exit(&sqh->sq_lock); 1260 break; 1261 } 1262 lwp->lwp_asleep = 0; 1263 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1264 S_WRITE); 1265 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1266 locked = 1; 1267 fuword8_noerr(&lp->mutex_waiters, &waiters); 1268 suword8_noerr(&lp->mutex_waiters, 1); 1269 if (type & USYNC_PROCESS_ROBUST) { 1270 fuword16_noerr(&lp->mutex_flag, &flag); 1271 if (flag & LOCK_NOTRECOVERABLE) { 1272 error = ENOTRECOVERABLE; 1273 break; 1274 } 1275 } 1276 } 1277 1278 if (t->t_mstate == LMS_USER_LOCK) 1279 (void) new_mstate(t, LMS_SYSTEM); 1280 1281 if (!error && (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))) { 1282 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 1283 if (type & USYNC_PROCESS_ROBUST) { 1284 fuword16_noerr(&lp->mutex_flag, &flag); 1285 if (flag & LOCK_OWNERDEAD) 1286 error = EOWNERDEAD; 1287 else if (flag & LOCK_UNMAPPED) 1288 error = ELOCKUNMAPPED; 1289 } 1290 } 1291 suword8_noerr(&lp->mutex_waiters, waiters); 1292 locked = 0; 1293 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1294 out: 1295 no_fault(); 1296 if (watched) 1297 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1298 if (tsp && !time_error) /* copyout the residual time left */ 1299 error = lwp_timer_copyout(&lwpt, error); 1300 if (error) 1301 return (set_errno(error)); 1302 return (0); 1303 } 1304 1305 /* 1306 * Obsolete lwp_mutex_lock() interface, no longer called from libc. 1307 * libc now calls lwp_mutex_timedlock(lp, NULL). 1308 * This system call trap continues to exist solely for the benefit 1309 * of old statically-linked binaries from Solaris 9 and before. 1310 * It should be removed from the system when we no longer care 1311 * about such applications. 1312 */ 1313 int 1314 lwp_mutex_lock(lwp_mutex_t *lp) 1315 { 1316 return (lwp_mutex_timedlock(lp, NULL)); 1317 } 1318 1319 static int 1320 iswanted(kthread_t *t, lwpchan_t *lwpchan) 1321 { 1322 /* 1323 * The caller holds the dispatcher lock on the sleep queue. 1324 */ 1325 while (t != NULL) { 1326 if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1327 t->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1328 return (1); 1329 t = t->t_link; 1330 } 1331 return (0); 1332 } 1333 1334 /* 1335 * Return the highest priority thread sleeping on this lwpchan. 
1336 */ 1337 static kthread_t * 1338 lwp_queue_waiter(lwpchan_t *lwpchan) 1339 { 1340 sleepq_head_t *sqh; 1341 kthread_t *tp; 1342 1343 sqh = lwpsqhash(lwpchan); 1344 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1345 for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) { 1346 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1347 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1348 break; 1349 } 1350 disp_lock_exit(&sqh->sq_lock); 1351 return (tp); 1352 } 1353 1354 static int 1355 lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type) 1356 { 1357 sleepq_head_t *sqh; 1358 kthread_t *tp; 1359 kthread_t **tpp; 1360 1361 sqh = lwpsqhash(lwpchan); 1362 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1363 tpp = &sqh->sq_queue.sq_first; 1364 while ((tp = *tpp) != NULL) { 1365 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1366 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1367 /* 1368 * The following is typically false. It could be true 1369 * only if lwp_release() is called from 1370 * lwp_mutex_wakeup() after reading the waiters field 1371 * from memory in which the lwp lock used to be, but has 1372 * since been re-used to hold a lwp cv or lwp semaphore. 1373 * The thread "tp" found to match the lwp lock's wchan 1374 * is actually sleeping for the cv or semaphore which 1375 * now has the same wchan. In this case, lwp_release() 1376 * should return failure. 1377 */ 1378 if (sync_type != (tp->t_flag & T_WAITCVSEM)) { 1379 ASSERT(sync_type == 0); 1380 /* 1381 * assert that this can happen only for mutexes 1382 * i.e. sync_type == 0, for correctly written 1383 * user programs. 1384 */ 1385 disp_lock_exit(&sqh->sq_lock); 1386 return (0); 1387 } 1388 *waiters = iswanted(tp->t_link, lwpchan); 1389 sleepq_unlink(tpp, tp); 1390 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1391 tp->t_wchan0 = NULL; 1392 tp->t_wchan = NULL; 1393 tp->t_sobj_ops = NULL; 1394 tp->t_release = 1; 1395 THREAD_TRANSITION(tp); /* drops sleepq lock */ 1396 CL_WAKEUP(tp); 1397 thread_unlock(tp); /* drop run queue lock */ 1398 return (1); 1399 } 1400 tpp = &tp->t_link; 1401 } 1402 *waiters = 0; 1403 disp_lock_exit(&sqh->sq_lock); 1404 return (0); 1405 } 1406 1407 static void 1408 lwp_release_all(lwpchan_t *lwpchan) 1409 { 1410 sleepq_head_t *sqh; 1411 kthread_t *tp; 1412 kthread_t **tpp; 1413 1414 sqh = lwpsqhash(lwpchan); 1415 disp_lock_enter(&sqh->sq_lock); /* lock sleep q queue */ 1416 tpp = &sqh->sq_queue.sq_first; 1417 while ((tp = *tpp) != NULL) { 1418 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1419 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1420 sleepq_unlink(tpp, tp); 1421 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1422 tp->t_wchan0 = NULL; 1423 tp->t_wchan = NULL; 1424 tp->t_sobj_ops = NULL; 1425 CL_WAKEUP(tp); 1426 thread_unlock_high(tp); /* release run queue lock */ 1427 } else { 1428 tpp = &tp->t_link; 1429 } 1430 } 1431 disp_lock_exit(&sqh->sq_lock); /* drop sleep q lock */ 1432 } 1433 1434 /* 1435 * unblock a lwp that is trying to acquire this mutex. the blocked 1436 * lwp resumes and retries to acquire the lock. 
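 *
 * a rough sketch of the user-level unlock path that gets us here (a
 * simplification for illustration, not the actual libc implementation):
 *
 *	mp->mutex_lockw = 0;		clear the lock byte
 *	if (mp->mutex_waiters != 0)	waiters byte is only a hint
 *		trap into the kernel, arriving at lwp_mutex_wakeup() below
 *
 * because the waiters byte is only a hint (see the three failure cases
 * listed inside lwp_mutex_wakeup()), the woken lwp always re-tries the
 * lock in lwp_mutex_timedlock() rather than assuming it was granted
 * the mutex.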
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock(). The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (lwp_release(&lwpchan, &waiters, 0) == 1) {
		suword8_noerr(&lp->mutex_waiters, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
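 *
 * A condensed view of how a user-level condition wait is expected to use
 * this interface (illustrative pseudo-code, not the actual libc code):
 *
 *	compute the relative timeout into *tsp, or pass tsp == NULL;
 *	error = lwp_cond_wait(cv, mp, tsp, check_park);	system call
 *	reacquire mp at user level			always, even on error
 *	if (error == ETIME)	the timeout expired;
 *	if (error == EINTR)	signal or unpark; caller decides what to do;
 *
 * The kernel releases mp on the caller's behalf before sleeping (see the
 * lwpchan_lock() discussion in the function body) but never reacquires
 * it; that is left to the caller on return to user level.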
1515 */ 1516 int 1517 lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park) 1518 { 1519 kthread_t *t = curthread; 1520 klwp_t *lwp = ttolwp(t); 1521 proc_t *p = ttoproc(t); 1522 lwp_timer_t lwpt; 1523 lwpchan_t cv_lwpchan; 1524 lwpchan_t m_lwpchan; 1525 caddr_t timedwait; 1526 volatile uint16_t type = 0; 1527 volatile uint8_t mtype = 0; 1528 uchar_t waiters; 1529 volatile int error; 1530 clock_t tim = -1; 1531 volatile int locked = 0; 1532 volatile int m_locked = 0; 1533 volatile int cvwatched = 0; 1534 volatile int mpwatched = 0; 1535 label_t ljb; 1536 volatile int no_lwpchan = 1; 1537 int imm_timeout = 0; 1538 int imm_unpark = 0; 1539 1540 if ((caddr_t)cv >= p->p_as->a_userlimit || 1541 (caddr_t)mp >= p->p_as->a_userlimit) 1542 return (set_errno(EFAULT)); 1543 1544 timedwait = (caddr_t)tsp; 1545 if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0) 1546 return (set_errno(error)); 1547 if (lwpt.lwpt_imm_timeout) { 1548 imm_timeout = 1; 1549 timedwait = NULL; 1550 } 1551 1552 (void) new_mstate(t, LMS_USER_LOCK); 1553 1554 if (on_fault(&ljb)) { 1555 if (no_lwpchan) { 1556 error = EFAULT; 1557 goto out; 1558 } 1559 if (m_locked) { 1560 m_locked = 0; 1561 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1562 } 1563 if (locked) { 1564 locked = 0; 1565 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1566 } 1567 /* 1568 * set up another on_fault() for a possible fault 1569 * on the user lock accessed at "efault" 1570 */ 1571 if (on_fault(&ljb)) { 1572 if (m_locked) { 1573 m_locked = 0; 1574 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1575 } 1576 goto out; 1577 } 1578 error = EFAULT; 1579 goto efault; 1580 } 1581 1582 /* 1583 * Force Copy-on-write fault if lwp_cond_t and lwp_mutex_t 1584 * objects are defined to be MAP_PRIVATE, and are USYNC_PROCESS 1585 */ 1586 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 1587 if (UPIMUTEX(mtype) == 0) { 1588 suword8_noerr(&mp->mutex_type, mtype); 1589 /* convert user level mutex, "mp", to a unique lwpchan */ 1590 /* check if mtype is ok to use below, instead of type from cv */ 1591 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 1592 &m_lwpchan, LWPCHAN_MPPOOL)) { 1593 error = EFAULT; 1594 goto out; 1595 } 1596 } 1597 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1598 suword16_noerr(&cv->cond_type, type); 1599 /* convert user level condition variable, "cv", to a unique lwpchan */ 1600 if (!get_lwpchan(p->p_as, (caddr_t)cv, type, 1601 &cv_lwpchan, LWPCHAN_CVPOOL)) { 1602 error = EFAULT; 1603 goto out; 1604 } 1605 no_lwpchan = 0; 1606 cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1607 if (UPIMUTEX(mtype) == 0) 1608 mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), 1609 S_WRITE); 1610 1611 /* 1612 * lwpchan_lock ensures that the calling lwp is put to sleep atomically 1613 * with respect to a possible wakeup which is a result of either 1614 * an lwp_cond_signal() or an lwp_cond_broadcast(). 1615 * 1616 * What's misleading, is that the lwp is put to sleep after the 1617 * condition variable's mutex is released. This is OK as long as 1618 * the release operation is also done while holding lwpchan_lock. 1619 * The lwp is then put to sleep when the possibility of pagefaulting 1620 * or sleeping is completely eliminated. 1621 */ 1622 lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL); 1623 locked = 1; 1624 if (UPIMUTEX(mtype) == 0) { 1625 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1626 m_locked = 1; 1627 suword8_noerr(&cv->cond_waiters_kernel, 1); 1628 /* 1629 * unlock the condition variable's mutex. 
(pagefaults are 1630 * possible here.) 1631 */ 1632 ulock_clear(&mp->mutex_lockw); 1633 fuword8_noerr(&mp->mutex_waiters, &waiters); 1634 if (waiters != 0) { 1635 /* 1636 * Given the locking of lwpchan_lock around the release 1637 * of the mutex and checking for waiters, the following 1638 * call to lwp_release() can fail ONLY if the lock 1639 * acquirer is interrupted after setting the waiter bit, 1640 * calling lwp_block() and releasing lwpchan_lock. 1641 * In this case, it could get pulled off the lwp sleep 1642 * q (via setrun()) before the following call to 1643 * lwp_release() occurs. In this case, the lock 1644 * requestor will update the waiter bit correctly by 1645 * re-evaluating it. 1646 */ 1647 if (lwp_release(&m_lwpchan, &waiters, 0) > 0) 1648 suword8_noerr(&mp->mutex_waiters, waiters); 1649 } 1650 m_locked = 0; 1651 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1652 } else { 1653 suword8_noerr(&cv->cond_waiters_kernel, 1); 1654 error = lwp_upimutex_unlock(mp, mtype); 1655 if (error) { /* if the upimutex unlock failed */ 1656 locked = 0; 1657 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1658 goto out; 1659 } 1660 } 1661 no_fault(); 1662 1663 if (mpwatched) { 1664 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1665 mpwatched = 0; 1666 } 1667 if (cvwatched) { 1668 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1669 cvwatched = 0; 1670 } 1671 1672 /* 1673 * Put the lwp in an orderly state for debugging. 1674 */ 1675 prstop(PR_REQUESTED, 0); 1676 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 1677 /* 1678 * We received a signal at user-level before calling here 1679 * or another thread wants us to return immediately 1680 * with EINTR. See lwp_unpark(). 1681 */ 1682 imm_unpark = 1; 1683 t->t_unpark = 0; 1684 timedwait = NULL; 1685 } else if (timedwait) { 1686 /* 1687 * If we successfully queue the timeout, 1688 * then don't drop t_delay_lock until 1689 * we are on the sleep queue (below). 1690 */ 1691 mutex_enter(&t->t_delay_lock); 1692 if (lwp_timer_enqueue(&lwpt) != 0) { 1693 mutex_exit(&t->t_delay_lock); 1694 imm_timeout = 1; 1695 timedwait = NULL; 1696 } 1697 } 1698 t->t_flag |= T_WAITCVSEM; 1699 lwp_block(&cv_lwpchan); 1700 /* 1701 * Nothing should happen to cause the lwp to go to sleep 1702 * until after it returns from swtch(). 1703 */ 1704 if (timedwait) 1705 mutex_exit(&t->t_delay_lock); 1706 locked = 0; 1707 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1708 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 1709 (imm_timeout | imm_unpark)) 1710 setrun(t); 1711 swtch(); 1712 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 1713 if (timedwait) 1714 tim = lwp_timer_dequeue(&lwpt); 1715 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 1716 MUSTRETURN(p, t) || imm_unpark) 1717 error = EINTR; 1718 else if (imm_timeout || (timedwait && tim == -1)) 1719 error = ETIME; 1720 lwp->lwp_asleep = 0; 1721 lwp->lwp_sysabort = 0; 1722 setallwatch(); 1723 1724 if (t->t_mstate == LMS_USER_LOCK) 1725 (void) new_mstate(t, LMS_SYSTEM); 1726 1727 if (tsp && check_park) /* copyout the residual time left */ 1728 error = lwp_timer_copyout(&lwpt, error); 1729 1730 /* the mutex is reacquired by the caller on return to user level */ 1731 if (error) { 1732 /* 1733 * If we were concurrently lwp_cond_signal()d and we 1734 * received a UNIX signal or got a timeout, then perform 1735 * another lwp_cond_signal() to avoid consuming the wakeup. 
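 *
 * For example: lwps A and B are both blocked in lwp_cond_wait() and a
 * third lwp issues a single lwp_cond_signal().  If A is the one released
 * (its t_release flag is set) but A is simultaneously hit by a signal or
 * its timer fires, A returns EINTR/ETIME.  Without the forwarding below,
 * that one wakeup would be consumed and B could sleep forever even though
 * a signal was posted; re-issuing lwp_cond_signal() passes the wakeup on
 * to B.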
1736 */ 1737 if (t->t_release) 1738 (void) lwp_cond_signal(cv); 1739 return (set_errno(error)); 1740 } 1741 return (0); 1742 1743 efault: 1744 /* 1745 * make sure that the user level lock is dropped before 1746 * returning to caller, since the caller always re-acquires it. 1747 */ 1748 if (UPIMUTEX(mtype) == 0) { 1749 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1750 m_locked = 1; 1751 ulock_clear(&mp->mutex_lockw); 1752 fuword8_noerr(&mp->mutex_waiters, &waiters); 1753 if (waiters != 0) { 1754 /* 1755 * See comment above on lock clearing and lwp_release() 1756 * success/failure. 1757 */ 1758 if (lwp_release(&m_lwpchan, &waiters, 0) > 0) 1759 suword8_noerr(&mp->mutex_waiters, waiters); 1760 } 1761 m_locked = 0; 1762 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1763 } else { 1764 (void) lwp_upimutex_unlock(mp, mtype); 1765 } 1766 out: 1767 no_fault(); 1768 if (mpwatched) 1769 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1770 if (cvwatched) 1771 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1772 if (t->t_mstate == LMS_USER_LOCK) 1773 (void) new_mstate(t, LMS_SYSTEM); 1774 return (set_errno(error)); 1775 } 1776 1777 /* 1778 * wakeup one lwp that's blocked on this condition variable. 1779 */ 1780 int 1781 lwp_cond_signal(lwp_cond_t *cv) 1782 { 1783 proc_t *p = ttoproc(curthread); 1784 lwpchan_t lwpchan; 1785 uchar_t waiters; 1786 volatile uint16_t type = 0; 1787 volatile int locked = 0; 1788 volatile int watched = 0; 1789 label_t ljb; 1790 int error = 0; 1791 1792 if ((caddr_t)cv >= p->p_as->a_userlimit) 1793 return (set_errno(EFAULT)); 1794 1795 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1796 1797 if (on_fault(&ljb)) { 1798 if (locked) 1799 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1800 error = EFAULT; 1801 goto out; 1802 } 1803 /* 1804 * Force Copy-on-write fault if lwp_cond_t object is 1805 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1806 */ 1807 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1808 suword16_noerr(&cv->cond_type, type); 1809 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1810 &lwpchan, LWPCHAN_CVPOOL)) { 1811 error = EFAULT; 1812 goto out; 1813 } 1814 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1815 locked = 1; 1816 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1817 if (waiters != 0) { 1818 /* 1819 * The following call to lwp_release() might fail but it is 1820 * OK to write into the waiters bit below, since the memory 1821 * could not have been re-used or unmapped (for correctly 1822 * written user programs) as in the case of lwp_mutex_wakeup(). 1823 * For an incorrect program, we should not care about data 1824 * corruption since this is just one instance of other places 1825 * where corruption can occur for such a program. Of course 1826 * if the memory is unmapped, normal fault recovery occurs. 1827 */ 1828 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1829 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1830 } 1831 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1832 out: 1833 no_fault(); 1834 if (watched) 1835 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1836 if (error) 1837 return (set_errno(error)); 1838 return (0); 1839 } 1840 1841 /* 1842 * wakeup every lwp that's blocked on this condition variable. 
1843 */ 1844 int 1845 lwp_cond_broadcast(lwp_cond_t *cv) 1846 { 1847 proc_t *p = ttoproc(curthread); 1848 lwpchan_t lwpchan; 1849 volatile uint16_t type = 0; 1850 volatile int locked = 0; 1851 volatile int watched = 0; 1852 label_t ljb; 1853 uchar_t waiters; 1854 int error = 0; 1855 1856 if ((caddr_t)cv >= p->p_as->a_userlimit) 1857 return (set_errno(EFAULT)); 1858 1859 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1860 1861 if (on_fault(&ljb)) { 1862 if (locked) 1863 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1864 error = EFAULT; 1865 goto out; 1866 } 1867 /* 1868 * Force Copy-on-write fault if lwp_cond_t object is 1869 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1870 */ 1871 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1872 suword16_noerr(&cv->cond_type, type); 1873 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1874 &lwpchan, LWPCHAN_CVPOOL)) { 1875 error = EFAULT; 1876 goto out; 1877 } 1878 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1879 locked = 1; 1880 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1881 if (waiters != 0) { 1882 lwp_release_all(&lwpchan); 1883 suword8_noerr(&cv->cond_waiters_kernel, 0); 1884 } 1885 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1886 out: 1887 no_fault(); 1888 if (watched) 1889 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1890 if (error) 1891 return (set_errno(error)); 1892 return (0); 1893 } 1894 1895 int 1896 lwp_sema_trywait(lwp_sema_t *sp) 1897 { 1898 kthread_t *t = curthread; 1899 proc_t *p = ttoproc(t); 1900 label_t ljb; 1901 volatile int locked = 0; 1902 volatile int watched = 0; 1903 volatile uint16_t type = 0; 1904 int count; 1905 lwpchan_t lwpchan; 1906 uchar_t waiters; 1907 int error = 0; 1908 1909 if ((caddr_t)sp >= p->p_as->a_userlimit) 1910 return (set_errno(EFAULT)); 1911 1912 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1913 1914 if (on_fault(&ljb)) { 1915 if (locked) 1916 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1917 error = EFAULT; 1918 goto out; 1919 } 1920 /* 1921 * Force Copy-on-write fault if lwp_sema_t object is 1922 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1923 */ 1924 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1925 suword16_noerr((void *)&sp->sema_type, type); 1926 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1927 &lwpchan, LWPCHAN_CVPOOL)) { 1928 error = EFAULT; 1929 goto out; 1930 } 1931 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1932 locked = 1; 1933 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1934 if (count == 0) 1935 error = EBUSY; 1936 else 1937 suword32_noerr((void *)&sp->sema_count, --count); 1938 if (count != 0) { 1939 fuword8_noerr(&sp->sema_waiters, &waiters); 1940 if (waiters != 0) { 1941 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1942 suword8_noerr(&sp->sema_waiters, waiters); 1943 } 1944 } 1945 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1946 out: 1947 no_fault(); 1948 if (watched) 1949 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1950 if (error) 1951 return (set_errno(error)); 1952 return (0); 1953 } 1954 1955 /* 1956 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 
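 *
 * Illustrative note (editorial addition, not from the original source):
 * the EINTR and ETIME values set below are what user level sees from the
 * trap; mapping them onto the errnos a POSIX application expects
 * (e.g. ETIMEDOUT) is assumed to be the library's job.  A minimal
 * user-level sketch of a timed semaphore wait with those outcomes handled:
 *
 *	struct timespec ts;
 *	(void) clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec += 5;				(absolute deadline)
 *	while (sem_timedwait(&sem, &ts) == -1) {
 *		if (errno == EINTR)
 *			continue;		(interrupted: retry)
 *		break;				(timed out or real error)
 *	}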
1957 */ 1958 int 1959 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 1960 { 1961 kthread_t *t = curthread; 1962 klwp_t *lwp = ttolwp(t); 1963 proc_t *p = ttoproc(t); 1964 lwp_timer_t lwpt; 1965 caddr_t timedwait; 1966 clock_t tim = -1; 1967 label_t ljb; 1968 volatile int locked = 0; 1969 volatile int watched = 0; 1970 volatile uint16_t type = 0; 1971 int count; 1972 lwpchan_t lwpchan; 1973 uchar_t waiters; 1974 int error = 0; 1975 int time_error; 1976 int imm_timeout = 0; 1977 int imm_unpark = 0; 1978 1979 if ((caddr_t)sp >= p->p_as->a_userlimit) 1980 return (set_errno(EFAULT)); 1981 1982 timedwait = (caddr_t)tsp; 1983 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1984 lwpt.lwpt_imm_timeout) { 1985 imm_timeout = 1; 1986 timedwait = NULL; 1987 } 1988 1989 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1990 1991 if (on_fault(&ljb)) { 1992 if (locked) 1993 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1994 error = EFAULT; 1995 goto out; 1996 } 1997 /* 1998 * Force Copy-on-write fault if lwp_sema_t object is 1999 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 2000 */ 2001 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2002 suword16_noerr((void *)&sp->sema_type, type); 2003 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2004 &lwpchan, LWPCHAN_CVPOOL)) { 2005 error = EFAULT; 2006 goto out; 2007 } 2008 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2009 locked = 1; 2010 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2011 while (error == 0 && count == 0) { 2012 if (time_error) { 2013 /* 2014 * The SUSV3 Posix spec is very clear that we 2015 * should get no error from validating the 2016 * timer until we would actually sleep. 2017 */ 2018 error = time_error; 2019 break; 2020 } 2021 suword8_noerr(&sp->sema_waiters, 1); 2022 if (watched) 2023 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2024 /* 2025 * Put the lwp in an orderly state for debugging. 2026 */ 2027 prstop(PR_REQUESTED, 0); 2028 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2029 /* 2030 * We received a signal at user-level before calling 2031 * here or another thread wants us to return 2032 * immediately with EINTR. See lwp_unpark(). 2033 */ 2034 imm_unpark = 1; 2035 t->t_unpark = 0; 2036 timedwait = NULL; 2037 } else if (timedwait) { 2038 /* 2039 * If we successfully queue the timeout, 2040 * then don't drop t_delay_lock until 2041 * we are on the sleep queue (below). 2042 */ 2043 mutex_enter(&t->t_delay_lock); 2044 if (lwp_timer_enqueue(&lwpt) != 0) { 2045 mutex_exit(&t->t_delay_lock); 2046 imm_timeout = 1; 2047 timedwait = NULL; 2048 } 2049 } 2050 t->t_flag |= T_WAITCVSEM; 2051 lwp_block(&lwpchan); 2052 /* 2053 * Nothing should happen to cause the lwp to sleep 2054 * again until after it returns from swtch(). 
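 *
 * Illustrative note (editorial addition, not from the original source):
 * the setrun() call just below covers the cases where we already know we
 * must not sleep: a signal is pending, the process must return to user
 * level, or an immediate timeout or unpark has been detected.  Since
 * lwp_block() has already queued us on the sleep queue, making ourselves
 * runnable again before calling swtch() guarantees that swtch() returns
 * promptly instead of leaving the lwp blocked.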
2055 */ 2056 if (timedwait) 2057 mutex_exit(&t->t_delay_lock); 2058 locked = 0; 2059 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2060 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2061 (imm_timeout | imm_unpark)) 2062 setrun(t); 2063 swtch(); 2064 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2065 if (timedwait) 2066 tim = lwp_timer_dequeue(&lwpt); 2067 setallwatch(); 2068 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2069 MUSTRETURN(p, t) || imm_unpark) 2070 error = EINTR; 2071 else if (imm_timeout || (timedwait && tim == -1)) 2072 error = ETIME; 2073 lwp->lwp_asleep = 0; 2074 lwp->lwp_sysabort = 0; 2075 watched = watch_disable_addr((caddr_t)sp, 2076 sizeof (*sp), S_WRITE); 2077 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2078 locked = 1; 2079 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2080 } 2081 if (error == 0) 2082 suword32_noerr((void *)&sp->sema_count, --count); 2083 if (count != 0) { 2084 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2085 suword8_noerr(&sp->sema_waiters, waiters); 2086 } 2087 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2088 out: 2089 no_fault(); 2090 if (watched) 2091 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2092 if (tsp && check_park && !time_error) 2093 error = lwp_timer_copyout(&lwpt, error); 2094 if (error) 2095 return (set_errno(error)); 2096 return (0); 2097 } 2098 2099 /* 2100 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2101 * libc now calls lwp_sema_timedwait(). 2102 * This system call trap exists solely for the benefit of old 2103 * statically linked applications from Solaris 9 and before. 2104 * It should be removed when we no longer care about such applications. 2105 */ 2106 int 2107 lwp_sema_wait(lwp_sema_t *sp) 2108 { 2109 return (lwp_sema_timedwait(sp, NULL, 0)); 2110 } 2111 2112 int 2113 lwp_sema_post(lwp_sema_t *sp) 2114 { 2115 proc_t *p = ttoproc(curthread); 2116 label_t ljb; 2117 volatile int locked = 0; 2118 volatile int watched = 0; 2119 volatile uint16_t type = 0; 2120 int count; 2121 lwpchan_t lwpchan; 2122 uchar_t waiters; 2123 int error = 0; 2124 2125 if ((caddr_t)sp >= p->p_as->a_userlimit) 2126 return (set_errno(EFAULT)); 2127 2128 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2129 2130 if (on_fault(&ljb)) { 2131 if (locked) 2132 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2133 error = EFAULT; 2134 goto out; 2135 } 2136 /* 2137 * Force Copy-on-write fault if lwp_sema_t object is 2138 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 
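 *
 * Illustrative note (editorial addition, not from the original source):
 * the fuword/suword pair below reads the type word and writes the same
 * value straight back.  The intent appears to be that the write forces
 * any copy-on-write fault to happen now, while a pagefault is still
 * tolerable, so that the page backing the object is settled before
 * get_lwpchan() derives the lwpchan from it and before the lwpchan lock
 * is taken.  The same idiom appears in the mutex, condvar and rwlock
 * paths throughout this file.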
2139 */ 2140 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2141 suword16_noerr(&sp->sema_type, type); 2142 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2143 &lwpchan, LWPCHAN_CVPOOL)) { 2144 error = EFAULT; 2145 goto out; 2146 } 2147 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2148 locked = 1; 2149 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2150 if (count == _SEM_VALUE_MAX) 2151 error = EOVERFLOW; 2152 else 2153 suword32_noerr(&sp->sema_count, ++count); 2154 if (count == 1) { 2155 fuword8_noerr(&sp->sema_waiters, &waiters); 2156 if (waiters) { 2157 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2158 suword8_noerr(&sp->sema_waiters, waiters); 2159 } 2160 } 2161 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2162 out: 2163 no_fault(); 2164 if (watched) 2165 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2166 if (error) 2167 return (set_errno(error)); 2168 return (0); 2169 } 2170 2171 #define TRW_WANT_WRITE 0x1 2172 #define TRW_LOCK_GRANTED 0x2 2173 2174 #define READ_LOCK 0 2175 #define WRITE_LOCK 1 2176 #define TRY_FLAG 0x10 2177 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2178 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2179 2180 /* 2181 * Release one writer or one or more readers. Compute the rwstate word to 2182 * reflect the new state of the queue. For a safe hand-off we copy the new 2183 * rwstate value back to userland before we wake any of the new lock holders. 2184 * 2185 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2186 * being given precedence over readers of the same priority). 2187 * 2188 * If the first thread is a reader we scan the queue releasing all readers 2189 * until we hit a writer or the end of the queue. If the first thread is a 2190 * writer we still need to check for another writer (i.e. URW_WRITE_WANTED). 2191 */ 2192 void 2193 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2194 { 2195 sleepq_head_t *sqh; 2196 kthread_t *tp; 2197 kthread_t **tpp; 2198 kthread_t *tpnext; 2199 kthread_t *wakelist = NULL; 2200 uint32_t rwstate = 0; 2201 int wcount = 0; 2202 int rcount = 0; 2203 2204 sqh = lwpsqhash(lwpchan); 2205 disp_lock_enter(&sqh->sq_lock); 2206 tpp = &sqh->sq_queue.sq_first; 2207 while ((tp = *tpp) != NULL) { 2208 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2209 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2210 if (tp->t_writer & TRW_WANT_WRITE) { 2211 if ((wcount++ == 0) && (rcount == 0)) { 2212 rwstate |= URW_WRITE_LOCKED; 2213 2214 /* Just one writer to wake. */ 2215 sleepq_unlink(tpp, tp); 2216 wakelist = tp; 2217 2218 /* tpp already set for next thread. */ 2219 continue; 2220 } else { 2221 rwstate |= 2222 (URW_WRITE_WANTED|URW_HAS_WAITERS); 2223 2224 /* We need look no further. */ 2225 break; 2226 } 2227 } else { 2228 rcount++; 2229 if (wcount == 0) { 2230 rwstate++; 2231 2232 /* Add reader to wake list. */ 2233 sleepq_unlink(tpp, tp); 2234 tp->t_link = wakelist; 2235 wakelist = tp; 2236 2237 /* tpp already set for next thread. */ 2238 continue; 2239 } else 2240 rwstate |= URW_HAS_WAITERS; 2241 } 2242 } 2243 tpp = &tp->t_link; 2244 } 2245 2246 /* Copy the new rwstate back to userland. */ 2247 suword32_noerr(&rw->rwlock_readers, rwstate); 2248 2249 /* Wake the new lock holder(s) up. 
*/ 2250 tp = wakelist; 2251 while (tp != NULL) { 2252 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2253 tp->t_wchan0 = NULL; 2254 tp->t_wchan = NULL; 2255 tp->t_sobj_ops = NULL; 2256 tp->t_writer |= TRW_LOCK_GRANTED; 2257 tpnext = tp->t_link; 2258 tp->t_link = NULL; 2259 CL_WAKEUP(tp); 2260 thread_unlock_high(tp); 2261 tp = tpnext; 2262 } 2263 2264 disp_lock_exit(&sqh->sq_lock); 2265 } 2266 2267 /* 2268 * We enter here holding the user-level mutex, which we must release before 2269 * returning or blocking. Based on lwp_cond_wait(). 2270 */ 2271 static int 2272 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2273 { 2274 lwp_mutex_t *mp = NULL; 2275 kthread_t *t = curthread; 2276 kthread_t *tp; 2277 klwp_t *lwp = ttolwp(t); 2278 proc_t *p = ttoproc(t); 2279 lwp_timer_t lwpt; 2280 lwpchan_t lwpchan; 2281 lwpchan_t mlwpchan; 2282 caddr_t timedwait; 2283 volatile uint16_t type = 0; 2284 volatile uint8_t mtype = 0; 2285 uchar_t mwaiters; 2286 volatile int error = 0; 2287 int time_error; 2288 clock_t tim = -1; 2289 volatile int locked = 0; 2290 volatile int mlocked = 0; 2291 volatile int watched = 0; 2292 volatile int mwatched = 0; 2293 label_t ljb; 2294 volatile int no_lwpchan = 1; 2295 int imm_timeout = 0; 2296 int try_flag; 2297 uint32_t rwstate; 2298 int acquired = 0; 2299 2300 /* We only check rw because the mutex is included in it. */ 2301 if ((caddr_t)rw >= p->p_as->a_userlimit) 2302 return (set_errno(EFAULT)); 2303 2304 /* We must only report this error if we are about to sleep (later). */ 2305 timedwait = (caddr_t)tsp; 2306 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2307 lwpt.lwpt_imm_timeout) { 2308 imm_timeout = 1; 2309 timedwait = NULL; 2310 } 2311 2312 (void) new_mstate(t, LMS_USER_LOCK); 2313 2314 if (on_fault(&ljb)) { 2315 if (no_lwpchan) { 2316 error = EFAULT; 2317 goto out_nodrop; 2318 } 2319 if (mlocked) { 2320 mlocked = 0; 2321 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2322 } 2323 if (locked) { 2324 locked = 0; 2325 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2326 } 2327 /* 2328 * Set up another on_fault() for a possible fault 2329 * on the user lock accessed at "out_drop". 2330 */ 2331 if (on_fault(&ljb)) { 2332 if (mlocked) { 2333 mlocked = 0; 2334 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2335 } 2336 error = EFAULT; 2337 goto out_nodrop; 2338 } 2339 error = EFAULT; 2340 goto out_nodrop; 2341 } 2342 2343 /* Process rd_wr (including sanity check). */ 2344 try_flag = (rd_wr & TRY_FLAG); 2345 rd_wr &= ~TRY_FLAG; 2346 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2347 error = EINVAL; 2348 goto out_nodrop; 2349 } 2350 2351 /* We can only continue for simple USYNC_PROCESS locks. */ 2352 mp = &rw->mutex; 2353 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2354 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2355 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2356 error = EINVAL; 2357 goto out_nodrop; 2358 } 2359 2360 /* Force Copy-on-write fault incase objects are MAP_PRIVATE. */ 2361 suword8_noerr(&mp->mutex_type, mtype); 2362 suword16_noerr(&rw->rwlock_type, type); 2363 2364 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2365 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2366 &mlwpchan, LWPCHAN_MPPOOL)) { 2367 error = EFAULT; 2368 goto out_nodrop; 2369 } 2370 2371 /* Convert user level rwlock, "rw", to a unique lwpchan. 
*/ 2372 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2373 &lwpchan, LWPCHAN_CVPOOL)) { 2374 error = EFAULT; 2375 goto out_nodrop; 2376 } 2377 2378 no_lwpchan = 0; 2379 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2380 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2381 2382 /* 2383 * lwpchan_lock() ensures that the calling LWP is put to sleep 2384 * atomically with respect to a possible wakeup which is a result 2385 * of lwp_rwlock_unlock(). 2386 * 2387 * What's misleading is that the LWP is put to sleep after the 2388 * rwlock's mutex is released. This is OK as long as the release 2389 * operation is also done while holding mlwpchan. The LWP is then 2390 * put to sleep when the possibility of pagefaulting or sleeping 2391 * has been completely eliminated. 2392 */ 2393 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2394 locked = 1; 2395 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2396 mlocked = 1; 2397 2398 /* 2399 * Fetch the current rwlock state. 2400 * 2401 * The possibility of spurious wake-ups or killed waiters means that 2402 * rwstate's URW_HAS_WAITERS and URW_WRITE_WANTED bits may indicate 2403 * false positives. We only fix these if they are important to us. 2404 * 2405 * Although various error states can be observed here (e.g. the lock 2406 * is not held, but there are waiters) we assume these are application 2407 * errors and so we take no corrective action. 2408 */ 2409 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2410 2411 /* 2412 * If the lock is uncontended we can acquire it here. These tests 2413 * should have already been done at user-level; we just need to be 2414 * sure. 2415 */ 2416 if (rd_wr == READ_LOCK) { 2417 if ((rwstate & ~URW_READERS_MASK) == 0) { 2418 rwstate++; 2419 acquired = 1; 2420 } 2421 } else if (rwstate == 0) { 2422 rwstate = URW_WRITE_LOCKED; 2423 acquired = 1; 2424 } 2425 2426 /* 2427 * We can only try harder if the lock isn't held by a writer. 2428 */ 2429 if (!acquired && !(rwstate & URW_WRITE_LOCKED)) { 2430 tp = lwp_queue_waiter(&lwpchan); 2431 if (tp == NULL) { 2432 /* 2433 * Hmmm, rwstate indicates waiters but there are 2434 * none queued. This could just be the result of a 2435 * spurious wakeup, so let's fix it. 2436 */ 2437 rwstate &= URW_READERS_MASK; 2438 2439 /* 2440 * We now have another chance to acquire the lock 2441 * uncontended, but this is the last chance for a 2442 * writer to acquire the lock without blocking. 2443 */ 2444 if (rd_wr == READ_LOCK) { 2445 rwstate++; 2446 acquired = 1; 2447 } else if (rwstate == 0) { 2448 rwstate = URW_WRITE_LOCKED; 2449 acquired = 1; 2450 } 2451 } else if (rd_wr == READ_LOCK) { 2452 /* 2453 * This is the last chance for a reader to acquire 2454 * the lock now, but it can only do so if there is 2455 * no writer of equal or greater priority at the 2456 * head of the queue. 2457 * 2458 * It is also just possible that there is a reader 2459 * at the head of the queue. This may be the result 2460 * of a spurious wakeup or an application failure. 2461 * In this case we only acquire the lock if we have 2462 * equal or greater priority. It is not our job to 2463 * release spurious waiters. 2464 */ 2465 pri_t our_pri = DISP_PRIO(t); 2466 pri_t his_pri = DISP_PRIO(tp); 2467 2468 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2469 !(tp->t_writer & TRW_WANT_WRITE))) { 2470 rwstate++; 2471 acquired = 1; 2472 } 2473 } 2474 } 2475 2476 if (acquired || try_flag || time_error) { 2477 /* 2478 * We're not going to block this time!
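 *
 * Illustrative note (editorial addition, not from the original source):
 * as the code above uses it, the rwstate word packs the entire
 * user-visible lock state:
 *
 *	URW_READERS_MASK	low-order bits: count of read-lock holders
 *	URW_WRITE_LOCKED	a writer currently holds the lock
 *	URW_WRITE_WANTED	a writer is queued waiting for the lock
 *	URW_HAS_WAITERS		at least one lwp is queued on the rwlock
 *
 * A reader acquires by incrementing the count when no flag bits are set;
 * a writer acquires by setting URW_WRITE_LOCKED when the word is zero.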
2479 */ 2480 suword32_noerr(&rw->rwlock_readers, rwstate); 2481 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2482 locked = 0; 2483 2484 if (acquired) { 2485 /* 2486 * Got the lock! 2487 */ 2488 error = 0; 2489 2490 } else if (try_flag) { 2491 /* 2492 * We didn't get the lock and we're about to block. 2493 * If we're doing a trylock, return EBUSY instead. 2494 */ 2495 error = EBUSY; 2496 2497 } else if (time_error) { 2498 /* 2499 * The SUSV3 POSIX spec is very clear that we should 2500 * get no error from validating the timer (above) 2501 * until we would actually sleep. 2502 */ 2503 error = time_error; 2504 } 2505 2506 goto out_drop; 2507 } 2508 2509 /* 2510 * We're about to block, so indicate what kind of waiter we are. 2511 */ 2512 t->t_writer = 0; 2513 rwstate |= URW_HAS_WAITERS; 2514 if (rd_wr == WRITE_LOCK) { 2515 t->t_writer = TRW_WANT_WRITE; 2516 rwstate |= URW_WRITE_WANTED; 2517 } 2518 suword32_noerr(&rw->rwlock_readers, rwstate); 2519 2520 /* 2521 * Unlock the rwlock's mutex (pagefaults are possible here). 2522 */ 2523 ulock_clear(&mp->mutex_lockw); 2524 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2525 if (mwaiters != 0) { 2526 /* 2527 * Given the locking of mlwpchan around the release of 2528 * the mutex and checking for waiters, the following 2529 * call to lwp_release() can fail ONLY if the lock 2530 * acquirer is interrupted after setting the waiter bit, 2531 * calling lwp_block() and releasing mlwpchan. 2532 * In this case, it could get pulled off the LWP sleep 2533 * queue (via setrun()) before the following call to 2534 * lwp_release() occurs, and the lock requestor will 2535 * update the waiter bit correctly by re-evaluating it. 2536 */ 2537 if (lwp_release(&mlwpchan, &mwaiters, 0) > 0) 2538 suword8_noerr(&mp->mutex_waiters, mwaiters); 2539 } 2540 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2541 mlocked = 0; 2542 no_fault(); 2543 2544 if (mwatched) { 2545 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2546 mwatched = 0; 2547 } 2548 if (watched) { 2549 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2550 watched = 0; 2551 } 2552 2553 /* 2554 * Put the LWP in an orderly state for debugging. 2555 */ 2556 prstop(PR_REQUESTED, 0); 2557 if (timedwait) { 2558 /* 2559 * If we successfully queue the timeout, 2560 * then don't drop t_delay_lock until 2561 * we are on the sleep queue (below). 2562 */ 2563 mutex_enter(&t->t_delay_lock); 2564 if (lwp_timer_enqueue(&lwpt) != 0) { 2565 mutex_exit(&t->t_delay_lock); 2566 imm_timeout = 1; 2567 timedwait = NULL; 2568 } 2569 } 2570 t->t_flag |= T_WAITCVSEM; 2571 lwp_block(&lwpchan); 2572 2573 /* 2574 * Nothing should happen to cause the LWP to go to sleep until after 2575 * it returns from swtch(). 2576 */ 2577 if (timedwait) 2578 mutex_exit(&t->t_delay_lock); 2579 locked = 0; 2580 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2581 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t)) 2582 setrun(t); 2583 swtch(); 2584 2585 /* 2586 * We're back, but we need to work out why. Were we interrupted? Did 2587 * we time out? Were we granted the lock?
2588 */ 2589 error = EAGAIN; 2590 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2591 t->t_writer = 0; 2592 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2593 if (timedwait) 2594 tim = lwp_timer_dequeue(&lwpt); 2595 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2596 error = EINTR; 2597 else if (imm_timeout || (timedwait && tim == -1)) 2598 error = ETIME; 2599 lwp->lwp_asleep = 0; 2600 lwp->lwp_sysabort = 0; 2601 setallwatch(); 2602 2603 /* 2604 * If we were granted the lock we don't care about EINTR or ETIME. 2605 */ 2606 if (acquired) 2607 error = 0; 2608 2609 if (t->t_mstate == LMS_USER_LOCK) 2610 (void) new_mstate(t, LMS_SYSTEM); 2611 2612 if (error) 2613 return (set_errno(error)); 2614 return (0); 2615 2616 out_drop: 2617 /* 2618 * Make sure that the user level lock is dropped before returning 2619 * to the caller. 2620 */ 2621 if (!mlocked) { 2622 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2623 mlocked = 1; 2624 } 2625 suword32_noerr(&mp->mutex_ownerpid, 0); 2626 ulock_clear(&mp->mutex_lockw); 2627 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2628 if (mwaiters != 0) { 2629 /* 2630 * See comment above on lock clearing and lwp_release() 2631 * success/failure. 2632 */ 2633 if (lwp_release(&mlwpchan, &mwaiters, 0) > 0) 2634 suword8_noerr(&mp->mutex_waiters, mwaiters); 2635 } 2636 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2637 mlocked = 0; 2638 2639 out_nodrop: 2640 no_fault(); 2641 if (mwatched) 2642 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2643 if (watched) 2644 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2645 if (t->t_mstate == LMS_USER_LOCK) 2646 (void) new_mstate(t, LMS_SYSTEM); 2647 if (error) 2648 return (set_errno(error)); 2649 return (0); 2650 } 2651 2652 /* 2653 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2654 * we never drop the lock. 2655 */ 2656 static int 2657 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2658 { 2659 kthread_t *t = curthread; 2660 proc_t *p = ttoproc(t); 2661 lwpchan_t lwpchan; 2662 volatile uint16_t type = 0; 2663 volatile int error = 0; 2664 volatile int locked = 0; 2665 volatile int watched = 0; 2666 label_t ljb; 2667 volatile int no_lwpchan = 1; 2668 uint32_t rwstate; 2669 2670 /* We only check rw because the mutex is included in it. */ 2671 if ((caddr_t)rw >= p->p_as->a_userlimit) 2672 return (set_errno(EFAULT)); 2673 2674 if (on_fault(&ljb)) { 2675 if (no_lwpchan) { 2676 error = EFAULT; 2677 goto out_nodrop; 2678 } 2679 if (locked) { 2680 locked = 0; 2681 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2682 } 2683 error = EFAULT; 2684 goto out_nodrop; 2685 } 2686 2687 /* We can only continue for simple USYNC_PROCESS locks. */ 2688 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2689 if (type != USYNC_PROCESS) { 2690 error = EINVAL; 2691 goto out_nodrop; 2692 } 2693 2694 /* Force Copy-on-write fault incase objects are MAP_PRIVATE. */ 2695 suword16_noerr(&rw->rwlock_type, type); 2696 2697 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2698 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2699 &lwpchan, LWPCHAN_CVPOOL)) { 2700 error = EFAULT; 2701 goto out_nodrop; 2702 } 2703 2704 no_lwpchan = 0; 2705 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2706 2707 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2708 locked = 1; 2709 2710 /* 2711 * We can resolve multiple readers (except the last reader) here. 
2712 * For the last reader or a writer we need lwp_rwlock_release(), 2713 * to which we also delegate the task of copying the new rwstate 2714 * back to userland (see the comment there). 2715 */ 2716 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2717 if (rwstate & URW_WRITE_LOCKED) 2718 lwp_rwlock_release(&lwpchan, rw); 2719 else if ((rwstate & URW_READERS_MASK) > 0) { 2720 rwstate--; 2721 if ((rwstate & URW_READERS_MASK) == 0) 2722 lwp_rwlock_release(&lwpchan, rw); 2723 else 2724 suword32_noerr(&rw->rwlock_readers, rwstate); 2725 } 2726 2727 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2728 locked = 0; 2729 error = 0; 2730 2731 out_nodrop: 2732 no_fault(); 2733 if (watched) 2734 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2735 if (error) 2736 return (set_errno(error)); 2737 return (0); 2738 } 2739 2740 int 2741 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2742 { 2743 switch (subcode) { 2744 case 0: 2745 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2746 case 1: 2747 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2748 case 2: 2749 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2750 case 3: 2751 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2752 case 4: 2753 return (lwp_rwlock_unlock(rwlp)); 2754 } 2755 return (set_errno(EINVAL)); 2756 } 2757 2758 /* 2759 * Return the owner of the user-level s-object. 2760 * Since we can't really do this, return NULL. 2761 */ 2762 /* ARGSUSED */ 2763 static kthread_t * 2764 lwpsobj_owner(caddr_t sobj) 2765 { 2766 return ((kthread_t *)NULL); 2767 } 2768 2769 /* 2770 * Wake up a thread asleep on a user-level synchronization 2771 * object. 2772 */ 2773 static void 2774 lwp_unsleep(kthread_t *t) 2775 { 2776 ASSERT(THREAD_LOCK_HELD(t)); 2777 if (t->t_wchan0 != NULL) { 2778 sleepq_head_t *sqh; 2779 sleepq_t *sqp = t->t_sleepq; 2780 2781 if (sqp != NULL) { 2782 sqh = lwpsqhash(&t->t_lwpchan); 2783 ASSERT(&sqh->sq_queue == sqp); 2784 sleepq_unsleep(t); 2785 disp_lock_exit_high(&sqh->sq_lock); 2786 CL_SETRUN(t); 2787 return; 2788 } 2789 } 2790 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2791 } 2792 2793 /* 2794 * Change the priority of a thread asleep on a user-level 2795 * synchronization object. To maintain proper priority order, 2796 * we: 2797 * o dequeue the thread. 2798 * o change its priority. 2799 * o re-enqueue the thread. 2800 * Assumption: the thread is locked on entry. 
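 *
 * Illustrative note (editorial addition, not from the original source):
 * lwp_unsleep() and lwp_change_pri() are the unsleep and change_pri
 * entry points of the sobj_ops vector for these user-level
 * synchronization objects.  They are called by generic dispatcher code,
 * for example when a signal makes a sleeping lwp runnable or when its
 * priority is adjusted, which is why both are entered with the thread
 * lock held and assert THREAD_LOCK_HELD().  Re-inserting the thread
 * instead of updating the priority in place keeps the sleep queue
 * sorted, since sleepq_insert() positions the thread by its new priority.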
2801 */ 2802 static void 2803 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2804 { 2805 ASSERT(THREAD_LOCK_HELD(t)); 2806 if (t->t_wchan0 != NULL) { 2807 sleepq_t *sqp = t->t_sleepq; 2808 2809 sleepq_dequeue(t); 2810 *t_prip = pri; 2811 sleepq_insert(sqp, t); 2812 } else 2813 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2814 } 2815 2816 /* 2817 * Clean up a locked robust mutex 2818 */ 2819 static void 2820 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2821 { 2822 uint16_t flag; 2823 uchar_t waiters; 2824 label_t ljb; 2825 pid_t owner_pid; 2826 lwp_mutex_t *lp; 2827 volatile int locked = 0; 2828 volatile int watched = 0; 2829 2830 ASSERT(ent->lwpchan_type & USYNC_PROCESS_ROBUST); 2831 2832 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2833 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2834 if (on_fault(&ljb)) { 2835 if (locked) 2836 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2837 goto out; 2838 } 2839 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2840 if (owner_pid != curproc->p_pid) { 2841 goto out; 2842 } 2843 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2844 locked = 1; 2845 fuword16_noerr(&lp->mutex_flag, &flag); 2846 if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) { 2847 flag |= lockflg; 2848 suword16_noerr(&lp->mutex_flag, flag); 2849 } 2850 suword32_noerr(&lp->mutex_ownerpid, 0); 2851 ulock_clear(&lp->mutex_lockw); 2852 fuword8_noerr(&lp->mutex_waiters, &waiters); 2853 if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2854 suword8_noerr(&lp->mutex_waiters, waiters); 2855 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2856 out: 2857 no_fault(); 2858 if (watched) 2859 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2860 } 2861 2862 /* 2863 * Register the mutex and initialize it if it is not already initialized 2864 */ 2865 int 2866 lwp_mutex_init(lwp_mutex_t *lp, int type) 2867 { 2868 proc_t *p = curproc; 2869 int error = 0; 2870 volatile int locked = 0; 2871 volatile int watched = 0; 2872 label_t ljb; 2873 uint16_t flag; 2874 lwpchan_t lwpchan; 2875 pid_t owner_pid; 2876 2877 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2878 return (set_errno(EFAULT)); 2879 2880 if (type != USYNC_PROCESS_ROBUST) 2881 return (set_errno(EINVAL)); 2882 2883 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2884 2885 if (on_fault(&ljb)) { 2886 if (locked) 2887 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 2888 error = EFAULT; 2889 goto out; 2890 } 2891 /* 2892 * Force Copy-on-write fault if lwp_mutex_t object is 2893 * defined to be MAP_PRIVATE and it was initialized to 2894 * USYNC_PROCESS.
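 *
 * Illustrative note (editorial addition, not from the original source):
 * for these USYNC_PROCESS_ROBUST mutexes the registration code below
 * also acts as the recovery hook.  If a previous owner in this process
 * died or unmapped the lock (LOCK_OWNERDEAD or LOCK_UNMAPPED, set by
 * lwp_mutex_cleanup() above), re-initializing from the owning process
 * clears those flags; otherwise an already-initialized mutex reports
 * EBUSY.  A holder that acquires the lock and sees EOWNERDEAD or
 * ELOCKUNMAPPED (see lwp_mutex_trylock(), below) is expected to repair
 * the protected data and re-register; if it simply unlocks while the
 * flags are still set, lwp_mutex_unlock() marks the mutex
 * LOCK_NOTRECOVERABLE.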
2895 */ 2896 suword8_noerr(&lp->mutex_type, type); 2897 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2898 &lwpchan, LWPCHAN_MPPOOL)) { 2899 error = EFAULT; 2900 goto out; 2901 } 2902 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 2903 locked = 1; 2904 fuword16_noerr(&lp->mutex_flag, &flag); 2905 if (flag & LOCK_INITED) { 2906 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 2907 fuword32_noerr(&lp->mutex_ownerpid, 2908 (uint32_t *)&owner_pid); 2909 if (owner_pid == p->p_pid) { 2910 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2911 suword16_noerr(&lp->mutex_flag, flag); 2912 locked = 0; 2913 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 2914 goto out; 2915 } 2916 } 2917 error = EBUSY; 2918 } else { 2919 suword8_noerr(&lp->mutex_waiters, 0); 2920 suword8_noerr(&lp->mutex_lockw, 0); 2921 suword16_noerr(&lp->mutex_flag, LOCK_INITED); 2922 suword32_noerr(&lp->mutex_ownerpid, 0); 2923 } 2924 locked = 0; 2925 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 2926 out: 2927 no_fault(); 2928 if (watched) 2929 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2930 if (error) 2931 return (set_errno(error)); 2932 return (0); 2933 } 2934 2935 int 2936 lwp_mutex_trylock(lwp_mutex_t *lp) 2937 { 2938 kthread_t *t = curthread; 2939 proc_t *p = ttoproc(t); 2940 int error = 0; 2941 volatile int locked = 0; 2942 volatile int watched = 0; 2943 label_t ljb; 2944 volatile uint8_t type = 0; 2945 uint16_t flag; 2946 lwpchan_t lwpchan; 2947 2948 if ((caddr_t)lp >= p->p_as->a_userlimit) 2949 return (set_errno(EFAULT)); 2950 2951 (void) new_mstate(t, LMS_USER_LOCK); 2952 2953 if (on_fault(&ljb)) { 2954 if (locked) 2955 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 2956 error = EFAULT; 2957 goto out; 2958 } 2959 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 2960 if (UPIMUTEX(type)) { 2961 no_fault(); 2962 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 2963 if ((error == 0 || error == EOWNERDEAD) && 2964 (type & USYNC_PROCESS)) 2965 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 2966 if (error) 2967 return (set_errno(error)); 2968 return (0); 2969 } 2970 /* 2971 * Force Copy-on-write fault if lwp_mutex_t object is 2972 * defined to be MAP_PRIVATE and it was initialized to 2973 * USYNC_PROCESS. 2974 */ 2975 suword8_noerr(&lp->mutex_type, type); 2976 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2977 &lwpchan, LWPCHAN_MPPOOL)) { 2978 error = EFAULT; 2979 goto out; 2980 } 2981 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 2982 locked = 1; 2983 if (type & USYNC_PROCESS_ROBUST) { 2984 fuword16_noerr((uint16_t *)(&lp->mutex_flag), &flag); 2985 if (flag & LOCK_NOTRECOVERABLE) { 2986 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 2987 error = ENOTRECOVERABLE; 2988 goto out; 2989 } 2990 } 2991 2992 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2993 2994 if (!ulock_try(&lp->mutex_lockw)) 2995 error = EBUSY; 2996 else if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { 2997 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 2998 if (type & USYNC_PROCESS_ROBUST) { 2999 if (flag & LOCK_OWNERDEAD) 3000 error = EOWNERDEAD; 3001 else if (flag & LOCK_UNMAPPED) 3002 error = ELOCKUNMAPPED; 3003 } 3004 } 3005 locked = 0; 3006 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3007 out: 3008 3009 if (t->t_mstate == LMS_USER_LOCK) 3010 (void) new_mstate(t, LMS_SYSTEM); 3011 3012 no_fault(); 3013 if (watched) 3014 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3015 if (error) 3016 return (set_errno(error)); 3017 return (0); 3018 } 3019 3020 /* 3021 * unlock the mutex and unblock any lwps that are trying to acquire this mutex.
3022 * the blocked lwp resumes and retries to acquire the lock. 3023 */ 3024 int 3025 lwp_mutex_unlock(lwp_mutex_t *lp) 3026 { 3027 proc_t *p = ttoproc(curthread); 3028 lwpchan_t lwpchan; 3029 uchar_t waiters; 3030 volatile int locked = 0; 3031 volatile int watched = 0; 3032 volatile uint8_t type = 0; 3033 label_t ljb; 3034 uint16_t flag; 3035 int error = 0; 3036 3037 if ((caddr_t)lp >= p->p_as->a_userlimit) 3038 return (set_errno(EFAULT)); 3039 3040 if (on_fault(&ljb)) { 3041 if (locked) 3042 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3043 error = EFAULT; 3044 goto out; 3045 } 3046 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3047 if (UPIMUTEX(type)) { 3048 no_fault(); 3049 error = lwp_upimutex_unlock(lp, type); 3050 if (error) 3051 return (set_errno(error)); 3052 return (0); 3053 } 3054 3055 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3056 3057 /* 3058 * Force Copy-on-write fault if lwp_mutex_t object is 3059 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS 3060 */ 3061 suword8_noerr(&lp->mutex_type, type); 3062 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3063 &lwpchan, LWPCHAN_MPPOOL)) { 3064 error = EFAULT; 3065 goto out; 3066 } 3067 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3068 locked = 1; 3069 if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { 3070 if (type & USYNC_PROCESS_ROBUST) { 3071 fuword16_noerr(&lp->mutex_flag, &flag); 3072 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3073 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3074 flag |= LOCK_NOTRECOVERABLE; 3075 suword16_noerr(&lp->mutex_flag, flag); 3076 } 3077 } 3078 suword32_noerr(&lp->mutex_ownerpid, 0); 3079 } 3080 ulock_clear(&lp->mutex_lockw); 3081 /* 3082 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3083 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3084 * may fail. If it fails, do not write into the waiter bit. 3085 * The call to lwp_release() might fail due to one of three reasons: 3086 * 3087 * 1. due to the thread which set the waiter bit not actually 3088 * sleeping since it got the lock on the re-try. The waiter 3089 * bit will then be correctly updated by that thread. This 3090 * window may be closed by reading the wait bit again here 3091 * and not calling lwp_release() at all if it is zero. 3092 * 2. the thread which set the waiter bit and went to sleep 3093 * was woken up by a signal. This time, the waiter recomputes 3094 * the wait bit in the return with EINTR code. 3095 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3096 * memory that has been re-used after the lock was dropped. 3097 * In this case, writing into the waiter bit would cause data 3098 * corruption. 3099 */ 3100 fuword8_noerr(&lp->mutex_waiters, &waiters); 3101 if (waiters) { 3102 if ((type & USYNC_PROCESS_ROBUST) && 3103 (flag & LOCK_NOTRECOVERABLE)) { 3104 lwp_release_all(&lwpchan); 3105 suword8_noerr(&lp->mutex_waiters, 0); 3106 } else if (lwp_release(&lwpchan, &waiters, 0) == 1) { 3107 suword8_noerr(&lp->mutex_waiters, waiters); 3108 } 3109 } 3110 3111 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3112 out: 3113 no_fault(); 3114 if (watched) 3115 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3116 if (error) 3117 return (set_errno(error)); 3118 return (0); 3119 } 3120
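
/*
 * Illustrative appendix (editorial addition, not from the original
 * source): a schematic view of the user/kernel split for the non-PI
 * mutex paths above.  The user-level side shown here is hypothetical
 * pseudocode; the real implementation lives in libc.
 *
 *	acquire:
 *		while (an atomic try on mp->mutex_lockw fails) {
 *			set mp->mutex_waiters;
 *			trap to the blocking lock path (for example
 *			    lwp_mutex_timedlock(), not shown in this
 *			    excerpt) and retry on return;
 *		}
 *
 *	release:
 *		clear mp->mutex_lockw;
 *		if (mp->mutex_waiters)
 *			trap to lwp_mutex_unlock() or lwp_mutex_wakeup()
 *			    so the kernel can wake one waiter;
 *
 * The kernel never trusts these user words: every access in this file
 * goes through fuword/suword under on_fault() protection, and the
 * waiters byte is treated only as a hint that the woken lwp re-evaluates
 * when it retries the lock.
 */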