/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread. This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps is added for numbers of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap(). Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if ((type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) == 0) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}
	/* check the lwpchan cache for mapping */
	return (lwpchan_get_mapping(as, addr, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list. Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t. This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned. Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) { /* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	/*
	 * The apparent assumption made in implementing other _lwp_* synch
	 * primitives, is that get_lwpchan() does not return a unique cookie
	 * for the case where 2 processes (one forked from the other) point
	 * at the same underlying object, which is typed USYNC_PROCESS, but
	 * mapped MAP_PRIVATE, since the object has not yet been written to,
	 * in the child process.
	 *
	 * Since get_lwpchan() has been fixed, it is not necessary to do the
	 * dummy writes to force a COW fault as in other places (which should
	 * be fixed).
	 */
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_OWNERDEAD) {
			/*
			 * Return with upimutex held.
			 */
			error = EOWNERDEAD;
		} else if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error
			 * code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 * Put the lwp in an orderly state for debugging.
	 * Calling prstop() has to be done here, and not in
	 * turnstile_block(), since the preceding call to
	 * turnstile_lookup() raises the PIL to a level
	 * at which calls to prstop() should not be made.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	prstop(PR_REQUESTED, 0);
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * If we successfully queue the timeout
		 * (lwp_timer_enqueue() returns zero),
		 * then don't drop t_delay_lock until we are
		 * on the sleep queue (in turnstile_block()).
		 * Otherwise we will get an immediate timeout
		 * when we attempt to sleep in turnstile_block().
		 */
		mutex_enter(&curthread->t_delay_lock);
		if (lwp_timer_enqueue(lwptp) != 0)
			mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit. Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired. Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up. Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted. The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count. So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD should be returned
	 * - if lock isn't held, check if ENOTRECOVERABLE should be returned
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT as
	 * it should.  If it is readable, the state of the flag encodes the
	 * robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD setting
	 * will influence the return code appropriately.  If the upimutex is
	 * not locked here, this could be due to a spurious wake-up or a
	 * NOTRECOVERABLE event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & LOCK_OWNERDEAD) {
			error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held. Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not. In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew). curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & LOCK_OWNERDEAD) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~LOCK_OWNERDEAD;
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex, as LOCK_OWNERDEAD.
 */
static int
upi_dead(upimutex_t *upip)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;
	uint16_t flag;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	fuword16_noerr(&lp->mutex_flag, &flag);
	flag |= LOCK_OWNERDEAD;
	suword16_noerr(&lp->mutex_flag, flag);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up. Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed. If there is a waiter, the lock will be
			 * handed off. If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies. The last owner will free kmem, since
			 * it will find the upimutex has no waiters. So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	static int iswanted();
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state. If the thread indeed blocks,
	 * this state becomes valid. If not, the state is converted back to
	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if ((error == 0 || error == EOWNERDEAD) &&
		    (type & USYNC_PROCESS))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE and it was initialized to
	 * USYNC_PROCESS.
	 */
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);
	if (type & USYNC_PROCESS_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		/*
		 * Put the lwp in an orderly state for debugging.
		 */
		prstop(PR_REQUESTED, 0);
		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit. The waiters field in
			 * the lock is not reliable. Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & USYNC_PROCESS_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (!error && (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))) {
		suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & USYNC_PROCESS_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (flag & LOCK_UNMAPPED)
				error = ELOCKUNMAPPED;
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_mutex_lock() interface, no longer called from libc.
 * libc now calls lwp_mutex_timedlock(lp, NULL).
 * This system call trap continues to exist solely for the benefit
 * of old statically-linked binaries from Solaris 9 and before.
 * It should be removed from the system when we no longer care
 * about such applications.
 */
int
lwp_mutex_lock(lwp_mutex_t *lp)
{
	return (lwp_mutex_timedlock(lp, NULL));
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false. It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but has
			 * since been re-used to hold a lwp cv or lwp semaphore.
			 * The thread "tp" found to match the lwp lock's wchan
			 * is actually sleeping for the cv or semaphore which
			 * now has the same wchan. In this case, lwp_release()
			 * should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);		/* drop sleep queue lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex. the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
	 * may fail.  If it fails, do not write into the waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 * 1. due to the thread which set the waiter bit not actually
	 *    sleeping since it got the lock on the re-try. The waiter
	 *    bit will then be correctly updated by that thread. This
	 *    window may be closed by reading the wait bit again here
	 *    and not calling lwp_release() at all if it is zero.
	 * 2. the thread which set the waiter bit and went to sleep
	 *    was woken up by a signal. This time, the waiter recomputes
	 *    the wait bit in the return with EINTR code.
	 * 3. the waiter bit read by lwp_mutex_wakeup() was in
	 *    memory that has been re-used after the lock was dropped.
	 *    In this case, writing into the waiter bit would cause data
	 *    corruption.
	 */
	if (lwp_release(&lwpchan, &waiters, 0) == 1) {
		suword8_noerr(&lp->mutex_waiters, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write fault if lwp_cond_t and lwp_mutex_t
	 * objects are defined to be MAP_PRIVATE, and are USYNC_PROCESS
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	if (UPIMUTEX(mtype) == 0) {
		suword8_noerr(&mp->mutex_type, mtype);
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading, is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex. (pagefaults are
		 * possible here.)
		 */
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs. In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {		/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	/*
	 * Put the lwp in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)		/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}

/*
 * wakeup one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_cond_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		/*
		 * The following call to lwp_release() might fail but it is
		 * OK to write into the waiters bit below, since the memory
		 * could not have been re-used or unmapped (for correctly
		 * written user programs) as in the case of lwp_mutex_wakeup().
		 * For an incorrect program, we should not care about data
		 * corruption since this is just one instance of other places
		 * where corruption can occur for such a program. Of course
		 * if the memory is unmapped, normal fault recovery occurs.
		 */
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&cv->cond_waiters_kernel, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * wakeup every lwp that's blocked on this condition variable.
1842 */ 1843 int 1844 lwp_cond_broadcast(lwp_cond_t *cv) 1845 { 1846 proc_t *p = ttoproc(curthread); 1847 lwpchan_t lwpchan; 1848 volatile uint16_t type = 0; 1849 volatile int locked = 0; 1850 volatile int watched = 0; 1851 label_t ljb; 1852 uchar_t waiters; 1853 int error = 0; 1854 1855 if ((caddr_t)cv >= p->p_as->a_userlimit) 1856 return (set_errno(EFAULT)); 1857 1858 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1859 1860 if (on_fault(&ljb)) { 1861 if (locked) 1862 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1863 error = EFAULT; 1864 goto out; 1865 } 1866 /* 1867 * Force Copy-on-write fault if lwp_cond_t object is 1868 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1869 */ 1870 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1871 suword16_noerr(&cv->cond_type, type); 1872 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1873 &lwpchan, LWPCHAN_CVPOOL)) { 1874 error = EFAULT; 1875 goto out; 1876 } 1877 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1878 locked = 1; 1879 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1880 if (waiters != 0) { 1881 lwp_release_all(&lwpchan); 1882 suword8_noerr(&cv->cond_waiters_kernel, 0); 1883 } 1884 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1885 out: 1886 no_fault(); 1887 if (watched) 1888 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1889 if (error) 1890 return (set_errno(error)); 1891 return (0); 1892 } 1893 1894 int 1895 lwp_sema_trywait(lwp_sema_t *sp) 1896 { 1897 kthread_t *t = curthread; 1898 proc_t *p = ttoproc(t); 1899 label_t ljb; 1900 volatile int locked = 0; 1901 volatile int watched = 0; 1902 volatile uint16_t type = 0; 1903 int count; 1904 lwpchan_t lwpchan; 1905 uchar_t waiters; 1906 int error = 0; 1907 1908 if ((caddr_t)sp >= p->p_as->a_userlimit) 1909 return (set_errno(EFAULT)); 1910 1911 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1912 1913 if (on_fault(&ljb)) { 1914 if (locked) 1915 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1916 error = EFAULT; 1917 goto out; 1918 } 1919 /* 1920 * Force Copy-on-write fault if lwp_sema_t object is 1921 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1922 */ 1923 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1924 suword16_noerr((void *)&sp->sema_type, type); 1925 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1926 &lwpchan, LWPCHAN_CVPOOL)) { 1927 error = EFAULT; 1928 goto out; 1929 } 1930 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1931 locked = 1; 1932 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1933 if (count == 0) 1934 error = EBUSY; 1935 else 1936 suword32_noerr((void *)&sp->sema_count, --count); 1937 if (count != 0) { 1938 fuword8_noerr(&sp->sema_waiters, &waiters); 1939 if (waiters != 0) { 1940 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1941 suword8_noerr(&sp->sema_waiters, waiters); 1942 } 1943 } 1944 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1945 out: 1946 no_fault(); 1947 if (watched) 1948 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1949 if (error) 1950 return (set_errno(error)); 1951 return (0); 1952 } 1953 1954 /* 1955 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 
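 *
 * A condensed view of how the 'tsp' argument is consumed by the code below
 * (this only restates the code, it adds nothing new):
 *
 *	tsp == NULL			no timeout; block until the semaphore
 *					is posted or the lwp is interrupted
 *	timeout already expired		lwp_timer_copyin() sets
 *					lwpt_imm_timeout; the lwp is made
 *					runnable again before swtch() and the
 *					wait fails with ETIME
 *	invalid timespec		time_error is noted but, per SUSV3,
 *					only reported if the lwp would
 *					otherwise have had to sleep
 *	otherwise			an lwp timer is armed with
 *					lwp_timer_enqueue() just before
 *					blocking and torn down with
 *					lwp_timer_dequeue() afterwards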
1956 */ 1957 int 1958 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 1959 { 1960 kthread_t *t = curthread; 1961 klwp_t *lwp = ttolwp(t); 1962 proc_t *p = ttoproc(t); 1963 lwp_timer_t lwpt; 1964 caddr_t timedwait; 1965 clock_t tim = -1; 1966 label_t ljb; 1967 volatile int locked = 0; 1968 volatile int watched = 0; 1969 volatile uint16_t type = 0; 1970 int count; 1971 lwpchan_t lwpchan; 1972 uchar_t waiters; 1973 int error = 0; 1974 int time_error; 1975 int imm_timeout = 0; 1976 int imm_unpark = 0; 1977 1978 if ((caddr_t)sp >= p->p_as->a_userlimit) 1979 return (set_errno(EFAULT)); 1980 1981 timedwait = (caddr_t)tsp; 1982 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1983 lwpt.lwpt_imm_timeout) { 1984 imm_timeout = 1; 1985 timedwait = NULL; 1986 } 1987 1988 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1989 1990 if (on_fault(&ljb)) { 1991 if (locked) 1992 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1993 error = EFAULT; 1994 goto out; 1995 } 1996 /* 1997 * Force Copy-on-write fault if lwp_sema_t object is 1998 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1999 */ 2000 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2001 suword16_noerr((void *)&sp->sema_type, type); 2002 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2003 &lwpchan, LWPCHAN_CVPOOL)) { 2004 error = EFAULT; 2005 goto out; 2006 } 2007 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2008 locked = 1; 2009 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2010 while (error == 0 && count == 0) { 2011 if (time_error) { 2012 /* 2013 * The SUSV3 Posix spec is very clear that we 2014 * should get no error from validating the 2015 * timer until we would actually sleep. 2016 */ 2017 error = time_error; 2018 break; 2019 } 2020 suword8_noerr(&sp->sema_waiters, 1); 2021 if (watched) 2022 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2023 /* 2024 * Put the lwp in an orderly state for debugging. 2025 */ 2026 prstop(PR_REQUESTED, 0); 2027 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2028 /* 2029 * We received a signal at user-level before calling 2030 * here or another thread wants us to return 2031 * immediately with EINTR. See lwp_unpark(). 2032 */ 2033 imm_unpark = 1; 2034 t->t_unpark = 0; 2035 timedwait = NULL; 2036 } else if (timedwait) { 2037 /* 2038 * If we successfully queue the timeout, 2039 * then don't drop t_delay_lock until 2040 * we are on the sleep queue (below). 2041 */ 2042 mutex_enter(&t->t_delay_lock); 2043 if (lwp_timer_enqueue(&lwpt) != 0) { 2044 mutex_exit(&t->t_delay_lock); 2045 imm_timeout = 1; 2046 timedwait = NULL; 2047 } 2048 } 2049 t->t_flag |= T_WAITCVSEM; 2050 lwp_block(&lwpchan); 2051 /* 2052 * Nothing should happen to cause the lwp to sleep 2053 * again until after it returns from swtch(). 
2054 */
2055 if (timedwait)
2056 mutex_exit(&t->t_delay_lock);
2057 locked = 0;
2058 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2059 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
2060 (imm_timeout | imm_unpark))
2061 setrun(t);
2062 swtch();
2063 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2064 if (timedwait)
2065 tim = lwp_timer_dequeue(&lwpt);
2066 setallwatch();
2067 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
2068 MUSTRETURN(p, t) || imm_unpark)
2069 error = EINTR;
2070 else if (imm_timeout || (timedwait && tim == -1))
2071 error = ETIME;
2072 lwp->lwp_asleep = 0;
2073 lwp->lwp_sysabort = 0;
2074 watched = watch_disable_addr((caddr_t)sp,
2075 sizeof (*sp), S_WRITE);
2076 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2077 locked = 1;
2078 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
2079 }
2080 if (error == 0)
2081 suword32_noerr((void *)&sp->sema_count, --count);
2082 if (count != 0) {
fuword8_noerr(&sp->sema_waiters, &waiters);
if (waiters != 0) {
2083 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
2084 suword8_noerr(&sp->sema_waiters, waiters);
}
2085 }
2086 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2087 out:
2088 no_fault();
2089 if (watched)
2090 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2091 if (tsp && check_park && !time_error)
2092 error = lwp_timer_copyout(&lwpt, error);
2093 if (error)
2094 return (set_errno(error));
2095 return (0);
2096 }
2097
2098 /*
2099 * Obsolete lwp_sema_wait() interface, no longer called from libc.
2100 * libc now calls lwp_sema_timedwait().
2101 * This system call trap exists solely for the benefit of old
2102 * statically linked applications from Solaris 9 and before.
2103 * It should be removed when we no longer care about such applications.
2104 */
2105 int
2106 lwp_sema_wait(lwp_sema_t *sp)
2107 {
2108 return (lwp_sema_timedwait(sp, NULL, 0));
2109 }
2110
2111 int
2112 lwp_sema_post(lwp_sema_t *sp)
2113 {
2114 proc_t *p = ttoproc(curthread);
2115 label_t ljb;
2116 volatile int locked = 0;
2117 volatile int watched = 0;
2118 volatile uint16_t type = 0;
2119 int count;
2120 lwpchan_t lwpchan;
2121 uchar_t waiters;
2122 int error = 0;
2123
2124 if ((caddr_t)sp >= p->p_as->a_userlimit)
2125 return (set_errno(EFAULT));
2126
2127 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2128
2129 if (on_fault(&ljb)) {
2130 if (locked)
2131 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2132 error = EFAULT;
2133 goto out;
2134 }
2135 /*
2136 * Force Copy-on-write fault if lwp_sema_t object is
2137 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
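 *
 * (The idiom, here and in the other entry points in this file, is simply
 * to read the type word back and store the same value:
 *
 *	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
 *	suword16_noerr(&sp->sema_type, type);
 *
 * The store is what forces the copy-on-write fault, and it is done before
 * the object's address is translated to an lwpchan below.)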
2138 */ 2139 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2140 suword16_noerr(&sp->sema_type, type); 2141 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2142 &lwpchan, LWPCHAN_CVPOOL)) { 2143 error = EFAULT; 2144 goto out; 2145 } 2146 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2147 locked = 1; 2148 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2149 if (count == _SEM_VALUE_MAX) 2150 error = EOVERFLOW; 2151 else 2152 suword32_noerr(&sp->sema_count, ++count); 2153 if (count == 1) { 2154 fuword8_noerr(&sp->sema_waiters, &waiters); 2155 if (waiters) { 2156 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2157 suword8_noerr(&sp->sema_waiters, waiters); 2158 } 2159 } 2160 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2161 out: 2162 no_fault(); 2163 if (watched) 2164 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2165 if (error) 2166 return (set_errno(error)); 2167 return (0); 2168 } 2169 2170 #define TRW_WANT_WRITE 0x1 2171 #define TRW_LOCK_GRANTED 0x2 2172 2173 #define READ_LOCK 0 2174 #define WRITE_LOCK 1 2175 #define TRY_FLAG 0x10 2176 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2177 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2178 2179 /* 2180 * Release one writer or one or more readers. Compute the rwstate word to 2181 * reflect the new state of the queue. For a safe hand-off we copy the new 2182 * rwstate value back to userland before we wake any of the new lock holders. 2183 * 2184 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2185 * being given precedence over readers of the same priority). 2186 * 2187 * If the first thread is a reader we scan the queue releasing all readers 2188 * until we hit a writer or the end of the queue. If the first thread is a 2189 * writer we still need to check for another writer. 2190 */ 2191 void 2192 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2193 { 2194 sleepq_head_t *sqh; 2195 kthread_t *tp; 2196 kthread_t **tpp; 2197 kthread_t *tpnext; 2198 kthread_t *wakelist = NULL; 2199 uint32_t rwstate = 0; 2200 int wcount = 0; 2201 int rcount = 0; 2202 2203 sqh = lwpsqhash(lwpchan); 2204 disp_lock_enter(&sqh->sq_lock); 2205 tpp = &sqh->sq_queue.sq_first; 2206 while ((tp = *tpp) != NULL) { 2207 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2208 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2209 if (tp->t_writer & TRW_WANT_WRITE) { 2210 if ((wcount++ == 0) && (rcount == 0)) { 2211 rwstate |= URW_WRITE_LOCKED; 2212 2213 /* Just one writer to wake. */ 2214 sleepq_unlink(tpp, tp); 2215 wakelist = tp; 2216 2217 /* tpp already set for next thread. */ 2218 continue; 2219 } else { 2220 rwstate |= URW_HAS_WAITERS; 2221 /* We need look no further. */ 2222 break; 2223 } 2224 } else { 2225 rcount++; 2226 if (wcount == 0) { 2227 rwstate++; 2228 2229 /* Add reader to wake list. */ 2230 sleepq_unlink(tpp, tp); 2231 tp->t_link = wakelist; 2232 wakelist = tp; 2233 2234 /* tpp already set for next thread. */ 2235 continue; 2236 } else { 2237 rwstate |= URW_HAS_WAITERS; 2238 /* We need look no further. */ 2239 break; 2240 } 2241 } 2242 } 2243 tpp = &tp->t_link; 2244 } 2245 2246 /* Copy the new rwstate back to userland. */ 2247 suword32_noerr(&rw->rwlock_readers, rwstate); 2248 2249 /* Wake the new lock holder(s) up. 
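 *
 * For reference, the rwstate word written back above is used by this
 * function as follows (a summary of the code, not a formal definition of
 * the layout):
 *
 *	rwstate & URW_READERS_MASK	number of readers now holding (or
 *					being granted) the lock; incremented
 *					once per reader released
 *	rwstate & URW_WRITE_LOCKED	set when a single writer is being
 *					granted the lock
 *	rwstate & URW_HAS_WAITERS	set when threads remain on the
 *					sleep queue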
*/
2250 tp = wakelist;
2251 while (tp != NULL) {
2252 DTRACE_SCHED1(wakeup, kthread_t *, tp);
2253 tp->t_wchan0 = NULL;
2254 tp->t_wchan = NULL;
2255 tp->t_sobj_ops = NULL;
2256 tp->t_writer |= TRW_LOCK_GRANTED;
2257 tpnext = tp->t_link;
2258 tp->t_link = NULL;
2259 CL_WAKEUP(tp);
2260 thread_unlock_high(tp);
2261 tp = tpnext;
2262 }
2263
2264 disp_lock_exit(&sqh->sq_lock);
2265 }
2266
2267 /*
2268 * We enter here holding the user-level mutex, which we must release before
2269 * returning or blocking. Based on lwp_cond_wait().
2270 */
2271 static int
2272 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
2273 {
2274 lwp_mutex_t *mp = NULL;
2275 kthread_t *t = curthread;
2276 kthread_t *tp;
2277 klwp_t *lwp = ttolwp(t);
2278 proc_t *p = ttoproc(t);
2279 lwp_timer_t lwpt;
2280 lwpchan_t lwpchan;
2281 lwpchan_t mlwpchan;
2282 caddr_t timedwait;
2283 volatile uint16_t type = 0;
2284 volatile uint8_t mtype = 0;
2285 uchar_t mwaiters;
2286 volatile int error = 0;
2287 int time_error;
2288 clock_t tim = -1;
2289 volatile int locked = 0;
2290 volatile int mlocked = 0;
2291 volatile int watched = 0;
2292 volatile int mwatched = 0;
2293 label_t ljb;
2294 volatile int no_lwpchan = 1;
2295 int imm_timeout = 0;
2296 int try_flag;
2297 uint32_t rwstate;
2298 int acquired = 0;
2299
2300 /* We only check rw because the mutex is included in it. */
2301 if ((caddr_t)rw >= p->p_as->a_userlimit)
2302 return (set_errno(EFAULT));
2303
2304 /* We must only report this error if we are about to sleep (later). */
2305 timedwait = (caddr_t)tsp;
2306 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
2307 lwpt.lwpt_imm_timeout) {
2308 imm_timeout = 1;
2309 timedwait = NULL;
2310 }
2311
2312 (void) new_mstate(t, LMS_USER_LOCK);
2313
2314 if (on_fault(&ljb)) {
2315 if (no_lwpchan) {
2316 error = EFAULT;
2317 goto out_nodrop;
2318 }
2319 if (mlocked) {
2320 mlocked = 0;
2321 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2322 }
2323 if (locked) {
2324 locked = 0;
2325 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2326 }
2327 /*
2328 * Set up another on_fault() for a possible fault
2329 * on the user lock accessed at "out_drop".
2330 */
2331 if (on_fault(&ljb)) {
2332 if (mlocked) {
2333 mlocked = 0;
2334 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2335 }
2336 error = EFAULT;
2337 goto out_nodrop;
2338 }
2339 error = EFAULT;
2340 goto out_nodrop;
2341 }
2342
2343 /* Process rd_wr (including sanity check). */
2344 try_flag = (rd_wr & TRY_FLAG);
2345 rd_wr &= ~TRY_FLAG;
2346 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
2347 error = EINVAL;
2348 goto out_nodrop;
2349 }
2350
2351 /* We can only continue for simple USYNC_PROCESS locks. */
2352 mp = &rw->mutex;
2353 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
2354 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
2355 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
2356 error = EINVAL;
2357 goto out_nodrop;
2358 }
2359
2360 /* Force Copy-on-write fault in case objects are MAP_PRIVATE. */
2361 suword8_noerr(&mp->mutex_type, mtype);
2362 suword16_noerr(&rw->rwlock_type, type);
2363
2364 /* Convert user level mutex, "mp", to a unique lwpchan. */
2365 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
2366 &mlwpchan, LWPCHAN_MPPOOL)) {
2367 error = EFAULT;
2368 goto out_nodrop;
2369 }
2370
2371 /* Convert user level rwlock, "rw", to a unique lwpchan.
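 *
 * (Note that the embedded mutex was hashed into the LWPCHAN_MPPOOL above
 * while the rwlock itself goes into the LWPCHAN_CVPOOL here: lwps block on
 * the rwlock's lwpchan just like condition-variable and semaphore waiters,
 * flagged with T_WAITCVSEM, while the mutex keeps its own channel for the
 * ordinary mutex waiter protocol.)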
*/
2372 if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2373 &lwpchan, LWPCHAN_CVPOOL)) {
2374 error = EFAULT;
2375 goto out_nodrop;
2376 }
2377
2378 no_lwpchan = 0;
2379 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2380 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2381
2382 /*
2383 * lwpchan_lock() ensures that the calling LWP is put to sleep
2384 * atomically with respect to a possible wakeup which is a result
2385 * of lwp_rwlock_unlock().
2386 *
2387 * What's misleading is that the LWP is put to sleep after the
2388 * rwlock's mutex is released. This is OK as long as the release
2389 * operation is also done while holding mlwpchan. The LWP is then
2390 * put to sleep when the possibility of pagefaulting or sleeping
2391 * has been completely eliminated.
2392 */
2393 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2394 locked = 1;
2395 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2396 mlocked = 1;
2397
2398 /*
2399 * Fetch the current rwlock state.
2400 *
2401 * The possibility of spurious wake-ups or killed waiters means
2402 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
2403 * We only fix these if they are important to us.
2404 *
2405 * Although various error states can be observed here (e.g. the lock
2406 * is not held, but there are waiters) we assume these are application
2407 * errors and so we take no corrective action.
2408 */
2409 fuword32_noerr(&rw->rwlock_readers, &rwstate);
2410 /*
2411 * We cannot legitimately get here from user-level
2412 * without URW_HAS_WAITERS being set.
2413 * Set it now to guard against user-level error.
2414 */
2415 rwstate |= URW_HAS_WAITERS;
2416
2417 /*
2418 * We can try only if the lock isn't held by a writer.
2419 */
2420 if (!(rwstate & URW_WRITE_LOCKED)) {
2421 tp = lwp_queue_waiter(&lwpchan);
2422 if (tp == NULL) {
2423 /*
2424 * Hmmm, rwstate indicates waiters but there are
2425 * none queued. This could just be the result of a
2426 * spurious wakeup, so let's ignore it.
2427 *
2428 * We now have a chance to acquire the lock
2429 * uncontended, but this is the last chance for
2430 * a writer to acquire the lock without blocking.
2431 */
2432 if (rd_wr == READ_LOCK) {
2433 rwstate++;
2434 acquired = 1;
2435 } else if ((rwstate & URW_READERS_MASK) == 0) {
2436 rwstate |= URW_WRITE_LOCKED;
2437 acquired = 1;
2438 }
2439 } else if (rd_wr == READ_LOCK) {
2440 /*
2441 * This is the last chance for a reader to acquire
2442 * the lock now, but it can only do so if there is
2443 * no writer of equal or greater priority at the
2444 * head of the queue.
2445 *
2446 * It is also just possible that there is a reader
2447 * at the head of the queue. This may be the result
2448 * of a spurious wakeup or an application failure.
2449 * In this case we only acquire the lock if we have
2450 * equal or greater priority. It is not our job to
2451 * release spurious waiters.
2452 */
2453 pri_t our_pri = DISP_PRIO(t);
2454 pri_t his_pri = DISP_PRIO(tp);
2455
2456 if ((our_pri > his_pri) || ((our_pri == his_pri) &&
2457 !(tp->t_writer & TRW_WANT_WRITE))) {
2458 rwstate++;
2459 acquired = 1;
2460 }
2461 }
2462 }
2463
2464 if (acquired || try_flag || time_error) {
2465 /*
2466 * We're not going to block this time.
2467 */
2468 suword32_noerr(&rw->rwlock_readers, rwstate);
2469 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2470 locked = 0;
2471
2472 if (acquired) {
2473 /*
2474 * Got the lock!
2475 */
2476 error = 0;
2477
2478 } else if (try_flag) {
2479 /*
2480 * We didn't get the lock and we're about to block.
2481 * If we're doing a trylock, return EBUSY instead.
2482 */
2483 error = EBUSY;
2484
2485 } else if (time_error) {
2486 /*
2487 * The SUSV3 POSIX spec is very clear that we should
2488 * get no error from validating the timer (above)
2489 * until we would actually sleep.
2490 */
2491 error = time_error;
2492 }
2493
2494 goto out_drop;
2495 }
2496
2497 /*
2498 * We're about to block, so indicate what kind of waiter we are.
2499 */
2500 t->t_writer = 0;
2501 if (rd_wr == WRITE_LOCK)
2502 t->t_writer = TRW_WANT_WRITE;
2503 suword32_noerr(&rw->rwlock_readers, rwstate);
2504
2505 /*
2506 * Unlock the rwlock's mutex (pagefaults are possible here).
2507 */
2508 suword32_noerr((uint32_t *)&mp->mutex_owner, 0);
2509 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0);
2510 suword32_noerr(&mp->mutex_ownerpid, 0);
2511 ulock_clear(&mp->mutex_lockw);
2512 fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2513 if (mwaiters != 0) {
2514 /*
2515 * Given the locking of mlwpchan around the release of
2516 * the mutex and checking for waiters, the following
2517 * call to lwp_release() can fail ONLY if the lock
2518 * acquirer is interrupted after setting the waiter bit,
2519 * calling lwp_block() and releasing mlwpchan.
2520 * In this case, it could get pulled off the LWP sleep
2521 * queue (via setrun()) before the following call to
2522 * lwp_release() occurs, and the lock requestor will
2523 * update the waiter bit correctly by re-evaluating it.
2524 */
2525 if (lwp_release(&mlwpchan, &mwaiters, 0) > 0)
2526 suword8_noerr(&mp->mutex_waiters, mwaiters);
2527 }
2528 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2529 mlocked = 0;
2530 no_fault();
2531
2532 if (mwatched) {
2533 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2534 mwatched = 0;
2535 }
2536 if (watched) {
2537 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2538 watched = 0;
2539 }
2540
2541 /*
2542 * Put the LWP in an orderly state for debugging.
2543 */
2544 prstop(PR_REQUESTED, 0);
2545 if (timedwait) {
2546 /*
2547 * If we successfully queue the timeout,
2548 * then don't drop t_delay_lock until
2549 * we are on the sleep queue (below).
2550 */
2551 mutex_enter(&t->t_delay_lock);
2552 if (lwp_timer_enqueue(&lwpt) != 0) {
2553 mutex_exit(&t->t_delay_lock);
2554 imm_timeout = 1;
2555 timedwait = NULL;
2556 }
2557 }
2558 t->t_flag |= T_WAITCVSEM;
2559 lwp_block(&lwpchan);
2560
2561 /*
2562 * Nothing should happen to cause the LWP to go to sleep until after
2563 * it returns from swtch().
2564 */
2565 if (timedwait)
2566 mutex_exit(&t->t_delay_lock);
2567 locked = 0;
2568 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2569 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t))
2570 setrun(t);
2571 swtch();
2572
2573 /*
2574 * We're back, but we need to work out why. Were we interrupted? Did
2575 * we time out? Were we granted the lock?
2576 */
2577 error = EAGAIN;
2578 acquired = (t->t_writer & TRW_LOCK_GRANTED);
2579 t->t_writer = 0;
2580 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2581 if (timedwait)
2582 tim = lwp_timer_dequeue(&lwpt);
2583 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
2584 error = EINTR;
2585 else if (imm_timeout || (timedwait && tim == -1))
2586 error = ETIME;
2587 lwp->lwp_asleep = 0;
2588 lwp->lwp_sysabort = 0;
2589 setallwatch();
2590
2591 /*
2592 * If we were granted the lock we don't care about EINTR or ETIME.
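 *
 * (lwp_rwlock_release() has already counted this lwp in the rwstate word
 * it copied out and has set TRW_LOCK_GRANTED before waking us, so
 * reporting EINTR or ETIME here would abandon an acquisition that the
 * application already owns.)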
*/
2594 if (acquired)
2595 error = 0;
2596
2597 if (t->t_mstate == LMS_USER_LOCK)
2598 (void) new_mstate(t, LMS_SYSTEM);
2599
2600 if (error)
2601 return (set_errno(error));
2602 return (0);
2603
2604 out_drop:
2605 /*
2606 * Make sure that the user level lock is dropped before returning
2607 * to the caller.
2608 */
2609 if (!mlocked) {
2610 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2611 mlocked = 1;
2612 }
2613 suword32_noerr((uint32_t *)&mp->mutex_owner, 0);
2614 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0);
2615 suword32_noerr(&mp->mutex_ownerpid, 0);
2616 ulock_clear(&mp->mutex_lockw);
2617 fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2618 if (mwaiters != 0) {
2619 /*
2620 * See comment above on lock clearing and lwp_release()
2621 * success/failure.
2622 */
2623 if (lwp_release(&mlwpchan, &mwaiters, 0) > 0)
2624 suword8_noerr(&mp->mutex_waiters, mwaiters);
2625 }
2626 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2627 mlocked = 0;
2628
2629 out_nodrop:
2630 no_fault();
2631 if (mwatched)
2632 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2633 if (watched)
2634 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2635 if (t->t_mstate == LMS_USER_LOCK)
2636 (void) new_mstate(t, LMS_SYSTEM);
2637 if (error)
2638 return (set_errno(error));
2639 return (0);
2640 }
2641
2642 /*
2643 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
2644 * we never drop the lock.
2645 */
2646 static int
2647 lwp_rwlock_unlock(lwp_rwlock_t *rw)
2648 {
2649 kthread_t *t = curthread;
2650 proc_t *p = ttoproc(t);
2651 lwpchan_t lwpchan;
2652 volatile uint16_t type = 0;
2653 volatile int error = 0;
2654 volatile int locked = 0;
2655 volatile int watched = 0;
2656 label_t ljb;
2657 volatile int no_lwpchan = 1;
2658 uint32_t rwstate;
2659
2660 /* We only check rw because the mutex is included in it. */
2661 if ((caddr_t)rw >= p->p_as->a_userlimit)
2662 return (set_errno(EFAULT));
2663
2664 if (on_fault(&ljb)) {
2665 if (no_lwpchan) {
2666 error = EFAULT;
2667 goto out_nodrop;
2668 }
2669 if (locked) {
2670 locked = 0;
2671 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2672 }
2673 error = EFAULT;
2674 goto out_nodrop;
2675 }
2676
2677 /* We can only continue for simple USYNC_PROCESS locks. */
2678 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
2679 if (type != USYNC_PROCESS) {
2680 error = EINVAL;
2681 goto out_nodrop;
2682 }
2683
2684 /* Force Copy-on-write fault in case objects are MAP_PRIVATE. */
2685 suword16_noerr(&rw->rwlock_type, type);
2686
2687 /* Convert user level rwlock, "rw", to a unique lwpchan. */
2688 if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2689 &lwpchan, LWPCHAN_CVPOOL)) {
2690 error = EFAULT;
2691 goto out_nodrop;
2692 }
2693
2694 no_lwpchan = 0;
2695 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2696
2697 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2698 locked = 1;
2699
2700 /*
2701 * We can resolve multiple readers (except the last reader) here.
2702 * For the last reader or a writer we need lwp_rwlock_release(),
2703 * to which we also delegate the task of copying the new rwstate
2704 * back to userland (see the comment there).
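 *
 * A small worked example of the code below: with three readers holding
 * the lock, successive unlocks simply store 2 and then 1 back into the
 * URW_READERS_MASK field; the unlock that would take the count from 1
 * to 0 (like any write unlock) is handed to lwp_rwlock_release(), which
 * wakes the next holder(s) and recomputes URW_HAS_WAITERS as well.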
2705 */ 2706 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2707 if (rwstate & URW_WRITE_LOCKED) 2708 lwp_rwlock_release(&lwpchan, rw); 2709 else if ((rwstate & URW_READERS_MASK) > 0) { 2710 rwstate--; 2711 if ((rwstate & URW_READERS_MASK) == 0) 2712 lwp_rwlock_release(&lwpchan, rw); 2713 else 2714 suword32_noerr(&rw->rwlock_readers, rwstate); 2715 } 2716 2717 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2718 locked = 0; 2719 error = 0; 2720 2721 out_nodrop: 2722 no_fault(); 2723 if (watched) 2724 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2725 if (error) 2726 return (set_errno(error)); 2727 return (0); 2728 } 2729 2730 int 2731 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2732 { 2733 switch (subcode) { 2734 case 0: 2735 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2736 case 1: 2737 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2738 case 2: 2739 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2740 case 3: 2741 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2742 case 4: 2743 return (lwp_rwlock_unlock(rwlp)); 2744 } 2745 return (set_errno(EINVAL)); 2746 } 2747 2748 /* 2749 * Return the owner of the user-level s-object. 2750 * Since we can't really do this, return NULL. 2751 */ 2752 /* ARGSUSED */ 2753 static kthread_t * 2754 lwpsobj_owner(caddr_t sobj) 2755 { 2756 return ((kthread_t *)NULL); 2757 } 2758 2759 /* 2760 * Wake up a thread asleep on a user-level synchronization 2761 * object. 2762 */ 2763 static void 2764 lwp_unsleep(kthread_t *t) 2765 { 2766 ASSERT(THREAD_LOCK_HELD(t)); 2767 if (t->t_wchan0 != NULL) { 2768 sleepq_head_t *sqh; 2769 sleepq_t *sqp = t->t_sleepq; 2770 2771 if (sqp != NULL) { 2772 sqh = lwpsqhash(&t->t_lwpchan); 2773 ASSERT(&sqh->sq_queue == sqp); 2774 sleepq_unsleep(t); 2775 disp_lock_exit_high(&sqh->sq_lock); 2776 CL_SETRUN(t); 2777 return; 2778 } 2779 } 2780 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2781 } 2782 2783 /* 2784 * Change the priority of a thread asleep on a user-level 2785 * synchronization object. To maintain proper priority order, 2786 * we: 2787 * o dequeue the thread. 2788 * o change its priority. 2789 * o re-enqueue the thread. 2790 * Assumption: the thread is locked on entry. 
*/
2792 static void
2793 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
2794 {
2795 ASSERT(THREAD_LOCK_HELD(t));
2796 if (t->t_wchan0 != NULL) {
2797 sleepq_t *sqp = t->t_sleepq;
2798
2799 sleepq_dequeue(t);
2800 *t_prip = pri;
2801 sleepq_insert(sqp, t);
2802 } else
2803 panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
2804 }
2805
2806 /*
2807 * Clean up a locked robust mutex.
2808 */
2809 static void
2810 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
2811 {
2812 uint16_t flag;
2813 uchar_t waiters;
2814 label_t ljb;
2815 pid_t owner_pid;
2816 lwp_mutex_t *lp;
2817 volatile int locked = 0;
2818 volatile int watched = 0;
2819
2820 ASSERT(ent->lwpchan_type & USYNC_PROCESS_ROBUST);
2821
2822 lp = (lwp_mutex_t *)ent->lwpchan_addr;
2823 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2824 if (on_fault(&ljb)) {
2825 if (locked)
2826 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2827 goto out;
2828 }
2829 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);
2830 if (owner_pid != curproc->p_pid) {
2831 goto out;
2832 }
2833 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2834 locked = 1;
2835 fuword16_noerr(&lp->mutex_flag, &flag);
2836 if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) {
2837 flag |= lockflg;
2838 suword16_noerr(&lp->mutex_flag, flag);
2839 }
2840 suword32_noerr(&lp->mutex_ownerpid, 0);
2841 ulock_clear(&lp->mutex_lockw);
2842 fuword8_noerr(&lp->mutex_waiters, &waiters);
2843 if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
2844 suword8_noerr(&lp->mutex_waiters, waiters);
2845 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2846 out:
2847 no_fault();
2848 if (watched)
2849 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2850 }
2851
2852 /*
2853 * Register the mutex and initialize it if it is not already initialized.
2854 */
2855 int
2856 lwp_mutex_init(lwp_mutex_t *lp, int type)
2857 {
2858 proc_t *p = curproc;
2859 int error = 0;
2860 volatile int locked = 0;
2861 volatile int watched = 0;
2862 label_t ljb;
2863 uint16_t flag;
2864 lwpchan_t lwpchan;
2865 pid_t owner_pid;
2866
2867 if ((caddr_t)lp >= (caddr_t)USERLIMIT)
2868 return (set_errno(EFAULT));
2869
2870 if (type != USYNC_PROCESS_ROBUST)
2871 return (set_errno(EINVAL));
2872
2873 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2874
2875 if (on_fault(&ljb)) {
2876 if (locked)
2877 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2878 error = EFAULT;
2879 goto out;
2880 }
2881 /*
2882 * Force Copy-on-write fault if lwp_mutex_t object is
2883 * defined to be MAP_PRIVATE and it was initialized to
2884 * USYNC_PROCESS.
*/
2886 suword8_noerr(&lp->mutex_type, type);
2887 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
2888 &lwpchan, LWPCHAN_MPPOOL)) {
2889 error = EFAULT;
2890 goto out;
2891 }
2892 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
2893 locked = 1;
2894 fuword16_noerr(&lp->mutex_flag, &flag);
2895 if (flag & LOCK_INITED) {
2896 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
2897 fuword32_noerr(&lp->mutex_ownerpid,
2898 (uint32_t *)&owner_pid);
2899 if (owner_pid == p->p_pid) {
2900 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2901 suword16_noerr(&lp->mutex_flag, flag);
2902 locked = 0;
2903 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2904 goto out;
2905 }
2906 }
2907 error = EBUSY;
2908 } else {
2909 suword8_noerr(&lp->mutex_waiters, 0);
2910 suword8_noerr(&lp->mutex_lockw, 0);
2911 suword16_noerr(&lp->mutex_flag, LOCK_INITED);
2912 suword32_noerr(&lp->mutex_ownerpid, 0);
2913 }
2914 locked = 0;
2915 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2916 out:
2917 no_fault();
2918 if (watched)
2919 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2920 if (error)
2921 return (set_errno(error));
2922 return (0);
2923 }
2924
2925 int
2926 lwp_mutex_trylock(lwp_mutex_t *lp)
2927 {
2928 kthread_t *t = curthread;
2929 proc_t *p = ttoproc(t);
2930 int error = 0;
2931 volatile int locked = 0;
2932 volatile int watched = 0;
2933 label_t ljb;
2934 volatile uint8_t type = 0;
2935 uint16_t flag;
2936 lwpchan_t lwpchan;
2937
2938 if ((caddr_t)lp >= p->p_as->a_userlimit)
2939 return (set_errno(EFAULT));
2940
2941 (void) new_mstate(t, LMS_USER_LOCK);
2942
2943 if (on_fault(&ljb)) {
2944 if (locked)
2945 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2946 error = EFAULT;
2947 goto out;
2948 }
2949 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
2950 if (UPIMUTEX(type)) {
2951 no_fault();
2952 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
2953 if ((error == 0 || error == EOWNERDEAD) &&
2954 (type & USYNC_PROCESS))
2955 (void) suword32(&lp->mutex_ownerpid, p->p_pid);
2956 if (error)
2957 return (set_errno(error));
2958 return (0);
2959 }
2960 /*
2961 * Force Copy-on-write fault if lwp_mutex_t object is
2962 * defined to be MAP_PRIVATE and it was initialized to
2963 * USYNC_PROCESS.
2964 */
2965 suword8_noerr(&lp->mutex_type, type);
2966 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
2967 &lwpchan, LWPCHAN_MPPOOL)) {
2968 error = EFAULT;
2969 goto out;
2970 }
2971 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
2972 locked = 1;
2973 if (type & USYNC_PROCESS_ROBUST) {
2974 fuword16_noerr((uint16_t *)(&lp->mutex_flag), &flag);
2975 if (flag & LOCK_NOTRECOVERABLE) {
2976 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2977 error = ENOTRECOVERABLE;
2978 goto out;
2979 }
2980 }
2981
2982 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2983
2984 if (!ulock_try(&lp->mutex_lockw))
2985 error = EBUSY;
2986 else if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) {
2987 suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
2988 if (type & USYNC_PROCESS_ROBUST) {
2989 if (flag & LOCK_OWNERDEAD)
2990 error = EOWNERDEAD;
2991 else if (flag & LOCK_UNMAPPED)
2992 error = ELOCKUNMAPPED;
2993 }
2994 }
2995 locked = 0;
2996 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2997 out:
2998
2999 if (t->t_mstate == LMS_USER_LOCK)
3000 (void) new_mstate(t, LMS_SYSTEM);
3001
3002 no_fault();
3003 if (watched)
3004 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3005 if (error)
3006 return (set_errno(error));
3007 return (0);
3008 }
3009
3010 /*
3011 * unlock the mutex and unblock any lwps that are trying to acquire this mutex.
3012 * the blocked lwp resumes and retries to acquire the lock. 3013 */ 3014 int 3015 lwp_mutex_unlock(lwp_mutex_t *lp) 3016 { 3017 proc_t *p = ttoproc(curthread); 3018 lwpchan_t lwpchan; 3019 uchar_t waiters; 3020 volatile int locked = 0; 3021 volatile int watched = 0; 3022 volatile uint8_t type = 0; 3023 label_t ljb; 3024 uint16_t flag; 3025 int error = 0; 3026 3027 if ((caddr_t)lp >= p->p_as->a_userlimit) 3028 return (set_errno(EFAULT)); 3029 3030 if (on_fault(&ljb)) { 3031 if (locked) 3032 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3033 error = EFAULT; 3034 goto out; 3035 } 3036 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3037 if (UPIMUTEX(type)) { 3038 no_fault(); 3039 error = lwp_upimutex_unlock(lp, type); 3040 if (error) 3041 return (set_errno(error)); 3042 return (0); 3043 } 3044 3045 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3046 3047 /* 3048 * Force Copy-on-write fault if lwp_mutex_t object is 3049 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS 3050 */ 3051 suword8_noerr(&lp->mutex_type, type); 3052 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3053 &lwpchan, LWPCHAN_MPPOOL)) { 3054 error = EFAULT; 3055 goto out; 3056 } 3057 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3058 locked = 1; 3059 if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) { 3060 if (type & USYNC_PROCESS_ROBUST) { 3061 fuword16_noerr(&lp->mutex_flag, &flag); 3062 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3063 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3064 flag |= LOCK_NOTRECOVERABLE; 3065 suword16_noerr(&lp->mutex_flag, flag); 3066 } 3067 } 3068 suword32_noerr(&lp->mutex_ownerpid, 0); 3069 } 3070 ulock_clear(&lp->mutex_lockw); 3071 /* 3072 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3073 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3074 * may fail. If it fails, do not write into the waiter bit. 3075 * The call to lwp_release() might fail due to one of three reasons: 3076 * 3077 * 1. due to the thread which set the waiter bit not actually 3078 * sleeping since it got the lock on the re-try. The waiter 3079 * bit will then be correctly updated by that thread. This 3080 * window may be closed by reading the wait bit again here 3081 * and not calling lwp_release() at all if it is zero. 3082 * 2. the thread which set the waiter bit and went to sleep 3083 * was woken up by a signal. This time, the waiter recomputes 3084 * the wait bit in the return with EINTR code. 3085 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3086 * memory that has been re-used after the lock was dropped. 3087 * In this case, writing into the waiter bit would cause data 3088 * corruption. 3089 */ 3090 fuword8_noerr(&lp->mutex_waiters, &waiters); 3091 if (waiters) { 3092 if ((type & USYNC_PROCESS_ROBUST) && 3093 (flag & LOCK_NOTRECOVERABLE)) { 3094 lwp_release_all(&lwpchan); 3095 suword8_noerr(&lp->mutex_waiters, 0); 3096 } else if (lwp_release(&lwpchan, &waiters, 0) == 1) { 3097 suword8_noerr(&lp->mutex_waiters, waiters); 3098 } 3099 } 3100 3101 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3102 out: 3103 no_fault(); 3104 if (watched) 3105 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3106 if (error) 3107 return (set_errno(error)); 3108 return (0); 3109 } 3110
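/*
 * For illustration only -- a hedged summary of the waiters-byte handshake
 * that lwp_mutex_unlock() above relies on, condensed from the code and
 * comments in this file (the blocking side is lwp_mutex_timedlock(),
 * referenced in the comment above):
 *
 *	acquirer (blocking)			owner (unlocking)
 *	-------------------			-----------------
 *	lwpchan_lock(...)
 *	set mp->mutex_waiters = 1
 *	lwp_block(...)
 *	lwpchan_unlock(...); swtch()
 *						lwpchan_lock(...)
 *						ulock_clear(&mp->mutex_lockw)
 *						read mp->mutex_waiters
 *						lwp_release() -> wake acquirer
 *						write back recomputed waiters
 *						lwpchan_unlock(...)
 *	retry the lock
 *
 * Because both sides perform their steps under the same lwpchan lock, the
 * only way the owner can see the waiters byte set and still find nobody to
 * release is the interrupted-acquirer window described in the comments
 * above, in which case the waiter bit is left for the acquirer to
 * re-evaluate.
 */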