/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread. This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps is added for numbers of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & LOCK_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list. Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t. This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned. Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) { /* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	/*
	 * The apparent assumption made in implementing other _lwp_* synch
	 * primitives, is that get_lwpchan() does not return a unique cookie
	 * for the case where 2 processes (one forked from the other) point
	 * at the same underlying object, which is typed USYNC_PROCESS, but
	 * mapped MAP_PRIVATE, since the object has not yet been written to,
	 * in the child process.
	 *
	 * Since get_lwpchan() has been fixed, it is not necessary to do the
	 * dummy writes to force a COW fault as in other places (which should
	 * be fixed).
	 */
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 * Put the lwp in an orderly state for debugging.
	 * Calling prstop() has to be done here, and not in
	 * turnstile_block(), since the preceding call to
	 * turnstile_lookup() raises the PIL to a level
	 * at which calls to prstop() should not be made.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	prstop(PR_REQUESTED, 0);
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * If we successfully queue the timeout
		 * (lwp_timer_enqueue() returns zero),
		 * then don't drop t_delay_lock until we are
		 * on the sleep queue (in turnstile_block()).
		 * Otherwise we will get an immediate timeout
		 * when we attempt to sleep in turnstile_block().
		 */
		mutex_enter(&curthread->t_delay_lock);
		if (lwp_timer_enqueue(lwptp) != 0)
			mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit.
	 * Wait bit is set under the upib_lock, which is not released
	 * until the turnstile lock is acquired.  Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up.  Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup().  All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block().  turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling
	 *    turnstile_block().  The address of upib_lock is passed to
	 *    turnstile_block() which releases it after releasing all
	 *    turnstile locks, and before going to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted.  The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count.  So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.
	 * If the upimutex is not locked here, this could be due to
	 * a spurious wake-up or a NOTRECOVERABLE event.  The flag's
	 * setting can be used to distinguish between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held. Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not. In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew). curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock); /* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	if (type & USYNC_PROCESS)
		suword32_noerr(&lp->mutex_ownerpid, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	suword32_noerr((uint32_t *)&lp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0);
	suword32_noerr(&lp->mutex_ownerpid, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up. Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed. If there is a waiter, the lock will be
			 * handed off. If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies. The last owner will free kmem, since
			 * it will find the upimutex has no waiters. So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	static int iswanted();
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state. If the thread indeed blocks,
	 * this state becomes valid. If not, the state is converted back to
	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE and it was initialized to
	 * USYNC_PROCESS.
	 */
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		/*
		 * Put the lwp in an orderly state for debugging.
		 */
		prstop(PR_REQUESTED, 0);
		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit. The waiters field in
			 * the lock is not reliable. Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_mutex_lock() interface, no longer called from libc.
 * libc now calls lwp_mutex_timedlock(lp, NULL).
 * This system call trap continues to exist solely for the benefit
 * of old statically-linked binaries from Solaris 9 and before.
 * It should be removed from the system when we no longer care
 * about such applications.
 */
int
lwp_mutex_lock(lwp_mutex_t *lp)
{
	return (lwp_mutex_timedlock(lp, NULL));
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false. It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but
			 * has since been re-used to hold a lwp cv or lwp
			 * semaphore. The thread "tp" found to match the lwp
			 * lock's wchan is actually sleeping for the cv or
			 * semaphore which now has the same wchan. In this
			 * case, lwp_release() should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);	/* drop sleep q lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex. the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock(). The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0) == 1)
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write fault if lwp_cond_t and lwp_mutex_t
	 * objects are defined to be MAP_PRIVATE, and are USYNC_PROCESS
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	if (UPIMUTEX(mtype) == 0) {
		suword8_noerr(&mp->mutex_type, mtype);
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex. (pagefaults are
		 * possible here.)
		 */
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs. In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	/*
	 * Put the lwp in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}

/*
 * wakeup one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_cond_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
1846 */ 1847 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1848 suword16_noerr(&cv->cond_type, type); 1849 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1850 &lwpchan, LWPCHAN_CVPOOL)) { 1851 error = EFAULT; 1852 goto out; 1853 } 1854 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1855 locked = 1; 1856 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1857 if (waiters != 0) { 1858 /* 1859 * The following call to lwp_release() might fail but it is 1860 * OK to write into the waiters bit below, since the memory 1861 * could not have been re-used or unmapped (for correctly 1862 * written user programs) as in the case of lwp_mutex_wakeup(). 1863 * For an incorrect program, we should not care about data 1864 * corruption since this is just one instance of other places 1865 * where corruption can occur for such a program. Of course 1866 * if the memory is unmapped, normal fault recovery occurs. 1867 */ 1868 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1869 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1870 } 1871 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1872 out: 1873 no_fault(); 1874 if (watched) 1875 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1876 if (error) 1877 return (set_errno(error)); 1878 return (0); 1879 } 1880 1881 /* 1882 * wakeup every lwp that's blocked on this condition variable. 1883 */ 1884 int 1885 lwp_cond_broadcast(lwp_cond_t *cv) 1886 { 1887 proc_t *p = ttoproc(curthread); 1888 lwpchan_t lwpchan; 1889 volatile uint16_t type = 0; 1890 volatile int locked = 0; 1891 volatile int watched = 0; 1892 label_t ljb; 1893 uchar_t waiters; 1894 int error = 0; 1895 1896 if ((caddr_t)cv >= p->p_as->a_userlimit) 1897 return (set_errno(EFAULT)); 1898 1899 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1900 1901 if (on_fault(&ljb)) { 1902 if (locked) 1903 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1904 error = EFAULT; 1905 goto out; 1906 } 1907 /* 1908 * Force Copy-on-write fault if lwp_cond_t object is 1909 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 1910 */ 1911 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1912 suword16_noerr(&cv->cond_type, type); 1913 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1914 &lwpchan, LWPCHAN_CVPOOL)) { 1915 error = EFAULT; 1916 goto out; 1917 } 1918 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1919 locked = 1; 1920 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1921 if (waiters != 0) { 1922 lwp_release_all(&lwpchan); 1923 suword8_noerr(&cv->cond_waiters_kernel, 0); 1924 } 1925 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1926 out: 1927 no_fault(); 1928 if (watched) 1929 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1930 if (error) 1931 return (set_errno(error)); 1932 return (0); 1933 } 1934 1935 int 1936 lwp_sema_trywait(lwp_sema_t *sp) 1937 { 1938 kthread_t *t = curthread; 1939 proc_t *p = ttoproc(t); 1940 label_t ljb; 1941 volatile int locked = 0; 1942 volatile int watched = 0; 1943 volatile uint16_t type = 0; 1944 int count; 1945 lwpchan_t lwpchan; 1946 uchar_t waiters; 1947 int error = 0; 1948 1949 if ((caddr_t)sp >= p->p_as->a_userlimit) 1950 return (set_errno(EFAULT)); 1951 1952 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1953 1954 if (on_fault(&ljb)) { 1955 if (locked) 1956 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1957 error = EFAULT; 1958 goto out; 1959 } 1960 /* 1961 * Force Copy-on-write fault if lwp_sema_t object is 1962 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 
1963 */ 1964 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1965 suword16_noerr((void *)&sp->sema_type, type); 1966 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1967 &lwpchan, LWPCHAN_CVPOOL)) { 1968 error = EFAULT; 1969 goto out; 1970 } 1971 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1972 locked = 1; 1973 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1974 if (count == 0) 1975 error = EBUSY; 1976 else 1977 suword32_noerr((void *)&sp->sema_count, --count); 1978 if (count != 0) { 1979 fuword8_noerr(&sp->sema_waiters, &waiters); 1980 if (waiters != 0) { 1981 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1982 suword8_noerr(&sp->sema_waiters, waiters); 1983 } 1984 } 1985 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1986 out: 1987 no_fault(); 1988 if (watched) 1989 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1990 if (error) 1991 return (set_errno(error)); 1992 return (0); 1993 } 1994 1995 /* 1996 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 1997 */ 1998 int 1999 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2000 { 2001 kthread_t *t = curthread; 2002 klwp_t *lwp = ttolwp(t); 2003 proc_t *p = ttoproc(t); 2004 lwp_timer_t lwpt; 2005 caddr_t timedwait; 2006 clock_t tim = -1; 2007 label_t ljb; 2008 volatile int locked = 0; 2009 volatile int watched = 0; 2010 volatile uint16_t type = 0; 2011 int count; 2012 lwpchan_t lwpchan; 2013 uchar_t waiters; 2014 int error = 0; 2015 int time_error; 2016 int imm_timeout = 0; 2017 int imm_unpark = 0; 2018 2019 if ((caddr_t)sp >= p->p_as->a_userlimit) 2020 return (set_errno(EFAULT)); 2021 2022 timedwait = (caddr_t)tsp; 2023 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2024 lwpt.lwpt_imm_timeout) { 2025 imm_timeout = 1; 2026 timedwait = NULL; 2027 } 2028 2029 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2030 2031 if (on_fault(&ljb)) { 2032 if (locked) 2033 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2034 error = EFAULT; 2035 goto out; 2036 } 2037 /* 2038 * Force Copy-on-write fault if lwp_sema_t object is 2039 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 2040 */ 2041 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2042 suword16_noerr((void *)&sp->sema_type, type); 2043 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2044 &lwpchan, LWPCHAN_CVPOOL)) { 2045 error = EFAULT; 2046 goto out; 2047 } 2048 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2049 locked = 1; 2050 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2051 while (error == 0 && count == 0) { 2052 if (time_error) { 2053 /* 2054 * The SUSV3 Posix spec is very clear that we 2055 * should get no error from validating the 2056 * timer until we would actually sleep. 2057 */ 2058 error = time_error; 2059 break; 2060 } 2061 suword8_noerr(&sp->sema_waiters, 1); 2062 if (watched) 2063 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2064 /* 2065 * Put the lwp in an orderly state for debugging. 2066 */ 2067 prstop(PR_REQUESTED, 0); 2068 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2069 /* 2070 * We received a signal at user-level before calling 2071 * here or another thread wants us to return 2072 * immediately with EINTR. See lwp_unpark(). 2073 */ 2074 imm_unpark = 1; 2075 t->t_unpark = 0; 2076 timedwait = NULL; 2077 } else if (timedwait) { 2078 /* 2079 * If we successfully queue the timeout, 2080 * then don't drop t_delay_lock until 2081 * we are on the sleep queue (below). 
2082 */ 2083 mutex_enter(&t->t_delay_lock); 2084 if (lwp_timer_enqueue(&lwpt) != 0) { 2085 mutex_exit(&t->t_delay_lock); 2086 imm_timeout = 1; 2087 timedwait = NULL; 2088 } 2089 } 2090 t->t_flag |= T_WAITCVSEM; 2091 lwp_block(&lwpchan); 2092 /* 2093 * Nothing should happen to cause the lwp to sleep 2094 * again until after it returns from swtch(). 2095 */ 2096 if (timedwait) 2097 mutex_exit(&t->t_delay_lock); 2098 locked = 0; 2099 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2100 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2101 (imm_timeout | imm_unpark)) 2102 setrun(t); 2103 swtch(); 2104 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2105 if (timedwait) 2106 tim = lwp_timer_dequeue(&lwpt); 2107 setallwatch(); 2108 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2109 MUSTRETURN(p, t) || imm_unpark) 2110 error = EINTR; 2111 else if (imm_timeout || (timedwait && tim == -1)) 2112 error = ETIME; 2113 lwp->lwp_asleep = 0; 2114 lwp->lwp_sysabort = 0; 2115 watched = watch_disable_addr((caddr_t)sp, 2116 sizeof (*sp), S_WRITE); 2117 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2118 locked = 1; 2119 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2120 } 2121 if (error == 0) 2122 suword32_noerr((void *)&sp->sema_count, --count); 2123 if (count != 0) { 2124 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2125 suword8_noerr(&sp->sema_waiters, waiters); 2126 } 2127 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2128 out: 2129 no_fault(); 2130 if (watched) 2131 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2132 if (tsp && check_park && !time_error) 2133 error = lwp_timer_copyout(&lwpt, error); 2134 if (error) 2135 return (set_errno(error)); 2136 return (0); 2137 } 2138 2139 /* 2140 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2141 * libc now calls lwp_sema_timedwait(). 2142 * This system call trap exists solely for the benefit of old 2143 * statically linked applications from Solaris 9 and before. 2144 * It should be removed when we no longer care about such applications. 2145 */ 2146 int 2147 lwp_sema_wait(lwp_sema_t *sp) 2148 { 2149 return (lwp_sema_timedwait(sp, NULL, 0)); 2150 } 2151 2152 int 2153 lwp_sema_post(lwp_sema_t *sp) 2154 { 2155 proc_t *p = ttoproc(curthread); 2156 label_t ljb; 2157 volatile int locked = 0; 2158 volatile int watched = 0; 2159 volatile uint16_t type = 0; 2160 int count; 2161 lwpchan_t lwpchan; 2162 uchar_t waiters; 2163 int error = 0; 2164 2165 if ((caddr_t)sp >= p->p_as->a_userlimit) 2166 return (set_errno(EFAULT)); 2167 2168 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2169 2170 if (on_fault(&ljb)) { 2171 if (locked) 2172 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2173 error = EFAULT; 2174 goto out; 2175 } 2176 /* 2177 * Force Copy-on-write fault if lwp_sema_t object is 2178 * defined to be MAP_PRIVATE, and is USYNC_PROCESS. 
2179 */ 2180 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2181 suword16_noerr(&sp->sema_type, type); 2182 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2183 &lwpchan, LWPCHAN_CVPOOL)) { 2184 error = EFAULT; 2185 goto out; 2186 } 2187 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2188 locked = 1; 2189 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2190 if (count == _SEM_VALUE_MAX) 2191 error = EOVERFLOW; 2192 else 2193 suword32_noerr(&sp->sema_count, ++count); 2194 if (count == 1) { 2195 fuword8_noerr(&sp->sema_waiters, &waiters); 2196 if (waiters) { 2197 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2198 suword8_noerr(&sp->sema_waiters, waiters); 2199 } 2200 } 2201 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2202 out: 2203 no_fault(); 2204 if (watched) 2205 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2206 if (error) 2207 return (set_errno(error)); 2208 return (0); 2209 } 2210 2211 #define TRW_WANT_WRITE 0x1 2212 #define TRW_LOCK_GRANTED 0x2 2213 2214 #define READ_LOCK 0 2215 #define WRITE_LOCK 1 2216 #define TRY_FLAG 0x10 2217 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2218 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2219 2220 /* 2221 * Release one writer or one or more readers. Compute the rwstate word to 2222 * reflect the new state of the queue. For a safe hand-off we copy the new 2223 * rwstate value back to userland before we wake any of the new lock holders. 2224 * 2225 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2226 * being given precedence over readers of the same priority). 2227 * 2228 * If the first thread is a reader we scan the queue releasing all readers 2229 * until we hit a writer or the end of the queue. If the first thread is a 2230 * writer we still need to check for another writer. 2231 */ 2232 void 2233 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2234 { 2235 sleepq_head_t *sqh; 2236 kthread_t *tp; 2237 kthread_t **tpp; 2238 kthread_t *tpnext; 2239 kthread_t *wakelist = NULL; 2240 uint32_t rwstate = 0; 2241 int wcount = 0; 2242 int rcount = 0; 2243 2244 sqh = lwpsqhash(lwpchan); 2245 disp_lock_enter(&sqh->sq_lock); 2246 tpp = &sqh->sq_queue.sq_first; 2247 while ((tp = *tpp) != NULL) { 2248 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2249 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2250 if (tp->t_writer & TRW_WANT_WRITE) { 2251 if ((wcount++ == 0) && (rcount == 0)) { 2252 rwstate |= URW_WRITE_LOCKED; 2253 2254 /* Just one writer to wake. */ 2255 sleepq_unlink(tpp, tp); 2256 wakelist = tp; 2257 2258 /* tpp already set for next thread. */ 2259 continue; 2260 } else { 2261 rwstate |= URW_HAS_WAITERS; 2262 /* We need look no further. */ 2263 break; 2264 } 2265 } else { 2266 rcount++; 2267 if (wcount == 0) { 2268 rwstate++; 2269 2270 /* Add reader to wake list. */ 2271 sleepq_unlink(tpp, tp); 2272 tp->t_link = wakelist; 2273 wakelist = tp; 2274 2275 /* tpp already set for next thread. */ 2276 continue; 2277 } else { 2278 rwstate |= URW_HAS_WAITERS; 2279 /* We need look no further. */ 2280 break; 2281 } 2282 } 2283 } 2284 tpp = &tp->t_link; 2285 } 2286 2287 /* Copy the new rwstate back to userland. */ 2288 suword32_noerr(&rw->rwlock_readers, rwstate); 2289 2290 /* Wake the new lock holder(s) up. 
*/ 2291 tp = wakelist; 2292 while (tp != NULL) { 2293 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2294 tp->t_wchan0 = NULL; 2295 tp->t_wchan = NULL; 2296 tp->t_sobj_ops = NULL; 2297 tp->t_writer |= TRW_LOCK_GRANTED; 2298 tpnext = tp->t_link; 2299 tp->t_link = NULL; 2300 CL_WAKEUP(tp); 2301 thread_unlock_high(tp); 2302 tp = tpnext; 2303 } 2304 2305 disp_lock_exit(&sqh->sq_lock); 2306 } 2307 2308 /* 2309 * We enter here holding the user-level mutex, which we must release before 2310 * returning or blocking. Based on lwp_cond_wait(). 2311 */ 2312 static int 2313 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2314 { 2315 lwp_mutex_t *mp = NULL; 2316 kthread_t *t = curthread; 2317 kthread_t *tp; 2318 klwp_t *lwp = ttolwp(t); 2319 proc_t *p = ttoproc(t); 2320 lwp_timer_t lwpt; 2321 lwpchan_t lwpchan; 2322 lwpchan_t mlwpchan; 2323 caddr_t timedwait; 2324 volatile uint16_t type = 0; 2325 volatile uint8_t mtype = 0; 2326 uchar_t mwaiters; 2327 volatile int error = 0; 2328 int time_error; 2329 clock_t tim = -1; 2330 volatile int locked = 0; 2331 volatile int mlocked = 0; 2332 volatile int watched = 0; 2333 volatile int mwatched = 0; 2334 label_t ljb; 2335 volatile int no_lwpchan = 1; 2336 int imm_timeout = 0; 2337 int try_flag; 2338 uint32_t rwstate; 2339 int acquired = 0; 2340 2341 /* We only check rw because the mutex is included in it. */ 2342 if ((caddr_t)rw >= p->p_as->a_userlimit) 2343 return (set_errno(EFAULT)); 2344 2345 /* We must only report this error if we are about to sleep (later). */ 2346 timedwait = (caddr_t)tsp; 2347 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2348 lwpt.lwpt_imm_timeout) { 2349 imm_timeout = 1; 2350 timedwait = NULL; 2351 } 2352 2353 (void) new_mstate(t, LMS_USER_LOCK); 2354 2355 if (on_fault(&ljb)) { 2356 if (no_lwpchan) { 2357 error = EFAULT; 2358 goto out_nodrop; 2359 } 2360 if (mlocked) { 2361 mlocked = 0; 2362 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2363 } 2364 if (locked) { 2365 locked = 0; 2366 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2367 } 2368 /* 2369 * Set up another on_fault() for a possible fault 2370 * on the user lock accessed at "out_drop". 2371 */ 2372 if (on_fault(&ljb)) { 2373 if (mlocked) { 2374 mlocked = 0; 2375 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2376 } 2377 error = EFAULT; 2378 goto out_nodrop; 2379 } 2380 error = EFAULT; 2381 goto out_nodrop; 2382 } 2383 2384 /* Process rd_wr (including sanity check). */ 2385 try_flag = (rd_wr & TRY_FLAG); 2386 rd_wr &= ~TRY_FLAG; 2387 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2388 error = EINVAL; 2389 goto out_nodrop; 2390 } 2391 2392 /* We can only continue for simple USYNC_PROCESS locks. */ 2393 mp = &rw->mutex; 2394 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2395 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2396 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2397 error = EINVAL; 2398 goto out_nodrop; 2399 } 2400 2401 /* Force Copy-on-write fault incase objects are MAP_PRIVATE. */ 2402 suword8_noerr(&mp->mutex_type, mtype); 2403 suword16_noerr(&rw->rwlock_type, type); 2404 2405 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2406 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2407 &mlwpchan, LWPCHAN_MPPOOL)) { 2408 error = EFAULT; 2409 goto out_nodrop; 2410 } 2411 2412 /* Convert user level rwlock, "rw", to a unique lwpchan. 
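* Note that the rwlock hashes into the CV pool while its embedded mutex
* (above) hashes into the MP pool, so threads blocked on the rwlock and
* threads blocked on the mutex are queued and woken independently.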
*/ 2413 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2414 &lwpchan, LWPCHAN_CVPOOL)) { 2415 error = EFAULT; 2416 goto out_nodrop; 2417 } 2418 2419 no_lwpchan = 0; 2420 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2421 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2422 2423 /* 2424 * lwpchan_lock() ensures that the calling LWP is put to sleep 2425 * atomically with respect to a possible wakeup which is a result 2426 * of lwp_rwlock_unlock(). 2427 * 2428 * What's misleading is that the LWP is put to sleep after the 2429 * rwlock's mutex is released. This is OK as long as the release 2430 * operation is also done while holding mlwpchan. The LWP is then 2431 * put to sleep when the possibility of pagefaulting or sleeping 2432 * has been completely eliminated. 2433 */ 2434 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2435 locked = 1; 2436 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2437 mlocked = 1; 2438 2439 /* 2440 * Fetch the current rwlock state. 2441 * 2442 * The possibility of spurious wake-ups or killed waiters means 2443 * rwstate's URW_HAS_WAITERS bit may indicate false positives. 2444 * We only fix these if they are important to us. 2445 * 2446 * Although various error states can be observed here (e.g. the lock 2447 * is not held, but there are waiters) we assume these are application 2448 * errors and so we take no corrective action. 2449 */ 2450 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2451 /* 2452 * We cannot legitimately get here from user-level 2453 * without URW_HAS_WAITERS being set. 2454 * Set it now to guard against user-level error. 2455 */ 2456 rwstate |= URW_HAS_WAITERS; 2457 2458 /* 2459 * We can try only if the lock isn't held by a writer. 2460 */ 2461 if (!(rwstate & URW_WRITE_LOCKED)) { 2462 tp = lwp_queue_waiter(&lwpchan); 2463 if (tp == NULL) { 2464 /* 2465 * Hmmm, rwstate indicates waiters but there are 2466 * none queued. This could just be the result of a 2467 * spurious wakeup, so let's ignore it. 2468 * 2469 * We now have a chance to acquire the lock 2470 * uncontended, but this is the last chance for 2471 * a writer to acquire the lock without blocking. 2472 */ 2473 if (rd_wr == READ_LOCK) { 2474 rwstate++; 2475 acquired = 1; 2476 } else if ((rwstate & URW_READERS_MASK) == 0) { 2477 rwstate |= URW_WRITE_LOCKED; 2478 acquired = 1; 2479 } 2480 } else if (rd_wr == READ_LOCK) { 2481 /* 2482 * This is the last chance for a reader to acquire 2483 * the lock now, but it can only do so if there is 2484 * no writer of equal or greater priority at the 2485 * head of the queue. 2486 * 2487 * It is also just possible that there is a reader 2488 * at the head of the queue. This may be the result 2489 * of a spurious wakeup or an application failure. 2490 * In this case we only acquire the lock if we have 2491 * equal or greater priority. It is not our job to 2492 * release spurious waiters. 2493 */ 2494 pri_t our_pri = DISP_PRIO(t); 2495 pri_t his_pri = DISP_PRIO(tp); 2496 2497 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2498 !(tp->t_writer & TRW_WANT_WRITE))) { 2499 rwstate++; 2500 acquired = 1; 2501 } 2502 } 2503 } 2504 2505 if (acquired || try_flag || time_error) { 2506 /* 2507 * We're not going to block this time. 2508 */ 2509 suword32_noerr(&rw->rwlock_readers, rwstate); 2510 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2511 locked = 0; 2512 2513 if (acquired) { 2514 /* 2515 * Got the lock! 2516 */ 2517 error = 0; 2518 2519 } else if (try_flag) { 2520 /* 2521 * We didn't get the lock and we're about to block.
* If we're doing a trylock, return EBUSY instead. 2523 */ 2524 error = EBUSY; 2525 2526 } else if (time_error) { 2527 /* 2528 * The SUSV3 POSIX spec is very clear that we should 2529 * get no error from validating the timer (above) 2530 * until we would actually sleep. 2531 */ 2532 error = time_error; 2533 } 2534 2535 goto out_drop; 2536 } 2537 2538 /* 2539 * We're about to block, so indicate what kind of waiter we are. 2540 */ 2541 t->t_writer = 0; 2542 if (rd_wr == WRITE_LOCK) 2543 t->t_writer = TRW_WANT_WRITE; 2544 suword32_noerr(&rw->rwlock_readers, rwstate); 2545 2546 /* 2547 * Unlock the rwlock's mutex (pagefaults are possible here). 2548 */ 2549 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2550 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2551 suword32_noerr(&mp->mutex_ownerpid, 0); 2552 ulock_clear(&mp->mutex_lockw); 2553 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2554 if (mwaiters != 0) { 2555 /* 2556 * Given the locking of mlwpchan around the release of 2557 * the mutex and checking for waiters, the following 2558 * call to lwp_release() can fail ONLY if the lock 2559 * acquirer is interrupted after setting the waiter bit, 2560 * calling lwp_block() and releasing mlwpchan. 2561 * In this case, it could get pulled off the LWP sleep 2562 * queue (via setrun()) before the following call to 2563 * lwp_release() occurs, and the lock requestor will 2564 * update the waiter bit correctly by re-evaluating it. 2565 */ 2566 if (lwp_release(&mlwpchan, &mwaiters, 0) > 0) 2567 suword8_noerr(&mp->mutex_waiters, mwaiters); 2568 } 2569 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2570 mlocked = 0; 2571 no_fault(); 2572 2573 if (mwatched) { 2574 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2575 mwatched = 0; 2576 } 2577 if (watched) { 2578 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2579 watched = 0; 2580 } 2581 2582 /* 2583 * Put the LWP in an orderly state for debugging. 2584 */ 2585 prstop(PR_REQUESTED, 0); 2586 if (timedwait) { 2587 /* 2588 * If we successfully queue the timeout, 2589 * then don't drop t_delay_lock until 2590 * we are on the sleep queue (below). 2591 */ 2592 mutex_enter(&t->t_delay_lock); 2593 if (lwp_timer_enqueue(&lwpt) != 0) { 2594 mutex_exit(&t->t_delay_lock); 2595 imm_timeout = 1; 2596 timedwait = NULL; 2597 } 2598 } 2599 t->t_flag |= T_WAITCVSEM; 2600 lwp_block(&lwpchan); 2601 2602 /* 2603 * Nothing should happen to cause the LWP to go to sleep until after 2604 * it returns from swtch(). 2605 */ 2606 if (timedwait) 2607 mutex_exit(&t->t_delay_lock); 2608 locked = 0; 2609 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2610 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t)) 2611 setrun(t); 2612 swtch(); 2613 2614 /* 2615 * We're back, but we need to work out why. Were we interrupted? Did 2616 * we time out? Were we granted the lock? 2617 */ 2618 error = EAGAIN; 2619 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2620 t->t_writer = 0; 2621 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2622 if (timedwait) 2623 tim = lwp_timer_dequeue(&lwpt); 2624 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2625 error = EINTR; 2626 else if (imm_timeout || (timedwait && tim == -1)) 2627 error = ETIME; 2628 lwp->lwp_asleep = 0; 2629 lwp->lwp_sysabort = 0; 2630 setallwatch(); 2631 2632 /* 2633 * If we were granted the lock we don't care about EINTR or ETIME.
*/ 2634 2635 if (acquired) 2636 error = 0; 2637 2638 if (t->t_mstate == LMS_USER_LOCK) 2639 (void) new_mstate(t, LMS_SYSTEM); 2640 2641 if (error) 2642 return (set_errno(error)); 2643 return (0); 2644 2645 out_drop: 2646 /* 2647 * Make sure that the user level lock is dropped before returning 2648 * to the caller. 2649 */ 2650 if (!mlocked) { 2651 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2652 mlocked = 1; 2653 } 2654 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2655 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2656 suword32_noerr(&mp->mutex_ownerpid, 0); 2657 ulock_clear(&mp->mutex_lockw); 2658 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2659 if (mwaiters != 0) { 2660 /* 2661 * See comment above on lock clearing and lwp_release() 2662 * success/failure. 2663 */ 2664 if (lwp_release(&mlwpchan, &mwaiters, 0) > 0) 2665 suword8_noerr(&mp->mutex_waiters, mwaiters); 2666 } 2667 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2668 mlocked = 0; 2669 2670 out_nodrop: 2671 no_fault(); 2672 if (mwatched) 2673 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2674 if (watched) 2675 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2676 if (t->t_mstate == LMS_USER_LOCK) 2677 (void) new_mstate(t, LMS_SYSTEM); 2678 if (error) 2679 return (set_errno(error)); 2680 return (0); 2681 } 2682 2683 /* 2684 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2685 * we never drop the lock. 2686 */ 2687 static int 2688 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2689 { 2690 kthread_t *t = curthread; 2691 proc_t *p = ttoproc(t); 2692 lwpchan_t lwpchan; 2693 volatile uint16_t type = 0; 2694 volatile int error = 0; 2695 volatile int locked = 0; 2696 volatile int watched = 0; 2697 label_t ljb; 2698 volatile int no_lwpchan = 1; 2699 uint32_t rwstate; 2700 2701 /* We only check rw because the mutex is included in it. */ 2702 if ((caddr_t)rw >= p->p_as->a_userlimit) 2703 return (set_errno(EFAULT)); 2704 2705 if (on_fault(&ljb)) { 2706 if (no_lwpchan) { 2707 error = EFAULT; 2708 goto out_nodrop; 2709 } 2710 if (locked) { 2711 locked = 0; 2712 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2713 } 2714 error = EFAULT; 2715 goto out_nodrop; 2716 } 2717 2718 /* We can only continue for simple USYNC_PROCESS locks. */ 2719 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2720 if (type != USYNC_PROCESS) { 2721 error = EINVAL; 2722 goto out_nodrop; 2723 } 2724 2725 /* Force Copy-on-write fault in case objects are MAP_PRIVATE. */ 2726 suword16_noerr(&rw->rwlock_type, type); 2727 2728 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2729 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2730 &lwpchan, LWPCHAN_CVPOOL)) { 2731 error = EFAULT; 2732 goto out_nodrop; 2733 } 2734 2735 no_lwpchan = 0; 2736 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2737 2738 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2739 locked = 1; 2740 2741 /* 2742 * We can resolve multiple readers (except the last reader) here. 2743 * For the last reader or a writer we need lwp_rwlock_release(), 2744 * to which we also delegate the task of copying the new rwstate 2745 * back to userland (see the comment there).
2746 */ 2747 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2748 if (rwstate & URW_WRITE_LOCKED) 2749 lwp_rwlock_release(&lwpchan, rw); 2750 else if ((rwstate & URW_READERS_MASK) > 0) { 2751 rwstate--; 2752 if ((rwstate & URW_READERS_MASK) == 0) 2753 lwp_rwlock_release(&lwpchan, rw); 2754 else 2755 suword32_noerr(&rw->rwlock_readers, rwstate); 2756 } 2757 2758 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2759 locked = 0; 2760 error = 0; 2761 2762 out_nodrop: 2763 no_fault(); 2764 if (watched) 2765 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2766 if (error) 2767 return (set_errno(error)); 2768 return (0); 2769 } 2770 2771 int 2772 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2773 { 2774 switch (subcode) { 2775 case 0: 2776 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2777 case 1: 2778 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2779 case 2: 2780 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2781 case 3: 2782 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2783 case 4: 2784 return (lwp_rwlock_unlock(rwlp)); 2785 } 2786 return (set_errno(EINVAL)); 2787 } 2788 2789 /* 2790 * Return the owner of the user-level s-object. 2791 * Since we can't really do this, return NULL. 2792 */ 2793 /* ARGSUSED */ 2794 static kthread_t * 2795 lwpsobj_owner(caddr_t sobj) 2796 { 2797 return ((kthread_t *)NULL); 2798 } 2799 2800 /* 2801 * Wake up a thread asleep on a user-level synchronization 2802 * object. 2803 */ 2804 static void 2805 lwp_unsleep(kthread_t *t) 2806 { 2807 ASSERT(THREAD_LOCK_HELD(t)); 2808 if (t->t_wchan0 != NULL) { 2809 sleepq_head_t *sqh; 2810 sleepq_t *sqp = t->t_sleepq; 2811 2812 if (sqp != NULL) { 2813 sqh = lwpsqhash(&t->t_lwpchan); 2814 ASSERT(&sqh->sq_queue == sqp); 2815 sleepq_unsleep(t); 2816 disp_lock_exit_high(&sqh->sq_lock); 2817 CL_SETRUN(t); 2818 return; 2819 } 2820 } 2821 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2822 } 2823 2824 /* 2825 * Change the priority of a thread asleep on a user-level 2826 * synchronization object. To maintain proper priority order, 2827 * we: 2828 * o dequeue the thread. 2829 * o change its priority. 2830 * o re-enqueue the thread. 2831 * Assumption: the thread is locked on entry. 
2832 */ 2833 static void 2834 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2835 { 2836 ASSERT(THREAD_LOCK_HELD(t)); 2837 if (t->t_wchan0 != NULL) { 2838 sleepq_t *sqp = t->t_sleepq; 2839 2840 sleepq_dequeue(t); 2841 *t_prip = pri; 2842 sleepq_insert(sqp, t); 2843 } else 2844 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2845 } 2846 2847 /* 2848 * Clean up a locked robust mutex 2849 */ 2850 static void 2851 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2852 { 2853 uint16_t flag; 2854 uchar_t waiters; 2855 label_t ljb; 2856 pid_t owner_pid; 2857 lwp_mutex_t *lp; 2858 volatile int locked = 0; 2859 volatile int watched = 0; 2860 volatile struct upimutex *upimutex = NULL; 2861 volatile int upilocked = 0; 2862 2863 ASSERT(ent->lwpchan_type & LOCK_ROBUST); 2864 2865 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2866 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2867 if (on_fault(&ljb)) { 2868 if (locked) 2869 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2870 if (upilocked) 2871 upimutex_unlock((upimutex_t *)upimutex, 0); 2872 goto out; 2873 } 2874 if (ent->lwpchan_type & USYNC_PROCESS) { 2875 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2876 if (owner_pid != curproc->p_pid) 2877 goto out; 2878 } 2879 if (UPIMUTEX(ent->lwpchan_type)) { 2880 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2881 upib_t *upibp = &UPI_CHAIN(lwpchan); 2882 2883 mutex_enter(&upibp->upib_lock); 2884 upimutex = upi_get(upibp, &lwpchan); 2885 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2886 mutex_exit(&upibp->upib_lock); 2887 goto out; 2888 } 2889 mutex_exit(&upibp->upib_lock); 2890 upilocked = 1; 2891 flag = lwp_clear_mutex(lp, lockflg); 2892 suword8_noerr(&lp->mutex_lockw, 0); 2893 upimutex_unlock((upimutex_t *)upimutex, flag); 2894 } else { 2895 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2896 locked = 1; 2897 (void) lwp_clear_mutex(lp, lockflg); 2898 ulock_clear(&lp->mutex_lockw); 2899 fuword8_noerr(&lp->mutex_waiters, &waiters); 2900 if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2901 suword8_noerr(&lp->mutex_waiters, waiters); 2902 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2903 } 2904 out: 2905 no_fault(); 2906 if (watched) 2907 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2908 } 2909 2910 /* 2911 * Register a process-shared robust mutex in the lwpchan cache. 2912 */ 2913 int 2914 lwp_mutex_register(lwp_mutex_t *lp) 2915 { 2916 int error = 0; 2917 volatile int watched; 2918 label_t ljb; 2919 uint8_t type; 2920 lwpchan_t lwpchan; 2921 2922 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2923 return (set_errno(EFAULT)); 2924 2925 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2926 2927 if (on_fault(&ljb)) { 2928 error = EFAULT; 2929 } else { 2930 fuword8_noerr(&lp->mutex_type, &type); 2931 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2932 != (USYNC_PROCESS|LOCK_ROBUST)) { 2933 error = EINVAL; 2934 } else { 2935 /* 2936 * Force Copy-on-write fault if lwp_mutex_t object is 2937 * defined to be MAP_PRIVATE and it was initialized to 2938 * USYNC_PROCESS. 
*/ 2939 2940 suword8_noerr(&lp->mutex_type, type); 2941 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2942 &lwpchan, LWPCHAN_MPPOOL)) 2943 error = EFAULT; 2944 } 2945 } 2946 no_fault(); 2947 if (watched) 2948 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2949 if (error) 2950 return (set_errno(error)); 2951 return (0); 2952 } 2953 2954 int 2955 lwp_mutex_trylock(lwp_mutex_t *lp) 2956 { 2957 kthread_t *t = curthread; 2958 proc_t *p = ttoproc(t); 2959 int error = 0; 2960 volatile int locked = 0; 2961 volatile int watched = 0; 2962 label_t ljb; 2963 volatile uint8_t type = 0; 2964 uint16_t flag; 2965 lwpchan_t lwpchan; 2966 2967 if ((caddr_t)lp >= p->p_as->a_userlimit) 2968 return (set_errno(EFAULT)); 2969 2970 (void) new_mstate(t, LMS_USER_LOCK); 2971 2972 if (on_fault(&ljb)) { 2973 if (locked) 2974 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 2975 error = EFAULT; 2976 goto out; 2977 } 2978 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 2979 if (UPIMUTEX(type)) { 2980 no_fault(); 2981 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 2982 if ((type & USYNC_PROCESS) && 2983 (error == 0 || 2984 error == EOWNERDEAD || error == ELOCKUNMAPPED)) 2985 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 2986 if (error) 2987 return (set_errno(error)); 2988 return (0); 2989 } 2990 /* 2991 * Force Copy-on-write fault if lwp_mutex_t object is 2992 * defined to be MAP_PRIVATE and it was initialized to 2993 * USYNC_PROCESS. 2994 */ 2995 suword8_noerr(&lp->mutex_type, type); 2996 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2997 &lwpchan, LWPCHAN_MPPOOL)) { 2998 error = EFAULT; 2999 goto out; 3000 } 3001 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3002 locked = 1; 3003 if (type & LOCK_ROBUST) { 3004 fuword16_noerr(&lp->mutex_flag, &flag); 3005 if (flag & LOCK_NOTRECOVERABLE) { 3006 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3007 error = ENOTRECOVERABLE; 3008 goto out; 3009 } 3010 } 3011 3012 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3013 3014 if (!ulock_try(&lp->mutex_lockw)) 3015 error = EBUSY; 3016 else { 3017 if (type & USYNC_PROCESS) 3018 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 3019 if (type & LOCK_ROBUST) { 3020 fuword16_noerr(&lp->mutex_flag, &flag); 3021 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3022 if (flag & LOCK_OWNERDEAD) 3023 error = EOWNERDEAD; 3024 else if (type & USYNC_PROCESS_ROBUST) 3025 error = ELOCKUNMAPPED; 3026 else 3027 error = EOWNERDEAD; 3028 } 3029 } 3030 } 3031 locked = 0; 3032 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3033 out: 3034 3035 if (t->t_mstate == LMS_USER_LOCK) 3036 (void) new_mstate(t, LMS_SYSTEM); 3037 3038 no_fault(); 3039 if (watched) 3040 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3041 if (error) 3042 return (set_errno(error)); 3043 return (0); 3044 } 3045 3046 /* 3047 * Unlock the mutex and unblock any lwps waiting to acquire it. 3048 * An unblocked lwp resumes and retries acquiring the lock.
3049 */ 3050 int 3051 lwp_mutex_unlock(lwp_mutex_t *lp) 3052 { 3053 proc_t *p = ttoproc(curthread); 3054 lwpchan_t lwpchan; 3055 uchar_t waiters; 3056 volatile int locked = 0; 3057 volatile int watched = 0; 3058 volatile uint8_t type = 0; 3059 label_t ljb; 3060 uint16_t flag; 3061 int error = 0; 3062 3063 if ((caddr_t)lp >= p->p_as->a_userlimit) 3064 return (set_errno(EFAULT)); 3065 3066 if (on_fault(&ljb)) { 3067 if (locked) 3068 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3069 error = EFAULT; 3070 goto out; 3071 } 3072 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3073 if (UPIMUTEX(type)) { 3074 no_fault(); 3075 error = lwp_upimutex_unlock(lp, type); 3076 if (error) 3077 return (set_errno(error)); 3078 return (0); 3079 } 3080 3081 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3082 3083 /* 3084 * Force Copy-on-write fault if lwp_mutex_t object is 3085 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS 3086 */ 3087 suword8_noerr(&lp->mutex_type, type); 3088 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3089 &lwpchan, LWPCHAN_MPPOOL)) { 3090 error = EFAULT; 3091 goto out; 3092 } 3093 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3094 locked = 1; 3095 if (type & LOCK_ROBUST) { 3096 fuword16_noerr(&lp->mutex_flag, &flag); 3097 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3098 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3099 flag |= LOCK_NOTRECOVERABLE; 3100 suword16_noerr(&lp->mutex_flag, flag); 3101 } 3102 } 3103 if (type & USYNC_PROCESS) 3104 suword32_noerr(&lp->mutex_ownerpid, 0); 3105 ulock_clear(&lp->mutex_lockw); 3106 /* 3107 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3108 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3109 * may fail. If it fails, do not write into the waiter bit. 3110 * The call to lwp_release() might fail due to one of three reasons: 3111 * 3112 * 1. due to the thread which set the waiter bit not actually 3113 * sleeping since it got the lock on the re-try. The waiter 3114 * bit will then be correctly updated by that thread. This 3115 * window may be closed by reading the wait bit again here 3116 * and not calling lwp_release() at all if it is zero. 3117 * 2. the thread which set the waiter bit and went to sleep 3118 * was woken up by a signal. This time, the waiter recomputes 3119 * the wait bit in the return with EINTR code. 3120 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3121 * memory that has been re-used after the lock was dropped. 3122 * In this case, writing into the waiter bit would cause data 3123 * corruption. 3124 */ 3125 fuword8_noerr(&lp->mutex_waiters, &waiters); 3126 if (waiters) { 3127 if ((type & LOCK_ROBUST) && 3128 (flag & LOCK_NOTRECOVERABLE)) { 3129 lwp_release_all(&lwpchan); 3130 suword8_noerr(&lp->mutex_waiters, 0); 3131 } else if (lwp_release(&lwpchan, &waiters, 0) == 1) { 3132 suword8_noerr(&lp->mutex_waiters, waiters); 3133 } 3134 } 3135 3136 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3137 out: 3138 no_fault(); 3139 if (watched) 3140 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3141 if (error) 3142 return (set_errno(error)); 3143 return (0); 3144 } 3145
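/*
 * Illustrative sketch (never compiled; not part of the kernel): the
 * user-level half of the lock-word/waiters-byte handshake that
 * lwp_mutex_timedlock() and lwp_mutex_unlock() above assume.  The wrapper
 * names my_mutex_lock()/my_mutex_unlock() and the __lwp_*() trap stubs are
 * hypothetical stand-ins for whatever libc actually uses; only the
 * mutex_lockw/mutex_waiters protocol is taken from this file.  Memory
 * ordering is deliberately glossed over.
 */
#if 0	/* example only */
#include <atomic.h>
#include <time.h>
#include <sys/synch.h>
#include <sys/synch32.h>	/* mutex_lockw/mutex_waiters field names */

extern int __lwp_mutex_timedlock(lwp_mutex_t *, timespec_t *);	/* hypothetical stub */
extern int __lwp_mutex_wakeup(lwp_mutex_t *);			/* hypothetical stub */

static void
my_mutex_lock(lwp_mutex_t *mp)
{
	/* fast path: atomically swap the lock byte from 0 to 1 */
	while (atomic_swap_8(&mp->mutex_lockw, 1) != 0) {
		/*
		 * Contended: advertise ourselves as a waiter first, then
		 * trap into the kernel, which re-tests the lock word and
		 * puts us to sleep on the mutex's lwpchan.  The kernel
		 * re-asserts the waiters byte under lwpchan_lock() before
		 * sleeping, closing the race with the unlock path above.
		 */
		mp->mutex_waiters = 1;
		(void) __lwp_mutex_timedlock(mp, NULL);
	}
}

static void
my_mutex_unlock(lwp_mutex_t *mp)
{
	mp->mutex_lockw = 0;		/* drop the lock word */
	if (mp->mutex_waiters != 0)	/* someone may be asleep in the kernel */
		(void) __lwp_mutex_wakeup(mp);
}
#endif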