1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/systm.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/errno.h> 39 #include <sys/file.h> 40 #include <sys/proc.h> 41 #include <sys/prsystm.h> 42 #include <sys/kmem.h> 43 #include <sys/sobject.h> 44 #include <sys/fault.h> 45 #include <sys/procfs.h> 46 #include <sys/watchpoint.h> 47 #include <sys/time.h> 48 #include <sys/cmn_err.h> 49 #include <sys/machlock.h> 50 #include <sys/debug.h> 51 #include <sys/synch.h> 52 #include <sys/synch32.h> 53 #include <sys/mman.h> 54 #include <sys/class.h> 55 #include <sys/schedctl.h> 56 #include <sys/sleepq.h> 57 #include <sys/policy.h> 58 #include <sys/tnf_probe.h> 59 #include <sys/lwpchan_impl.h> 60 #include <sys/turnstile.h> 61 #include <sys/atomic.h> 62 #include <sys/lwp_timer_impl.h> 63 #include <sys/lwp_upimutex_impl.h> 64 #include <vm/as.h> 65 #include <sys/sdt.h> 66 67 static kthread_t *lwpsobj_owner(caddr_t); 68 static void lwp_unsleep(kthread_t *t); 69 static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip); 70 static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg); 71 72 extern int lwp_cond_signal(lwp_cond_t *cv); 73 74 /* 75 * Maximum number of user prio inheritance locks that can be held by a thread. 76 * Used to limit kmem for each thread. This is a per-thread limit that 77 * can be administered on a system wide basis (using /etc/system). 78 * 79 * Also, when a limit, say maxlwps is added for numbers of lwps within a 80 * process, the per-thread limit automatically becomes a process-wide limit 81 * of maximum number of held upi locks within a process: 82 * maxheldupimx = maxnestupimx * maxlwps; 83 */ 84 static uint32_t maxnestupimx = 2000; 85 86 /* 87 * The sobj_ops vector exports a set of functions needed when a thread 88 * is asleep on a synchronization object of this type. 89 */ 90 static sobj_ops_t lwp_sobj_ops = { 91 SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri 92 }; 93 94 static kthread_t *lwpsobj_pi_owner(upimutex_t *up); 95 96 static sobj_ops_t lwp_sobj_pi_ops = { 97 SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep, 98 turnstile_change_pri 99 }; 100 101 static sleepq_head_t lwpsleepq[NSLEEPQ]; 102 upib_t upimutextab[UPIMUTEX_TABSIZE]; 103 104 #define LWPCHAN_LOCK_SHIFT 10 /* 1024 locks for each pool */ 105 #define LWPCHAN_LOCK_SIZE (1 << LWPCHAN_LOCK_SHIFT) 106 107 /* 108 * We know that both lc_wchan and lc_wchan0 are addresses that most 109 * likely are 8-byte aligned, so we shift off the low-order 3 bits. 110 * 'pool' is either 0 or 1. 111 */ 112 #define LWPCHAN_LOCK_HASH(X, pool) \ 113 (((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \ 114 (LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0)) 115 116 static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE]; 117 118 /* 119 * Is this a POSIX threads user-level lock requiring priority inheritance? 120 */ 121 #define UPIMUTEX(type) ((type) & LOCK_PRIO_INHERIT) 122 123 static sleepq_head_t * 124 lwpsqhash(lwpchan_t *lwpchan) 125 { 126 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 127 return (&lwpsleepq[SQHASHINDEX(x)]); 128 } 129 130 /* 131 * Lock an lwpchan. 132 * Keep this in sync with lwpchan_unlock(), below. 133 */ 134 static void 135 lwpchan_lock(lwpchan_t *lwpchan, int pool) 136 { 137 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 138 mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 139 } 140 141 /* 142 * Unlock an lwpchan. 143 * Keep this in sync with lwpchan_lock(), above. 144 */ 145 static void 146 lwpchan_unlock(lwpchan_t *lwpchan, int pool) 147 { 148 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 149 mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 150 } 151 152 /* 153 * Delete mappings from the lwpchan cache for pages that are being 154 * unmapped by as_unmap(). Given a range of addresses, "start" to "end", 155 * all mappings within the range are deleted from the lwpchan cache. 156 */ 157 void 158 lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end) 159 { 160 lwpchan_data_t *lcp; 161 lwpchan_hashbucket_t *hashbucket; 162 lwpchan_hashbucket_t *endbucket; 163 lwpchan_entry_t *ent; 164 lwpchan_entry_t **prev; 165 caddr_t addr; 166 167 mutex_enter(&p->p_lcp_lock); 168 lcp = p->p_lcp; 169 hashbucket = lcp->lwpchan_cache; 170 endbucket = hashbucket + lcp->lwpchan_size; 171 for (; hashbucket < endbucket; hashbucket++) { 172 if (hashbucket->lwpchan_chain == NULL) 173 continue; 174 mutex_enter(&hashbucket->lwpchan_lock); 175 prev = &hashbucket->lwpchan_chain; 176 /* check entire chain */ 177 while ((ent = *prev) != NULL) { 178 addr = ent->lwpchan_addr; 179 if (start <= addr && addr < end) { 180 *prev = ent->lwpchan_next; 181 /* 182 * We do this only for the obsolete type 183 * USYNC_PROCESS_ROBUST. Otherwise robust 184 * locks do not draw ELOCKUNMAPPED or 185 * EOWNERDEAD due to being unmapped. 186 */ 187 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 188 (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) 189 lwp_mutex_cleanup(ent, LOCK_UNMAPPED); 190 kmem_free(ent, sizeof (*ent)); 191 atomic_add_32(&lcp->lwpchan_entries, -1); 192 } else { 193 prev = &ent->lwpchan_next; 194 } 195 } 196 mutex_exit(&hashbucket->lwpchan_lock); 197 } 198 mutex_exit(&p->p_lcp_lock); 199 } 200 201 /* 202 * Given an lwpchan cache pointer and a process virtual address, 203 * return a pointer to the corresponding lwpchan hash bucket. 204 */ 205 static lwpchan_hashbucket_t * 206 lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr) 207 { 208 uint_t i; 209 210 /* 211 * All user-level sync object addresses are 8-byte aligned. 212 * Ignore the lowest 3 bits of the address and use the 213 * higher-order 2*lwpchan_bits bits for the hash index. 214 */ 215 addr >>= 3; 216 i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask; 217 return (lcp->lwpchan_cache + i); 218 } 219 220 /* 221 * (Re)allocate the per-process lwpchan cache. 222 */ 223 static void 224 lwpchan_alloc_cache(proc_t *p, uint_t bits) 225 { 226 lwpchan_data_t *lcp; 227 lwpchan_data_t *old_lcp; 228 lwpchan_hashbucket_t *hashbucket; 229 lwpchan_hashbucket_t *endbucket; 230 lwpchan_hashbucket_t *newbucket; 231 lwpchan_entry_t *ent; 232 lwpchan_entry_t *next; 233 uint_t count; 234 235 ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS); 236 237 lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP); 238 lcp->lwpchan_bits = bits; 239 lcp->lwpchan_size = 1 << lcp->lwpchan_bits; 240 lcp->lwpchan_mask = lcp->lwpchan_size - 1; 241 lcp->lwpchan_entries = 0; 242 lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size * 243 sizeof (lwpchan_hashbucket_t), KM_SLEEP); 244 lcp->lwpchan_next_data = NULL; 245 246 mutex_enter(&p->p_lcp_lock); 247 if ((old_lcp = p->p_lcp) != NULL) { 248 if (old_lcp->lwpchan_bits >= bits) { 249 /* someone beat us to it */ 250 mutex_exit(&p->p_lcp_lock); 251 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 252 sizeof (lwpchan_hashbucket_t)); 253 kmem_free(lcp, sizeof (lwpchan_data_t)); 254 return; 255 } 256 /* 257 * Acquire all of the old hash table locks. 258 */ 259 hashbucket = old_lcp->lwpchan_cache; 260 endbucket = hashbucket + old_lcp->lwpchan_size; 261 for (; hashbucket < endbucket; hashbucket++) 262 mutex_enter(&hashbucket->lwpchan_lock); 263 /* 264 * Move all of the old hash table entries to the 265 * new hash table. The new hash table has not yet 266 * been installed so we don't need any of its locks. 267 */ 268 count = 0; 269 hashbucket = old_lcp->lwpchan_cache; 270 for (; hashbucket < endbucket; hashbucket++) { 271 ent = hashbucket->lwpchan_chain; 272 while (ent != NULL) { 273 next = ent->lwpchan_next; 274 newbucket = lwpchan_bucket(lcp, 275 (uintptr_t)ent->lwpchan_addr); 276 ent->lwpchan_next = newbucket->lwpchan_chain; 277 newbucket->lwpchan_chain = ent; 278 ent = next; 279 count++; 280 } 281 hashbucket->lwpchan_chain = NULL; 282 } 283 lcp->lwpchan_entries = count; 284 } 285 286 /* 287 * Retire the old hash table. We can't actually kmem_free() it 288 * now because someone may still have a pointer to it. Instead, 289 * we link it onto the new hash table's list of retired hash tables. 290 * The new hash table is double the size of the previous one, so 291 * the total size of all retired hash tables is less than the size 292 * of the new one. exit() and exec() free the retired hash tables 293 * (see lwpchan_destroy_cache(), below). 294 */ 295 lcp->lwpchan_next_data = old_lcp; 296 297 /* 298 * As soon as we store the new lcp, future locking operations will 299 * use it. Therefore, we must ensure that all the state we've just 300 * established reaches global visibility before the new lcp does. 301 */ 302 membar_producer(); 303 p->p_lcp = lcp; 304 305 if (old_lcp != NULL) { 306 /* 307 * Release all of the old hash table locks. 308 */ 309 hashbucket = old_lcp->lwpchan_cache; 310 for (; hashbucket < endbucket; hashbucket++) 311 mutex_exit(&hashbucket->lwpchan_lock); 312 } 313 mutex_exit(&p->p_lcp_lock); 314 } 315 316 /* 317 * Deallocate the lwpchan cache, and any dynamically allocated mappings. 318 * Called when the process exits or execs. All lwps except one have 319 * exited so we need no locks here. 320 */ 321 void 322 lwpchan_destroy_cache(int exec) 323 { 324 proc_t *p = curproc; 325 lwpchan_hashbucket_t *hashbucket; 326 lwpchan_hashbucket_t *endbucket; 327 lwpchan_data_t *lcp; 328 lwpchan_entry_t *ent; 329 lwpchan_entry_t *next; 330 uint16_t lockflg; 331 332 lcp = p->p_lcp; 333 p->p_lcp = NULL; 334 335 lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD; 336 hashbucket = lcp->lwpchan_cache; 337 endbucket = hashbucket + lcp->lwpchan_size; 338 for (; hashbucket < endbucket; hashbucket++) { 339 ent = hashbucket->lwpchan_chain; 340 hashbucket->lwpchan_chain = NULL; 341 while (ent != NULL) { 342 next = ent->lwpchan_next; 343 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 344 (ent->lwpchan_type & LOCK_ROBUST)) 345 lwp_mutex_cleanup(ent, lockflg); 346 kmem_free(ent, sizeof (*ent)); 347 ent = next; 348 } 349 } 350 351 while (lcp != NULL) { 352 lwpchan_data_t *next_lcp = lcp->lwpchan_next_data; 353 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 354 sizeof (lwpchan_hashbucket_t)); 355 kmem_free(lcp, sizeof (lwpchan_data_t)); 356 lcp = next_lcp; 357 } 358 } 359 360 /* 361 * Return zero when there is an entry in the lwpchan cache for the 362 * given process virtual address and non-zero when there is not. 363 * The returned non-zero value is the current length of the 364 * hash chain plus one. The caller holds the hash bucket lock. 365 */ 366 static uint_t 367 lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan, 368 lwpchan_hashbucket_t *hashbucket) 369 { 370 lwpchan_entry_t *ent; 371 uint_t count = 1; 372 373 for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) { 374 if (ent->lwpchan_addr == addr) { 375 if (ent->lwpchan_type != type || 376 ent->lwpchan_pool != pool) { 377 /* 378 * This shouldn't happen, but might if the 379 * process reuses its memory for different 380 * types of sync objects. We test first 381 * to avoid grabbing the memory cache line. 382 */ 383 ent->lwpchan_type = (uint16_t)type; 384 ent->lwpchan_pool = (uint16_t)pool; 385 } 386 *lwpchan = ent->lwpchan_lwpchan; 387 return (0); 388 } 389 count++; 390 } 391 return (count); 392 } 393 394 /* 395 * Return the cached lwpchan mapping if cached, otherwise insert 396 * a virtual address to lwpchan mapping into the cache. 397 */ 398 static int 399 lwpchan_get_mapping(struct as *as, caddr_t addr, 400 int type, lwpchan_t *lwpchan, int pool) 401 { 402 proc_t *p = curproc; 403 lwpchan_data_t *lcp; 404 lwpchan_hashbucket_t *hashbucket; 405 lwpchan_entry_t *ent; 406 memid_t memid; 407 uint_t count; 408 uint_t bits; 409 410 top: 411 /* initialize the lwpchan cache, if necesary */ 412 if ((lcp = p->p_lcp) == NULL) { 413 lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS); 414 goto top; 415 } 416 hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr); 417 mutex_enter(&hashbucket->lwpchan_lock); 418 if (lcp != p->p_lcp) { 419 /* someone resized the lwpchan cache; start over */ 420 mutex_exit(&hashbucket->lwpchan_lock); 421 goto top; 422 } 423 if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) { 424 /* it's in the cache */ 425 mutex_exit(&hashbucket->lwpchan_lock); 426 return (1); 427 } 428 mutex_exit(&hashbucket->lwpchan_lock); 429 if (as_getmemid(as, addr, &memid) != 0) 430 return (0); 431 lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0]; 432 lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1]; 433 ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP); 434 mutex_enter(&hashbucket->lwpchan_lock); 435 if (lcp != p->p_lcp) { 436 /* someone resized the lwpchan cache; start over */ 437 mutex_exit(&hashbucket->lwpchan_lock); 438 kmem_free(ent, sizeof (*ent)); 439 goto top; 440 } 441 count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket); 442 if (count == 0) { 443 /* someone else added this entry to the cache */ 444 mutex_exit(&hashbucket->lwpchan_lock); 445 kmem_free(ent, sizeof (*ent)); 446 return (1); 447 } 448 if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */ 449 (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) { 450 /* hash chain too long; reallocate the hash table */ 451 mutex_exit(&hashbucket->lwpchan_lock); 452 kmem_free(ent, sizeof (*ent)); 453 lwpchan_alloc_cache(p, bits + 1); 454 goto top; 455 } 456 ent->lwpchan_addr = addr; 457 ent->lwpchan_type = (uint16_t)type; 458 ent->lwpchan_pool = (uint16_t)pool; 459 ent->lwpchan_lwpchan = *lwpchan; 460 ent->lwpchan_next = hashbucket->lwpchan_chain; 461 hashbucket->lwpchan_chain = ent; 462 atomic_add_32(&lcp->lwpchan_entries, 1); 463 mutex_exit(&hashbucket->lwpchan_lock); 464 return (1); 465 } 466 467 /* 468 * Return a unique pair of identifiers that corresponds to a 469 * synchronization object's virtual address. Process-shared 470 * sync objects usually get vnode/offset from as_getmemid(). 471 */ 472 static int 473 get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool) 474 { 475 /* 476 * If the lwp synch object is defined to be process-private, 477 * we just make the first field of the lwpchan be 'as' and 478 * the second field be the synch object's virtual address. 479 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.) 480 * The lwpchan cache is used only for process-shared objects. 481 */ 482 if (!(type & USYNC_PROCESS)) { 483 lwpchan->lc_wchan0 = (caddr_t)as; 484 lwpchan->lc_wchan = addr; 485 return (1); 486 } 487 488 return (lwpchan_get_mapping(as, addr, type, lwpchan, pool)); 489 } 490 491 static void 492 lwp_block(lwpchan_t *lwpchan) 493 { 494 kthread_t *t = curthread; 495 klwp_t *lwp = ttolwp(t); 496 sleepq_head_t *sqh; 497 498 thread_lock(t); 499 t->t_flag |= T_WAKEABLE; 500 t->t_lwpchan = *lwpchan; 501 t->t_sobj_ops = &lwp_sobj_ops; 502 t->t_release = 0; 503 sqh = lwpsqhash(lwpchan); 504 disp_lock_enter_high(&sqh->sq_lock); 505 CL_SLEEP(t); 506 DTRACE_SCHED(sleep); 507 THREAD_SLEEP(t, &sqh->sq_lock); 508 sleepq_insert(&sqh->sq_queue, t); 509 thread_unlock(t); 510 lwp->lwp_asleep = 1; 511 lwp->lwp_sysabort = 0; 512 lwp->lwp_ru.nvcsw++; 513 (void) new_mstate(curthread, LMS_SLEEP); 514 } 515 516 static kthread_t * 517 lwpsobj_pi_owner(upimutex_t *up) 518 { 519 return (up->upi_owner); 520 } 521 522 static struct upimutex * 523 upi_get(upib_t *upibp, lwpchan_t *lcp) 524 { 525 struct upimutex *upip; 526 527 for (upip = upibp->upib_first; upip != NULL; 528 upip = upip->upi_nextchain) { 529 if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 && 530 upip->upi_lwpchan.lc_wchan == lcp->lc_wchan) 531 break; 532 } 533 return (upip); 534 } 535 536 static void 537 upi_chain_add(upib_t *upibp, struct upimutex *upimutex) 538 { 539 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 540 541 /* 542 * Insert upimutex at front of list. Maybe a bit unfair 543 * but assume that not many lwpchans hash to the same 544 * upimutextab bucket, i.e. the list of upimutexes from 545 * upib_first is not too long. 546 */ 547 upimutex->upi_nextchain = upibp->upib_first; 548 upibp->upib_first = upimutex; 549 } 550 551 static void 552 upi_chain_del(upib_t *upibp, struct upimutex *upimutex) 553 { 554 struct upimutex **prev; 555 556 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 557 558 prev = &upibp->upib_first; 559 while (*prev != upimutex) { 560 prev = &(*prev)->upi_nextchain; 561 } 562 *prev = upimutex->upi_nextchain; 563 upimutex->upi_nextchain = NULL; 564 } 565 566 /* 567 * Add upimutex to chain of upimutexes held by curthread. 568 * Returns number of upimutexes held by curthread. 569 */ 570 static uint32_t 571 upi_mylist_add(struct upimutex *upimutex) 572 { 573 kthread_t *t = curthread; 574 575 /* 576 * Insert upimutex at front of list of upimutexes owned by t. This 577 * would match typical LIFO order in which nested locks are acquired 578 * and released. 579 */ 580 upimutex->upi_nextowned = t->t_upimutex; 581 t->t_upimutex = upimutex; 582 t->t_nupinest++; 583 ASSERT(t->t_nupinest > 0); 584 return (t->t_nupinest); 585 } 586 587 /* 588 * Delete upimutex from list of upimutexes owned by curthread. 589 */ 590 static void 591 upi_mylist_del(struct upimutex *upimutex) 592 { 593 kthread_t *t = curthread; 594 struct upimutex **prev; 595 596 /* 597 * Since the order in which nested locks are acquired and released, 598 * is typically LIFO, and typical nesting levels are not too deep, the 599 * following should not be expensive in the general case. 600 */ 601 prev = &t->t_upimutex; 602 while (*prev != upimutex) { 603 prev = &(*prev)->upi_nextowned; 604 } 605 *prev = upimutex->upi_nextowned; 606 upimutex->upi_nextowned = NULL; 607 ASSERT(t->t_nupinest > 0); 608 t->t_nupinest--; 609 } 610 611 /* 612 * Returns true if upimutex is owned. Should be called only when upim points 613 * to kmem which cannot disappear from underneath. 614 */ 615 static int 616 upi_owned(upimutex_t *upim) 617 { 618 return (upim->upi_owner == curthread); 619 } 620 621 /* 622 * Returns pointer to kernel object (upimutex_t *) if lp is owned. 623 */ 624 static struct upimutex * 625 lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type) 626 { 627 lwpchan_t lwpchan; 628 upib_t *upibp; 629 struct upimutex *upimutex; 630 631 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 632 &lwpchan, LWPCHAN_MPPOOL)) 633 return (NULL); 634 635 upibp = &UPI_CHAIN(lwpchan); 636 mutex_enter(&upibp->upib_lock); 637 upimutex = upi_get(upibp, &lwpchan); 638 if (upimutex == NULL || upimutex->upi_owner != curthread) { 639 mutex_exit(&upibp->upib_lock); 640 return (NULL); 641 } 642 mutex_exit(&upibp->upib_lock); 643 return (upimutex); 644 } 645 646 /* 647 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if 648 * no lock hand-off occurrs. 649 */ 650 static void 651 upimutex_unlock(struct upimutex *upimutex, uint16_t flag) 652 { 653 turnstile_t *ts; 654 upib_t *upibp; 655 kthread_t *newowner; 656 657 upi_mylist_del(upimutex); 658 upibp = upimutex->upi_upibp; 659 mutex_enter(&upibp->upib_lock); 660 if (upimutex->upi_waiter != 0) { /* if waiters */ 661 ts = turnstile_lookup(upimutex); 662 if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) { 663 /* hand-off lock to highest prio waiter */ 664 newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first; 665 upimutex->upi_owner = newowner; 666 if (ts->ts_waiters == 1) 667 upimutex->upi_waiter = 0; 668 turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner); 669 mutex_exit(&upibp->upib_lock); 670 return; 671 } else if (ts != NULL) { 672 /* LOCK_NOTRECOVERABLE: wakeup all */ 673 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL); 674 } else { 675 /* 676 * Misleading w bit. Waiters might have been 677 * interrupted. No need to clear the w bit (upimutex 678 * will soon be freed). Re-calculate PI from existing 679 * waiters. 680 */ 681 turnstile_exit(upimutex); 682 turnstile_pi_recalc(); 683 } 684 } 685 /* 686 * no waiters, or LOCK_NOTRECOVERABLE. 687 * remove from the bucket chain of upi mutexes. 688 * de-allocate kernel memory (upimutex). 689 */ 690 upi_chain_del(upimutex->upi_upibp, upimutex); 691 mutex_exit(&upibp->upib_lock); 692 kmem_free(upimutex, sizeof (upimutex_t)); 693 } 694 695 static int 696 lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp) 697 { 698 label_t ljb; 699 int error = 0; 700 lwpchan_t lwpchan; 701 uint16_t flag; 702 upib_t *upibp; 703 volatile struct upimutex *upimutex = NULL; 704 turnstile_t *ts; 705 uint32_t nupinest; 706 volatile int upilocked = 0; 707 708 if (on_fault(&ljb)) { 709 if (upilocked) 710 upimutex_unlock((upimutex_t *)upimutex, 0); 711 error = EFAULT; 712 goto out; 713 } 714 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 715 &lwpchan, LWPCHAN_MPPOOL)) { 716 error = EFAULT; 717 goto out; 718 } 719 upibp = &UPI_CHAIN(lwpchan); 720 retry: 721 mutex_enter(&upibp->upib_lock); 722 upimutex = upi_get(upibp, &lwpchan); 723 if (upimutex == NULL) { 724 /* lock available since lwpchan has no upimutex */ 725 upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP); 726 upi_chain_add(upibp, (upimutex_t *)upimutex); 727 upimutex->upi_owner = curthread; /* grab lock */ 728 upimutex->upi_upibp = upibp; 729 upimutex->upi_vaddr = lp; 730 upimutex->upi_lwpchan = lwpchan; 731 mutex_exit(&upibp->upib_lock); 732 nupinest = upi_mylist_add((upimutex_t *)upimutex); 733 upilocked = 1; 734 fuword16_noerr(&lp->mutex_flag, &flag); 735 if (nupinest > maxnestupimx && 736 secpolicy_resource(CRED()) != 0) { 737 upimutex_unlock((upimutex_t *)upimutex, flag); 738 error = ENOMEM; 739 goto out; 740 } 741 if (flag & LOCK_NOTRECOVERABLE) { 742 /* 743 * Since the setting of LOCK_NOTRECOVERABLE 744 * was done under the high-level upi mutex, 745 * in lwp_upimutex_unlock(), this flag needs to 746 * be checked while holding the upi mutex. 747 * If set, this thread should return without 748 * the lock held, and with the right error code. 749 */ 750 upimutex_unlock((upimutex_t *)upimutex, flag); 751 upilocked = 0; 752 error = ENOTRECOVERABLE; 753 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 754 if (flag & LOCK_OWNERDEAD) 755 error = EOWNERDEAD; 756 else if (type & USYNC_PROCESS_ROBUST) 757 error = ELOCKUNMAPPED; 758 else 759 error = EOWNERDEAD; 760 } 761 goto out; 762 } 763 /* 764 * If a upimutex object exists, it must have an owner. 765 * This is due to lock hand-off, and release of upimutex when no 766 * waiters are present at unlock time, 767 */ 768 ASSERT(upimutex->upi_owner != NULL); 769 if (upimutex->upi_owner == curthread) { 770 /* 771 * The user wrapper can check if the mutex type is 772 * ERRORCHECK: if not, it should stall at user-level. 773 * If so, it should return the error code. 774 */ 775 mutex_exit(&upibp->upib_lock); 776 error = EDEADLK; 777 goto out; 778 } 779 if (try == UPIMUTEX_TRY) { 780 mutex_exit(&upibp->upib_lock); 781 error = EBUSY; 782 goto out; 783 } 784 /* 785 * Block for the lock. 786 * Put the lwp in an orderly state for debugging. 787 * Calling prstop() has to be done here, and not in 788 * turnstile_block(), since the preceding call to 789 * turnstile_lookup() raises the PIL to a level 790 * at which calls to prstop() should not be made. 791 */ 792 if ((error = lwptp->lwpt_time_error) != 0) { 793 /* 794 * The SUSV3 Posix spec is very clear that we 795 * should get no error from validating the 796 * timer until we would actually sleep. 797 */ 798 mutex_exit(&upibp->upib_lock); 799 goto out; 800 } 801 prstop(PR_REQUESTED, 0); 802 if (lwptp->lwpt_tsp != NULL) { 803 /* 804 * If we successfully queue the timeout 805 * (lwp_timer_enqueue() returns zero), 806 * then don't drop t_delay_lock until we are 807 * on the sleep queue (in turnstile_block()). 808 * Otherwise we will get an immediate timeout 809 * when we attempt to sleep in turnstile_block(). 810 */ 811 mutex_enter(&curthread->t_delay_lock); 812 if (lwp_timer_enqueue(lwptp) != 0) 813 mutex_exit(&curthread->t_delay_lock); 814 } 815 /* 816 * Now, set the waiter bit and block for the lock in turnstile_block(). 817 * No need to preserve the previous wbit since a lock try is not 818 * attempted after setting the wait bit. Wait bit is set under 819 * the upib_lock, which is not released until the turnstile lock 820 * is acquired. Say, the upimutex is L: 821 * 822 * 1. upib_lock is held so the waiter does not have to retry L after 823 * setting the wait bit: since the owner has to grab the upib_lock 824 * to unlock L, it will certainly see the wait bit set. 825 * 2. upib_lock is not released until the turnstile lock is acquired. 826 * This is the key to preventing a missed wake-up. Otherwise, the 827 * owner could acquire the upib_lock, and the tc_lock, to call 828 * turnstile_wakeup(). All this, before the waiter gets tc_lock 829 * to sleep in turnstile_block(). turnstile_wakeup() will then not 830 * find this waiter, resulting in the missed wakeup. 831 * 3. The upib_lock, being a kernel mutex, cannot be released while 832 * holding the tc_lock (since mutex_exit() could need to acquire 833 * the same tc_lock)...and so is held when calling turnstile_block(). 834 * The address of upib_lock is passed to turnstile_block() which 835 * releases it after releasing all turnstile locks, and before going 836 * to sleep in swtch(). 837 * 4. The waiter value cannot be a count of waiters, because a waiter 838 * can be interrupted. The interrupt occurs under the tc_lock, at 839 * which point, the upib_lock cannot be locked, to decrement waiter 840 * count. So, just treat the waiter state as a bit, not a count. 841 */ 842 ts = turnstile_lookup((upimutex_t *)upimutex); 843 upimutex->upi_waiter = 1; 844 error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex, 845 &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp); 846 /* 847 * Hand-off implies that we wakeup holding the lock, except when: 848 * - deadlock is detected 849 * - lock is not recoverable 850 * - we got an interrupt or timeout 851 * If we wake up due to an interrupt or timeout, we may 852 * or may not be holding the lock due to mutex hand-off. 853 * Use lwp_upimutex_owned() to check if we do hold the lock. 854 */ 855 if (error != 0) { 856 if ((error == EINTR || error == ETIME) && 857 (upimutex = lwp_upimutex_owned(lp, type))) { 858 /* 859 * Unlock and return - the re-startable syscall will 860 * try the lock again if we got EINTR. 861 */ 862 (void) upi_mylist_add((upimutex_t *)upimutex); 863 upimutex_unlock((upimutex_t *)upimutex, 0); 864 } 865 /* 866 * The only other possible error is EDEADLK. If so, upimutex 867 * is valid, since its owner is deadlocked with curthread. 868 */ 869 ASSERT(error == EINTR || error == ETIME || 870 (error == EDEADLK && !upi_owned((upimutex_t *)upimutex))); 871 ASSERT(!lwp_upimutex_owned(lp, type)); 872 goto out; 873 } 874 if (lwp_upimutex_owned(lp, type)) { 875 ASSERT(lwp_upimutex_owned(lp, type) == upimutex); 876 nupinest = upi_mylist_add((upimutex_t *)upimutex); 877 upilocked = 1; 878 } 879 /* 880 * Now, need to read the user-level lp->mutex_flag to do the following: 881 * 882 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED 883 * should be returned. 884 * - if lock isn't held, check if ENOTRECOVERABLE should 885 * be returned. 886 * 887 * Now, either lp->mutex_flag is readable or it's not. If not 888 * readable, the on_fault path will cause a return with EFAULT 889 * as it should. If it is readable, the state of the flag 890 * encodes the robustness state of the lock: 891 * 892 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD 893 * or LOCK_UNMAPPED setting will influence the return code 894 * appropriately. If the upimutex is not locked here, this 895 * could be due to a spurious wake-up or a NOTRECOVERABLE 896 * event. The flag's setting can be used to distinguish 897 * between these two events. 898 */ 899 fuword16_noerr(&lp->mutex_flag, &flag); 900 if (upilocked) { 901 /* 902 * If the thread wakes up from turnstile_block with the lock 903 * held, the flag could not be set to LOCK_NOTRECOVERABLE, 904 * since it would not have been handed-off the lock. 905 * So, no need to check for this case. 906 */ 907 if (nupinest > maxnestupimx && 908 secpolicy_resource(CRED()) != 0) { 909 upimutex_unlock((upimutex_t *)upimutex, flag); 910 upilocked = 0; 911 error = ENOMEM; 912 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 913 if (flag & LOCK_OWNERDEAD) 914 error = EOWNERDEAD; 915 else if (type & USYNC_PROCESS_ROBUST) 916 error = ELOCKUNMAPPED; 917 else 918 error = EOWNERDEAD; 919 } 920 } else { 921 /* 922 * Wake-up without the upimutex held. Either this is a 923 * spurious wake-up (due to signals, forkall(), whatever), or 924 * it is a LOCK_NOTRECOVERABLE robustness event. The setting 925 * of the mutex flag can be used to distinguish between the 926 * two events. 927 */ 928 if (flag & LOCK_NOTRECOVERABLE) { 929 error = ENOTRECOVERABLE; 930 } else { 931 /* 932 * Here, the flag could be set to LOCK_OWNERDEAD or 933 * not. In both cases, this is a spurious wakeup, 934 * since the upi lock is not held, but the thread 935 * has returned from turnstile_block(). 936 * 937 * The user flag could be LOCK_OWNERDEAD if, at the 938 * same time as curthread having been woken up 939 * spuriously, the owner (say Tdead) has died, marked 940 * the mutex flag accordingly, and handed off the lock 941 * to some other waiter (say Tnew). curthread just 942 * happened to read the flag while Tnew has yet to deal 943 * with the owner-dead event. 944 * 945 * In this event, curthread should retry the lock. 946 * If Tnew is able to cleanup the lock, curthread 947 * will eventually get the lock with a zero error code, 948 * If Tnew is unable to cleanup, its eventual call to 949 * unlock the lock will result in the mutex flag being 950 * set to LOCK_NOTRECOVERABLE, and the wake-up of 951 * all waiters, including curthread, which will then 952 * eventually return ENOTRECOVERABLE due to the above 953 * check. 954 * 955 * Of course, if the user-flag is not set with 956 * LOCK_OWNERDEAD, retrying is the thing to do, since 957 * this is definitely a spurious wakeup. 958 */ 959 goto retry; 960 } 961 } 962 963 out: 964 no_fault(); 965 return (error); 966 } 967 968 969 static int 970 lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type) 971 { 972 label_t ljb; 973 int error = 0; 974 lwpchan_t lwpchan; 975 uint16_t flag; 976 upib_t *upibp; 977 volatile struct upimutex *upimutex = NULL; 978 volatile int upilocked = 0; 979 980 if (on_fault(&ljb)) { 981 if (upilocked) 982 upimutex_unlock((upimutex_t *)upimutex, 0); 983 error = EFAULT; 984 goto out; 985 } 986 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 987 &lwpchan, LWPCHAN_MPPOOL)) { 988 error = EFAULT; 989 goto out; 990 } 991 upibp = &UPI_CHAIN(lwpchan); 992 mutex_enter(&upibp->upib_lock); 993 upimutex = upi_get(upibp, &lwpchan); 994 /* 995 * If the lock is not held, or the owner is not curthread, return 996 * error. The user-level wrapper can return this error or stall, 997 * depending on whether mutex is of ERRORCHECK type or not. 998 */ 999 if (upimutex == NULL || upimutex->upi_owner != curthread) { 1000 mutex_exit(&upibp->upib_lock); 1001 error = EPERM; 1002 goto out; 1003 } 1004 mutex_exit(&upibp->upib_lock); /* release for user memory access */ 1005 upilocked = 1; 1006 fuword16_noerr(&lp->mutex_flag, &flag); 1007 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1008 /* 1009 * transition mutex to the LOCK_NOTRECOVERABLE state. 1010 */ 1011 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 1012 flag |= LOCK_NOTRECOVERABLE; 1013 suword16_noerr(&lp->mutex_flag, flag); 1014 } 1015 if (type & USYNC_PROCESS) 1016 suword32_noerr(&lp->mutex_ownerpid, 0); 1017 upimutex_unlock((upimutex_t *)upimutex, flag); 1018 upilocked = 0; 1019 out: 1020 no_fault(); 1021 return (error); 1022 } 1023 1024 /* 1025 * Clear the contents of a user-level mutex; return the flags. 1026 * Used only by upi_dead() and lwp_mutex_cleanup(), below. 1027 */ 1028 static uint16_t 1029 lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg) 1030 { 1031 uint16_t flag; 1032 1033 fuword16_noerr(&lp->mutex_flag, &flag); 1034 if ((flag & 1035 (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) { 1036 flag |= lockflg; 1037 suword16_noerr(&lp->mutex_flag, flag); 1038 } 1039 suword32_noerr((uint32_t *)&lp->mutex_owner, 0); 1040 suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0); 1041 suword32_noerr(&lp->mutex_ownerpid, 0); 1042 suword8_noerr(&lp->mutex_rcount, 0); 1043 1044 return (flag); 1045 } 1046 1047 /* 1048 * Mark user mutex state, corresponding to kernel upimutex, 1049 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate 1050 */ 1051 static int 1052 upi_dead(upimutex_t *upip, uint16_t lockflg) 1053 { 1054 label_t ljb; 1055 int error = 0; 1056 lwp_mutex_t *lp; 1057 1058 if (on_fault(&ljb)) { 1059 error = EFAULT; 1060 goto out; 1061 } 1062 1063 lp = upip->upi_vaddr; 1064 (void) lwp_clear_mutex(lp, lockflg); 1065 suword8_noerr(&lp->mutex_lockw, 0); 1066 out: 1067 no_fault(); 1068 return (error); 1069 } 1070 1071 /* 1072 * Unlock all upimutexes held by curthread, since curthread is dying. 1073 * For each upimutex, attempt to mark its corresponding user mutex object as 1074 * dead. 1075 */ 1076 void 1077 upimutex_cleanup() 1078 { 1079 kthread_t *t = curthread; 1080 uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)? 1081 LOCK_UNMAPPED : LOCK_OWNERDEAD; 1082 struct upimutex *upip; 1083 1084 while ((upip = t->t_upimutex) != NULL) { 1085 if (upi_dead(upip, lockflg) != 0) { 1086 /* 1087 * If the user object associated with this upimutex is 1088 * unmapped, unlock upimutex with the 1089 * LOCK_NOTRECOVERABLE flag, so that all waiters are 1090 * woken up. Since user object is unmapped, it could 1091 * not be marked as dead or notrecoverable. 1092 * The waiters will now all wake up and return 1093 * ENOTRECOVERABLE, since they would find that the lock 1094 * has not been handed-off to them. 1095 * See lwp_upimutex_lock(). 1096 */ 1097 upimutex_unlock(upip, LOCK_NOTRECOVERABLE); 1098 } else { 1099 /* 1100 * The user object has been updated as dead. 1101 * Unlock the upimutex: if no waiters, upip kmem will 1102 * be freed. If there is a waiter, the lock will be 1103 * handed off. If exit() is in progress, each existing 1104 * waiter will successively get the lock, as owners 1105 * die, and each new owner will call this routine as 1106 * it dies. The last owner will free kmem, since 1107 * it will find the upimutex has no waiters. So, 1108 * eventually, the kmem is guaranteed to be freed. 1109 */ 1110 upimutex_unlock(upip, 0); 1111 } 1112 /* 1113 * Note that the call to upimutex_unlock() above will delete 1114 * upimutex from the t_upimutexes chain. And so the 1115 * while loop will eventually terminate. 1116 */ 1117 } 1118 } 1119 1120 int 1121 lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) 1122 { 1123 kthread_t *t = curthread; 1124 klwp_t *lwp = ttolwp(t); 1125 proc_t *p = ttoproc(t); 1126 lwp_timer_t lwpt; 1127 caddr_t timedwait; 1128 int error = 0; 1129 int time_error; 1130 clock_t tim = -1; 1131 uchar_t waiters; 1132 volatile int locked = 0; 1133 volatile int watched = 0; 1134 label_t ljb; 1135 volatile uint8_t type = 0; 1136 lwpchan_t lwpchan; 1137 sleepq_head_t *sqh; 1138 static int iswanted(); 1139 uint16_t flag; 1140 int imm_timeout = 0; 1141 1142 if ((caddr_t)lp >= p->p_as->a_userlimit) 1143 return (set_errno(EFAULT)); 1144 1145 timedwait = (caddr_t)tsp; 1146 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1147 lwpt.lwpt_imm_timeout) { 1148 imm_timeout = 1; 1149 timedwait = NULL; 1150 } 1151 1152 /* 1153 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock", 1154 * this micro state is really a run state. If the thread indeed blocks, 1155 * this state becomes valid. If not, the state is converted back to 1156 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just 1157 * when blocking. 1158 */ 1159 (void) new_mstate(t, LMS_USER_LOCK); 1160 if (on_fault(&ljb)) { 1161 if (locked) 1162 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1163 error = EFAULT; 1164 goto out; 1165 } 1166 /* 1167 * Force Copy-on-write if necessary and ensure that the 1168 * synchronization object resides in read/write memory. 1169 * Cause an EFAULT return now if this is not so. 1170 */ 1171 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1172 suword8_noerr(&lp->mutex_type, type); 1173 if (UPIMUTEX(type)) { 1174 no_fault(); 1175 error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt); 1176 if ((type & USYNC_PROCESS) && 1177 (error == 0 || 1178 error == EOWNERDEAD || error == ELOCKUNMAPPED)) 1179 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 1180 if (tsp && !time_error) /* copyout the residual time left */ 1181 error = lwp_timer_copyout(&lwpt, error); 1182 if (error) 1183 return (set_errno(error)); 1184 return (0); 1185 } 1186 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1187 &lwpchan, LWPCHAN_MPPOOL)) { 1188 error = EFAULT; 1189 goto out; 1190 } 1191 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1192 locked = 1; 1193 if (type & LOCK_ROBUST) { 1194 fuword16_noerr(&lp->mutex_flag, &flag); 1195 if (flag & LOCK_NOTRECOVERABLE) { 1196 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1197 error = ENOTRECOVERABLE; 1198 goto out; 1199 } 1200 } 1201 fuword8_noerr(&lp->mutex_waiters, &waiters); 1202 suword8_noerr(&lp->mutex_waiters, 1); 1203 1204 /* 1205 * If watchpoints are set, they need to be restored, since 1206 * atomic accesses of memory such as the call to ulock_try() 1207 * below cannot be watched. 1208 */ 1209 1210 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1211 1212 while (!ulock_try(&lp->mutex_lockw)) { 1213 if (time_error) { 1214 /* 1215 * The SUSV3 Posix spec is very clear that we 1216 * should get no error from validating the 1217 * timer until we would actually sleep. 1218 */ 1219 error = time_error; 1220 break; 1221 } 1222 1223 if (watched) { 1224 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1225 watched = 0; 1226 } 1227 1228 /* 1229 * Put the lwp in an orderly state for debugging. 1230 */ 1231 prstop(PR_REQUESTED, 0); 1232 if (timedwait) { 1233 /* 1234 * If we successfully queue the timeout, 1235 * then don't drop t_delay_lock until 1236 * we are on the sleep queue (below). 1237 */ 1238 mutex_enter(&t->t_delay_lock); 1239 if (lwp_timer_enqueue(&lwpt) != 0) { 1240 mutex_exit(&t->t_delay_lock); 1241 imm_timeout = 1; 1242 timedwait = NULL; 1243 } 1244 } 1245 lwp_block(&lwpchan); 1246 /* 1247 * Nothing should happen to cause the lwp to go to 1248 * sleep again until after it returns from swtch(). 1249 */ 1250 if (timedwait) 1251 mutex_exit(&t->t_delay_lock); 1252 locked = 0; 1253 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1254 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 1255 setrun(t); 1256 swtch(); 1257 t->t_flag &= ~T_WAKEABLE; 1258 if (timedwait) 1259 tim = lwp_timer_dequeue(&lwpt); 1260 setallwatch(); 1261 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 1262 error = EINTR; 1263 else if (imm_timeout || (timedwait && tim == -1)) 1264 error = ETIME; 1265 if (error) { 1266 lwp->lwp_asleep = 0; 1267 lwp->lwp_sysabort = 0; 1268 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1269 S_WRITE); 1270 1271 /* 1272 * Need to re-compute waiters bit. The waiters field in 1273 * the lock is not reliable. Either of two things could 1274 * have occurred: no lwp may have called lwp_release() 1275 * for me but I have woken up due to a signal or 1276 * timeout. In this case, the waiter bit is incorrect 1277 * since it is still set to 1, set above. 1278 * OR an lwp_release() did occur for some other lwp on 1279 * the same lwpchan. In this case, the waiter bit is 1280 * correct. But which event occurred, one can't tell. 1281 * So, recompute. 1282 */ 1283 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1284 locked = 1; 1285 sqh = lwpsqhash(&lwpchan); 1286 disp_lock_enter(&sqh->sq_lock); 1287 waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan); 1288 disp_lock_exit(&sqh->sq_lock); 1289 break; 1290 } 1291 lwp->lwp_asleep = 0; 1292 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1293 S_WRITE); 1294 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1295 locked = 1; 1296 fuword8_noerr(&lp->mutex_waiters, &waiters); 1297 suword8_noerr(&lp->mutex_waiters, 1); 1298 if (type & LOCK_ROBUST) { 1299 fuword16_noerr(&lp->mutex_flag, &flag); 1300 if (flag & LOCK_NOTRECOVERABLE) { 1301 error = ENOTRECOVERABLE; 1302 break; 1303 } 1304 } 1305 } 1306 1307 if (t->t_mstate == LMS_USER_LOCK) 1308 (void) new_mstate(t, LMS_SYSTEM); 1309 1310 if (error == 0) { 1311 if (type & USYNC_PROCESS) 1312 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 1313 if (type & LOCK_ROBUST) { 1314 fuword16_noerr(&lp->mutex_flag, &flag); 1315 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1316 if (flag & LOCK_OWNERDEAD) 1317 error = EOWNERDEAD; 1318 else if (type & USYNC_PROCESS_ROBUST) 1319 error = ELOCKUNMAPPED; 1320 else 1321 error = EOWNERDEAD; 1322 } 1323 } 1324 } 1325 suword8_noerr(&lp->mutex_waiters, waiters); 1326 locked = 0; 1327 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1328 out: 1329 no_fault(); 1330 if (watched) 1331 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1332 if (tsp && !time_error) /* copyout the residual time left */ 1333 error = lwp_timer_copyout(&lwpt, error); 1334 if (error) 1335 return (set_errno(error)); 1336 return (0); 1337 } 1338 1339 /* 1340 * Obsolete lwp_mutex_lock() interface, no longer called from libc. 1341 * libc now calls lwp_mutex_timedlock(lp, NULL). 1342 * This system call trap continues to exist solely for the benefit 1343 * of old statically-linked binaries from Solaris 9 and before. 1344 * It should be removed from the system when we no longer care 1345 * about such applications. 1346 */ 1347 int 1348 lwp_mutex_lock(lwp_mutex_t *lp) 1349 { 1350 return (lwp_mutex_timedlock(lp, NULL)); 1351 } 1352 1353 static int 1354 iswanted(kthread_t *t, lwpchan_t *lwpchan) 1355 { 1356 /* 1357 * The caller holds the dispatcher lock on the sleep queue. 1358 */ 1359 while (t != NULL) { 1360 if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1361 t->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1362 return (1); 1363 t = t->t_link; 1364 } 1365 return (0); 1366 } 1367 1368 /* 1369 * Return the highest priority thread sleeping on this lwpchan. 1370 */ 1371 static kthread_t * 1372 lwp_queue_waiter(lwpchan_t *lwpchan) 1373 { 1374 sleepq_head_t *sqh; 1375 kthread_t *tp; 1376 1377 sqh = lwpsqhash(lwpchan); 1378 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1379 for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) { 1380 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1381 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1382 break; 1383 } 1384 disp_lock_exit(&sqh->sq_lock); 1385 return (tp); 1386 } 1387 1388 static int 1389 lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type) 1390 { 1391 sleepq_head_t *sqh; 1392 kthread_t *tp; 1393 kthread_t **tpp; 1394 1395 sqh = lwpsqhash(lwpchan); 1396 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1397 tpp = &sqh->sq_queue.sq_first; 1398 while ((tp = *tpp) != NULL) { 1399 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1400 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1401 /* 1402 * The following is typically false. It could be true 1403 * only if lwp_release() is called from 1404 * lwp_mutex_wakeup() after reading the waiters field 1405 * from memory in which the lwp lock used to be, but has 1406 * since been re-used to hold a lwp cv or lwp semaphore. 1407 * The thread "tp" found to match the lwp lock's wchan 1408 * is actually sleeping for the cv or semaphore which 1409 * now has the same wchan. In this case, lwp_release() 1410 * should return failure. 1411 */ 1412 if (sync_type != (tp->t_flag & T_WAITCVSEM)) { 1413 ASSERT(sync_type == 0); 1414 /* 1415 * assert that this can happen only for mutexes 1416 * i.e. sync_type == 0, for correctly written 1417 * user programs. 1418 */ 1419 disp_lock_exit(&sqh->sq_lock); 1420 return (0); 1421 } 1422 *waiters = iswanted(tp->t_link, lwpchan); 1423 sleepq_unlink(tpp, tp); 1424 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1425 tp->t_wchan0 = NULL; 1426 tp->t_wchan = NULL; 1427 tp->t_sobj_ops = NULL; 1428 tp->t_release = 1; 1429 THREAD_TRANSITION(tp); /* drops sleepq lock */ 1430 CL_WAKEUP(tp); 1431 thread_unlock(tp); /* drop run queue lock */ 1432 return (1); 1433 } 1434 tpp = &tp->t_link; 1435 } 1436 *waiters = 0; 1437 disp_lock_exit(&sqh->sq_lock); 1438 return (0); 1439 } 1440 1441 static void 1442 lwp_release_all(lwpchan_t *lwpchan) 1443 { 1444 sleepq_head_t *sqh; 1445 kthread_t *tp; 1446 kthread_t **tpp; 1447 1448 sqh = lwpsqhash(lwpchan); 1449 disp_lock_enter(&sqh->sq_lock); /* lock sleep q queue */ 1450 tpp = &sqh->sq_queue.sq_first; 1451 while ((tp = *tpp) != NULL) { 1452 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1453 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1454 sleepq_unlink(tpp, tp); 1455 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1456 tp->t_wchan0 = NULL; 1457 tp->t_wchan = NULL; 1458 tp->t_sobj_ops = NULL; 1459 CL_WAKEUP(tp); 1460 thread_unlock_high(tp); /* release run queue lock */ 1461 } else { 1462 tpp = &tp->t_link; 1463 } 1464 } 1465 disp_lock_exit(&sqh->sq_lock); /* drop sleep q lock */ 1466 } 1467 1468 /* 1469 * unblock a lwp that is trying to acquire this mutex. the blocked 1470 * lwp resumes and retries to acquire the lock. 1471 */ 1472 int 1473 lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all) 1474 { 1475 proc_t *p = ttoproc(curthread); 1476 lwpchan_t lwpchan; 1477 uchar_t waiters; 1478 volatile int locked = 0; 1479 volatile int watched = 0; 1480 volatile uint8_t type = 0; 1481 label_t ljb; 1482 int error = 0; 1483 1484 if ((caddr_t)lp >= p->p_as->a_userlimit) 1485 return (set_errno(EFAULT)); 1486 1487 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1488 1489 if (on_fault(&ljb)) { 1490 if (locked) 1491 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1492 error = EFAULT; 1493 goto out; 1494 } 1495 /* 1496 * Force Copy-on-write if necessary and ensure that the 1497 * synchronization object resides in read/write memory. 1498 * Cause an EFAULT return now if this is not so. 1499 */ 1500 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1501 suword8_noerr(&lp->mutex_type, type); 1502 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1503 &lwpchan, LWPCHAN_MPPOOL)) { 1504 error = EFAULT; 1505 goto out; 1506 } 1507 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1508 locked = 1; 1509 /* 1510 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 1511 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 1512 * may fail. If it fails, do not write into the waiter bit. 1513 * The call to lwp_release() might fail due to one of three reasons: 1514 * 1515 * 1. due to the thread which set the waiter bit not actually 1516 * sleeping since it got the lock on the re-try. The waiter 1517 * bit will then be correctly updated by that thread. This 1518 * window may be closed by reading the wait bit again here 1519 * and not calling lwp_release() at all if it is zero. 1520 * 2. the thread which set the waiter bit and went to sleep 1521 * was woken up by a signal. This time, the waiter recomputes 1522 * the wait bit in the return with EINTR code. 1523 * 3. the waiter bit read by lwp_mutex_wakeup() was in 1524 * memory that has been re-used after the lock was dropped. 1525 * In this case, writing into the waiter bit would cause data 1526 * corruption. 1527 */ 1528 if (release_all) 1529 lwp_release_all(&lwpchan); 1530 else if (lwp_release(&lwpchan, &waiters, 0)) 1531 suword8_noerr(&lp->mutex_waiters, waiters); 1532 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1533 out: 1534 no_fault(); 1535 if (watched) 1536 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1537 if (error) 1538 return (set_errno(error)); 1539 return (0); 1540 } 1541 1542 /* 1543 * lwp_cond_wait() has four arguments, a pointer to a condition variable, 1544 * a pointer to a mutex, a pointer to a timespec for a timed wait and 1545 * a flag telling the kernel whether or not to honor the kernel/user 1546 * schedctl parking protocol (see schedctl_is_park() in schedctl.c). 1547 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an 1548 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL, 1549 * it is used an an in/out parameter. On entry, it contains the relative 1550 * time until timeout. On exit, we copyout the residual time left to it. 1551 */ 1552 int 1553 lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park) 1554 { 1555 kthread_t *t = curthread; 1556 klwp_t *lwp = ttolwp(t); 1557 proc_t *p = ttoproc(t); 1558 lwp_timer_t lwpt; 1559 lwpchan_t cv_lwpchan; 1560 lwpchan_t m_lwpchan; 1561 caddr_t timedwait; 1562 volatile uint16_t type = 0; 1563 volatile uint8_t mtype = 0; 1564 uchar_t waiters; 1565 volatile int error; 1566 clock_t tim = -1; 1567 volatile int locked = 0; 1568 volatile int m_locked = 0; 1569 volatile int cvwatched = 0; 1570 volatile int mpwatched = 0; 1571 label_t ljb; 1572 volatile int no_lwpchan = 1; 1573 int imm_timeout = 0; 1574 int imm_unpark = 0; 1575 1576 if ((caddr_t)cv >= p->p_as->a_userlimit || 1577 (caddr_t)mp >= p->p_as->a_userlimit) 1578 return (set_errno(EFAULT)); 1579 1580 timedwait = (caddr_t)tsp; 1581 if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0) 1582 return (set_errno(error)); 1583 if (lwpt.lwpt_imm_timeout) { 1584 imm_timeout = 1; 1585 timedwait = NULL; 1586 } 1587 1588 (void) new_mstate(t, LMS_USER_LOCK); 1589 1590 if (on_fault(&ljb)) { 1591 if (no_lwpchan) { 1592 error = EFAULT; 1593 goto out; 1594 } 1595 if (m_locked) { 1596 m_locked = 0; 1597 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1598 } 1599 if (locked) { 1600 locked = 0; 1601 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1602 } 1603 /* 1604 * set up another on_fault() for a possible fault 1605 * on the user lock accessed at "efault" 1606 */ 1607 if (on_fault(&ljb)) { 1608 if (m_locked) { 1609 m_locked = 0; 1610 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1611 } 1612 goto out; 1613 } 1614 error = EFAULT; 1615 goto efault; 1616 } 1617 1618 /* 1619 * Force Copy-on-write if necessary and ensure that the 1620 * synchronization object resides in read/write memory. 1621 * Cause an EFAULT return now if this is not so. 1622 */ 1623 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 1624 suword8_noerr(&mp->mutex_type, mtype); 1625 if (UPIMUTEX(mtype) == 0) { 1626 /* convert user level mutex, "mp", to a unique lwpchan */ 1627 /* check if mtype is ok to use below, instead of type from cv */ 1628 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 1629 &m_lwpchan, LWPCHAN_MPPOOL)) { 1630 error = EFAULT; 1631 goto out; 1632 } 1633 } 1634 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1635 suword16_noerr(&cv->cond_type, type); 1636 /* convert user level condition variable, "cv", to a unique lwpchan */ 1637 if (!get_lwpchan(p->p_as, (caddr_t)cv, type, 1638 &cv_lwpchan, LWPCHAN_CVPOOL)) { 1639 error = EFAULT; 1640 goto out; 1641 } 1642 no_lwpchan = 0; 1643 cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1644 if (UPIMUTEX(mtype) == 0) 1645 mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), 1646 S_WRITE); 1647 1648 /* 1649 * lwpchan_lock ensures that the calling lwp is put to sleep atomically 1650 * with respect to a possible wakeup which is a result of either 1651 * an lwp_cond_signal() or an lwp_cond_broadcast(). 1652 * 1653 * What's misleading, is that the lwp is put to sleep after the 1654 * condition variable's mutex is released. This is OK as long as 1655 * the release operation is also done while holding lwpchan_lock. 1656 * The lwp is then put to sleep when the possibility of pagefaulting 1657 * or sleeping is completely eliminated. 1658 */ 1659 lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL); 1660 locked = 1; 1661 if (UPIMUTEX(mtype) == 0) { 1662 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1663 m_locked = 1; 1664 suword8_noerr(&cv->cond_waiters_kernel, 1); 1665 /* 1666 * unlock the condition variable's mutex. (pagefaults are 1667 * possible here.) 1668 */ 1669 if (mtype & USYNC_PROCESS) 1670 suword32_noerr(&mp->mutex_ownerpid, 0); 1671 ulock_clear(&mp->mutex_lockw); 1672 fuword8_noerr(&mp->mutex_waiters, &waiters); 1673 if (waiters != 0) { 1674 /* 1675 * Given the locking of lwpchan_lock around the release 1676 * of the mutex and checking for waiters, the following 1677 * call to lwp_release() can fail ONLY if the lock 1678 * acquirer is interrupted after setting the waiter bit, 1679 * calling lwp_block() and releasing lwpchan_lock. 1680 * In this case, it could get pulled off the lwp sleep 1681 * q (via setrun()) before the following call to 1682 * lwp_release() occurs. In this case, the lock 1683 * requestor will update the waiter bit correctly by 1684 * re-evaluating it. 1685 */ 1686 if (lwp_release(&m_lwpchan, &waiters, 0)) 1687 suword8_noerr(&mp->mutex_waiters, waiters); 1688 } 1689 m_locked = 0; 1690 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1691 } else { 1692 suword8_noerr(&cv->cond_waiters_kernel, 1); 1693 error = lwp_upimutex_unlock(mp, mtype); 1694 if (error) { /* if the upimutex unlock failed */ 1695 locked = 0; 1696 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1697 goto out; 1698 } 1699 } 1700 no_fault(); 1701 1702 if (mpwatched) { 1703 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1704 mpwatched = 0; 1705 } 1706 if (cvwatched) { 1707 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1708 cvwatched = 0; 1709 } 1710 1711 /* 1712 * Put the lwp in an orderly state for debugging. 1713 */ 1714 prstop(PR_REQUESTED, 0); 1715 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 1716 /* 1717 * We received a signal at user-level before calling here 1718 * or another thread wants us to return immediately 1719 * with EINTR. See lwp_unpark(). 1720 */ 1721 imm_unpark = 1; 1722 t->t_unpark = 0; 1723 timedwait = NULL; 1724 } else if (timedwait) { 1725 /* 1726 * If we successfully queue the timeout, 1727 * then don't drop t_delay_lock until 1728 * we are on the sleep queue (below). 1729 */ 1730 mutex_enter(&t->t_delay_lock); 1731 if (lwp_timer_enqueue(&lwpt) != 0) { 1732 mutex_exit(&t->t_delay_lock); 1733 imm_timeout = 1; 1734 timedwait = NULL; 1735 } 1736 } 1737 t->t_flag |= T_WAITCVSEM; 1738 lwp_block(&cv_lwpchan); 1739 /* 1740 * Nothing should happen to cause the lwp to go to sleep 1741 * until after it returns from swtch(). 1742 */ 1743 if (timedwait) 1744 mutex_exit(&t->t_delay_lock); 1745 locked = 0; 1746 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1747 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 1748 (imm_timeout | imm_unpark)) 1749 setrun(t); 1750 swtch(); 1751 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 1752 if (timedwait) 1753 tim = lwp_timer_dequeue(&lwpt); 1754 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 1755 MUSTRETURN(p, t) || imm_unpark) 1756 error = EINTR; 1757 else if (imm_timeout || (timedwait && tim == -1)) 1758 error = ETIME; 1759 lwp->lwp_asleep = 0; 1760 lwp->lwp_sysabort = 0; 1761 setallwatch(); 1762 1763 if (t->t_mstate == LMS_USER_LOCK) 1764 (void) new_mstate(t, LMS_SYSTEM); 1765 1766 if (tsp && check_park) /* copyout the residual time left */ 1767 error = lwp_timer_copyout(&lwpt, error); 1768 1769 /* the mutex is reacquired by the caller on return to user level */ 1770 if (error) { 1771 /* 1772 * If we were concurrently lwp_cond_signal()d and we 1773 * received a UNIX signal or got a timeout, then perform 1774 * another lwp_cond_signal() to avoid consuming the wakeup. 1775 */ 1776 if (t->t_release) 1777 (void) lwp_cond_signal(cv); 1778 return (set_errno(error)); 1779 } 1780 return (0); 1781 1782 efault: 1783 /* 1784 * make sure that the user level lock is dropped before 1785 * returning to caller, since the caller always re-acquires it. 1786 */ 1787 if (UPIMUTEX(mtype) == 0) { 1788 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1789 m_locked = 1; 1790 if (mtype & USYNC_PROCESS) 1791 suword32_noerr(&mp->mutex_ownerpid, 0); 1792 ulock_clear(&mp->mutex_lockw); 1793 fuword8_noerr(&mp->mutex_waiters, &waiters); 1794 if (waiters != 0) { 1795 /* 1796 * See comment above on lock clearing and lwp_release() 1797 * success/failure. 1798 */ 1799 if (lwp_release(&m_lwpchan, &waiters, 0)) 1800 suword8_noerr(&mp->mutex_waiters, waiters); 1801 } 1802 m_locked = 0; 1803 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1804 } else { 1805 (void) lwp_upimutex_unlock(mp, mtype); 1806 } 1807 out: 1808 no_fault(); 1809 if (mpwatched) 1810 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1811 if (cvwatched) 1812 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1813 if (t->t_mstate == LMS_USER_LOCK) 1814 (void) new_mstate(t, LMS_SYSTEM); 1815 return (set_errno(error)); 1816 } 1817 1818 /* 1819 * wakeup one lwp that's blocked on this condition variable. 1820 */ 1821 int 1822 lwp_cond_signal(lwp_cond_t *cv) 1823 { 1824 proc_t *p = ttoproc(curthread); 1825 lwpchan_t lwpchan; 1826 uchar_t waiters; 1827 volatile uint16_t type = 0; 1828 volatile int locked = 0; 1829 volatile int watched = 0; 1830 label_t ljb; 1831 int error = 0; 1832 1833 if ((caddr_t)cv >= p->p_as->a_userlimit) 1834 return (set_errno(EFAULT)); 1835 1836 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1837 1838 if (on_fault(&ljb)) { 1839 if (locked) 1840 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1841 error = EFAULT; 1842 goto out; 1843 } 1844 /* 1845 * Force Copy-on-write if necessary and ensure that the 1846 * synchronization object resides in read/write memory. 1847 * Cause an EFAULT return now if this is not so. 1848 */ 1849 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1850 suword16_noerr(&cv->cond_type, type); 1851 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1852 &lwpchan, LWPCHAN_CVPOOL)) { 1853 error = EFAULT; 1854 goto out; 1855 } 1856 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1857 locked = 1; 1858 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1859 if (waiters != 0) { 1860 /* 1861 * The following call to lwp_release() might fail but it is 1862 * OK to write into the waiters bit below, since the memory 1863 * could not have been re-used or unmapped (for correctly 1864 * written user programs) as in the case of lwp_mutex_wakeup(). 1865 * For an incorrect program, we should not care about data 1866 * corruption since this is just one instance of other places 1867 * where corruption can occur for such a program. Of course 1868 * if the memory is unmapped, normal fault recovery occurs. 1869 */ 1870 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1871 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1872 } 1873 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1874 out: 1875 no_fault(); 1876 if (watched) 1877 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1878 if (error) 1879 return (set_errno(error)); 1880 return (0); 1881 } 1882 1883 /* 1884 * wakeup every lwp that's blocked on this condition variable. 1885 */ 1886 int 1887 lwp_cond_broadcast(lwp_cond_t *cv) 1888 { 1889 proc_t *p = ttoproc(curthread); 1890 lwpchan_t lwpchan; 1891 volatile uint16_t type = 0; 1892 volatile int locked = 0; 1893 volatile int watched = 0; 1894 label_t ljb; 1895 uchar_t waiters; 1896 int error = 0; 1897 1898 if ((caddr_t)cv >= p->p_as->a_userlimit) 1899 return (set_errno(EFAULT)); 1900 1901 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1902 1903 if (on_fault(&ljb)) { 1904 if (locked) 1905 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1906 error = EFAULT; 1907 goto out; 1908 } 1909 /* 1910 * Force Copy-on-write if necessary and ensure that the 1911 * synchronization object resides in read/write memory. 1912 * Cause an EFAULT return now if this is not so. 1913 */ 1914 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1915 suword16_noerr(&cv->cond_type, type); 1916 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1917 &lwpchan, LWPCHAN_CVPOOL)) { 1918 error = EFAULT; 1919 goto out; 1920 } 1921 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1922 locked = 1; 1923 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1924 if (waiters != 0) { 1925 lwp_release_all(&lwpchan); 1926 suword8_noerr(&cv->cond_waiters_kernel, 0); 1927 } 1928 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1929 out: 1930 no_fault(); 1931 if (watched) 1932 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1933 if (error) 1934 return (set_errno(error)); 1935 return (0); 1936 } 1937 1938 int 1939 lwp_sema_trywait(lwp_sema_t *sp) 1940 { 1941 kthread_t *t = curthread; 1942 proc_t *p = ttoproc(t); 1943 label_t ljb; 1944 volatile int locked = 0; 1945 volatile int watched = 0; 1946 volatile uint16_t type = 0; 1947 int count; 1948 lwpchan_t lwpchan; 1949 uchar_t waiters; 1950 int error = 0; 1951 1952 if ((caddr_t)sp >= p->p_as->a_userlimit) 1953 return (set_errno(EFAULT)); 1954 1955 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1956 1957 if (on_fault(&ljb)) { 1958 if (locked) 1959 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1960 error = EFAULT; 1961 goto out; 1962 } 1963 /* 1964 * Force Copy-on-write if necessary and ensure that the 1965 * synchronization object resides in read/write memory. 1966 * Cause an EFAULT return now if this is not so. 1967 */ 1968 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1969 suword16_noerr((void *)&sp->sema_type, type); 1970 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1971 &lwpchan, LWPCHAN_CVPOOL)) { 1972 error = EFAULT; 1973 goto out; 1974 } 1975 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1976 locked = 1; 1977 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1978 if (count == 0) 1979 error = EBUSY; 1980 else 1981 suword32_noerr((void *)&sp->sema_count, --count); 1982 if (count != 0) { 1983 fuword8_noerr(&sp->sema_waiters, &waiters); 1984 if (waiters != 0) { 1985 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1986 suword8_noerr(&sp->sema_waiters, waiters); 1987 } 1988 } 1989 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1990 out: 1991 no_fault(); 1992 if (watched) 1993 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1994 if (error) 1995 return (set_errno(error)); 1996 return (0); 1997 } 1998 1999 /* 2000 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 2001 */ 2002 int 2003 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2004 { 2005 kthread_t *t = curthread; 2006 klwp_t *lwp = ttolwp(t); 2007 proc_t *p = ttoproc(t); 2008 lwp_timer_t lwpt; 2009 caddr_t timedwait; 2010 clock_t tim = -1; 2011 label_t ljb; 2012 volatile int locked = 0; 2013 volatile int watched = 0; 2014 volatile uint16_t type = 0; 2015 int count; 2016 lwpchan_t lwpchan; 2017 uchar_t waiters; 2018 int error = 0; 2019 int time_error; 2020 int imm_timeout = 0; 2021 int imm_unpark = 0; 2022 2023 if ((caddr_t)sp >= p->p_as->a_userlimit) 2024 return (set_errno(EFAULT)); 2025 2026 timedwait = (caddr_t)tsp; 2027 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2028 lwpt.lwpt_imm_timeout) { 2029 imm_timeout = 1; 2030 timedwait = NULL; 2031 } 2032 2033 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2034 2035 if (on_fault(&ljb)) { 2036 if (locked) 2037 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2038 error = EFAULT; 2039 goto out; 2040 } 2041 /* 2042 * Force Copy-on-write if necessary and ensure that the 2043 * synchronization object resides in read/write memory. 2044 * Cause an EFAULT return now if this is not so. 2045 */ 2046 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2047 suword16_noerr((void *)&sp->sema_type, type); 2048 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2049 &lwpchan, LWPCHAN_CVPOOL)) { 2050 error = EFAULT; 2051 goto out; 2052 } 2053 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2054 locked = 1; 2055 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2056 while (error == 0 && count == 0) { 2057 if (time_error) { 2058 /* 2059 * The SUSV3 Posix spec is very clear that we 2060 * should get no error from validating the 2061 * timer until we would actually sleep. 2062 */ 2063 error = time_error; 2064 break; 2065 } 2066 suword8_noerr(&sp->sema_waiters, 1); 2067 if (watched) 2068 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2069 /* 2070 * Put the lwp in an orderly state for debugging. 2071 */ 2072 prstop(PR_REQUESTED, 0); 2073 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2074 /* 2075 * We received a signal at user-level before calling 2076 * here or another thread wants us to return 2077 * immediately with EINTR. See lwp_unpark(). 2078 */ 2079 imm_unpark = 1; 2080 t->t_unpark = 0; 2081 timedwait = NULL; 2082 } else if (timedwait) { 2083 /* 2084 * If we successfully queue the timeout, 2085 * then don't drop t_delay_lock until 2086 * we are on the sleep queue (below). 2087 */ 2088 mutex_enter(&t->t_delay_lock); 2089 if (lwp_timer_enqueue(&lwpt) != 0) { 2090 mutex_exit(&t->t_delay_lock); 2091 imm_timeout = 1; 2092 timedwait = NULL; 2093 } 2094 } 2095 t->t_flag |= T_WAITCVSEM; 2096 lwp_block(&lwpchan); 2097 /* 2098 * Nothing should happen to cause the lwp to sleep 2099 * again until after it returns from swtch(). 2100 */ 2101 if (timedwait) 2102 mutex_exit(&t->t_delay_lock); 2103 locked = 0; 2104 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2105 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2106 (imm_timeout | imm_unpark)) 2107 setrun(t); 2108 swtch(); 2109 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2110 if (timedwait) 2111 tim = lwp_timer_dequeue(&lwpt); 2112 setallwatch(); 2113 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2114 MUSTRETURN(p, t) || imm_unpark) 2115 error = EINTR; 2116 else if (imm_timeout || (timedwait && tim == -1)) 2117 error = ETIME; 2118 lwp->lwp_asleep = 0; 2119 lwp->lwp_sysabort = 0; 2120 watched = watch_disable_addr((caddr_t)sp, 2121 sizeof (*sp), S_WRITE); 2122 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2123 locked = 1; 2124 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2125 } 2126 if (error == 0) 2127 suword32_noerr((void *)&sp->sema_count, --count); 2128 if (count != 0) { 2129 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2130 suword8_noerr(&sp->sema_waiters, waiters); 2131 } 2132 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2133 out: 2134 no_fault(); 2135 if (watched) 2136 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2137 if (tsp && check_park && !time_error) 2138 error = lwp_timer_copyout(&lwpt, error); 2139 if (error) 2140 return (set_errno(error)); 2141 return (0); 2142 } 2143 2144 /* 2145 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2146 * libc now calls lwp_sema_timedwait(). 2147 * This system call trap exists solely for the benefit of old 2148 * statically linked applications from Solaris 9 and before. 2149 * It should be removed when we no longer care about such applications. 2150 */ 2151 int 2152 lwp_sema_wait(lwp_sema_t *sp) 2153 { 2154 return (lwp_sema_timedwait(sp, NULL, 0)); 2155 } 2156 2157 int 2158 lwp_sema_post(lwp_sema_t *sp) 2159 { 2160 proc_t *p = ttoproc(curthread); 2161 label_t ljb; 2162 volatile int locked = 0; 2163 volatile int watched = 0; 2164 volatile uint16_t type = 0; 2165 int count; 2166 lwpchan_t lwpchan; 2167 uchar_t waiters; 2168 int error = 0; 2169 2170 if ((caddr_t)sp >= p->p_as->a_userlimit) 2171 return (set_errno(EFAULT)); 2172 2173 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2174 2175 if (on_fault(&ljb)) { 2176 if (locked) 2177 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2178 error = EFAULT; 2179 goto out; 2180 } 2181 /* 2182 * Force Copy-on-write if necessary and ensure that the 2183 * synchronization object resides in read/write memory. 2184 * Cause an EFAULT return now if this is not so. 2185 */ 2186 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2187 suword16_noerr(&sp->sema_type, type); 2188 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2189 &lwpchan, LWPCHAN_CVPOOL)) { 2190 error = EFAULT; 2191 goto out; 2192 } 2193 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2194 locked = 1; 2195 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2196 if (count == _SEM_VALUE_MAX) 2197 error = EOVERFLOW; 2198 else 2199 suword32_noerr(&sp->sema_count, ++count); 2200 if (count == 1) { 2201 fuword8_noerr(&sp->sema_waiters, &waiters); 2202 if (waiters) { 2203 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2204 suword8_noerr(&sp->sema_waiters, waiters); 2205 } 2206 } 2207 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2208 out: 2209 no_fault(); 2210 if (watched) 2211 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2212 if (error) 2213 return (set_errno(error)); 2214 return (0); 2215 } 2216 2217 #define TRW_WANT_WRITE 0x1 2218 #define TRW_LOCK_GRANTED 0x2 2219 2220 #define READ_LOCK 0 2221 #define WRITE_LOCK 1 2222 #define TRY_FLAG 0x10 2223 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2224 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2225 2226 /* 2227 * Release one writer or one or more readers. Compute the rwstate word to 2228 * reflect the new state of the queue. For a safe hand-off we copy the new 2229 * rwstate value back to userland before we wake any of the new lock holders. 2230 * 2231 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2232 * being given precedence over readers of the same priority). 2233 * 2234 * If the first thread is a reader we scan the queue releasing all readers 2235 * until we hit a writer or the end of the queue. If the first thread is a 2236 * writer we still need to check for another writer. 2237 */ 2238 void 2239 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2240 { 2241 sleepq_head_t *sqh; 2242 kthread_t *tp; 2243 kthread_t **tpp; 2244 kthread_t *tpnext; 2245 kthread_t *wakelist = NULL; 2246 uint32_t rwstate = 0; 2247 int wcount = 0; 2248 int rcount = 0; 2249 2250 sqh = lwpsqhash(lwpchan); 2251 disp_lock_enter(&sqh->sq_lock); 2252 tpp = &sqh->sq_queue.sq_first; 2253 while ((tp = *tpp) != NULL) { 2254 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2255 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2256 if (tp->t_writer & TRW_WANT_WRITE) { 2257 if ((wcount++ == 0) && (rcount == 0)) { 2258 rwstate |= URW_WRITE_LOCKED; 2259 2260 /* Just one writer to wake. */ 2261 sleepq_unlink(tpp, tp); 2262 wakelist = tp; 2263 2264 /* tpp already set for next thread. */ 2265 continue; 2266 } else { 2267 rwstate |= URW_HAS_WAITERS; 2268 /* We need look no further. */ 2269 break; 2270 } 2271 } else { 2272 rcount++; 2273 if (wcount == 0) { 2274 rwstate++; 2275 2276 /* Add reader to wake list. */ 2277 sleepq_unlink(tpp, tp); 2278 tp->t_link = wakelist; 2279 wakelist = tp; 2280 2281 /* tpp already set for next thread. */ 2282 continue; 2283 } else { 2284 rwstate |= URW_HAS_WAITERS; 2285 /* We need look no further. */ 2286 break; 2287 } 2288 } 2289 } 2290 tpp = &tp->t_link; 2291 } 2292 2293 /* Copy the new rwstate back to userland. */ 2294 suword32_noerr(&rw->rwlock_readers, rwstate); 2295 2296 /* Wake the new lock holder(s) up. */ 2297 tp = wakelist; 2298 while (tp != NULL) { 2299 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2300 tp->t_wchan0 = NULL; 2301 tp->t_wchan = NULL; 2302 tp->t_sobj_ops = NULL; 2303 tp->t_writer |= TRW_LOCK_GRANTED; 2304 tpnext = tp->t_link; 2305 tp->t_link = NULL; 2306 CL_WAKEUP(tp); 2307 thread_unlock_high(tp); 2308 tp = tpnext; 2309 } 2310 2311 disp_lock_exit(&sqh->sq_lock); 2312 } 2313 2314 /* 2315 * We enter here holding the user-level mutex, which we must release before 2316 * returning or blocking. Based on lwp_cond_wait(). 2317 */ 2318 static int 2319 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2320 { 2321 lwp_mutex_t *mp = NULL; 2322 kthread_t *t = curthread; 2323 kthread_t *tp; 2324 klwp_t *lwp = ttolwp(t); 2325 proc_t *p = ttoproc(t); 2326 lwp_timer_t lwpt; 2327 lwpchan_t lwpchan; 2328 lwpchan_t mlwpchan; 2329 caddr_t timedwait; 2330 volatile uint16_t type = 0; 2331 volatile uint8_t mtype = 0; 2332 uchar_t mwaiters; 2333 volatile int error = 0; 2334 int time_error; 2335 clock_t tim = -1; 2336 volatile int locked = 0; 2337 volatile int mlocked = 0; 2338 volatile int watched = 0; 2339 volatile int mwatched = 0; 2340 label_t ljb; 2341 volatile int no_lwpchan = 1; 2342 int imm_timeout = 0; 2343 int try_flag; 2344 uint32_t rwstate; 2345 int acquired = 0; 2346 2347 /* We only check rw because the mutex is included in it. */ 2348 if ((caddr_t)rw >= p->p_as->a_userlimit) 2349 return (set_errno(EFAULT)); 2350 2351 /* We must only report this error if we are about to sleep (later). */ 2352 timedwait = (caddr_t)tsp; 2353 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2354 lwpt.lwpt_imm_timeout) { 2355 imm_timeout = 1; 2356 timedwait = NULL; 2357 } 2358 2359 (void) new_mstate(t, LMS_USER_LOCK); 2360 2361 if (on_fault(&ljb)) { 2362 if (no_lwpchan) { 2363 error = EFAULT; 2364 goto out_nodrop; 2365 } 2366 if (mlocked) { 2367 mlocked = 0; 2368 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2369 } 2370 if (locked) { 2371 locked = 0; 2372 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2373 } 2374 /* 2375 * Set up another on_fault() for a possible fault 2376 * on the user lock accessed at "out_drop". 2377 */ 2378 if (on_fault(&ljb)) { 2379 if (mlocked) { 2380 mlocked = 0; 2381 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2382 } 2383 error = EFAULT; 2384 goto out_nodrop; 2385 } 2386 error = EFAULT; 2387 goto out_nodrop; 2388 } 2389 2390 /* Process rd_wr (including sanity check). */ 2391 try_flag = (rd_wr & TRY_FLAG); 2392 rd_wr &= ~TRY_FLAG; 2393 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2394 error = EINVAL; 2395 goto out_nodrop; 2396 } 2397 2398 /* 2399 * Force Copy-on-write if necessary and ensure that the 2400 * synchronization object resides in read/write memory. 2401 * Cause an EFAULT return now if this is not so. 2402 */ 2403 mp = &rw->mutex; 2404 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2405 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2406 suword8_noerr(&mp->mutex_type, mtype); 2407 suword16_noerr(&rw->rwlock_type, type); 2408 2409 /* We can only continue for simple USYNC_PROCESS locks. */ 2410 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2411 error = EINVAL; 2412 goto out_nodrop; 2413 } 2414 2415 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2416 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2417 &mlwpchan, LWPCHAN_MPPOOL)) { 2418 error = EFAULT; 2419 goto out_nodrop; 2420 } 2421 2422 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2423 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2424 &lwpchan, LWPCHAN_CVPOOL)) { 2425 error = EFAULT; 2426 goto out_nodrop; 2427 } 2428 2429 no_lwpchan = 0; 2430 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2431 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2432 2433 /* 2434 * lwpchan_lock() ensures that the calling LWP is put to sleep 2435 * atomically with respect to a possible wakeup which is a result 2436 * of lwp_rwlock_unlock(). 2437 * 2438 * What's misleading is that the LWP is put to sleep after the 2439 * rwlock's mutex is released. This is OK as long as the release 2440 * operation is also done while holding mlwpchan. The LWP is then 2441 * put to sleep when the possibility of pagefaulting or sleeping 2442 * has been completely eliminated. 2443 */ 2444 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2445 locked = 1; 2446 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2447 mlocked = 1; 2448 2449 /* 2450 * Fetch the current rwlock state. 2451 * 2452 * The possibility of spurious wake-ups or killed waiters means 2453 * rwstate's URW_HAS_WAITERS bit may indicate false positives. 2454 * We only fix these if they are important to us. 2455 * 2456 * Although various error states can be observed here (e.g. the lock 2457 * is not held, but there are waiters) we assume these are applicaton 2458 * errors and so we take no corrective action. 2459 */ 2460 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2461 /* 2462 * We cannot legitimately get here from user-level 2463 * without URW_HAS_WAITERS being set. 2464 * Set it now to guard against user-level error. 2465 */ 2466 rwstate |= URW_HAS_WAITERS; 2467 2468 /* 2469 * We can try only if the lock isn't held by a writer. 2470 */ 2471 if (!(rwstate & URW_WRITE_LOCKED)) { 2472 tp = lwp_queue_waiter(&lwpchan); 2473 if (tp == NULL) { 2474 /* 2475 * Hmmm, rwstate indicates waiters but there are 2476 * none queued. This could just be the result of a 2477 * spurious wakeup, so let's ignore it. 2478 * 2479 * We now have a chance to acquire the lock 2480 * uncontended, but this is the last chance for 2481 * a writer to acquire the lock without blocking. 2482 */ 2483 if (rd_wr == READ_LOCK) { 2484 rwstate++; 2485 acquired = 1; 2486 } else if ((rwstate & URW_READERS_MASK) == 0) { 2487 rwstate |= URW_WRITE_LOCKED; 2488 acquired = 1; 2489 } 2490 } else if (rd_wr == READ_LOCK) { 2491 /* 2492 * This is the last chance for a reader to acquire 2493 * the lock now, but it can only do so if there is 2494 * no writer of equal or greater priority at the 2495 * head of the queue . 2496 * 2497 * It is also just possible that there is a reader 2498 * at the head of the queue. This may be the result 2499 * of a spurious wakeup or an application failure. 2500 * In this case we only acquire the lock if we have 2501 * equal or greater priority. It is not our job to 2502 * release spurious waiters. 2503 */ 2504 pri_t our_pri = DISP_PRIO(t); 2505 pri_t his_pri = DISP_PRIO(tp); 2506 2507 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2508 !(tp->t_writer & TRW_WANT_WRITE))) { 2509 rwstate++; 2510 acquired = 1; 2511 } 2512 } 2513 } 2514 2515 if (acquired || try_flag || time_error) { 2516 /* 2517 * We're not going to block this time. 2518 */ 2519 suword32_noerr(&rw->rwlock_readers, rwstate); 2520 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2521 locked = 0; 2522 2523 if (acquired) { 2524 /* 2525 * Got the lock! 2526 */ 2527 error = 0; 2528 2529 } else if (try_flag) { 2530 /* 2531 * We didn't get the lock and we're about to block. 2532 * If we're doing a trylock, return EBUSY instead. 2533 */ 2534 error = EBUSY; 2535 2536 } else if (time_error) { 2537 /* 2538 * The SUSV3 POSIX spec is very clear that we should 2539 * get no error from validating the timer (above) 2540 * until we would actually sleep. 2541 */ 2542 error = time_error; 2543 } 2544 2545 goto out_drop; 2546 } 2547 2548 /* 2549 * We're about to block, so indicate what kind of waiter we are. 2550 */ 2551 t->t_writer = 0; 2552 if (rd_wr == WRITE_LOCK) 2553 t->t_writer = TRW_WANT_WRITE; 2554 suword32_noerr(&rw->rwlock_readers, rwstate); 2555 2556 /* 2557 * Unlock the rwlock's mutex (pagefaults are possible here). 2558 */ 2559 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2560 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2561 suword32_noerr(&mp->mutex_ownerpid, 0); 2562 ulock_clear(&mp->mutex_lockw); 2563 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2564 if (mwaiters != 0) { 2565 /* 2566 * Given the locking of mlwpchan around the release of 2567 * the mutex and checking for waiters, the following 2568 * call to lwp_release() can fail ONLY if the lock 2569 * acquirer is interrupted after setting the waiter bit, 2570 * calling lwp_block() and releasing mlwpchan. 2571 * In this case, it could get pulled off the LWP sleep 2572 * queue (via setrun()) before the following call to 2573 * lwp_release() occurs, and the lock requestor will 2574 * update the waiter bit correctly by re-evaluating it. 2575 */ 2576 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2577 suword8_noerr(&mp->mutex_waiters, mwaiters); 2578 } 2579 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2580 mlocked = 0; 2581 no_fault(); 2582 2583 if (mwatched) { 2584 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2585 mwatched = 0; 2586 } 2587 if (watched) { 2588 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2589 watched = 0; 2590 } 2591 2592 /* 2593 * Put the LWP in an orderly state for debugging. 2594 */ 2595 prstop(PR_REQUESTED, 0); 2596 if (timedwait) { 2597 /* 2598 * If we successfully queue the timeout, 2599 * then don't drop t_delay_lock until 2600 * we are on the sleep queue (below). 2601 */ 2602 mutex_enter(&t->t_delay_lock); 2603 if (lwp_timer_enqueue(&lwpt) != 0) { 2604 mutex_exit(&t->t_delay_lock); 2605 imm_timeout = 1; 2606 timedwait = NULL; 2607 } 2608 } 2609 t->t_flag |= T_WAITCVSEM; 2610 lwp_block(&lwpchan); 2611 2612 /* 2613 * Nothing should happen to cause the LWp to go to sleep until after 2614 * it returns from swtch(). 2615 */ 2616 if (timedwait) 2617 mutex_exit(&t->t_delay_lock); 2618 locked = 0; 2619 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2620 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t)) 2621 setrun(t); 2622 swtch(); 2623 2624 /* 2625 * We're back, but we need to work out why. Were we interrupted? Did 2626 * we timeout? Were we granted the lock? 2627 */ 2628 error = EAGAIN; 2629 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2630 t->t_writer = 0; 2631 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2632 if (timedwait) 2633 tim = lwp_timer_dequeue(&lwpt); 2634 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2635 error = EINTR; 2636 else if (imm_timeout || (timedwait && tim == -1)) 2637 error = ETIME; 2638 lwp->lwp_asleep = 0; 2639 lwp->lwp_sysabort = 0; 2640 setallwatch(); 2641 2642 /* 2643 * If we were granted the lock we don't care about EINTR or ETIME. 2644 */ 2645 if (acquired) 2646 error = 0; 2647 2648 if (t->t_mstate == LMS_USER_LOCK) 2649 (void) new_mstate(t, LMS_SYSTEM); 2650 2651 if (error) 2652 return (set_errno(error)); 2653 return (0); 2654 2655 out_drop: 2656 /* 2657 * Make sure that the user level lock is dropped before returning 2658 * to the caller. 2659 */ 2660 if (!mlocked) { 2661 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2662 mlocked = 1; 2663 } 2664 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2665 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2666 suword32_noerr(&mp->mutex_ownerpid, 0); 2667 ulock_clear(&mp->mutex_lockw); 2668 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2669 if (mwaiters != 0) { 2670 /* 2671 * See comment above on lock clearing and lwp_release() 2672 * success/failure. 2673 */ 2674 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2675 suword8_noerr(&mp->mutex_waiters, mwaiters); 2676 } 2677 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2678 mlocked = 0; 2679 2680 out_nodrop: 2681 no_fault(); 2682 if (mwatched) 2683 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2684 if (watched) 2685 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2686 if (t->t_mstate == LMS_USER_LOCK) 2687 (void) new_mstate(t, LMS_SYSTEM); 2688 if (error) 2689 return (set_errno(error)); 2690 return (0); 2691 } 2692 2693 /* 2694 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2695 * we never drop the lock. 2696 */ 2697 static int 2698 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2699 { 2700 kthread_t *t = curthread; 2701 proc_t *p = ttoproc(t); 2702 lwpchan_t lwpchan; 2703 volatile uint16_t type = 0; 2704 volatile int error = 0; 2705 volatile int locked = 0; 2706 volatile int watched = 0; 2707 label_t ljb; 2708 volatile int no_lwpchan = 1; 2709 uint32_t rwstate; 2710 2711 /* We only check rw because the mutex is included in it. */ 2712 if ((caddr_t)rw >= p->p_as->a_userlimit) 2713 return (set_errno(EFAULT)); 2714 2715 if (on_fault(&ljb)) { 2716 if (no_lwpchan) { 2717 error = EFAULT; 2718 goto out_nodrop; 2719 } 2720 if (locked) { 2721 locked = 0; 2722 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2723 } 2724 error = EFAULT; 2725 goto out_nodrop; 2726 } 2727 2728 /* 2729 * Force Copy-on-write if necessary and ensure that the 2730 * synchronization object resides in read/write memory. 2731 * Cause an EFAULT return now if this is not so. 2732 */ 2733 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2734 suword16_noerr(&rw->rwlock_type, type); 2735 2736 /* We can only continue for simple USYNC_PROCESS locks. */ 2737 if (type != USYNC_PROCESS) { 2738 error = EINVAL; 2739 goto out_nodrop; 2740 } 2741 2742 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2743 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2744 &lwpchan, LWPCHAN_CVPOOL)) { 2745 error = EFAULT; 2746 goto out_nodrop; 2747 } 2748 2749 no_lwpchan = 0; 2750 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2751 2752 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2753 locked = 1; 2754 2755 /* 2756 * We can resolve multiple readers (except the last reader) here. 2757 * For the last reader or a writer we need lwp_rwlock_release(), 2758 * to which we also delegate the task of copying the new rwstate 2759 * back to userland (see the comment there). 2760 */ 2761 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2762 if (rwstate & URW_WRITE_LOCKED) 2763 lwp_rwlock_release(&lwpchan, rw); 2764 else if ((rwstate & URW_READERS_MASK) > 0) { 2765 rwstate--; 2766 if ((rwstate & URW_READERS_MASK) == 0) 2767 lwp_rwlock_release(&lwpchan, rw); 2768 else 2769 suword32_noerr(&rw->rwlock_readers, rwstate); 2770 } 2771 2772 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2773 locked = 0; 2774 error = 0; 2775 2776 out_nodrop: 2777 no_fault(); 2778 if (watched) 2779 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2780 if (error) 2781 return (set_errno(error)); 2782 return (0); 2783 } 2784 2785 int 2786 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2787 { 2788 switch (subcode) { 2789 case 0: 2790 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2791 case 1: 2792 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2793 case 2: 2794 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2795 case 3: 2796 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2797 case 4: 2798 return (lwp_rwlock_unlock(rwlp)); 2799 } 2800 return (set_errno(EINVAL)); 2801 } 2802 2803 /* 2804 * Return the owner of the user-level s-object. 2805 * Since we can't really do this, return NULL. 2806 */ 2807 /* ARGSUSED */ 2808 static kthread_t * 2809 lwpsobj_owner(caddr_t sobj) 2810 { 2811 return ((kthread_t *)NULL); 2812 } 2813 2814 /* 2815 * Wake up a thread asleep on a user-level synchronization 2816 * object. 2817 */ 2818 static void 2819 lwp_unsleep(kthread_t *t) 2820 { 2821 ASSERT(THREAD_LOCK_HELD(t)); 2822 if (t->t_wchan0 != NULL) { 2823 sleepq_head_t *sqh; 2824 sleepq_t *sqp = t->t_sleepq; 2825 2826 if (sqp != NULL) { 2827 sqh = lwpsqhash(&t->t_lwpchan); 2828 ASSERT(&sqh->sq_queue == sqp); 2829 sleepq_unsleep(t); 2830 disp_lock_exit_high(&sqh->sq_lock); 2831 CL_SETRUN(t); 2832 return; 2833 } 2834 } 2835 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2836 } 2837 2838 /* 2839 * Change the priority of a thread asleep on a user-level 2840 * synchronization object. To maintain proper priority order, 2841 * we: 2842 * o dequeue the thread. 2843 * o change its priority. 2844 * o re-enqueue the thread. 2845 * Assumption: the thread is locked on entry. 2846 */ 2847 static void 2848 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2849 { 2850 ASSERT(THREAD_LOCK_HELD(t)); 2851 if (t->t_wchan0 != NULL) { 2852 sleepq_t *sqp = t->t_sleepq; 2853 2854 sleepq_dequeue(t); 2855 *t_prip = pri; 2856 sleepq_insert(sqp, t); 2857 } else 2858 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2859 } 2860 2861 /* 2862 * Clean up a locked robust mutex 2863 */ 2864 static void 2865 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2866 { 2867 uint16_t flag; 2868 uchar_t waiters; 2869 label_t ljb; 2870 pid_t owner_pid; 2871 lwp_mutex_t *lp; 2872 volatile int locked = 0; 2873 volatile int watched = 0; 2874 volatile struct upimutex *upimutex = NULL; 2875 volatile int upilocked = 0; 2876 2877 ASSERT(ent->lwpchan_type & LOCK_ROBUST); 2878 2879 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2880 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2881 if (on_fault(&ljb)) { 2882 if (locked) 2883 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2884 if (upilocked) 2885 upimutex_unlock((upimutex_t *)upimutex, 0); 2886 goto out; 2887 } 2888 if (ent->lwpchan_type & USYNC_PROCESS) { 2889 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2890 if ((UPIMUTEX(ent->lwpchan_type) || owner_pid != 0) && 2891 owner_pid != curproc->p_pid) 2892 goto out; 2893 } 2894 if (UPIMUTEX(ent->lwpchan_type)) { 2895 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2896 upib_t *upibp = &UPI_CHAIN(lwpchan); 2897 2898 mutex_enter(&upibp->upib_lock); 2899 upimutex = upi_get(upibp, &lwpchan); 2900 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2901 mutex_exit(&upibp->upib_lock); 2902 goto out; 2903 } 2904 mutex_exit(&upibp->upib_lock); 2905 upilocked = 1; 2906 flag = lwp_clear_mutex(lp, lockflg); 2907 suword8_noerr(&lp->mutex_lockw, 0); 2908 upimutex_unlock((upimutex_t *)upimutex, flag); 2909 } else { 2910 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2911 locked = 1; 2912 if ((ent->lwpchan_type & USYNC_PROCESS) && owner_pid == 0) { 2913 /* 2914 * There is no owner. If there are waiters, 2915 * we should wake up one or all of them. 2916 * It doesn't hurt to wake them up in error 2917 * since they will just retry the lock and 2918 * go to sleep again if necessary. 2919 */ 2920 fuword8_noerr(&lp->mutex_waiters, &waiters); 2921 if (waiters != 0) { /* there are waiters */ 2922 fuword16_noerr(&lp->mutex_flag, &flag); 2923 if (flag & LOCK_NOTRECOVERABLE) { 2924 lwp_release_all(&ent->lwpchan_lwpchan); 2925 suword8_noerr(&lp->mutex_waiters, 0); 2926 } else if (lwp_release(&ent->lwpchan_lwpchan, 2927 &waiters, 0)) { 2928 suword8_noerr(&lp->mutex_waiters, 2929 waiters); 2930 } 2931 } 2932 } else { 2933 (void) lwp_clear_mutex(lp, lockflg); 2934 ulock_clear(&lp->mutex_lockw); 2935 fuword8_noerr(&lp->mutex_waiters, &waiters); 2936 if (waiters && 2937 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2938 suword8_noerr(&lp->mutex_waiters, waiters); 2939 } 2940 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2941 } 2942 out: 2943 no_fault(); 2944 if (watched) 2945 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2946 } 2947 2948 /* 2949 * Register a process-shared robust mutex in the lwpchan cache. 2950 */ 2951 int 2952 lwp_mutex_register(lwp_mutex_t *lp) 2953 { 2954 int error = 0; 2955 volatile int watched; 2956 label_t ljb; 2957 uint8_t type; 2958 lwpchan_t lwpchan; 2959 2960 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2961 return (set_errno(EFAULT)); 2962 2963 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2964 2965 if (on_fault(&ljb)) { 2966 error = EFAULT; 2967 } else { 2968 /* 2969 * Force Copy-on-write if necessary and ensure that the 2970 * synchronization object resides in read/write memory. 2971 * Cause an EFAULT return now if this is not so. 2972 */ 2973 fuword8_noerr(&lp->mutex_type, &type); 2974 suword8_noerr(&lp->mutex_type, type); 2975 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2976 != (USYNC_PROCESS|LOCK_ROBUST)) { 2977 error = EINVAL; 2978 } else if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2979 &lwpchan, LWPCHAN_MPPOOL)) { 2980 error = EFAULT; 2981 } 2982 } 2983 no_fault(); 2984 if (watched) 2985 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2986 if (error) 2987 return (set_errno(error)); 2988 return (0); 2989 } 2990 2991 int 2992 lwp_mutex_trylock(lwp_mutex_t *lp) 2993 { 2994 kthread_t *t = curthread; 2995 proc_t *p = ttoproc(t); 2996 int error = 0; 2997 volatile int locked = 0; 2998 volatile int watched = 0; 2999 label_t ljb; 3000 volatile uint8_t type = 0; 3001 uint16_t flag; 3002 lwpchan_t lwpchan; 3003 3004 if ((caddr_t)lp >= p->p_as->a_userlimit) 3005 return (set_errno(EFAULT)); 3006 3007 (void) new_mstate(t, LMS_USER_LOCK); 3008 3009 if (on_fault(&ljb)) { 3010 if (locked) 3011 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3012 error = EFAULT; 3013 goto out; 3014 } 3015 /* 3016 * Force Copy-on-write if necessary and ensure that the 3017 * synchronization object resides in read/write memory. 3018 * Cause an EFAULT return now if this is not so. 3019 */ 3020 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3021 suword8_noerr(&lp->mutex_type, type); 3022 if (UPIMUTEX(type)) { 3023 no_fault(); 3024 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3025 if ((type & USYNC_PROCESS) && 3026 (error == 0 || 3027 error == EOWNERDEAD || error == ELOCKUNMAPPED)) 3028 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 3029 if (error) 3030 return (set_errno(error)); 3031 return (0); 3032 } 3033 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3034 &lwpchan, LWPCHAN_MPPOOL)) { 3035 error = EFAULT; 3036 goto out; 3037 } 3038 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3039 locked = 1; 3040 if (type & LOCK_ROBUST) { 3041 fuword16_noerr(&lp->mutex_flag, &flag); 3042 if (flag & LOCK_NOTRECOVERABLE) { 3043 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3044 error = ENOTRECOVERABLE; 3045 goto out; 3046 } 3047 } 3048 3049 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3050 3051 if (!ulock_try(&lp->mutex_lockw)) 3052 error = EBUSY; 3053 else { 3054 if (type & USYNC_PROCESS) 3055 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 3056 if (type & LOCK_ROBUST) { 3057 fuword16_noerr(&lp->mutex_flag, &flag); 3058 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3059 if (flag & LOCK_OWNERDEAD) 3060 error = EOWNERDEAD; 3061 else if (type & USYNC_PROCESS_ROBUST) 3062 error = ELOCKUNMAPPED; 3063 else 3064 error = EOWNERDEAD; 3065 } 3066 } 3067 } 3068 locked = 0; 3069 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3070 out: 3071 3072 if (t->t_mstate == LMS_USER_LOCK) 3073 (void) new_mstate(t, LMS_SYSTEM); 3074 3075 no_fault(); 3076 if (watched) 3077 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3078 if (error) 3079 return (set_errno(error)); 3080 return (0); 3081 } 3082 3083 /* 3084 * unlock the mutex and unblock lwps that is trying to acquire this mutex. 3085 * the blocked lwp resumes and retries to acquire the lock. 3086 */ 3087 int 3088 lwp_mutex_unlock(lwp_mutex_t *lp) 3089 { 3090 proc_t *p = ttoproc(curthread); 3091 lwpchan_t lwpchan; 3092 uchar_t waiters; 3093 volatile int locked = 0; 3094 volatile int watched = 0; 3095 volatile uint8_t type = 0; 3096 label_t ljb; 3097 uint16_t flag; 3098 int error = 0; 3099 3100 if ((caddr_t)lp >= p->p_as->a_userlimit) 3101 return (set_errno(EFAULT)); 3102 3103 if (on_fault(&ljb)) { 3104 if (locked) 3105 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3106 error = EFAULT; 3107 goto out; 3108 } 3109 3110 /* 3111 * Force Copy-on-write if necessary and ensure that the 3112 * synchronization object resides in read/write memory. 3113 * Cause an EFAULT return now if this is not so. 3114 */ 3115 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3116 suword8_noerr(&lp->mutex_type, type); 3117 3118 if (UPIMUTEX(type)) { 3119 no_fault(); 3120 error = lwp_upimutex_unlock(lp, type); 3121 if (error) 3122 return (set_errno(error)); 3123 return (0); 3124 } 3125 3126 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3127 3128 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3129 &lwpchan, LWPCHAN_MPPOOL)) { 3130 error = EFAULT; 3131 goto out; 3132 } 3133 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3134 locked = 1; 3135 if (type & LOCK_ROBUST) { 3136 fuword16_noerr(&lp->mutex_flag, &flag); 3137 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3138 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3139 flag |= LOCK_NOTRECOVERABLE; 3140 suword16_noerr(&lp->mutex_flag, flag); 3141 } 3142 } 3143 if (type & USYNC_PROCESS) 3144 suword32_noerr(&lp->mutex_ownerpid, 0); 3145 ulock_clear(&lp->mutex_lockw); 3146 /* 3147 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3148 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3149 * may fail. If it fails, do not write into the waiter bit. 3150 * The call to lwp_release() might fail due to one of three reasons: 3151 * 3152 * 1. due to the thread which set the waiter bit not actually 3153 * sleeping since it got the lock on the re-try. The waiter 3154 * bit will then be correctly updated by that thread. This 3155 * window may be closed by reading the wait bit again here 3156 * and not calling lwp_release() at all if it is zero. 3157 * 2. the thread which set the waiter bit and went to sleep 3158 * was woken up by a signal. This time, the waiter recomputes 3159 * the wait bit in the return with EINTR code. 3160 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3161 * memory that has been re-used after the lock was dropped. 3162 * In this case, writing into the waiter bit would cause data 3163 * corruption. 3164 */ 3165 fuword8_noerr(&lp->mutex_waiters, &waiters); 3166 if (waiters) { 3167 if ((type & LOCK_ROBUST) && 3168 (flag & LOCK_NOTRECOVERABLE)) { 3169 lwp_release_all(&lwpchan); 3170 suword8_noerr(&lp->mutex_waiters, 0); 3171 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3172 suword8_noerr(&lp->mutex_waiters, waiters); 3173 } 3174 } 3175 3176 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3177 out: 3178 no_fault(); 3179 if (watched) 3180 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3181 if (error) 3182 return (set_errno(error)); 3183 return (0); 3184 } 3185