/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread.  This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps, is added for the number of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * on the maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
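
/*
 * Illustrative sketch only (the value shown is hypothetical): since the
 * comment above says this limit can be administered system wide via
 * /etc/system, a typical tuning line would look like:
 *
 *	set maxnestupimx = 4000
 */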

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t	memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list.  Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t.  This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep,
	 * the following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned.  Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit.  Waiters might have been
			 * interrupted.  No need to clear the w bit (upimutex
			 * will soon be freed).  Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 * Put the lwp in an orderly state for debugging.
	 * Calling prstop() has to be done here, and not in
	 * turnstile_block(), since the preceding call to
	 * turnstile_lookup() raises the PIL to a level
	 * at which calls to prstop() should not be made.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	prstop(PR_REQUESTED, 0);
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit.  Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired.  Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up.  Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup().  All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block().  turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted.  The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count.  So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.  If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held.  Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event.  The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not.  In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew).  curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	if (type & USYNC_PROCESS)
		suword32_noerr(&lp->mutex_ownerpid, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	suword32_noerr((uint32_t *)&lp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0);
	suword32_noerr(&lp->mutex_ownerpid, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up.  Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed.  If there is a waiter, the lock will be
			 * handed off.  If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies.  The last owner will free kmem, since
			 * it will find the upimutex has no waiters.  So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	static int iswanted();
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state.  If the thread indeed blocks,
	 * this state becomes valid.  If not, the state is converted back to
	 * LMS_SYSTEM.  So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		/*
		 * Put the lwp in an orderly state for debugging.
		 */
		prstop(PR_REQUESTED, 0);
		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit.  The waiters field in
			 * the lock is not reliable.  Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_mutex_lock() interface, no longer called from libc.
 * libc now calls lwp_mutex_timedlock(lp, NULL).
 * This system call trap continues to exist solely for the benefit
 * of old statically-linked binaries from Solaris 9 and before.
 * It should be removed from the system when we no longer care
 * about such applications.
 */
int
lwp_mutex_lock(lwp_mutex_t *lp)
{
	return (lwp_mutex_timedlock(lp, NULL));
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false.
			 * It could be true only if lwp_release() is called
			 * from lwp_mutex_wakeup() after reading the waiters
			 * field from memory in which the lwp lock used to be,
			 * but has since been re-used to hold a lwp cv or lwp
			 * semaphore.  The thread "tp" found to match the lwp
			 * lock's wchan is actually sleeping for the cv or
			 * semaphore which now has the same wchan.  In this
			 * case, lwp_release() should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);	/* drop sleep q lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex.  the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan.  The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock().  The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.  The call to lwp_release() might fail due to one of
	 * three reasons:
	 *
	 * 1. due to the thread which set the waiter bit not actually
	 *    sleeping since it got the lock on the re-try.  The waiter
	 *    bit will then be correctly updated by that thread.  This
	 *    window may be closed by reading the wait bit again here
	 *    and not calling lwp_release() at all if it is zero.
	 * 2. the thread which set the waiter bit and went to sleep
	 *    was woken up by a signal.  This time, the waiter recomputes
	 *    the wait bit in the return with EINTR code.
	 * 3. the waiter bit read by lwp_mutex_wakeup() was in
	 *    memory that has been re-used after the lock was dropped.
	 *    In this case, writing into the waiter bit would cause data
	 *    corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex.  (pagefaults are
		 * possible here.)
		 */
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs.  In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	/*
	 * Put the lwp in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}

/*
 * wakeup one lwp that's blocked on this condition variable.
1818 */ 1819 int 1820 lwp_cond_signal(lwp_cond_t *cv) 1821 { 1822 proc_t *p = ttoproc(curthread); 1823 lwpchan_t lwpchan; 1824 uchar_t waiters; 1825 volatile uint16_t type = 0; 1826 volatile int locked = 0; 1827 volatile int watched = 0; 1828 label_t ljb; 1829 int error = 0; 1830 1831 if ((caddr_t)cv >= p->p_as->a_userlimit) 1832 return (set_errno(EFAULT)); 1833 1834 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1835 1836 if (on_fault(&ljb)) { 1837 if (locked) 1838 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1839 error = EFAULT; 1840 goto out; 1841 } 1842 /* 1843 * Force Copy-on-write if necessary and ensure that the 1844 * synchronization object resides in read/write memory. 1845 * Cause an EFAULT return now if this is not so. 1846 */ 1847 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1848 suword16_noerr(&cv->cond_type, type); 1849 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1850 &lwpchan, LWPCHAN_CVPOOL)) { 1851 error = EFAULT; 1852 goto out; 1853 } 1854 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1855 locked = 1; 1856 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1857 if (waiters != 0) { 1858 /* 1859 * The following call to lwp_release() might fail but it is 1860 * OK to write into the waiters bit below, since the memory 1861 * could not have been re-used or unmapped (for correctly 1862 * written user programs) as in the case of lwp_mutex_wakeup(). 1863 * For an incorrect program, we should not care about data 1864 * corruption since this is just one instance of other places 1865 * where corruption can occur for such a program. Of course 1866 * if the memory is unmapped, normal fault recovery occurs. 1867 */ 1868 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1869 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1870 } 1871 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1872 out: 1873 no_fault(); 1874 if (watched) 1875 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1876 if (error) 1877 return (set_errno(error)); 1878 return (0); 1879 } 1880 1881 /* 1882 * wakeup every lwp that's blocked on this condition variable. 1883 */ 1884 int 1885 lwp_cond_broadcast(lwp_cond_t *cv) 1886 { 1887 proc_t *p = ttoproc(curthread); 1888 lwpchan_t lwpchan; 1889 volatile uint16_t type = 0; 1890 volatile int locked = 0; 1891 volatile int watched = 0; 1892 label_t ljb; 1893 uchar_t waiters; 1894 int error = 0; 1895 1896 if ((caddr_t)cv >= p->p_as->a_userlimit) 1897 return (set_errno(EFAULT)); 1898 1899 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1900 1901 if (on_fault(&ljb)) { 1902 if (locked) 1903 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1904 error = EFAULT; 1905 goto out; 1906 } 1907 /* 1908 * Force Copy-on-write if necessary and ensure that the 1909 * synchronization object resides in read/write memory. 1910 * Cause an EFAULT return now if this is not so. 
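 *
 * The read-then-write-back of cond_type below is what does this:
 * suword16_noerr() faults (and lands in the on_fault() handler above)
 * if the object is not writable, and the store forces a private copy
 * of a copy-on-write page before the address is used for the lwpchan.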
1911 */ 1912 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1913 suword16_noerr(&cv->cond_type, type); 1914 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1915 &lwpchan, LWPCHAN_CVPOOL)) { 1916 error = EFAULT; 1917 goto out; 1918 } 1919 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1920 locked = 1; 1921 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1922 if (waiters != 0) { 1923 lwp_release_all(&lwpchan); 1924 suword8_noerr(&cv->cond_waiters_kernel, 0); 1925 } 1926 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1927 out: 1928 no_fault(); 1929 if (watched) 1930 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1931 if (error) 1932 return (set_errno(error)); 1933 return (0); 1934 } 1935 1936 int 1937 lwp_sema_trywait(lwp_sema_t *sp) 1938 { 1939 kthread_t *t = curthread; 1940 proc_t *p = ttoproc(t); 1941 label_t ljb; 1942 volatile int locked = 0; 1943 volatile int watched = 0; 1944 volatile uint16_t type = 0; 1945 int count; 1946 lwpchan_t lwpchan; 1947 uchar_t waiters; 1948 int error = 0; 1949 1950 if ((caddr_t)sp >= p->p_as->a_userlimit) 1951 return (set_errno(EFAULT)); 1952 1953 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1954 1955 if (on_fault(&ljb)) { 1956 if (locked) 1957 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1958 error = EFAULT; 1959 goto out; 1960 } 1961 /* 1962 * Force Copy-on-write if necessary and ensure that the 1963 * synchronization object resides in read/write memory. 1964 * Cause an EFAULT return now if this is not so. 1965 */ 1966 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1967 suword16_noerr((void *)&sp->sema_type, type); 1968 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1969 &lwpchan, LWPCHAN_CVPOOL)) { 1970 error = EFAULT; 1971 goto out; 1972 } 1973 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1974 locked = 1; 1975 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1976 if (count == 0) 1977 error = EBUSY; 1978 else 1979 suword32_noerr((void *)&sp->sema_count, --count); 1980 if (count != 0) { 1981 fuword8_noerr(&sp->sema_waiters, &waiters); 1982 if (waiters != 0) { 1983 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1984 suword8_noerr(&sp->sema_waiters, waiters); 1985 } 1986 } 1987 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1988 out: 1989 no_fault(); 1990 if (watched) 1991 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1992 if (error) 1993 return (set_errno(error)); 1994 return (0); 1995 } 1996 1997 /* 1998 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 
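 *
 * In outline (a descriptive sketch of the code below, not extra code):
 *
 *	while (sema_count == 0 and no error)
 *		set sema_waiters and sleep on the lwpchan;
 *	if (no error)
 *		sema_count--;
 *	if (sema_count is still nonzero)
 *		pass the wakeup on to the next waiter;
 *
 * where "error" is EINTR, ETIME, or the timer-validation error that
 * is deliberately deferred until we would actually sleep.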
1999 */ 2000 int 2001 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2002 { 2003 kthread_t *t = curthread; 2004 klwp_t *lwp = ttolwp(t); 2005 proc_t *p = ttoproc(t); 2006 lwp_timer_t lwpt; 2007 caddr_t timedwait; 2008 clock_t tim = -1; 2009 label_t ljb; 2010 volatile int locked = 0; 2011 volatile int watched = 0; 2012 volatile uint16_t type = 0; 2013 int count; 2014 lwpchan_t lwpchan; 2015 uchar_t waiters; 2016 int error = 0; 2017 int time_error; 2018 int imm_timeout = 0; 2019 int imm_unpark = 0; 2020 2021 if ((caddr_t)sp >= p->p_as->a_userlimit) 2022 return (set_errno(EFAULT)); 2023 2024 timedwait = (caddr_t)tsp; 2025 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2026 lwpt.lwpt_imm_timeout) { 2027 imm_timeout = 1; 2028 timedwait = NULL; 2029 } 2030 2031 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2032 2033 if (on_fault(&ljb)) { 2034 if (locked) 2035 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2036 error = EFAULT; 2037 goto out; 2038 } 2039 /* 2040 * Force Copy-on-write if necessary and ensure that the 2041 * synchronization object resides in read/write memory. 2042 * Cause an EFAULT return now if this is not so. 2043 */ 2044 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2045 suword16_noerr((void *)&sp->sema_type, type); 2046 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2047 &lwpchan, LWPCHAN_CVPOOL)) { 2048 error = EFAULT; 2049 goto out; 2050 } 2051 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2052 locked = 1; 2053 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2054 while (error == 0 && count == 0) { 2055 if (time_error) { 2056 /* 2057 * The SUSV3 Posix spec is very clear that we 2058 * should get no error from validating the 2059 * timer until we would actually sleep. 2060 */ 2061 error = time_error; 2062 break; 2063 } 2064 suword8_noerr(&sp->sema_waiters, 1); 2065 if (watched) 2066 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2067 /* 2068 * Put the lwp in an orderly state for debugging. 2069 */ 2070 prstop(PR_REQUESTED, 0); 2071 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2072 /* 2073 * We received a signal at user-level before calling 2074 * here or another thread wants us to return 2075 * immediately with EINTR. See lwp_unpark(). 2076 */ 2077 imm_unpark = 1; 2078 t->t_unpark = 0; 2079 timedwait = NULL; 2080 } else if (timedwait) { 2081 /* 2082 * If we successfully queue the timeout, 2083 * then don't drop t_delay_lock until 2084 * we are on the sleep queue (below). 2085 */ 2086 mutex_enter(&t->t_delay_lock); 2087 if (lwp_timer_enqueue(&lwpt) != 0) { 2088 mutex_exit(&t->t_delay_lock); 2089 imm_timeout = 1; 2090 timedwait = NULL; 2091 } 2092 } 2093 t->t_flag |= T_WAITCVSEM; 2094 lwp_block(&lwpchan); 2095 /* 2096 * Nothing should happen to cause the lwp to sleep 2097 * again until after it returns from swtch(). 
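 *
 * We are already on the sleep queue (lwp_block() queued us while we
 * held the lwpchan lock), so dropping the lock before swtch() cannot
 * lose a wakeup: a concurrent lwp_sema_post() simply finds us queued
 * and sets us running again.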
2098 */ 2099 if (timedwait) 2100 mutex_exit(&t->t_delay_lock); 2101 locked = 0; 2102 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2103 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2104 (imm_timeout | imm_unpark)) 2105 setrun(t); 2106 swtch(); 2107 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2108 if (timedwait) 2109 tim = lwp_timer_dequeue(&lwpt); 2110 setallwatch(); 2111 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2112 MUSTRETURN(p, t) || imm_unpark) 2113 error = EINTR; 2114 else if (imm_timeout || (timedwait && tim == -1)) 2115 error = ETIME; 2116 lwp->lwp_asleep = 0; 2117 lwp->lwp_sysabort = 0; 2118 watched = watch_disable_addr((caddr_t)sp, 2119 sizeof (*sp), S_WRITE); 2120 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2121 locked = 1; 2122 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2123 } 2124 if (error == 0) 2125 suword32_noerr((void *)&sp->sema_count, --count); 2126 if (count != 0) { 2127 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2128 suword8_noerr(&sp->sema_waiters, waiters); 2129 } 2130 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2131 out: 2132 no_fault(); 2133 if (watched) 2134 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2135 if (tsp && check_park && !time_error) 2136 error = lwp_timer_copyout(&lwpt, error); 2137 if (error) 2138 return (set_errno(error)); 2139 return (0); 2140 } 2141 2142 /* 2143 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2144 * libc now calls lwp_sema_timedwait(). 2145 * This system call trap exists solely for the benefit of old 2146 * statically linked applications from Solaris 9 and before. 2147 * It should be removed when we no longer care about such applications. 2148 */ 2149 int 2150 lwp_sema_wait(lwp_sema_t *sp) 2151 { 2152 return (lwp_sema_timedwait(sp, NULL, 0)); 2153 } 2154 2155 int 2156 lwp_sema_post(lwp_sema_t *sp) 2157 { 2158 proc_t *p = ttoproc(curthread); 2159 label_t ljb; 2160 volatile int locked = 0; 2161 volatile int watched = 0; 2162 volatile uint16_t type = 0; 2163 int count; 2164 lwpchan_t lwpchan; 2165 uchar_t waiters; 2166 int error = 0; 2167 2168 if ((caddr_t)sp >= p->p_as->a_userlimit) 2169 return (set_errno(EFAULT)); 2170 2171 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2172 2173 if (on_fault(&ljb)) { 2174 if (locked) 2175 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2176 error = EFAULT; 2177 goto out; 2178 } 2179 /* 2180 * Force Copy-on-write if necessary and ensure that the 2181 * synchronization object resides in read/write memory. 2182 * Cause an EFAULT return now if this is not so. 
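 *
 * The post itself is then straightforward: bump sema_count (or fail
 * with EOVERFLOW at _SEM_VALUE_MAX), and if the count just went from
 * zero to one, wake a single waiter, if any, via lwp_release().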
2183 */ 2184 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2185 suword16_noerr(&sp->sema_type, type); 2186 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2187 &lwpchan, LWPCHAN_CVPOOL)) { 2188 error = EFAULT; 2189 goto out; 2190 } 2191 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2192 locked = 1; 2193 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2194 if (count == _SEM_VALUE_MAX) 2195 error = EOVERFLOW; 2196 else 2197 suword32_noerr(&sp->sema_count, ++count); 2198 if (count == 1) { 2199 fuword8_noerr(&sp->sema_waiters, &waiters); 2200 if (waiters) { 2201 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2202 suword8_noerr(&sp->sema_waiters, waiters); 2203 } 2204 } 2205 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2206 out: 2207 no_fault(); 2208 if (watched) 2209 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2210 if (error) 2211 return (set_errno(error)); 2212 return (0); 2213 } 2214 2215 #define TRW_WANT_WRITE 0x1 2216 #define TRW_LOCK_GRANTED 0x2 2217 2218 #define READ_LOCK 0 2219 #define WRITE_LOCK 1 2220 #define TRY_FLAG 0x10 2221 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2222 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2223 2224 /* 2225 * Release one writer or one or more readers. Compute the rwstate word to 2226 * reflect the new state of the queue. For a safe hand-off we copy the new 2227 * rwstate value back to userland before we wake any of the new lock holders. 2228 * 2229 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2230 * being given precedence over readers of the same priority). 2231 * 2232 * If the first thread is a reader we scan the queue releasing all readers 2233 * until we hit a writer or the end of the queue. If the first thread is a 2234 * writer we still need to check for another writer. 2235 */ 2236 void 2237 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2238 { 2239 sleepq_head_t *sqh; 2240 kthread_t *tp; 2241 kthread_t **tpp; 2242 kthread_t *tpnext; 2243 kthread_t *wakelist = NULL; 2244 uint32_t rwstate = 0; 2245 int wcount = 0; 2246 int rcount = 0; 2247 2248 sqh = lwpsqhash(lwpchan); 2249 disp_lock_enter(&sqh->sq_lock); 2250 tpp = &sqh->sq_queue.sq_first; 2251 while ((tp = *tpp) != NULL) { 2252 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2253 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2254 if (tp->t_writer & TRW_WANT_WRITE) { 2255 if ((wcount++ == 0) && (rcount == 0)) { 2256 rwstate |= URW_WRITE_LOCKED; 2257 2258 /* Just one writer to wake. */ 2259 sleepq_unlink(tpp, tp); 2260 wakelist = tp; 2261 2262 /* tpp already set for next thread. */ 2263 continue; 2264 } else { 2265 rwstate |= URW_HAS_WAITERS; 2266 /* We need look no further. */ 2267 break; 2268 } 2269 } else { 2270 rcount++; 2271 if (wcount == 0) { 2272 rwstate++; 2273 2274 /* Add reader to wake list. */ 2275 sleepq_unlink(tpp, tp); 2276 tp->t_link = wakelist; 2277 wakelist = tp; 2278 2279 /* tpp already set for next thread. */ 2280 continue; 2281 } else { 2282 rwstate |= URW_HAS_WAITERS; 2283 /* We need look no further. */ 2284 break; 2285 } 2286 } 2287 } 2288 tpp = &tp->t_link; 2289 } 2290 2291 /* Copy the new rwstate back to userland. */ 2292 suword32_noerr(&rw->rwlock_readers, rwstate); 2293 2294 /* Wake the new lock holder(s) up. 
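 *
 * Each thread on the wake list was unlinked from the sleep queue
 * above while sq_lock was held.  TRW_LOCK_GRANTED tells it, back in
 * lwp_rwlock_lock(), that it now owns the lock and must not retry;
 * CL_WAKEUP() then makes it runnable.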
*/ 2295 tp = wakelist; 2296 while (tp != NULL) { 2297 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2298 tp->t_wchan0 = NULL; 2299 tp->t_wchan = NULL; 2300 tp->t_sobj_ops = NULL; 2301 tp->t_writer |= TRW_LOCK_GRANTED; 2302 tpnext = tp->t_link; 2303 tp->t_link = NULL; 2304 CL_WAKEUP(tp); 2305 thread_unlock_high(tp); 2306 tp = tpnext; 2307 } 2308 2309 disp_lock_exit(&sqh->sq_lock); 2310 } 2311 2312 /* 2313 * We enter here holding the user-level mutex, which we must release before 2314 * returning or blocking. Based on lwp_cond_wait(). 2315 */ 2316 static int 2317 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2318 { 2319 lwp_mutex_t *mp = NULL; 2320 kthread_t *t = curthread; 2321 kthread_t *tp; 2322 klwp_t *lwp = ttolwp(t); 2323 proc_t *p = ttoproc(t); 2324 lwp_timer_t lwpt; 2325 lwpchan_t lwpchan; 2326 lwpchan_t mlwpchan; 2327 caddr_t timedwait; 2328 volatile uint16_t type = 0; 2329 volatile uint8_t mtype = 0; 2330 uchar_t mwaiters; 2331 volatile int error = 0; 2332 int time_error; 2333 clock_t tim = -1; 2334 volatile int locked = 0; 2335 volatile int mlocked = 0; 2336 volatile int watched = 0; 2337 volatile int mwatched = 0; 2338 label_t ljb; 2339 volatile int no_lwpchan = 1; 2340 int imm_timeout = 0; 2341 int try_flag; 2342 uint32_t rwstate; 2343 int acquired = 0; 2344 2345 /* We only check rw because the mutex is included in it. */ 2346 if ((caddr_t)rw >= p->p_as->a_userlimit) 2347 return (set_errno(EFAULT)); 2348 2349 /* We must only report this error if we are about to sleep (later). */ 2350 timedwait = (caddr_t)tsp; 2351 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2352 lwpt.lwpt_imm_timeout) { 2353 imm_timeout = 1; 2354 timedwait = NULL; 2355 } 2356 2357 (void) new_mstate(t, LMS_USER_LOCK); 2358 2359 if (on_fault(&ljb)) { 2360 if (no_lwpchan) { 2361 error = EFAULT; 2362 goto out_nodrop; 2363 } 2364 if (mlocked) { 2365 mlocked = 0; 2366 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2367 } 2368 if (locked) { 2369 locked = 0; 2370 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2371 } 2372 /* 2373 * Set up another on_fault() for a possible fault 2374 * on the user lock accessed at "out_drop". 2375 */ 2376 if (on_fault(&ljb)) { 2377 if (mlocked) { 2378 mlocked = 0; 2379 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2380 } 2381 error = EFAULT; 2382 goto out_nodrop; 2383 } 2384 error = EFAULT; 2385 goto out_nodrop; 2386 } 2387 2388 /* Process rd_wr (including sanity check). */ 2389 try_flag = (rd_wr & TRY_FLAG); 2390 rd_wr &= ~TRY_FLAG; 2391 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2392 error = EINVAL; 2393 goto out_nodrop; 2394 } 2395 2396 /* 2397 * Force Copy-on-write if necessary and ensure that the 2398 * synchronization object resides in read/write memory. 2399 * Cause an EFAULT return now if this is not so. 2400 */ 2401 mp = &rw->mutex; 2402 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2403 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2404 suword8_noerr(&mp->mutex_type, mtype); 2405 suword16_noerr(&rw->rwlock_type, type); 2406 2407 /* We can only continue for simple USYNC_PROCESS locks. */ 2408 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2409 error = EINVAL; 2410 goto out_nodrop; 2411 } 2412 2413 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2414 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2415 &mlwpchan, LWPCHAN_MPPOOL)) { 2416 error = EFAULT; 2417 goto out_nodrop; 2418 } 2419 2420 /* Convert user level rwlock, "rw", to a unique lwpchan. 
*/ 2421 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2422 &lwpchan, LWPCHAN_CVPOOL)) { 2423 error = EFAULT; 2424 goto out_nodrop; 2425 } 2426 2427 no_lwpchan = 0; 2428 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2429 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2430 2431 /* 2432 * lwpchan_lock() ensures that the calling LWP is put to sleep 2433 * atomically with respect to a possible wakeup which is a result 2434 * of lwp_rwlock_unlock(). 2435 * 2436 * What's misleading is that the LWP is put to sleep after the 2437 * rwlock's mutex is released. This is OK as long as the release 2438 * operation is also done while holding mlwpchan. The LWP is then 2439 * put to sleep when the possibility of pagefaulting or sleeping 2440 * has been completely eliminated. 2441 */ 2442 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2443 locked = 1; 2444 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2445 mlocked = 1; 2446 2447 /* 2448 * Fetch the current rwlock state. 2449 * 2450 * The possibility of spurious wake-ups or killed waiters means 2451 * rwstate's URW_HAS_WAITERS bit may indicate false positives. 2452 * We only fix these if they are important to us. 2453 * 2454 * Although various error states can be observed here (e.g. the lock 2455 * is not held, but there are waiters) we assume these are application 2456 * errors and so we take no corrective action. 2457 */ 2458 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2459 /* 2460 * We cannot legitimately get here from user-level 2461 * without URW_HAS_WAITERS being set. 2462 * Set it now to guard against user-level error. 2463 */ 2464 rwstate |= URW_HAS_WAITERS; 2465 2466 /* 2467 * We can try only if the lock isn't held by a writer. 2468 */ 2469 if (!(rwstate & URW_WRITE_LOCKED)) { 2470 tp = lwp_queue_waiter(&lwpchan); 2471 if (tp == NULL) { 2472 /* 2473 * Hmmm, rwstate indicates waiters but there are 2474 * none queued. This could just be the result of a 2475 * spurious wakeup, so let's ignore it. 2476 * 2477 * We now have a chance to acquire the lock 2478 * uncontended, but this is the last chance for 2479 * a writer to acquire the lock without blocking. 2480 */ 2481 if (rd_wr == READ_LOCK) { 2482 rwstate++; 2483 acquired = 1; 2484 } else if ((rwstate & URW_READERS_MASK) == 0) { 2485 rwstate |= URW_WRITE_LOCKED; 2486 acquired = 1; 2487 } 2488 } else if (rd_wr == READ_LOCK) { 2489 /* 2490 * This is the last chance for a reader to acquire 2491 * the lock now, but it can only do so if there is 2492 * no writer of equal or greater priority at the 2493 * head of the queue. 2494 * 2495 * It is also just possible that there is a reader 2496 * at the head of the queue. This may be the result 2497 * of a spurious wakeup or an application failure. 2498 * In this case we only acquire the lock if we have 2499 * equal or greater priority. It is not our job to 2500 * release spurious waiters. 2501 */ 2502 pri_t our_pri = DISP_PRIO(t); 2503 pri_t his_pri = DISP_PRIO(tp); 2504 2505 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2506 !(tp->t_writer & TRW_WANT_WRITE))) { 2507 rwstate++; 2508 acquired = 1; 2509 } 2510 } 2511 } 2512 2513 if (acquired || try_flag || time_error) { 2514 /* 2515 * We're not going to block this time. 2516 */ 2517 suword32_noerr(&rw->rwlock_readers, rwstate); 2518 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2519 locked = 0; 2520 2521 if (acquired) { 2522 /* 2523 * Got the lock! 2524 */ 2525 error = 0; 2526 2527 } else if (try_flag) { 2528 /* 2529 * We didn't get the lock and we're about to block.
2530 * If we're doing a trylock, return EBUSY instead. 2531 */ 2532 error = EBUSY; 2533 2534 } else if (time_error) { 2535 /* 2536 * The SUSV3 POSIX spec is very clear that we should 2537 * get no error from validating the timer (above) 2538 * until we would actually sleep. 2539 */ 2540 error = time_error; 2541 } 2542 2543 goto out_drop; 2544 } 2545 2546 /* 2547 * We're about to block, so indicate what kind of waiter we are. 2548 */ 2549 t->t_writer = 0; 2550 if (rd_wr == WRITE_LOCK) 2551 t->t_writer = TRW_WANT_WRITE; 2552 suword32_noerr(&rw->rwlock_readers, rwstate); 2553 2554 /* 2555 * Unlock the rwlock's mutex (pagefaults are possible here). 2556 */ 2557 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2558 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2559 suword32_noerr(&mp->mutex_ownerpid, 0); 2560 ulock_clear(&mp->mutex_lockw); 2561 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2562 if (mwaiters != 0) { 2563 /* 2564 * Given the locking of mlwpchan around the release of 2565 * the mutex and checking for waiters, the following 2566 * call to lwp_release() can fail ONLY if the lock 2567 * acquirer is interrupted after setting the waiter bit, 2568 * calling lwp_block() and releasing mlwpchan. 2569 * In this case, it could get pulled off the LWP sleep 2570 * queue (via setrun()) before the following call to 2571 * lwp_release() occurs, and the lock requestor will 2572 * update the waiter bit correctly by re-evaluating it. 2573 */ 2574 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2575 suword8_noerr(&mp->mutex_waiters, mwaiters); 2576 } 2577 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2578 mlocked = 0; 2579 no_fault(); 2580 2581 if (mwatched) { 2582 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2583 mwatched = 0; 2584 } 2585 if (watched) { 2586 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2587 watched = 0; 2588 } 2589 2590 /* 2591 * Put the LWP in an orderly state for debugging. 2592 */ 2593 prstop(PR_REQUESTED, 0); 2594 if (timedwait) { 2595 /* 2596 * If we successfully queue the timeout, 2597 * then don't drop t_delay_lock until 2598 * we are on the sleep queue (below). 2599 */ 2600 mutex_enter(&t->t_delay_lock); 2601 if (lwp_timer_enqueue(&lwpt) != 0) { 2602 mutex_exit(&t->t_delay_lock); 2603 imm_timeout = 1; 2604 timedwait = NULL; 2605 } 2606 } 2607 t->t_flag |= T_WAITCVSEM; 2608 lwp_block(&lwpchan); 2609 2610 /* 2611 * Nothing should happen to cause the LWp to go to sleep until after 2612 * it returns from swtch(). 2613 */ 2614 if (timedwait) 2615 mutex_exit(&t->t_delay_lock); 2616 locked = 0; 2617 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2618 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 2619 setrun(t); 2620 swtch(); 2621 2622 /* 2623 * We're back, but we need to work out why. Were we interrupted? Did 2624 * we timeout? Were we granted the lock? 2625 */ 2626 error = EAGAIN; 2627 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2628 t->t_writer = 0; 2629 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2630 if (timedwait) 2631 tim = lwp_timer_dequeue(&lwpt); 2632 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2633 error = EINTR; 2634 else if (imm_timeout || (timedwait && tim == -1)) 2635 error = ETIME; 2636 lwp->lwp_asleep = 0; 2637 lwp->lwp_sysabort = 0; 2638 setallwatch(); 2639 2640 /* 2641 * If we were granted the lock we don't care about EINTR or ETIME. 
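 *
 * The grant and the rwstate update were both done by
 * lwp_rwlock_release() before it set us running, so an EINTR or ETIME
 * return here would wrongly tell the caller it does not hold the lock.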
2642 */ 2643 if (acquired) 2644 error = 0; 2645 2646 if (t->t_mstate == LMS_USER_LOCK) 2647 (void) new_mstate(t, LMS_SYSTEM); 2648 2649 if (error) 2650 return (set_errno(error)); 2651 return (0); 2652 2653 out_drop: 2654 /* 2655 * Make sure that the user level lock is dropped before returning 2656 * to the caller. 2657 */ 2658 if (!mlocked) { 2659 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2660 mlocked = 1; 2661 } 2662 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2663 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2664 suword32_noerr(&mp->mutex_ownerpid, 0); 2665 ulock_clear(&mp->mutex_lockw); 2666 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2667 if (mwaiters != 0) { 2668 /* 2669 * See comment above on lock clearing and lwp_release() 2670 * success/failure. 2671 */ 2672 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2673 suword8_noerr(&mp->mutex_waiters, mwaiters); 2674 } 2675 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2676 mlocked = 0; 2677 2678 out_nodrop: 2679 no_fault(); 2680 if (mwatched) 2681 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2682 if (watched) 2683 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2684 if (t->t_mstate == LMS_USER_LOCK) 2685 (void) new_mstate(t, LMS_SYSTEM); 2686 if (error) 2687 return (set_errno(error)); 2688 return (0); 2689 } 2690 2691 /* 2692 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2693 * we never drop the lock. 2694 */ 2695 static int 2696 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2697 { 2698 kthread_t *t = curthread; 2699 proc_t *p = ttoproc(t); 2700 lwpchan_t lwpchan; 2701 volatile uint16_t type = 0; 2702 volatile int error = 0; 2703 volatile int locked = 0; 2704 volatile int watched = 0; 2705 label_t ljb; 2706 volatile int no_lwpchan = 1; 2707 uint32_t rwstate; 2708 2709 /* We only check rw because the mutex is included in it. */ 2710 if ((caddr_t)rw >= p->p_as->a_userlimit) 2711 return (set_errno(EFAULT)); 2712 2713 if (on_fault(&ljb)) { 2714 if (no_lwpchan) { 2715 error = EFAULT; 2716 goto out_nodrop; 2717 } 2718 if (locked) { 2719 locked = 0; 2720 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2721 } 2722 error = EFAULT; 2723 goto out_nodrop; 2724 } 2725 2726 /* 2727 * Force Copy-on-write if necessary and ensure that the 2728 * synchronization object resides in read/write memory. 2729 * Cause an EFAULT return now if this is not so. 2730 */ 2731 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2732 suword16_noerr(&rw->rwlock_type, type); 2733 2734 /* We can only continue for simple USYNC_PROCESS locks. */ 2735 if (type != USYNC_PROCESS) { 2736 error = EINVAL; 2737 goto out_nodrop; 2738 } 2739 2740 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2741 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2742 &lwpchan, LWPCHAN_CVPOOL)) { 2743 error = EFAULT; 2744 goto out_nodrop; 2745 } 2746 2747 no_lwpchan = 0; 2748 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2749 2750 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2751 locked = 1; 2752 2753 /* 2754 * We can resolve multiple readers (except the last reader) here. 2755 * For the last reader or a writer we need lwp_rwlock_release(), 2756 * to which we also delegate the task of copying the new rwstate 2757 * back to userland (see the comment there). 
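 *
 * That is, the cases handled below are:
 *
 *	write-locked		-> lwp_rwlock_release()
 *	last reader unlocking	-> decrement, then lwp_rwlock_release()
 *	other readers remain	-> just store the decremented count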
2758 */ 2759 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2760 if (rwstate & URW_WRITE_LOCKED) 2761 lwp_rwlock_release(&lwpchan, rw); 2762 else if ((rwstate & URW_READERS_MASK) > 0) { 2763 rwstate--; 2764 if ((rwstate & URW_READERS_MASK) == 0) 2765 lwp_rwlock_release(&lwpchan, rw); 2766 else 2767 suword32_noerr(&rw->rwlock_readers, rwstate); 2768 } 2769 2770 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2771 locked = 0; 2772 error = 0; 2773 2774 out_nodrop: 2775 no_fault(); 2776 if (watched) 2777 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2778 if (error) 2779 return (set_errno(error)); 2780 return (0); 2781 } 2782 2783 int 2784 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2785 { 2786 switch (subcode) { 2787 case 0: 2788 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2789 case 1: 2790 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2791 case 2: 2792 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2793 case 3: 2794 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2795 case 4: 2796 return (lwp_rwlock_unlock(rwlp)); 2797 } 2798 return (set_errno(EINVAL)); 2799 } 2800 2801 /* 2802 * Return the owner of the user-level s-object. 2803 * Since we can't really do this, return NULL. 2804 */ 2805 /* ARGSUSED */ 2806 static kthread_t * 2807 lwpsobj_owner(caddr_t sobj) 2808 { 2809 return ((kthread_t *)NULL); 2810 } 2811 2812 /* 2813 * Wake up a thread asleep on a user-level synchronization 2814 * object. 2815 */ 2816 static void 2817 lwp_unsleep(kthread_t *t) 2818 { 2819 ASSERT(THREAD_LOCK_HELD(t)); 2820 if (t->t_wchan0 != NULL) { 2821 sleepq_head_t *sqh; 2822 sleepq_t *sqp = t->t_sleepq; 2823 2824 if (sqp != NULL) { 2825 sqh = lwpsqhash(&t->t_lwpchan); 2826 ASSERT(&sqh->sq_queue == sqp); 2827 sleepq_unsleep(t); 2828 disp_lock_exit_high(&sqh->sq_lock); 2829 CL_SETRUN(t); 2830 return; 2831 } 2832 } 2833 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2834 } 2835 2836 /* 2837 * Change the priority of a thread asleep on a user-level 2838 * synchronization object. To maintain proper priority order, 2839 * we: 2840 * o dequeue the thread. 2841 * o change its priority. 2842 * o re-enqueue the thread. 2843 * Assumption: the thread is locked on entry. 
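 *
 * sleepq_insert() keeps the queue in priority order (see the note at
 * lwp_rwlock_release() above), so re-inserting the thread places it
 * according to its new priority.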
2844 */ 2845 static void 2846 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2847 { 2848 ASSERT(THREAD_LOCK_HELD(t)); 2849 if (t->t_wchan0 != NULL) { 2850 sleepq_t *sqp = t->t_sleepq; 2851 2852 sleepq_dequeue(t); 2853 *t_prip = pri; 2854 sleepq_insert(sqp, t); 2855 } else 2856 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2857 } 2858 2859 /* 2860 * Clean up a locked robust mutex 2861 */ 2862 static void 2863 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2864 { 2865 uint16_t flag; 2866 uchar_t waiters; 2867 label_t ljb; 2868 pid_t owner_pid; 2869 lwp_mutex_t *lp; 2870 volatile int locked = 0; 2871 volatile int watched = 0; 2872 volatile struct upimutex *upimutex = NULL; 2873 volatile int upilocked = 0; 2874 2875 ASSERT(ent->lwpchan_type & LOCK_ROBUST); 2876 2877 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2878 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2879 if (on_fault(&ljb)) { 2880 if (locked) 2881 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2882 if (upilocked) 2883 upimutex_unlock((upimutex_t *)upimutex, 0); 2884 goto out; 2885 } 2886 if (ent->lwpchan_type & USYNC_PROCESS) { 2887 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2888 if ((UPIMUTEX(ent->lwpchan_type) || owner_pid != 0) && 2889 owner_pid != curproc->p_pid) 2890 goto out; 2891 } 2892 if (UPIMUTEX(ent->lwpchan_type)) { 2893 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2894 upib_t *upibp = &UPI_CHAIN(lwpchan); 2895 2896 mutex_enter(&upibp->upib_lock); 2897 upimutex = upi_get(upibp, &lwpchan); 2898 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2899 mutex_exit(&upibp->upib_lock); 2900 goto out; 2901 } 2902 mutex_exit(&upibp->upib_lock); 2903 upilocked = 1; 2904 flag = lwp_clear_mutex(lp, lockflg); 2905 suword8_noerr(&lp->mutex_lockw, 0); 2906 upimutex_unlock((upimutex_t *)upimutex, flag); 2907 } else { 2908 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2909 locked = 1; 2910 if ((ent->lwpchan_type & USYNC_PROCESS) && owner_pid == 0) { 2911 /* 2912 * There is no owner. If there are waiters, 2913 * we should wake up one or all of them. 2914 * It doesn't hurt to wake them up in error 2915 * since they will just retry the lock and 2916 * go to sleep again if necessary. 2917 */ 2918 fuword8_noerr(&lp->mutex_waiters, &waiters); 2919 if (waiters != 0) { /* there are waiters */ 2920 fuword16_noerr(&lp->mutex_flag, &flag); 2921 if (flag & LOCK_NOTRECOVERABLE) { 2922 lwp_release_all(&ent->lwpchan_lwpchan); 2923 suword8_noerr(&lp->mutex_waiters, 0); 2924 } else if (lwp_release(&ent->lwpchan_lwpchan, 2925 &waiters, 0)) { 2926 suword8_noerr(&lp->mutex_waiters, 2927 waiters); 2928 } 2929 } 2930 } else { 2931 (void) lwp_clear_mutex(lp, lockflg); 2932 ulock_clear(&lp->mutex_lockw); 2933 fuword8_noerr(&lp->mutex_waiters, &waiters); 2934 if (waiters && 2935 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2936 suword8_noerr(&lp->mutex_waiters, waiters); 2937 } 2938 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2939 } 2940 out: 2941 no_fault(); 2942 if (watched) 2943 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2944 } 2945 2946 /* 2947 * Register a process-shared robust mutex in the lwpchan cache. 
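 *
 * Registration takes no locks of its own; it only validates the type
 * word (it must be USYNC_PROCESS | LOCK_ROBUST) and primes the lwpchan
 * cache via get_lwpchan(), so that the mutex can later be found and
 * recovered (see lwp_mutex_cleanup() above) if its owner dies.
 * Presumably libc issues this call once, the first time the process
 * locks a given process-shared robust mutex; that usage is an
 * assumption, not something this file enforces.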
2948 */ 2949 int 2950 lwp_mutex_register(lwp_mutex_t *lp) 2951 { 2952 int error = 0; 2953 volatile int watched; 2954 label_t ljb; 2955 uint8_t type; 2956 lwpchan_t lwpchan; 2957 2958 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2959 return (set_errno(EFAULT)); 2960 2961 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2962 2963 if (on_fault(&ljb)) { 2964 error = EFAULT; 2965 } else { 2966 /* 2967 * Force Copy-on-write if necessary and ensure that the 2968 * synchronization object resides in read/write memory. 2969 * Cause an EFAULT return now if this is not so. 2970 */ 2971 fuword8_noerr(&lp->mutex_type, &type); 2972 suword8_noerr(&lp->mutex_type, type); 2973 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2974 != (USYNC_PROCESS|LOCK_ROBUST)) { 2975 error = EINVAL; 2976 } else if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2977 &lwpchan, LWPCHAN_MPPOOL)) { 2978 error = EFAULT; 2979 } 2980 } 2981 no_fault(); 2982 if (watched) 2983 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2984 if (error) 2985 return (set_errno(error)); 2986 return (0); 2987 } 2988 2989 int 2990 lwp_mutex_trylock(lwp_mutex_t *lp) 2991 { 2992 kthread_t *t = curthread; 2993 proc_t *p = ttoproc(t); 2994 int error = 0; 2995 volatile int locked = 0; 2996 volatile int watched = 0; 2997 label_t ljb; 2998 volatile uint8_t type = 0; 2999 uint16_t flag; 3000 lwpchan_t lwpchan; 3001 3002 if ((caddr_t)lp >= p->p_as->a_userlimit) 3003 return (set_errno(EFAULT)); 3004 3005 (void) new_mstate(t, LMS_USER_LOCK); 3006 3007 if (on_fault(&ljb)) { 3008 if (locked) 3009 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3010 error = EFAULT; 3011 goto out; 3012 } 3013 /* 3014 * Force Copy-on-write if necessary and ensure that the 3015 * synchronization object resides in read/write memory. 3016 * Cause an EFAULT return now if this is not so. 
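 *
 * Note the two paths below: priority-inheriting mutexes bypass the
 * lwpchan sleep queues entirely and go to lwp_upimutex_lock() with
 * UPIMUTEX_TRY, while everything else attempts ulock_try() on the
 * lock byte under lwpchan_lock.  For LOCK_ROBUST mutexes a
 * LOCK_NOTRECOVERABLE flag fails the attempt with ENOTRECOVERABLE,
 * and a dead or unmapped previous owner is reported as EOWNERDEAD
 * (or ELOCKUNMAPPED for the USYNC_PROCESS_ROBUST type).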
3017 */ 3018 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3019 suword8_noerr(&lp->mutex_type, type); 3020 if (UPIMUTEX(type)) { 3021 no_fault(); 3022 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3023 if ((type & USYNC_PROCESS) && 3024 (error == 0 || 3025 error == EOWNERDEAD || error == ELOCKUNMAPPED)) 3026 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 3027 if (error) 3028 return (set_errno(error)); 3029 return (0); 3030 } 3031 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3032 &lwpchan, LWPCHAN_MPPOOL)) { 3033 error = EFAULT; 3034 goto out; 3035 } 3036 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3037 locked = 1; 3038 if (type & LOCK_ROBUST) { 3039 fuword16_noerr(&lp->mutex_flag, &flag); 3040 if (flag & LOCK_NOTRECOVERABLE) { 3041 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3042 error = ENOTRECOVERABLE; 3043 goto out; 3044 } 3045 } 3046 3047 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3048 3049 if (!ulock_try(&lp->mutex_lockw)) 3050 error = EBUSY; 3051 else { 3052 if (type & USYNC_PROCESS) 3053 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 3054 if (type & LOCK_ROBUST) { 3055 fuword16_noerr(&lp->mutex_flag, &flag); 3056 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3057 if (flag & LOCK_OWNERDEAD) 3058 error = EOWNERDEAD; 3059 else if (type & USYNC_PROCESS_ROBUST) 3060 error = ELOCKUNMAPPED; 3061 else 3062 error = EOWNERDEAD; 3063 } 3064 } 3065 } 3066 locked = 0; 3067 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3068 out: 3069 3070 if (t->t_mstate == LMS_USER_LOCK) 3071 (void) new_mstate(t, LMS_SYSTEM); 3072 3073 no_fault(); 3074 if (watched) 3075 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3076 if (error) 3077 return (set_errno(error)); 3078 return (0); 3079 } 3080 3081 /* 3082 * unlock the mutex and unblock lwps that are trying to acquire this mutex. 3083 * The blocked lwp resumes and retries to acquire the lock. 3084 */ 3085 int 3086 lwp_mutex_unlock(lwp_mutex_t *lp) 3087 { 3088 proc_t *p = ttoproc(curthread); 3089 lwpchan_t lwpchan; 3090 uchar_t waiters; 3091 volatile int locked = 0; 3092 volatile int watched = 0; 3093 volatile uint8_t type = 0; 3094 label_t ljb; 3095 uint16_t flag; 3096 int error = 0; 3097 3098 if ((caddr_t)lp >= p->p_as->a_userlimit) 3099 return (set_errno(EFAULT)); 3100 3101 if (on_fault(&ljb)) { 3102 if (locked) 3103 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3104 error = EFAULT; 3105 goto out; 3106 } 3107 3108 /* 3109 * Force Copy-on-write if necessary and ensure that the 3110 * synchronization object resides in read/write memory. 3111 * Cause an EFAULT return now if this is not so.
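 *
 * For a robust mutex that is still flagged LOCK_OWNERDEAD or
 * LOCK_UNMAPPED at unlock time (i.e. its state was never made
 * consistent again), the flag is promoted to LOCK_NOTRECOVERABLE
 * below and every waiter is released at once, so that each of them
 * can observe the unrecoverable state when it retries the lock.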
3112 */ 3113 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3114 suword8_noerr(&lp->mutex_type, type); 3115 3116 if (UPIMUTEX(type)) { 3117 no_fault(); 3118 error = lwp_upimutex_unlock(lp, type); 3119 if (error) 3120 return (set_errno(error)); 3121 return (0); 3122 } 3123 3124 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3125 3126 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3127 &lwpchan, LWPCHAN_MPPOOL)) { 3128 error = EFAULT; 3129 goto out; 3130 } 3131 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3132 locked = 1; 3133 if (type & LOCK_ROBUST) { 3134 fuword16_noerr(&lp->mutex_flag, &flag); 3135 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3136 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3137 flag |= LOCK_NOTRECOVERABLE; 3138 suword16_noerr(&lp->mutex_flag, flag); 3139 } 3140 } 3141 if (type & USYNC_PROCESS) 3142 suword32_noerr(&lp->mutex_ownerpid, 0); 3143 ulock_clear(&lp->mutex_lockw); 3144 /* 3145 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3146 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3147 * may fail. If it fails, do not write into the waiter bit. 3148 * The call to lwp_release() might fail due to one of three reasons: 3149 * 3150 * 1. due to the thread which set the waiter bit not actually 3151 * sleeping since it got the lock on the re-try. The waiter 3152 * bit will then be correctly updated by that thread. This 3153 * window may be closed by reading the wait bit again here 3154 * and not calling lwp_release() at all if it is zero. 3155 * 2. the thread which set the waiter bit and went to sleep 3156 * was woken up by a signal. This time, the waiter recomputes 3157 * the wait bit in the return with EINTR code. 3158 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3159 * memory that has been re-used after the lock was dropped. 3160 * In this case, writing into the waiter bit would cause data 3161 * corruption. 3162 */ 3163 fuword8_noerr(&lp->mutex_waiters, &waiters); 3164 if (waiters) { 3165 if ((type & LOCK_ROBUST) && 3166 (flag & LOCK_NOTRECOVERABLE)) { 3167 lwp_release_all(&lwpchan); 3168 suword8_noerr(&lp->mutex_waiters, 0); 3169 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3170 suword8_noerr(&lp->mutex_waiters, waiters); 3171 } 3172 } 3173 3174 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3175 out: 3176 no_fault(); 3177 if (watched) 3178 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3179 if (error) 3180 return (set_errno(error)); 3181 return (0); 3182 } 3183