/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread.  This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps, is added for the number of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
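
/*
 * Illustrative note (not part of the original source): maxnestupimx is the
 * /etc/system knob mentioned above.  A minimal sketch of raising the
 * per-thread limit, assuming the usual "set" syntax for tunables, would be
 * a line such as the following in /etc/system, followed by a reboot:
 *
 *	set maxnestupimx = 4000
 */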

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
			    == (USYNC_PROCESS | LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}
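
/*
 * Illustrative note (not part of the original source): the chain length
 * returned above is the growth trigger used by lwpchan_get_mapping(),
 * below.  When a lookup walks a chain longer than lwpchan_bits + 2
 * entries and the table is not yet at LWPCHAN_MAX_BITS, the table is
 * doubled via lwpchan_alloc_cache(p, bits + 1), so chains stay short as
 * the number of process-shared sync objects grows.
 */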

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t	memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 &&	/* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}
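
/*
 * Illustrative sketch (not part of the original source) of the two lwpchan
 * forms produced by get_lwpchan(), below.  For a process-private object
 * (no USYNC_PROCESS bit) at user address 0x2000 in address space `as`:
 *
 *	lc_wchan0 = (caddr_t)as;	lc_wchan = (caddr_t)0x2000;
 *
 * For a process-shared object, the pair comes from as_getmemid(), typically
 * a vnode/offset-style identity, so that every process mapping the same
 * backing page computes the same lwpchan and sleeps on the same queue.
 */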

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list.  Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t.  This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep,
	 * the following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned.  Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit.  Waiters might have been
			 * interrupted.  No need to clear the w bit (upimutex
			 * will soon be freed).  Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL)  {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 * Put the lwp in an orderly state for debugging.
	 * Calling prstop() has to be done here, and not in
	 * turnstile_block(), since the preceding call to
	 * turnstile_lookup() raises the PIL to a level
	 * at which calls to prstop() should not be made.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	prstop(PR_REQUESTED, 0);
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit.  Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired.  Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up.  Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup().  All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block().  turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted.  The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count.  So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.  If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held.  Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event.  The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not.  In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew).  curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	if (type & USYNC_PROCESS)
		suword32_noerr(&lp->mutex_ownerpid, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	suword32_noerr((uint32_t *)&lp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0);
	suword32_noerr(&lp->mutex_ownerpid, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}
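
/*
 * Illustrative summary (not part of the original source) of the robust
 * upimutex flag transitions driven by the routines above:
 *
 *	holder dies or is torn down	-> mutex_flag |= LOCK_OWNERDEAD
 *					   (or LOCK_UNMAPPED on exec)
 *	next owner unlocks without
 *	first making it consistent	-> mutex_flag = LOCK_NOTRECOVERABLE
 *
 * A later lwp_upimutex_lock() maps these states to EOWNERDEAD,
 * ELOCKUNMAPPED and ENOTRECOVERABLE respectively, as seen in that routine.
 */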

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up.  Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed.  If there is a waiter, the lock will be
			 * handed off.  If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies.  The last owner will free kmem, since
			 * it will find the upimutex has no waiters.  So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	static int iswanted();
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state.  If the thread indeed blocks,
	 * this state becomes valid.  If not, the state is converted back to
	 * LMS_SYSTEM.  So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		/*
		 * Put the lwp in an orderly state for debugging.
		 */
		prstop(PR_REQUESTED, 0);
		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit.  The waiters field in
			 * the lock is not reliable.  Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_mutex_lock() interface, no longer called from libc.
 * libc now calls lwp_mutex_timedlock(lp, NULL).
 * This system call trap continues to exist solely for the benefit
 * of old statically-linked binaries from Solaris 9 and before.
 * It should be removed from the system when we no longer care
 * about such applications.
 */
int
lwp_mutex_lock(lwp_mutex_t *lp)
{
	return (lwp_mutex_timedlock(lp, NULL));
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}
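
/*
 * Added summary (inferred from the code below, not in the original source):
 * lwp_release() wakes the first (highest priority) thread sleeping on
 * lwpchan whose wait type matches sync_type, sets *waiters to whether any
 * further matching sleepers remain, and returns 1.  It returns 0, leaving
 * *waiters alone for a sync-type mismatch or set to 0 otherwise, when no
 * suitable sleeper is found.
 */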

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false.  It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but
			 * has since been re-used to hold a lwp cv or lwp
			 * semaphore.  The thread "tp" found to match the lwp
			 * lock's wchan is actually sleeping for the cv or
			 * semaphore which now has the same wchan.  In this
			 * case, lwp_release() should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);		/* drop sleep q lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex.  the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan.  The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock().  The call to
	 * lwp_release() may fail.  If it fails, do not write into the waiter
	 * bit.  The call to lwp_release() might fail due to one of three
	 * reasons:
	 *
	 * 1. due to the thread which set the waiter bit not actually
	 *    sleeping since it got the lock on the re-try.  The waiter
	 *    bit will then be correctly updated by that thread.  This
	 *    window may be closed by reading the wait bit again here
	 *    and not calling lwp_release() at all if it is zero.
	 * 2. the thread which set the waiter bit and went to sleep
	 *    was woken up by a signal.  This time, the waiter recomputes
	 *    the wait bit in the return with EINTR code.
	 * 3. the waiter bit read by lwp_mutex_wakeup() was in
	 *    memory that has been re-used after the lock was dropped.
	 *    In this case, writing into the waiter bit would cause data
	 *    corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex.  (pagefaults are
		 * possible here.)
		 */
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs.  In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	/*
	 * Put the lwp in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}
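
/*
 * Illustrative sketch (not part of the original source) of how a user-level
 * condition wait is expected to drive lwp_cond_wait(), above, and
 * lwp_cond_signal(), below:
 *
 *	(user)   acquire mp; find the predicate false
 *	(kernel) lwp_cond_wait(cv, mp, tsp, ...): mark cv as having kernel
 *		 waiters, drop mp, sleep on cv's lwpchan, wake on a signal,
 *		 timeout or lwp_cond_signal()/lwp_cond_broadcast()
 *	(user)   reacquire mp (the kernel never re-locks it; see the
 *		 "mutex is reacquired by the caller" comment above),
 *		 re-test the predicate and wait again on spurious wakeup
 */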
1817 */ 1818 int 1819 lwp_cond_signal(lwp_cond_t *cv) 1820 { 1821 proc_t *p = ttoproc(curthread); 1822 lwpchan_t lwpchan; 1823 uchar_t waiters; 1824 volatile uint16_t type = 0; 1825 volatile int locked = 0; 1826 volatile int watched = 0; 1827 label_t ljb; 1828 int error = 0; 1829 1830 if ((caddr_t)cv >= p->p_as->a_userlimit) 1831 return (set_errno(EFAULT)); 1832 1833 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1834 1835 if (on_fault(&ljb)) { 1836 if (locked) 1837 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1838 error = EFAULT; 1839 goto out; 1840 } 1841 /* 1842 * Force Copy-on-write if necessary and ensure that the 1843 * synchronization object resides in read/write memory. 1844 * Cause an EFAULT return now if this is not so. 1845 */ 1846 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1847 suword16_noerr(&cv->cond_type, type); 1848 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1849 &lwpchan, LWPCHAN_CVPOOL)) { 1850 error = EFAULT; 1851 goto out; 1852 } 1853 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1854 locked = 1; 1855 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1856 if (waiters != 0) { 1857 /* 1858 * The following call to lwp_release() might fail but it is 1859 * OK to write into the waiters bit below, since the memory 1860 * could not have been re-used or unmapped (for correctly 1861 * written user programs) as in the case of lwp_mutex_wakeup(). 1862 * For an incorrect program, we should not care about data 1863 * corruption since this is just one instance of other places 1864 * where corruption can occur for such a program. Of course 1865 * if the memory is unmapped, normal fault recovery occurs. 1866 */ 1867 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1868 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1869 } 1870 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1871 out: 1872 no_fault(); 1873 if (watched) 1874 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1875 if (error) 1876 return (set_errno(error)); 1877 return (0); 1878 } 1879 1880 /* 1881 * wakeup every lwp that's blocked on this condition variable. 1882 */ 1883 int 1884 lwp_cond_broadcast(lwp_cond_t *cv) 1885 { 1886 proc_t *p = ttoproc(curthread); 1887 lwpchan_t lwpchan; 1888 volatile uint16_t type = 0; 1889 volatile int locked = 0; 1890 volatile int watched = 0; 1891 label_t ljb; 1892 uchar_t waiters; 1893 int error = 0; 1894 1895 if ((caddr_t)cv >= p->p_as->a_userlimit) 1896 return (set_errno(EFAULT)); 1897 1898 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1899 1900 if (on_fault(&ljb)) { 1901 if (locked) 1902 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1903 error = EFAULT; 1904 goto out; 1905 } 1906 /* 1907 * Force Copy-on-write if necessary and ensure that the 1908 * synchronization object resides in read/write memory. 1909 * Cause an EFAULT return now if this is not so. 
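 * (The fuword/suword pair below is the idiom used for this throughout
 * this file: reading the type and immediately storing the same value
 * back forces a write fault on a read-only or copy-on-write mapping,
 * and any such fault is taken here, under the on_fault() handler
 * above, rather than later while the lwpchan lock is held.)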
1910 */ 1911 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1912 suword16_noerr(&cv->cond_type, type); 1913 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1914 &lwpchan, LWPCHAN_CVPOOL)) { 1915 error = EFAULT; 1916 goto out; 1917 } 1918 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1919 locked = 1; 1920 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1921 if (waiters != 0) { 1922 lwp_release_all(&lwpchan); 1923 suword8_noerr(&cv->cond_waiters_kernel, 0); 1924 } 1925 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1926 out: 1927 no_fault(); 1928 if (watched) 1929 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1930 if (error) 1931 return (set_errno(error)); 1932 return (0); 1933 } 1934 1935 int 1936 lwp_sema_trywait(lwp_sema_t *sp) 1937 { 1938 kthread_t *t = curthread; 1939 proc_t *p = ttoproc(t); 1940 label_t ljb; 1941 volatile int locked = 0; 1942 volatile int watched = 0; 1943 volatile uint16_t type = 0; 1944 int count; 1945 lwpchan_t lwpchan; 1946 uchar_t waiters; 1947 int error = 0; 1948 1949 if ((caddr_t)sp >= p->p_as->a_userlimit) 1950 return (set_errno(EFAULT)); 1951 1952 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1953 1954 if (on_fault(&ljb)) { 1955 if (locked) 1956 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1957 error = EFAULT; 1958 goto out; 1959 } 1960 /* 1961 * Force Copy-on-write if necessary and ensure that the 1962 * synchronization object resides in read/write memory. 1963 * Cause an EFAULT return now if this is not so. 1964 */ 1965 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1966 suword16_noerr((void *)&sp->sema_type, type); 1967 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1968 &lwpchan, LWPCHAN_CVPOOL)) { 1969 error = EFAULT; 1970 goto out; 1971 } 1972 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1973 locked = 1; 1974 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1975 if (count == 0) 1976 error = EBUSY; 1977 else 1978 suword32_noerr((void *)&sp->sema_count, --count); 1979 if (count != 0) { 1980 fuword8_noerr(&sp->sema_waiters, &waiters); 1981 if (waiters != 0) { 1982 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1983 suword8_noerr(&sp->sema_waiters, waiters); 1984 } 1985 } 1986 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1987 out: 1988 no_fault(); 1989 if (watched) 1990 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1991 if (error) 1992 return (set_errno(error)); 1993 return (0); 1994 } 1995 1996 /* 1997 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 
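 * Callers are expected to retry on EINTR.  ETIME is returned only when
 * the optional timeout has actually expired, and a malformed timeout is
 * reported only if we would otherwise have gone to sleep (see the
 * SUSV3 note in the wait loop below).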
1998 */ 1999 int 2000 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2001 { 2002 kthread_t *t = curthread; 2003 klwp_t *lwp = ttolwp(t); 2004 proc_t *p = ttoproc(t); 2005 lwp_timer_t lwpt; 2006 caddr_t timedwait; 2007 clock_t tim = -1; 2008 label_t ljb; 2009 volatile int locked = 0; 2010 volatile int watched = 0; 2011 volatile uint16_t type = 0; 2012 int count; 2013 lwpchan_t lwpchan; 2014 uchar_t waiters; 2015 int error = 0; 2016 int time_error; 2017 int imm_timeout = 0; 2018 int imm_unpark = 0; 2019 2020 if ((caddr_t)sp >= p->p_as->a_userlimit) 2021 return (set_errno(EFAULT)); 2022 2023 timedwait = (caddr_t)tsp; 2024 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2025 lwpt.lwpt_imm_timeout) { 2026 imm_timeout = 1; 2027 timedwait = NULL; 2028 } 2029 2030 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2031 2032 if (on_fault(&ljb)) { 2033 if (locked) 2034 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2035 error = EFAULT; 2036 goto out; 2037 } 2038 /* 2039 * Force Copy-on-write if necessary and ensure that the 2040 * synchronization object resides in read/write memory. 2041 * Cause an EFAULT return now if this is not so. 2042 */ 2043 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2044 suword16_noerr((void *)&sp->sema_type, type); 2045 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2046 &lwpchan, LWPCHAN_CVPOOL)) { 2047 error = EFAULT; 2048 goto out; 2049 } 2050 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2051 locked = 1; 2052 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2053 while (error == 0 && count == 0) { 2054 if (time_error) { 2055 /* 2056 * The SUSV3 Posix spec is very clear that we 2057 * should get no error from validating the 2058 * timer until we would actually sleep. 2059 */ 2060 error = time_error; 2061 break; 2062 } 2063 suword8_noerr(&sp->sema_waiters, 1); 2064 if (watched) 2065 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2066 /* 2067 * Put the lwp in an orderly state for debugging. 2068 */ 2069 prstop(PR_REQUESTED, 0); 2070 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2071 /* 2072 * We received a signal at user-level before calling 2073 * here or another thread wants us to return 2074 * immediately with EINTR. See lwp_unpark(). 2075 */ 2076 imm_unpark = 1; 2077 t->t_unpark = 0; 2078 timedwait = NULL; 2079 } else if (timedwait) { 2080 /* 2081 * If we successfully queue the timeout, 2082 * then don't drop t_delay_lock until 2083 * we are on the sleep queue (below). 2084 */ 2085 mutex_enter(&t->t_delay_lock); 2086 if (lwp_timer_enqueue(&lwpt) != 0) { 2087 mutex_exit(&t->t_delay_lock); 2088 imm_timeout = 1; 2089 timedwait = NULL; 2090 } 2091 } 2092 t->t_flag |= T_WAITCVSEM; 2093 lwp_block(&lwpchan); 2094 /* 2095 * Nothing should happen to cause the lwp to sleep 2096 * again until after it returns from swtch(). 
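 * (When a timeout was queued, t_delay_lock is still held here and is
 * dropped just below, after lwp_block() has put us on the sleep queue;
 * this is what keeps the timeout from waking us prematurely.)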
2097 */ 2098 if (timedwait) 2099 mutex_exit(&t->t_delay_lock); 2100 locked = 0; 2101 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2102 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2103 (imm_timeout | imm_unpark)) 2104 setrun(t); 2105 swtch(); 2106 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2107 if (timedwait) 2108 tim = lwp_timer_dequeue(&lwpt); 2109 setallwatch(); 2110 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2111 MUSTRETURN(p, t) || imm_unpark) 2112 error = EINTR; 2113 else if (imm_timeout || (timedwait && tim == -1)) 2114 error = ETIME; 2115 lwp->lwp_asleep = 0; 2116 lwp->lwp_sysabort = 0; 2117 watched = watch_disable_addr((caddr_t)sp, 2118 sizeof (*sp), S_WRITE); 2119 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2120 locked = 1; 2121 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2122 } 2123 if (error == 0) 2124 suword32_noerr((void *)&sp->sema_count, --count); 2125 if (count != 0) { 2126 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2127 suword8_noerr(&sp->sema_waiters, waiters); 2128 } 2129 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2130 out: 2131 no_fault(); 2132 if (watched) 2133 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2134 if (tsp && check_park && !time_error) 2135 error = lwp_timer_copyout(&lwpt, error); 2136 if (error) 2137 return (set_errno(error)); 2138 return (0); 2139 } 2140 2141 /* 2142 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2143 * libc now calls lwp_sema_timedwait(). 2144 * This system call trap exists solely for the benefit of old 2145 * statically linked applications from Solaris 9 and before. 2146 * It should be removed when we no longer care about such applications. 2147 */ 2148 int 2149 lwp_sema_wait(lwp_sema_t *sp) 2150 { 2151 return (lwp_sema_timedwait(sp, NULL, 0)); 2152 } 2153 2154 int 2155 lwp_sema_post(lwp_sema_t *sp) 2156 { 2157 proc_t *p = ttoproc(curthread); 2158 label_t ljb; 2159 volatile int locked = 0; 2160 volatile int watched = 0; 2161 volatile uint16_t type = 0; 2162 int count; 2163 lwpchan_t lwpchan; 2164 uchar_t waiters; 2165 int error = 0; 2166 2167 if ((caddr_t)sp >= p->p_as->a_userlimit) 2168 return (set_errno(EFAULT)); 2169 2170 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2171 2172 if (on_fault(&ljb)) { 2173 if (locked) 2174 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2175 error = EFAULT; 2176 goto out; 2177 } 2178 /* 2179 * Force Copy-on-write if necessary and ensure that the 2180 * synchronization object resides in read/write memory. 2181 * Cause an EFAULT return now if this is not so. 
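 * Note that an lwp can be blocked on the semaphore only while the count
 * is zero, so the wakeup below is needed only when this post takes the
 * count from 0 to 1; later posts simply increment the count, up to
 * _SEM_VALUE_MAX (EOVERFLOW beyond that).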
2182 */ 2183 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2184 suword16_noerr(&sp->sema_type, type); 2185 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2186 &lwpchan, LWPCHAN_CVPOOL)) { 2187 error = EFAULT; 2188 goto out; 2189 } 2190 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2191 locked = 1; 2192 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2193 if (count == _SEM_VALUE_MAX) 2194 error = EOVERFLOW; 2195 else 2196 suword32_noerr(&sp->sema_count, ++count); 2197 if (count == 1) { 2198 fuword8_noerr(&sp->sema_waiters, &waiters); 2199 if (waiters) { 2200 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2201 suword8_noerr(&sp->sema_waiters, waiters); 2202 } 2203 } 2204 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2205 out: 2206 no_fault(); 2207 if (watched) 2208 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2209 if (error) 2210 return (set_errno(error)); 2211 return (0); 2212 } 2213 2214 #define TRW_WANT_WRITE 0x1 2215 #define TRW_LOCK_GRANTED 0x2 2216 2217 #define READ_LOCK 0 2218 #define WRITE_LOCK 1 2219 #define TRY_FLAG 0x10 2220 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2221 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2222 2223 /* 2224 * Release one writer or one or more readers. Compute the rwstate word to 2225 * reflect the new state of the queue. For a safe hand-off we copy the new 2226 * rwstate value back to userland before we wake any of the new lock holders. 2227 * 2228 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2229 * being given precedence over readers of the same priority). 2230 * 2231 * If the first thread is a reader we scan the queue releasing all readers 2232 * until we hit a writer or the end of the queue. If the first thread is a 2233 * writer we still need to check for another writer. 2234 */ 2235 void 2236 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2237 { 2238 sleepq_head_t *sqh; 2239 kthread_t *tp; 2240 kthread_t **tpp; 2241 kthread_t *tpnext; 2242 kthread_t *wakelist = NULL; 2243 uint32_t rwstate = 0; 2244 int wcount = 0; 2245 int rcount = 0; 2246 2247 sqh = lwpsqhash(lwpchan); 2248 disp_lock_enter(&sqh->sq_lock); 2249 tpp = &sqh->sq_queue.sq_first; 2250 while ((tp = *tpp) != NULL) { 2251 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2252 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2253 if (tp->t_writer & TRW_WANT_WRITE) { 2254 if ((wcount++ == 0) && (rcount == 0)) { 2255 rwstate |= URW_WRITE_LOCKED; 2256 2257 /* Just one writer to wake. */ 2258 sleepq_unlink(tpp, tp); 2259 wakelist = tp; 2260 2261 /* tpp already set for next thread. */ 2262 continue; 2263 } else { 2264 rwstate |= URW_HAS_WAITERS; 2265 /* We need look no further. */ 2266 break; 2267 } 2268 } else { 2269 rcount++; 2270 if (wcount == 0) { 2271 rwstate++; 2272 2273 /* Add reader to wake list. */ 2274 sleepq_unlink(tpp, tp); 2275 tp->t_link = wakelist; 2276 wakelist = tp; 2277 2278 /* tpp already set for next thread. */ 2279 continue; 2280 } else { 2281 rwstate |= URW_HAS_WAITERS; 2282 /* We need look no further. */ 2283 break; 2284 } 2285 } 2286 } 2287 tpp = &tp->t_link; 2288 } 2289 2290 /* Copy the new rwstate back to userland. */ 2291 suword32_noerr(&rw->rwlock_readers, rwstate); 2292 2293 /* Wake the new lock holder(s) up. 
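 * The updated rwstate was copied out above, before any waiter is made
 * runnable, so a granted thread can never observe a stale rwlock word.
 * Each granted thread learns that it owns the lock from the
 * TRW_LOCK_GRANTED bit set below rather than by re-reading rwstate
 * (see lwp_rwlock_lock()).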
*/ 2294 tp = wakelist; 2295 while (tp != NULL) { 2296 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2297 tp->t_wchan0 = NULL; 2298 tp->t_wchan = NULL; 2299 tp->t_sobj_ops = NULL; 2300 tp->t_writer |= TRW_LOCK_GRANTED; 2301 tpnext = tp->t_link; 2302 tp->t_link = NULL; 2303 CL_WAKEUP(tp); 2304 thread_unlock_high(tp); 2305 tp = tpnext; 2306 } 2307 2308 disp_lock_exit(&sqh->sq_lock); 2309 } 2310 2311 /* 2312 * We enter here holding the user-level mutex, which we must release before 2313 * returning or blocking. Based on lwp_cond_wait(). 2314 */ 2315 static int 2316 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2317 { 2318 lwp_mutex_t *mp = NULL; 2319 kthread_t *t = curthread; 2320 kthread_t *tp; 2321 klwp_t *lwp = ttolwp(t); 2322 proc_t *p = ttoproc(t); 2323 lwp_timer_t lwpt; 2324 lwpchan_t lwpchan; 2325 lwpchan_t mlwpchan; 2326 caddr_t timedwait; 2327 volatile uint16_t type = 0; 2328 volatile uint8_t mtype = 0; 2329 uchar_t mwaiters; 2330 volatile int error = 0; 2331 int time_error; 2332 clock_t tim = -1; 2333 volatile int locked = 0; 2334 volatile int mlocked = 0; 2335 volatile int watched = 0; 2336 volatile int mwatched = 0; 2337 label_t ljb; 2338 volatile int no_lwpchan = 1; 2339 int imm_timeout = 0; 2340 int try_flag; 2341 uint32_t rwstate; 2342 int acquired = 0; 2343 2344 /* We only check rw because the mutex is included in it. */ 2345 if ((caddr_t)rw >= p->p_as->a_userlimit) 2346 return (set_errno(EFAULT)); 2347 2348 /* We must only report this error if we are about to sleep (later). */ 2349 timedwait = (caddr_t)tsp; 2350 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2351 lwpt.lwpt_imm_timeout) { 2352 imm_timeout = 1; 2353 timedwait = NULL; 2354 } 2355 2356 (void) new_mstate(t, LMS_USER_LOCK); 2357 2358 if (on_fault(&ljb)) { 2359 if (no_lwpchan) { 2360 error = EFAULT; 2361 goto out_nodrop; 2362 } 2363 if (mlocked) { 2364 mlocked = 0; 2365 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2366 } 2367 if (locked) { 2368 locked = 0; 2369 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2370 } 2371 /* 2372 * Set up another on_fault() for a possible fault 2373 * on the user lock accessed at "out_drop". 2374 */ 2375 if (on_fault(&ljb)) { 2376 if (mlocked) { 2377 mlocked = 0; 2378 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2379 } 2380 error = EFAULT; 2381 goto out_nodrop; 2382 } 2383 error = EFAULT; 2384 goto out_nodrop; 2385 } 2386 2387 /* Process rd_wr (including sanity check). */ 2388 try_flag = (rd_wr & TRY_FLAG); 2389 rd_wr &= ~TRY_FLAG; 2390 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2391 error = EINVAL; 2392 goto out_nodrop; 2393 } 2394 2395 /* 2396 * Force Copy-on-write if necessary and ensure that the 2397 * synchronization object resides in read/write memory. 2398 * Cause an EFAULT return now if this is not so. 2399 */ 2400 mp = &rw->mutex; 2401 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2402 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2403 suword8_noerr(&mp->mutex_type, mtype); 2404 suword16_noerr(&rw->rwlock_type, type); 2405 2406 /* We can only continue for simple USYNC_PROCESS locks. */ 2407 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2408 error = EINVAL; 2409 goto out_nodrop; 2410 } 2411 2412 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2413 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2414 &mlwpchan, LWPCHAN_MPPOOL)) { 2415 error = EFAULT; 2416 goto out_nodrop; 2417 } 2418 2419 /* Convert user level rwlock, "rw", to a unique lwpchan. 
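 * Note that the mutex was hashed into the MP pool above and the rwlock
 * is hashed into the CV pool here, so the two lwpchan locks taken below
 * are distinct kernel mutexes and can safely be held together.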
*/ 2420 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2421 &lwpchan, LWPCHAN_CVPOOL)) { 2422 error = EFAULT; 2423 goto out_nodrop; 2424 } 2425 2426 no_lwpchan = 0; 2427 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2428 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2429 2430 /* 2431 * lwpchan_lock() ensures that the calling LWP is put to sleep 2432 * atomically with respect to a possible wakeup which is a result 2433 * of lwp_rwlock_unlock(). 2434 * 2435 * What's misleading is that the LWP is put to sleep after the 2436 * rwlock's mutex is released. This is OK as long as the release 2437 * operation is also done while holding mlwpchan. The LWP is then 2438 * put to sleep when the possibility of pagefaulting or sleeping 2439 * has been completely eliminated. 2440 */ 2441 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2442 locked = 1; 2443 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2444 mlocked = 1; 2445 2446 /* 2447 * Fetch the current rwlock state. 2448 * 2449 * The possibility of spurious wake-ups or killed waiters means 2450 * rwstate's URW_HAS_WAITERS bit may indicate false positives. 2451 * We only fix these if they are important to us. 2452 * 2453 * Although various error states can be observed here (e.g. the lock 2454 * is not held, but there are waiters) we assume these are application 2455 * errors and so we take no corrective action. 2456 */ 2457 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2458 /* 2459 * We cannot legitimately get here from user-level 2460 * without URW_HAS_WAITERS being set. 2461 * Set it now to guard against user-level error. 2462 */ 2463 rwstate |= URW_HAS_WAITERS; 2464 2465 /* 2466 * We can try only if the lock isn't held by a writer. 2467 */ 2468 if (!(rwstate & URW_WRITE_LOCKED)) { 2469 tp = lwp_queue_waiter(&lwpchan); 2470 if (tp == NULL) { 2471 /* 2472 * Hmmm, rwstate indicates waiters but there are 2473 * none queued. This could just be the result of a 2474 * spurious wakeup, so let's ignore it. 2475 * 2476 * We now have a chance to acquire the lock 2477 * uncontended, but this is the last chance for 2478 * a writer to acquire the lock without blocking. 2479 */ 2480 if (rd_wr == READ_LOCK) { 2481 rwstate++; 2482 acquired = 1; 2483 } else if ((rwstate & URW_READERS_MASK) == 0) { 2484 rwstate |= URW_WRITE_LOCKED; 2485 acquired = 1; 2486 } 2487 } else if (rd_wr == READ_LOCK) { 2488 /* 2489 * This is the last chance for a reader to acquire 2490 * the lock now, but it can only do so if there is 2491 * no writer of equal or greater priority at the 2492 * head of the queue. 2493 * 2494 * It is also just possible that there is a reader 2495 * at the head of the queue. This may be the result 2496 * of a spurious wakeup or an application failure. 2497 * In this case we only acquire the lock if we have 2498 * equal or greater priority. It is not our job to 2499 * release spurious waiters. 2500 */ 2501 pri_t our_pri = DISP_PRIO(t); 2502 pri_t his_pri = DISP_PRIO(tp); 2503 2504 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2505 !(tp->t_writer & TRW_WANT_WRITE))) { 2506 rwstate++; 2507 acquired = 1; 2508 } 2509 } 2510 } 2511 2512 if (acquired || try_flag || time_error) { 2513 /* 2514 * We're not going to block this time. 2515 */ 2516 suword32_noerr(&rw->rwlock_readers, rwstate); 2517 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2518 locked = 0; 2519 2520 if (acquired) { 2521 /* 2522 * Got the lock! 2523 */ 2524 error = 0; 2525 2526 } else if (try_flag) { 2527 /* 2528 * We didn't get the lock and we're about to block.
2529 * If we're doing a trylock, return EBUSY instead. 2530 */ 2531 error = EBUSY; 2532 2533 } else if (time_error) { 2534 /* 2535 * The SUSV3 POSIX spec is very clear that we should 2536 * get no error from validating the timer (above) 2537 * until we would actually sleep. 2538 */ 2539 error = time_error; 2540 } 2541 2542 goto out_drop; 2543 } 2544 2545 /* 2546 * We're about to block, so indicate what kind of waiter we are. 2547 */ 2548 t->t_writer = 0; 2549 if (rd_wr == WRITE_LOCK) 2550 t->t_writer = TRW_WANT_WRITE; 2551 suword32_noerr(&rw->rwlock_readers, rwstate); 2552 2553 /* 2554 * Unlock the rwlock's mutex (pagefaults are possible here). 2555 */ 2556 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2557 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2558 suword32_noerr(&mp->mutex_ownerpid, 0); 2559 ulock_clear(&mp->mutex_lockw); 2560 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2561 if (mwaiters != 0) { 2562 /* 2563 * Given the locking of mlwpchan around the release of 2564 * the mutex and checking for waiters, the following 2565 * call to lwp_release() can fail ONLY if the lock 2566 * acquirer is interrupted after setting the waiter bit, 2567 * calling lwp_block() and releasing mlwpchan. 2568 * In this case, it could get pulled off the LWP sleep 2569 * queue (via setrun()) before the following call to 2570 * lwp_release() occurs, and the lock requestor will 2571 * update the waiter bit correctly by re-evaluating it. 2572 */ 2573 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2574 suword8_noerr(&mp->mutex_waiters, mwaiters); 2575 } 2576 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2577 mlocked = 0; 2578 no_fault(); 2579 2580 if (mwatched) { 2581 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2582 mwatched = 0; 2583 } 2584 if (watched) { 2585 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2586 watched = 0; 2587 } 2588 2589 /* 2590 * Put the LWP in an orderly state for debugging. 2591 */ 2592 prstop(PR_REQUESTED, 0); 2593 if (timedwait) { 2594 /* 2595 * If we successfully queue the timeout, 2596 * then don't drop t_delay_lock until 2597 * we are on the sleep queue (below). 2598 */ 2599 mutex_enter(&t->t_delay_lock); 2600 if (lwp_timer_enqueue(&lwpt) != 0) { 2601 mutex_exit(&t->t_delay_lock); 2602 imm_timeout = 1; 2603 timedwait = NULL; 2604 } 2605 } 2606 t->t_flag |= T_WAITCVSEM; 2607 lwp_block(&lwpchan); 2608 2609 /* 2610 * Nothing should happen to cause the LWp to go to sleep until after 2611 * it returns from swtch(). 2612 */ 2613 if (timedwait) 2614 mutex_exit(&t->t_delay_lock); 2615 locked = 0; 2616 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2617 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 2618 setrun(t); 2619 swtch(); 2620 2621 /* 2622 * We're back, but we need to work out why. Were we interrupted? Did 2623 * we timeout? Were we granted the lock? 2624 */ 2625 error = EAGAIN; 2626 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2627 t->t_writer = 0; 2628 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2629 if (timedwait) 2630 tim = lwp_timer_dequeue(&lwpt); 2631 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2632 error = EINTR; 2633 else if (imm_timeout || (timedwait && tim == -1)) 2634 error = ETIME; 2635 lwp->lwp_asleep = 0; 2636 lwp->lwp_sysabort = 0; 2637 setallwatch(); 2638 2639 /* 2640 * If we were granted the lock we don't care about EINTR or ETIME. 
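 * The thread that released the lock has already counted us in the
 * rwlock word and set TRW_LOCK_GRANTED (see lwp_rwlock_release()), so
 * returning an error here would leak that hold; the caller just sees
 * a successful, if late, acquisition.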
2641 */ 2642 if (acquired) 2643 error = 0; 2644 2645 if (t->t_mstate == LMS_USER_LOCK) 2646 (void) new_mstate(t, LMS_SYSTEM); 2647 2648 if (error) 2649 return (set_errno(error)); 2650 return (0); 2651 2652 out_drop: 2653 /* 2654 * Make sure that the user level lock is dropped before returning 2655 * to the caller. 2656 */ 2657 if (!mlocked) { 2658 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2659 mlocked = 1; 2660 } 2661 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2662 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2663 suword32_noerr(&mp->mutex_ownerpid, 0); 2664 ulock_clear(&mp->mutex_lockw); 2665 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2666 if (mwaiters != 0) { 2667 /* 2668 * See comment above on lock clearing and lwp_release() 2669 * success/failure. 2670 */ 2671 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2672 suword8_noerr(&mp->mutex_waiters, mwaiters); 2673 } 2674 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2675 mlocked = 0; 2676 2677 out_nodrop: 2678 no_fault(); 2679 if (mwatched) 2680 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2681 if (watched) 2682 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2683 if (t->t_mstate == LMS_USER_LOCK) 2684 (void) new_mstate(t, LMS_SYSTEM); 2685 if (error) 2686 return (set_errno(error)); 2687 return (0); 2688 } 2689 2690 /* 2691 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2692 * we never drop the lock. 2693 */ 2694 static int 2695 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2696 { 2697 kthread_t *t = curthread; 2698 proc_t *p = ttoproc(t); 2699 lwpchan_t lwpchan; 2700 volatile uint16_t type = 0; 2701 volatile int error = 0; 2702 volatile int locked = 0; 2703 volatile int watched = 0; 2704 label_t ljb; 2705 volatile int no_lwpchan = 1; 2706 uint32_t rwstate; 2707 2708 /* We only check rw because the mutex is included in it. */ 2709 if ((caddr_t)rw >= p->p_as->a_userlimit) 2710 return (set_errno(EFAULT)); 2711 2712 if (on_fault(&ljb)) { 2713 if (no_lwpchan) { 2714 error = EFAULT; 2715 goto out_nodrop; 2716 } 2717 if (locked) { 2718 locked = 0; 2719 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2720 } 2721 error = EFAULT; 2722 goto out_nodrop; 2723 } 2724 2725 /* 2726 * Force Copy-on-write if necessary and ensure that the 2727 * synchronization object resides in read/write memory. 2728 * Cause an EFAULT return now if this is not so. 2729 */ 2730 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2731 suword16_noerr(&rw->rwlock_type, type); 2732 2733 /* We can only continue for simple USYNC_PROCESS locks. */ 2734 if (type != USYNC_PROCESS) { 2735 error = EINVAL; 2736 goto out_nodrop; 2737 } 2738 2739 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2740 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2741 &lwpchan, LWPCHAN_CVPOOL)) { 2742 error = EFAULT; 2743 goto out_nodrop; 2744 } 2745 2746 no_lwpchan = 0; 2747 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2748 2749 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2750 locked = 1; 2751 2752 /* 2753 * We can resolve multiple readers (except the last reader) here. 2754 * For the last reader or a writer we need lwp_rwlock_release(), 2755 * to which we also delegate the task of copying the new rwstate 2756 * back to userland (see the comment there). 
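 *
 * For example, with three readers holding the lock, the first two
 * unlocks simply decrement the reader count below; the third sees the
 * count drop to zero and calls lwp_rwlock_release() so that a queued
 * writer, if any, can be granted the lock.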
2757 */ 2758 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2759 if (rwstate & URW_WRITE_LOCKED) 2760 lwp_rwlock_release(&lwpchan, rw); 2761 else if ((rwstate & URW_READERS_MASK) > 0) { 2762 rwstate--; 2763 if ((rwstate & URW_READERS_MASK) == 0) 2764 lwp_rwlock_release(&lwpchan, rw); 2765 else 2766 suword32_noerr(&rw->rwlock_readers, rwstate); 2767 } 2768 2769 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2770 locked = 0; 2771 error = 0; 2772 2773 out_nodrop: 2774 no_fault(); 2775 if (watched) 2776 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2777 if (error) 2778 return (set_errno(error)); 2779 return (0); 2780 } 2781 2782 int 2783 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2784 { 2785 switch (subcode) { 2786 case 0: 2787 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2788 case 1: 2789 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2790 case 2: 2791 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2792 case 3: 2793 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2794 case 4: 2795 return (lwp_rwlock_unlock(rwlp)); 2796 } 2797 return (set_errno(EINVAL)); 2798 } 2799 2800 /* 2801 * Return the owner of the user-level s-object. 2802 * Since we can't really do this, return NULL. 2803 */ 2804 /* ARGSUSED */ 2805 static kthread_t * 2806 lwpsobj_owner(caddr_t sobj) 2807 { 2808 return ((kthread_t *)NULL); 2809 } 2810 2811 /* 2812 * Wake up a thread asleep on a user-level synchronization 2813 * object. 2814 */ 2815 static void 2816 lwp_unsleep(kthread_t *t) 2817 { 2818 ASSERT(THREAD_LOCK_HELD(t)); 2819 if (t->t_wchan0 != NULL) { 2820 sleepq_head_t *sqh; 2821 sleepq_t *sqp = t->t_sleepq; 2822 2823 if (sqp != NULL) { 2824 sqh = lwpsqhash(&t->t_lwpchan); 2825 ASSERT(&sqh->sq_queue == sqp); 2826 sleepq_unsleep(t); 2827 disp_lock_exit_high(&sqh->sq_lock); 2828 CL_SETRUN(t); 2829 return; 2830 } 2831 } 2832 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2833 } 2834 2835 /* 2836 * Change the priority of a thread asleep on a user-level 2837 * synchronization object. To maintain proper priority order, 2838 * we: 2839 * o dequeue the thread. 2840 * o change its priority. 2841 * o re-enqueue the thread. 2842 * Assumption: the thread is locked on entry. 
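 * Since sleepq_insert() keeps each sleep queue in priority order, the
 * dequeue/re-insert below is what preserves the ordering that
 * lwp_release() and lwp_rwlock_release() rely on when picking which
 * waiter to wake first.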
2843 */ 2844 static void 2845 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2846 { 2847 ASSERT(THREAD_LOCK_HELD(t)); 2848 if (t->t_wchan0 != NULL) { 2849 sleepq_t *sqp = t->t_sleepq; 2850 2851 sleepq_dequeue(t); 2852 *t_prip = pri; 2853 sleepq_insert(sqp, t); 2854 } else 2855 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2856 } 2857 2858 /* 2859 * Clean up a left-over process-shared robust mutex 2860 */ 2861 static void 2862 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2863 { 2864 uint16_t flag; 2865 uchar_t waiters; 2866 label_t ljb; 2867 pid_t owner_pid; 2868 lwp_mutex_t *lp; 2869 volatile int locked = 0; 2870 volatile int watched = 0; 2871 volatile struct upimutex *upimutex = NULL; 2872 volatile int upilocked = 0; 2873 2874 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2875 != (USYNC_PROCESS | LOCK_ROBUST)) 2876 return; 2877 2878 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2879 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2880 if (on_fault(&ljb)) { 2881 if (locked) 2882 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2883 if (upilocked) 2884 upimutex_unlock((upimutex_t *)upimutex, 0); 2885 goto out; 2886 } 2887 2888 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2889 2890 if (UPIMUTEX(ent->lwpchan_type)) { 2891 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2892 upib_t *upibp = &UPI_CHAIN(lwpchan); 2893 2894 if (owner_pid != curproc->p_pid) 2895 goto out; 2896 mutex_enter(&upibp->upib_lock); 2897 upimutex = upi_get(upibp, &lwpchan); 2898 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2899 mutex_exit(&upibp->upib_lock); 2900 goto out; 2901 } 2902 mutex_exit(&upibp->upib_lock); 2903 upilocked = 1; 2904 flag = lwp_clear_mutex(lp, lockflg); 2905 suword8_noerr(&lp->mutex_lockw, 0); 2906 upimutex_unlock((upimutex_t *)upimutex, flag); 2907 } else { 2908 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2909 locked = 1; 2910 /* 2911 * Clear the spinners count because one of our 2912 * threads could have been spinning for this lock 2913 * at user level when the process was suddenly killed. 2914 * There is no harm in this since user-level libc code 2915 * will adapt to the sudden change in the spinner count. 2916 */ 2917 suword8_noerr(&lp->mutex_spinners, 0); 2918 if (owner_pid != curproc->p_pid) { 2919 /* 2920 * We are not the owner. There may or may not be one. 2921 * If there are waiters, we wake up one or all of them. 2922 * It doesn't hurt to wake them up in error since 2923 * they will just retry the lock and go to sleep 2924 * again if necessary. 2925 */ 2926 fuword8_noerr(&lp->mutex_waiters, &waiters); 2927 if (waiters != 0) { /* there are waiters */ 2928 fuword16_noerr(&lp->mutex_flag, &flag); 2929 if (flag & LOCK_NOTRECOVERABLE) { 2930 lwp_release_all(&ent->lwpchan_lwpchan); 2931 suword8_noerr(&lp->mutex_waiters, 0); 2932 } else if (lwp_release(&ent->lwpchan_lwpchan, 2933 &waiters, 0)) { 2934 suword8_noerr(&lp->mutex_waiters, 2935 waiters); 2936 } 2937 } 2938 } else { 2939 /* 2940 * We are the owner. Release it. 
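 * The lockflg passed in (LOCK_OWNERDEAD or LOCK_UNMAPPED) is recorded
 * in mutex_flag by lwp_clear_mutex() below, so the next thread to
 * acquire the mutex sees EOWNERDEAD or ELOCKUNMAPPED (as in
 * lwp_mutex_trylock()) and gets a chance to make the protected state
 * consistent.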
2941 */ 2942 (void) lwp_clear_mutex(lp, lockflg); 2943 ulock_clear(&lp->mutex_lockw); 2944 fuword8_noerr(&lp->mutex_waiters, &waiters); 2945 if (waiters && 2946 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2947 suword8_noerr(&lp->mutex_waiters, waiters); 2948 } 2949 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2950 } 2951 out: 2952 no_fault(); 2953 if (watched) 2954 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2955 } 2956 2957 /* 2958 * Register a process-shared robust mutex in the lwpchan cache. 2959 */ 2960 int 2961 lwp_mutex_register(lwp_mutex_t *lp) 2962 { 2963 int error = 0; 2964 volatile int watched; 2965 label_t ljb; 2966 uint8_t type; 2967 lwpchan_t lwpchan; 2968 2969 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2970 return (set_errno(EFAULT)); 2971 2972 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2973 2974 if (on_fault(&ljb)) { 2975 error = EFAULT; 2976 } else { 2977 /* 2978 * Force Copy-on-write if necessary and ensure that the 2979 * synchronization object resides in read/write memory. 2980 * Cause an EFAULT return now if this is not so. 2981 */ 2982 fuword8_noerr(&lp->mutex_type, &type); 2983 suword8_noerr(&lp->mutex_type, type); 2984 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2985 != (USYNC_PROCESS|LOCK_ROBUST)) { 2986 error = EINVAL; 2987 } else if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 2988 &lwpchan, LWPCHAN_MPPOOL)) { 2989 error = EFAULT; 2990 } 2991 } 2992 no_fault(); 2993 if (watched) 2994 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2995 if (error) 2996 return (set_errno(error)); 2997 return (0); 2998 } 2999 3000 int 3001 lwp_mutex_trylock(lwp_mutex_t *lp) 3002 { 3003 kthread_t *t = curthread; 3004 proc_t *p = ttoproc(t); 3005 int error = 0; 3006 volatile int locked = 0; 3007 volatile int watched = 0; 3008 label_t ljb; 3009 volatile uint8_t type = 0; 3010 uint16_t flag; 3011 lwpchan_t lwpchan; 3012 3013 if ((caddr_t)lp >= p->p_as->a_userlimit) 3014 return (set_errno(EFAULT)); 3015 3016 (void) new_mstate(t, LMS_USER_LOCK); 3017 3018 if (on_fault(&ljb)) { 3019 if (locked) 3020 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3021 error = EFAULT; 3022 goto out; 3023 } 3024 /* 3025 * Force Copy-on-write if necessary and ensure that the 3026 * synchronization object resides in read/write memory. 3027 * Cause an EFAULT return now if this is not so. 
3028 */ 3029 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3030 suword8_noerr(&lp->mutex_type, type); 3031 if (UPIMUTEX(type)) { 3032 no_fault(); 3033 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3034 if ((type & USYNC_PROCESS) && 3035 (error == 0 || 3036 error == EOWNERDEAD || error == ELOCKUNMAPPED)) 3037 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 3038 if (error) 3039 return (set_errno(error)); 3040 return (0); 3041 } 3042 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3043 &lwpchan, LWPCHAN_MPPOOL)) { 3044 error = EFAULT; 3045 goto out; 3046 } 3047 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3048 locked = 1; 3049 if (type & LOCK_ROBUST) { 3050 fuword16_noerr(&lp->mutex_flag, &flag); 3051 if (flag & LOCK_NOTRECOVERABLE) { 3052 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3053 error = ENOTRECOVERABLE; 3054 goto out; 3055 } 3056 } 3057 3058 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3059 3060 if (!ulock_try(&lp->mutex_lockw)) 3061 error = EBUSY; 3062 else { 3063 if (type & USYNC_PROCESS) 3064 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 3065 if (type & LOCK_ROBUST) { 3066 fuword16_noerr(&lp->mutex_flag, &flag); 3067 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3068 if (flag & LOCK_OWNERDEAD) 3069 error = EOWNERDEAD; 3070 else if (type & USYNC_PROCESS_ROBUST) 3071 error = ELOCKUNMAPPED; 3072 else 3073 error = EOWNERDEAD; 3074 } 3075 } 3076 } 3077 locked = 0; 3078 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3079 out: 3080 3081 if (t->t_mstate == LMS_USER_LOCK) 3082 (void) new_mstate(t, LMS_SYSTEM); 3083 3084 no_fault(); 3085 if (watched) 3086 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3087 if (error) 3088 return (set_errno(error)); 3089 return (0); 3090 } 3091 3092 /* 3093 * Unlock the mutex and unblock any lwps that are trying to acquire this mutex. 3094 * The blocked lwps resume and retry acquiring the lock. 3095 */ 3096 int 3097 lwp_mutex_unlock(lwp_mutex_t *lp) 3098 { 3099 proc_t *p = ttoproc(curthread); 3100 lwpchan_t lwpchan; 3101 uchar_t waiters; 3102 volatile int locked = 0; 3103 volatile int watched = 0; 3104 volatile uint8_t type = 0; 3105 label_t ljb; 3106 uint16_t flag; 3107 int error = 0; 3108 3109 if ((caddr_t)lp >= p->p_as->a_userlimit) 3110 return (set_errno(EFAULT)); 3111 3112 if (on_fault(&ljb)) { 3113 if (locked) 3114 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3115 error = EFAULT; 3116 goto out; 3117 } 3118 3119 /* 3120 * Force Copy-on-write if necessary and ensure that the 3121 * synchronization object resides in read/write memory. 3122 * Cause an EFAULT return now if this is not so.
3123 */ 3124 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3125 suword8_noerr(&lp->mutex_type, type); 3126 3127 if (UPIMUTEX(type)) { 3128 no_fault(); 3129 error = lwp_upimutex_unlock(lp, type); 3130 if (error) 3131 return (set_errno(error)); 3132 return (0); 3133 } 3134 3135 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3136 3137 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3138 &lwpchan, LWPCHAN_MPPOOL)) { 3139 error = EFAULT; 3140 goto out; 3141 } 3142 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3143 locked = 1; 3144 if (type & LOCK_ROBUST) { 3145 fuword16_noerr(&lp->mutex_flag, &flag); 3146 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3147 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3148 flag |= LOCK_NOTRECOVERABLE; 3149 suword16_noerr(&lp->mutex_flag, flag); 3150 } 3151 } 3152 if (type & USYNC_PROCESS) 3153 suword32_noerr(&lp->mutex_ownerpid, 0); 3154 ulock_clear(&lp->mutex_lockw); 3155 /* 3156 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3157 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3158 * may fail. If it fails, do not write into the waiter bit. 3159 * The call to lwp_release() might fail due to one of three reasons: 3160 * 3161 * 1. due to the thread which set the waiter bit not actually 3162 * sleeping since it got the lock on the re-try. The waiter 3163 * bit will then be correctly updated by that thread. This 3164 * window may be closed by reading the wait bit again here 3165 * and not calling lwp_release() at all if it is zero. 3166 * 2. the thread which set the waiter bit and went to sleep 3167 * was woken up by a signal. This time, the waiter recomputes 3168 * the wait bit in the return with EINTR code. 3169 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3170 * memory that has been re-used after the lock was dropped. 3171 * In this case, writing into the waiter bit would cause data 3172 * corruption. 3173 */ 3174 fuword8_noerr(&lp->mutex_waiters, &waiters); 3175 if (waiters) { 3176 if ((type & LOCK_ROBUST) && 3177 (flag & LOCK_NOTRECOVERABLE)) { 3178 lwp_release_all(&lwpchan); 3179 suword8_noerr(&lp->mutex_waiters, 0); 3180 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3181 suword8_noerr(&lp->mutex_waiters, waiters); 3182 } 3183 } 3184 3185 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3186 out: 3187 no_fault(); 3188 if (watched) 3189 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3190 if (error) 3191 return (set_errno(error)); 3192 return (0); 3193 } 3194
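
/*
 * For reference: the acquire side that lwp_mutex_unlock()'s wakeup logic
 * pairs with lives in lwp_mutex_timedlock().  A condensed, illustrative
 * sketch of its waiter protocol (not the real code; see that function
 * for the details):
 *
 *	while (!ulock_try(&lp->mutex_lockw)) {
 *		suword8_noerr(&lp->mutex_waiters, 1);	advertise the waiter
 *		if (ulock_try(&lp->mutex_lockw))
 *			break;		won the race on the re-try
 *		lwp_block(&lwpchan);	sleep until lwp_release() or a signal
 *		...
 *	}
 *
 * It is that re-try after setting mutex_waiters which creates the benign
 * window described in case 1 of the comment in lwp_mutex_unlock() above:
 * the unlocking lwp can see mutex_waiters set even though the would-be
 * waiter never actually slept.
 */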