/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
static void lwp_mutex_unregister(void *uaddr);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread.  This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps is added for numbers of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
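 *
 * A descriptive note: after the SOBJ_USER type tag, the entries in the
 * initializer below are the owner lookup, unsleep and priority-change
 * hooks (lwpsobj_owner(), lwp_unsleep() and lwp_change_pri() in this
 * file), which the dispatcher invokes while the thread is asleep.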
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				/*
				 * If there is a user-level robust lock
				 * registration, mark it as invalid.
				 */
				if ((addr = ent->lwpchan_uaddr) != NULL)
					lwp_mutex_unregister(addr);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
			    == (USYNC_PROCESS | LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
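 *
 * A rough sketch of the protocol used below: look the address up under
 * the hash bucket lock; on a miss, drop the lock, translate the address
 * with as_getmemid(), allocate an entry, then re-take the bucket lock
 * and re-check both that p->p_lcp has not been replaced by a concurrent
 * lwpchan_alloc_cache() and that no other thread has inserted the same
 * mapping in the meantime.  An overly long hash chain triggers a resize
 * of the cache and a restart from the top.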
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t	memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 &&	/* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_uaddr = uaddr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list.  Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t.  This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned by curthread.  Should be called only
 * when upim points to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit.  Waiters might have been
			 * interrupted.  No need to clear the w bit (upimutex
			 * will soon be freed).  Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL)  {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit.  Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired.  Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up.  Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup().  All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block().  turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted.  The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count.  So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.  If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held.  Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event.  The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not.  In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew).  curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	if (type & USYNC_PROCESS)
		suword32_noerr(&lp->mutex_ownerpid, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	suword32_noerr((uint32_t *)&lp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0);
	suword32_noerr(&lp->mutex_ownerpid, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up.  Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed.  If there is a waiter, the lock will be
			 * handed off.  If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies.  The last owner will free kmem, since
			 * it will find the upimutex has no waiters.  So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

static int iswanted();
int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state.  If the thread indeed blocks,
	 * this state becomes valid.  If not, the state is converted back to
	 * LMS_SYSTEM.  So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
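	 * (A note on the mechanism: the fuword8_noerr()/suword8_noerr()
	 * pair just below reads the type byte and writes the same value
	 * back under the on_fault() protection set up above; the write
	 * is what forces the copy-on-write and verifies writability.)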
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit.  The waiters field in
			 * the lock is not reliable.  Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
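			 * The recomputation below scans the sleep queue
			 * with iswanted() under the dispatcher lock rather
			 * than trusting the user-level waiters field.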
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_mutex_lock() interface, no longer called from libc.
 * libc now calls lwp_mutex_timedlock(lp, NULL).
 * This system call trap continues to exist solely for the benefit
 * of old statically-linked binaries from Solaris 9 and before.
 * It should be removed from the system when we no longer care
 * about such applications.
 */
int
lwp_mutex_lock(lwp_mutex_t *lp)
{
	return (lwp_mutex_timedlock(lp, NULL));
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false.  It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but has
			 * since been re-used to hold a lwp cv or lwp semaphore.
			 * The thread "tp" found to match the lwp lock's wchan
			 * is actually sleeping for the cv or semaphore which
			 * now has the same wchan.  In this case, lwp_release()
			 * should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);	/* drop sleep q lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex.  the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan.  The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock().  The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try.  The waiter
	 *	   bit will then be correctly updated by that thread.  This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal.  This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex. (pagefaults are
		 * possible here.)
		 */
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs.  In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
1801 */ 1802 if (lwp_release(&m_lwpchan, &waiters, 0)) 1803 suword8_noerr(&mp->mutex_waiters, waiters); 1804 } 1805 m_locked = 0; 1806 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1807 } else { 1808 (void) lwp_upimutex_unlock(mp, mtype); 1809 } 1810 out: 1811 no_fault(); 1812 if (mpwatched) 1813 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1814 if (cvwatched) 1815 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1816 if (t->t_mstate == LMS_USER_LOCK) 1817 (void) new_mstate(t, LMS_SYSTEM); 1818 return (set_errno(error)); 1819 } 1820 1821 /* 1822 * wakeup one lwp that's blocked on this condition variable. 1823 */ 1824 int 1825 lwp_cond_signal(lwp_cond_t *cv) 1826 { 1827 proc_t *p = ttoproc(curthread); 1828 lwpchan_t lwpchan; 1829 uchar_t waiters; 1830 volatile uint16_t type = 0; 1831 volatile int locked = 0; 1832 volatile int watched = 0; 1833 label_t ljb; 1834 int error = 0; 1835 1836 if ((caddr_t)cv >= p->p_as->a_userlimit) 1837 return (set_errno(EFAULT)); 1838 1839 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1840 1841 if (on_fault(&ljb)) { 1842 if (locked) 1843 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1844 error = EFAULT; 1845 goto out; 1846 } 1847 /* 1848 * Force Copy-on-write if necessary and ensure that the 1849 * synchronization object resides in read/write memory. 1850 * Cause an EFAULT return now if this is not so. 1851 */ 1852 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1853 suword16_noerr(&cv->cond_type, type); 1854 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1855 &lwpchan, LWPCHAN_CVPOOL)) { 1856 error = EFAULT; 1857 goto out; 1858 } 1859 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1860 locked = 1; 1861 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1862 if (waiters != 0) { 1863 /* 1864 * The following call to lwp_release() might fail but it is 1865 * OK to write into the waiters bit below, since the memory 1866 * could not have been re-used or unmapped (for correctly 1867 * written user programs) as in the case of lwp_mutex_wakeup(). 1868 * For an incorrect program, we should not care about data 1869 * corruption since this is just one instance of other places 1870 * where corruption can occur for such a program. Of course 1871 * if the memory is unmapped, normal fault recovery occurs. 1872 */ 1873 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1874 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1875 } 1876 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1877 out: 1878 no_fault(); 1879 if (watched) 1880 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1881 if (error) 1882 return (set_errno(error)); 1883 return (0); 1884 } 1885 1886 /* 1887 * wakeup every lwp that's blocked on this condition variable. 1888 */ 1889 int 1890 lwp_cond_broadcast(lwp_cond_t *cv) 1891 { 1892 proc_t *p = ttoproc(curthread); 1893 lwpchan_t lwpchan; 1894 volatile uint16_t type = 0; 1895 volatile int locked = 0; 1896 volatile int watched = 0; 1897 label_t ljb; 1898 uchar_t waiters; 1899 int error = 0; 1900 1901 if ((caddr_t)cv >= p->p_as->a_userlimit) 1902 return (set_errno(EFAULT)); 1903 1904 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1905 1906 if (on_fault(&ljb)) { 1907 if (locked) 1908 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1909 error = EFAULT; 1910 goto out; 1911 } 1912 /* 1913 * Force Copy-on-write if necessary and ensure that the 1914 * synchronization object resides in read/write memory. 1915 * Cause an EFAULT return now if this is not so. 
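	 *
	 * (Added note, not part of the original comments: the idiom used
	 * here and throughout this file is to read a field of the user
	 * object and immediately write the same value back, e.g.
	 *
	 *	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	 *	suword16_noerr(&cv->cond_type, type);
	 *
	 * The store touches the page for writing, which forces any
	 * copy-on-write fault to happen now, under the on_fault()
	 * handler set up above, rather than later while an lwpchan
	 * lock is held.)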
1916 */ 1917 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1918 suword16_noerr(&cv->cond_type, type); 1919 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1920 &lwpchan, LWPCHAN_CVPOOL)) { 1921 error = EFAULT; 1922 goto out; 1923 } 1924 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1925 locked = 1; 1926 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1927 if (waiters != 0) { 1928 lwp_release_all(&lwpchan); 1929 suword8_noerr(&cv->cond_waiters_kernel, 0); 1930 } 1931 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1932 out: 1933 no_fault(); 1934 if (watched) 1935 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1936 if (error) 1937 return (set_errno(error)); 1938 return (0); 1939 } 1940 1941 int 1942 lwp_sema_trywait(lwp_sema_t *sp) 1943 { 1944 kthread_t *t = curthread; 1945 proc_t *p = ttoproc(t); 1946 label_t ljb; 1947 volatile int locked = 0; 1948 volatile int watched = 0; 1949 volatile uint16_t type = 0; 1950 int count; 1951 lwpchan_t lwpchan; 1952 uchar_t waiters; 1953 int error = 0; 1954 1955 if ((caddr_t)sp >= p->p_as->a_userlimit) 1956 return (set_errno(EFAULT)); 1957 1958 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1959 1960 if (on_fault(&ljb)) { 1961 if (locked) 1962 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1963 error = EFAULT; 1964 goto out; 1965 } 1966 /* 1967 * Force Copy-on-write if necessary and ensure that the 1968 * synchronization object resides in read/write memory. 1969 * Cause an EFAULT return now if this is not so. 1970 */ 1971 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1972 suword16_noerr((void *)&sp->sema_type, type); 1973 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1974 &lwpchan, LWPCHAN_CVPOOL)) { 1975 error = EFAULT; 1976 goto out; 1977 } 1978 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1979 locked = 1; 1980 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1981 if (count == 0) 1982 error = EBUSY; 1983 else 1984 suword32_noerr((void *)&sp->sema_count, --count); 1985 if (count != 0) { 1986 fuword8_noerr(&sp->sema_waiters, &waiters); 1987 if (waiters != 0) { 1988 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1989 suword8_noerr(&sp->sema_waiters, waiters); 1990 } 1991 } 1992 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1993 out: 1994 no_fault(); 1995 if (watched) 1996 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1997 if (error) 1998 return (set_errno(error)); 1999 return (0); 2000 } 2001 2002 /* 2003 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 2004 */ 2005 int 2006 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2007 { 2008 kthread_t *t = curthread; 2009 klwp_t *lwp = ttolwp(t); 2010 proc_t *p = ttoproc(t); 2011 lwp_timer_t lwpt; 2012 caddr_t timedwait; 2013 clock_t tim = -1; 2014 label_t ljb; 2015 volatile int locked = 0; 2016 volatile int watched = 0; 2017 volatile uint16_t type = 0; 2018 int count; 2019 lwpchan_t lwpchan; 2020 uchar_t waiters; 2021 int error = 0; 2022 int time_error; 2023 int imm_timeout = 0; 2024 int imm_unpark = 0; 2025 2026 if ((caddr_t)sp >= p->p_as->a_userlimit) 2027 return (set_errno(EFAULT)); 2028 2029 /* 2030 * Put the lwp in an orderly state for debugging, 2031 * in case we are stopped while sleeping, below. 
2032 */ 2033 prstop(PR_REQUESTED, 0); 2034 2035 timedwait = (caddr_t)tsp; 2036 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2037 lwpt.lwpt_imm_timeout) { 2038 imm_timeout = 1; 2039 timedwait = NULL; 2040 } 2041 2042 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2043 2044 if (on_fault(&ljb)) { 2045 if (locked) 2046 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2047 error = EFAULT; 2048 goto out; 2049 } 2050 /* 2051 * Force Copy-on-write if necessary and ensure that the 2052 * synchronization object resides in read/write memory. 2053 * Cause an EFAULT return now if this is not so. 2054 */ 2055 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2056 suword16_noerr((void *)&sp->sema_type, type); 2057 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2058 &lwpchan, LWPCHAN_CVPOOL)) { 2059 error = EFAULT; 2060 goto out; 2061 } 2062 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2063 locked = 1; 2064 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2065 while (error == 0 && count == 0) { 2066 if (time_error) { 2067 /* 2068 * The SUSV3 Posix spec is very clear that we 2069 * should get no error from validating the 2070 * timer until we would actually sleep. 2071 */ 2072 error = time_error; 2073 break; 2074 } 2075 suword8_noerr(&sp->sema_waiters, 1); 2076 if (watched) 2077 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2078 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2079 /* 2080 * We received a signal at user-level before calling 2081 * here or another thread wants us to return 2082 * immediately with EINTR. See lwp_unpark(). 2083 */ 2084 imm_unpark = 1; 2085 t->t_unpark = 0; 2086 timedwait = NULL; 2087 } else if (timedwait) { 2088 /* 2089 * If we successfully queue the timeout, 2090 * then don't drop t_delay_lock until 2091 * we are on the sleep queue (below). 2092 */ 2093 mutex_enter(&t->t_delay_lock); 2094 if (lwp_timer_enqueue(&lwpt) != 0) { 2095 mutex_exit(&t->t_delay_lock); 2096 imm_timeout = 1; 2097 timedwait = NULL; 2098 } 2099 } 2100 t->t_flag |= T_WAITCVSEM; 2101 lwp_block(&lwpchan); 2102 /* 2103 * Nothing should happen to cause the lwp to sleep 2104 * again until after it returns from swtch(). 
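		 *
		 * (Added summary of the surrounding code, for clarity:
		 * lwp_block() queued us on the sleep queue and set
		 * T_WAKEABLE while we still held lwpchan_lock, so a
		 * wakeup from lwp_sema_post() cannot be lost.  If a
		 * signal, forced return, immediate timeout or unpark
		 * is already pending, the setrun() below makes us
		 * runnable again so that swtch() returns promptly
		 * instead of leaving us asleep.)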
2105 */ 2106 if (timedwait) 2107 mutex_exit(&t->t_delay_lock); 2108 locked = 0; 2109 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2110 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2111 (imm_timeout | imm_unpark)) 2112 setrun(t); 2113 swtch(); 2114 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2115 if (timedwait) 2116 tim = lwp_timer_dequeue(&lwpt); 2117 setallwatch(); 2118 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2119 MUSTRETURN(p, t) || imm_unpark) 2120 error = EINTR; 2121 else if (imm_timeout || (timedwait && tim == -1)) 2122 error = ETIME; 2123 lwp->lwp_asleep = 0; 2124 lwp->lwp_sysabort = 0; 2125 watched = watch_disable_addr((caddr_t)sp, 2126 sizeof (*sp), S_WRITE); 2127 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2128 locked = 1; 2129 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2130 } 2131 if (error == 0) 2132 suword32_noerr((void *)&sp->sema_count, --count); 2133 if (count != 0) { 2134 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2135 suword8_noerr(&sp->sema_waiters, waiters); 2136 } 2137 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2138 out: 2139 no_fault(); 2140 if (watched) 2141 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2142 if (tsp && check_park && !time_error) 2143 error = lwp_timer_copyout(&lwpt, error); 2144 if (error) 2145 return (set_errno(error)); 2146 return (0); 2147 } 2148 2149 /* 2150 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2151 * libc now calls lwp_sema_timedwait(). 2152 * This system call trap exists solely for the benefit of old 2153 * statically linked applications from Solaris 9 and before. 2154 * It should be removed when we no longer care about such applications. 2155 */ 2156 int 2157 lwp_sema_wait(lwp_sema_t *sp) 2158 { 2159 return (lwp_sema_timedwait(sp, NULL, 0)); 2160 } 2161 2162 int 2163 lwp_sema_post(lwp_sema_t *sp) 2164 { 2165 proc_t *p = ttoproc(curthread); 2166 label_t ljb; 2167 volatile int locked = 0; 2168 volatile int watched = 0; 2169 volatile uint16_t type = 0; 2170 int count; 2171 lwpchan_t lwpchan; 2172 uchar_t waiters; 2173 int error = 0; 2174 2175 if ((caddr_t)sp >= p->p_as->a_userlimit) 2176 return (set_errno(EFAULT)); 2177 2178 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2179 2180 if (on_fault(&ljb)) { 2181 if (locked) 2182 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2183 error = EFAULT; 2184 goto out; 2185 } 2186 /* 2187 * Force Copy-on-write if necessary and ensure that the 2188 * synchronization object resides in read/write memory. 2189 * Cause an EFAULT return now if this is not so. 
2190 */ 2191 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2192 suword16_noerr(&sp->sema_type, type); 2193 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2194 &lwpchan, LWPCHAN_CVPOOL)) { 2195 error = EFAULT; 2196 goto out; 2197 } 2198 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2199 locked = 1; 2200 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2201 if (count == _SEM_VALUE_MAX) 2202 error = EOVERFLOW; 2203 else 2204 suword32_noerr(&sp->sema_count, ++count); 2205 if (count == 1) { 2206 fuword8_noerr(&sp->sema_waiters, &waiters); 2207 if (waiters) { 2208 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2209 suword8_noerr(&sp->sema_waiters, waiters); 2210 } 2211 } 2212 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2213 out: 2214 no_fault(); 2215 if (watched) 2216 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2217 if (error) 2218 return (set_errno(error)); 2219 return (0); 2220 } 2221 2222 #define TRW_WANT_WRITE 0x1 2223 #define TRW_LOCK_GRANTED 0x2 2224 2225 #define READ_LOCK 0 2226 #define WRITE_LOCK 1 2227 #define TRY_FLAG 0x10 2228 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2229 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2230 2231 /* 2232 * Release one writer or one or more readers. Compute the rwstate word to 2233 * reflect the new state of the queue. For a safe hand-off we copy the new 2234 * rwstate value back to userland before we wake any of the new lock holders. 2235 * 2236 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2237 * being given precedence over readers of the same priority). 2238 * 2239 * If the first thread is a reader we scan the queue releasing all readers 2240 * until we hit a writer or the end of the queue. If the first thread is a 2241 * writer we still need to check for another writer. 2242 */ 2243 void 2244 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2245 { 2246 sleepq_head_t *sqh; 2247 kthread_t *tp; 2248 kthread_t **tpp; 2249 kthread_t *tpnext; 2250 kthread_t *wakelist = NULL; 2251 uint32_t rwstate = 0; 2252 int wcount = 0; 2253 int rcount = 0; 2254 2255 sqh = lwpsqhash(lwpchan); 2256 disp_lock_enter(&sqh->sq_lock); 2257 tpp = &sqh->sq_queue.sq_first; 2258 while ((tp = *tpp) != NULL) { 2259 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2260 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2261 if (tp->t_writer & TRW_WANT_WRITE) { 2262 if ((wcount++ == 0) && (rcount == 0)) { 2263 rwstate |= URW_WRITE_LOCKED; 2264 2265 /* Just one writer to wake. */ 2266 sleepq_unlink(tpp, tp); 2267 wakelist = tp; 2268 2269 /* tpp already set for next thread. */ 2270 continue; 2271 } else { 2272 rwstate |= URW_HAS_WAITERS; 2273 /* We need look no further. */ 2274 break; 2275 } 2276 } else { 2277 rcount++; 2278 if (wcount == 0) { 2279 rwstate++; 2280 2281 /* Add reader to wake list. */ 2282 sleepq_unlink(tpp, tp); 2283 tp->t_link = wakelist; 2284 wakelist = tp; 2285 2286 /* tpp already set for next thread. */ 2287 continue; 2288 } else { 2289 rwstate |= URW_HAS_WAITERS; 2290 /* We need look no further. */ 2291 break; 2292 } 2293 } 2294 } 2295 tpp = &tp->t_link; 2296 } 2297 2298 /* Copy the new rwstate back to userland. */ 2299 suword32_noerr(&rw->rwlock_readers, rwstate); 2300 2301 /* Wake the new lock holder(s) up. 
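	 *
	 * (Added note, inferred from the code above rather than stated in
	 * the original comments: the rwlock_readers word just copied out
	 * encodes the count of read-lock holders in the bits covered by
	 * URW_READERS_MASK, with URW_WRITE_LOCKED and URW_HAS_WAITERS as
	 * flag bits.  Each thread on the wakelist is marked with
	 * TRW_LOCK_GRANTED below; that is how lwp_rwlock_lock() tells,
	 * after swtch() returns, that it was handed the lock rather than
	 * woken spuriously.)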
*/ 2302 tp = wakelist; 2303 while (tp != NULL) { 2304 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2305 tp->t_wchan0 = NULL; 2306 tp->t_wchan = NULL; 2307 tp->t_sobj_ops = NULL; 2308 tp->t_writer |= TRW_LOCK_GRANTED; 2309 tpnext = tp->t_link; 2310 tp->t_link = NULL; 2311 CL_WAKEUP(tp); 2312 thread_unlock_high(tp); 2313 tp = tpnext; 2314 } 2315 2316 disp_lock_exit(&sqh->sq_lock); 2317 } 2318 2319 /* 2320 * We enter here holding the user-level mutex, which we must release before 2321 * returning or blocking. Based on lwp_cond_wait(). 2322 */ 2323 static int 2324 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2325 { 2326 lwp_mutex_t *mp = NULL; 2327 kthread_t *t = curthread; 2328 kthread_t *tp; 2329 klwp_t *lwp = ttolwp(t); 2330 proc_t *p = ttoproc(t); 2331 lwp_timer_t lwpt; 2332 lwpchan_t lwpchan; 2333 lwpchan_t mlwpchan; 2334 caddr_t timedwait; 2335 volatile uint16_t type = 0; 2336 volatile uint8_t mtype = 0; 2337 uchar_t mwaiters; 2338 volatile int error = 0; 2339 int time_error; 2340 clock_t tim = -1; 2341 volatile int locked = 0; 2342 volatile int mlocked = 0; 2343 volatile int watched = 0; 2344 volatile int mwatched = 0; 2345 label_t ljb; 2346 volatile int no_lwpchan = 1; 2347 int imm_timeout = 0; 2348 int try_flag; 2349 uint32_t rwstate; 2350 int acquired = 0; 2351 2352 /* We only check rw because the mutex is included in it. */ 2353 if ((caddr_t)rw >= p->p_as->a_userlimit) 2354 return (set_errno(EFAULT)); 2355 2356 /* 2357 * Put the lwp in an orderly state for debugging, 2358 * in case we are stopped while sleeping, below. 2359 */ 2360 prstop(PR_REQUESTED, 0); 2361 2362 /* We must only report this error if we are about to sleep (later). */ 2363 timedwait = (caddr_t)tsp; 2364 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2365 lwpt.lwpt_imm_timeout) { 2366 imm_timeout = 1; 2367 timedwait = NULL; 2368 } 2369 2370 (void) new_mstate(t, LMS_USER_LOCK); 2371 2372 if (on_fault(&ljb)) { 2373 if (no_lwpchan) { 2374 error = EFAULT; 2375 goto out_nodrop; 2376 } 2377 if (mlocked) { 2378 mlocked = 0; 2379 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2380 } 2381 if (locked) { 2382 locked = 0; 2383 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2384 } 2385 /* 2386 * Set up another on_fault() for a possible fault 2387 * on the user lock accessed at "out_drop". 2388 */ 2389 if (on_fault(&ljb)) { 2390 if (mlocked) { 2391 mlocked = 0; 2392 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2393 } 2394 error = EFAULT; 2395 goto out_nodrop; 2396 } 2397 error = EFAULT; 2398 goto out_nodrop; 2399 } 2400 2401 /* Process rd_wr (including sanity check). */ 2402 try_flag = (rd_wr & TRY_FLAG); 2403 rd_wr &= ~TRY_FLAG; 2404 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2405 error = EINVAL; 2406 goto out_nodrop; 2407 } 2408 2409 /* 2410 * Force Copy-on-write if necessary and ensure that the 2411 * synchronization object resides in read/write memory. 2412 * Cause an EFAULT return now if this is not so. 2413 */ 2414 mp = &rw->mutex; 2415 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2416 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2417 suword8_noerr(&mp->mutex_type, mtype); 2418 suword16_noerr(&rw->rwlock_type, type); 2419 2420 /* We can only continue for simple USYNC_PROCESS locks. */ 2421 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2422 error = EINVAL; 2423 goto out_nodrop; 2424 } 2425 2426 /* Convert user level mutex, "mp", to a unique lwpchan. 
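	 * (Added note, beyond what the original comments state: an lwpchan
	 * is the pair of wait-channel values (lc_wchan0, lc_wchan) that
	 * this file sleeps and wakes on.  get_lwpchan() derives it from
	 * the object's address for process-private objects and, for
	 * process-shared ones, from the underlying mapping, so that every
	 * process mapping the same shared object arrives at the same
	 * channel.  Treat this summary as an informal gloss rather than a
	 * definition.)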
	 */
	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
	    &mlwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);

	/*
	 * lwpchan_lock() ensures that the calling LWP is put to sleep
	 * atomically with respect to a possible wakeup which is a result
	 * of lwp_rwlock_unlock().
	 *
	 * What's misleading is that the LWP is put to sleep after the
	 * rwlock's mutex is released. This is OK as long as the release
	 * operation is also done while holding mlwpchan. The LWP is then
	 * put to sleep when the possibility of pagefaulting or sleeping
	 * has been completely eliminated.
	 */
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 1;

	/*
	 * Fetch the current rwlock state.
	 *
	 * The possibility of spurious wake-ups or killed waiters means
	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
	 * We only fix these if they are important to us.
	 *
	 * Although various error states can be observed here (e.g. the lock
	 * is not held, but there are waiters) we assume these are application
	 * errors and so we take no corrective action.
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	/*
	 * We cannot legitimately get here from user-level
	 * without URW_HAS_WAITERS being set.
	 * Set it now to guard against user-level error.
	 */
	rwstate |= URW_HAS_WAITERS;

	/*
	 * We can try only if the lock isn't held by a writer.
	 */
	if (!(rwstate & URW_WRITE_LOCKED)) {
		tp = lwp_queue_waiter(&lwpchan);
		if (tp == NULL) {
			/*
			 * Hmmm, rwstate indicates waiters but there are
			 * none queued. This could just be the result of a
			 * spurious wakeup, so let's ignore it.
			 *
			 * We now have a chance to acquire the lock
			 * uncontended, but this is the last chance for
			 * a writer to acquire the lock without blocking.
			 */
			if (rd_wr == READ_LOCK) {
				rwstate++;
				acquired = 1;
			} else if ((rwstate & URW_READERS_MASK) == 0) {
				rwstate |= URW_WRITE_LOCKED;
				acquired = 1;
			}
		} else if (rd_wr == READ_LOCK) {
			/*
			 * This is the last chance for a reader to acquire
			 * the lock now, but it can only do so if there is
			 * no writer of equal or greater priority at the
			 * head of the queue.
			 *
			 * It is also just possible that there is a reader
			 * at the head of the queue. This may be the result
			 * of a spurious wakeup or an application failure.
			 * In this case we only acquire the lock if we have
			 * equal or greater priority. It is not our job to
			 * release spurious waiters.
			 */
			pri_t our_pri = DISP_PRIO(t);
			pri_t his_pri = DISP_PRIO(tp);

			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
			    !(tp->t_writer & TRW_WANT_WRITE))) {
				rwstate++;
				acquired = 1;
			}
		}
	}

	if (acquired || try_flag || time_error) {
		/*
		 * We're not going to block this time.
		 */
		suword32_noerr(&rw->rwlock_readers, rwstate);
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 0;

		if (acquired) {
			/*
			 * Got the lock!
			 */
			error = 0;

		} else if (try_flag) {
			/*
			 * We didn't get the lock; since this is a trylock,
			 * return EBUSY rather than blocking.
			 */
			error = EBUSY;

		} else if (time_error) {
			/*
			 * The SUSV3 POSIX spec is very clear that we should
			 * get no error from validating the timer (above)
			 * until we would actually sleep.
			 */
			error = time_error;
		}

		goto out_drop;
	}

	/*
	 * We're about to block, so indicate what kind of waiter we are.
	 */
	t->t_writer = 0;
	if (rd_wr == WRITE_LOCK)
		t->t_writer = TRW_WANT_WRITE;
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/*
	 * Unlock the rwlock's mutex (pagefaults are possible here).
	 */
	suword32_noerr((uint32_t *)&mp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0);
	suword32_noerr(&mp->mutex_ownerpid, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * Given the locking of mlwpchan around the release of
		 * the mutex and checking for waiters, the following
		 * call to lwp_release() can fail ONLY if the lock
		 * acquirer is interrupted after setting the waiter bit,
		 * calling lwp_block() and releasing mlwpchan.
		 * In this case, it could get pulled off the LWP sleep
		 * queue (via setrun()) before the following call to
		 * lwp_release() occurs, and the lock requestor will
		 * update the waiter bit correctly by re-evaluating it.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;
	no_fault();

	if (mwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mwatched = 0;
	}
	if (watched) {
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
		watched = 0;
	}

	if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&lwpchan);

	/*
	 * Nothing should happen to cause the LWP to go to sleep until after
	 * it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
		setrun(t);
	swtch();

	/*
	 * We're back, but we need to work out why. Were we interrupted? Did
	 * we time out? Were we granted the lock?
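	 *
	 * (Added summary of the code below: error starts at EAGAIN; a
	 * pending signal, lwp_sysabort or MUSTRETURN() turns that into
	 * EINTR, and an expired or immediate timeout turns it into ETIME.
	 * If the waker handed us the lock, TRW_LOCK_GRANTED is set in
	 * t_writer and the error is discarded entirely, since EINTR and
	 * ETIME no longer matter once the lock is held.)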
2634 */ 2635 error = EAGAIN; 2636 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2637 t->t_writer = 0; 2638 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2639 if (timedwait) 2640 tim = lwp_timer_dequeue(&lwpt); 2641 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2642 error = EINTR; 2643 else if (imm_timeout || (timedwait && tim == -1)) 2644 error = ETIME; 2645 lwp->lwp_asleep = 0; 2646 lwp->lwp_sysabort = 0; 2647 setallwatch(); 2648 2649 /* 2650 * If we were granted the lock we don't care about EINTR or ETIME. 2651 */ 2652 if (acquired) 2653 error = 0; 2654 2655 if (t->t_mstate == LMS_USER_LOCK) 2656 (void) new_mstate(t, LMS_SYSTEM); 2657 2658 if (error) 2659 return (set_errno(error)); 2660 return (0); 2661 2662 out_drop: 2663 /* 2664 * Make sure that the user level lock is dropped before returning 2665 * to the caller. 2666 */ 2667 if (!mlocked) { 2668 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2669 mlocked = 1; 2670 } 2671 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2672 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2673 suword32_noerr(&mp->mutex_ownerpid, 0); 2674 ulock_clear(&mp->mutex_lockw); 2675 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2676 if (mwaiters != 0) { 2677 /* 2678 * See comment above on lock clearing and lwp_release() 2679 * success/failure. 2680 */ 2681 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2682 suword8_noerr(&mp->mutex_waiters, mwaiters); 2683 } 2684 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2685 mlocked = 0; 2686 2687 out_nodrop: 2688 no_fault(); 2689 if (mwatched) 2690 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2691 if (watched) 2692 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2693 if (t->t_mstate == LMS_USER_LOCK) 2694 (void) new_mstate(t, LMS_SYSTEM); 2695 if (error) 2696 return (set_errno(error)); 2697 return (0); 2698 } 2699 2700 /* 2701 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2702 * we never drop the lock. 2703 */ 2704 static int 2705 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2706 { 2707 kthread_t *t = curthread; 2708 proc_t *p = ttoproc(t); 2709 lwpchan_t lwpchan; 2710 volatile uint16_t type = 0; 2711 volatile int error = 0; 2712 volatile int locked = 0; 2713 volatile int watched = 0; 2714 label_t ljb; 2715 volatile int no_lwpchan = 1; 2716 uint32_t rwstate; 2717 2718 /* We only check rw because the mutex is included in it. */ 2719 if ((caddr_t)rw >= p->p_as->a_userlimit) 2720 return (set_errno(EFAULT)); 2721 2722 if (on_fault(&ljb)) { 2723 if (no_lwpchan) { 2724 error = EFAULT; 2725 goto out_nodrop; 2726 } 2727 if (locked) { 2728 locked = 0; 2729 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2730 } 2731 error = EFAULT; 2732 goto out_nodrop; 2733 } 2734 2735 /* 2736 * Force Copy-on-write if necessary and ensure that the 2737 * synchronization object resides in read/write memory. 2738 * Cause an EFAULT return now if this is not so. 2739 */ 2740 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2741 suword16_noerr(&rw->rwlock_type, type); 2742 2743 /* We can only continue for simple USYNC_PROCESS locks. */ 2744 if (type != USYNC_PROCESS) { 2745 error = EINVAL; 2746 goto out_nodrop; 2747 } 2748 2749 /* Convert user level rwlock, "rw", to a unique lwpchan. 
*/ 2750 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2751 &lwpchan, LWPCHAN_CVPOOL)) { 2752 error = EFAULT; 2753 goto out_nodrop; 2754 } 2755 2756 no_lwpchan = 0; 2757 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2758 2759 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2760 locked = 1; 2761 2762 /* 2763 * We can resolve multiple readers (except the last reader) here. 2764 * For the last reader or a writer we need lwp_rwlock_release(), 2765 * to which we also delegate the task of copying the new rwstate 2766 * back to userland (see the comment there). 2767 */ 2768 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2769 if (rwstate & URW_WRITE_LOCKED) 2770 lwp_rwlock_release(&lwpchan, rw); 2771 else if ((rwstate & URW_READERS_MASK) > 0) { 2772 rwstate--; 2773 if ((rwstate & URW_READERS_MASK) == 0) 2774 lwp_rwlock_release(&lwpchan, rw); 2775 else 2776 suword32_noerr(&rw->rwlock_readers, rwstate); 2777 } 2778 2779 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2780 locked = 0; 2781 error = 0; 2782 2783 out_nodrop: 2784 no_fault(); 2785 if (watched) 2786 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2787 if (error) 2788 return (set_errno(error)); 2789 return (0); 2790 } 2791 2792 int 2793 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2794 { 2795 switch (subcode) { 2796 case 0: 2797 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2798 case 1: 2799 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2800 case 2: 2801 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2802 case 3: 2803 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2804 case 4: 2805 return (lwp_rwlock_unlock(rwlp)); 2806 } 2807 return (set_errno(EINVAL)); 2808 } 2809 2810 /* 2811 * Return the owner of the user-level s-object. 2812 * Since we can't really do this, return NULL. 2813 */ 2814 /* ARGSUSED */ 2815 static kthread_t * 2816 lwpsobj_owner(caddr_t sobj) 2817 { 2818 return ((kthread_t *)NULL); 2819 } 2820 2821 /* 2822 * Wake up a thread asleep on a user-level synchronization 2823 * object. 2824 */ 2825 static void 2826 lwp_unsleep(kthread_t *t) 2827 { 2828 ASSERT(THREAD_LOCK_HELD(t)); 2829 if (t->t_wchan0 != NULL) { 2830 sleepq_head_t *sqh; 2831 sleepq_t *sqp = t->t_sleepq; 2832 2833 if (sqp != NULL) { 2834 sqh = lwpsqhash(&t->t_lwpchan); 2835 ASSERT(&sqh->sq_queue == sqp); 2836 sleepq_unsleep(t); 2837 disp_lock_exit_high(&sqh->sq_lock); 2838 CL_SETRUN(t); 2839 return; 2840 } 2841 } 2842 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2843 } 2844 2845 /* 2846 * Change the priority of a thread asleep on a user-level 2847 * synchronization object. To maintain proper priority order, 2848 * we: 2849 * o dequeue the thread. 2850 * o change its priority. 2851 * o re-enqueue the thread. 2852 * Assumption: the thread is locked on entry. 
2853 */ 2854 static void 2855 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2856 { 2857 ASSERT(THREAD_LOCK_HELD(t)); 2858 if (t->t_wchan0 != NULL) { 2859 sleepq_t *sqp = t->t_sleepq; 2860 2861 sleepq_dequeue(t); 2862 *t_prip = pri; 2863 sleepq_insert(sqp, t); 2864 } else 2865 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2866 } 2867 2868 /* 2869 * Clean up a left-over process-shared robust mutex 2870 */ 2871 static void 2872 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2873 { 2874 uint16_t flag; 2875 uchar_t waiters; 2876 label_t ljb; 2877 pid_t owner_pid; 2878 lwp_mutex_t *lp; 2879 volatile int locked = 0; 2880 volatile int watched = 0; 2881 volatile struct upimutex *upimutex = NULL; 2882 volatile int upilocked = 0; 2883 2884 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2885 != (USYNC_PROCESS | LOCK_ROBUST)) 2886 return; 2887 2888 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2889 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2890 if (on_fault(&ljb)) { 2891 if (locked) 2892 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2893 if (upilocked) 2894 upimutex_unlock((upimutex_t *)upimutex, 0); 2895 goto out; 2896 } 2897 2898 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2899 2900 if (UPIMUTEX(ent->lwpchan_type)) { 2901 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2902 upib_t *upibp = &UPI_CHAIN(lwpchan); 2903 2904 if (owner_pid != curproc->p_pid) 2905 goto out; 2906 mutex_enter(&upibp->upib_lock); 2907 upimutex = upi_get(upibp, &lwpchan); 2908 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2909 mutex_exit(&upibp->upib_lock); 2910 goto out; 2911 } 2912 mutex_exit(&upibp->upib_lock); 2913 upilocked = 1; 2914 flag = lwp_clear_mutex(lp, lockflg); 2915 suword8_noerr(&lp->mutex_lockw, 0); 2916 upimutex_unlock((upimutex_t *)upimutex, flag); 2917 } else { 2918 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2919 locked = 1; 2920 /* 2921 * Clear the spinners count because one of our 2922 * threads could have been spinning for this lock 2923 * at user level when the process was suddenly killed. 2924 * There is no harm in this since user-level libc code 2925 * will adapt to the sudden change in the spinner count. 2926 */ 2927 suword8_noerr(&lp->mutex_spinners, 0); 2928 if (owner_pid != curproc->p_pid) { 2929 /* 2930 * We are not the owner. There may or may not be one. 2931 * If there are waiters, we wake up one or all of them. 2932 * It doesn't hurt to wake them up in error since 2933 * they will just retry the lock and go to sleep 2934 * again if necessary. 2935 */ 2936 fuword8_noerr(&lp->mutex_waiters, &waiters); 2937 if (waiters != 0) { /* there are waiters */ 2938 fuword16_noerr(&lp->mutex_flag, &flag); 2939 if (flag & LOCK_NOTRECOVERABLE) { 2940 lwp_release_all(&ent->lwpchan_lwpchan); 2941 suword8_noerr(&lp->mutex_waiters, 0); 2942 } else if (lwp_release(&ent->lwpchan_lwpchan, 2943 &waiters, 0)) { 2944 suword8_noerr(&lp->mutex_waiters, 2945 waiters); 2946 } 2947 } 2948 } else { 2949 /* 2950 * We are the owner. Release it. 
2951 */ 2952 (void) lwp_clear_mutex(lp, lockflg); 2953 ulock_clear(&lp->mutex_lockw); 2954 fuword8_noerr(&lp->mutex_waiters, &waiters); 2955 if (waiters && 2956 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2957 suword8_noerr(&lp->mutex_waiters, waiters); 2958 } 2959 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2960 } 2961 out: 2962 no_fault(); 2963 if (watched) 2964 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2965 } 2966 2967 /* 2968 * Register a process-shared robust mutex in the lwpchan cache. 2969 */ 2970 int 2971 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr) 2972 { 2973 int error = 0; 2974 volatile int watched; 2975 label_t ljb; 2976 uint8_t type; 2977 lwpchan_t lwpchan; 2978 2979 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2980 return (set_errno(EFAULT)); 2981 2982 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2983 2984 if (on_fault(&ljb)) { 2985 error = EFAULT; 2986 } else { 2987 /* 2988 * Force Copy-on-write if necessary and ensure that the 2989 * synchronization object resides in read/write memory. 2990 * Cause an EFAULT return now if this is not so. 2991 */ 2992 fuword8_noerr(&lp->mutex_type, &type); 2993 suword8_noerr(&lp->mutex_type, type); 2994 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2995 != (USYNC_PROCESS|LOCK_ROBUST)) { 2996 error = EINVAL; 2997 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp, 2998 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) { 2999 error = EFAULT; 3000 } 3001 } 3002 no_fault(); 3003 if (watched) 3004 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3005 if (error) 3006 return (set_errno(error)); 3007 return (0); 3008 } 3009 3010 /* 3011 * There is a user-level robust lock registration in libc. 3012 * Mark it as invalid by storing -1 into the location of the pointer. 3013 */ 3014 static void 3015 lwp_mutex_unregister(void *uaddr) 3016 { 3017 if (get_udatamodel() == DATAMODEL_NATIVE) { 3018 (void) sulword(uaddr, (ulong_t)-1); 3019 #ifdef _SYSCALL32_IMPL 3020 } else { 3021 (void) suword32(uaddr, (uint32_t)-1); 3022 #endif 3023 } 3024 } 3025 3026 int 3027 lwp_mutex_trylock(lwp_mutex_t *lp) 3028 { 3029 kthread_t *t = curthread; 3030 proc_t *p = ttoproc(t); 3031 int error = 0; 3032 volatile int locked = 0; 3033 volatile int watched = 0; 3034 label_t ljb; 3035 volatile uint8_t type = 0; 3036 uint16_t flag; 3037 lwpchan_t lwpchan; 3038 3039 if ((caddr_t)lp >= p->p_as->a_userlimit) 3040 return (set_errno(EFAULT)); 3041 3042 (void) new_mstate(t, LMS_USER_LOCK); 3043 3044 if (on_fault(&ljb)) { 3045 if (locked) 3046 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3047 error = EFAULT; 3048 goto out; 3049 } 3050 /* 3051 * Force Copy-on-write if necessary and ensure that the 3052 * synchronization object resides in read/write memory. 3053 * Cause an EFAULT return now if this is not so. 
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!ulock_try(&lp->mutex_lockw))
		error = EBUSY;
	else {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Unlock the mutex and unblock any lwps that are trying to acquire it.
 * A blocked lwp resumes and retries acquiring the lock.
 */
int
lwp_mutex_unlock(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	uint16_t flag;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
3149 */ 3150 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3151 suword8_noerr(&lp->mutex_type, type); 3152 3153 if (UPIMUTEX(type)) { 3154 no_fault(); 3155 error = lwp_upimutex_unlock(lp, type); 3156 if (error) 3157 return (set_errno(error)); 3158 return (0); 3159 } 3160 3161 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3162 3163 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3164 &lwpchan, LWPCHAN_MPPOOL)) { 3165 error = EFAULT; 3166 goto out; 3167 } 3168 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3169 locked = 1; 3170 if (type & LOCK_ROBUST) { 3171 fuword16_noerr(&lp->mutex_flag, &flag); 3172 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3173 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3174 flag |= LOCK_NOTRECOVERABLE; 3175 suword16_noerr(&lp->mutex_flag, flag); 3176 } 3177 } 3178 if (type & USYNC_PROCESS) 3179 suword32_noerr(&lp->mutex_ownerpid, 0); 3180 ulock_clear(&lp->mutex_lockw); 3181 /* 3182 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3183 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3184 * may fail. If it fails, do not write into the waiter bit. 3185 * The call to lwp_release() might fail due to one of three reasons: 3186 * 3187 * 1. due to the thread which set the waiter bit not actually 3188 * sleeping since it got the lock on the re-try. The waiter 3189 * bit will then be correctly updated by that thread. This 3190 * window may be closed by reading the wait bit again here 3191 * and not calling lwp_release() at all if it is zero. 3192 * 2. the thread which set the waiter bit and went to sleep 3193 * was woken up by a signal. This time, the waiter recomputes 3194 * the wait bit in the return with EINTR code. 3195 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3196 * memory that has been re-used after the lock was dropped. 3197 * In this case, writing into the waiter bit would cause data 3198 * corruption. 3199 */ 3200 fuword8_noerr(&lp->mutex_waiters, &waiters); 3201 if (waiters) { 3202 if ((type & LOCK_ROBUST) && 3203 (flag & LOCK_NOTRECOVERABLE)) { 3204 lwp_release_all(&lwpchan); 3205 suword8_noerr(&lp->mutex_waiters, 0); 3206 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3207 suword8_noerr(&lp->mutex_waiters, waiters); 3208 } 3209 } 3210 3211 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3212 out: 3213 no_fault(); 3214 if (watched) 3215 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3216 if (error) 3217 return (set_errno(error)); 3218 return (0); 3219 } 3220
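
/*
 * Added illustrative sketch, not part of the original source and not the
 * actual libc implementation: the acquire side of the waiter-bit protocol
 * that the comment in lwp_mutex_unlock() above refers to behaves, in
 * spirit, like the loop below.  In this implementation the re-try and the
 * setting of mutex_waiters happen in lwp_mutex_timedlock() (earlier in
 * this file); the helper names try_lock_byte() and wait_on_lwpchan() are
 * hypothetical stand-ins for the atomic test-and-set on mutex_lockw
 * (ulock_try() in the kernel) and for queueing on the mutex's lwpchan.
 *
 *	for (;;) {
 *		if (try_lock_byte(&mp->mutex_lockw))
 *			return (0);
 *		mp->mutex_waiters = 1;
 *		if (try_lock_byte(&mp->mutex_lockw))
 *			return (0);
 *		wait_on_lwpchan(mp);
 *	}
 *
 * The first early return is the uncontended case; the second is "reason 1"
 * in the lwp_mutex_unlock() comment above (the would-be waiter wins the
 * re-try after advertising itself).  lwp_mutex_unlock() clears mutex_lockw
 * first and only then looks at mutex_waiters, waking one sleeper via
 * lwp_release() when it is set; the woken lwp loops and re-tries the lock.
 */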