1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/user.h> 36 #include <sys/errno.h> 37 #include <sys/file.h> 38 #include <sys/proc.h> 39 #include <sys/prsystm.h> 40 #include <sys/kmem.h> 41 #include <sys/sobject.h> 42 #include <sys/fault.h> 43 #include <sys/procfs.h> 44 #include <sys/watchpoint.h> 45 #include <sys/time.h> 46 #include <sys/cmn_err.h> 47 #include <sys/machlock.h> 48 #include <sys/debug.h> 49 #include <sys/synch.h> 50 #include <sys/synch32.h> 51 #include <sys/mman.h> 52 #include <sys/class.h> 53 #include <sys/schedctl.h> 54 #include <sys/sleepq.h> 55 #include <sys/policy.h> 56 #include <sys/tnf_probe.h> 57 #include <sys/lwpchan_impl.h> 58 #include <sys/turnstile.h> 59 #include <sys/atomic.h> 60 #include <sys/lwp_timer_impl.h> 61 #include <sys/lwp_upimutex_impl.h> 62 #include <vm/as.h> 63 #include <sys/sdt.h> 64 65 static kthread_t *lwpsobj_owner(caddr_t); 66 static void lwp_unsleep(kthread_t *t); 67 static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip); 68 static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg); 69 static void lwp_mutex_unregister(void *uaddr); 70 71 extern int lwp_cond_signal(lwp_cond_t *cv); 72 73 /* 74 * Maximum number of user prio inheritance locks that can be held by a thread. 75 * Used to limit kmem for each thread. This is a per-thread limit that 76 * can be administered on a system wide basis (using /etc/system). 77 * 78 * Also, when a limit, say maxlwps is added for numbers of lwps within a 79 * process, the per-thread limit automatically becomes a process-wide limit 80 * of maximum number of held upi locks within a process: 81 * maxheldupimx = maxnestupimx * maxlwps; 82 */ 83 static uint32_t maxnestupimx = 2000; 84 85 /* 86 * The sobj_ops vector exports a set of functions needed when a thread 87 * is asleep on a synchronization object of this type. 
88 */ 89 static sobj_ops_t lwp_sobj_ops = { 90 SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri 91 }; 92 93 static kthread_t *lwpsobj_pi_owner(upimutex_t *up); 94 95 static sobj_ops_t lwp_sobj_pi_ops = { 96 SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep, 97 turnstile_change_pri 98 }; 99 100 static sleepq_head_t lwpsleepq[NSLEEPQ]; 101 upib_t upimutextab[UPIMUTEX_TABSIZE]; 102 103 #define LWPCHAN_LOCK_SHIFT 10 /* 1024 locks for each pool */ 104 #define LWPCHAN_LOCK_SIZE (1 << LWPCHAN_LOCK_SHIFT) 105 106 /* 107 * We know that both lc_wchan and lc_wchan0 are addresses that most 108 * likely are 8-byte aligned, so we shift off the low-order 3 bits. 109 * 'pool' is either 0 or 1. 110 */ 111 #define LWPCHAN_LOCK_HASH(X, pool) \ 112 (((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \ 113 (LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0)) 114 115 static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE]; 116 117 /* 118 * Is this a POSIX threads user-level lock requiring priority inheritance? 119 */ 120 #define UPIMUTEX(type) ((type) & LOCK_PRIO_INHERIT) 121 122 static sleepq_head_t * 123 lwpsqhash(lwpchan_t *lwpchan) 124 { 125 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 126 return (&lwpsleepq[SQHASHINDEX(x)]); 127 } 128 129 /* 130 * Lock an lwpchan. 131 * Keep this in sync with lwpchan_unlock(), below. 132 */ 133 static void 134 lwpchan_lock(lwpchan_t *lwpchan, int pool) 135 { 136 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 137 mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 138 } 139 140 /* 141 * Unlock an lwpchan. 142 * Keep this in sync with lwpchan_lock(), above. 143 */ 144 static void 145 lwpchan_unlock(lwpchan_t *lwpchan, int pool) 146 { 147 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 148 mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 149 } 150 151 /* 152 * Delete mappings from the lwpchan cache for pages that are being 153 * unmapped by as_unmap(). Given a range of addresses, "start" to "end", 154 * all mappings within the range are deleted from the lwpchan cache. 155 */ 156 void 157 lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end) 158 { 159 lwpchan_data_t *lcp; 160 lwpchan_hashbucket_t *hashbucket; 161 lwpchan_hashbucket_t *endbucket; 162 lwpchan_entry_t *ent; 163 lwpchan_entry_t **prev; 164 caddr_t addr; 165 166 mutex_enter(&p->p_lcp_lock); 167 lcp = p->p_lcp; 168 hashbucket = lcp->lwpchan_cache; 169 endbucket = hashbucket + lcp->lwpchan_size; 170 for (; hashbucket < endbucket; hashbucket++) { 171 if (hashbucket->lwpchan_chain == NULL) 172 continue; 173 mutex_enter(&hashbucket->lwpchan_lock); 174 prev = &hashbucket->lwpchan_chain; 175 /* check entire chain */ 176 while ((ent = *prev) != NULL) { 177 addr = ent->lwpchan_addr; 178 if (start <= addr && addr < end) { 179 *prev = ent->lwpchan_next; 180 /* 181 * We do this only for the obsolete type 182 * USYNC_PROCESS_ROBUST. Otherwise robust 183 * locks do not draw ELOCKUNMAPPED or 184 * EOWNERDEAD due to being unmapped. 185 */ 186 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 187 (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) 188 lwp_mutex_cleanup(ent, LOCK_UNMAPPED); 189 /* 190 * If there is a user-level robust lock 191 * registration, mark it as invalid. 
192 */ 193 if ((addr = ent->lwpchan_uaddr) != NULL) 194 lwp_mutex_unregister(addr); 195 kmem_free(ent, sizeof (*ent)); 196 atomic_add_32(&lcp->lwpchan_entries, -1); 197 } else { 198 prev = &ent->lwpchan_next; 199 } 200 } 201 mutex_exit(&hashbucket->lwpchan_lock); 202 } 203 mutex_exit(&p->p_lcp_lock); 204 } 205 206 /* 207 * Given an lwpchan cache pointer and a process virtual address, 208 * return a pointer to the corresponding lwpchan hash bucket. 209 */ 210 static lwpchan_hashbucket_t * 211 lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr) 212 { 213 uint_t i; 214 215 /* 216 * All user-level sync object addresses are 8-byte aligned. 217 * Ignore the lowest 3 bits of the address and use the 218 * higher-order 2*lwpchan_bits bits for the hash index. 219 */ 220 addr >>= 3; 221 i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask; 222 return (lcp->lwpchan_cache + i); 223 } 224 225 /* 226 * (Re)allocate the per-process lwpchan cache. 227 */ 228 static void 229 lwpchan_alloc_cache(proc_t *p, uint_t bits) 230 { 231 lwpchan_data_t *lcp; 232 lwpchan_data_t *old_lcp; 233 lwpchan_hashbucket_t *hashbucket; 234 lwpchan_hashbucket_t *endbucket; 235 lwpchan_hashbucket_t *newbucket; 236 lwpchan_entry_t *ent; 237 lwpchan_entry_t *next; 238 uint_t count; 239 240 ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS); 241 242 lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP); 243 lcp->lwpchan_bits = bits; 244 lcp->lwpchan_size = 1 << lcp->lwpchan_bits; 245 lcp->lwpchan_mask = lcp->lwpchan_size - 1; 246 lcp->lwpchan_entries = 0; 247 lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size * 248 sizeof (lwpchan_hashbucket_t), KM_SLEEP); 249 lcp->lwpchan_next_data = NULL; 250 251 mutex_enter(&p->p_lcp_lock); 252 if ((old_lcp = p->p_lcp) != NULL) { 253 if (old_lcp->lwpchan_bits >= bits) { 254 /* someone beat us to it */ 255 mutex_exit(&p->p_lcp_lock); 256 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 257 sizeof (lwpchan_hashbucket_t)); 258 kmem_free(lcp, sizeof (lwpchan_data_t)); 259 return; 260 } 261 /* 262 * Acquire all of the old hash table locks. 263 */ 264 hashbucket = old_lcp->lwpchan_cache; 265 endbucket = hashbucket + old_lcp->lwpchan_size; 266 for (; hashbucket < endbucket; hashbucket++) 267 mutex_enter(&hashbucket->lwpchan_lock); 268 /* 269 * Move all of the old hash table entries to the 270 * new hash table. The new hash table has not yet 271 * been installed so we don't need any of its locks. 272 */ 273 count = 0; 274 hashbucket = old_lcp->lwpchan_cache; 275 for (; hashbucket < endbucket; hashbucket++) { 276 ent = hashbucket->lwpchan_chain; 277 while (ent != NULL) { 278 next = ent->lwpchan_next; 279 newbucket = lwpchan_bucket(lcp, 280 (uintptr_t)ent->lwpchan_addr); 281 ent->lwpchan_next = newbucket->lwpchan_chain; 282 newbucket->lwpchan_chain = ent; 283 ent = next; 284 count++; 285 } 286 hashbucket->lwpchan_chain = NULL; 287 } 288 lcp->lwpchan_entries = count; 289 } 290 291 /* 292 * Retire the old hash table. We can't actually kmem_free() it 293 * now because someone may still have a pointer to it. Instead, 294 * we link it onto the new hash table's list of retired hash tables. 295 * The new hash table is double the size of the previous one, so 296 * the total size of all retired hash tables is less than the size 297 * of the new one. exit() and exec() free the retired hash tables 298 * (see lwpchan_destroy_cache(), below). 299 */ 300 lcp->lwpchan_next_data = old_lcp; 301 302 /* 303 * As soon as we store the new lcp, future locking operations will 304 * use it. 
Therefore, we must ensure that all the state we've just 305 * established reaches global visibility before the new lcp does. 306 */ 307 membar_producer(); 308 p->p_lcp = lcp; 309 310 if (old_lcp != NULL) { 311 /* 312 * Release all of the old hash table locks. 313 */ 314 hashbucket = old_lcp->lwpchan_cache; 315 for (; hashbucket < endbucket; hashbucket++) 316 mutex_exit(&hashbucket->lwpchan_lock); 317 } 318 mutex_exit(&p->p_lcp_lock); 319 } 320 321 /* 322 * Deallocate the lwpchan cache, and any dynamically allocated mappings. 323 * Called when the process exits or execs. All lwps except one have 324 * exited so we need no locks here. 325 */ 326 void 327 lwpchan_destroy_cache(int exec) 328 { 329 proc_t *p = curproc; 330 lwpchan_hashbucket_t *hashbucket; 331 lwpchan_hashbucket_t *endbucket; 332 lwpchan_data_t *lcp; 333 lwpchan_entry_t *ent; 334 lwpchan_entry_t *next; 335 uint16_t lockflg; 336 337 lcp = p->p_lcp; 338 p->p_lcp = NULL; 339 340 lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD; 341 hashbucket = lcp->lwpchan_cache; 342 endbucket = hashbucket + lcp->lwpchan_size; 343 for (; hashbucket < endbucket; hashbucket++) { 344 ent = hashbucket->lwpchan_chain; 345 hashbucket->lwpchan_chain = NULL; 346 while (ent != NULL) { 347 next = ent->lwpchan_next; 348 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 349 (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 350 == (USYNC_PROCESS | LOCK_ROBUST)) 351 lwp_mutex_cleanup(ent, lockflg); 352 kmem_free(ent, sizeof (*ent)); 353 ent = next; 354 } 355 } 356 357 while (lcp != NULL) { 358 lwpchan_data_t *next_lcp = lcp->lwpchan_next_data; 359 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 360 sizeof (lwpchan_hashbucket_t)); 361 kmem_free(lcp, sizeof (lwpchan_data_t)); 362 lcp = next_lcp; 363 } 364 } 365 366 /* 367 * Return zero when there is an entry in the lwpchan cache for the 368 * given process virtual address and non-zero when there is not. 369 * The returned non-zero value is the current length of the 370 * hash chain plus one. The caller holds the hash bucket lock. 371 */ 372 static uint_t 373 lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan, 374 lwpchan_hashbucket_t *hashbucket) 375 { 376 lwpchan_entry_t *ent; 377 uint_t count = 1; 378 379 for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) { 380 if (ent->lwpchan_addr == addr) { 381 if (ent->lwpchan_type != type || 382 ent->lwpchan_pool != pool) { 383 /* 384 * This shouldn't happen, but might if the 385 * process reuses its memory for different 386 * types of sync objects. We test first 387 * to avoid grabbing the memory cache line. 388 */ 389 ent->lwpchan_type = (uint16_t)type; 390 ent->lwpchan_pool = (uint16_t)pool; 391 } 392 *lwpchan = ent->lwpchan_lwpchan; 393 return (0); 394 } 395 count++; 396 } 397 return (count); 398 } 399 400 /* 401 * Return the cached lwpchan mapping if cached, otherwise insert 402 * a virtual address to lwpchan mapping into the cache. 
403 */ 404 static int 405 lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr, 406 int type, lwpchan_t *lwpchan, int pool) 407 { 408 proc_t *p = curproc; 409 lwpchan_data_t *lcp; 410 lwpchan_hashbucket_t *hashbucket; 411 lwpchan_entry_t *ent; 412 memid_t memid; 413 uint_t count; 414 uint_t bits; 415 416 top: 417 /* initialize the lwpchan cache, if necesary */ 418 if ((lcp = p->p_lcp) == NULL) { 419 lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS); 420 goto top; 421 } 422 hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr); 423 mutex_enter(&hashbucket->lwpchan_lock); 424 if (lcp != p->p_lcp) { 425 /* someone resized the lwpchan cache; start over */ 426 mutex_exit(&hashbucket->lwpchan_lock); 427 goto top; 428 } 429 if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) { 430 /* it's in the cache */ 431 mutex_exit(&hashbucket->lwpchan_lock); 432 return (1); 433 } 434 mutex_exit(&hashbucket->lwpchan_lock); 435 if (as_getmemid(as, addr, &memid) != 0) 436 return (0); 437 lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0]; 438 lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1]; 439 ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP); 440 mutex_enter(&hashbucket->lwpchan_lock); 441 if (lcp != p->p_lcp) { 442 /* someone resized the lwpchan cache; start over */ 443 mutex_exit(&hashbucket->lwpchan_lock); 444 kmem_free(ent, sizeof (*ent)); 445 goto top; 446 } 447 count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket); 448 if (count == 0) { 449 /* someone else added this entry to the cache */ 450 mutex_exit(&hashbucket->lwpchan_lock); 451 kmem_free(ent, sizeof (*ent)); 452 return (1); 453 } 454 if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */ 455 (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) { 456 /* hash chain too long; reallocate the hash table */ 457 mutex_exit(&hashbucket->lwpchan_lock); 458 kmem_free(ent, sizeof (*ent)); 459 lwpchan_alloc_cache(p, bits + 1); 460 goto top; 461 } 462 ent->lwpchan_addr = addr; 463 ent->lwpchan_uaddr = uaddr; 464 ent->lwpchan_type = (uint16_t)type; 465 ent->lwpchan_pool = (uint16_t)pool; 466 ent->lwpchan_lwpchan = *lwpchan; 467 ent->lwpchan_next = hashbucket->lwpchan_chain; 468 hashbucket->lwpchan_chain = ent; 469 atomic_add_32(&lcp->lwpchan_entries, 1); 470 mutex_exit(&hashbucket->lwpchan_lock); 471 return (1); 472 } 473 474 /* 475 * Return a unique pair of identifiers that corresponds to a 476 * synchronization object's virtual address. Process-shared 477 * sync objects usually get vnode/offset from as_getmemid(). 478 */ 479 static int 480 get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool) 481 { 482 /* 483 * If the lwp synch object is defined to be process-private, 484 * we just make the first field of the lwpchan be 'as' and 485 * the second field be the synch object's virtual address. 486 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.) 487 * The lwpchan cache is used only for process-shared objects. 
488 */ 489 if (!(type & USYNC_PROCESS)) { 490 lwpchan->lc_wchan0 = (caddr_t)as; 491 lwpchan->lc_wchan = addr; 492 return (1); 493 } 494 495 return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool)); 496 } 497 498 static void 499 lwp_block(lwpchan_t *lwpchan) 500 { 501 kthread_t *t = curthread; 502 klwp_t *lwp = ttolwp(t); 503 sleepq_head_t *sqh; 504 505 thread_lock(t); 506 t->t_flag |= T_WAKEABLE; 507 t->t_lwpchan = *lwpchan; 508 t->t_sobj_ops = &lwp_sobj_ops; 509 t->t_release = 0; 510 sqh = lwpsqhash(lwpchan); 511 disp_lock_enter_high(&sqh->sq_lock); 512 CL_SLEEP(t); 513 DTRACE_SCHED(sleep); 514 THREAD_SLEEP(t, &sqh->sq_lock); 515 sleepq_insert(&sqh->sq_queue, t); 516 thread_unlock(t); 517 lwp->lwp_asleep = 1; 518 lwp->lwp_sysabort = 0; 519 lwp->lwp_ru.nvcsw++; 520 (void) new_mstate(curthread, LMS_SLEEP); 521 } 522 523 static kthread_t * 524 lwpsobj_pi_owner(upimutex_t *up) 525 { 526 return (up->upi_owner); 527 } 528 529 static struct upimutex * 530 upi_get(upib_t *upibp, lwpchan_t *lcp) 531 { 532 struct upimutex *upip; 533 534 for (upip = upibp->upib_first; upip != NULL; 535 upip = upip->upi_nextchain) { 536 if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 && 537 upip->upi_lwpchan.lc_wchan == lcp->lc_wchan) 538 break; 539 } 540 return (upip); 541 } 542 543 static void 544 upi_chain_add(upib_t *upibp, struct upimutex *upimutex) 545 { 546 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 547 548 /* 549 * Insert upimutex at front of list. Maybe a bit unfair 550 * but assume that not many lwpchans hash to the same 551 * upimutextab bucket, i.e. the list of upimutexes from 552 * upib_first is not too long. 553 */ 554 upimutex->upi_nextchain = upibp->upib_first; 555 upibp->upib_first = upimutex; 556 } 557 558 static void 559 upi_chain_del(upib_t *upibp, struct upimutex *upimutex) 560 { 561 struct upimutex **prev; 562 563 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 564 565 prev = &upibp->upib_first; 566 while (*prev != upimutex) { 567 prev = &(*prev)->upi_nextchain; 568 } 569 *prev = upimutex->upi_nextchain; 570 upimutex->upi_nextchain = NULL; 571 } 572 573 /* 574 * Add upimutex to chain of upimutexes held by curthread. 575 * Returns number of upimutexes held by curthread. 576 */ 577 static uint32_t 578 upi_mylist_add(struct upimutex *upimutex) 579 { 580 kthread_t *t = curthread; 581 582 /* 583 * Insert upimutex at front of list of upimutexes owned by t. This 584 * would match typical LIFO order in which nested locks are acquired 585 * and released. 586 */ 587 upimutex->upi_nextowned = t->t_upimutex; 588 t->t_upimutex = upimutex; 589 t->t_nupinest++; 590 ASSERT(t->t_nupinest > 0); 591 return (t->t_nupinest); 592 } 593 594 /* 595 * Delete upimutex from list of upimutexes owned by curthread. 596 */ 597 static void 598 upi_mylist_del(struct upimutex *upimutex) 599 { 600 kthread_t *t = curthread; 601 struct upimutex **prev; 602 603 /* 604 * Since the order in which nested locks are acquired and released, 605 * is typically LIFO, and typical nesting levels are not too deep, the 606 * following should not be expensive in the general case. 607 */ 608 prev = &t->t_upimutex; 609 while (*prev != upimutex) { 610 prev = &(*prev)->upi_nextowned; 611 } 612 *prev = upimutex->upi_nextowned; 613 upimutex->upi_nextowned = NULL; 614 ASSERT(t->t_nupinest > 0); 615 t->t_nupinest--; 616 } 617 618 /* 619 * Returns true if upimutex is owned. Should be called only when upim points 620 * to kmem which cannot disappear from underneath. 
621 */ 622 static int 623 upi_owned(upimutex_t *upim) 624 { 625 return (upim->upi_owner == curthread); 626 } 627 628 /* 629 * Returns pointer to kernel object (upimutex_t *) if lp is owned. 630 */ 631 static struct upimutex * 632 lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type) 633 { 634 lwpchan_t lwpchan; 635 upib_t *upibp; 636 struct upimutex *upimutex; 637 638 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 639 &lwpchan, LWPCHAN_MPPOOL)) 640 return (NULL); 641 642 upibp = &UPI_CHAIN(lwpchan); 643 mutex_enter(&upibp->upib_lock); 644 upimutex = upi_get(upibp, &lwpchan); 645 if (upimutex == NULL || upimutex->upi_owner != curthread) { 646 mutex_exit(&upibp->upib_lock); 647 return (NULL); 648 } 649 mutex_exit(&upibp->upib_lock); 650 return (upimutex); 651 } 652 653 /* 654 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if 655 * no lock hand-off occurrs. 656 */ 657 static void 658 upimutex_unlock(struct upimutex *upimutex, uint16_t flag) 659 { 660 turnstile_t *ts; 661 upib_t *upibp; 662 kthread_t *newowner; 663 664 upi_mylist_del(upimutex); 665 upibp = upimutex->upi_upibp; 666 mutex_enter(&upibp->upib_lock); 667 if (upimutex->upi_waiter != 0) { /* if waiters */ 668 ts = turnstile_lookup(upimutex); 669 if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) { 670 /* hand-off lock to highest prio waiter */ 671 newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first; 672 upimutex->upi_owner = newowner; 673 if (ts->ts_waiters == 1) 674 upimutex->upi_waiter = 0; 675 turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner); 676 mutex_exit(&upibp->upib_lock); 677 return; 678 } else if (ts != NULL) { 679 /* LOCK_NOTRECOVERABLE: wakeup all */ 680 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL); 681 } else { 682 /* 683 * Misleading w bit. Waiters might have been 684 * interrupted. No need to clear the w bit (upimutex 685 * will soon be freed). Re-calculate PI from existing 686 * waiters. 687 */ 688 turnstile_exit(upimutex); 689 turnstile_pi_recalc(); 690 } 691 } 692 /* 693 * no waiters, or LOCK_NOTRECOVERABLE. 694 * remove from the bucket chain of upi mutexes. 695 * de-allocate kernel memory (upimutex). 
696 */ 697 upi_chain_del(upimutex->upi_upibp, upimutex); 698 mutex_exit(&upibp->upib_lock); 699 kmem_free(upimutex, sizeof (upimutex_t)); 700 } 701 702 static int 703 lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp) 704 { 705 label_t ljb; 706 int error = 0; 707 lwpchan_t lwpchan; 708 uint16_t flag; 709 upib_t *upibp; 710 volatile struct upimutex *upimutex = NULL; 711 turnstile_t *ts; 712 uint32_t nupinest; 713 volatile int upilocked = 0; 714 715 if (on_fault(&ljb)) { 716 if (upilocked) 717 upimutex_unlock((upimutex_t *)upimutex, 0); 718 error = EFAULT; 719 goto out; 720 } 721 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 722 &lwpchan, LWPCHAN_MPPOOL)) { 723 error = EFAULT; 724 goto out; 725 } 726 upibp = &UPI_CHAIN(lwpchan); 727 retry: 728 mutex_enter(&upibp->upib_lock); 729 upimutex = upi_get(upibp, &lwpchan); 730 if (upimutex == NULL) { 731 /* lock available since lwpchan has no upimutex */ 732 upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP); 733 upi_chain_add(upibp, (upimutex_t *)upimutex); 734 upimutex->upi_owner = curthread; /* grab lock */ 735 upimutex->upi_upibp = upibp; 736 upimutex->upi_vaddr = lp; 737 upimutex->upi_lwpchan = lwpchan; 738 mutex_exit(&upibp->upib_lock); 739 nupinest = upi_mylist_add((upimutex_t *)upimutex); 740 upilocked = 1; 741 fuword16_noerr(&lp->mutex_flag, &flag); 742 if (nupinest > maxnestupimx && 743 secpolicy_resource(CRED()) != 0) { 744 upimutex_unlock((upimutex_t *)upimutex, flag); 745 error = ENOMEM; 746 goto out; 747 } 748 if (flag & LOCK_NOTRECOVERABLE) { 749 /* 750 * Since the setting of LOCK_NOTRECOVERABLE 751 * was done under the high-level upi mutex, 752 * in lwp_upimutex_unlock(), this flag needs to 753 * be checked while holding the upi mutex. 754 * If set, this thread should return without 755 * the lock held, and with the right error code. 756 */ 757 upimutex_unlock((upimutex_t *)upimutex, flag); 758 upilocked = 0; 759 error = ENOTRECOVERABLE; 760 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 761 if (flag & LOCK_OWNERDEAD) 762 error = EOWNERDEAD; 763 else if (type & USYNC_PROCESS_ROBUST) 764 error = ELOCKUNMAPPED; 765 else 766 error = EOWNERDEAD; 767 } 768 goto out; 769 } 770 /* 771 * If a upimutex object exists, it must have an owner. 772 * This is due to lock hand-off, and release of upimutex when no 773 * waiters are present at unlock time, 774 */ 775 ASSERT(upimutex->upi_owner != NULL); 776 if (upimutex->upi_owner == curthread) { 777 /* 778 * The user wrapper can check if the mutex type is 779 * ERRORCHECK: if not, it should stall at user-level. 780 * If so, it should return the error code. 781 */ 782 mutex_exit(&upibp->upib_lock); 783 error = EDEADLK; 784 goto out; 785 } 786 if (try == UPIMUTEX_TRY) { 787 mutex_exit(&upibp->upib_lock); 788 error = EBUSY; 789 goto out; 790 } 791 /* 792 * Block for the lock. 793 * Put the lwp in an orderly state for debugging. 794 * Calling prstop() has to be done here, and not in 795 * turnstile_block(), since the preceding call to 796 * turnstile_lookup() raises the PIL to a level 797 * at which calls to prstop() should not be made. 798 */ 799 if ((error = lwptp->lwpt_time_error) != 0) { 800 /* 801 * The SUSV3 Posix spec is very clear that we 802 * should get no error from validating the 803 * timer until we would actually sleep. 
804 */ 805 mutex_exit(&upibp->upib_lock); 806 goto out; 807 } 808 prstop(PR_REQUESTED, 0); 809 if (lwptp->lwpt_tsp != NULL) { 810 /* 811 * Unlike the protocol for other lwp timedwait operations, 812 * we must drop t_delay_lock before going to sleep in 813 * turnstile_block() for a upi mutex. 814 * See the comments below and in turnstile.c 815 */ 816 mutex_enter(&curthread->t_delay_lock); 817 (void) lwp_timer_enqueue(lwptp); 818 mutex_exit(&curthread->t_delay_lock); 819 } 820 /* 821 * Now, set the waiter bit and block for the lock in turnstile_block(). 822 * No need to preserve the previous wbit since a lock try is not 823 * attempted after setting the wait bit. Wait bit is set under 824 * the upib_lock, which is not released until the turnstile lock 825 * is acquired. Say, the upimutex is L: 826 * 827 * 1. upib_lock is held so the waiter does not have to retry L after 828 * setting the wait bit: since the owner has to grab the upib_lock 829 * to unlock L, it will certainly see the wait bit set. 830 * 2. upib_lock is not released until the turnstile lock is acquired. 831 * This is the key to preventing a missed wake-up. Otherwise, the 832 * owner could acquire the upib_lock, and the tc_lock, to call 833 * turnstile_wakeup(). All this, before the waiter gets tc_lock 834 * to sleep in turnstile_block(). turnstile_wakeup() will then not 835 * find this waiter, resulting in the missed wakeup. 836 * 3. The upib_lock, being a kernel mutex, cannot be released while 837 * holding the tc_lock (since mutex_exit() could need to acquire 838 * the same tc_lock)...and so is held when calling turnstile_block(). 839 * The address of upib_lock is passed to turnstile_block() which 840 * releases it after releasing all turnstile locks, and before going 841 * to sleep in swtch(). 842 * 4. The waiter value cannot be a count of waiters, because a waiter 843 * can be interrupted. The interrupt occurs under the tc_lock, at 844 * which point, the upib_lock cannot be locked, to decrement waiter 845 * count. So, just treat the waiter state as a bit, not a count. 846 */ 847 ts = turnstile_lookup((upimutex_t *)upimutex); 848 upimutex->upi_waiter = 1; 849 error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex, 850 &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp); 851 /* 852 * Hand-off implies that we wakeup holding the lock, except when: 853 * - deadlock is detected 854 * - lock is not recoverable 855 * - we got an interrupt or timeout 856 * If we wake up due to an interrupt or timeout, we may 857 * or may not be holding the lock due to mutex hand-off. 858 * Use lwp_upimutex_owned() to check if we do hold the lock. 859 */ 860 if (error != 0) { 861 if ((error == EINTR || error == ETIME) && 862 (upimutex = lwp_upimutex_owned(lp, type))) { 863 /* 864 * Unlock and return - the re-startable syscall will 865 * try the lock again if we got EINTR. 866 */ 867 (void) upi_mylist_add((upimutex_t *)upimutex); 868 upimutex_unlock((upimutex_t *)upimutex, 0); 869 } 870 /* 871 * The only other possible error is EDEADLK. If so, upimutex 872 * is valid, since its owner is deadlocked with curthread. 
873 */ 874 ASSERT(error == EINTR || error == ETIME || 875 (error == EDEADLK && !upi_owned((upimutex_t *)upimutex))); 876 ASSERT(!lwp_upimutex_owned(lp, type)); 877 goto out; 878 } 879 if (lwp_upimutex_owned(lp, type)) { 880 ASSERT(lwp_upimutex_owned(lp, type) == upimutex); 881 nupinest = upi_mylist_add((upimutex_t *)upimutex); 882 upilocked = 1; 883 } 884 /* 885 * Now, need to read the user-level lp->mutex_flag to do the following: 886 * 887 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED 888 * should be returned. 889 * - if lock isn't held, check if ENOTRECOVERABLE should 890 * be returned. 891 * 892 * Now, either lp->mutex_flag is readable or it's not. If not 893 * readable, the on_fault path will cause a return with EFAULT 894 * as it should. If it is readable, the state of the flag 895 * encodes the robustness state of the lock: 896 * 897 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD 898 * or LOCK_UNMAPPED setting will influence the return code 899 * appropriately. If the upimutex is not locked here, this 900 * could be due to a spurious wake-up or a NOTRECOVERABLE 901 * event. The flag's setting can be used to distinguish 902 * between these two events. 903 */ 904 fuword16_noerr(&lp->mutex_flag, &flag); 905 if (upilocked) { 906 /* 907 * If the thread wakes up from turnstile_block with the lock 908 * held, the flag could not be set to LOCK_NOTRECOVERABLE, 909 * since it would not have been handed-off the lock. 910 * So, no need to check for this case. 911 */ 912 if (nupinest > maxnestupimx && 913 secpolicy_resource(CRED()) != 0) { 914 upimutex_unlock((upimutex_t *)upimutex, flag); 915 upilocked = 0; 916 error = ENOMEM; 917 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 918 if (flag & LOCK_OWNERDEAD) 919 error = EOWNERDEAD; 920 else if (type & USYNC_PROCESS_ROBUST) 921 error = ELOCKUNMAPPED; 922 else 923 error = EOWNERDEAD; 924 } 925 } else { 926 /* 927 * Wake-up without the upimutex held. Either this is a 928 * spurious wake-up (due to signals, forkall(), whatever), or 929 * it is a LOCK_NOTRECOVERABLE robustness event. The setting 930 * of the mutex flag can be used to distinguish between the 931 * two events. 932 */ 933 if (flag & LOCK_NOTRECOVERABLE) { 934 error = ENOTRECOVERABLE; 935 } else { 936 /* 937 * Here, the flag could be set to LOCK_OWNERDEAD or 938 * not. In both cases, this is a spurious wakeup, 939 * since the upi lock is not held, but the thread 940 * has returned from turnstile_block(). 941 * 942 * The user flag could be LOCK_OWNERDEAD if, at the 943 * same time as curthread having been woken up 944 * spuriously, the owner (say Tdead) has died, marked 945 * the mutex flag accordingly, and handed off the lock 946 * to some other waiter (say Tnew). curthread just 947 * happened to read the flag while Tnew has yet to deal 948 * with the owner-dead event. 949 * 950 * In this event, curthread should retry the lock. 951 * If Tnew is able to cleanup the lock, curthread 952 * will eventually get the lock with a zero error code, 953 * If Tnew is unable to cleanup, its eventual call to 954 * unlock the lock will result in the mutex flag being 955 * set to LOCK_NOTRECOVERABLE, and the wake-up of 956 * all waiters, including curthread, which will then 957 * eventually return ENOTRECOVERABLE due to the above 958 * check. 959 * 960 * Of course, if the user-flag is not set with 961 * LOCK_OWNERDEAD, retrying is the thing to do, since 962 * this is definitely a spurious wakeup. 
963 */ 964 goto retry; 965 } 966 } 967 968 out: 969 no_fault(); 970 return (error); 971 } 972 973 974 static int 975 lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type) 976 { 977 label_t ljb; 978 int error = 0; 979 lwpchan_t lwpchan; 980 uint16_t flag; 981 upib_t *upibp; 982 volatile struct upimutex *upimutex = NULL; 983 volatile int upilocked = 0; 984 985 if (on_fault(&ljb)) { 986 if (upilocked) 987 upimutex_unlock((upimutex_t *)upimutex, 0); 988 error = EFAULT; 989 goto out; 990 } 991 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 992 &lwpchan, LWPCHAN_MPPOOL)) { 993 error = EFAULT; 994 goto out; 995 } 996 upibp = &UPI_CHAIN(lwpchan); 997 mutex_enter(&upibp->upib_lock); 998 upimutex = upi_get(upibp, &lwpchan); 999 /* 1000 * If the lock is not held, or the owner is not curthread, return 1001 * error. The user-level wrapper can return this error or stall, 1002 * depending on whether mutex is of ERRORCHECK type or not. 1003 */ 1004 if (upimutex == NULL || upimutex->upi_owner != curthread) { 1005 mutex_exit(&upibp->upib_lock); 1006 error = EPERM; 1007 goto out; 1008 } 1009 mutex_exit(&upibp->upib_lock); /* release for user memory access */ 1010 upilocked = 1; 1011 fuword16_noerr(&lp->mutex_flag, &flag); 1012 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1013 /* 1014 * transition mutex to the LOCK_NOTRECOVERABLE state. 1015 */ 1016 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 1017 flag |= LOCK_NOTRECOVERABLE; 1018 suword16_noerr(&lp->mutex_flag, flag); 1019 } 1020 if (type & USYNC_PROCESS) 1021 suword32_noerr(&lp->mutex_ownerpid, 0); 1022 upimutex_unlock((upimutex_t *)upimutex, flag); 1023 upilocked = 0; 1024 out: 1025 no_fault(); 1026 return (error); 1027 } 1028 1029 /* 1030 * Clear the contents of a user-level mutex; return the flags. 1031 * Used only by upi_dead() and lwp_mutex_cleanup(), below. 1032 */ 1033 static uint16_t 1034 lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg) 1035 { 1036 uint16_t flag; 1037 1038 fuword16_noerr(&lp->mutex_flag, &flag); 1039 if ((flag & 1040 (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) { 1041 flag |= lockflg; 1042 suword16_noerr(&lp->mutex_flag, flag); 1043 } 1044 suword32_noerr((uint32_t *)&lp->mutex_owner, 0); 1045 suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0); 1046 suword32_noerr(&lp->mutex_ownerpid, 0); 1047 suword8_noerr(&lp->mutex_rcount, 0); 1048 1049 return (flag); 1050 } 1051 1052 /* 1053 * Mark user mutex state, corresponding to kernel upimutex, 1054 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate 1055 */ 1056 static int 1057 upi_dead(upimutex_t *upip, uint16_t lockflg) 1058 { 1059 label_t ljb; 1060 int error = 0; 1061 lwp_mutex_t *lp; 1062 1063 if (on_fault(&ljb)) { 1064 error = EFAULT; 1065 goto out; 1066 } 1067 1068 lp = upip->upi_vaddr; 1069 (void) lwp_clear_mutex(lp, lockflg); 1070 suword8_noerr(&lp->mutex_lockw, 0); 1071 out: 1072 no_fault(); 1073 return (error); 1074 } 1075 1076 /* 1077 * Unlock all upimutexes held by curthread, since curthread is dying. 1078 * For each upimutex, attempt to mark its corresponding user mutex object as 1079 * dead. 1080 */ 1081 void 1082 upimutex_cleanup() 1083 { 1084 kthread_t *t = curthread; 1085 uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)? 
1086 LOCK_UNMAPPED : LOCK_OWNERDEAD; 1087 struct upimutex *upip; 1088 1089 while ((upip = t->t_upimutex) != NULL) { 1090 if (upi_dead(upip, lockflg) != 0) { 1091 /* 1092 * If the user object associated with this upimutex is 1093 * unmapped, unlock upimutex with the 1094 * LOCK_NOTRECOVERABLE flag, so that all waiters are 1095 * woken up. Since user object is unmapped, it could 1096 * not be marked as dead or notrecoverable. 1097 * The waiters will now all wake up and return 1098 * ENOTRECOVERABLE, since they would find that the lock 1099 * has not been handed-off to them. 1100 * See lwp_upimutex_lock(). 1101 */ 1102 upimutex_unlock(upip, LOCK_NOTRECOVERABLE); 1103 } else { 1104 /* 1105 * The user object has been updated as dead. 1106 * Unlock the upimutex: if no waiters, upip kmem will 1107 * be freed. If there is a waiter, the lock will be 1108 * handed off. If exit() is in progress, each existing 1109 * waiter will successively get the lock, as owners 1110 * die, and each new owner will call this routine as 1111 * it dies. The last owner will free kmem, since 1112 * it will find the upimutex has no waiters. So, 1113 * eventually, the kmem is guaranteed to be freed. 1114 */ 1115 upimutex_unlock(upip, 0); 1116 } 1117 /* 1118 * Note that the call to upimutex_unlock() above will delete 1119 * upimutex from the t_upimutexes chain. And so the 1120 * while loop will eventually terminate. 1121 */ 1122 } 1123 } 1124 1125 int 1126 lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp) 1127 { 1128 kthread_t *t = curthread; 1129 klwp_t *lwp = ttolwp(t); 1130 proc_t *p = ttoproc(t); 1131 lwp_timer_t lwpt; 1132 caddr_t timedwait; 1133 int error = 0; 1134 int time_error; 1135 clock_t tim = -1; 1136 uchar_t waiters; 1137 volatile int locked = 0; 1138 volatile int watched = 0; 1139 label_t ljb; 1140 volatile uint8_t type = 0; 1141 lwpchan_t lwpchan; 1142 sleepq_head_t *sqh; 1143 static int iswanted(); 1144 uint16_t flag; 1145 int imm_timeout = 0; 1146 1147 if ((caddr_t)lp >= p->p_as->a_userlimit) 1148 return (set_errno(EFAULT)); 1149 1150 timedwait = (caddr_t)tsp; 1151 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1152 lwpt.lwpt_imm_timeout) { 1153 imm_timeout = 1; 1154 timedwait = NULL; 1155 } 1156 1157 /* 1158 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock", 1159 * this micro state is really a run state. If the thread indeed blocks, 1160 * this state becomes valid. If not, the state is converted back to 1161 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just 1162 * when blocking. 1163 */ 1164 (void) new_mstate(t, LMS_USER_LOCK); 1165 if (on_fault(&ljb)) { 1166 if (locked) 1167 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1168 error = EFAULT; 1169 goto out; 1170 } 1171 /* 1172 * Force Copy-on-write if necessary and ensure that the 1173 * synchronization object resides in read/write memory. 1174 * Cause an EFAULT return now if this is not so. 
1175 */ 1176 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1177 suword8_noerr(&lp->mutex_type, type); 1178 if (UPIMUTEX(type)) { 1179 no_fault(); 1180 error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt); 1181 if ((type & USYNC_PROCESS) && 1182 (error == 0 || 1183 error == EOWNERDEAD || error == ELOCKUNMAPPED)) 1184 (void) suword32(&lp->mutex_ownerpid, p->p_pid); 1185 if (tsp && !time_error) /* copyout the residual time left */ 1186 error = lwp_timer_copyout(&lwpt, error); 1187 if (error) 1188 return (set_errno(error)); 1189 return (0); 1190 } 1191 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1192 &lwpchan, LWPCHAN_MPPOOL)) { 1193 error = EFAULT; 1194 goto out; 1195 } 1196 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1197 locked = 1; 1198 if (type & LOCK_ROBUST) { 1199 fuword16_noerr(&lp->mutex_flag, &flag); 1200 if (flag & LOCK_NOTRECOVERABLE) { 1201 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1202 error = ENOTRECOVERABLE; 1203 goto out; 1204 } 1205 } 1206 fuword8_noerr(&lp->mutex_waiters, &waiters); 1207 suword8_noerr(&lp->mutex_waiters, 1); 1208 1209 /* 1210 * If watchpoints are set, they need to be restored, since 1211 * atomic accesses of memory such as the call to ulock_try() 1212 * below cannot be watched. 1213 */ 1214 1215 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1216 1217 while (!ulock_try(&lp->mutex_lockw)) { 1218 if (time_error) { 1219 /* 1220 * The SUSV3 Posix spec is very clear that we 1221 * should get no error from validating the 1222 * timer until we would actually sleep. 1223 */ 1224 error = time_error; 1225 break; 1226 } 1227 1228 if (watched) { 1229 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1230 watched = 0; 1231 } 1232 1233 /* 1234 * Put the lwp in an orderly state for debugging. 1235 */ 1236 prstop(PR_REQUESTED, 0); 1237 if (timedwait) { 1238 /* 1239 * If we successfully queue the timeout, 1240 * then don't drop t_delay_lock until 1241 * we are on the sleep queue (below). 1242 */ 1243 mutex_enter(&t->t_delay_lock); 1244 if (lwp_timer_enqueue(&lwpt) != 0) { 1245 mutex_exit(&t->t_delay_lock); 1246 imm_timeout = 1; 1247 timedwait = NULL; 1248 } 1249 } 1250 lwp_block(&lwpchan); 1251 /* 1252 * Nothing should happen to cause the lwp to go to 1253 * sleep again until after it returns from swtch(). 1254 */ 1255 if (timedwait) 1256 mutex_exit(&t->t_delay_lock); 1257 locked = 0; 1258 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1259 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 1260 setrun(t); 1261 swtch(); 1262 t->t_flag &= ~T_WAKEABLE; 1263 if (timedwait) 1264 tim = lwp_timer_dequeue(&lwpt); 1265 setallwatch(); 1266 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 1267 error = EINTR; 1268 else if (imm_timeout || (timedwait && tim == -1)) 1269 error = ETIME; 1270 if (error) { 1271 lwp->lwp_asleep = 0; 1272 lwp->lwp_sysabort = 0; 1273 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1274 S_WRITE); 1275 1276 /* 1277 * Need to re-compute waiters bit. The waiters field in 1278 * the lock is not reliable. Either of two things could 1279 * have occurred: no lwp may have called lwp_release() 1280 * for me but I have woken up due to a signal or 1281 * timeout. In this case, the waiter bit is incorrect 1282 * since it is still set to 1, set above. 1283 * OR an lwp_release() did occur for some other lwp on 1284 * the same lwpchan. In this case, the waiter bit is 1285 * correct. But which event occurred, one can't tell. 1286 * So, recompute. 
1287 */ 1288 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1289 locked = 1; 1290 sqh = lwpsqhash(&lwpchan); 1291 disp_lock_enter(&sqh->sq_lock); 1292 waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan); 1293 disp_lock_exit(&sqh->sq_lock); 1294 break; 1295 } 1296 lwp->lwp_asleep = 0; 1297 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1298 S_WRITE); 1299 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1300 locked = 1; 1301 fuword8_noerr(&lp->mutex_waiters, &waiters); 1302 suword8_noerr(&lp->mutex_waiters, 1); 1303 if (type & LOCK_ROBUST) { 1304 fuword16_noerr(&lp->mutex_flag, &flag); 1305 if (flag & LOCK_NOTRECOVERABLE) { 1306 error = ENOTRECOVERABLE; 1307 break; 1308 } 1309 } 1310 } 1311 1312 if (t->t_mstate == LMS_USER_LOCK) 1313 (void) new_mstate(t, LMS_SYSTEM); 1314 1315 if (error == 0) { 1316 if (type & USYNC_PROCESS) 1317 suword32_noerr(&lp->mutex_ownerpid, p->p_pid); 1318 if (type & LOCK_ROBUST) { 1319 fuword16_noerr(&lp->mutex_flag, &flag); 1320 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1321 if (flag & LOCK_OWNERDEAD) 1322 error = EOWNERDEAD; 1323 else if (type & USYNC_PROCESS_ROBUST) 1324 error = ELOCKUNMAPPED; 1325 else 1326 error = EOWNERDEAD; 1327 } 1328 } 1329 } 1330 suword8_noerr(&lp->mutex_waiters, waiters); 1331 locked = 0; 1332 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1333 out: 1334 no_fault(); 1335 if (watched) 1336 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1337 if (tsp && !time_error) /* copyout the residual time left */ 1338 error = lwp_timer_copyout(&lwpt, error); 1339 if (error) 1340 return (set_errno(error)); 1341 return (0); 1342 } 1343 1344 /* 1345 * Obsolete lwp_mutex_lock() interface, no longer called from libc. 1346 * libc now calls lwp_mutex_timedlock(lp, NULL). 1347 * This system call trap continues to exist solely for the benefit 1348 * of old statically-linked binaries from Solaris 9 and before. 1349 * It should be removed from the system when we no longer care 1350 * about such applications. 1351 */ 1352 int 1353 lwp_mutex_lock(lwp_mutex_t *lp) 1354 { 1355 return (lwp_mutex_timedlock(lp, NULL)); 1356 } 1357 1358 static int 1359 iswanted(kthread_t *t, lwpchan_t *lwpchan) 1360 { 1361 /* 1362 * The caller holds the dispatcher lock on the sleep queue. 1363 */ 1364 while (t != NULL) { 1365 if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1366 t->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1367 return (1); 1368 t = t->t_link; 1369 } 1370 return (0); 1371 } 1372 1373 /* 1374 * Return the highest priority thread sleeping on this lwpchan. 1375 */ 1376 static kthread_t * 1377 lwp_queue_waiter(lwpchan_t *lwpchan) 1378 { 1379 sleepq_head_t *sqh; 1380 kthread_t *tp; 1381 1382 sqh = lwpsqhash(lwpchan); 1383 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1384 for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) { 1385 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1386 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1387 break; 1388 } 1389 disp_lock_exit(&sqh->sq_lock); 1390 return (tp); 1391 } 1392 1393 static int 1394 lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type) 1395 { 1396 sleepq_head_t *sqh; 1397 kthread_t *tp; 1398 kthread_t **tpp; 1399 1400 sqh = lwpsqhash(lwpchan); 1401 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1402 tpp = &sqh->sq_queue.sq_first; 1403 while ((tp = *tpp) != NULL) { 1404 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1405 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1406 /* 1407 * The following is typically false. 
It could be true 1408 * only if lwp_release() is called from 1409 * lwp_mutex_wakeup() after reading the waiters field 1410 * from memory in which the lwp lock used to be, but has 1411 * since been re-used to hold a lwp cv or lwp semaphore. 1412 * The thread "tp" found to match the lwp lock's wchan 1413 * is actually sleeping for the cv or semaphore which 1414 * now has the same wchan. In this case, lwp_release() 1415 * should return failure. 1416 */ 1417 if (sync_type != (tp->t_flag & T_WAITCVSEM)) { 1418 ASSERT(sync_type == 0); 1419 /* 1420 * assert that this can happen only for mutexes 1421 * i.e. sync_type == 0, for correctly written 1422 * user programs. 1423 */ 1424 disp_lock_exit(&sqh->sq_lock); 1425 return (0); 1426 } 1427 *waiters = iswanted(tp->t_link, lwpchan); 1428 sleepq_unlink(tpp, tp); 1429 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1430 tp->t_wchan0 = NULL; 1431 tp->t_wchan = NULL; 1432 tp->t_sobj_ops = NULL; 1433 tp->t_release = 1; 1434 THREAD_TRANSITION(tp); /* drops sleepq lock */ 1435 CL_WAKEUP(tp); 1436 thread_unlock(tp); /* drop run queue lock */ 1437 return (1); 1438 } 1439 tpp = &tp->t_link; 1440 } 1441 *waiters = 0; 1442 disp_lock_exit(&sqh->sq_lock); 1443 return (0); 1444 } 1445 1446 static void 1447 lwp_release_all(lwpchan_t *lwpchan) 1448 { 1449 sleepq_head_t *sqh; 1450 kthread_t *tp; 1451 kthread_t **tpp; 1452 1453 sqh = lwpsqhash(lwpchan); 1454 disp_lock_enter(&sqh->sq_lock); /* lock sleep q queue */ 1455 tpp = &sqh->sq_queue.sq_first; 1456 while ((tp = *tpp) != NULL) { 1457 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1458 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1459 sleepq_unlink(tpp, tp); 1460 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1461 tp->t_wchan0 = NULL; 1462 tp->t_wchan = NULL; 1463 tp->t_sobj_ops = NULL; 1464 CL_WAKEUP(tp); 1465 thread_unlock_high(tp); /* release run queue lock */ 1466 } else { 1467 tpp = &tp->t_link; 1468 } 1469 } 1470 disp_lock_exit(&sqh->sq_lock); /* drop sleep q lock */ 1471 } 1472 1473 /* 1474 * unblock a lwp that is trying to acquire this mutex. the blocked 1475 * lwp resumes and retries to acquire the lock. 1476 */ 1477 int 1478 lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all) 1479 { 1480 proc_t *p = ttoproc(curthread); 1481 lwpchan_t lwpchan; 1482 uchar_t waiters; 1483 volatile int locked = 0; 1484 volatile int watched = 0; 1485 volatile uint8_t type = 0; 1486 label_t ljb; 1487 int error = 0; 1488 1489 if ((caddr_t)lp >= p->p_as->a_userlimit) 1490 return (set_errno(EFAULT)); 1491 1492 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1493 1494 if (on_fault(&ljb)) { 1495 if (locked) 1496 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1497 error = EFAULT; 1498 goto out; 1499 } 1500 /* 1501 * Force Copy-on-write if necessary and ensure that the 1502 * synchronization object resides in read/write memory. 1503 * Cause an EFAULT return now if this is not so. 1504 */ 1505 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1506 suword8_noerr(&lp->mutex_type, type); 1507 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1508 &lwpchan, LWPCHAN_MPPOOL)) { 1509 error = EFAULT; 1510 goto out; 1511 } 1512 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1513 locked = 1; 1514 /* 1515 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 1516 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 1517 * may fail. If it fails, do not write into the waiter bit. 1518 * The call to lwp_release() might fail due to one of three reasons: 1519 * 1520 * 1. 
due to the thread which set the waiter bit not actually 1521 * sleeping since it got the lock on the re-try. The waiter 1522 * bit will then be correctly updated by that thread. This 1523 * window may be closed by reading the wait bit again here 1524 * and not calling lwp_release() at all if it is zero. 1525 * 2. the thread which set the waiter bit and went to sleep 1526 * was woken up by a signal. This time, the waiter recomputes 1527 * the wait bit in the return with EINTR code. 1528 * 3. the waiter bit read by lwp_mutex_wakeup() was in 1529 * memory that has been re-used after the lock was dropped. 1530 * In this case, writing into the waiter bit would cause data 1531 * corruption. 1532 */ 1533 if (release_all) 1534 lwp_release_all(&lwpchan); 1535 else if (lwp_release(&lwpchan, &waiters, 0)) 1536 suword8_noerr(&lp->mutex_waiters, waiters); 1537 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1538 out: 1539 no_fault(); 1540 if (watched) 1541 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1542 if (error) 1543 return (set_errno(error)); 1544 return (0); 1545 } 1546 1547 /* 1548 * lwp_cond_wait() has four arguments, a pointer to a condition variable, 1549 * a pointer to a mutex, a pointer to a timespec for a timed wait and 1550 * a flag telling the kernel whether or not to honor the kernel/user 1551 * schedctl parking protocol (see schedctl_is_park() in schedctl.c). 1552 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an 1553 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL, 1554 * it is used an an in/out parameter. On entry, it contains the relative 1555 * time until timeout. On exit, we copyout the residual time left to it. 1556 */ 1557 int 1558 lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park) 1559 { 1560 kthread_t *t = curthread; 1561 klwp_t *lwp = ttolwp(t); 1562 proc_t *p = ttoproc(t); 1563 lwp_timer_t lwpt; 1564 lwpchan_t cv_lwpchan; 1565 lwpchan_t m_lwpchan; 1566 caddr_t timedwait; 1567 volatile uint16_t type = 0; 1568 volatile uint8_t mtype = 0; 1569 uchar_t waiters; 1570 volatile int error; 1571 clock_t tim = -1; 1572 volatile int locked = 0; 1573 volatile int m_locked = 0; 1574 volatile int cvwatched = 0; 1575 volatile int mpwatched = 0; 1576 label_t ljb; 1577 volatile int no_lwpchan = 1; 1578 int imm_timeout = 0; 1579 int imm_unpark = 0; 1580 1581 if ((caddr_t)cv >= p->p_as->a_userlimit || 1582 (caddr_t)mp >= p->p_as->a_userlimit) 1583 return (set_errno(EFAULT)); 1584 1585 timedwait = (caddr_t)tsp; 1586 if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0) 1587 return (set_errno(error)); 1588 if (lwpt.lwpt_imm_timeout) { 1589 imm_timeout = 1; 1590 timedwait = NULL; 1591 } 1592 1593 (void) new_mstate(t, LMS_USER_LOCK); 1594 1595 if (on_fault(&ljb)) { 1596 if (no_lwpchan) { 1597 error = EFAULT; 1598 goto out; 1599 } 1600 if (m_locked) { 1601 m_locked = 0; 1602 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1603 } 1604 if (locked) { 1605 locked = 0; 1606 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1607 } 1608 /* 1609 * set up another on_fault() for a possible fault 1610 * on the user lock accessed at "efault" 1611 */ 1612 if (on_fault(&ljb)) { 1613 if (m_locked) { 1614 m_locked = 0; 1615 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1616 } 1617 goto out; 1618 } 1619 error = EFAULT; 1620 goto efault; 1621 } 1622 1623 /* 1624 * Force Copy-on-write if necessary and ensure that the 1625 * synchronization object resides in read/write memory. 1626 * Cause an EFAULT return now if this is not so. 
1627 */ 1628 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 1629 suword8_noerr(&mp->mutex_type, mtype); 1630 if (UPIMUTEX(mtype) == 0) { 1631 /* convert user level mutex, "mp", to a unique lwpchan */ 1632 /* check if mtype is ok to use below, instead of type from cv */ 1633 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 1634 &m_lwpchan, LWPCHAN_MPPOOL)) { 1635 error = EFAULT; 1636 goto out; 1637 } 1638 } 1639 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1640 suword16_noerr(&cv->cond_type, type); 1641 /* convert user level condition variable, "cv", to a unique lwpchan */ 1642 if (!get_lwpchan(p->p_as, (caddr_t)cv, type, 1643 &cv_lwpchan, LWPCHAN_CVPOOL)) { 1644 error = EFAULT; 1645 goto out; 1646 } 1647 no_lwpchan = 0; 1648 cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1649 if (UPIMUTEX(mtype) == 0) 1650 mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), 1651 S_WRITE); 1652 1653 /* 1654 * lwpchan_lock ensures that the calling lwp is put to sleep atomically 1655 * with respect to a possible wakeup which is a result of either 1656 * an lwp_cond_signal() or an lwp_cond_broadcast(). 1657 * 1658 * What's misleading, is that the lwp is put to sleep after the 1659 * condition variable's mutex is released. This is OK as long as 1660 * the release operation is also done while holding lwpchan_lock. 1661 * The lwp is then put to sleep when the possibility of pagefaulting 1662 * or sleeping is completely eliminated. 1663 */ 1664 lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL); 1665 locked = 1; 1666 if (UPIMUTEX(mtype) == 0) { 1667 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1668 m_locked = 1; 1669 suword8_noerr(&cv->cond_waiters_kernel, 1); 1670 /* 1671 * unlock the condition variable's mutex. (pagefaults are 1672 * possible here.) 1673 */ 1674 if (mtype & USYNC_PROCESS) 1675 suword32_noerr(&mp->mutex_ownerpid, 0); 1676 ulock_clear(&mp->mutex_lockw); 1677 fuword8_noerr(&mp->mutex_waiters, &waiters); 1678 if (waiters != 0) { 1679 /* 1680 * Given the locking of lwpchan_lock around the release 1681 * of the mutex and checking for waiters, the following 1682 * call to lwp_release() can fail ONLY if the lock 1683 * acquirer is interrupted after setting the waiter bit, 1684 * calling lwp_block() and releasing lwpchan_lock. 1685 * In this case, it could get pulled off the lwp sleep 1686 * q (via setrun()) before the following call to 1687 * lwp_release() occurs. In this case, the lock 1688 * requestor will update the waiter bit correctly by 1689 * re-evaluating it. 1690 */ 1691 if (lwp_release(&m_lwpchan, &waiters, 0)) 1692 suword8_noerr(&mp->mutex_waiters, waiters); 1693 } 1694 m_locked = 0; 1695 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1696 } else { 1697 suword8_noerr(&cv->cond_waiters_kernel, 1); 1698 error = lwp_upimutex_unlock(mp, mtype); 1699 if (error) { /* if the upimutex unlock failed */ 1700 locked = 0; 1701 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1702 goto out; 1703 } 1704 } 1705 no_fault(); 1706 1707 if (mpwatched) { 1708 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1709 mpwatched = 0; 1710 } 1711 if (cvwatched) { 1712 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1713 cvwatched = 0; 1714 } 1715 1716 /* 1717 * Put the lwp in an orderly state for debugging. 1718 */ 1719 prstop(PR_REQUESTED, 0); 1720 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 1721 /* 1722 * We received a signal at user-level before calling here 1723 * or another thread wants us to return immediately 1724 * with EINTR. See lwp_unpark(). 
1725 */ 1726 imm_unpark = 1; 1727 t->t_unpark = 0; 1728 timedwait = NULL; 1729 } else if (timedwait) { 1730 /* 1731 * If we successfully queue the timeout, 1732 * then don't drop t_delay_lock until 1733 * we are on the sleep queue (below). 1734 */ 1735 mutex_enter(&t->t_delay_lock); 1736 if (lwp_timer_enqueue(&lwpt) != 0) { 1737 mutex_exit(&t->t_delay_lock); 1738 imm_timeout = 1; 1739 timedwait = NULL; 1740 } 1741 } 1742 t->t_flag |= T_WAITCVSEM; 1743 lwp_block(&cv_lwpchan); 1744 /* 1745 * Nothing should happen to cause the lwp to go to sleep 1746 * until after it returns from swtch(). 1747 */ 1748 if (timedwait) 1749 mutex_exit(&t->t_delay_lock); 1750 locked = 0; 1751 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1752 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 1753 (imm_timeout | imm_unpark)) 1754 setrun(t); 1755 swtch(); 1756 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 1757 if (timedwait) 1758 tim = lwp_timer_dequeue(&lwpt); 1759 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 1760 MUSTRETURN(p, t) || imm_unpark) 1761 error = EINTR; 1762 else if (imm_timeout || (timedwait && tim == -1)) 1763 error = ETIME; 1764 lwp->lwp_asleep = 0; 1765 lwp->lwp_sysabort = 0; 1766 setallwatch(); 1767 1768 if (t->t_mstate == LMS_USER_LOCK) 1769 (void) new_mstate(t, LMS_SYSTEM); 1770 1771 if (tsp && check_park) /* copyout the residual time left */ 1772 error = lwp_timer_copyout(&lwpt, error); 1773 1774 /* the mutex is reacquired by the caller on return to user level */ 1775 if (error) { 1776 /* 1777 * If we were concurrently lwp_cond_signal()d and we 1778 * received a UNIX signal or got a timeout, then perform 1779 * another lwp_cond_signal() to avoid consuming the wakeup. 1780 */ 1781 if (t->t_release) 1782 (void) lwp_cond_signal(cv); 1783 return (set_errno(error)); 1784 } 1785 return (0); 1786 1787 efault: 1788 /* 1789 * make sure that the user level lock is dropped before 1790 * returning to caller, since the caller always re-acquires it. 1791 */ 1792 if (UPIMUTEX(mtype) == 0) { 1793 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1794 m_locked = 1; 1795 if (mtype & USYNC_PROCESS) 1796 suword32_noerr(&mp->mutex_ownerpid, 0); 1797 ulock_clear(&mp->mutex_lockw); 1798 fuword8_noerr(&mp->mutex_waiters, &waiters); 1799 if (waiters != 0) { 1800 /* 1801 * See comment above on lock clearing and lwp_release() 1802 * success/failure. 1803 */ 1804 if (lwp_release(&m_lwpchan, &waiters, 0)) 1805 suword8_noerr(&mp->mutex_waiters, waiters); 1806 } 1807 m_locked = 0; 1808 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1809 } else { 1810 (void) lwp_upimutex_unlock(mp, mtype); 1811 } 1812 out: 1813 no_fault(); 1814 if (mpwatched) 1815 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1816 if (cvwatched) 1817 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1818 if (t->t_mstate == LMS_USER_LOCK) 1819 (void) new_mstate(t, LMS_SYSTEM); 1820 return (set_errno(error)); 1821 } 1822 1823 /* 1824 * wakeup one lwp that's blocked on this condition variable. 
1825 */ 1826 int 1827 lwp_cond_signal(lwp_cond_t *cv) 1828 { 1829 proc_t *p = ttoproc(curthread); 1830 lwpchan_t lwpchan; 1831 uchar_t waiters; 1832 volatile uint16_t type = 0; 1833 volatile int locked = 0; 1834 volatile int watched = 0; 1835 label_t ljb; 1836 int error = 0; 1837 1838 if ((caddr_t)cv >= p->p_as->a_userlimit) 1839 return (set_errno(EFAULT)); 1840 1841 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1842 1843 if (on_fault(&ljb)) { 1844 if (locked) 1845 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1846 error = EFAULT; 1847 goto out; 1848 } 1849 /* 1850 * Force Copy-on-write if necessary and ensure that the 1851 * synchronization object resides in read/write memory. 1852 * Cause an EFAULT return now if this is not so. 1853 */ 1854 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1855 suword16_noerr(&cv->cond_type, type); 1856 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1857 &lwpchan, LWPCHAN_CVPOOL)) { 1858 error = EFAULT; 1859 goto out; 1860 } 1861 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1862 locked = 1; 1863 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1864 if (waiters != 0) { 1865 /* 1866 * The following call to lwp_release() might fail but it is 1867 * OK to write into the waiters bit below, since the memory 1868 * could not have been re-used or unmapped (for correctly 1869 * written user programs) as in the case of lwp_mutex_wakeup(). 1870 * For an incorrect program, we should not care about data 1871 * corruption since this is just one instance of other places 1872 * where corruption can occur for such a program. Of course 1873 * if the memory is unmapped, normal fault recovery occurs. 1874 */ 1875 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1876 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1877 } 1878 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1879 out: 1880 no_fault(); 1881 if (watched) 1882 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1883 if (error) 1884 return (set_errno(error)); 1885 return (0); 1886 } 1887 1888 /* 1889 * wakeup every lwp that's blocked on this condition variable. 1890 */ 1891 int 1892 lwp_cond_broadcast(lwp_cond_t *cv) 1893 { 1894 proc_t *p = ttoproc(curthread); 1895 lwpchan_t lwpchan; 1896 volatile uint16_t type = 0; 1897 volatile int locked = 0; 1898 volatile int watched = 0; 1899 label_t ljb; 1900 uchar_t waiters; 1901 int error = 0; 1902 1903 if ((caddr_t)cv >= p->p_as->a_userlimit) 1904 return (set_errno(EFAULT)); 1905 1906 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1907 1908 if (on_fault(&ljb)) { 1909 if (locked) 1910 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1911 error = EFAULT; 1912 goto out; 1913 } 1914 /* 1915 * Force Copy-on-write if necessary and ensure that the 1916 * synchronization object resides in read/write memory. 1917 * Cause an EFAULT return now if this is not so. 
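	 *
	 * The paired load and store of cond_type below is deliberate:
	 * storing back the value just read forces any copy-on-write and
	 * takes the write fault right here, while the on_fault() handler
	 * is armed, so an unmapped or read-only object produces the EFAULT
	 * return up front.  The same idiom is applied to every user-level
	 * synchronization object handled in this file.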
1918 */ 1919 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1920 suword16_noerr(&cv->cond_type, type); 1921 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1922 &lwpchan, LWPCHAN_CVPOOL)) { 1923 error = EFAULT; 1924 goto out; 1925 } 1926 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1927 locked = 1; 1928 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1929 if (waiters != 0) { 1930 lwp_release_all(&lwpchan); 1931 suword8_noerr(&cv->cond_waiters_kernel, 0); 1932 } 1933 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1934 out: 1935 no_fault(); 1936 if (watched) 1937 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1938 if (error) 1939 return (set_errno(error)); 1940 return (0); 1941 } 1942 1943 int 1944 lwp_sema_trywait(lwp_sema_t *sp) 1945 { 1946 kthread_t *t = curthread; 1947 proc_t *p = ttoproc(t); 1948 label_t ljb; 1949 volatile int locked = 0; 1950 volatile int watched = 0; 1951 volatile uint16_t type = 0; 1952 int count; 1953 lwpchan_t lwpchan; 1954 uchar_t waiters; 1955 int error = 0; 1956 1957 if ((caddr_t)sp >= p->p_as->a_userlimit) 1958 return (set_errno(EFAULT)); 1959 1960 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1961 1962 if (on_fault(&ljb)) { 1963 if (locked) 1964 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1965 error = EFAULT; 1966 goto out; 1967 } 1968 /* 1969 * Force Copy-on-write if necessary and ensure that the 1970 * synchronization object resides in read/write memory. 1971 * Cause an EFAULT return now if this is not so. 1972 */ 1973 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1974 suword16_noerr((void *)&sp->sema_type, type); 1975 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1976 &lwpchan, LWPCHAN_CVPOOL)) { 1977 error = EFAULT; 1978 goto out; 1979 } 1980 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1981 locked = 1; 1982 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1983 if (count == 0) 1984 error = EBUSY; 1985 else 1986 suword32_noerr((void *)&sp->sema_count, --count); 1987 if (count != 0) { 1988 fuword8_noerr(&sp->sema_waiters, &waiters); 1989 if (waiters != 0) { 1990 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1991 suword8_noerr(&sp->sema_waiters, waiters); 1992 } 1993 } 1994 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1995 out: 1996 no_fault(); 1997 if (watched) 1998 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1999 if (error) 2000 return (set_errno(error)); 2001 return (0); 2002 } 2003 2004 /* 2005 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 
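 *
 * The wait loop below follows the usual pattern for these objects: under
 * the lwpchan lock the lwp advertises itself in sema_waiters, blocks, and
 * on wakeup re-reads sema_count; a spurious wakeup simply goes around the
 * loop again.  In outline (error, signal and timeout handling omitted):
 *
 *	while (count == 0) {
 *		set sema_waiters;
 *		block on the lwpchan;
 *		re-read count;
 *	}
 *	count--;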
2006 */ 2007 int 2008 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2009 { 2010 kthread_t *t = curthread; 2011 klwp_t *lwp = ttolwp(t); 2012 proc_t *p = ttoproc(t); 2013 lwp_timer_t lwpt; 2014 caddr_t timedwait; 2015 clock_t tim = -1; 2016 label_t ljb; 2017 volatile int locked = 0; 2018 volatile int watched = 0; 2019 volatile uint16_t type = 0; 2020 int count; 2021 lwpchan_t lwpchan; 2022 uchar_t waiters; 2023 int error = 0; 2024 int time_error; 2025 int imm_timeout = 0; 2026 int imm_unpark = 0; 2027 2028 if ((caddr_t)sp >= p->p_as->a_userlimit) 2029 return (set_errno(EFAULT)); 2030 2031 timedwait = (caddr_t)tsp; 2032 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2033 lwpt.lwpt_imm_timeout) { 2034 imm_timeout = 1; 2035 timedwait = NULL; 2036 } 2037 2038 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2039 2040 if (on_fault(&ljb)) { 2041 if (locked) 2042 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2043 error = EFAULT; 2044 goto out; 2045 } 2046 /* 2047 * Force Copy-on-write if necessary and ensure that the 2048 * synchronization object resides in read/write memory. 2049 * Cause an EFAULT return now if this is not so. 2050 */ 2051 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2052 suword16_noerr((void *)&sp->sema_type, type); 2053 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2054 &lwpchan, LWPCHAN_CVPOOL)) { 2055 error = EFAULT; 2056 goto out; 2057 } 2058 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2059 locked = 1; 2060 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2061 while (error == 0 && count == 0) { 2062 if (time_error) { 2063 /* 2064 * The SUSV3 Posix spec is very clear that we 2065 * should get no error from validating the 2066 * timer until we would actually sleep. 2067 */ 2068 error = time_error; 2069 break; 2070 } 2071 suword8_noerr(&sp->sema_waiters, 1); 2072 if (watched) 2073 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2074 /* 2075 * Put the lwp in an orderly state for debugging. 2076 */ 2077 prstop(PR_REQUESTED, 0); 2078 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2079 /* 2080 * We received a signal at user-level before calling 2081 * here or another thread wants us to return 2082 * immediately with EINTR. See lwp_unpark(). 2083 */ 2084 imm_unpark = 1; 2085 t->t_unpark = 0; 2086 timedwait = NULL; 2087 } else if (timedwait) { 2088 /* 2089 * If we successfully queue the timeout, 2090 * then don't drop t_delay_lock until 2091 * we are on the sleep queue (below). 2092 */ 2093 mutex_enter(&t->t_delay_lock); 2094 if (lwp_timer_enqueue(&lwpt) != 0) { 2095 mutex_exit(&t->t_delay_lock); 2096 imm_timeout = 1; 2097 timedwait = NULL; 2098 } 2099 } 2100 t->t_flag |= T_WAITCVSEM; 2101 lwp_block(&lwpchan); 2102 /* 2103 * Nothing should happen to cause the lwp to sleep 2104 * again until after it returns from swtch(). 
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
		    (imm_timeout | imm_unpark))
			setrun(t);
		swtch();
		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, t) || imm_unpark)
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_asleep = 0;
		lwp->lwp_sysabort = 0;
		watched = watch_disable_addr((caddr_t)sp,
		    sizeof (*sp), S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 1;
		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	}
	if (error == 0)
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (tsp && check_park && !time_error)
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_sema_wait() interface, no longer called from libc.
 * libc now calls lwp_sema_timedwait().
 * This system call trap exists solely for the benefit of old
 * statically linked applications from Solaris 9 and before.
 * It should be removed when we no longer care about such applications.
 */
int
lwp_sema_wait(lwp_sema_t *sp)
{
	return (lwp_sema_timedwait(sp, NULL, 0));
}

int
lwp_sema_post(lwp_sema_t *sp)
{
	proc_t *p = ttoproc(curthread);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
2190 */ 2191 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2192 suword16_noerr(&sp->sema_type, type); 2193 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2194 &lwpchan, LWPCHAN_CVPOOL)) { 2195 error = EFAULT; 2196 goto out; 2197 } 2198 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2199 locked = 1; 2200 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2201 if (count == _SEM_VALUE_MAX) 2202 error = EOVERFLOW; 2203 else 2204 suword32_noerr(&sp->sema_count, ++count); 2205 if (count == 1) { 2206 fuword8_noerr(&sp->sema_waiters, &waiters); 2207 if (waiters) { 2208 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2209 suword8_noerr(&sp->sema_waiters, waiters); 2210 } 2211 } 2212 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2213 out: 2214 no_fault(); 2215 if (watched) 2216 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2217 if (error) 2218 return (set_errno(error)); 2219 return (0); 2220 } 2221 2222 #define TRW_WANT_WRITE 0x1 2223 #define TRW_LOCK_GRANTED 0x2 2224 2225 #define READ_LOCK 0 2226 #define WRITE_LOCK 1 2227 #define TRY_FLAG 0x10 2228 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2229 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2230 2231 /* 2232 * Release one writer or one or more readers. Compute the rwstate word to 2233 * reflect the new state of the queue. For a safe hand-off we copy the new 2234 * rwstate value back to userland before we wake any of the new lock holders. 2235 * 2236 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2237 * being given precedence over readers of the same priority). 2238 * 2239 * If the first thread is a reader we scan the queue releasing all readers 2240 * until we hit a writer or the end of the queue. If the first thread is a 2241 * writer we still need to check for another writer. 2242 */ 2243 void 2244 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2245 { 2246 sleepq_head_t *sqh; 2247 kthread_t *tp; 2248 kthread_t **tpp; 2249 kthread_t *tpnext; 2250 kthread_t *wakelist = NULL; 2251 uint32_t rwstate = 0; 2252 int wcount = 0; 2253 int rcount = 0; 2254 2255 sqh = lwpsqhash(lwpchan); 2256 disp_lock_enter(&sqh->sq_lock); 2257 tpp = &sqh->sq_queue.sq_first; 2258 while ((tp = *tpp) != NULL) { 2259 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2260 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2261 if (tp->t_writer & TRW_WANT_WRITE) { 2262 if ((wcount++ == 0) && (rcount == 0)) { 2263 rwstate |= URW_WRITE_LOCKED; 2264 2265 /* Just one writer to wake. */ 2266 sleepq_unlink(tpp, tp); 2267 wakelist = tp; 2268 2269 /* tpp already set for next thread. */ 2270 continue; 2271 } else { 2272 rwstate |= URW_HAS_WAITERS; 2273 /* We need look no further. */ 2274 break; 2275 } 2276 } else { 2277 rcount++; 2278 if (wcount == 0) { 2279 rwstate++; 2280 2281 /* Add reader to wake list. */ 2282 sleepq_unlink(tpp, tp); 2283 tp->t_link = wakelist; 2284 wakelist = tp; 2285 2286 /* tpp already set for next thread. */ 2287 continue; 2288 } else { 2289 rwstate |= URW_HAS_WAITERS; 2290 /* We need look no further. */ 2291 break; 2292 } 2293 } 2294 } 2295 tpp = &tp->t_link; 2296 } 2297 2298 /* Copy the new rwstate back to userland. */ 2299 suword32_noerr(&rw->rwlock_readers, rwstate); 2300 2301 /* Wake the new lock holder(s) up. 
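	 *
	 * At this point rwstate describes the lock as it will be once the
	 * wakeups below have been consumed: URW_WRITE_LOCKED if a single
	 * writer was granted the lock, or else a count of the readers
	 * granted the lock in the URW_READERS_MASK bits; URW_HAS_WAITERS
	 * is set in either case if anything remains queued.  For example
	 * (illustrative only), releasing to a queue of three readers
	 * followed by a writer leaves rwstate == (3 | URW_HAS_WAITERS):
	 * the three readers are woken and the writer stays queued.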
*/ 2302 tp = wakelist; 2303 while (tp != NULL) { 2304 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2305 tp->t_wchan0 = NULL; 2306 tp->t_wchan = NULL; 2307 tp->t_sobj_ops = NULL; 2308 tp->t_writer |= TRW_LOCK_GRANTED; 2309 tpnext = tp->t_link; 2310 tp->t_link = NULL; 2311 CL_WAKEUP(tp); 2312 thread_unlock_high(tp); 2313 tp = tpnext; 2314 } 2315 2316 disp_lock_exit(&sqh->sq_lock); 2317 } 2318 2319 /* 2320 * We enter here holding the user-level mutex, which we must release before 2321 * returning or blocking. Based on lwp_cond_wait(). 2322 */ 2323 static int 2324 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2325 { 2326 lwp_mutex_t *mp = NULL; 2327 kthread_t *t = curthread; 2328 kthread_t *tp; 2329 klwp_t *lwp = ttolwp(t); 2330 proc_t *p = ttoproc(t); 2331 lwp_timer_t lwpt; 2332 lwpchan_t lwpchan; 2333 lwpchan_t mlwpchan; 2334 caddr_t timedwait; 2335 volatile uint16_t type = 0; 2336 volatile uint8_t mtype = 0; 2337 uchar_t mwaiters; 2338 volatile int error = 0; 2339 int time_error; 2340 clock_t tim = -1; 2341 volatile int locked = 0; 2342 volatile int mlocked = 0; 2343 volatile int watched = 0; 2344 volatile int mwatched = 0; 2345 label_t ljb; 2346 volatile int no_lwpchan = 1; 2347 int imm_timeout = 0; 2348 int try_flag; 2349 uint32_t rwstate; 2350 int acquired = 0; 2351 2352 /* We only check rw because the mutex is included in it. */ 2353 if ((caddr_t)rw >= p->p_as->a_userlimit) 2354 return (set_errno(EFAULT)); 2355 2356 /* We must only report this error if we are about to sleep (later). */ 2357 timedwait = (caddr_t)tsp; 2358 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2359 lwpt.lwpt_imm_timeout) { 2360 imm_timeout = 1; 2361 timedwait = NULL; 2362 } 2363 2364 (void) new_mstate(t, LMS_USER_LOCK); 2365 2366 if (on_fault(&ljb)) { 2367 if (no_lwpchan) { 2368 error = EFAULT; 2369 goto out_nodrop; 2370 } 2371 if (mlocked) { 2372 mlocked = 0; 2373 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2374 } 2375 if (locked) { 2376 locked = 0; 2377 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2378 } 2379 /* 2380 * Set up another on_fault() for a possible fault 2381 * on the user lock accessed at "out_drop". 2382 */ 2383 if (on_fault(&ljb)) { 2384 if (mlocked) { 2385 mlocked = 0; 2386 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2387 } 2388 error = EFAULT; 2389 goto out_nodrop; 2390 } 2391 error = EFAULT; 2392 goto out_nodrop; 2393 } 2394 2395 /* Process rd_wr (including sanity check). */ 2396 try_flag = (rd_wr & TRY_FLAG); 2397 rd_wr &= ~TRY_FLAG; 2398 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2399 error = EINVAL; 2400 goto out_nodrop; 2401 } 2402 2403 /* 2404 * Force Copy-on-write if necessary and ensure that the 2405 * synchronization object resides in read/write memory. 2406 * Cause an EFAULT return now if this is not so. 2407 */ 2408 mp = &rw->mutex; 2409 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2410 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2411 suword8_noerr(&mp->mutex_type, mtype); 2412 suword16_noerr(&rw->rwlock_type, type); 2413 2414 /* We can only continue for simple USYNC_PROCESS locks. */ 2415 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2416 error = EINVAL; 2417 goto out_nodrop; 2418 } 2419 2420 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2421 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2422 &mlwpchan, LWPCHAN_MPPOOL)) { 2423 error = EFAULT; 2424 goto out_nodrop; 2425 } 2426 2427 /* Convert user level rwlock, "rw", to a unique lwpchan. 
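	 *
	 * The lwpchan gives the object an identity that does not depend on
	 * the address at which this particular process has it mapped, so
	 * for USYNC_PROCESS objects lwps in different processes that share
	 * the lock end up on the same sleep queue.  Note that the rwlock
	 * and its embedded mutex get separate channels, in the CV pool and
	 * the MP pool respectively.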
	 */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);

	/*
	 * lwpchan_lock() ensures that the calling LWP is put to sleep
	 * atomically with respect to a possible wakeup which is a result
	 * of lwp_rwlock_unlock().
	 *
	 * What's misleading is that the LWP is put to sleep after the
	 * rwlock's mutex is released. This is OK as long as the release
	 * operation is also done while holding mlwpchan. The LWP is then
	 * put to sleep when the possibility of pagefaulting or sleeping
	 * has been completely eliminated.
	 */
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 1;

	/*
	 * Fetch the current rwlock state.
	 *
	 * The possibility of spurious wake-ups or killed waiters means
	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
	 * We only fix these if they are important to us.
	 *
	 * Although various error states can be observed here (e.g. the lock
	 * is not held, but there are waiters) we assume these are application
	 * errors and so we take no corrective action.
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	/*
	 * We cannot legitimately get here from user-level
	 * without URW_HAS_WAITERS being set.
	 * Set it now to guard against user-level error.
	 */
	rwstate |= URW_HAS_WAITERS;

	/*
	 * We can try only if the lock isn't held by a writer.
	 */
	if (!(rwstate & URW_WRITE_LOCKED)) {
		tp = lwp_queue_waiter(&lwpchan);
		if (tp == NULL) {
			/*
			 * Hmmm, rwstate indicates waiters but there are
			 * none queued. This could just be the result of a
			 * spurious wakeup, so let's ignore it.
			 *
			 * We now have a chance to acquire the lock
			 * uncontended, but this is the last chance for
			 * a writer to acquire the lock without blocking.
			 */
			if (rd_wr == READ_LOCK) {
				rwstate++;
				acquired = 1;
			} else if ((rwstate & URW_READERS_MASK) == 0) {
				rwstate |= URW_WRITE_LOCKED;
				acquired = 1;
			}
		} else if (rd_wr == READ_LOCK) {
			/*
			 * This is the last chance for a reader to acquire
			 * the lock now, but it can only do so if there is
			 * no writer of equal or greater priority at the
			 * head of the queue.
			 *
			 * It is also just possible that there is a reader
			 * at the head of the queue. This may be the result
			 * of a spurious wakeup or an application failure.
			 * In this case we only acquire the lock if we have
			 * equal or greater priority. It is not our job to
			 * release spurious waiters.
			 */
			pri_t our_pri = DISP_PRIO(t);
			pri_t his_pri = DISP_PRIO(tp);

			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
			    !(tp->t_writer & TRW_WANT_WRITE))) {
				rwstate++;
				acquired = 1;
			}
		}
	}

	if (acquired || try_flag || time_error) {
		/*
		 * We're not going to block this time.
		 */
		suword32_noerr(&rw->rwlock_readers, rwstate);
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 0;

		if (acquired) {
			/*
			 * Got the lock!
			 */
			error = 0;

		} else if (try_flag) {
			/*
			 * We didn't get the lock and we're about to block.
			 * If we're doing a trylock, return EBUSY instead.
			 */
			error = EBUSY;

		} else if (time_error) {
			/*
			 * The SUSV3 POSIX spec is very clear that we should
			 * get no error from validating the timer (above)
			 * until we would actually sleep.
			 */
			error = time_error;
		}

		goto out_drop;
	}

	/*
	 * We're about to block, so indicate what kind of waiter we are.
	 */
	t->t_writer = 0;
	if (rd_wr == WRITE_LOCK)
		t->t_writer = TRW_WANT_WRITE;
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/*
	 * Unlock the rwlock's mutex (pagefaults are possible here).
	 */
	suword32_noerr((uint32_t *)&mp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0);
	suword32_noerr(&mp->mutex_ownerpid, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * Given the locking of mlwpchan around the release of
		 * the mutex and checking for waiters, the following
		 * call to lwp_release() can fail ONLY if the lock
		 * acquirer is interrupted after setting the waiter bit,
		 * calling lwp_block() and releasing mlwpchan.
		 * In this case, it could get pulled off the LWP sleep
		 * queue (via setrun()) before the following call to
		 * lwp_release() occurs, and the lock requestor will
		 * update the waiter bit correctly by re-evaluating it.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;
	no_fault();

	if (mwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mwatched = 0;
	}
	if (watched) {
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
		watched = 0;
	}

	/*
	 * Put the LWP in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&lwpchan);

	/*
	 * Nothing should happen to cause the LWP to go to sleep until after
	 * it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
		setrun(t);
	swtch();

	/*
	 * We're back, but we need to work out why. Were we interrupted? Did
	 * we time out? Were we granted the lock?
	 */
	error = EAGAIN;
	acquired = (t->t_writer & TRW_LOCK_GRANTED);
	t->t_writer = 0;
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	/*
	 * If we were granted the lock we don't care about EINTR or ETIME.
2649 */ 2650 if (acquired) 2651 error = 0; 2652 2653 if (t->t_mstate == LMS_USER_LOCK) 2654 (void) new_mstate(t, LMS_SYSTEM); 2655 2656 if (error) 2657 return (set_errno(error)); 2658 return (0); 2659 2660 out_drop: 2661 /* 2662 * Make sure that the user level lock is dropped before returning 2663 * to the caller. 2664 */ 2665 if (!mlocked) { 2666 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2667 mlocked = 1; 2668 } 2669 suword32_noerr((uint32_t *)&mp->mutex_owner, 0); 2670 suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0); 2671 suword32_noerr(&mp->mutex_ownerpid, 0); 2672 ulock_clear(&mp->mutex_lockw); 2673 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2674 if (mwaiters != 0) { 2675 /* 2676 * See comment above on lock clearing and lwp_release() 2677 * success/failure. 2678 */ 2679 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2680 suword8_noerr(&mp->mutex_waiters, mwaiters); 2681 } 2682 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2683 mlocked = 0; 2684 2685 out_nodrop: 2686 no_fault(); 2687 if (mwatched) 2688 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2689 if (watched) 2690 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2691 if (t->t_mstate == LMS_USER_LOCK) 2692 (void) new_mstate(t, LMS_SYSTEM); 2693 if (error) 2694 return (set_errno(error)); 2695 return (0); 2696 } 2697 2698 /* 2699 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2700 * we never drop the lock. 2701 */ 2702 static int 2703 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2704 { 2705 kthread_t *t = curthread; 2706 proc_t *p = ttoproc(t); 2707 lwpchan_t lwpchan; 2708 volatile uint16_t type = 0; 2709 volatile int error = 0; 2710 volatile int locked = 0; 2711 volatile int watched = 0; 2712 label_t ljb; 2713 volatile int no_lwpchan = 1; 2714 uint32_t rwstate; 2715 2716 /* We only check rw because the mutex is included in it. */ 2717 if ((caddr_t)rw >= p->p_as->a_userlimit) 2718 return (set_errno(EFAULT)); 2719 2720 if (on_fault(&ljb)) { 2721 if (no_lwpchan) { 2722 error = EFAULT; 2723 goto out_nodrop; 2724 } 2725 if (locked) { 2726 locked = 0; 2727 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2728 } 2729 error = EFAULT; 2730 goto out_nodrop; 2731 } 2732 2733 /* 2734 * Force Copy-on-write if necessary and ensure that the 2735 * synchronization object resides in read/write memory. 2736 * Cause an EFAULT return now if this is not so. 2737 */ 2738 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2739 suword16_noerr(&rw->rwlock_type, type); 2740 2741 /* We can only continue for simple USYNC_PROCESS locks. */ 2742 if (type != USYNC_PROCESS) { 2743 error = EINVAL; 2744 goto out_nodrop; 2745 } 2746 2747 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2748 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2749 &lwpchan, LWPCHAN_CVPOOL)) { 2750 error = EFAULT; 2751 goto out_nodrop; 2752 } 2753 2754 no_lwpchan = 0; 2755 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2756 2757 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2758 locked = 1; 2759 2760 /* 2761 * We can resolve multiple readers (except the last reader) here. 2762 * For the last reader or a writer we need lwp_rwlock_release(), 2763 * to which we also delegate the task of copying the new rwstate 2764 * back to userland (see the comment there). 
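	 *
	 * For example (illustrative only): with three readers holding the
	 * lock, the URW_READERS_MASK bits of rwstate hold 3, so an unlock
	 * here just stores back 2; only when the count would drop to 0, or
	 * when URW_WRITE_LOCKED is set, do we call lwp_rwlock_release() to
	 * decide who, if anyone, gets the lock next.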
2765 */ 2766 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2767 if (rwstate & URW_WRITE_LOCKED) 2768 lwp_rwlock_release(&lwpchan, rw); 2769 else if ((rwstate & URW_READERS_MASK) > 0) { 2770 rwstate--; 2771 if ((rwstate & URW_READERS_MASK) == 0) 2772 lwp_rwlock_release(&lwpchan, rw); 2773 else 2774 suword32_noerr(&rw->rwlock_readers, rwstate); 2775 } 2776 2777 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2778 locked = 0; 2779 error = 0; 2780 2781 out_nodrop: 2782 no_fault(); 2783 if (watched) 2784 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2785 if (error) 2786 return (set_errno(error)); 2787 return (0); 2788 } 2789 2790 int 2791 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2792 { 2793 switch (subcode) { 2794 case 0: 2795 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2796 case 1: 2797 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2798 case 2: 2799 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2800 case 3: 2801 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2802 case 4: 2803 return (lwp_rwlock_unlock(rwlp)); 2804 } 2805 return (set_errno(EINVAL)); 2806 } 2807 2808 /* 2809 * Return the owner of the user-level s-object. 2810 * Since we can't really do this, return NULL. 2811 */ 2812 /* ARGSUSED */ 2813 static kthread_t * 2814 lwpsobj_owner(caddr_t sobj) 2815 { 2816 return ((kthread_t *)NULL); 2817 } 2818 2819 /* 2820 * Wake up a thread asleep on a user-level synchronization 2821 * object. 2822 */ 2823 static void 2824 lwp_unsleep(kthread_t *t) 2825 { 2826 ASSERT(THREAD_LOCK_HELD(t)); 2827 if (t->t_wchan0 != NULL) { 2828 sleepq_head_t *sqh; 2829 sleepq_t *sqp = t->t_sleepq; 2830 2831 if (sqp != NULL) { 2832 sqh = lwpsqhash(&t->t_lwpchan); 2833 ASSERT(&sqh->sq_queue == sqp); 2834 sleepq_unsleep(t); 2835 disp_lock_exit_high(&sqh->sq_lock); 2836 CL_SETRUN(t); 2837 return; 2838 } 2839 } 2840 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2841 } 2842 2843 /* 2844 * Change the priority of a thread asleep on a user-level 2845 * synchronization object. To maintain proper priority order, 2846 * we: 2847 * o dequeue the thread. 2848 * o change its priority. 2849 * o re-enqueue the thread. 2850 * Assumption: the thread is locked on entry. 
2851 */ 2852 static void 2853 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2854 { 2855 ASSERT(THREAD_LOCK_HELD(t)); 2856 if (t->t_wchan0 != NULL) { 2857 sleepq_t *sqp = t->t_sleepq; 2858 2859 sleepq_dequeue(t); 2860 *t_prip = pri; 2861 sleepq_insert(sqp, t); 2862 } else 2863 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2864 } 2865 2866 /* 2867 * Clean up a left-over process-shared robust mutex 2868 */ 2869 static void 2870 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2871 { 2872 uint16_t flag; 2873 uchar_t waiters; 2874 label_t ljb; 2875 pid_t owner_pid; 2876 lwp_mutex_t *lp; 2877 volatile int locked = 0; 2878 volatile int watched = 0; 2879 volatile struct upimutex *upimutex = NULL; 2880 volatile int upilocked = 0; 2881 2882 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2883 != (USYNC_PROCESS | LOCK_ROBUST)) 2884 return; 2885 2886 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2887 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2888 if (on_fault(&ljb)) { 2889 if (locked) 2890 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2891 if (upilocked) 2892 upimutex_unlock((upimutex_t *)upimutex, 0); 2893 goto out; 2894 } 2895 2896 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2897 2898 if (UPIMUTEX(ent->lwpchan_type)) { 2899 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2900 upib_t *upibp = &UPI_CHAIN(lwpchan); 2901 2902 if (owner_pid != curproc->p_pid) 2903 goto out; 2904 mutex_enter(&upibp->upib_lock); 2905 upimutex = upi_get(upibp, &lwpchan); 2906 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2907 mutex_exit(&upibp->upib_lock); 2908 goto out; 2909 } 2910 mutex_exit(&upibp->upib_lock); 2911 upilocked = 1; 2912 flag = lwp_clear_mutex(lp, lockflg); 2913 suword8_noerr(&lp->mutex_lockw, 0); 2914 upimutex_unlock((upimutex_t *)upimutex, flag); 2915 } else { 2916 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2917 locked = 1; 2918 /* 2919 * Clear the spinners count because one of our 2920 * threads could have been spinning for this lock 2921 * at user level when the process was suddenly killed. 2922 * There is no harm in this since user-level libc code 2923 * will adapt to the sudden change in the spinner count. 2924 */ 2925 suword8_noerr(&lp->mutex_spinners, 0); 2926 if (owner_pid != curproc->p_pid) { 2927 /* 2928 * We are not the owner. There may or may not be one. 2929 * If there are waiters, we wake up one or all of them. 2930 * It doesn't hurt to wake them up in error since 2931 * they will just retry the lock and go to sleep 2932 * again if necessary. 2933 */ 2934 fuword8_noerr(&lp->mutex_waiters, &waiters); 2935 if (waiters != 0) { /* there are waiters */ 2936 fuword16_noerr(&lp->mutex_flag, &flag); 2937 if (flag & LOCK_NOTRECOVERABLE) { 2938 lwp_release_all(&ent->lwpchan_lwpchan); 2939 suword8_noerr(&lp->mutex_waiters, 0); 2940 } else if (lwp_release(&ent->lwpchan_lwpchan, 2941 &waiters, 0)) { 2942 suword8_noerr(&lp->mutex_waiters, 2943 waiters); 2944 } 2945 } 2946 } else { 2947 /* 2948 * We are the owner. Release it. 
2949 */ 2950 (void) lwp_clear_mutex(lp, lockflg); 2951 ulock_clear(&lp->mutex_lockw); 2952 fuword8_noerr(&lp->mutex_waiters, &waiters); 2953 if (waiters && 2954 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2955 suword8_noerr(&lp->mutex_waiters, waiters); 2956 } 2957 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2958 } 2959 out: 2960 no_fault(); 2961 if (watched) 2962 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2963 } 2964 2965 /* 2966 * Register a process-shared robust mutex in the lwpchan cache. 2967 */ 2968 int 2969 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr) 2970 { 2971 int error = 0; 2972 volatile int watched; 2973 label_t ljb; 2974 uint8_t type; 2975 lwpchan_t lwpchan; 2976 2977 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2978 return (set_errno(EFAULT)); 2979 2980 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2981 2982 if (on_fault(&ljb)) { 2983 error = EFAULT; 2984 } else { 2985 /* 2986 * Force Copy-on-write if necessary and ensure that the 2987 * synchronization object resides in read/write memory. 2988 * Cause an EFAULT return now if this is not so. 2989 */ 2990 fuword8_noerr(&lp->mutex_type, &type); 2991 suword8_noerr(&lp->mutex_type, type); 2992 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2993 != (USYNC_PROCESS|LOCK_ROBUST)) { 2994 error = EINVAL; 2995 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp, 2996 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) { 2997 error = EFAULT; 2998 } 2999 } 3000 no_fault(); 3001 if (watched) 3002 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3003 if (error) 3004 return (set_errno(error)); 3005 return (0); 3006 } 3007 3008 /* 3009 * There is a user-level robust lock registration in libc. 3010 * Mark it as invalid by storing -1 into the location of the pointer. 3011 */ 3012 static void 3013 lwp_mutex_unregister(void *uaddr) 3014 { 3015 if (get_udatamodel() == DATAMODEL_NATIVE) { 3016 (void) sulword(uaddr, (ulong_t)-1); 3017 #ifdef _SYSCALL32_IMPL 3018 } else { 3019 (void) suword32(uaddr, (uint32_t)-1); 3020 #endif 3021 } 3022 } 3023 3024 int 3025 lwp_mutex_trylock(lwp_mutex_t *lp) 3026 { 3027 kthread_t *t = curthread; 3028 proc_t *p = ttoproc(t); 3029 int error = 0; 3030 volatile int locked = 0; 3031 volatile int watched = 0; 3032 label_t ljb; 3033 volatile uint8_t type = 0; 3034 uint16_t flag; 3035 lwpchan_t lwpchan; 3036 3037 if ((caddr_t)lp >= p->p_as->a_userlimit) 3038 return (set_errno(EFAULT)); 3039 3040 (void) new_mstate(t, LMS_USER_LOCK); 3041 3042 if (on_fault(&ljb)) { 3043 if (locked) 3044 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3045 error = EFAULT; 3046 goto out; 3047 } 3048 /* 3049 * Force Copy-on-write if necessary and ensure that the 3050 * synchronization object resides in read/write memory. 3051 * Cause an EFAULT return now if this is not so. 
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!ulock_try(&lp->mutex_lockw))
		error = EBUSY;
	else {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * unlock the mutex and unblock lwps that are trying to acquire this mutex.
 * a blocked lwp resumes and retries the lock acquisition.
 */
int
lwp_mutex_unlock(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	uint16_t flag;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
3147 */ 3148 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3149 suword8_noerr(&lp->mutex_type, type); 3150 3151 if (UPIMUTEX(type)) { 3152 no_fault(); 3153 error = lwp_upimutex_unlock(lp, type); 3154 if (error) 3155 return (set_errno(error)); 3156 return (0); 3157 } 3158 3159 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3160 3161 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3162 &lwpchan, LWPCHAN_MPPOOL)) { 3163 error = EFAULT; 3164 goto out; 3165 } 3166 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3167 locked = 1; 3168 if (type & LOCK_ROBUST) { 3169 fuword16_noerr(&lp->mutex_flag, &flag); 3170 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3171 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3172 flag |= LOCK_NOTRECOVERABLE; 3173 suword16_noerr(&lp->mutex_flag, flag); 3174 } 3175 } 3176 if (type & USYNC_PROCESS) 3177 suword32_noerr(&lp->mutex_ownerpid, 0); 3178 ulock_clear(&lp->mutex_lockw); 3179 /* 3180 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3181 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3182 * may fail. If it fails, do not write into the waiter bit. 3183 * The call to lwp_release() might fail due to one of three reasons: 3184 * 3185 * 1. due to the thread which set the waiter bit not actually 3186 * sleeping since it got the lock on the re-try. The waiter 3187 * bit will then be correctly updated by that thread. This 3188 * window may be closed by reading the wait bit again here 3189 * and not calling lwp_release() at all if it is zero. 3190 * 2. the thread which set the waiter bit and went to sleep 3191 * was woken up by a signal. This time, the waiter recomputes 3192 * the wait bit in the return with EINTR code. 3193 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3194 * memory that has been re-used after the lock was dropped. 3195 * In this case, writing into the waiter bit would cause data 3196 * corruption. 3197 */ 3198 fuword8_noerr(&lp->mutex_waiters, &waiters); 3199 if (waiters) { 3200 if ((type & LOCK_ROBUST) && 3201 (flag & LOCK_NOTRECOVERABLE)) { 3202 lwp_release_all(&lwpchan); 3203 suword8_noerr(&lp->mutex_waiters, 0); 3204 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3205 suword8_noerr(&lp->mutex_waiters, waiters); 3206 } 3207 } 3208 3209 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3210 out: 3211 no_fault(); 3212 if (watched) 3213 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3214 if (error) 3215 return (set_errno(error)); 3216 return (0); 3217 } 3218
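
/*
 * Illustrative sketch (not part of this file, and not libc's actual
 * implementation): the user-level half of the mutex protocol that
 * lwp_mutex_trylock() and lwp_mutex_unlock() above assume.  The lock
 * byte is acquired with an atomic operation at user level; the kernel
 * is entered only to sleep or to wake a sleeper, and the mutex_waiters
 * byte advertises that someone may be asleep.  The my_* functions below
 * are hypothetical stand-ins for the corresponding traps; the kernel
 * side re-checks the lock word under the lwpchan lock before actually
 * sleeping, which is what makes the simple store of mutex_waiters safe.
 *
 *	#include <atomic.h>
 *
 *	static void
 *	my_mutex_lock(lwp_mutex_t *lp)
 *	{
 *		while (atomic_cas_8(&lp->mutex_lockw, 0, 1) != 0) {
 *			lp->mutex_waiters = 1;
 *			my_mutex_sleep(lp);	(block in the kernel until
 *						 the lock may be free)
 *		}
 *	}
 *
 *	static void
 *	my_mutex_unlock(lwp_mutex_t *lp)
 *	{
 *		lp->mutex_lockw = 0;
 *		if (lp->mutex_waiters != 0)
 *			my_mutex_wakeup(lp);	(wake one sleeping waiter)
 *	}
 */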