/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread.  This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps, is added for the number of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
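
/*
 * Illustrative note (not part of the original source): because
 * maxnestupimx is an ordinary kernel variable, the per-thread limit can
 * be raised system-wide from /etc/system.  A minimal sketch, assuming
 * the default of 2000 is too small for a particular workload:
 *
 *	set maxnestupimx = 4000
 *
 * The new value takes effect at the next boot.
 */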

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t	memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list.  Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t.  This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned.  Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit.  Waiters might have been
			 * interrupted.  No need to clear the w bit (upimutex
			 * will soon be freed).  Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	/*
	 * The apparent assumption made in implementing other _lwp_* synch
	 * primitives is that get_lwpchan() does not return a unique cookie
	 * for the case where 2 processes (one forked from the other) point
	 * at the same underlying object, which is typed USYNC_PROCESS, but
	 * mapped MAP_PRIVATE, since the object has not yet been written to,
	 * in the child process.
	 *
	 * Since get_lwpchan() has been fixed, it is not necessary to do the
	 * dummy writes to force a COW fault as in other places (which should
	 * be fixed).
	 */
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 * Put the lwp in an orderly state for debugging.
	 * Calling prstop() has to be done here, and not in
	 * turnstile_block(), since the preceding call to
	 * turnstile_lookup() raises the PIL to a level
	 * at which calls to prstop() should not be made.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	prstop(PR_REQUESTED, 0);
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * If we successfully queue the timeout
		 * (lwp_timer_enqueue() returns zero),
		 * then don't drop t_delay_lock until we are
		 * on the sleep queue (in turnstile_block()).
		 * Otherwise we will get an immediate timeout
		 * when we attempt to sleep in turnstile_block().
		 */
		mutex_enter(&curthread->t_delay_lock);
		if (lwp_timer_enqueue(lwptp) != 0)
			mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit.
	 * Wait bit is set under the upib_lock, which is not released
	 * until the turnstile lock is acquired.  Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up.  Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup().  All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block().  turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling
	 *    turnstile_block().  The address of upib_lock is passed to
	 *    turnstile_block() which releases it after releasing all
	 *    turnstile locks, and before going to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted.  The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count.  So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.
	 * If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held.  Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event.  The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not.  In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew).  curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}
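
/*
 * Illustrative sketch (not part of the original source): the robustness
 * protocol implemented above is what an application ultimately observes
 * through the POSIX robust-mutex interfaces.  A minimal, hedged example of
 * the expected consumer-side handling, assuming a mutex 'm' initialized
 * with PTHREAD_MUTEX_ROBUST and an application-supplied (hypothetical)
 * make_state_consistent() recovery routine:
 *
 *	int err = pthread_mutex_lock(&m);
 *	if (err == EOWNERDEAD) {
 *		make_state_consistent();	// application-defined recovery
 *		pthread_mutex_consistent(&m);	// clear the owner-dead state
 *	} else if (err == ENOTRECOVERABLE) {
 *		abort();	// no thread could recover the protected state
 *	}
 */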

static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	if (type & USYNC_PROCESS)
		suword32_noerr(&lp->mutex_ownerpid, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	suword32_noerr((uint32_t *)&lp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, 0);
	suword32_noerr(&lp->mutex_ownerpid, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up.  Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed.  If there is a waiter, the lock will be
			 * handed off.  If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies.  The last owner will free kmem, since
			 * it will find the upimutex has no waiters.  So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	static int iswanted();
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state.  If the thread indeed
	 * blocks, this state becomes valid.  If not, the state is converted
	 * back to LMS_SYSTEM.  So, it is OK to set the mstate here, instead of
	 * just when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE and it was initialized to
	 * USYNC_PROCESS.
	 */
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		/*
		 * Put the lwp in an orderly state for debugging.
		 */
		prstop(PR_REQUESTED, 0);
		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit.  The waiters field
			 * in the lock is not reliable.  Either of two things
			 * could have occurred: no lwp may have called
			 * lwp_release() for me but I have woken up due to a
			 * signal or timeout.  In this case, the waiter bit is
			 * incorrect since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}
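
/*
 * Illustrative sketch (not part of the original source): the ETIME produced
 * above by the timed-lock path typically surfaces to applications as
 * ETIMEDOUT from the libc wrapper.  A minimal, hedged user-level example of
 * a bounded lock attempt on a mutex 'm', assuming an absolute deadline
 * roughly two seconds away:
 *
 *	struct timespec deadline;
 *	(void) clock_gettime(CLOCK_REALTIME, &deadline);
 *	deadline.tv_sec += 2;
 *	int err = pthread_mutex_timedlock(&m, &deadline);
 *	if (err == ETIMEDOUT) {
 *		// the lock could not be acquired within the deadline
 *	}
 */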

/*
 * Obsolete lwp_mutex_lock() interface, no longer called from libc.
 * libc now calls lwp_mutex_timedlock(lp, NULL).
 * This system call trap continues to exist solely for the benefit
 * of old statically-linked binaries from Solaris 9 and before.
 * It should be removed from the system when we no longer care
 * about such applications.
 */
int
lwp_mutex_lock(lwp_mutex_t *lp)
{
	return (lwp_mutex_timedlock(lp, NULL));
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false.  It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but
			 * has since been re-used to hold a lwp cv or lwp
			 * semaphore.  The thread "tp" found to match the lwp
			 * lock's wchan is actually sleeping for the cv or
			 * semaphore which now has the same wchan.  In this
			 * case, lwp_release() should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);	/* drop sleep queue lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex.  the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan.  The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock().  The call to
	 * lwp_release() may fail.  If it fails, do not write into the waiter
	 * bit.  The call to lwp_release() might fail due to one of three
	 * reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try.  The waiter
	 *	   bit will then be correctly updated by that thread.  This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal.  This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write fault if lwp_cond_t and lwp_mutex_t
	 * objects are defined to be MAP_PRIVATE, and are USYNC_PROCESS
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	if (UPIMUTEX(mtype) == 0) {
		suword8_noerr(&mp->mutex_type, mtype);
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched =
	    watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex.  (pagefaults are
		 * possible here.)
		 */
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter
			 * bit, calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs.  In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	/*
	 * Put the lwp in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		if (mtype & USYNC_PROCESS)
			suword32_noerr(&mp->mutex_ownerpid, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}
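
/*
 * Illustrative sketch (not part of the original source): because the wait
 * above can return early (signals, forkall(), spurious wakeups), the
 * user-level wrappers expect callers to re-check their predicate in a loop.
 * A minimal, hedged example using the POSIX interfaces, assuming a
 * hypothetical 'ready' flag protected by mutex 'm':
 *
 *	pthread_mutex_lock(&m);
 *	while (!ready)
 *		pthread_cond_wait(&cond, &m);	// may wake up spuriously
 *	// 'ready' is now true and 'm' is held
 *	pthread_mutex_unlock(&m);
 */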
/*
 * wake up one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_cond_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		/*
		 * The following call to lwp_release() might fail but it is
		 * OK to write into the waiters bit below, since the memory
		 * could not have been re-used or unmapped (for correctly
		 * written user programs) as in the case of lwp_mutex_wakeup().
		 * For an incorrect program, we should not care about data
		 * corruption since this is just one instance of other places
		 * where corruption can occur for such a program.  Of course
		 * if the memory is unmapped, normal fault recovery occurs.
		 */
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&cv->cond_waiters_kernel, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * wake up every lwp that's blocked on this condition variable.
 */
int
lwp_cond_broadcast(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_cond_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		lwp_release_all(&lwpchan);
		suword8_noerr(&cv->cond_waiters_kernel, 0);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

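/*
 * Note on the cond_waiters_kernel byte used above: it is set by
 * lwp_cond_wait() before the lwp blocks and updated here to reflect
 * whether any blocked lwps remain, so a user-level implementation can skip
 * the signal/broadcast trap entirely when no lwp is blocked in the kernel.
 * Illustrative sketch only (the libc-side check and wrapper name are
 * assumptions, not part of this file):
 *
 *	if (cv->cond_waiters_kernel)
 *		(void) ___lwp_cond_signal(cv);
 */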
int
lwp_sema_trywait(lwp_sema_t *sp)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_sema_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	if (count == 0)
		error = EBUSY;
	else
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument.
 */
int
lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	clock_t tim = -1;
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;
	int time_error;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_sema_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	while (error == 0 && count == 0) {
		if (time_error) {
			/*
			 * The SUSv3 POSIX spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}
		suword8_noerr(&sp->sema_waiters, 1);
		if (watched)
			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
		/*
		 * Put the lwp in an orderly state for debugging.
		 */
		prstop(PR_REQUESTED, 0);
		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
			/*
			 * We received a signal at user-level before calling
			 * here or another thread wants us to return
			 * immediately with EINTR.  See lwp_unpark().
			 */
			imm_unpark = 1;
			t->t_unpark = 0;
			timedwait = NULL;
		} else if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		t->t_flag |= T_WAITCVSEM;
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to sleep
		 * again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
		    (imm_timeout | imm_unpark))
			setrun(t);
		swtch();
		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, t) || imm_unpark)
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_asleep = 0;
		lwp->lwp_sysabort = 0;
		watched = watch_disable_addr((caddr_t)sp,
		    sizeof (*sp), S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 1;
		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	}
	if (error == 0)
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&sp->sema_waiters, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (tsp && check_park && !time_error)
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Obsolete lwp_sema_wait() interface, no longer called from libc.
 * libc now calls lwp_sema_timedwait().
 * This system call trap exists solely for the benefit of old
 * statically linked applications from Solaris 9 and before.
 * It should be removed when we no longer care about such applications.
 */
int
lwp_sema_wait(lwp_sema_t *sp)
{
	return (lwp_sema_timedwait(sp, NULL, 0));
}

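/*
 * Summary of the semaphore protocol implemented above and in
 * lwp_sema_post() below: sema_count is the number of available units and
 * sema_waiters is only a hint that some lwp may be blocked in the kernel.
 * Both live in user memory, but within this file they are read and written
 * only while the semaphore's lwpchan lock is held, so a P operation
 * (lwp_sema_trywait()/lwp_sema_timedwait()) and a V operation
 * (lwp_sema_post()) never race on the count.  A woken lwp re-checks
 * sema_count in the retry loop of lwp_sema_timedwait() before consuming a
 * unit, which is why spurious wakeups are harmless.
 */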
int
lwp_sema_post(lwp_sema_t *sp)
{
	proc_t *p = ttoproc(curthread);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write fault if lwp_sema_t object is
	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
	 */
	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
	suword16_noerr(&sp->sema_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
	if (count == _SEM_VALUE_MAX)
		error = EOVERFLOW;
	else
		suword32_noerr(&sp->sema_count, ++count);
	if (count == 1) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	TRW_WANT_WRITE		0x1
#define	TRW_LOCK_GRANTED	0x2

#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	TRY_FLAG		0x10
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

/*
 * Release one writer or one or more readers.  Compute the rwstate word to
 * reflect the new state of the queue.  For a safe hand-off we copy the new
 * rwstate value back to userland before we wake any of the new lock holders.
 *
 * Note that sleepq_insert() implements a prioritized FIFO (with writers
 * being given precedence over readers of the same priority).
 *
 * If the first thread is a reader we scan the queue releasing all readers
 * until we hit a writer or the end of the queue.  If the first thread is a
 * writer we still need to check for another writer.
 */
void
lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;
	kthread_t *tpnext;
	kthread_t *wakelist = NULL;
	uint32_t rwstate = 0;
	int wcount = 0;
	int rcount = 0;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			if (tp->t_writer & TRW_WANT_WRITE) {
				if ((wcount++ == 0) && (rcount == 0)) {
					rwstate |= URW_WRITE_LOCKED;

					/* Just one writer to wake. */
					sleepq_unlink(tpp, tp);
					wakelist = tp;

					/* tpp already set for next thread. */
					continue;
				} else {
					rwstate |= URW_HAS_WAITERS;
					/* We need look no further. */
					break;
				}
			} else {
				rcount++;
				if (wcount == 0) {
					rwstate++;

					/* Add reader to wake list. */
					sleepq_unlink(tpp, tp);
					tp->t_link = wakelist;
					wakelist = tp;

					/* tpp already set for next thread. */
					continue;
				} else {
					rwstate |= URW_HAS_WAITERS;
					/* We need look no further. */
					break;
				}
			}
		}
		tpp = &tp->t_link;
	}

	/* Copy the new rwstate back to userland. */
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/* Wake the new lock holder(s) up. */
	tp = wakelist;
	while (tp != NULL) {
		DTRACE_SCHED1(wakeup, kthread_t *, tp);
		tp->t_wchan0 = NULL;
		tp->t_wchan = NULL;
		tp->t_sobj_ops = NULL;
		tp->t_writer |= TRW_LOCK_GRANTED;
		tpnext = tp->t_link;
		tp->t_link = NULL;
		CL_WAKEUP(tp);
		thread_unlock_high(tp);
		tp = tpnext;
	}

	disp_lock_exit(&sqh->sq_lock);
}

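/*
 * Illustrative summary of the rwstate word computed above (the URW_* flag
 * values are defined with the other user-level synchronization definitions,
 * not in this file; only their use here matters):
 *
 *	rwstate & URW_READERS_MASK	number of readers being handed the
 *					lock (incremented once per reader
 *					released)
 *	rwstate & URW_WRITE_LOCKED	a single writer is being handed the
 *					lock
 *	rwstate & URW_HAS_WAITERS	at least one lwp is still queued
 *
 * The word is written back to userland before any waiter is woken, so a new
 * lock holder never observes a stale rwstate for the lock it now owns.
 */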
/*
 * We enter here holding the user-level mutex, which we must release before
 * returning or blocking.  Based on lwp_cond_wait().
 */
static int
lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
{
	lwp_mutex_t *mp = NULL;
	kthread_t *t = curthread;
	kthread_t *tp;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t lwpchan;
	lwpchan_t mlwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t mwaiters;
	volatile int error = 0;
	int time_error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int mlocked = 0;
	volatile int watched = 0;
	volatile int mwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int try_flag;
	uint32_t rwstate;
	int acquired = 0;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/* We must only report this error if we are about to sleep (later). */
	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (mlocked) {
			mlocked = 0;
			lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * Set up another on_fault() for a possible fault
		 * on the user lock accessed at "out_drop".
		 */
		if (on_fault(&ljb)) {
			if (mlocked) {
				mlocked = 0;
				lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
			}
			error = EFAULT;
			goto out_nodrop;
		}
		error = EFAULT;
		goto out_nodrop;
	}

	/* Process rd_wr (including sanity check). */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* We can only continue for simple USYNC_PROCESS locks. */
	mp = &rw->mutex;
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Force Copy-on-write fault in case objects are MAP_PRIVATE. */
	suword8_noerr(&mp->mutex_type, mtype);
	suword16_noerr(&rw->rwlock_type, type);

	/* Convert user level mutex, "mp", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
	    &mlwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);

	/*
	 * lwpchan_lock() ensures that the calling LWP is put to sleep
	 * atomically with respect to a possible wakeup which is a result
	 * of lwp_rwlock_unlock().
	 *
	 * What's misleading is that the LWP is put to sleep after the
	 * rwlock's mutex is released.  This is OK as long as the release
	 * operation is also done while holding mlwpchan.  The LWP is then
	 * put to sleep when the possibility of pagefaulting or sleeping
	 * has been completely eliminated.
	 */
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 1;

	/*
	 * Fetch the current rwlock state.
	 *
	 * The possibility of spurious wake-ups or killed waiters means
	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
	 * We only fix these if they are important to us.
	 *
	 * Although various error states can be observed here (e.g. the lock
	 * is not held, but there are waiters) we assume these are application
	 * errors and so we take no corrective action.
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	/*
	 * We cannot legitimately get here from user-level
	 * without URW_HAS_WAITERS being set.
	 * Set it now to guard against user-level error.
	 */
	rwstate |= URW_HAS_WAITERS;

	/*
	 * We can try only if the lock isn't held by a writer.
	 */
	if (!(rwstate & URW_WRITE_LOCKED)) {
		tp = lwp_queue_waiter(&lwpchan);
		if (tp == NULL) {
			/*
			 * Hmmm, rwstate indicates waiters but there are
			 * none queued.  This could just be the result of a
			 * spurious wakeup, so let's ignore it.
			 *
			 * We now have a chance to acquire the lock
			 * uncontended, but this is the last chance for
			 * a writer to acquire the lock without blocking.
			 */
			if (rd_wr == READ_LOCK) {
				rwstate++;
				acquired = 1;
			} else if ((rwstate & URW_READERS_MASK) == 0) {
				rwstate |= URW_WRITE_LOCKED;
				acquired = 1;
			}
		} else if (rd_wr == READ_LOCK) {
			/*
			 * This is the last chance for a reader to acquire
			 * the lock now, but it can only do so if there is
			 * no writer of equal or greater priority at the
			 * head of the queue.
			 *
			 * It is also just possible that there is a reader
			 * at the head of the queue.  This may be the result
			 * of a spurious wakeup or an application failure.
			 * In this case we only acquire the lock if we have
			 * equal or greater priority.  It is not our job to
			 * release spurious waiters.
			 */
			pri_t our_pri = DISP_PRIO(t);
			pri_t his_pri = DISP_PRIO(tp);

			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
			    !(tp->t_writer & TRW_WANT_WRITE))) {
				rwstate++;
				acquired = 1;
			}
		}
	}

	if (acquired || try_flag || time_error) {
		/*
		 * We're not going to block this time.
		 */
		suword32_noerr(&rw->rwlock_readers, rwstate);
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 0;

		if (acquired) {
			/*
			 * Got the lock!
			 */
			error = 0;

		} else if (try_flag) {
			/*
			 * We didn't get the lock and we're about to block.
			 * If we're doing a trylock, return EBUSY instead.
			 */
			error = EBUSY;

		} else if (time_error) {
			/*
			 * The SUSv3 POSIX spec is very clear that we should
			 * get no error from validating the timer (above)
			 * until we would actually sleep.
			 */
			error = time_error;
		}

		goto out_drop;
	}

	/*
	 * We're about to block, so indicate what kind of waiter we are.
	 */
	t->t_writer = 0;
	if (rd_wr == WRITE_LOCK)
		t->t_writer = TRW_WANT_WRITE;
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/*
	 * Unlock the rwlock's mutex (pagefaults are possible here).
	 */
	suword32_noerr((uint32_t *)&mp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0);
	suword32_noerr(&mp->mutex_ownerpid, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * Given the locking of mlwpchan around the release of
		 * the mutex and checking for waiters, the following
		 * call to lwp_release() can fail ONLY if the lock
		 * acquirer is interrupted after setting the waiter bit,
		 * calling lwp_block() and releasing mlwpchan.
		 * In this case, it could get pulled off the LWP sleep
		 * queue (via setrun()) before the following call to
		 * lwp_release() occurs, and the lock requestor will
		 * update the waiter bit correctly by re-evaluating it.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;
	no_fault();

	if (mwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mwatched = 0;
	}
	if (watched) {
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
		watched = 0;
	}

	/*
	 * Put the LWP in an orderly state for debugging.
	 */
	prstop(PR_REQUESTED, 0);
	if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&lwpchan);

	/*
	 * Nothing should happen to cause the LWP to go to sleep until after
	 * it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t))
		setrun(t);
	swtch();

	/*
	 * We're back, but we need to work out why.  Were we interrupted?  Did
	 * we timeout?  Were we granted the lock?
	 */
	error = EAGAIN;
	acquired = (t->t_writer & TRW_LOCK_GRANTED);
	t->t_writer = 0;
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	/*
	 * If we were granted the lock we don't care about EINTR or ETIME.
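	 * lwp_rwlock_release() has already accounted for us in rwstate and
	 * set TRW_LOCK_GRANTED before waking us, so returning an error here
	 * would leave the lock held with no owner aware of it.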
	 */
	if (acquired)
		error = 0;

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error)
		return (set_errno(error));
	return (0);

out_drop:
	/*
	 * Make sure that the user level lock is dropped before returning
	 * to the caller.
	 */
	if (!mlocked) {
		lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
		mlocked = 1;
	}
	suword32_noerr((uint32_t *)&mp->mutex_owner, 0);
	suword32_noerr((uint32_t *)&mp->mutex_owner + 1, 0);
	suword32_noerr(&mp->mutex_ownerpid, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * See comment above on lock clearing and lwp_release()
		 * success/failure.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;

out_nodrop:
	no_fault();
	if (mwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
 * we never drop the lock.
 */
static int
lwp_rwlock_unlock(lwp_rwlock_t *rw)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	lwpchan_t lwpchan;
	volatile uint16_t type = 0;
	volatile int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	uint32_t rwstate;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		}
		error = EFAULT;
		goto out_nodrop;
	}

	/* We can only continue for simple USYNC_PROCESS locks. */
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	if (type != USYNC_PROCESS) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Force Copy-on-write fault in case objects are MAP_PRIVATE. */
	suword16_noerr(&rw->rwlock_type, type);

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);

	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;

	/*
	 * We can resolve multiple readers (except the last reader) here.
	 * For the last reader or a writer we need lwp_rwlock_release(),
	 * to which we also delegate the task of copying the new rwstate
	 * back to userland (see the comment there).
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	if (rwstate & URW_WRITE_LOCKED)
		lwp_rwlock_release(&lwpchan, rw);
	else if ((rwstate & URW_READERS_MASK) > 0) {
		rwstate--;
		if ((rwstate & URW_READERS_MASK) == 0)
			lwp_rwlock_release(&lwpchan, rw);
		else
			suword32_noerr(&rw->rwlock_readers, rwstate);
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 0;
	error = 0;

out_nodrop:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

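/*
 * lwp_rwlock_sys() below multiplexes one system call trap over the rwlock
 * operations.  Convenience summary of the subcodes (the dispatch itself is
 * the switch in the function):
 *
 *	0	lwp_rwlock_lock(rwlp, tsp, READ_LOCK)		block for read
 *	1	lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)		block for write
 *	2	lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)	try-read, EBUSY
 *	3	lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)	try-write, EBUSY
 *	4	lwp_rwlock_unlock(rwlp)				unlock
 */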
int
lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
{
	switch (subcode) {
	case 0:
		return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
	case 1:
		return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
	case 2:
		return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
	case 3:
		return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
	case 4:
		return (lwp_rwlock_unlock(rwlp));
	}
	return (set_errno(EINVAL));
}

/*
 * Return the owner of the user-level s-object.
 * Since we can't really do this, return NULL.
 */
/* ARGSUSED */
static kthread_t *
lwpsobj_owner(caddr_t sobj)
{
	return ((kthread_t *)NULL);
}

/*
 * Wake up a thread asleep on a user-level synchronization
 * object.
 */
static void
lwp_unsleep(kthread_t *t)
{
	ASSERT(THREAD_LOCK_HELD(t));
	if (t->t_wchan0 != NULL) {
		sleepq_head_t *sqh;
		sleepq_t *sqp = t->t_sleepq;

		if (sqp != NULL) {
			sqh = lwpsqhash(&t->t_lwpchan);
			ASSERT(&sqh->sq_queue == sqp);
			sleepq_unsleep(t);
			disp_lock_exit_high(&sqh->sq_lock);
			CL_SETRUN(t);
			return;
		}
	}
	panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
}

/*
 * Change the priority of a thread asleep on a user-level
 * synchronization object.  To maintain proper priority order,
 * we:
 *	o dequeue the thread.
 *	o change its priority.
 *	o re-enqueue the thread.
 * Assumption: the thread is locked on entry.
 */
static void
lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
{
	ASSERT(THREAD_LOCK_HELD(t));
	if (t->t_wchan0 != NULL) {
		sleepq_t *sqp = t->t_sleepq;

		sleepq_dequeue(t);
		*t_prip = pri;
		sleepq_insert(sqp, t);
	} else
		panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
}

/*
 * Clean up a locked robust mutex.
 */
static void
lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
{
	uint16_t flag;
	uchar_t waiters;
	label_t ljb;
	pid_t owner_pid;
	lwp_mutex_t *lp;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	ASSERT(ent->lwpchan_type & LOCK_ROBUST);

	lp = (lwp_mutex_t *)ent->lwpchan_addr;
	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		goto out;
	}
	if (ent->lwpchan_type & USYNC_PROCESS) {
		fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);
		if ((UPIMUTEX(ent->lwpchan_type) || owner_pid != 0) &&
		    owner_pid != curproc->p_pid)
			goto out;
	}
	if (UPIMUTEX(ent->lwpchan_type)) {
		lwpchan_t lwpchan = ent->lwpchan_lwpchan;
		upib_t *upibp = &UPI_CHAIN(lwpchan);

		mutex_enter(&upibp->upib_lock);
		upimutex = upi_get(upibp, &lwpchan);
		if (upimutex == NULL || upimutex->upi_owner != curthread) {
			mutex_exit(&upibp->upib_lock);
			goto out;
		}
		mutex_exit(&upibp->upib_lock);
		upilocked = 1;
		flag = lwp_clear_mutex(lp, lockflg);
		suword8_noerr(&lp->mutex_lockw, 0);
		upimutex_unlock((upimutex_t *)upimutex, flag);
	} else {
		lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		if ((ent->lwpchan_type & USYNC_PROCESS) && owner_pid == 0) {
			/*
			 * There is no owner.  If there are waiters,
			 * we should wake up one or all of them.
			 * It doesn't hurt to wake them up in error
			 * since they will just retry the lock and
			 * go to sleep again if necessary.
			 */
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters != 0) {	/* there are waiters */
				fuword16_noerr(&lp->mutex_flag, &flag);
				if (flag & LOCK_NOTRECOVERABLE) {
					lwp_release_all(&ent->lwpchan_lwpchan);
					suword8_noerr(&lp->mutex_waiters, 0);
				} else if (lwp_release(&ent->lwpchan_lwpchan,
				    &waiters, 0)) {
					suword8_noerr(&lp->mutex_waiters,
					    waiters);
				}
			}
		} else {
			(void) lwp_clear_mutex(lp, lockflg);
			ulock_clear(&lp->mutex_lockw);
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters &&
			    lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
				suword8_noerr(&lp->mutex_waiters, waiters);
		}
		lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
	}
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
}

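/*
 * lwp_mutex_cleanup() above operates on robust mutexes recorded in the
 * per-process lwpchan cache when the lock can no longer be used normally,
 * for example when the mapping holding a USYNC_PROCESS_ROBUST lock is torn
 * down (see lwpchan_delete_mapping() at the top of this file) or when the
 * owner goes away.  The lockflg value passed in ends up in mutex_flag via
 * lwp_clear_mutex(), which is how a later acquirer comes to see EOWNERDEAD
 * or ELOCKUNMAPPED in lwp_mutex_trylock() below.
 */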
/*
 * Register a process-shared robust mutex in the lwpchan cache.
 */
int
lwp_mutex_register(lwp_mutex_t *lp)
{
	int error = 0;
	volatile int watched;
	label_t ljb;
	uint8_t type;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= (caddr_t)USERLIMIT)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		error = EFAULT;
	} else {
		fuword8_noerr(&lp->mutex_type, &type);
		if ((type & (USYNC_PROCESS|LOCK_ROBUST))
		    != (USYNC_PROCESS|LOCK_ROBUST)) {
			error = EINVAL;
		} else {
			/*
			 * Force Copy-on-write fault if lwp_mutex_t object is
			 * defined to be MAP_PRIVATE and it was initialized to
			 * USYNC_PROCESS.
			 */
			suword8_noerr(&lp->mutex_type, type);
			if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
			    &lwpchan, LWPCHAN_MPPOOL))
				error = EFAULT;
		}
	}
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

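/*
 * A minimal sketch of how registration is meant to be used, assuming a
 * user-level wrapper named ___lwp_mutex_register() and caller-side
 * bookkeeping (both are assumptions, not part of this file):
 *
 *	// first use of a process-shared robust mutex by this process
 *	if (!already_registered(mp))		// hypothetical helper
 *		(void) ___lwp_mutex_register(mp);
 *	error = mutex_lock(mp);			// may return EOWNERDEAD
 *
 * Registering creates the lwpchan cache entry for the lock, so the kernel
 * knows about a robust mutex even if it is only ever acquired on the
 * uncontended user-level fast path and is therefore able to clean it up
 * (lwp_mutex_cleanup() above) should its owner die.
 */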
int
lwp_mutex_trylock(lwp_mutex_t *lp)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	uint16_t flag;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
		if ((type & USYNC_PROCESS) &&
		    (error == 0 ||
		    error == EOWNERDEAD || error == ELOCKUNMAPPED))
			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
		if (error)
			return (set_errno(error));
		return (0);
	}
	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE and it was initialized to
	 * USYNC_PROCESS.
	 */
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!ulock_try(&lp->mutex_lockw))
		error = EBUSY;
	else {
		if (type & USYNC_PROCESS)
			suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * Unlock the mutex and unblock any lwps that are trying to acquire this
 * mutex.  A blocked lwp resumes and retries the lock.
 */
int
lwp_mutex_unlock(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	uint16_t flag;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_unlock(lp, type);
		if (error)
			return (set_errno(error));
		return (0);
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	/*
	 * Force Copy-on-write fault if lwp_mutex_t object is
	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS.
	 */
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
			flag |= LOCK_NOTRECOVERABLE;
			suword16_noerr(&lp->mutex_flag, flag);
		}
	}
	if (type & USYNC_PROCESS)
		suword32_noerr(&lp->mutex_ownerpid, 0);
	ulock_clear(&lp->mutex_lockw);
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan.  The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock().  The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try.
	 *	   The waiter bit will then be correctly updated by that
	 *	   thread.  This window may be closed by reading the wait
	 *	   bit again here and not calling lwp_release() at all if
	 *	   it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal.  In this case, the waiter
	 *	   recomputes the wait bit on the EINTR return path.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause
	 *	   data corruption.
	 */
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	if (waiters) {
		if ((type & LOCK_ROBUST) &&
		    (flag & LOCK_NOTRECOVERABLE)) {
			lwp_release_all(&lwpchan);
			suword8_noerr(&lp->mutex_waiters, 0);
		} else if (lwp_release(&lwpchan, &waiters, 0)) {
			suword8_noerr(&lp->mutex_waiters, waiters);
		}
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
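/*
 * Illustrative timeline of failure reason 1 in the comment above (a sketch
 * only, to show why a failed lwp_release() must not rewrite mutex_waiters):
 *
 *	acquiring lwp				releasing lwp
 *	-----------------------------		------------------------------
 *	set mutex_waiters = 1
 *	lwp_block(), drop lwpchan lock
 *						ulock_clear(&lp->mutex_lockw)
 *	pulled off the sleep queue early
 *	(e.g. setrun()), re-tries and gets
 *	the lock, re-evaluates the waiter bit
 *						lwp_release() finds no sleeper
 *						and fails; mutex_waiters is
 *						left for the new owner to fix
 */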