1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/user.h> 36 #include <sys/errno.h> 37 #include <sys/file.h> 38 #include <sys/proc.h> 39 #include <sys/prsystm.h> 40 #include <sys/kmem.h> 41 #include <sys/sobject.h> 42 #include <sys/fault.h> 43 #include <sys/procfs.h> 44 #include <sys/watchpoint.h> 45 #include <sys/time.h> 46 #include <sys/cmn_err.h> 47 #include <sys/machlock.h> 48 #include <sys/debug.h> 49 #include <sys/synch.h> 50 #include <sys/synch32.h> 51 #include <sys/mman.h> 52 #include <sys/class.h> 53 #include <sys/schedctl.h> 54 #include <sys/sleepq.h> 55 #include <sys/policy.h> 56 #include <sys/tnf_probe.h> 57 #include <sys/lwpchan_impl.h> 58 #include <sys/turnstile.h> 59 #include <sys/atomic.h> 60 #include <sys/lwp_timer_impl.h> 61 #include <sys/lwp_upimutex_impl.h> 62 #include <vm/as.h> 63 #include <sys/sdt.h> 64 65 static kthread_t *lwpsobj_owner(caddr_t); 66 static void lwp_unsleep(kthread_t *t); 67 static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip); 68 static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg); 69 static void lwp_mutex_unregister(void *uaddr); 70 static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t); 71 static int iswanted(kthread_t *, lwpchan_t *); 72 73 extern int lwp_cond_signal(lwp_cond_t *cv); 74 75 /* 76 * Maximum number of user prio inheritance locks that can be held by a thread. 77 * Used to limit kmem for each thread. This is a per-thread limit that 78 * can be administered on a system wide basis (using /etc/system). 79 * 80 * Also, when a limit, say maxlwps is added for numbers of lwps within a 81 * process, the per-thread limit automatically becomes a process-wide limit 82 * of maximum number of held upi locks within a process: 83 * maxheldupimx = maxnestupimx * maxlwps; 84 */ 85 static uint32_t maxnestupimx = 2000; 86 87 /* 88 * The sobj_ops vector exports a set of functions needed when a thread 89 * is asleep on a synchronization object of this type. 
90 */ 91 static sobj_ops_t lwp_sobj_ops = { 92 SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri 93 }; 94 95 static kthread_t *lwpsobj_pi_owner(upimutex_t *up); 96 97 static sobj_ops_t lwp_sobj_pi_ops = { 98 SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep, 99 turnstile_change_pri 100 }; 101 102 static sleepq_head_t lwpsleepq[NSLEEPQ]; 103 upib_t upimutextab[UPIMUTEX_TABSIZE]; 104 105 #define LWPCHAN_LOCK_SHIFT 10 /* 1024 locks for each pool */ 106 #define LWPCHAN_LOCK_SIZE (1 << LWPCHAN_LOCK_SHIFT) 107 108 /* 109 * We know that both lc_wchan and lc_wchan0 are addresses that most 110 * likely are 8-byte aligned, so we shift off the low-order 3 bits. 111 * 'pool' is either 0 or 1. 112 */ 113 #define LWPCHAN_LOCK_HASH(X, pool) \ 114 (((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \ 115 (LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0)) 116 117 static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE]; 118 119 /* 120 * Is this a POSIX threads user-level lock requiring priority inheritance? 121 */ 122 #define UPIMUTEX(type) ((type) & LOCK_PRIO_INHERIT) 123 124 static sleepq_head_t * 125 lwpsqhash(lwpchan_t *lwpchan) 126 { 127 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 128 return (&lwpsleepq[SQHASHINDEX(x)]); 129 } 130 131 /* 132 * Lock an lwpchan. 133 * Keep this in sync with lwpchan_unlock(), below. 134 */ 135 static void 136 lwpchan_lock(lwpchan_t *lwpchan, int pool) 137 { 138 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 139 mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 140 } 141 142 /* 143 * Unlock an lwpchan. 144 * Keep this in sync with lwpchan_lock(), above. 145 */ 146 static void 147 lwpchan_unlock(lwpchan_t *lwpchan, int pool) 148 { 149 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 150 mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 151 } 152 153 /* 154 * Delete mappings from the lwpchan cache for pages that are being 155 * unmapped by as_unmap(). Given a range of addresses, "start" to "end", 156 * all mappings within the range are deleted from the lwpchan cache. 157 */ 158 void 159 lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end) 160 { 161 lwpchan_data_t *lcp; 162 lwpchan_hashbucket_t *hashbucket; 163 lwpchan_hashbucket_t *endbucket; 164 lwpchan_entry_t *ent; 165 lwpchan_entry_t **prev; 166 caddr_t addr; 167 168 mutex_enter(&p->p_lcp_lock); 169 lcp = p->p_lcp; 170 hashbucket = lcp->lwpchan_cache; 171 endbucket = hashbucket + lcp->lwpchan_size; 172 for (; hashbucket < endbucket; hashbucket++) { 173 if (hashbucket->lwpchan_chain == NULL) 174 continue; 175 mutex_enter(&hashbucket->lwpchan_lock); 176 prev = &hashbucket->lwpchan_chain; 177 /* check entire chain */ 178 while ((ent = *prev) != NULL) { 179 addr = ent->lwpchan_addr; 180 if (start <= addr && addr < end) { 181 *prev = ent->lwpchan_next; 182 /* 183 * We do this only for the obsolete type 184 * USYNC_PROCESS_ROBUST. Otherwise robust 185 * locks do not draw ELOCKUNMAPPED or 186 * EOWNERDEAD due to being unmapped. 187 */ 188 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 189 (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) 190 lwp_mutex_cleanup(ent, LOCK_UNMAPPED); 191 /* 192 * If there is a user-level robust lock 193 * registration, mark it as invalid. 
194 */ 195 if ((addr = ent->lwpchan_uaddr) != NULL) 196 lwp_mutex_unregister(addr); 197 kmem_free(ent, sizeof (*ent)); 198 atomic_add_32(&lcp->lwpchan_entries, -1); 199 } else { 200 prev = &ent->lwpchan_next; 201 } 202 } 203 mutex_exit(&hashbucket->lwpchan_lock); 204 } 205 mutex_exit(&p->p_lcp_lock); 206 } 207 208 /* 209 * Given an lwpchan cache pointer and a process virtual address, 210 * return a pointer to the corresponding lwpchan hash bucket. 211 */ 212 static lwpchan_hashbucket_t * 213 lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr) 214 { 215 uint_t i; 216 217 /* 218 * All user-level sync object addresses are 8-byte aligned. 219 * Ignore the lowest 3 bits of the address and use the 220 * higher-order 2*lwpchan_bits bits for the hash index. 221 */ 222 addr >>= 3; 223 i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask; 224 return (lcp->lwpchan_cache + i); 225 } 226 227 /* 228 * (Re)allocate the per-process lwpchan cache. 229 */ 230 static void 231 lwpchan_alloc_cache(proc_t *p, uint_t bits) 232 { 233 lwpchan_data_t *lcp; 234 lwpchan_data_t *old_lcp; 235 lwpchan_hashbucket_t *hashbucket; 236 lwpchan_hashbucket_t *endbucket; 237 lwpchan_hashbucket_t *newbucket; 238 lwpchan_entry_t *ent; 239 lwpchan_entry_t *next; 240 uint_t count; 241 242 ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS); 243 244 lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP); 245 lcp->lwpchan_bits = bits; 246 lcp->lwpchan_size = 1 << lcp->lwpchan_bits; 247 lcp->lwpchan_mask = lcp->lwpchan_size - 1; 248 lcp->lwpchan_entries = 0; 249 lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size * 250 sizeof (lwpchan_hashbucket_t), KM_SLEEP); 251 lcp->lwpchan_next_data = NULL; 252 253 mutex_enter(&p->p_lcp_lock); 254 if ((old_lcp = p->p_lcp) != NULL) { 255 if (old_lcp->lwpchan_bits >= bits) { 256 /* someone beat us to it */ 257 mutex_exit(&p->p_lcp_lock); 258 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 259 sizeof (lwpchan_hashbucket_t)); 260 kmem_free(lcp, sizeof (lwpchan_data_t)); 261 return; 262 } 263 /* 264 * Acquire all of the old hash table locks. 265 */ 266 hashbucket = old_lcp->lwpchan_cache; 267 endbucket = hashbucket + old_lcp->lwpchan_size; 268 for (; hashbucket < endbucket; hashbucket++) 269 mutex_enter(&hashbucket->lwpchan_lock); 270 /* 271 * Move all of the old hash table entries to the 272 * new hash table. The new hash table has not yet 273 * been installed so we don't need any of its locks. 274 */ 275 count = 0; 276 hashbucket = old_lcp->lwpchan_cache; 277 for (; hashbucket < endbucket; hashbucket++) { 278 ent = hashbucket->lwpchan_chain; 279 while (ent != NULL) { 280 next = ent->lwpchan_next; 281 newbucket = lwpchan_bucket(lcp, 282 (uintptr_t)ent->lwpchan_addr); 283 ent->lwpchan_next = newbucket->lwpchan_chain; 284 newbucket->lwpchan_chain = ent; 285 ent = next; 286 count++; 287 } 288 hashbucket->lwpchan_chain = NULL; 289 } 290 lcp->lwpchan_entries = count; 291 } 292 293 /* 294 * Retire the old hash table. We can't actually kmem_free() it 295 * now because someone may still have a pointer to it. Instead, 296 * we link it onto the new hash table's list of retired hash tables. 297 * The new hash table is double the size of the previous one, so 298 * the total size of all retired hash tables is less than the size 299 * of the new one. exit() and exec() free the retired hash tables 300 * (see lwpchan_destroy_cache(), below). 301 */ 302 lcp->lwpchan_next_data = old_lcp; 303 304 /* 305 * As soon as we store the new lcp, future locking operations will 306 * use it. 
Therefore, we must ensure that all the state we've just 307 * established reaches global visibility before the new lcp does. 308 */ 309 membar_producer(); 310 p->p_lcp = lcp; 311 312 if (old_lcp != NULL) { 313 /* 314 * Release all of the old hash table locks. 315 */ 316 hashbucket = old_lcp->lwpchan_cache; 317 for (; hashbucket < endbucket; hashbucket++) 318 mutex_exit(&hashbucket->lwpchan_lock); 319 } 320 mutex_exit(&p->p_lcp_lock); 321 } 322 323 /* 324 * Deallocate the lwpchan cache, and any dynamically allocated mappings. 325 * Called when the process exits or execs. All lwps except one have 326 * exited so we need no locks here. 327 */ 328 void 329 lwpchan_destroy_cache(int exec) 330 { 331 proc_t *p = curproc; 332 lwpchan_hashbucket_t *hashbucket; 333 lwpchan_hashbucket_t *endbucket; 334 lwpchan_data_t *lcp; 335 lwpchan_entry_t *ent; 336 lwpchan_entry_t *next; 337 uint16_t lockflg; 338 339 lcp = p->p_lcp; 340 p->p_lcp = NULL; 341 342 lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD; 343 hashbucket = lcp->lwpchan_cache; 344 endbucket = hashbucket + lcp->lwpchan_size; 345 for (; hashbucket < endbucket; hashbucket++) { 346 ent = hashbucket->lwpchan_chain; 347 hashbucket->lwpchan_chain = NULL; 348 while (ent != NULL) { 349 next = ent->lwpchan_next; 350 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 351 (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 352 == (USYNC_PROCESS | LOCK_ROBUST)) 353 lwp_mutex_cleanup(ent, lockflg); 354 kmem_free(ent, sizeof (*ent)); 355 ent = next; 356 } 357 } 358 359 while (lcp != NULL) { 360 lwpchan_data_t *next_lcp = lcp->lwpchan_next_data; 361 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 362 sizeof (lwpchan_hashbucket_t)); 363 kmem_free(lcp, sizeof (lwpchan_data_t)); 364 lcp = next_lcp; 365 } 366 } 367 368 /* 369 * Return zero when there is an entry in the lwpchan cache for the 370 * given process virtual address and non-zero when there is not. 371 * The returned non-zero value is the current length of the 372 * hash chain plus one. The caller holds the hash bucket lock. 373 */ 374 static uint_t 375 lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan, 376 lwpchan_hashbucket_t *hashbucket) 377 { 378 lwpchan_entry_t *ent; 379 uint_t count = 1; 380 381 for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) { 382 if (ent->lwpchan_addr == addr) { 383 if (ent->lwpchan_type != type || 384 ent->lwpchan_pool != pool) { 385 /* 386 * This shouldn't happen, but might if the 387 * process reuses its memory for different 388 * types of sync objects. We test first 389 * to avoid grabbing the memory cache line. 390 */ 391 ent->lwpchan_type = (uint16_t)type; 392 ent->lwpchan_pool = (uint16_t)pool; 393 } 394 *lwpchan = ent->lwpchan_lwpchan; 395 return (0); 396 } 397 count++; 398 } 399 return (count); 400 } 401 402 /* 403 * Return the cached lwpchan mapping if cached, otherwise insert 404 * a virtual address to lwpchan mapping into the cache. 
405 */ 406 static int 407 lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr, 408 int type, lwpchan_t *lwpchan, int pool) 409 { 410 proc_t *p = curproc; 411 lwpchan_data_t *lcp; 412 lwpchan_hashbucket_t *hashbucket; 413 lwpchan_entry_t *ent; 414 memid_t memid; 415 uint_t count; 416 uint_t bits; 417 418 top: 419 /* initialize the lwpchan cache, if necesary */ 420 if ((lcp = p->p_lcp) == NULL) { 421 lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS); 422 goto top; 423 } 424 hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr); 425 mutex_enter(&hashbucket->lwpchan_lock); 426 if (lcp != p->p_lcp) { 427 /* someone resized the lwpchan cache; start over */ 428 mutex_exit(&hashbucket->lwpchan_lock); 429 goto top; 430 } 431 if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) { 432 /* it's in the cache */ 433 mutex_exit(&hashbucket->lwpchan_lock); 434 return (1); 435 } 436 mutex_exit(&hashbucket->lwpchan_lock); 437 if (as_getmemid(as, addr, &memid) != 0) 438 return (0); 439 lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0]; 440 lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1]; 441 ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP); 442 mutex_enter(&hashbucket->lwpchan_lock); 443 if (lcp != p->p_lcp) { 444 /* someone resized the lwpchan cache; start over */ 445 mutex_exit(&hashbucket->lwpchan_lock); 446 kmem_free(ent, sizeof (*ent)); 447 goto top; 448 } 449 count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket); 450 if (count == 0) { 451 /* someone else added this entry to the cache */ 452 mutex_exit(&hashbucket->lwpchan_lock); 453 kmem_free(ent, sizeof (*ent)); 454 return (1); 455 } 456 if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */ 457 (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) { 458 /* hash chain too long; reallocate the hash table */ 459 mutex_exit(&hashbucket->lwpchan_lock); 460 kmem_free(ent, sizeof (*ent)); 461 lwpchan_alloc_cache(p, bits + 1); 462 goto top; 463 } 464 ent->lwpchan_addr = addr; 465 ent->lwpchan_uaddr = uaddr; 466 ent->lwpchan_type = (uint16_t)type; 467 ent->lwpchan_pool = (uint16_t)pool; 468 ent->lwpchan_lwpchan = *lwpchan; 469 ent->lwpchan_next = hashbucket->lwpchan_chain; 470 hashbucket->lwpchan_chain = ent; 471 atomic_add_32(&lcp->lwpchan_entries, 1); 472 mutex_exit(&hashbucket->lwpchan_lock); 473 return (1); 474 } 475 476 /* 477 * Return a unique pair of identifiers that corresponds to a 478 * synchronization object's virtual address. Process-shared 479 * sync objects usually get vnode/offset from as_getmemid(). 480 */ 481 static int 482 get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool) 483 { 484 /* 485 * If the lwp synch object is defined to be process-private, 486 * we just make the first field of the lwpchan be 'as' and 487 * the second field be the synch object's virtual address. 488 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.) 489 * The lwpchan cache is used only for process-shared objects. 
490 */ 491 if (!(type & USYNC_PROCESS)) { 492 lwpchan->lc_wchan0 = (caddr_t)as; 493 lwpchan->lc_wchan = addr; 494 return (1); 495 } 496 497 return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool)); 498 } 499 500 static void 501 lwp_block(lwpchan_t *lwpchan) 502 { 503 kthread_t *t = curthread; 504 klwp_t *lwp = ttolwp(t); 505 sleepq_head_t *sqh; 506 507 thread_lock(t); 508 t->t_flag |= T_WAKEABLE; 509 t->t_lwpchan = *lwpchan; 510 t->t_sobj_ops = &lwp_sobj_ops; 511 t->t_release = 0; 512 sqh = lwpsqhash(lwpchan); 513 disp_lock_enter_high(&sqh->sq_lock); 514 CL_SLEEP(t); 515 DTRACE_SCHED(sleep); 516 THREAD_SLEEP(t, &sqh->sq_lock); 517 sleepq_insert(&sqh->sq_queue, t); 518 thread_unlock(t); 519 lwp->lwp_asleep = 1; 520 lwp->lwp_sysabort = 0; 521 lwp->lwp_ru.nvcsw++; 522 (void) new_mstate(curthread, LMS_SLEEP); 523 } 524 525 static kthread_t * 526 lwpsobj_pi_owner(upimutex_t *up) 527 { 528 return (up->upi_owner); 529 } 530 531 static struct upimutex * 532 upi_get(upib_t *upibp, lwpchan_t *lcp) 533 { 534 struct upimutex *upip; 535 536 for (upip = upibp->upib_first; upip != NULL; 537 upip = upip->upi_nextchain) { 538 if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 && 539 upip->upi_lwpchan.lc_wchan == lcp->lc_wchan) 540 break; 541 } 542 return (upip); 543 } 544 545 static void 546 upi_chain_add(upib_t *upibp, struct upimutex *upimutex) 547 { 548 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 549 550 /* 551 * Insert upimutex at front of list. Maybe a bit unfair 552 * but assume that not many lwpchans hash to the same 553 * upimutextab bucket, i.e. the list of upimutexes from 554 * upib_first is not too long. 555 */ 556 upimutex->upi_nextchain = upibp->upib_first; 557 upibp->upib_first = upimutex; 558 } 559 560 static void 561 upi_chain_del(upib_t *upibp, struct upimutex *upimutex) 562 { 563 struct upimutex **prev; 564 565 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 566 567 prev = &upibp->upib_first; 568 while (*prev != upimutex) { 569 prev = &(*prev)->upi_nextchain; 570 } 571 *prev = upimutex->upi_nextchain; 572 upimutex->upi_nextchain = NULL; 573 } 574 575 /* 576 * Add upimutex to chain of upimutexes held by curthread. 577 * Returns number of upimutexes held by curthread. 578 */ 579 static uint32_t 580 upi_mylist_add(struct upimutex *upimutex) 581 { 582 kthread_t *t = curthread; 583 584 /* 585 * Insert upimutex at front of list of upimutexes owned by t. This 586 * would match typical LIFO order in which nested locks are acquired 587 * and released. 588 */ 589 upimutex->upi_nextowned = t->t_upimutex; 590 t->t_upimutex = upimutex; 591 t->t_nupinest++; 592 ASSERT(t->t_nupinest > 0); 593 return (t->t_nupinest); 594 } 595 596 /* 597 * Delete upimutex from list of upimutexes owned by curthread. 598 */ 599 static void 600 upi_mylist_del(struct upimutex *upimutex) 601 { 602 kthread_t *t = curthread; 603 struct upimutex **prev; 604 605 /* 606 * Since the order in which nested locks are acquired and released, 607 * is typically LIFO, and typical nesting levels are not too deep, the 608 * following should not be expensive in the general case. 609 */ 610 prev = &t->t_upimutex; 611 while (*prev != upimutex) { 612 prev = &(*prev)->upi_nextowned; 613 } 614 *prev = upimutex->upi_nextowned; 615 upimutex->upi_nextowned = NULL; 616 ASSERT(t->t_nupinest > 0); 617 t->t_nupinest--; 618 } 619 620 /* 621 * Returns true if upimutex is owned. Should be called only when upim points 622 * to kmem which cannot disappear from underneath. 
623 */ 624 static int 625 upi_owned(upimutex_t *upim) 626 { 627 return (upim->upi_owner == curthread); 628 } 629 630 /* 631 * Returns pointer to kernel object (upimutex_t *) if lp is owned. 632 */ 633 static struct upimutex * 634 lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type) 635 { 636 lwpchan_t lwpchan; 637 upib_t *upibp; 638 struct upimutex *upimutex; 639 640 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 641 &lwpchan, LWPCHAN_MPPOOL)) 642 return (NULL); 643 644 upibp = &UPI_CHAIN(lwpchan); 645 mutex_enter(&upibp->upib_lock); 646 upimutex = upi_get(upibp, &lwpchan); 647 if (upimutex == NULL || upimutex->upi_owner != curthread) { 648 mutex_exit(&upibp->upib_lock); 649 return (NULL); 650 } 651 mutex_exit(&upibp->upib_lock); 652 return (upimutex); 653 } 654 655 /* 656 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if 657 * no lock hand-off occurrs. 658 */ 659 static void 660 upimutex_unlock(struct upimutex *upimutex, uint16_t flag) 661 { 662 turnstile_t *ts; 663 upib_t *upibp; 664 kthread_t *newowner; 665 666 upi_mylist_del(upimutex); 667 upibp = upimutex->upi_upibp; 668 mutex_enter(&upibp->upib_lock); 669 if (upimutex->upi_waiter != 0) { /* if waiters */ 670 ts = turnstile_lookup(upimutex); 671 if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) { 672 /* hand-off lock to highest prio waiter */ 673 newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first; 674 upimutex->upi_owner = newowner; 675 if (ts->ts_waiters == 1) 676 upimutex->upi_waiter = 0; 677 turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner); 678 mutex_exit(&upibp->upib_lock); 679 return; 680 } else if (ts != NULL) { 681 /* LOCK_NOTRECOVERABLE: wakeup all */ 682 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL); 683 } else { 684 /* 685 * Misleading w bit. Waiters might have been 686 * interrupted. No need to clear the w bit (upimutex 687 * will soon be freed). Re-calculate PI from existing 688 * waiters. 689 */ 690 turnstile_exit(upimutex); 691 turnstile_pi_recalc(); 692 } 693 } 694 /* 695 * no waiters, or LOCK_NOTRECOVERABLE. 696 * remove from the bucket chain of upi mutexes. 697 * de-allocate kernel memory (upimutex). 
698 */ 699 upi_chain_del(upimutex->upi_upibp, upimutex); 700 mutex_exit(&upibp->upib_lock); 701 kmem_free(upimutex, sizeof (upimutex_t)); 702 } 703 704 static int 705 lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp) 706 { 707 label_t ljb; 708 int error = 0; 709 lwpchan_t lwpchan; 710 uint16_t flag; 711 upib_t *upibp; 712 volatile struct upimutex *upimutex = NULL; 713 turnstile_t *ts; 714 uint32_t nupinest; 715 volatile int upilocked = 0; 716 717 if (on_fault(&ljb)) { 718 if (upilocked) 719 upimutex_unlock((upimutex_t *)upimutex, 0); 720 error = EFAULT; 721 goto out; 722 } 723 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 724 &lwpchan, LWPCHAN_MPPOOL)) { 725 error = EFAULT; 726 goto out; 727 } 728 upibp = &UPI_CHAIN(lwpchan); 729 retry: 730 mutex_enter(&upibp->upib_lock); 731 upimutex = upi_get(upibp, &lwpchan); 732 if (upimutex == NULL) { 733 /* lock available since lwpchan has no upimutex */ 734 upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP); 735 upi_chain_add(upibp, (upimutex_t *)upimutex); 736 upimutex->upi_owner = curthread; /* grab lock */ 737 upimutex->upi_upibp = upibp; 738 upimutex->upi_vaddr = lp; 739 upimutex->upi_lwpchan = lwpchan; 740 mutex_exit(&upibp->upib_lock); 741 nupinest = upi_mylist_add((upimutex_t *)upimutex); 742 upilocked = 1; 743 fuword16_noerr(&lp->mutex_flag, &flag); 744 if (nupinest > maxnestupimx && 745 secpolicy_resource(CRED()) != 0) { 746 upimutex_unlock((upimutex_t *)upimutex, flag); 747 error = ENOMEM; 748 goto out; 749 } 750 if (flag & LOCK_NOTRECOVERABLE) { 751 /* 752 * Since the setting of LOCK_NOTRECOVERABLE 753 * was done under the high-level upi mutex, 754 * in lwp_upimutex_unlock(), this flag needs to 755 * be checked while holding the upi mutex. 756 * If set, this thread should return without 757 * the lock held, and with the right error code. 758 */ 759 upimutex_unlock((upimutex_t *)upimutex, flag); 760 upilocked = 0; 761 error = ENOTRECOVERABLE; 762 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 763 if (flag & LOCK_OWNERDEAD) 764 error = EOWNERDEAD; 765 else if (type & USYNC_PROCESS_ROBUST) 766 error = ELOCKUNMAPPED; 767 else 768 error = EOWNERDEAD; 769 } 770 goto out; 771 } 772 /* 773 * If a upimutex object exists, it must have an owner. 774 * This is due to lock hand-off, and release of upimutex when no 775 * waiters are present at unlock time, 776 */ 777 ASSERT(upimutex->upi_owner != NULL); 778 if (upimutex->upi_owner == curthread) { 779 /* 780 * The user wrapper can check if the mutex type is 781 * ERRORCHECK: if not, it should stall at user-level. 782 * If so, it should return the error code. 783 */ 784 mutex_exit(&upibp->upib_lock); 785 error = EDEADLK; 786 goto out; 787 } 788 if (try == UPIMUTEX_TRY) { 789 mutex_exit(&upibp->upib_lock); 790 error = EBUSY; 791 goto out; 792 } 793 /* 794 * Block for the lock. 795 */ 796 if ((error = lwptp->lwpt_time_error) != 0) { 797 /* 798 * The SUSV3 Posix spec is very clear that we 799 * should get no error from validating the 800 * timer until we would actually sleep. 801 */ 802 mutex_exit(&upibp->upib_lock); 803 goto out; 804 } 805 if (lwptp->lwpt_tsp != NULL) { 806 /* 807 * Unlike the protocol for other lwp timedwait operations, 808 * we must drop t_delay_lock before going to sleep in 809 * turnstile_block() for a upi mutex. 
810 * See the comments below and in turnstile.c 811 */ 812 mutex_enter(&curthread->t_delay_lock); 813 (void) lwp_timer_enqueue(lwptp); 814 mutex_exit(&curthread->t_delay_lock); 815 } 816 /* 817 * Now, set the waiter bit and block for the lock in turnstile_block(). 818 * No need to preserve the previous wbit since a lock try is not 819 * attempted after setting the wait bit. Wait bit is set under 820 * the upib_lock, which is not released until the turnstile lock 821 * is acquired. Say, the upimutex is L: 822 * 823 * 1. upib_lock is held so the waiter does not have to retry L after 824 * setting the wait bit: since the owner has to grab the upib_lock 825 * to unlock L, it will certainly see the wait bit set. 826 * 2. upib_lock is not released until the turnstile lock is acquired. 827 * This is the key to preventing a missed wake-up. Otherwise, the 828 * owner could acquire the upib_lock, and the tc_lock, to call 829 * turnstile_wakeup(). All this, before the waiter gets tc_lock 830 * to sleep in turnstile_block(). turnstile_wakeup() will then not 831 * find this waiter, resulting in the missed wakeup. 832 * 3. The upib_lock, being a kernel mutex, cannot be released while 833 * holding the tc_lock (since mutex_exit() could need to acquire 834 * the same tc_lock)...and so is held when calling turnstile_block(). 835 * The address of upib_lock is passed to turnstile_block() which 836 * releases it after releasing all turnstile locks, and before going 837 * to sleep in swtch(). 838 * 4. The waiter value cannot be a count of waiters, because a waiter 839 * can be interrupted. The interrupt occurs under the tc_lock, at 840 * which point, the upib_lock cannot be locked, to decrement waiter 841 * count. So, just treat the waiter state as a bit, not a count. 842 */ 843 ts = turnstile_lookup((upimutex_t *)upimutex); 844 upimutex->upi_waiter = 1; 845 error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex, 846 &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp); 847 /* 848 * Hand-off implies that we wakeup holding the lock, except when: 849 * - deadlock is detected 850 * - lock is not recoverable 851 * - we got an interrupt or timeout 852 * If we wake up due to an interrupt or timeout, we may 853 * or may not be holding the lock due to mutex hand-off. 854 * Use lwp_upimutex_owned() to check if we do hold the lock. 855 */ 856 if (error != 0) { 857 if ((error == EINTR || error == ETIME) && 858 (upimutex = lwp_upimutex_owned(lp, type))) { 859 /* 860 * Unlock and return - the re-startable syscall will 861 * try the lock again if we got EINTR. 862 */ 863 (void) upi_mylist_add((upimutex_t *)upimutex); 864 upimutex_unlock((upimutex_t *)upimutex, 0); 865 } 866 /* 867 * The only other possible error is EDEADLK. If so, upimutex 868 * is valid, since its owner is deadlocked with curthread. 869 */ 870 ASSERT(error == EINTR || error == ETIME || 871 (error == EDEADLK && !upi_owned((upimutex_t *)upimutex))); 872 ASSERT(!lwp_upimutex_owned(lp, type)); 873 goto out; 874 } 875 if (lwp_upimutex_owned(lp, type)) { 876 ASSERT(lwp_upimutex_owned(lp, type) == upimutex); 877 nupinest = upi_mylist_add((upimutex_t *)upimutex); 878 upilocked = 1; 879 } 880 /* 881 * Now, need to read the user-level lp->mutex_flag to do the following: 882 * 883 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED 884 * should be returned. 885 * - if lock isn't held, check if ENOTRECOVERABLE should 886 * be returned. 887 * 888 * Now, either lp->mutex_flag is readable or it's not. 
If not 889 * readable, the on_fault path will cause a return with EFAULT 890 * as it should. If it is readable, the state of the flag 891 * encodes the robustness state of the lock: 892 * 893 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD 894 * or LOCK_UNMAPPED setting will influence the return code 895 * appropriately. If the upimutex is not locked here, this 896 * could be due to a spurious wake-up or a NOTRECOVERABLE 897 * event. The flag's setting can be used to distinguish 898 * between these two events. 899 */ 900 fuword16_noerr(&lp->mutex_flag, &flag); 901 if (upilocked) { 902 /* 903 * If the thread wakes up from turnstile_block with the lock 904 * held, the flag could not be set to LOCK_NOTRECOVERABLE, 905 * since it would not have been handed-off the lock. 906 * So, no need to check for this case. 907 */ 908 if (nupinest > maxnestupimx && 909 secpolicy_resource(CRED()) != 0) { 910 upimutex_unlock((upimutex_t *)upimutex, flag); 911 upilocked = 0; 912 error = ENOMEM; 913 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 914 if (flag & LOCK_OWNERDEAD) 915 error = EOWNERDEAD; 916 else if (type & USYNC_PROCESS_ROBUST) 917 error = ELOCKUNMAPPED; 918 else 919 error = EOWNERDEAD; 920 } 921 } else { 922 /* 923 * Wake-up without the upimutex held. Either this is a 924 * spurious wake-up (due to signals, forkall(), whatever), or 925 * it is a LOCK_NOTRECOVERABLE robustness event. The setting 926 * of the mutex flag can be used to distinguish between the 927 * two events. 928 */ 929 if (flag & LOCK_NOTRECOVERABLE) { 930 error = ENOTRECOVERABLE; 931 } else { 932 /* 933 * Here, the flag could be set to LOCK_OWNERDEAD or 934 * not. In both cases, this is a spurious wakeup, 935 * since the upi lock is not held, but the thread 936 * has returned from turnstile_block(). 937 * 938 * The user flag could be LOCK_OWNERDEAD if, at the 939 * same time as curthread having been woken up 940 * spuriously, the owner (say Tdead) has died, marked 941 * the mutex flag accordingly, and handed off the lock 942 * to some other waiter (say Tnew). curthread just 943 * happened to read the flag while Tnew has yet to deal 944 * with the owner-dead event. 945 * 946 * In this event, curthread should retry the lock. 947 * If Tnew is able to cleanup the lock, curthread 948 * will eventually get the lock with a zero error code, 949 * If Tnew is unable to cleanup, its eventual call to 950 * unlock the lock will result in the mutex flag being 951 * set to LOCK_NOTRECOVERABLE, and the wake-up of 952 * all waiters, including curthread, which will then 953 * eventually return ENOTRECOVERABLE due to the above 954 * check. 955 * 956 * Of course, if the user-flag is not set with 957 * LOCK_OWNERDEAD, retrying is the thing to do, since 958 * this is definitely a spurious wakeup. 
959 */ 960 goto retry; 961 } 962 } 963 964 out: 965 no_fault(); 966 return (error); 967 } 968 969 970 static int 971 lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type) 972 { 973 label_t ljb; 974 int error = 0; 975 lwpchan_t lwpchan; 976 uint16_t flag; 977 upib_t *upibp; 978 volatile struct upimutex *upimutex = NULL; 979 volatile int upilocked = 0; 980 981 if (on_fault(&ljb)) { 982 if (upilocked) 983 upimutex_unlock((upimutex_t *)upimutex, 0); 984 error = EFAULT; 985 goto out; 986 } 987 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 988 &lwpchan, LWPCHAN_MPPOOL)) { 989 error = EFAULT; 990 goto out; 991 } 992 upibp = &UPI_CHAIN(lwpchan); 993 mutex_enter(&upibp->upib_lock); 994 upimutex = upi_get(upibp, &lwpchan); 995 /* 996 * If the lock is not held, or the owner is not curthread, return 997 * error. The user-level wrapper can return this error or stall, 998 * depending on whether mutex is of ERRORCHECK type or not. 999 */ 1000 if (upimutex == NULL || upimutex->upi_owner != curthread) { 1001 mutex_exit(&upibp->upib_lock); 1002 error = EPERM; 1003 goto out; 1004 } 1005 mutex_exit(&upibp->upib_lock); /* release for user memory access */ 1006 upilocked = 1; 1007 fuword16_noerr(&lp->mutex_flag, &flag); 1008 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1009 /* 1010 * transition mutex to the LOCK_NOTRECOVERABLE state. 1011 */ 1012 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 1013 flag |= LOCK_NOTRECOVERABLE; 1014 suword16_noerr(&lp->mutex_flag, flag); 1015 } 1016 set_owner_pid(lp, 0, 0); 1017 upimutex_unlock((upimutex_t *)upimutex, flag); 1018 upilocked = 0; 1019 out: 1020 no_fault(); 1021 return (error); 1022 } 1023 1024 /* 1025 * Set the owner and ownerpid fields of a user-level mutex. 1026 */ 1027 static void 1028 set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid) 1029 { 1030 union { 1031 uint64_t word64; 1032 uint32_t word32[2]; 1033 } un; 1034 1035 un.word64 = (uint64_t)owner; 1036 1037 suword32_noerr(&lp->mutex_ownerpid, pid); 1038 #if defined(_LP64) 1039 if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */ 1040 suword64_noerr(&lp->mutex_owner, un.word64); 1041 return; 1042 } 1043 #endif 1044 /* mutex is unaligned or we are running on a 32-bit kernel */ 1045 suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]); 1046 suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]); 1047 } 1048 1049 /* 1050 * Clear the contents of a user-level mutex; return the flags. 1051 * Used only by upi_dead() and lwp_mutex_cleanup(), below. 1052 */ 1053 static uint16_t 1054 lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg) 1055 { 1056 uint16_t flag; 1057 1058 fuword16_noerr(&lp->mutex_flag, &flag); 1059 if ((flag & 1060 (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) { 1061 flag |= lockflg; 1062 suword16_noerr(&lp->mutex_flag, flag); 1063 } 1064 set_owner_pid(lp, 0, 0); 1065 suword8_noerr(&lp->mutex_rcount, 0); 1066 1067 return (flag); 1068 } 1069 1070 /* 1071 * Mark user mutex state, corresponding to kernel upimutex, 1072 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate 1073 */ 1074 static int 1075 upi_dead(upimutex_t *upip, uint16_t lockflg) 1076 { 1077 label_t ljb; 1078 int error = 0; 1079 lwp_mutex_t *lp; 1080 1081 if (on_fault(&ljb)) { 1082 error = EFAULT; 1083 goto out; 1084 } 1085 1086 lp = upip->upi_vaddr; 1087 (void) lwp_clear_mutex(lp, lockflg); 1088 suword8_noerr(&lp->mutex_lockw, 0); 1089 out: 1090 no_fault(); 1091 return (error); 1092 } 1093 1094 /* 1095 * Unlock all upimutexes held by curthread, since curthread is dying. 
1096 * For each upimutex, attempt to mark its corresponding user mutex object as 1097 * dead. 1098 */ 1099 void 1100 upimutex_cleanup() 1101 { 1102 kthread_t *t = curthread; 1103 uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)? 1104 LOCK_UNMAPPED : LOCK_OWNERDEAD; 1105 struct upimutex *upip; 1106 1107 while ((upip = t->t_upimutex) != NULL) { 1108 if (upi_dead(upip, lockflg) != 0) { 1109 /* 1110 * If the user object associated with this upimutex is 1111 * unmapped, unlock upimutex with the 1112 * LOCK_NOTRECOVERABLE flag, so that all waiters are 1113 * woken up. Since user object is unmapped, it could 1114 * not be marked as dead or notrecoverable. 1115 * The waiters will now all wake up and return 1116 * ENOTRECOVERABLE, since they would find that the lock 1117 * has not been handed-off to them. 1118 * See lwp_upimutex_lock(). 1119 */ 1120 upimutex_unlock(upip, LOCK_NOTRECOVERABLE); 1121 } else { 1122 /* 1123 * The user object has been updated as dead. 1124 * Unlock the upimutex: if no waiters, upip kmem will 1125 * be freed. If there is a waiter, the lock will be 1126 * handed off. If exit() is in progress, each existing 1127 * waiter will successively get the lock, as owners 1128 * die, and each new owner will call this routine as 1129 * it dies. The last owner will free kmem, since 1130 * it will find the upimutex has no waiters. So, 1131 * eventually, the kmem is guaranteed to be freed. 1132 */ 1133 upimutex_unlock(upip, 0); 1134 } 1135 /* 1136 * Note that the call to upimutex_unlock() above will delete 1137 * upimutex from the t_upimutexes chain. And so the 1138 * while loop will eventually terminate. 1139 */ 1140 } 1141 } 1142 1143 int 1144 lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner) 1145 { 1146 kthread_t *t = curthread; 1147 klwp_t *lwp = ttolwp(t); 1148 proc_t *p = ttoproc(t); 1149 lwp_timer_t lwpt; 1150 caddr_t timedwait; 1151 int error = 0; 1152 int time_error; 1153 clock_t tim = -1; 1154 uchar_t waiters; 1155 volatile int locked = 0; 1156 volatile int watched = 0; 1157 label_t ljb; 1158 volatile uint8_t type = 0; 1159 lwpchan_t lwpchan; 1160 sleepq_head_t *sqh; 1161 uint16_t flag; 1162 int imm_timeout = 0; 1163 1164 if ((caddr_t)lp >= p->p_as->a_userlimit) 1165 return (set_errno(EFAULT)); 1166 1167 /* 1168 * Put the lwp in an orderly state for debugging, 1169 * in case we are stopped while sleeping, below. 1170 */ 1171 prstop(PR_REQUESTED, 0); 1172 1173 timedwait = (caddr_t)tsp; 1174 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1175 lwpt.lwpt_imm_timeout) { 1176 imm_timeout = 1; 1177 timedwait = NULL; 1178 } 1179 1180 /* 1181 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock", 1182 * this micro state is really a run state. If the thread indeed blocks, 1183 * this state becomes valid. If not, the state is converted back to 1184 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just 1185 * when blocking. 1186 */ 1187 (void) new_mstate(t, LMS_USER_LOCK); 1188 if (on_fault(&ljb)) { 1189 if (locked) 1190 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1191 error = EFAULT; 1192 goto out; 1193 } 1194 /* 1195 * Force Copy-on-write if necessary and ensure that the 1196 * synchronization object resides in read/write memory. 1197 * Cause an EFAULT return now if this is not so. 
1198 */ 1199 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1200 suword8_noerr(&lp->mutex_type, type); 1201 if (UPIMUTEX(type)) { 1202 no_fault(); 1203 error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt); 1204 if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED) 1205 set_owner_pid(lp, owner, 1206 (type & USYNC_PROCESS)? p->p_pid : 0); 1207 if (tsp && !time_error) /* copyout the residual time left */ 1208 error = lwp_timer_copyout(&lwpt, error); 1209 if (error) 1210 return (set_errno(error)); 1211 return (0); 1212 } 1213 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1214 &lwpchan, LWPCHAN_MPPOOL)) { 1215 error = EFAULT; 1216 goto out; 1217 } 1218 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1219 locked = 1; 1220 if (type & LOCK_ROBUST) { 1221 fuword16_noerr(&lp->mutex_flag, &flag); 1222 if (flag & LOCK_NOTRECOVERABLE) { 1223 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1224 error = ENOTRECOVERABLE; 1225 goto out; 1226 } 1227 } 1228 fuword8_noerr(&lp->mutex_waiters, &waiters); 1229 suword8_noerr(&lp->mutex_waiters, 1); 1230 1231 /* 1232 * If watchpoints are set, they need to be restored, since 1233 * atomic accesses of memory such as the call to ulock_try() 1234 * below cannot be watched. 1235 */ 1236 1237 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1238 1239 while (!ulock_try(&lp->mutex_lockw)) { 1240 if (time_error) { 1241 /* 1242 * The SUSV3 Posix spec is very clear that we 1243 * should get no error from validating the 1244 * timer until we would actually sleep. 1245 */ 1246 error = time_error; 1247 break; 1248 } 1249 1250 if (watched) { 1251 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1252 watched = 0; 1253 } 1254 1255 if (timedwait) { 1256 /* 1257 * If we successfully queue the timeout, 1258 * then don't drop t_delay_lock until 1259 * we are on the sleep queue (below). 1260 */ 1261 mutex_enter(&t->t_delay_lock); 1262 if (lwp_timer_enqueue(&lwpt) != 0) { 1263 mutex_exit(&t->t_delay_lock); 1264 imm_timeout = 1; 1265 timedwait = NULL; 1266 } 1267 } 1268 lwp_block(&lwpchan); 1269 /* 1270 * Nothing should happen to cause the lwp to go to 1271 * sleep again until after it returns from swtch(). 1272 */ 1273 if (timedwait) 1274 mutex_exit(&t->t_delay_lock); 1275 locked = 0; 1276 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1277 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 1278 setrun(t); 1279 swtch(); 1280 t->t_flag &= ~T_WAKEABLE; 1281 if (timedwait) 1282 tim = lwp_timer_dequeue(&lwpt); 1283 setallwatch(); 1284 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 1285 error = EINTR; 1286 else if (imm_timeout || (timedwait && tim == -1)) 1287 error = ETIME; 1288 if (error) { 1289 lwp->lwp_asleep = 0; 1290 lwp->lwp_sysabort = 0; 1291 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1292 S_WRITE); 1293 1294 /* 1295 * Need to re-compute waiters bit. The waiters field in 1296 * the lock is not reliable. Either of two things could 1297 * have occurred: no lwp may have called lwp_release() 1298 * for me but I have woken up due to a signal or 1299 * timeout. In this case, the waiter bit is incorrect 1300 * since it is still set to 1, set above. 1301 * OR an lwp_release() did occur for some other lwp on 1302 * the same lwpchan. In this case, the waiter bit is 1303 * correct. But which event occurred, one can't tell. 1304 * So, recompute. 
1305 */ 1306 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1307 locked = 1; 1308 sqh = lwpsqhash(&lwpchan); 1309 disp_lock_enter(&sqh->sq_lock); 1310 waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan); 1311 disp_lock_exit(&sqh->sq_lock); 1312 break; 1313 } 1314 lwp->lwp_asleep = 0; 1315 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1316 S_WRITE); 1317 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1318 locked = 1; 1319 fuword8_noerr(&lp->mutex_waiters, &waiters); 1320 suword8_noerr(&lp->mutex_waiters, 1); 1321 if (type & LOCK_ROBUST) { 1322 fuword16_noerr(&lp->mutex_flag, &flag); 1323 if (flag & LOCK_NOTRECOVERABLE) { 1324 error = ENOTRECOVERABLE; 1325 break; 1326 } 1327 } 1328 } 1329 1330 if (t->t_mstate == LMS_USER_LOCK) 1331 (void) new_mstate(t, LMS_SYSTEM); 1332 1333 if (error == 0) { 1334 set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0); 1335 if (type & LOCK_ROBUST) { 1336 fuword16_noerr(&lp->mutex_flag, &flag); 1337 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1338 if (flag & LOCK_OWNERDEAD) 1339 error = EOWNERDEAD; 1340 else if (type & USYNC_PROCESS_ROBUST) 1341 error = ELOCKUNMAPPED; 1342 else 1343 error = EOWNERDEAD; 1344 } 1345 } 1346 } 1347 suword8_noerr(&lp->mutex_waiters, waiters); 1348 locked = 0; 1349 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1350 out: 1351 no_fault(); 1352 if (watched) 1353 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1354 if (tsp && !time_error) /* copyout the residual time left */ 1355 error = lwp_timer_copyout(&lwpt, error); 1356 if (error) 1357 return (set_errno(error)); 1358 return (0); 1359 } 1360 1361 /* 1362 * Obsolete lwp_mutex_lock() interface, no longer called from libc. 1363 * libc now calls lwp_mutex_timedlock(lp, NULL, NULL). 1364 * This system call trap continues to exist solely for the benefit 1365 * of old statically-linked binaries from Solaris 9 and before. 1366 * It should be removed from the system when we no longer care 1367 * about such applications. 1368 */ 1369 int 1370 lwp_mutex_lock(lwp_mutex_t *lp) 1371 { 1372 return (lwp_mutex_timedlock(lp, NULL, NULL)); 1373 } 1374 1375 static int 1376 iswanted(kthread_t *t, lwpchan_t *lwpchan) 1377 { 1378 /* 1379 * The caller holds the dispatcher lock on the sleep queue. 1380 */ 1381 while (t != NULL) { 1382 if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1383 t->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1384 return (1); 1385 t = t->t_link; 1386 } 1387 return (0); 1388 } 1389 1390 /* 1391 * Return the highest priority thread sleeping on this lwpchan. 1392 */ 1393 static kthread_t * 1394 lwp_queue_waiter(lwpchan_t *lwpchan) 1395 { 1396 sleepq_head_t *sqh; 1397 kthread_t *tp; 1398 1399 sqh = lwpsqhash(lwpchan); 1400 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1401 for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) { 1402 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1403 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1404 break; 1405 } 1406 disp_lock_exit(&sqh->sq_lock); 1407 return (tp); 1408 } 1409 1410 static int 1411 lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type) 1412 { 1413 sleepq_head_t *sqh; 1414 kthread_t *tp; 1415 kthread_t **tpp; 1416 1417 sqh = lwpsqhash(lwpchan); 1418 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1419 tpp = &sqh->sq_queue.sq_first; 1420 while ((tp = *tpp) != NULL) { 1421 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1422 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1423 /* 1424 * The following is typically false. 
It could be true 1425 * only if lwp_release() is called from 1426 * lwp_mutex_wakeup() after reading the waiters field 1427 * from memory in which the lwp lock used to be, but has 1428 * since been re-used to hold a lwp cv or lwp semaphore. 1429 * The thread "tp" found to match the lwp lock's wchan 1430 * is actually sleeping for the cv or semaphore which 1431 * now has the same wchan. In this case, lwp_release() 1432 * should return failure. 1433 */ 1434 if (sync_type != (tp->t_flag & T_WAITCVSEM)) { 1435 ASSERT(sync_type == 0); 1436 /* 1437 * assert that this can happen only for mutexes 1438 * i.e. sync_type == 0, for correctly written 1439 * user programs. 1440 */ 1441 disp_lock_exit(&sqh->sq_lock); 1442 return (0); 1443 } 1444 *waiters = iswanted(tp->t_link, lwpchan); 1445 sleepq_unlink(tpp, tp); 1446 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1447 tp->t_wchan0 = NULL; 1448 tp->t_wchan = NULL; 1449 tp->t_sobj_ops = NULL; 1450 tp->t_release = 1; 1451 THREAD_TRANSITION(tp); /* drops sleepq lock */ 1452 CL_WAKEUP(tp); 1453 thread_unlock(tp); /* drop run queue lock */ 1454 return (1); 1455 } 1456 tpp = &tp->t_link; 1457 } 1458 *waiters = 0; 1459 disp_lock_exit(&sqh->sq_lock); 1460 return (0); 1461 } 1462 1463 static void 1464 lwp_release_all(lwpchan_t *lwpchan) 1465 { 1466 sleepq_head_t *sqh; 1467 kthread_t *tp; 1468 kthread_t **tpp; 1469 1470 sqh = lwpsqhash(lwpchan); 1471 disp_lock_enter(&sqh->sq_lock); /* lock sleep q queue */ 1472 tpp = &sqh->sq_queue.sq_first; 1473 while ((tp = *tpp) != NULL) { 1474 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1475 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1476 sleepq_unlink(tpp, tp); 1477 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1478 tp->t_wchan0 = NULL; 1479 tp->t_wchan = NULL; 1480 tp->t_sobj_ops = NULL; 1481 CL_WAKEUP(tp); 1482 thread_unlock_high(tp); /* release run queue lock */ 1483 } else { 1484 tpp = &tp->t_link; 1485 } 1486 } 1487 disp_lock_exit(&sqh->sq_lock); /* drop sleep q lock */ 1488 } 1489 1490 /* 1491 * unblock a lwp that is trying to acquire this mutex. the blocked 1492 * lwp resumes and retries to acquire the lock. 1493 */ 1494 int 1495 lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all) 1496 { 1497 proc_t *p = ttoproc(curthread); 1498 lwpchan_t lwpchan; 1499 uchar_t waiters; 1500 volatile int locked = 0; 1501 volatile int watched = 0; 1502 volatile uint8_t type = 0; 1503 label_t ljb; 1504 int error = 0; 1505 1506 if ((caddr_t)lp >= p->p_as->a_userlimit) 1507 return (set_errno(EFAULT)); 1508 1509 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1510 1511 if (on_fault(&ljb)) { 1512 if (locked) 1513 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1514 error = EFAULT; 1515 goto out; 1516 } 1517 /* 1518 * Force Copy-on-write if necessary and ensure that the 1519 * synchronization object resides in read/write memory. 1520 * Cause an EFAULT return now if this is not so. 1521 */ 1522 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1523 suword8_noerr(&lp->mutex_type, type); 1524 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1525 &lwpchan, LWPCHAN_MPPOOL)) { 1526 error = EFAULT; 1527 goto out; 1528 } 1529 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1530 locked = 1; 1531 /* 1532 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 1533 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 1534 * may fail. If it fails, do not write into the waiter bit. 1535 * The call to lwp_release() might fail due to one of three reasons: 1536 * 1537 * 1. 
due to the thread which set the waiter bit not actually 1538 * sleeping since it got the lock on the re-try. The waiter 1539 * bit will then be correctly updated by that thread. This 1540 * window may be closed by reading the wait bit again here 1541 * and not calling lwp_release() at all if it is zero. 1542 * 2. the thread which set the waiter bit and went to sleep 1543 * was woken up by a signal. This time, the waiter recomputes 1544 * the wait bit in the return with EINTR code. 1545 * 3. the waiter bit read by lwp_mutex_wakeup() was in 1546 * memory that has been re-used after the lock was dropped. 1547 * In this case, writing into the waiter bit would cause data 1548 * corruption. 1549 */ 1550 if (release_all) 1551 lwp_release_all(&lwpchan); 1552 else if (lwp_release(&lwpchan, &waiters, 0)) 1553 suword8_noerr(&lp->mutex_waiters, waiters); 1554 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1555 out: 1556 no_fault(); 1557 if (watched) 1558 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1559 if (error) 1560 return (set_errno(error)); 1561 return (0); 1562 } 1563 1564 /* 1565 * lwp_cond_wait() has four arguments, a pointer to a condition variable, 1566 * a pointer to a mutex, a pointer to a timespec for a timed wait and 1567 * a flag telling the kernel whether or not to honor the kernel/user 1568 * schedctl parking protocol (see schedctl_is_park() in schedctl.c). 1569 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an 1570 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL, 1571 * it is used an an in/out parameter. On entry, it contains the relative 1572 * time until timeout. On exit, we copyout the residual time left to it. 1573 */ 1574 int 1575 lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park) 1576 { 1577 kthread_t *t = curthread; 1578 klwp_t *lwp = ttolwp(t); 1579 proc_t *p = ttoproc(t); 1580 lwp_timer_t lwpt; 1581 lwpchan_t cv_lwpchan; 1582 lwpchan_t m_lwpchan; 1583 caddr_t timedwait; 1584 volatile uint16_t type = 0; 1585 volatile uint8_t mtype = 0; 1586 uchar_t waiters; 1587 volatile int error; 1588 clock_t tim = -1; 1589 volatile int locked = 0; 1590 volatile int m_locked = 0; 1591 volatile int cvwatched = 0; 1592 volatile int mpwatched = 0; 1593 label_t ljb; 1594 volatile int no_lwpchan = 1; 1595 int imm_timeout = 0; 1596 int imm_unpark = 0; 1597 1598 if ((caddr_t)cv >= p->p_as->a_userlimit || 1599 (caddr_t)mp >= p->p_as->a_userlimit) 1600 return (set_errno(EFAULT)); 1601 1602 /* 1603 * Put the lwp in an orderly state for debugging, 1604 * in case we are stopped while sleeping, below. 
1605 */ 1606 prstop(PR_REQUESTED, 0); 1607 1608 timedwait = (caddr_t)tsp; 1609 if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0) 1610 return (set_errno(error)); 1611 if (lwpt.lwpt_imm_timeout) { 1612 imm_timeout = 1; 1613 timedwait = NULL; 1614 } 1615 1616 (void) new_mstate(t, LMS_USER_LOCK); 1617 1618 if (on_fault(&ljb)) { 1619 if (no_lwpchan) { 1620 error = EFAULT; 1621 goto out; 1622 } 1623 if (m_locked) { 1624 m_locked = 0; 1625 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1626 } 1627 if (locked) { 1628 locked = 0; 1629 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1630 } 1631 /* 1632 * set up another on_fault() for a possible fault 1633 * on the user lock accessed at "efault" 1634 */ 1635 if (on_fault(&ljb)) { 1636 if (m_locked) { 1637 m_locked = 0; 1638 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1639 } 1640 goto out; 1641 } 1642 error = EFAULT; 1643 goto efault; 1644 } 1645 1646 /* 1647 * Force Copy-on-write if necessary and ensure that the 1648 * synchronization object resides in read/write memory. 1649 * Cause an EFAULT return now if this is not so. 1650 */ 1651 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 1652 suword8_noerr(&mp->mutex_type, mtype); 1653 if (UPIMUTEX(mtype) == 0) { 1654 /* convert user level mutex, "mp", to a unique lwpchan */ 1655 /* check if mtype is ok to use below, instead of type from cv */ 1656 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 1657 &m_lwpchan, LWPCHAN_MPPOOL)) { 1658 error = EFAULT; 1659 goto out; 1660 } 1661 } 1662 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1663 suword16_noerr(&cv->cond_type, type); 1664 /* convert user level condition variable, "cv", to a unique lwpchan */ 1665 if (!get_lwpchan(p->p_as, (caddr_t)cv, type, 1666 &cv_lwpchan, LWPCHAN_CVPOOL)) { 1667 error = EFAULT; 1668 goto out; 1669 } 1670 no_lwpchan = 0; 1671 cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1672 if (UPIMUTEX(mtype) == 0) 1673 mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), 1674 S_WRITE); 1675 1676 /* 1677 * lwpchan_lock ensures that the calling lwp is put to sleep atomically 1678 * with respect to a possible wakeup which is a result of either 1679 * an lwp_cond_signal() or an lwp_cond_broadcast(). 1680 * 1681 * What's misleading, is that the lwp is put to sleep after the 1682 * condition variable's mutex is released. This is OK as long as 1683 * the release operation is also done while holding lwpchan_lock. 1684 * The lwp is then put to sleep when the possibility of pagefaulting 1685 * or sleeping is completely eliminated. 1686 */ 1687 lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL); 1688 locked = 1; 1689 if (UPIMUTEX(mtype) == 0) { 1690 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1691 m_locked = 1; 1692 suword8_noerr(&cv->cond_waiters_kernel, 1); 1693 /* 1694 * unlock the condition variable's mutex. (pagefaults are 1695 * possible here.) 1696 */ 1697 set_owner_pid(mp, 0, 0); 1698 ulock_clear(&mp->mutex_lockw); 1699 fuword8_noerr(&mp->mutex_waiters, &waiters); 1700 if (waiters != 0) { 1701 /* 1702 * Given the locking of lwpchan_lock around the release 1703 * of the mutex and checking for waiters, the following 1704 * call to lwp_release() can fail ONLY if the lock 1705 * acquirer is interrupted after setting the waiter bit, 1706 * calling lwp_block() and releasing lwpchan_lock. 1707 * In this case, it could get pulled off the lwp sleep 1708 * q (via setrun()) before the following call to 1709 * lwp_release() occurs. In this case, the lock 1710 * requestor will update the waiter bit correctly by 1711 * re-evaluating it. 
1712 */ 1713 if (lwp_release(&m_lwpchan, &waiters, 0)) 1714 suword8_noerr(&mp->mutex_waiters, waiters); 1715 } 1716 m_locked = 0; 1717 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1718 } else { 1719 suword8_noerr(&cv->cond_waiters_kernel, 1); 1720 error = lwp_upimutex_unlock(mp, mtype); 1721 if (error) { /* if the upimutex unlock failed */ 1722 locked = 0; 1723 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1724 goto out; 1725 } 1726 } 1727 no_fault(); 1728 1729 if (mpwatched) { 1730 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1731 mpwatched = 0; 1732 } 1733 if (cvwatched) { 1734 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1735 cvwatched = 0; 1736 } 1737 1738 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 1739 /* 1740 * We received a signal at user-level before calling here 1741 * or another thread wants us to return immediately 1742 * with EINTR. See lwp_unpark(). 1743 */ 1744 imm_unpark = 1; 1745 t->t_unpark = 0; 1746 timedwait = NULL; 1747 } else if (timedwait) { 1748 /* 1749 * If we successfully queue the timeout, 1750 * then don't drop t_delay_lock until 1751 * we are on the sleep queue (below). 1752 */ 1753 mutex_enter(&t->t_delay_lock); 1754 if (lwp_timer_enqueue(&lwpt) != 0) { 1755 mutex_exit(&t->t_delay_lock); 1756 imm_timeout = 1; 1757 timedwait = NULL; 1758 } 1759 } 1760 t->t_flag |= T_WAITCVSEM; 1761 lwp_block(&cv_lwpchan); 1762 /* 1763 * Nothing should happen to cause the lwp to go to sleep 1764 * until after it returns from swtch(). 1765 */ 1766 if (timedwait) 1767 mutex_exit(&t->t_delay_lock); 1768 locked = 0; 1769 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1770 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 1771 (imm_timeout | imm_unpark)) 1772 setrun(t); 1773 swtch(); 1774 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 1775 if (timedwait) 1776 tim = lwp_timer_dequeue(&lwpt); 1777 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 1778 MUSTRETURN(p, t) || imm_unpark) 1779 error = EINTR; 1780 else if (imm_timeout || (timedwait && tim == -1)) 1781 error = ETIME; 1782 lwp->lwp_asleep = 0; 1783 lwp->lwp_sysabort = 0; 1784 setallwatch(); 1785 1786 if (t->t_mstate == LMS_USER_LOCK) 1787 (void) new_mstate(t, LMS_SYSTEM); 1788 1789 if (tsp && check_park) /* copyout the residual time left */ 1790 error = lwp_timer_copyout(&lwpt, error); 1791 1792 /* the mutex is reacquired by the caller on return to user level */ 1793 if (error) { 1794 /* 1795 * If we were concurrently lwp_cond_signal()d and we 1796 * received a UNIX signal or got a timeout, then perform 1797 * another lwp_cond_signal() to avoid consuming the wakeup. 1798 */ 1799 if (t->t_release) 1800 (void) lwp_cond_signal(cv); 1801 return (set_errno(error)); 1802 } 1803 return (0); 1804 1805 efault: 1806 /* 1807 * make sure that the user level lock is dropped before 1808 * returning to caller, since the caller always re-acquires it. 1809 */ 1810 if (UPIMUTEX(mtype) == 0) { 1811 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1812 m_locked = 1; 1813 set_owner_pid(mp, 0, 0); 1814 ulock_clear(&mp->mutex_lockw); 1815 fuword8_noerr(&mp->mutex_waiters, &waiters); 1816 if (waiters != 0) { 1817 /* 1818 * See comment above on lock clearing and lwp_release() 1819 * success/failure. 
1820 */ 1821 if (lwp_release(&m_lwpchan, &waiters, 0)) 1822 suword8_noerr(&mp->mutex_waiters, waiters); 1823 } 1824 m_locked = 0; 1825 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1826 } else { 1827 (void) lwp_upimutex_unlock(mp, mtype); 1828 } 1829 out: 1830 no_fault(); 1831 if (mpwatched) 1832 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1833 if (cvwatched) 1834 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1835 if (t->t_mstate == LMS_USER_LOCK) 1836 (void) new_mstate(t, LMS_SYSTEM); 1837 return (set_errno(error)); 1838 } 1839 1840 /* 1841 * wakeup one lwp that's blocked on this condition variable. 1842 */ 1843 int 1844 lwp_cond_signal(lwp_cond_t *cv) 1845 { 1846 proc_t *p = ttoproc(curthread); 1847 lwpchan_t lwpchan; 1848 uchar_t waiters; 1849 volatile uint16_t type = 0; 1850 volatile int locked = 0; 1851 volatile int watched = 0; 1852 label_t ljb; 1853 int error = 0; 1854 1855 if ((caddr_t)cv >= p->p_as->a_userlimit) 1856 return (set_errno(EFAULT)); 1857 1858 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1859 1860 if (on_fault(&ljb)) { 1861 if (locked) 1862 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1863 error = EFAULT; 1864 goto out; 1865 } 1866 /* 1867 * Force Copy-on-write if necessary and ensure that the 1868 * synchronization object resides in read/write memory. 1869 * Cause an EFAULT return now if this is not so. 1870 */ 1871 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1872 suword16_noerr(&cv->cond_type, type); 1873 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1874 &lwpchan, LWPCHAN_CVPOOL)) { 1875 error = EFAULT; 1876 goto out; 1877 } 1878 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1879 locked = 1; 1880 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1881 if (waiters != 0) { 1882 /* 1883 * The following call to lwp_release() might fail but it is 1884 * OK to write into the waiters bit below, since the memory 1885 * could not have been re-used or unmapped (for correctly 1886 * written user programs) as in the case of lwp_mutex_wakeup(). 1887 * For an incorrect program, we should not care about data 1888 * corruption since this is just one instance of other places 1889 * where corruption can occur for such a program. Of course 1890 * if the memory is unmapped, normal fault recovery occurs. 1891 */ 1892 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1893 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1894 } 1895 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1896 out: 1897 no_fault(); 1898 if (watched) 1899 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1900 if (error) 1901 return (set_errno(error)); 1902 return (0); 1903 } 1904 1905 /* 1906 * wakeup every lwp that's blocked on this condition variable. 1907 */ 1908 int 1909 lwp_cond_broadcast(lwp_cond_t *cv) 1910 { 1911 proc_t *p = ttoproc(curthread); 1912 lwpchan_t lwpchan; 1913 volatile uint16_t type = 0; 1914 volatile int locked = 0; 1915 volatile int watched = 0; 1916 label_t ljb; 1917 uchar_t waiters; 1918 int error = 0; 1919 1920 if ((caddr_t)cv >= p->p_as->a_userlimit) 1921 return (set_errno(EFAULT)); 1922 1923 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1924 1925 if (on_fault(&ljb)) { 1926 if (locked) 1927 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1928 error = EFAULT; 1929 goto out; 1930 } 1931 /* 1932 * Force Copy-on-write if necessary and ensure that the 1933 * synchronization object resides in read/write memory. 1934 * Cause an EFAULT return now if this is not so. 
1935 */ 1936 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1937 suword16_noerr(&cv->cond_type, type); 1938 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1939 &lwpchan, LWPCHAN_CVPOOL)) { 1940 error = EFAULT; 1941 goto out; 1942 } 1943 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1944 locked = 1; 1945 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1946 if (waiters != 0) { 1947 lwp_release_all(&lwpchan); 1948 suword8_noerr(&cv->cond_waiters_kernel, 0); 1949 } 1950 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1951 out: 1952 no_fault(); 1953 if (watched) 1954 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1955 if (error) 1956 return (set_errno(error)); 1957 return (0); 1958 } 1959 1960 int 1961 lwp_sema_trywait(lwp_sema_t *sp) 1962 { 1963 kthread_t *t = curthread; 1964 proc_t *p = ttoproc(t); 1965 label_t ljb; 1966 volatile int locked = 0; 1967 volatile int watched = 0; 1968 volatile uint16_t type = 0; 1969 int count; 1970 lwpchan_t lwpchan; 1971 uchar_t waiters; 1972 int error = 0; 1973 1974 if ((caddr_t)sp >= p->p_as->a_userlimit) 1975 return (set_errno(EFAULT)); 1976 1977 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1978 1979 if (on_fault(&ljb)) { 1980 if (locked) 1981 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1982 error = EFAULT; 1983 goto out; 1984 } 1985 /* 1986 * Force Copy-on-write if necessary and ensure that the 1987 * synchronization object resides in read/write memory. 1988 * Cause an EFAULT return now if this is not so. 1989 */ 1990 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1991 suword16_noerr((void *)&sp->sema_type, type); 1992 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1993 &lwpchan, LWPCHAN_CVPOOL)) { 1994 error = EFAULT; 1995 goto out; 1996 } 1997 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1998 locked = 1; 1999 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2000 if (count == 0) 2001 error = EBUSY; 2002 else 2003 suword32_noerr((void *)&sp->sema_count, --count); 2004 if (count != 0) { 2005 fuword8_noerr(&sp->sema_waiters, &waiters); 2006 if (waiters != 0) { 2007 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2008 suword8_noerr(&sp->sema_waiters, waiters); 2009 } 2010 } 2011 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2012 out: 2013 no_fault(); 2014 if (watched) 2015 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2016 if (error) 2017 return (set_errno(error)); 2018 return (0); 2019 } 2020 2021 /* 2022 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 2023 */ 2024 int 2025 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2026 { 2027 kthread_t *t = curthread; 2028 klwp_t *lwp = ttolwp(t); 2029 proc_t *p = ttoproc(t); 2030 lwp_timer_t lwpt; 2031 caddr_t timedwait; 2032 clock_t tim = -1; 2033 label_t ljb; 2034 volatile int locked = 0; 2035 volatile int watched = 0; 2036 volatile uint16_t type = 0; 2037 int count; 2038 lwpchan_t lwpchan; 2039 uchar_t waiters; 2040 int error = 0; 2041 int time_error; 2042 int imm_timeout = 0; 2043 int imm_unpark = 0; 2044 2045 if ((caddr_t)sp >= p->p_as->a_userlimit) 2046 return (set_errno(EFAULT)); 2047 2048 /* 2049 * Put the lwp in an orderly state for debugging, 2050 * in case we are stopped while sleeping, below. 
2051 */ 2052 prstop(PR_REQUESTED, 0); 2053 2054 timedwait = (caddr_t)tsp; 2055 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2056 lwpt.lwpt_imm_timeout) { 2057 imm_timeout = 1; 2058 timedwait = NULL; 2059 } 2060 2061 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2062 2063 if (on_fault(&ljb)) { 2064 if (locked) 2065 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2066 error = EFAULT; 2067 goto out; 2068 } 2069 /* 2070 * Force Copy-on-write if necessary and ensure that the 2071 * synchronization object resides in read/write memory. 2072 * Cause an EFAULT return now if this is not so. 2073 */ 2074 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2075 suword16_noerr((void *)&sp->sema_type, type); 2076 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2077 &lwpchan, LWPCHAN_CVPOOL)) { 2078 error = EFAULT; 2079 goto out; 2080 } 2081 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2082 locked = 1; 2083 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2084 while (error == 0 && count == 0) { 2085 if (time_error) { 2086 /* 2087 * The SUSV3 Posix spec is very clear that we 2088 * should get no error from validating the 2089 * timer until we would actually sleep. 2090 */ 2091 error = time_error; 2092 break; 2093 } 2094 suword8_noerr(&sp->sema_waiters, 1); 2095 if (watched) 2096 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2097 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2098 /* 2099 * We received a signal at user-level before calling 2100 * here or another thread wants us to return 2101 * immediately with EINTR. See lwp_unpark(). 2102 */ 2103 imm_unpark = 1; 2104 t->t_unpark = 0; 2105 timedwait = NULL; 2106 } else if (timedwait) { 2107 /* 2108 * If we successfully queue the timeout, 2109 * then don't drop t_delay_lock until 2110 * we are on the sleep queue (below). 2111 */ 2112 mutex_enter(&t->t_delay_lock); 2113 if (lwp_timer_enqueue(&lwpt) != 0) { 2114 mutex_exit(&t->t_delay_lock); 2115 imm_timeout = 1; 2116 timedwait = NULL; 2117 } 2118 } 2119 t->t_flag |= T_WAITCVSEM; 2120 lwp_block(&lwpchan); 2121 /* 2122 * Nothing should happen to cause the lwp to sleep 2123 * again until after it returns from swtch(). 
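 *
 * (Why t_delay_lock is still held at this point, stated informally:
 * if the queued timeout could fire before this lwp is actually on
 * the sleep queue, its wakeup would find nothing asleep and the
 * sleep below could outlast the deadline.  Dropping the lock only
 * after lwp_block() closes that window.)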
2124 */ 2125 if (timedwait) 2126 mutex_exit(&t->t_delay_lock); 2127 locked = 0; 2128 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2129 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2130 (imm_timeout | imm_unpark)) 2131 setrun(t); 2132 swtch(); 2133 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2134 if (timedwait) 2135 tim = lwp_timer_dequeue(&lwpt); 2136 setallwatch(); 2137 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2138 MUSTRETURN(p, t) || imm_unpark) 2139 error = EINTR; 2140 else if (imm_timeout || (timedwait && tim == -1)) 2141 error = ETIME; 2142 lwp->lwp_asleep = 0; 2143 lwp->lwp_sysabort = 0; 2144 watched = watch_disable_addr((caddr_t)sp, 2145 sizeof (*sp), S_WRITE); 2146 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2147 locked = 1; 2148 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2149 } 2150 if (error == 0) 2151 suword32_noerr((void *)&sp->sema_count, --count); 2152 if (count != 0) { 2153 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2154 suword8_noerr(&sp->sema_waiters, waiters); 2155 } 2156 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2157 out: 2158 no_fault(); 2159 if (watched) 2160 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2161 if (tsp && check_park && !time_error) 2162 error = lwp_timer_copyout(&lwpt, error); 2163 if (error) 2164 return (set_errno(error)); 2165 return (0); 2166 } 2167 2168 /* 2169 * Obsolete lwp_sema_wait() interface, no longer called from libc. 2170 * libc now calls lwp_sema_timedwait(). 2171 * This system call trap exists solely for the benefit of old 2172 * statically linked applications from Solaris 9 and before. 2173 * It should be removed when we no longer care about such applications. 2174 */ 2175 int 2176 lwp_sema_wait(lwp_sema_t *sp) 2177 { 2178 return (lwp_sema_timedwait(sp, NULL, 0)); 2179 } 2180 2181 int 2182 lwp_sema_post(lwp_sema_t *sp) 2183 { 2184 proc_t *p = ttoproc(curthread); 2185 label_t ljb; 2186 volatile int locked = 0; 2187 volatile int watched = 0; 2188 volatile uint16_t type = 0; 2189 int count; 2190 lwpchan_t lwpchan; 2191 uchar_t waiters; 2192 int error = 0; 2193 2194 if ((caddr_t)sp >= p->p_as->a_userlimit) 2195 return (set_errno(EFAULT)); 2196 2197 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2198 2199 if (on_fault(&ljb)) { 2200 if (locked) 2201 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2202 error = EFAULT; 2203 goto out; 2204 } 2205 /* 2206 * Force Copy-on-write if necessary and ensure that the 2207 * synchronization object resides in read/write memory. 2208 * Cause an EFAULT return now if this is not so. 
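 *
 * (A note on the idiom below, for the reader: fetching the type
 * word and immediately storing the same value back is what forces
 * the copy-on-write and proves the object is writable; any fault
 * taken while doing so is turned into the EFAULT return by the
 * on_fault() handler set up above.)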
2209 */ 2210 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2211 suword16_noerr(&sp->sema_type, type); 2212 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2213 &lwpchan, LWPCHAN_CVPOOL)) { 2214 error = EFAULT; 2215 goto out; 2216 } 2217 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2218 locked = 1; 2219 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2220 if (count == _SEM_VALUE_MAX) 2221 error = EOVERFLOW; 2222 else 2223 suword32_noerr(&sp->sema_count, ++count); 2224 if (count == 1) { 2225 fuword8_noerr(&sp->sema_waiters, &waiters); 2226 if (waiters) { 2227 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2228 suword8_noerr(&sp->sema_waiters, waiters); 2229 } 2230 } 2231 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2232 out: 2233 no_fault(); 2234 if (watched) 2235 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2236 if (error) 2237 return (set_errno(error)); 2238 return (0); 2239 } 2240 2241 #define TRW_WANT_WRITE 0x1 2242 #define TRW_LOCK_GRANTED 0x2 2243 2244 #define READ_LOCK 0 2245 #define WRITE_LOCK 1 2246 #define TRY_FLAG 0x10 2247 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2248 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2249 2250 /* 2251 * Release one writer or one or more readers. Compute the rwstate word to 2252 * reflect the new state of the queue. For a safe hand-off we copy the new 2253 * rwstate value back to userland before we wake any of the new lock holders. 2254 * 2255 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2256 * being given precedence over readers of the same priority). 2257 * 2258 * If the first thread is a reader we scan the queue releasing all readers 2259 * until we hit a writer or the end of the queue. If the first thread is a 2260 * writer we still need to check for another writer. 2261 */ 2262 void 2263 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2264 { 2265 sleepq_head_t *sqh; 2266 kthread_t *tp; 2267 kthread_t **tpp; 2268 kthread_t *tpnext; 2269 kthread_t *wakelist = NULL; 2270 uint32_t rwstate = 0; 2271 int wcount = 0; 2272 int rcount = 0; 2273 2274 sqh = lwpsqhash(lwpchan); 2275 disp_lock_enter(&sqh->sq_lock); 2276 tpp = &sqh->sq_queue.sq_first; 2277 while ((tp = *tpp) != NULL) { 2278 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2279 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2280 if (tp->t_writer & TRW_WANT_WRITE) { 2281 if ((wcount++ == 0) && (rcount == 0)) { 2282 rwstate |= URW_WRITE_LOCKED; 2283 2284 /* Just one writer to wake. */ 2285 sleepq_unlink(tpp, tp); 2286 wakelist = tp; 2287 2288 /* tpp already set for next thread. */ 2289 continue; 2290 } else { 2291 rwstate |= URW_HAS_WAITERS; 2292 /* We need look no further. */ 2293 break; 2294 } 2295 } else { 2296 rcount++; 2297 if (wcount == 0) { 2298 rwstate++; 2299 2300 /* Add reader to wake list. */ 2301 sleepq_unlink(tpp, tp); 2302 tp->t_link = wakelist; 2303 wakelist = tp; 2304 2305 /* tpp already set for next thread. */ 2306 continue; 2307 } else { 2308 rwstate |= URW_HAS_WAITERS; 2309 /* We need look no further. */ 2310 break; 2311 } 2312 } 2313 } 2314 tpp = &tp->t_link; 2315 } 2316 2317 /* Copy the new rwstate back to userland. */ 2318 suword32_noerr(&rw->rwlock_readers, rwstate); 2319 2320 /* Wake the new lock holder(s) up. 
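 *
 * (A concrete example of the rwstate written back above, not a new
 * invariant: releasing three readers while a writer remains queued
 * stores (3 | URW_HAS_WAITERS); handing the lock to a lone queued
 * writer stores just URW_WRITE_LOCKED.)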
*/ 2321 tp = wakelist; 2322 while (tp != NULL) { 2323 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2324 tp->t_wchan0 = NULL; 2325 tp->t_wchan = NULL; 2326 tp->t_sobj_ops = NULL; 2327 tp->t_writer |= TRW_LOCK_GRANTED; 2328 tpnext = tp->t_link; 2329 tp->t_link = NULL; 2330 CL_WAKEUP(tp); 2331 thread_unlock_high(tp); 2332 tp = tpnext; 2333 } 2334 2335 disp_lock_exit(&sqh->sq_lock); 2336 } 2337 2338 /* 2339 * We enter here holding the user-level mutex, which we must release before 2340 * returning or blocking. Based on lwp_cond_wait(). 2341 */ 2342 static int 2343 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2344 { 2345 lwp_mutex_t *mp = NULL; 2346 kthread_t *t = curthread; 2347 kthread_t *tp; 2348 klwp_t *lwp = ttolwp(t); 2349 proc_t *p = ttoproc(t); 2350 lwp_timer_t lwpt; 2351 lwpchan_t lwpchan; 2352 lwpchan_t mlwpchan; 2353 caddr_t timedwait; 2354 volatile uint16_t type = 0; 2355 volatile uint8_t mtype = 0; 2356 uchar_t mwaiters; 2357 volatile int error = 0; 2358 int time_error; 2359 clock_t tim = -1; 2360 volatile int locked = 0; 2361 volatile int mlocked = 0; 2362 volatile int watched = 0; 2363 volatile int mwatched = 0; 2364 label_t ljb; 2365 volatile int no_lwpchan = 1; 2366 int imm_timeout = 0; 2367 int try_flag; 2368 uint32_t rwstate; 2369 int acquired = 0; 2370 2371 /* We only check rw because the mutex is included in it. */ 2372 if ((caddr_t)rw >= p->p_as->a_userlimit) 2373 return (set_errno(EFAULT)); 2374 2375 /* 2376 * Put the lwp in an orderly state for debugging, 2377 * in case we are stopped while sleeping, below. 2378 */ 2379 prstop(PR_REQUESTED, 0); 2380 2381 /* We must only report this error if we are about to sleep (later). */ 2382 timedwait = (caddr_t)tsp; 2383 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2384 lwpt.lwpt_imm_timeout) { 2385 imm_timeout = 1; 2386 timedwait = NULL; 2387 } 2388 2389 (void) new_mstate(t, LMS_USER_LOCK); 2390 2391 if (on_fault(&ljb)) { 2392 if (no_lwpchan) { 2393 error = EFAULT; 2394 goto out_nodrop; 2395 } 2396 if (mlocked) { 2397 mlocked = 0; 2398 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2399 } 2400 if (locked) { 2401 locked = 0; 2402 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2403 } 2404 /* 2405 * Set up another on_fault() for a possible fault 2406 * on the user lock accessed at "out_drop". 2407 */ 2408 if (on_fault(&ljb)) { 2409 if (mlocked) { 2410 mlocked = 0; 2411 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2412 } 2413 error = EFAULT; 2414 goto out_nodrop; 2415 } 2416 error = EFAULT; 2417 goto out_nodrop; 2418 } 2419 2420 /* Process rd_wr (including sanity check). */ 2421 try_flag = (rd_wr & TRY_FLAG); 2422 rd_wr &= ~TRY_FLAG; 2423 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2424 error = EINVAL; 2425 goto out_nodrop; 2426 } 2427 2428 /* 2429 * Force Copy-on-write if necessary and ensure that the 2430 * synchronization object resides in read/write memory. 2431 * Cause an EFAULT return now if this is not so. 2432 */ 2433 mp = &rw->mutex; 2434 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2435 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2436 suword8_noerr(&mp->mutex_type, mtype); 2437 suword16_noerr(&rw->rwlock_type, type); 2438 2439 /* We can only continue for simple USYNC_PROCESS locks. */ 2440 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2441 error = EINVAL; 2442 goto out_nodrop; 2443 } 2444 2445 /* Convert user level mutex, "mp", to a unique lwpchan. 
*/ 2446 if (!get_lwpchan(p->p_as, (caddr_t)mp, 2447 &mlwpchan, LWPCHAN_MPPOOL)) { 2448 error = EFAULT; 2449 goto out_nodrop; 2450 } 2451 2452 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2453 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2454 &lwpchan, LWPCHAN_CVPOOL)) { 2455 error = EFAULT; 2456 goto out_nodrop; 2457 } 2458 2459 no_lwpchan = 0; 2460 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2461 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2462 2463 /* 2464 * lwpchan_lock() ensures that the calling LWP is put to sleep 2465 * atomically with respect to a possible wakeup which is a result 2466 * of lwp_rwlock_unlock(). 2467 * 2468 * What's misleading is that the LWP is put to sleep after the 2469 * rwlock's mutex is released. This is OK as long as the release 2470 * operation is also done while holding mlwpchan. The LWP is then 2471 * put to sleep when the possibility of pagefaulting or sleeping 2472 * has been completely eliminated. 2473 */ 2474 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2475 locked = 1; 2476 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2477 mlocked = 1; 2478 2479 /* 2480 * Fetch the current rwlock state. 2481 * 2482 * The possibility of spurious wake-ups or killed waiters means 2483 * rwstate's URW_HAS_WAITERS bit may indicate false positives. 2484 * We only fix these if they are important to us. 2485 * 2486 * Although various error states can be observed here (e.g. the lock 2487 * is not held, but there are waiters) we assume these are application 2488 * errors and so we take no corrective action. 2489 */ 2490 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2491 /* 2492 * We cannot legitimately get here from user-level 2493 * without URW_HAS_WAITERS being set. 2494 * Set it now to guard against user-level error. 2495 */ 2496 rwstate |= URW_HAS_WAITERS; 2497 2498 /* 2499 * We can try only if the lock isn't held by a writer. 2500 */ 2501 if (!(rwstate & URW_WRITE_LOCKED)) { 2502 tp = lwp_queue_waiter(&lwpchan); 2503 if (tp == NULL) { 2504 /* 2505 * Hmmm, rwstate indicates waiters but there are 2506 * none queued. This could just be the result of a 2507 * spurious wakeup, so let's ignore it. 2508 * 2509 * We now have a chance to acquire the lock 2510 * uncontended, but this is the last chance for 2511 * a writer to acquire the lock without blocking. 2512 */ 2513 if (rd_wr == READ_LOCK) { 2514 rwstate++; 2515 acquired = 1; 2516 } else if ((rwstate & URW_READERS_MASK) == 0) { 2517 rwstate |= URW_WRITE_LOCKED; 2518 acquired = 1; 2519 } 2520 } else if (rd_wr == READ_LOCK) { 2521 /* 2522 * This is the last chance for a reader to acquire 2523 * the lock now, but it can only do so if there is 2524 * no writer of equal or greater priority at the 2525 * head of the queue. 2526 * 2527 * It is also just possible that there is a reader 2528 * at the head of the queue. This may be the result 2529 * of a spurious wakeup or an application failure. 2530 * In this case we only acquire the lock if we have 2531 * equal or greater priority. It is not our job to 2532 * release spurious waiters. 2533 */ 2534 pri_t our_pri = DISP_PRIO(t); 2535 pri_t his_pri = DISP_PRIO(tp); 2536 2537 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2538 !(tp->t_writer & TRW_WANT_WRITE))) { 2539 rwstate++; 2540 acquired = 1; 2541 } 2542 } 2543 } 2544 2545 if (acquired || try_flag || time_error) { 2546 /* 2547 * We're not going to block this time.
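 *
 * (A worked example of the priority test above, purely for
 * illustration: a reader at priority 60 may still take the lock
 * ahead of a queued writer at 59 or a queued reader at 60, but
 * must block behind a queued writer at 60 or higher.)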
2548 */ 2549 suword32_noerr(&rw->rwlock_readers, rwstate); 2550 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2551 locked = 0; 2552 2553 if (acquired) { 2554 /* 2555 * Got the lock! 2556 */ 2557 error = 0; 2558 2559 } else if (try_flag) { 2560 /* 2561 * We didn't get the lock and we're about to block. 2562 * If we're doing a trylock, return EBUSY instead. 2563 */ 2564 error = EBUSY; 2565 2566 } else if (time_error) { 2567 /* 2568 * The SUSV3 POSIX spec is very clear that we should 2569 * get no error from validating the timer (above) 2570 * until we would actually sleep. 2571 */ 2572 error = time_error; 2573 } 2574 2575 goto out_drop; 2576 } 2577 2578 /* 2579 * We're about to block, so indicate what kind of waiter we are. 2580 */ 2581 t->t_writer = 0; 2582 if (rd_wr == WRITE_LOCK) 2583 t->t_writer = TRW_WANT_WRITE; 2584 suword32_noerr(&rw->rwlock_readers, rwstate); 2585 2586 /* 2587 * Unlock the rwlock's mutex (pagefaults are possible here). 2588 */ 2589 set_owner_pid(mp, 0, 0); 2590 ulock_clear(&mp->mutex_lockw); 2591 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2592 if (mwaiters != 0) { 2593 /* 2594 * Given the locking of mlwpchan around the release of 2595 * the mutex and checking for waiters, the following 2596 * call to lwp_release() can fail ONLY if the lock 2597 * acquirer is interrupted after setting the waiter bit, 2598 * calling lwp_block() and releasing mlwpchan. 2599 * In this case, it could get pulled off the LWP sleep 2600 * queue (via setrun()) before the following call to 2601 * lwp_release() occurs, and the lock requestor will 2602 * update the waiter bit correctly by re-evaluating it. 2603 */ 2604 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2605 suword8_noerr(&mp->mutex_waiters, mwaiters); 2606 } 2607 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2608 mlocked = 0; 2609 no_fault(); 2610 2611 if (mwatched) { 2612 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2613 mwatched = 0; 2614 } 2615 if (watched) { 2616 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2617 watched = 0; 2618 } 2619 2620 if (timedwait) { 2621 /* 2622 * If we successfully queue the timeout, 2623 * then don't drop t_delay_lock until 2624 * we are on the sleep queue (below). 2625 */ 2626 mutex_enter(&t->t_delay_lock); 2627 if (lwp_timer_enqueue(&lwpt) != 0) { 2628 mutex_exit(&t->t_delay_lock); 2629 imm_timeout = 1; 2630 timedwait = NULL; 2631 } 2632 } 2633 t->t_flag |= T_WAITCVSEM; 2634 lwp_block(&lwpchan); 2635 2636 /* 2637 * Nothing should happen to cause the LWP to go to sleep until after 2638 * it returns from swtch(). 2639 */ 2640 if (timedwait) 2641 mutex_exit(&t->t_delay_lock); 2642 locked = 0; 2643 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2644 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 2645 setrun(t); 2646 swtch(); 2647 2648 /* 2649 * We're back, but we need to work out why. Were we interrupted? Did 2650 * we time out? Were we granted the lock? 2651 */ 2652 error = EAGAIN; 2653 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2654 t->t_writer = 0; 2655 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2656 if (timedwait) 2657 tim = lwp_timer_dequeue(&lwpt); 2658 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2659 error = EINTR; 2660 else if (imm_timeout || (timedwait && tim == -1)) 2661 error = ETIME; 2662 lwp->lwp_asleep = 0; 2663 lwp->lwp_sysabort = 0; 2664 setallwatch(); 2665 2666 /* 2667 * If we were granted the lock we don't care about EINTR or ETIME.
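 *
 * (Spelling out the reasoning: TRW_LOCK_GRANTED means
 * lwp_rwlock_release() has already handed us the lock, so returning
 * EINTR or ETIME now would leave the caller holding a lock it
 * believes it failed to acquire; any pending signal is still
 * delivered on the way back to userland.)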
2668 */ 2669 if (acquired) 2670 error = 0; 2671 2672 if (t->t_mstate == LMS_USER_LOCK) 2673 (void) new_mstate(t, LMS_SYSTEM); 2674 2675 if (error) 2676 return (set_errno(error)); 2677 return (0); 2678 2679 out_drop: 2680 /* 2681 * Make sure that the user level lock is dropped before returning 2682 * to the caller. 2683 */ 2684 if (!mlocked) { 2685 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2686 mlocked = 1; 2687 } 2688 set_owner_pid(mp, 0, 0); 2689 ulock_clear(&mp->mutex_lockw); 2690 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2691 if (mwaiters != 0) { 2692 /* 2693 * See comment above on lock clearing and lwp_release() 2694 * success/failure. 2695 */ 2696 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2697 suword8_noerr(&mp->mutex_waiters, mwaiters); 2698 } 2699 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2700 mlocked = 0; 2701 2702 out_nodrop: 2703 no_fault(); 2704 if (mwatched) 2705 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2706 if (watched) 2707 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2708 if (t->t_mstate == LMS_USER_LOCK) 2709 (void) new_mstate(t, LMS_SYSTEM); 2710 if (error) 2711 return (set_errno(error)); 2712 return (0); 2713 } 2714 2715 /* 2716 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2717 * we never drop the lock. 2718 */ 2719 static int 2720 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2721 { 2722 kthread_t *t = curthread; 2723 proc_t *p = ttoproc(t); 2724 lwpchan_t lwpchan; 2725 volatile uint16_t type = 0; 2726 volatile int error = 0; 2727 volatile int locked = 0; 2728 volatile int watched = 0; 2729 label_t ljb; 2730 volatile int no_lwpchan = 1; 2731 uint32_t rwstate; 2732 2733 /* We only check rw because the mutex is included in it. */ 2734 if ((caddr_t)rw >= p->p_as->a_userlimit) 2735 return (set_errno(EFAULT)); 2736 2737 if (on_fault(&ljb)) { 2738 if (no_lwpchan) { 2739 error = EFAULT; 2740 goto out_nodrop; 2741 } 2742 if (locked) { 2743 locked = 0; 2744 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2745 } 2746 error = EFAULT; 2747 goto out_nodrop; 2748 } 2749 2750 /* 2751 * Force Copy-on-write if necessary and ensure that the 2752 * synchronization object resides in read/write memory. 2753 * Cause an EFAULT return now if this is not so. 2754 */ 2755 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2756 suword16_noerr(&rw->rwlock_type, type); 2757 2758 /* We can only continue for simple USYNC_PROCESS locks. */ 2759 if (type != USYNC_PROCESS) { 2760 error = EINVAL; 2761 goto out_nodrop; 2762 } 2763 2764 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2765 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2766 &lwpchan, LWPCHAN_CVPOOL)) { 2767 error = EFAULT; 2768 goto out_nodrop; 2769 } 2770 2771 no_lwpchan = 0; 2772 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2773 2774 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2775 locked = 1; 2776 2777 /* 2778 * We can resolve multiple readers (except the last reader) here. 2779 * For the last reader or a writer we need lwp_rwlock_release(), 2780 * to which we also delegate the task of copying the new rwstate 2781 * back to userland (see the comment there). 
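 *
 * (For example: with three readers holding the lock, this path
 * simply stores the decremented reader count of two; when the last
 * reader or a writer unlocks, lwp_rwlock_release() chooses the next
 * holder(s) and writes the matching rwstate.)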
2782 */ 2783 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2784 if (rwstate & URW_WRITE_LOCKED) 2785 lwp_rwlock_release(&lwpchan, rw); 2786 else if ((rwstate & URW_READERS_MASK) > 0) { 2787 rwstate--; 2788 if ((rwstate & URW_READERS_MASK) == 0) 2789 lwp_rwlock_release(&lwpchan, rw); 2790 else 2791 suword32_noerr(&rw->rwlock_readers, rwstate); 2792 } 2793 2794 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2795 locked = 0; 2796 error = 0; 2797 2798 out_nodrop: 2799 no_fault(); 2800 if (watched) 2801 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2802 if (error) 2803 return (set_errno(error)); 2804 return (0); 2805 } 2806 2807 int 2808 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2809 { 2810 switch (subcode) { 2811 case 0: 2812 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2813 case 1: 2814 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2815 case 2: 2816 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2817 case 3: 2818 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2819 case 4: 2820 return (lwp_rwlock_unlock(rwlp)); 2821 } 2822 return (set_errno(EINVAL)); 2823 } 2824 2825 /* 2826 * Return the owner of the user-level s-object. 2827 * Since we can't really do this, return NULL. 2828 */ 2829 /* ARGSUSED */ 2830 static kthread_t * 2831 lwpsobj_owner(caddr_t sobj) 2832 { 2833 return ((kthread_t *)NULL); 2834 } 2835 2836 /* 2837 * Wake up a thread asleep on a user-level synchronization 2838 * object. 2839 */ 2840 static void 2841 lwp_unsleep(kthread_t *t) 2842 { 2843 ASSERT(THREAD_LOCK_HELD(t)); 2844 if (t->t_wchan0 != NULL) { 2845 sleepq_head_t *sqh; 2846 sleepq_t *sqp = t->t_sleepq; 2847 2848 if (sqp != NULL) { 2849 sqh = lwpsqhash(&t->t_lwpchan); 2850 ASSERT(&sqh->sq_queue == sqp); 2851 sleepq_unsleep(t); 2852 disp_lock_exit_high(&sqh->sq_lock); 2853 CL_SETRUN(t); 2854 return; 2855 } 2856 } 2857 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2858 } 2859 2860 /* 2861 * Change the priority of a thread asleep on a user-level 2862 * synchronization object. To maintain proper priority order, 2863 * we: 2864 * o dequeue the thread. 2865 * o change its priority. 2866 * o re-enqueue the thread. 2867 * Assumption: the thread is locked on entry. 
2868 */ 2869 static void 2870 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2871 { 2872 ASSERT(THREAD_LOCK_HELD(t)); 2873 if (t->t_wchan0 != NULL) { 2874 sleepq_t *sqp = t->t_sleepq; 2875 2876 sleepq_dequeue(t); 2877 *t_prip = pri; 2878 sleepq_insert(sqp, t); 2879 } else 2880 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2881 } 2882 2883 /* 2884 * Clean up a left-over process-shared robust mutex 2885 */ 2886 static void 2887 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2888 { 2889 uint16_t flag; 2890 uchar_t waiters; 2891 label_t ljb; 2892 pid_t owner_pid; 2893 lwp_mutex_t *lp; 2894 volatile int locked = 0; 2895 volatile int watched = 0; 2896 volatile struct upimutex *upimutex = NULL; 2897 volatile int upilocked = 0; 2898 2899 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2900 != (USYNC_PROCESS | LOCK_ROBUST)) 2901 return; 2902 2903 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2904 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2905 if (on_fault(&ljb)) { 2906 if (locked) 2907 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2908 if (upilocked) 2909 upimutex_unlock((upimutex_t *)upimutex, 0); 2910 goto out; 2911 } 2912 2913 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2914 2915 if (UPIMUTEX(ent->lwpchan_type)) { 2916 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2917 upib_t *upibp = &UPI_CHAIN(lwpchan); 2918 2919 if (owner_pid != curproc->p_pid) 2920 goto out; 2921 mutex_enter(&upibp->upib_lock); 2922 upimutex = upi_get(upibp, &lwpchan); 2923 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2924 mutex_exit(&upibp->upib_lock); 2925 goto out; 2926 } 2927 mutex_exit(&upibp->upib_lock); 2928 upilocked = 1; 2929 flag = lwp_clear_mutex(lp, lockflg); 2930 suword8_noerr(&lp->mutex_lockw, 0); 2931 upimutex_unlock((upimutex_t *)upimutex, flag); 2932 } else { 2933 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2934 locked = 1; 2935 /* 2936 * Clear the spinners count because one of our 2937 * threads could have been spinning for this lock 2938 * at user level when the process was suddenly killed. 2939 * There is no harm in this since user-level libc code 2940 * will adapt to the sudden change in the spinner count. 2941 */ 2942 suword8_noerr(&lp->mutex_spinners, 0); 2943 if (owner_pid != curproc->p_pid) { 2944 /* 2945 * We are not the owner. There may or may not be one. 2946 * If there are waiters, we wake up one or all of them. 2947 * It doesn't hurt to wake them up in error since 2948 * they will just retry the lock and go to sleep 2949 * again if necessary. 2950 */ 2951 fuword8_noerr(&lp->mutex_waiters, &waiters); 2952 if (waiters != 0) { /* there are waiters */ 2953 fuword16_noerr(&lp->mutex_flag, &flag); 2954 if (flag & LOCK_NOTRECOVERABLE) { 2955 lwp_release_all(&ent->lwpchan_lwpchan); 2956 suword8_noerr(&lp->mutex_waiters, 0); 2957 } else if (lwp_release(&ent->lwpchan_lwpchan, 2958 &waiters, 0)) { 2959 suword8_noerr(&lp->mutex_waiters, 2960 waiters); 2961 } 2962 } 2963 } else { 2964 /* 2965 * We are the owner. Release it. 
2966 */ 2967 (void) lwp_clear_mutex(lp, lockflg); 2968 ulock_clear(&lp->mutex_lockw); 2969 fuword8_noerr(&lp->mutex_waiters, &waiters); 2970 if (waiters && 2971 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2972 suword8_noerr(&lp->mutex_waiters, waiters); 2973 } 2974 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2975 } 2976 out: 2977 no_fault(); 2978 if (watched) 2979 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2980 } 2981 2982 /* 2983 * Register a process-shared robust mutex in the lwpchan cache. 2984 */ 2985 int 2986 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr) 2987 { 2988 int error = 0; 2989 volatile int watched; 2990 label_t ljb; 2991 uint8_t type; 2992 lwpchan_t lwpchan; 2993 2994 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2995 return (set_errno(EFAULT)); 2996 2997 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2998 2999 if (on_fault(&ljb)) { 3000 error = EFAULT; 3001 } else { 3002 /* 3003 * Force Copy-on-write if necessary and ensure that the 3004 * synchronization object resides in read/write memory. 3005 * Cause an EFAULT return now if this is not so. 3006 */ 3007 fuword8_noerr(&lp->mutex_type, &type); 3008 suword8_noerr(&lp->mutex_type, type); 3009 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 3010 != (USYNC_PROCESS|LOCK_ROBUST)) { 3011 error = EINVAL; 3012 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp, 3013 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) { 3014 error = EFAULT; 3015 } 3016 } 3017 no_fault(); 3018 if (watched) 3019 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3020 if (error) 3021 return (set_errno(error)); 3022 return (0); 3023 } 3024 3025 /* 3026 * There is a user-level robust lock registration in libc. 3027 * Mark it as invalid by storing -1 into the location of the pointer. 3028 */ 3029 static void 3030 lwp_mutex_unregister(void *uaddr) 3031 { 3032 if (get_udatamodel() == DATAMODEL_NATIVE) { 3033 (void) sulword(uaddr, (ulong_t)-1); 3034 #ifdef _SYSCALL32_IMPL 3035 } else { 3036 (void) suword32(uaddr, (uint32_t)-1); 3037 #endif 3038 } 3039 } 3040 3041 int 3042 lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner) 3043 { 3044 kthread_t *t = curthread; 3045 proc_t *p = ttoproc(t); 3046 int error = 0; 3047 volatile int locked = 0; 3048 volatile int watched = 0; 3049 label_t ljb; 3050 volatile uint8_t type = 0; 3051 uint16_t flag; 3052 lwpchan_t lwpchan; 3053 3054 if ((caddr_t)lp >= p->p_as->a_userlimit) 3055 return (set_errno(EFAULT)); 3056 3057 (void) new_mstate(t, LMS_USER_LOCK); 3058 3059 if (on_fault(&ljb)) { 3060 if (locked) 3061 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3062 error = EFAULT; 3063 goto out; 3064 } 3065 /* 3066 * Force Copy-on-write if necessary and ensure that the 3067 * synchronization object resides in read/write memory. 3068 * Cause an EFAULT return now if this is not so. 3069 */ 3070 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3071 suword8_noerr(&lp->mutex_type, type); 3072 if (UPIMUTEX(type)) { 3073 no_fault(); 3074 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3075 if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED) 3076 set_owner_pid(lp, owner, 3077 (type & USYNC_PROCESS)? 
p->p_pid : 0); 3078 if (error) 3079 return (set_errno(error)); 3080 return (0); 3081 } 3082 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3083 &lwpchan, LWPCHAN_MPPOOL)) { 3084 error = EFAULT; 3085 goto out; 3086 } 3087 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3088 locked = 1; 3089 if (type & LOCK_ROBUST) { 3090 fuword16_noerr(&lp->mutex_flag, &flag); 3091 if (flag & LOCK_NOTRECOVERABLE) { 3092 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3093 error = ENOTRECOVERABLE; 3094 goto out; 3095 } 3096 } 3097 3098 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3099 3100 if (!ulock_try(&lp->mutex_lockw)) 3101 error = EBUSY; 3102 else { 3103 set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0); 3104 if (type & LOCK_ROBUST) { 3105 fuword16_noerr(&lp->mutex_flag, &flag); 3106 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3107 if (flag & LOCK_OWNERDEAD) 3108 error = EOWNERDEAD; 3109 else if (type & USYNC_PROCESS_ROBUST) 3110 error = ELOCKUNMAPPED; 3111 else 3112 error = EOWNERDEAD; 3113 } 3114 } 3115 } 3116 locked = 0; 3117 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3118 out: 3119 3120 if (t->t_mstate == LMS_USER_LOCK) 3121 (void) new_mstate(t, LMS_SYSTEM); 3122 3123 no_fault(); 3124 if (watched) 3125 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3126 if (error) 3127 return (set_errno(error)); 3128 return (0); 3129 } 3130 3131 /* 3132 * unlock the mutex and unblock lwps that are trying to acquire this mutex. 3133 * the blocked lwp resumes and retries to acquire the lock. 3134 */ 3135 int 3136 lwp_mutex_unlock(lwp_mutex_t *lp) 3137 { 3138 proc_t *p = ttoproc(curthread); 3139 lwpchan_t lwpchan; 3140 uchar_t waiters; 3141 volatile int locked = 0; 3142 volatile int watched = 0; 3143 volatile uint8_t type = 0; 3144 label_t ljb; 3145 uint16_t flag; 3146 int error = 0; 3147 3148 if ((caddr_t)lp >= p->p_as->a_userlimit) 3149 return (set_errno(EFAULT)); 3150 3151 if (on_fault(&ljb)) { 3152 if (locked) 3153 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3154 error = EFAULT; 3155 goto out; 3156 } 3157 3158 /* 3159 * Force Copy-on-write if necessary and ensure that the 3160 * synchronization object resides in read/write memory. 3161 * Cause an EFAULT return now if this is not so. 3162 */ 3163 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3164 suword8_noerr(&lp->mutex_type, type); 3165 3166 if (UPIMUTEX(type)) { 3167 no_fault(); 3168 error = lwp_upimutex_unlock(lp, type); 3169 if (error) 3170 return (set_errno(error)); 3171 return (0); 3172 } 3173 3174 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3175 3176 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3177 &lwpchan, LWPCHAN_MPPOOL)) { 3178 error = EFAULT; 3179 goto out; 3180 } 3181 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3182 locked = 1; 3183 if (type & LOCK_ROBUST) { 3184 fuword16_noerr(&lp->mutex_flag, &flag); 3185 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3186 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3187 flag |= LOCK_NOTRECOVERABLE; 3188 suword16_noerr(&lp->mutex_flag, flag); 3189 } 3190 } 3191 set_owner_pid(lp, 0, 0); 3192 ulock_clear(&lp->mutex_lockw); 3193 /* 3194 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3195 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3196 * may fail. If it fails, do not write into the waiter bit. 3197 * The call to lwp_release() might fail due to one of three reasons: 3198 * 3199 * 1. due to the thread which set the waiter bit not actually 3200 * sleeping since it got the lock on the re-try.
The waiter 3201 * bit will then be correctly updated by that thread. This 3202 * window may be closed by reading the wait bit again here 3203 * and not calling lwp_release() at all if it is zero. 3204 * 2. the thread which set the waiter bit and went to sleep 3205 * was woken up by a signal. In that case, the waiter recomputes 3206 * the wait bit in the EINTR return path. 3207 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3208 * memory that has been re-used after the lock was dropped. 3209 * In this case, writing into the waiter bit would cause data 3210 * corruption. 3211 */ 3212 fuword8_noerr(&lp->mutex_waiters, &waiters); 3213 if (waiters) { 3214 if ((type & LOCK_ROBUST) && 3215 (flag & LOCK_NOTRECOVERABLE)) { 3216 lwp_release_all(&lwpchan); 3217 suword8_noerr(&lp->mutex_waiters, 0); 3218 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3219 suword8_noerr(&lp->mutex_waiters, waiters); 3220 } 3221 } 3222 3223 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3224 out: 3225 no_fault(); 3226 if (watched) 3227 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3228 if (error) 3229 return (set_errno(error)); 3230 return (0); 3231 } 3232