/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
static void lwp_mutex_unregister(void *uaddr);
static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
static int iswanted(kthread_t *, lwpchan_t *);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread. This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps is added for numbers of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap(). Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST. Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				/*
				 * If there is a user-level robust lock
				 * registration, mark it as invalid.
				 */
				if ((addr = ent->lwpchan_uaddr) != NULL)
					lwp_mutex_unregister(addr);
				kmem_free(ent, sizeof (*ent));
				atomic_dec_32(&lcp->lwpchan_entries);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table. The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table. We can't actually kmem_free() it
	 * now because someone may still have a pointer to it. Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one. exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it. Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs. All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
			    == (USYNC_PROCESS | LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one. The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
    lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects. We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
    int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_uaddr = uaddr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_inc_32(&lcp->lwpchan_entries);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address. Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
}

/*
 * Put the calling lwp to sleep on the given lwpchan's sleep queue.
 * The caller subsequently drops the lwpchan lock and calls swtch().
 */
static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

/*
 * Find the upimutex on this bucket's chain that matches the given
 * lwpchan; return NULL if there is none.
 */
static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list. Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t. This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned. Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread;	/* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit. Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired. Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up. Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted. The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count. So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK. If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not. If not
If not 889 * readable, the on_fault path will cause a return with EFAULT 890 * as it should. If it is readable, the state of the flag 891 * encodes the robustness state of the lock: 892 * 893 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD 894 * or LOCK_UNMAPPED setting will influence the return code 895 * appropriately. If the upimutex is not locked here, this 896 * could be due to a spurious wake-up or a NOTRECOVERABLE 897 * event. The flag's setting can be used to distinguish 898 * between these two events. 899 */ 900 fuword16_noerr(&lp->mutex_flag, &flag); 901 if (upilocked) { 902 /* 903 * If the thread wakes up from turnstile_block with the lock 904 * held, the flag could not be set to LOCK_NOTRECOVERABLE, 905 * since it would not have been handed-off the lock. 906 * So, no need to check for this case. 907 */ 908 if (nupinest > maxnestupimx && 909 secpolicy_resource(CRED()) != 0) { 910 upimutex_unlock((upimutex_t *)upimutex, flag); 911 upilocked = 0; 912 error = ENOMEM; 913 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 914 if (flag & LOCK_OWNERDEAD) 915 error = EOWNERDEAD; 916 else if (type & USYNC_PROCESS_ROBUST) 917 error = ELOCKUNMAPPED; 918 else 919 error = EOWNERDEAD; 920 } 921 } else { 922 /* 923 * Wake-up without the upimutex held. Either this is a 924 * spurious wake-up (due to signals, forkall(), whatever), or 925 * it is a LOCK_NOTRECOVERABLE robustness event. The setting 926 * of the mutex flag can be used to distinguish between the 927 * two events. 928 */ 929 if (flag & LOCK_NOTRECOVERABLE) { 930 error = ENOTRECOVERABLE; 931 } else { 932 /* 933 * Here, the flag could be set to LOCK_OWNERDEAD or 934 * not. In both cases, this is a spurious wakeup, 935 * since the upi lock is not held, but the thread 936 * has returned from turnstile_block(). 937 * 938 * The user flag could be LOCK_OWNERDEAD if, at the 939 * same time as curthread having been woken up 940 * spuriously, the owner (say Tdead) has died, marked 941 * the mutex flag accordingly, and handed off the lock 942 * to some other waiter (say Tnew). curthread just 943 * happened to read the flag while Tnew has yet to deal 944 * with the owner-dead event. 945 * 946 * In this event, curthread should retry the lock. 947 * If Tnew is able to cleanup the lock, curthread 948 * will eventually get the lock with a zero error code, 949 * If Tnew is unable to cleanup, its eventual call to 950 * unlock the lock will result in the mutex flag being 951 * set to LOCK_NOTRECOVERABLE, and the wake-up of 952 * all waiters, including curthread, which will then 953 * eventually return ENOTRECOVERABLE due to the above 954 * check. 955 * 956 * Of course, if the user-flag is not set with 957 * LOCK_OWNERDEAD, retrying is the thing to do, since 958 * this is definitely a spurious wakeup. 
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error. The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Set the owner and ownerpid fields of a user-level mutex. Note, this function
 * uses the suword*_noerr routines which must be called between
 * on_fault/no_fault. However, this routine itself does not do the
 * on_fault/no_fault and it is assumed all the callers will do so instead!
 */
static void
set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
{
	union {
		uint64_t word64;
		uint32_t word32[2];
	} un;

	un.word64 = (uint64_t)owner;

	suword32_noerr(&lp->mutex_ownerpid, pid);
#if defined(_LP64)
	if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
		suword64_noerr(&lp->mutex_owner, un.word64);
		return;
	}
#endif
	/* mutex is unaligned or we are running on a 32-bit kernel */
	suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up. Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed. If there is a waiter, the lock will be
			 * handed off. If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies. The last owner will free kmem, since
			 * it will find the upimutex has no waiters. So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain. And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state. If the thread indeed blocks,
	 * this state becomes valid. If not, the state is converted back to
	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED) {
			volatile int locked = error != 0;
			if (on_fault(&ljb)) {
				if (locked != 0)
					error = lwp_upimutex_unlock(lp, type);
				else
					error = EFAULT;
				goto upierr;
			}
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
			no_fault();
		}
upierr:
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit. The waiters field in
			 * the lock is not reliable. Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout. In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan. In this case, the waiter bit is
			 * correct. But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

/*
 * Wake up the first thread sleeping on this lwpchan with a matching
 * sync_type. On success, set *waiters to indicate whether any other
 * threads remain asleep on the lwpchan and return 1; return 0 if no
 * thread was woken.
 */
static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false. It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but
			 * has since been re-used to hold a lwp cv or lwp
			 * semaphore. The thread "tp" found to match the lwp
			 * lock's wchan is actually sleeping for the cv or
			 * semaphore which now has the same wchan. In this
			 * case, lwp_release() should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

/*
 * Wake up all threads sleeping on this lwpchan.
 */
static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);	/* drop sleep q lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex. the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock(). The call to
	 * lwp_release() may fail. If it fails, do not write into the
	 * waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL,
 * it is used as an in/out parameter. On entry, it contains the relative
 * time until timeout. On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released. This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex. (pagefaults are
		 * possible here.)
		 */
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs. In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR. See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}

/*
 * wakeup one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
1871 */ 1872 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1873 suword16_noerr(&cv->cond_type, type); 1874 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1875 &lwpchan, LWPCHAN_CVPOOL)) { 1876 error = EFAULT; 1877 goto out; 1878 } 1879 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1880 locked = 1; 1881 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1882 if (waiters != 0) { 1883 /* 1884 * The following call to lwp_release() might fail but it is 1885 * OK to write into the waiters bit below, since the memory 1886 * could not have been re-used or unmapped (for correctly 1887 * written user programs) as in the case of lwp_mutex_wakeup(). 1888 * For an incorrect program, we should not care about data 1889 * corruption since this is just one instance of other places 1890 * where corruption can occur for such a program. Of course 1891 * if the memory is unmapped, normal fault recovery occurs. 1892 */ 1893 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1894 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1895 } 1896 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1897 out: 1898 no_fault(); 1899 if (watched) 1900 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1901 if (error) 1902 return (set_errno(error)); 1903 return (0); 1904 } 1905 1906 /* 1907 * wakeup every lwp that's blocked on this condition variable. 1908 */ 1909 int 1910 lwp_cond_broadcast(lwp_cond_t *cv) 1911 { 1912 proc_t *p = ttoproc(curthread); 1913 lwpchan_t lwpchan; 1914 volatile uint16_t type = 0; 1915 volatile int locked = 0; 1916 volatile int watched = 0; 1917 label_t ljb; 1918 uchar_t waiters; 1919 int error = 0; 1920 1921 if ((caddr_t)cv >= p->p_as->a_userlimit) 1922 return (set_errno(EFAULT)); 1923 1924 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1925 1926 if (on_fault(&ljb)) { 1927 if (locked) 1928 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1929 error = EFAULT; 1930 goto out; 1931 } 1932 /* 1933 * Force Copy-on-write if necessary and ensure that the 1934 * synchronization object resides in read/write memory. 1935 * Cause an EFAULT return now if this is not so. 1936 */ 1937 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1938 suword16_noerr(&cv->cond_type, type); 1939 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1940 &lwpchan, LWPCHAN_CVPOOL)) { 1941 error = EFAULT; 1942 goto out; 1943 } 1944 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1945 locked = 1; 1946 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1947 if (waiters != 0) { 1948 lwp_release_all(&lwpchan); 1949 suword8_noerr(&cv->cond_waiters_kernel, 0); 1950 } 1951 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1952 out: 1953 no_fault(); 1954 if (watched) 1955 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1956 if (error) 1957 return (set_errno(error)); 1958 return (0); 1959 } 1960 1961 int 1962 lwp_sema_trywait(lwp_sema_t *sp) 1963 { 1964 kthread_t *t = curthread; 1965 proc_t *p = ttoproc(t); 1966 label_t ljb; 1967 volatile int locked = 0; 1968 volatile int watched = 0; 1969 volatile uint16_t type = 0; 1970 int count; 1971 lwpchan_t lwpchan; 1972 uchar_t waiters; 1973 int error = 0; 1974 1975 if ((caddr_t)sp >= p->p_as->a_userlimit) 1976 return (set_errno(EFAULT)); 1977 1978 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1979 1980 if (on_fault(&ljb)) { 1981 if (locked) 1982 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1983 error = EFAULT; 1984 goto out; 1985 } 1986 /* 1987 * Force Copy-on-write if necessary and ensure that the 1988 * synchronization object resides in read/write memory. 
1989 * Cause an EFAULT return now if this is not so. 1990 */ 1991 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1992 suword16_noerr((void *)&sp->sema_type, type); 1993 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1994 &lwpchan, LWPCHAN_CVPOOL)) { 1995 error = EFAULT; 1996 goto out; 1997 } 1998 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1999 locked = 1; 2000 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2001 if (count == 0) 2002 error = EBUSY; 2003 else 2004 suword32_noerr((void *)&sp->sema_count, --count); 2005 if (count != 0) { 2006 fuword8_noerr(&sp->sema_waiters, &waiters); 2007 if (waiters != 0) { 2008 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2009 suword8_noerr(&sp->sema_waiters, waiters); 2010 } 2011 } 2012 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2013 out: 2014 no_fault(); 2015 if (watched) 2016 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2017 if (error) 2018 return (set_errno(error)); 2019 return (0); 2020 } 2021 2022 /* 2023 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 2024 */ 2025 int 2026 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2027 { 2028 kthread_t *t = curthread; 2029 klwp_t *lwp = ttolwp(t); 2030 proc_t *p = ttoproc(t); 2031 lwp_timer_t lwpt; 2032 caddr_t timedwait; 2033 clock_t tim = -1; 2034 label_t ljb; 2035 volatile int locked = 0; 2036 volatile int watched = 0; 2037 volatile uint16_t type = 0; 2038 int count; 2039 lwpchan_t lwpchan; 2040 uchar_t waiters; 2041 int error = 0; 2042 int time_error; 2043 int imm_timeout = 0; 2044 int imm_unpark = 0; 2045 2046 if ((caddr_t)sp >= p->p_as->a_userlimit) 2047 return (set_errno(EFAULT)); 2048 2049 /* 2050 * Put the lwp in an orderly state for debugging, 2051 * in case we are stopped while sleeping, below. 2052 */ 2053 prstop(PR_REQUESTED, 0); 2054 2055 timedwait = (caddr_t)tsp; 2056 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2057 lwpt.lwpt_imm_timeout) { 2058 imm_timeout = 1; 2059 timedwait = NULL; 2060 } 2061 2062 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2063 2064 if (on_fault(&ljb)) { 2065 if (locked) 2066 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2067 error = EFAULT; 2068 goto out; 2069 } 2070 /* 2071 * Force Copy-on-write if necessary and ensure that the 2072 * synchronization object resides in read/write memory. 2073 * Cause an EFAULT return now if this is not so. 2074 */ 2075 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2076 suword16_noerr((void *)&sp->sema_type, type); 2077 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2078 &lwpchan, LWPCHAN_CVPOOL)) { 2079 error = EFAULT; 2080 goto out; 2081 } 2082 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2083 locked = 1; 2084 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2085 while (error == 0 && count == 0) { 2086 if (time_error) { 2087 /* 2088 * The SUSV3 Posix spec is very clear that we 2089 * should get no error from validating the 2090 * timer until we would actually sleep. 2091 */ 2092 error = time_error; 2093 break; 2094 } 2095 suword8_noerr(&sp->sema_waiters, 1); 2096 if (watched) 2097 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2098 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2099 /* 2100 * We received a signal at user-level before calling 2101 * here or another thread wants us to return 2102 * immediately with EINTR. See lwp_unpark(). 
2103 */ 2104 imm_unpark = 1; 2105 t->t_unpark = 0; 2106 timedwait = NULL; 2107 } else if (timedwait) { 2108 /* 2109 * If we successfully queue the timeout, 2110 * then don't drop t_delay_lock until 2111 * we are on the sleep queue (below). 2112 */ 2113 mutex_enter(&t->t_delay_lock); 2114 if (lwp_timer_enqueue(&lwpt) != 0) { 2115 mutex_exit(&t->t_delay_lock); 2116 imm_timeout = 1; 2117 timedwait = NULL; 2118 } 2119 } 2120 t->t_flag |= T_WAITCVSEM; 2121 lwp_block(&lwpchan); 2122 /* 2123 * Nothing should happen to cause the lwp to sleep 2124 * again until after it returns from swtch(). 2125 */ 2126 if (timedwait) 2127 mutex_exit(&t->t_delay_lock); 2128 locked = 0; 2129 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2130 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2131 (imm_timeout | imm_unpark)) 2132 setrun(t); 2133 swtch(); 2134 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2135 if (timedwait) 2136 tim = lwp_timer_dequeue(&lwpt); 2137 setallwatch(); 2138 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2139 MUSTRETURN(p, t) || imm_unpark) 2140 error = EINTR; 2141 else if (imm_timeout || (timedwait && tim == -1)) 2142 error = ETIME; 2143 lwp->lwp_asleep = 0; 2144 lwp->lwp_sysabort = 0; 2145 watched = watch_disable_addr((caddr_t)sp, 2146 sizeof (*sp), S_WRITE); 2147 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2148 locked = 1; 2149 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2150 } 2151 if (error == 0) 2152 suword32_noerr((void *)&sp->sema_count, --count); 2153 if (count != 0) { 2154 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2155 suword8_noerr(&sp->sema_waiters, waiters); 2156 } 2157 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2158 out: 2159 no_fault(); 2160 if (watched) 2161 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2162 if (tsp && check_park && !time_error) 2163 error = lwp_timer_copyout(&lwpt, error); 2164 if (error) 2165 return (set_errno(error)); 2166 return (0); 2167 } 2168 2169 int 2170 lwp_sema_post(lwp_sema_t *sp) 2171 { 2172 proc_t *p = ttoproc(curthread); 2173 label_t ljb; 2174 volatile int locked = 0; 2175 volatile int watched = 0; 2176 volatile uint16_t type = 0; 2177 int count; 2178 lwpchan_t lwpchan; 2179 uchar_t waiters; 2180 int error = 0; 2181 2182 if ((caddr_t)sp >= p->p_as->a_userlimit) 2183 return (set_errno(EFAULT)); 2184 2185 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2186 2187 if (on_fault(&ljb)) { 2188 if (locked) 2189 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2190 error = EFAULT; 2191 goto out; 2192 } 2193 /* 2194 * Force Copy-on-write if necessary and ensure that the 2195 * synchronization object resides in read/write memory. 2196 * Cause an EFAULT return now if this is not so. 
2197 */ 2198 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2199 suword16_noerr(&sp->sema_type, type); 2200 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2201 &lwpchan, LWPCHAN_CVPOOL)) { 2202 error = EFAULT; 2203 goto out; 2204 } 2205 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2206 locked = 1; 2207 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2208 if (count == _SEM_VALUE_MAX) 2209 error = EOVERFLOW; 2210 else 2211 suword32_noerr(&sp->sema_count, ++count); 2212 if (count == 1) { 2213 fuword8_noerr(&sp->sema_waiters, &waiters); 2214 if (waiters) { 2215 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2216 suword8_noerr(&sp->sema_waiters, waiters); 2217 } 2218 } 2219 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2220 out: 2221 no_fault(); 2222 if (watched) 2223 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2224 if (error) 2225 return (set_errno(error)); 2226 return (0); 2227 } 2228 2229 #define TRW_WANT_WRITE 0x1 2230 #define TRW_LOCK_GRANTED 0x2 2231 2232 #define READ_LOCK 0 2233 #define WRITE_LOCK 1 2234 #define TRY_FLAG 0x10 2235 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2236 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2237 2238 /* 2239 * Release one writer or one or more readers. Compute the rwstate word to 2240 * reflect the new state of the queue. For a safe hand-off we copy the new 2241 * rwstate value back to userland before we wake any of the new lock holders. 2242 * 2243 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2244 * being given precedence over readers of the same priority). 2245 * 2246 * If the first thread is a reader we scan the queue releasing all readers 2247 * until we hit a writer or the end of the queue. If the first thread is a 2248 * writer we still need to check for another writer. 2249 */ 2250 void 2251 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2252 { 2253 sleepq_head_t *sqh; 2254 kthread_t *tp; 2255 kthread_t **tpp; 2256 kthread_t *tpnext; 2257 kthread_t *wakelist = NULL; 2258 uint32_t rwstate = 0; 2259 int wcount = 0; 2260 int rcount = 0; 2261 2262 sqh = lwpsqhash(lwpchan); 2263 disp_lock_enter(&sqh->sq_lock); 2264 tpp = &sqh->sq_queue.sq_first; 2265 while ((tp = *tpp) != NULL) { 2266 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2267 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2268 if (tp->t_writer & TRW_WANT_WRITE) { 2269 if ((wcount++ == 0) && (rcount == 0)) { 2270 rwstate |= URW_WRITE_LOCKED; 2271 2272 /* Just one writer to wake. */ 2273 sleepq_unlink(tpp, tp); 2274 wakelist = tp; 2275 2276 /* tpp already set for next thread. */ 2277 continue; 2278 } else { 2279 rwstate |= URW_HAS_WAITERS; 2280 /* We need look no further. */ 2281 break; 2282 } 2283 } else { 2284 rcount++; 2285 if (wcount == 0) { 2286 rwstate++; 2287 2288 /* Add reader to wake list. */ 2289 sleepq_unlink(tpp, tp); 2290 tp->t_link = wakelist; 2291 wakelist = tp; 2292 2293 /* tpp already set for next thread. */ 2294 continue; 2295 } else { 2296 rwstate |= URW_HAS_WAITERS; 2297 /* We need look no further. */ 2298 break; 2299 } 2300 } 2301 } 2302 tpp = &tp->t_link; 2303 } 2304 2305 /* Copy the new rwstate back to userland. */ 2306 suword32_noerr(&rw->rwlock_readers, rwstate); 2307 2308 /* Wake the new lock holder(s) up. 
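	 *
	 * As a sketch of the policy implemented by the scan above: with a
	 * sleep queue ordered reader-A, reader-B, writer-C, reader-D, the
	 * loop unlinks and wakes A and B, stops at C (a waiting writer)
	 * after setting URW_HAS_WAITERS, and leaves C and D queued, so the
	 * rwstate word copied out above shows a reader count of two with
	 * URW_HAS_WAITERS set.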
*/ 2309 tp = wakelist; 2310 while (tp != NULL) { 2311 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2312 tp->t_wchan0 = NULL; 2313 tp->t_wchan = NULL; 2314 tp->t_sobj_ops = NULL; 2315 tp->t_writer |= TRW_LOCK_GRANTED; 2316 tpnext = tp->t_link; 2317 tp->t_link = NULL; 2318 CL_WAKEUP(tp); 2319 thread_unlock_high(tp); 2320 tp = tpnext; 2321 } 2322 2323 disp_lock_exit(&sqh->sq_lock); 2324 } 2325 2326 /* 2327 * We enter here holding the user-level mutex, which we must release before 2328 * returning or blocking. Based on lwp_cond_wait(). 2329 */ 2330 static int 2331 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2332 { 2333 lwp_mutex_t *mp = NULL; 2334 kthread_t *t = curthread; 2335 kthread_t *tp; 2336 klwp_t *lwp = ttolwp(t); 2337 proc_t *p = ttoproc(t); 2338 lwp_timer_t lwpt; 2339 lwpchan_t lwpchan; 2340 lwpchan_t mlwpchan; 2341 caddr_t timedwait; 2342 volatile uint16_t type = 0; 2343 volatile uint8_t mtype = 0; 2344 uchar_t mwaiters; 2345 volatile int error = 0; 2346 int time_error; 2347 clock_t tim = -1; 2348 volatile int locked = 0; 2349 volatile int mlocked = 0; 2350 volatile int watched = 0; 2351 volatile int mwatched = 0; 2352 label_t ljb; 2353 volatile int no_lwpchan = 1; 2354 int imm_timeout = 0; 2355 int try_flag; 2356 uint32_t rwstate; 2357 int acquired = 0; 2358 2359 /* We only check rw because the mutex is included in it. */ 2360 if ((caddr_t)rw >= p->p_as->a_userlimit) 2361 return (set_errno(EFAULT)); 2362 2363 /* 2364 * Put the lwp in an orderly state for debugging, 2365 * in case we are stopped while sleeping, below. 2366 */ 2367 prstop(PR_REQUESTED, 0); 2368 2369 /* We must only report this error if we are about to sleep (later). */ 2370 timedwait = (caddr_t)tsp; 2371 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2372 lwpt.lwpt_imm_timeout) { 2373 imm_timeout = 1; 2374 timedwait = NULL; 2375 } 2376 2377 (void) new_mstate(t, LMS_USER_LOCK); 2378 2379 if (on_fault(&ljb)) { 2380 if (no_lwpchan) { 2381 error = EFAULT; 2382 goto out_nodrop; 2383 } 2384 if (mlocked) { 2385 mlocked = 0; 2386 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2387 } 2388 if (locked) { 2389 locked = 0; 2390 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2391 } 2392 /* 2393 * Set up another on_fault() for a possible fault 2394 * on the user lock accessed at "out_drop". 2395 */ 2396 if (on_fault(&ljb)) { 2397 if (mlocked) { 2398 mlocked = 0; 2399 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2400 } 2401 error = EFAULT; 2402 goto out_nodrop; 2403 } 2404 error = EFAULT; 2405 goto out_nodrop; 2406 } 2407 2408 /* Process rd_wr (including sanity check). */ 2409 try_flag = (rd_wr & TRY_FLAG); 2410 rd_wr &= ~TRY_FLAG; 2411 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2412 error = EINVAL; 2413 goto out_nodrop; 2414 } 2415 2416 /* 2417 * Force Copy-on-write if necessary and ensure that the 2418 * synchronization object resides in read/write memory. 2419 * Cause an EFAULT return now if this is not so. 2420 */ 2421 mp = &rw->mutex; 2422 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2423 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2424 suword8_noerr(&mp->mutex_type, mtype); 2425 suword16_noerr(&rw->rwlock_type, type); 2426 2427 /* We can only continue for simple USYNC_PROCESS locks. */ 2428 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2429 error = EINVAL; 2430 goto out_nodrop; 2431 } 2432 2433 /* Convert user level mutex, "mp", to a unique lwpchan. 
 */
2434 	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
2435 	    &mlwpchan, LWPCHAN_MPPOOL)) {
2436 		error = EFAULT;
2437 		goto out_nodrop;
2438 	}
2439 
2440 	/* Convert user level rwlock, "rw", to a unique lwpchan. */
2441 	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2442 	    &lwpchan, LWPCHAN_CVPOOL)) {
2443 		error = EFAULT;
2444 		goto out_nodrop;
2445 	}
2446 
2447 	no_lwpchan = 0;
2448 	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2449 	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2450 
2451 	/*
2452 	 * lwpchan_lock() ensures that the calling LWP is put to sleep
2453 	 * atomically with respect to a possible wakeup which is a result
2454 	 * of lwp_rwlock_unlock().
2455 	 *
2456 	 * What's misleading is that the LWP is put to sleep after the
2457 	 * rwlock's mutex is released. This is OK as long as the release
2458 	 * operation is also done while holding mlwpchan. The LWP is then
2459 	 * put to sleep when the possibility of pagefaulting or sleeping
2460 	 * has been completely eliminated.
2461 	 */
2462 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2463 	locked = 1;
2464 	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2465 	mlocked = 1;
2466 
2467 	/*
2468 	 * Fetch the current rwlock state.
2469 	 *
2470 	 * The possibility of spurious wake-ups or killed waiters means
2471 	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
2472 	 * We only fix these if they are important to us.
2473 	 *
2474 	 * Although various error states can be observed here (e.g. the lock
2475 	 * is not held, but there are waiters) we assume these are application
2476 	 * errors and so we take no corrective action.
2477 	 */
2478 	fuword32_noerr(&rw->rwlock_readers, &rwstate);
2479 	/*
2480 	 * We cannot legitimately get here from user-level
2481 	 * without URW_HAS_WAITERS being set.
2482 	 * Set it now to guard against user-level error.
2483 	 */
2484 	rwstate |= URW_HAS_WAITERS;
2485 
2486 	/*
2487 	 * We can try only if the lock isn't held by a writer.
2488 	 */
2489 	if (!(rwstate & URW_WRITE_LOCKED)) {
2490 		tp = lwp_queue_waiter(&lwpchan);
2491 		if (tp == NULL) {
2492 			/*
2493 			 * Hmmm, rwstate indicates waiters but there are
2494 			 * none queued. This could just be the result of a
2495 			 * spurious wakeup, so let's ignore it.
2496 			 *
2497 			 * We now have a chance to acquire the lock
2498 			 * uncontended, but this is the last chance for
2499 			 * a writer to acquire the lock without blocking.
2500 			 */
2501 			if (rd_wr == READ_LOCK) {
2502 				rwstate++;
2503 				acquired = 1;
2504 			} else if ((rwstate & URW_READERS_MASK) == 0) {
2505 				rwstate |= URW_WRITE_LOCKED;
2506 				acquired = 1;
2507 			}
2508 		} else if (rd_wr == READ_LOCK) {
2509 			/*
2510 			 * This is the last chance for a reader to acquire
2511 			 * the lock now, but it can only do so if there is
2512 			 * no writer of equal or greater priority at the
2513 			 * head of the queue.
2514 			 *
2515 			 * It is also just possible that there is a reader
2516 			 * at the head of the queue. This may be the result
2517 			 * of a spurious wakeup or an application failure.
2518 			 * In this case we only acquire the lock if we have
2519 			 * equal or greater priority. It is not our job to
2520 			 * release spurious waiters.
2521 			 */
2522 			pri_t our_pri = DISP_PRIO(t);
2523 			pri_t his_pri = DISP_PRIO(tp);
2524 
2525 			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
2526 			    !(tp->t_writer & TRW_WANT_WRITE))) {
2527 				rwstate++;
2528 				acquired = 1;
2529 			}
2530 		}
2531 	}
2532 
2533 	if (acquired || try_flag || time_error) {
2534 		/*
2535 		 * We're not going to block this time.
2536 		 */
2537 		suword32_noerr(&rw->rwlock_readers, rwstate);
2538 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2539 		locked = 0;
2540 
2541 		if (acquired) {
2542 			/*
2543 			 * Got the lock!
2544 			 */
2545 			error = 0;
2546 
2547 		} else if (try_flag) {
2548 			/*
2549 			 * We didn't get the lock and we're about to block.
2550 			 * If we're doing a trylock, return EBUSY instead.
2551 			 */
2552 			error = EBUSY;
2553 
2554 		} else if (time_error) {
2555 			/*
2556 			 * The SUSV3 POSIX spec is very clear that we should
2557 			 * get no error from validating the timer (above)
2558 			 * until we would actually sleep.
2559 			 */
2560 			error = time_error;
2561 		}
2562 
2563 		goto out_drop;
2564 	}
2565 
2566 	/*
2567 	 * We're about to block, so indicate what kind of waiter we are.
2568 	 */
2569 	t->t_writer = 0;
2570 	if (rd_wr == WRITE_LOCK)
2571 		t->t_writer = TRW_WANT_WRITE;
2572 	suword32_noerr(&rw->rwlock_readers, rwstate);
2573 
2574 	/*
2575 	 * Unlock the rwlock's mutex (pagefaults are possible here).
2576 	 */
2577 	set_owner_pid(mp, 0, 0);
2578 	ulock_clear(&mp->mutex_lockw);
2579 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2580 	if (mwaiters != 0) {
2581 		/*
2582 		 * Given the locking of mlwpchan around the release of
2583 		 * the mutex and checking for waiters, the following
2584 		 * call to lwp_release() can fail ONLY if the lock
2585 		 * acquirer is interrupted after setting the waiter bit,
2586 		 * calling lwp_block() and releasing mlwpchan.
2587 		 * In this case, it could get pulled off the LWP sleep
2588 		 * queue (via setrun()) before the following call to
2589 		 * lwp_release() occurs, and the lock requestor will
2590 		 * update the waiter bit correctly by re-evaluating it.
2591 		 */
2592 		if (lwp_release(&mlwpchan, &mwaiters, 0))
2593 			suword8_noerr(&mp->mutex_waiters, mwaiters);
2594 	}
2595 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2596 	mlocked = 0;
2597 	no_fault();
2598 
2599 	if (mwatched) {
2600 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2601 		mwatched = 0;
2602 	}
2603 	if (watched) {
2604 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2605 		watched = 0;
2606 	}
2607 
2608 	if (timedwait) {
2609 		/*
2610 		 * If we successfully queue the timeout,
2611 		 * then don't drop t_delay_lock until
2612 		 * we are on the sleep queue (below).
2613 		 */
2614 		mutex_enter(&t->t_delay_lock);
2615 		if (lwp_timer_enqueue(&lwpt) != 0) {
2616 			mutex_exit(&t->t_delay_lock);
2617 			imm_timeout = 1;
2618 			timedwait = NULL;
2619 		}
2620 	}
2621 	t->t_flag |= T_WAITCVSEM;
2622 	lwp_block(&lwpchan);
2623 
2624 	/*
2625 	 * Nothing should happen to cause the LWP to go to sleep until after
2626 	 * it returns from swtch().
2627 	 */
2628 	if (timedwait)
2629 		mutex_exit(&t->t_delay_lock);
2630 	locked = 0;
2631 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2632 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
2633 		setrun(t);
2634 	swtch();
2635 
2636 	/*
2637 	 * We're back, but we need to work out why. Were we interrupted? Did
2638 	 * we time out? Were we granted the lock?
2639 	 */
2640 	error = EAGAIN;
2641 	acquired = (t->t_writer & TRW_LOCK_GRANTED);
2642 	t->t_writer = 0;
2643 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2644 	if (timedwait)
2645 		tim = lwp_timer_dequeue(&lwpt);
2646 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
2647 		error = EINTR;
2648 	else if (imm_timeout || (timedwait && tim == -1))
2649 		error = ETIME;
2650 	lwp->lwp_asleep = 0;
2651 	lwp->lwp_sysabort = 0;
2652 	setallwatch();
2653 
2654 	/*
2655 	 * If we were granted the lock we don't care about EINTR or ETIME.
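	 * The waker accounted for us in the rwstate word it copied out and
	 * handed us the lock by setting TRW_LOCK_GRANTED (see
	 * lwp_rwlock_release()), so we must report success here; an EINTR
	 * or ETIME return would leave userland believing it does not hold
	 * a lock that has in fact been granted to it.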
2656 */ 2657 if (acquired) 2658 error = 0; 2659 2660 if (t->t_mstate == LMS_USER_LOCK) 2661 (void) new_mstate(t, LMS_SYSTEM); 2662 2663 if (error) 2664 return (set_errno(error)); 2665 return (0); 2666 2667 out_drop: 2668 /* 2669 * Make sure that the user level lock is dropped before returning 2670 * to the caller. 2671 */ 2672 if (!mlocked) { 2673 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2674 mlocked = 1; 2675 } 2676 set_owner_pid(mp, 0, 0); 2677 ulock_clear(&mp->mutex_lockw); 2678 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2679 if (mwaiters != 0) { 2680 /* 2681 * See comment above on lock clearing and lwp_release() 2682 * success/failure. 2683 */ 2684 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2685 suword8_noerr(&mp->mutex_waiters, mwaiters); 2686 } 2687 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2688 mlocked = 0; 2689 2690 out_nodrop: 2691 no_fault(); 2692 if (mwatched) 2693 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2694 if (watched) 2695 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2696 if (t->t_mstate == LMS_USER_LOCK) 2697 (void) new_mstate(t, LMS_SYSTEM); 2698 if (error) 2699 return (set_errno(error)); 2700 return (0); 2701 } 2702 2703 /* 2704 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2705 * we never drop the lock. 2706 */ 2707 static int 2708 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2709 { 2710 kthread_t *t = curthread; 2711 proc_t *p = ttoproc(t); 2712 lwpchan_t lwpchan; 2713 volatile uint16_t type = 0; 2714 volatile int error = 0; 2715 volatile int locked = 0; 2716 volatile int watched = 0; 2717 label_t ljb; 2718 volatile int no_lwpchan = 1; 2719 uint32_t rwstate; 2720 2721 /* We only check rw because the mutex is included in it. */ 2722 if ((caddr_t)rw >= p->p_as->a_userlimit) 2723 return (set_errno(EFAULT)); 2724 2725 if (on_fault(&ljb)) { 2726 if (no_lwpchan) { 2727 error = EFAULT; 2728 goto out_nodrop; 2729 } 2730 if (locked) { 2731 locked = 0; 2732 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2733 } 2734 error = EFAULT; 2735 goto out_nodrop; 2736 } 2737 2738 /* 2739 * Force Copy-on-write if necessary and ensure that the 2740 * synchronization object resides in read/write memory. 2741 * Cause an EFAULT return now if this is not so. 2742 */ 2743 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2744 suword16_noerr(&rw->rwlock_type, type); 2745 2746 /* We can only continue for simple USYNC_PROCESS locks. */ 2747 if (type != USYNC_PROCESS) { 2748 error = EINVAL; 2749 goto out_nodrop; 2750 } 2751 2752 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2753 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2754 &lwpchan, LWPCHAN_CVPOOL)) { 2755 error = EFAULT; 2756 goto out_nodrop; 2757 } 2758 2759 no_lwpchan = 0; 2760 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2761 2762 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2763 locked = 1; 2764 2765 /* 2766 * We can resolve multiple readers (except the last reader) here. 2767 * For the last reader or a writer we need lwp_rwlock_release(), 2768 * to which we also delegate the task of copying the new rwstate 2769 * back to userland (see the comment there). 
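	 *
	 * For example, if three readers hold the lock, the URW_READERS_MASK
	 * portion of rwstate is 3: the first two unlockers simply store 2
	 * and then 1 back to userland below, while the third sees the count
	 * drop to zero and calls lwp_rwlock_release() to hand the lock to
	 * any queued waiter (or to clear the state if there is none).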
2770 */ 2771 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2772 if (rwstate & URW_WRITE_LOCKED) 2773 lwp_rwlock_release(&lwpchan, rw); 2774 else if ((rwstate & URW_READERS_MASK) > 0) { 2775 rwstate--; 2776 if ((rwstate & URW_READERS_MASK) == 0) 2777 lwp_rwlock_release(&lwpchan, rw); 2778 else 2779 suword32_noerr(&rw->rwlock_readers, rwstate); 2780 } 2781 2782 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2783 locked = 0; 2784 error = 0; 2785 2786 out_nodrop: 2787 no_fault(); 2788 if (watched) 2789 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2790 if (error) 2791 return (set_errno(error)); 2792 return (0); 2793 } 2794 2795 int 2796 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2797 { 2798 switch (subcode) { 2799 case 0: 2800 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2801 case 1: 2802 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2803 case 2: 2804 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2805 case 3: 2806 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2807 case 4: 2808 return (lwp_rwlock_unlock(rwlp)); 2809 } 2810 return (set_errno(EINVAL)); 2811 } 2812 2813 /* 2814 * Return the owner of the user-level s-object. 2815 * Since we can't really do this, return NULL. 2816 */ 2817 /* ARGSUSED */ 2818 static kthread_t * 2819 lwpsobj_owner(caddr_t sobj) 2820 { 2821 return ((kthread_t *)NULL); 2822 } 2823 2824 /* 2825 * Wake up a thread asleep on a user-level synchronization 2826 * object. 2827 */ 2828 static void 2829 lwp_unsleep(kthread_t *t) 2830 { 2831 ASSERT(THREAD_LOCK_HELD(t)); 2832 if (t->t_wchan0 != NULL) { 2833 sleepq_head_t *sqh; 2834 sleepq_t *sqp = t->t_sleepq; 2835 2836 if (sqp != NULL) { 2837 sqh = lwpsqhash(&t->t_lwpchan); 2838 ASSERT(&sqh->sq_queue == sqp); 2839 sleepq_unsleep(t); 2840 disp_lock_exit_high(&sqh->sq_lock); 2841 CL_SETRUN(t); 2842 return; 2843 } 2844 } 2845 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2846 } 2847 2848 /* 2849 * Change the priority of a thread asleep on a user-level 2850 * synchronization object. To maintain proper priority order, 2851 * we: 2852 * o dequeue the thread. 2853 * o change its priority. 2854 * o re-enqueue the thread. 2855 * Assumption: the thread is locked on entry. 
2856 */ 2857 static void 2858 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2859 { 2860 ASSERT(THREAD_LOCK_HELD(t)); 2861 if (t->t_wchan0 != NULL) { 2862 sleepq_t *sqp = t->t_sleepq; 2863 2864 sleepq_dequeue(t); 2865 *t_prip = pri; 2866 sleepq_insert(sqp, t); 2867 } else 2868 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2869 } 2870 2871 /* 2872 * Clean up a left-over process-shared robust mutex 2873 */ 2874 static void 2875 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2876 { 2877 uint16_t flag; 2878 uchar_t waiters; 2879 label_t ljb; 2880 pid_t owner_pid; 2881 lwp_mutex_t *lp; 2882 volatile int locked = 0; 2883 volatile int watched = 0; 2884 volatile struct upimutex *upimutex = NULL; 2885 volatile int upilocked = 0; 2886 2887 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2888 != (USYNC_PROCESS | LOCK_ROBUST)) 2889 return; 2890 2891 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2892 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2893 if (on_fault(&ljb)) { 2894 if (locked) 2895 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2896 if (upilocked) 2897 upimutex_unlock((upimutex_t *)upimutex, 0); 2898 goto out; 2899 } 2900 2901 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2902 2903 if (UPIMUTEX(ent->lwpchan_type)) { 2904 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2905 upib_t *upibp = &UPI_CHAIN(lwpchan); 2906 2907 if (owner_pid != curproc->p_pid) 2908 goto out; 2909 mutex_enter(&upibp->upib_lock); 2910 upimutex = upi_get(upibp, &lwpchan); 2911 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2912 mutex_exit(&upibp->upib_lock); 2913 goto out; 2914 } 2915 mutex_exit(&upibp->upib_lock); 2916 upilocked = 1; 2917 flag = lwp_clear_mutex(lp, lockflg); 2918 suword8_noerr(&lp->mutex_lockw, 0); 2919 upimutex_unlock((upimutex_t *)upimutex, flag); 2920 } else { 2921 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2922 locked = 1; 2923 /* 2924 * Clear the spinners count because one of our 2925 * threads could have been spinning for this lock 2926 * at user level when the process was suddenly killed. 2927 * There is no harm in this since user-level libc code 2928 * will adapt to the sudden change in the spinner count. 2929 */ 2930 suword8_noerr(&lp->mutex_spinners, 0); 2931 if (owner_pid != curproc->p_pid) { 2932 /* 2933 * We are not the owner. There may or may not be one. 2934 * If there are waiters, we wake up one or all of them. 2935 * It doesn't hurt to wake them up in error since 2936 * they will just retry the lock and go to sleep 2937 * again if necessary. 2938 */ 2939 fuword8_noerr(&lp->mutex_waiters, &waiters); 2940 if (waiters != 0) { /* there are waiters */ 2941 fuword16_noerr(&lp->mutex_flag, &flag); 2942 if (flag & LOCK_NOTRECOVERABLE) { 2943 lwp_release_all(&ent->lwpchan_lwpchan); 2944 suword8_noerr(&lp->mutex_waiters, 0); 2945 } else if (lwp_release(&ent->lwpchan_lwpchan, 2946 &waiters, 0)) { 2947 suword8_noerr(&lp->mutex_waiters, 2948 waiters); 2949 } 2950 } 2951 } else { 2952 /* 2953 * We are the owner. Release it. 
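		 * lwp_clear_mutex() below records the lockflg value
		 * (LOCK_OWNERDEAD or LOCK_UNMAPPED) in mutex_flag, so the
		 * next acquirer will see EOWNERDEAD or ELOCKUNMAPPED from
		 * lwp_mutex_trylock()/lwp_mutex_timedlock() and gets a
		 * chance to repair the state the mutex protects.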
2954 */ 2955 (void) lwp_clear_mutex(lp, lockflg); 2956 ulock_clear(&lp->mutex_lockw); 2957 fuword8_noerr(&lp->mutex_waiters, &waiters); 2958 if (waiters && 2959 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2960 suword8_noerr(&lp->mutex_waiters, waiters); 2961 } 2962 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2963 } 2964 out: 2965 no_fault(); 2966 if (watched) 2967 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2968 } 2969 2970 /* 2971 * Register a process-shared robust mutex in the lwpchan cache. 2972 */ 2973 int 2974 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr) 2975 { 2976 int error = 0; 2977 volatile int watched; 2978 label_t ljb; 2979 uint8_t type; 2980 lwpchan_t lwpchan; 2981 2982 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2983 return (set_errno(EFAULT)); 2984 2985 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2986 2987 if (on_fault(&ljb)) { 2988 error = EFAULT; 2989 } else { 2990 /* 2991 * Force Copy-on-write if necessary and ensure that the 2992 * synchronization object resides in read/write memory. 2993 * Cause an EFAULT return now if this is not so. 2994 */ 2995 fuword8_noerr(&lp->mutex_type, &type); 2996 suword8_noerr(&lp->mutex_type, type); 2997 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2998 != (USYNC_PROCESS|LOCK_ROBUST)) { 2999 error = EINVAL; 3000 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp, 3001 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) { 3002 error = EFAULT; 3003 } 3004 } 3005 no_fault(); 3006 if (watched) 3007 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3008 if (error) 3009 return (set_errno(error)); 3010 return (0); 3011 } 3012 3013 /* 3014 * There is a user-level robust lock registration in libc. 3015 * Mark it as invalid by storing -1 into the location of the pointer. 3016 */ 3017 static void 3018 lwp_mutex_unregister(void *uaddr) 3019 { 3020 if (get_udatamodel() == DATAMODEL_NATIVE) { 3021 (void) sulword(uaddr, (ulong_t)-1); 3022 #ifdef _SYSCALL32_IMPL 3023 } else { 3024 (void) suword32(uaddr, (uint32_t)-1); 3025 #endif 3026 } 3027 } 3028 3029 int 3030 lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner) 3031 { 3032 kthread_t *t = curthread; 3033 proc_t *p = ttoproc(t); 3034 int error = 0; 3035 volatile int locked = 0; 3036 volatile int watched = 0; 3037 label_t ljb; 3038 volatile uint8_t type = 0; 3039 uint16_t flag; 3040 lwpchan_t lwpchan; 3041 3042 if ((caddr_t)lp >= p->p_as->a_userlimit) 3043 return (set_errno(EFAULT)); 3044 3045 (void) new_mstate(t, LMS_USER_LOCK); 3046 3047 if (on_fault(&ljb)) { 3048 if (locked) 3049 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3050 error = EFAULT; 3051 goto out; 3052 } 3053 /* 3054 * Force Copy-on-write if necessary and ensure that the 3055 * synchronization object resides in read/write memory. 3056 * Cause an EFAULT return now if this is not so. 3057 */ 3058 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3059 suword8_noerr(&lp->mutex_type, type); 3060 if (UPIMUTEX(type)) { 3061 no_fault(); 3062 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3063 if (error == 0 || error == EOWNERDEAD || 3064 error == ELOCKUNMAPPED) { 3065 volatile int locked = error != 0; 3066 if (on_fault(&ljb)) { 3067 if (locked != 0) 3068 error = lwp_upimutex_unlock(lp, type); 3069 else 3070 error = EFAULT; 3071 goto upierr; 3072 } 3073 set_owner_pid(lp, owner, 3074 (type & USYNC_PROCESS)? 
	    p->p_pid : 0);
3075 			no_fault();
3076 		}
3077 
3078 upierr:
3079 		if (error)
3080 			return (set_errno(error));
3081 		return (0);
3082 	}
3083 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3084 	    &lwpchan, LWPCHAN_MPPOOL)) {
3085 		error = EFAULT;
3086 		goto out;
3087 	}
3088 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3089 	locked = 1;
3090 	if (type & LOCK_ROBUST) {
3091 		fuword16_noerr(&lp->mutex_flag, &flag);
3092 		if (flag & LOCK_NOTRECOVERABLE) {
3093 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3094 			error = ENOTRECOVERABLE;
3095 			goto out;
3096 		}
3097 	}
3098 
3099 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3100 
3101 	if (!ulock_try(&lp->mutex_lockw))
3102 		error = EBUSY;
3103 	else {
3104 		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
3105 		if (type & LOCK_ROBUST) {
3106 			fuword16_noerr(&lp->mutex_flag, &flag);
3107 			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3108 				if (flag & LOCK_OWNERDEAD)
3109 					error = EOWNERDEAD;
3110 				else if (type & USYNC_PROCESS_ROBUST)
3111 					error = ELOCKUNMAPPED;
3112 				else
3113 					error = EOWNERDEAD;
3114 			}
3115 		}
3116 	}
3117 	locked = 0;
3118 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3119 out:
3120 
3121 	if (t->t_mstate == LMS_USER_LOCK)
3122 		(void) new_mstate(t, LMS_SYSTEM);
3123 
3124 	no_fault();
3125 	if (watched)
3126 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3127 	if (error)
3128 		return (set_errno(error));
3129 	return (0);
3130 }
3131 
3132 /*
3133  * Unlock the mutex and unblock any lwps that are trying to acquire it.
3134  * A blocked lwp resumes and retries the lock acquisition.
3135  */
3136 int
3137 lwp_mutex_unlock(lwp_mutex_t *lp)
3138 {
3139 	proc_t *p = ttoproc(curthread);
3140 	lwpchan_t lwpchan;
3141 	uchar_t waiters;
3142 	volatile int locked = 0;
3143 	volatile int watched = 0;
3144 	volatile uint8_t type = 0;
3145 	label_t ljb;
3146 	uint16_t flag;
3147 	int error = 0;
3148 
3149 	if ((caddr_t)lp >= p->p_as->a_userlimit)
3150 		return (set_errno(EFAULT));
3151 
3152 	if (on_fault(&ljb)) {
3153 		if (locked)
3154 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3155 		error = EFAULT;
3156 		goto out;
3157 	}
3158 
3159 	/*
3160 	 * Force Copy-on-write if necessary and ensure that the
3161 	 * synchronization object resides in read/write memory.
3162 	 * Cause an EFAULT return now if this is not so.
3163 	 */
3164 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
3165 	suword8_noerr(&lp->mutex_type, type);
3166 
3167 	if (UPIMUTEX(type)) {
3168 		no_fault();
3169 		error = lwp_upimutex_unlock(lp, type);
3170 		if (error)
3171 			return (set_errno(error));
3172 		return (0);
3173 	}
3174 
3175 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3176 
3177 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3178 	    &lwpchan, LWPCHAN_MPPOOL)) {
3179 		error = EFAULT;
3180 		goto out;
3181 	}
3182 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3183 	locked = 1;
3184 	if (type & LOCK_ROBUST) {
3185 		fuword16_noerr(&lp->mutex_flag, &flag);
3186 		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3187 			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
3188 			flag |= LOCK_NOTRECOVERABLE;
3189 			suword16_noerr(&lp->mutex_flag, flag);
3190 		}
3191 	}
3192 	set_owner_pid(lp, 0, 0);
3193 	ulock_clear(&lp->mutex_lockw);
3194 	/*
3195 	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp
3196 	 * will retry the lock in lwp_mutex_timedlock(). The call to
3197 	 * lwp_release() may fail; if it does, do not write into the waiter
3198 	 * bit. lwp_release() can fail for one of three reasons:
3199 	 *
3200 	 *	1. The thread that set the waiter bit is not actually
3201 	 *	   sleeping, because it got the lock on its retry. That
3202 	 *	   thread will then update the waiter bit correctly itself.
3203 	 *	   This window could be closed by re-reading the waiter bit
3204 	 *	   here and not calling lwp_release() at all if it is zero.
3205 	 *	2. The thread that set the waiter bit and went to sleep
3206 	 *	   was woken up by a signal. In that case the waiter
3207 	 *	   re-evaluates the waiter bit on its EINTR return path.
3208 	 *	3. The waiter bit read by lwp_mutex_wakeup() was in
3209 	 *	   memory that has been re-used after the lock was dropped.
3210 	 *	   In this case, writing into the waiter bit would cause
3211 	 *	   data corruption.
3212 	 */
3213 	fuword8_noerr(&lp->mutex_waiters, &waiters);
3214 	if (waiters) {
3215 		if ((type & LOCK_ROBUST) &&
3216 		    (flag & LOCK_NOTRECOVERABLE)) {
3217 			lwp_release_all(&lwpchan);
3218 			suword8_noerr(&lp->mutex_waiters, 0);
3219 		} else if (lwp_release(&lwpchan, &waiters, 0)) {
3220 			suword8_noerr(&lp->mutex_waiters, waiters);
3221 		}
3222 	}
3223 
3224 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3225 out:
3226 	no_fault();
3227 	if (watched)
3228 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3229 	if (error)
3230 		return (set_errno(error));
3231 	return (0);
3232 }
3233 
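
/*
 * The functions above form the kernel half of the user-level
 * synchronization primitives exported by libc, which (roughly speaking)
 * implements mutexes, condition variables, semaphores and rwlocks in
 * terms of the lwp_* system calls handled in this file.  The sketch
 * below is purely illustrative user-level code, written against the
 * portable pthread API rather than against anything defined here; the
 * queue_t type and the queue_* functions are hypothetical.  It shows
 * the two behaviors the comments above keep referring to: a condition
 * wait must be re-checked in a loop because the kernel-level wait can
 * return early (signal, timeout, racing wakeup) with the mutex
 * re-acquired by the caller, and a robust mutex acquisition must be
 * prepared to see EOWNERDEAD and repair shared state.
 */
#if 0	/* illustrative user-level sketch, not compiled with the kernel */
#include <pthread.h>
#include <errno.h>

typedef struct queue {
	pthread_mutex_t	q_lock;		/* process-shared, robust */
	pthread_cond_t	q_cv;		/* process-shared */
	int		q_len;
} queue_t;

/* Hypothetical repair step run when a previous owner died mid-update. */
static void
queue_repair(queue_t *qp)
{
	if (qp->q_len < 0)
		qp->q_len = 0;
}

static int
queue_init(queue_t *qp)
{
	pthread_mutexattr_t mattr;
	pthread_condattr_t cattr;

	(void) pthread_mutexattr_init(&mattr);
	(void) pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
	(void) pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST);
	(void) pthread_mutex_init(&qp->q_lock, &mattr);
	(void) pthread_mutexattr_destroy(&mattr);

	(void) pthread_condattr_init(&cattr);
	(void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
	(void) pthread_cond_init(&qp->q_cv, &cattr);
	(void) pthread_condattr_destroy(&cattr);

	qp->q_len = 0;
	return (0);
}

/* Take one item, waiting until one is available. */
static int
queue_take(queue_t *qp)
{
	int err;

	/*
	 * A robust mutex acquisition can report EOWNERDEAD: we now hold
	 * the lock, but its previous owner died while holding it (compare
	 * lwp_mutex_cleanup() above), so repair the shared state and mark
	 * the mutex consistent before continuing.
	 */
	err = pthread_mutex_lock(&qp->q_lock);
	if (err == EOWNERDEAD) {
		queue_repair(qp);
		(void) pthread_mutex_consistent(&qp->q_lock);
	} else if (err != 0) {
		return (-1);		/* e.g. ENOTRECOVERABLE */
	}

	/*
	 * The wait is a loop: the kernel-level wait can return because of
	 * a signal, a timeout or a wakeup that another consumer won, and
	 * the mutex is always re-acquired by the caller on return to user
	 * level (see lwp_cond_wait() above), so the predicate must be
	 * re-checked.
	 */
	while (qp->q_len == 0) {
		err = pthread_cond_wait(&qp->q_cv, &qp->q_lock);
		if (err == EOWNERDEAD) {
			queue_repair(qp);
			(void) pthread_mutex_consistent(&qp->q_lock);
		}
	}

	qp->q_len--;
	(void) pthread_mutex_unlock(&qp->q_lock);
	return (0);
}
#endif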