1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/user.h> 36 #include <sys/errno.h> 37 #include <sys/file.h> 38 #include <sys/proc.h> 39 #include <sys/prsystm.h> 40 #include <sys/kmem.h> 41 #include <sys/sobject.h> 42 #include <sys/fault.h> 43 #include <sys/procfs.h> 44 #include <sys/watchpoint.h> 45 #include <sys/time.h> 46 #include <sys/cmn_err.h> 47 #include <sys/machlock.h> 48 #include <sys/debug.h> 49 #include <sys/synch.h> 50 #include <sys/synch32.h> 51 #include <sys/mman.h> 52 #include <sys/class.h> 53 #include <sys/schedctl.h> 54 #include <sys/sleepq.h> 55 #include <sys/policy.h> 56 #include <sys/tnf_probe.h> 57 #include <sys/lwpchan_impl.h> 58 #include <sys/turnstile.h> 59 #include <sys/atomic.h> 60 #include <sys/lwp_timer_impl.h> 61 #include <sys/lwp_upimutex_impl.h> 62 #include <vm/as.h> 63 #include <sys/sdt.h> 64 65 static kthread_t *lwpsobj_owner(caddr_t); 66 static void lwp_unsleep(kthread_t *t); 67 static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip); 68 static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg); 69 static void lwp_mutex_unregister(void *uaddr); 70 static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t); 71 static int iswanted(kthread_t *, lwpchan_t *); 72 73 extern int lwp_cond_signal(lwp_cond_t *cv); 74 75 /* 76 * Maximum number of user prio inheritance locks that can be held by a thread. 77 * Used to limit kmem for each thread. This is a per-thread limit that 78 * can be administered on a system wide basis (using /etc/system). 79 * 80 * Also, when a limit, say maxlwps is added for numbers of lwps within a 81 * process, the per-thread limit automatically becomes a process-wide limit 82 * of maximum number of held upi locks within a process: 83 * maxheldupimx = maxnestupimx * maxlwps; 84 */ 85 static uint32_t maxnestupimx = 2000; 86 87 /* 88 * The sobj_ops vector exports a set of functions needed when a thread 89 * is asleep on a synchronization object of this type. 
90 */ 91 static sobj_ops_t lwp_sobj_ops = { 92 SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri 93 }; 94 95 static kthread_t *lwpsobj_pi_owner(upimutex_t *up); 96 97 static sobj_ops_t lwp_sobj_pi_ops = { 98 SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep, 99 turnstile_change_pri 100 }; 101 102 static sleepq_head_t lwpsleepq[NSLEEPQ]; 103 upib_t upimutextab[UPIMUTEX_TABSIZE]; 104 105 #define LWPCHAN_LOCK_SHIFT 10 /* 1024 locks for each pool */ 106 #define LWPCHAN_LOCK_SIZE (1 << LWPCHAN_LOCK_SHIFT) 107 108 /* 109 * We know that both lc_wchan and lc_wchan0 are addresses that most 110 * likely are 8-byte aligned, so we shift off the low-order 3 bits. 111 * 'pool' is either 0 or 1. 112 */ 113 #define LWPCHAN_LOCK_HASH(X, pool) \ 114 (((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \ 115 (LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0)) 116 117 static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE]; 118 119 /* 120 * Is this a POSIX threads user-level lock requiring priority inheritance? 121 */ 122 #define UPIMUTEX(type) ((type) & LOCK_PRIO_INHERIT) 123 124 static sleepq_head_t * 125 lwpsqhash(lwpchan_t *lwpchan) 126 { 127 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 128 return (&lwpsleepq[SQHASHINDEX(x)]); 129 } 130 131 /* 132 * Lock an lwpchan. 133 * Keep this in sync with lwpchan_unlock(), below. 134 */ 135 static void 136 lwpchan_lock(lwpchan_t *lwpchan, int pool) 137 { 138 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 139 mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 140 } 141 142 /* 143 * Unlock an lwpchan. 144 * Keep this in sync with lwpchan_lock(), above. 145 */ 146 static void 147 lwpchan_unlock(lwpchan_t *lwpchan, int pool) 148 { 149 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0; 150 mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]); 151 } 152 153 /* 154 * Delete mappings from the lwpchan cache for pages that are being 155 * unmapped by as_unmap(). Given a range of addresses, "start" to "end", 156 * all mappings within the range are deleted from the lwpchan cache. 157 */ 158 void 159 lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end) 160 { 161 lwpchan_data_t *lcp; 162 lwpchan_hashbucket_t *hashbucket; 163 lwpchan_hashbucket_t *endbucket; 164 lwpchan_entry_t *ent; 165 lwpchan_entry_t **prev; 166 caddr_t addr; 167 168 mutex_enter(&p->p_lcp_lock); 169 lcp = p->p_lcp; 170 hashbucket = lcp->lwpchan_cache; 171 endbucket = hashbucket + lcp->lwpchan_size; 172 for (; hashbucket < endbucket; hashbucket++) { 173 if (hashbucket->lwpchan_chain == NULL) 174 continue; 175 mutex_enter(&hashbucket->lwpchan_lock); 176 prev = &hashbucket->lwpchan_chain; 177 /* check entire chain */ 178 while ((ent = *prev) != NULL) { 179 addr = ent->lwpchan_addr; 180 if (start <= addr && addr < end) { 181 *prev = ent->lwpchan_next; 182 /* 183 * We do this only for the obsolete type 184 * USYNC_PROCESS_ROBUST. Otherwise robust 185 * locks do not draw ELOCKUNMAPPED or 186 * EOWNERDEAD due to being unmapped. 187 */ 188 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 189 (ent->lwpchan_type & USYNC_PROCESS_ROBUST)) 190 lwp_mutex_cleanup(ent, LOCK_UNMAPPED); 191 /* 192 * If there is a user-level robust lock 193 * registration, mark it as invalid. 
194 */ 195 if ((addr = ent->lwpchan_uaddr) != NULL) 196 lwp_mutex_unregister(addr); 197 kmem_free(ent, sizeof (*ent)); 198 atomic_add_32(&lcp->lwpchan_entries, -1); 199 } else { 200 prev = &ent->lwpchan_next; 201 } 202 } 203 mutex_exit(&hashbucket->lwpchan_lock); 204 } 205 mutex_exit(&p->p_lcp_lock); 206 } 207 208 /* 209 * Given an lwpchan cache pointer and a process virtual address, 210 * return a pointer to the corresponding lwpchan hash bucket. 211 */ 212 static lwpchan_hashbucket_t * 213 lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr) 214 { 215 uint_t i; 216 217 /* 218 * All user-level sync object addresses are 8-byte aligned. 219 * Ignore the lowest 3 bits of the address and use the 220 * higher-order 2*lwpchan_bits bits for the hash index. 221 */ 222 addr >>= 3; 223 i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask; 224 return (lcp->lwpchan_cache + i); 225 } 226 227 /* 228 * (Re)allocate the per-process lwpchan cache. 229 */ 230 static void 231 lwpchan_alloc_cache(proc_t *p, uint_t bits) 232 { 233 lwpchan_data_t *lcp; 234 lwpchan_data_t *old_lcp; 235 lwpchan_hashbucket_t *hashbucket; 236 lwpchan_hashbucket_t *endbucket; 237 lwpchan_hashbucket_t *newbucket; 238 lwpchan_entry_t *ent; 239 lwpchan_entry_t *next; 240 uint_t count; 241 242 ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS); 243 244 lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP); 245 lcp->lwpchan_bits = bits; 246 lcp->lwpchan_size = 1 << lcp->lwpchan_bits; 247 lcp->lwpchan_mask = lcp->lwpchan_size - 1; 248 lcp->lwpchan_entries = 0; 249 lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size * 250 sizeof (lwpchan_hashbucket_t), KM_SLEEP); 251 lcp->lwpchan_next_data = NULL; 252 253 mutex_enter(&p->p_lcp_lock); 254 if ((old_lcp = p->p_lcp) != NULL) { 255 if (old_lcp->lwpchan_bits >= bits) { 256 /* someone beat us to it */ 257 mutex_exit(&p->p_lcp_lock); 258 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 259 sizeof (lwpchan_hashbucket_t)); 260 kmem_free(lcp, sizeof (lwpchan_data_t)); 261 return; 262 } 263 /* 264 * Acquire all of the old hash table locks. 265 */ 266 hashbucket = old_lcp->lwpchan_cache; 267 endbucket = hashbucket + old_lcp->lwpchan_size; 268 for (; hashbucket < endbucket; hashbucket++) 269 mutex_enter(&hashbucket->lwpchan_lock); 270 /* 271 * Move all of the old hash table entries to the 272 * new hash table. The new hash table has not yet 273 * been installed so we don't need any of its locks. 274 */ 275 count = 0; 276 hashbucket = old_lcp->lwpchan_cache; 277 for (; hashbucket < endbucket; hashbucket++) { 278 ent = hashbucket->lwpchan_chain; 279 while (ent != NULL) { 280 next = ent->lwpchan_next; 281 newbucket = lwpchan_bucket(lcp, 282 (uintptr_t)ent->lwpchan_addr); 283 ent->lwpchan_next = newbucket->lwpchan_chain; 284 newbucket->lwpchan_chain = ent; 285 ent = next; 286 count++; 287 } 288 hashbucket->lwpchan_chain = NULL; 289 } 290 lcp->lwpchan_entries = count; 291 } 292 293 /* 294 * Retire the old hash table. We can't actually kmem_free() it 295 * now because someone may still have a pointer to it. Instead, 296 * we link it onto the new hash table's list of retired hash tables. 297 * The new hash table is double the size of the previous one, so 298 * the total size of all retired hash tables is less than the size 299 * of the new one. exit() and exec() free the retired hash tables 300 * (see lwpchan_destroy_cache(), below). 301 */ 302 lcp->lwpchan_next_data = old_lcp; 303 304 /* 305 * As soon as we store the new lcp, future locking operations will 306 * use it. 
Therefore, we must ensure that all the state we've just 307 * established reaches global visibility before the new lcp does. 308 */ 309 membar_producer(); 310 p->p_lcp = lcp; 311 312 if (old_lcp != NULL) { 313 /* 314 * Release all of the old hash table locks. 315 */ 316 hashbucket = old_lcp->lwpchan_cache; 317 for (; hashbucket < endbucket; hashbucket++) 318 mutex_exit(&hashbucket->lwpchan_lock); 319 } 320 mutex_exit(&p->p_lcp_lock); 321 } 322 323 /* 324 * Deallocate the lwpchan cache, and any dynamically allocated mappings. 325 * Called when the process exits or execs. All lwps except one have 326 * exited so we need no locks here. 327 */ 328 void 329 lwpchan_destroy_cache(int exec) 330 { 331 proc_t *p = curproc; 332 lwpchan_hashbucket_t *hashbucket; 333 lwpchan_hashbucket_t *endbucket; 334 lwpchan_data_t *lcp; 335 lwpchan_entry_t *ent; 336 lwpchan_entry_t *next; 337 uint16_t lockflg; 338 339 lcp = p->p_lcp; 340 p->p_lcp = NULL; 341 342 lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD; 343 hashbucket = lcp->lwpchan_cache; 344 endbucket = hashbucket + lcp->lwpchan_size; 345 for (; hashbucket < endbucket; hashbucket++) { 346 ent = hashbucket->lwpchan_chain; 347 hashbucket->lwpchan_chain = NULL; 348 while (ent != NULL) { 349 next = ent->lwpchan_next; 350 if (ent->lwpchan_pool == LWPCHAN_MPPOOL && 351 (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 352 == (USYNC_PROCESS | LOCK_ROBUST)) 353 lwp_mutex_cleanup(ent, lockflg); 354 kmem_free(ent, sizeof (*ent)); 355 ent = next; 356 } 357 } 358 359 while (lcp != NULL) { 360 lwpchan_data_t *next_lcp = lcp->lwpchan_next_data; 361 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size * 362 sizeof (lwpchan_hashbucket_t)); 363 kmem_free(lcp, sizeof (lwpchan_data_t)); 364 lcp = next_lcp; 365 } 366 } 367 368 /* 369 * Return zero when there is an entry in the lwpchan cache for the 370 * given process virtual address and non-zero when there is not. 371 * The returned non-zero value is the current length of the 372 * hash chain plus one. The caller holds the hash bucket lock. 373 */ 374 static uint_t 375 lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan, 376 lwpchan_hashbucket_t *hashbucket) 377 { 378 lwpchan_entry_t *ent; 379 uint_t count = 1; 380 381 for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) { 382 if (ent->lwpchan_addr == addr) { 383 if (ent->lwpchan_type != type || 384 ent->lwpchan_pool != pool) { 385 /* 386 * This shouldn't happen, but might if the 387 * process reuses its memory for different 388 * types of sync objects. We test first 389 * to avoid grabbing the memory cache line. 390 */ 391 ent->lwpchan_type = (uint16_t)type; 392 ent->lwpchan_pool = (uint16_t)pool; 393 } 394 *lwpchan = ent->lwpchan_lwpchan; 395 return (0); 396 } 397 count++; 398 } 399 return (count); 400 } 401 402 /* 403 * Return the cached lwpchan mapping if cached, otherwise insert 404 * a virtual address to lwpchan mapping into the cache. 
405 */ 406 static int 407 lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr, 408 int type, lwpchan_t *lwpchan, int pool) 409 { 410 proc_t *p = curproc; 411 lwpchan_data_t *lcp; 412 lwpchan_hashbucket_t *hashbucket; 413 lwpchan_entry_t *ent; 414 memid_t memid; 415 uint_t count; 416 uint_t bits; 417 418 top: 419 /* initialize the lwpchan cache, if necesary */ 420 if ((lcp = p->p_lcp) == NULL) { 421 lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS); 422 goto top; 423 } 424 hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr); 425 mutex_enter(&hashbucket->lwpchan_lock); 426 if (lcp != p->p_lcp) { 427 /* someone resized the lwpchan cache; start over */ 428 mutex_exit(&hashbucket->lwpchan_lock); 429 goto top; 430 } 431 if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) { 432 /* it's in the cache */ 433 mutex_exit(&hashbucket->lwpchan_lock); 434 return (1); 435 } 436 mutex_exit(&hashbucket->lwpchan_lock); 437 if (as_getmemid(as, addr, &memid) != 0) 438 return (0); 439 lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0]; 440 lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1]; 441 ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP); 442 mutex_enter(&hashbucket->lwpchan_lock); 443 if (lcp != p->p_lcp) { 444 /* someone resized the lwpchan cache; start over */ 445 mutex_exit(&hashbucket->lwpchan_lock); 446 kmem_free(ent, sizeof (*ent)); 447 goto top; 448 } 449 count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket); 450 if (count == 0) { 451 /* someone else added this entry to the cache */ 452 mutex_exit(&hashbucket->lwpchan_lock); 453 kmem_free(ent, sizeof (*ent)); 454 return (1); 455 } 456 if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */ 457 (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) { 458 /* hash chain too long; reallocate the hash table */ 459 mutex_exit(&hashbucket->lwpchan_lock); 460 kmem_free(ent, sizeof (*ent)); 461 lwpchan_alloc_cache(p, bits + 1); 462 goto top; 463 } 464 ent->lwpchan_addr = addr; 465 ent->lwpchan_uaddr = uaddr; 466 ent->lwpchan_type = (uint16_t)type; 467 ent->lwpchan_pool = (uint16_t)pool; 468 ent->lwpchan_lwpchan = *lwpchan; 469 ent->lwpchan_next = hashbucket->lwpchan_chain; 470 hashbucket->lwpchan_chain = ent; 471 atomic_add_32(&lcp->lwpchan_entries, 1); 472 mutex_exit(&hashbucket->lwpchan_lock); 473 return (1); 474 } 475 476 /* 477 * Return a unique pair of identifiers that corresponds to a 478 * synchronization object's virtual address. Process-shared 479 * sync objects usually get vnode/offset from as_getmemid(). 480 */ 481 static int 482 get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool) 483 { 484 /* 485 * If the lwp synch object is defined to be process-private, 486 * we just make the first field of the lwpchan be 'as' and 487 * the second field be the synch object's virtual address. 488 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.) 489 * The lwpchan cache is used only for process-shared objects. 
490 */ 491 if (!(type & USYNC_PROCESS)) { 492 lwpchan->lc_wchan0 = (caddr_t)as; 493 lwpchan->lc_wchan = addr; 494 return (1); 495 } 496 497 return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool)); 498 } 499 500 static void 501 lwp_block(lwpchan_t *lwpchan) 502 { 503 kthread_t *t = curthread; 504 klwp_t *lwp = ttolwp(t); 505 sleepq_head_t *sqh; 506 507 thread_lock(t); 508 t->t_flag |= T_WAKEABLE; 509 t->t_lwpchan = *lwpchan; 510 t->t_sobj_ops = &lwp_sobj_ops; 511 t->t_release = 0; 512 sqh = lwpsqhash(lwpchan); 513 disp_lock_enter_high(&sqh->sq_lock); 514 CL_SLEEP(t); 515 DTRACE_SCHED(sleep); 516 THREAD_SLEEP(t, &sqh->sq_lock); 517 sleepq_insert(&sqh->sq_queue, t); 518 thread_unlock(t); 519 lwp->lwp_asleep = 1; 520 lwp->lwp_sysabort = 0; 521 lwp->lwp_ru.nvcsw++; 522 (void) new_mstate(curthread, LMS_SLEEP); 523 } 524 525 static kthread_t * 526 lwpsobj_pi_owner(upimutex_t *up) 527 { 528 return (up->upi_owner); 529 } 530 531 static struct upimutex * 532 upi_get(upib_t *upibp, lwpchan_t *lcp) 533 { 534 struct upimutex *upip; 535 536 for (upip = upibp->upib_first; upip != NULL; 537 upip = upip->upi_nextchain) { 538 if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 && 539 upip->upi_lwpchan.lc_wchan == lcp->lc_wchan) 540 break; 541 } 542 return (upip); 543 } 544 545 static void 546 upi_chain_add(upib_t *upibp, struct upimutex *upimutex) 547 { 548 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 549 550 /* 551 * Insert upimutex at front of list. Maybe a bit unfair 552 * but assume that not many lwpchans hash to the same 553 * upimutextab bucket, i.e. the list of upimutexes from 554 * upib_first is not too long. 555 */ 556 upimutex->upi_nextchain = upibp->upib_first; 557 upibp->upib_first = upimutex; 558 } 559 560 static void 561 upi_chain_del(upib_t *upibp, struct upimutex *upimutex) 562 { 563 struct upimutex **prev; 564 565 ASSERT(MUTEX_HELD(&upibp->upib_lock)); 566 567 prev = &upibp->upib_first; 568 while (*prev != upimutex) { 569 prev = &(*prev)->upi_nextchain; 570 } 571 *prev = upimutex->upi_nextchain; 572 upimutex->upi_nextchain = NULL; 573 } 574 575 /* 576 * Add upimutex to chain of upimutexes held by curthread. 577 * Returns number of upimutexes held by curthread. 578 */ 579 static uint32_t 580 upi_mylist_add(struct upimutex *upimutex) 581 { 582 kthread_t *t = curthread; 583 584 /* 585 * Insert upimutex at front of list of upimutexes owned by t. This 586 * would match typical LIFO order in which nested locks are acquired 587 * and released. 588 */ 589 upimutex->upi_nextowned = t->t_upimutex; 590 t->t_upimutex = upimutex; 591 t->t_nupinest++; 592 ASSERT(t->t_nupinest > 0); 593 return (t->t_nupinest); 594 } 595 596 /* 597 * Delete upimutex from list of upimutexes owned by curthread. 598 */ 599 static void 600 upi_mylist_del(struct upimutex *upimutex) 601 { 602 kthread_t *t = curthread; 603 struct upimutex **prev; 604 605 /* 606 * Since the order in which nested locks are acquired and released, 607 * is typically LIFO, and typical nesting levels are not too deep, the 608 * following should not be expensive in the general case. 609 */ 610 prev = &t->t_upimutex; 611 while (*prev != upimutex) { 612 prev = &(*prev)->upi_nextowned; 613 } 614 *prev = upimutex->upi_nextowned; 615 upimutex->upi_nextowned = NULL; 616 ASSERT(t->t_nupinest > 0); 617 t->t_nupinest--; 618 } 619 620 /* 621 * Returns true if upimutex is owned. Should be called only when upim points 622 * to kmem which cannot disappear from underneath. 
623 */ 624 static int 625 upi_owned(upimutex_t *upim) 626 { 627 return (upim->upi_owner == curthread); 628 } 629 630 /* 631 * Returns pointer to kernel object (upimutex_t *) if lp is owned. 632 */ 633 static struct upimutex * 634 lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type) 635 { 636 lwpchan_t lwpchan; 637 upib_t *upibp; 638 struct upimutex *upimutex; 639 640 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 641 &lwpchan, LWPCHAN_MPPOOL)) 642 return (NULL); 643 644 upibp = &UPI_CHAIN(lwpchan); 645 mutex_enter(&upibp->upib_lock); 646 upimutex = upi_get(upibp, &lwpchan); 647 if (upimutex == NULL || upimutex->upi_owner != curthread) { 648 mutex_exit(&upibp->upib_lock); 649 return (NULL); 650 } 651 mutex_exit(&upibp->upib_lock); 652 return (upimutex); 653 } 654 655 /* 656 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if 657 * no lock hand-off occurrs. 658 */ 659 static void 660 upimutex_unlock(struct upimutex *upimutex, uint16_t flag) 661 { 662 turnstile_t *ts; 663 upib_t *upibp; 664 kthread_t *newowner; 665 666 upi_mylist_del(upimutex); 667 upibp = upimutex->upi_upibp; 668 mutex_enter(&upibp->upib_lock); 669 if (upimutex->upi_waiter != 0) { /* if waiters */ 670 ts = turnstile_lookup(upimutex); 671 if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) { 672 /* hand-off lock to highest prio waiter */ 673 newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first; 674 upimutex->upi_owner = newowner; 675 if (ts->ts_waiters == 1) 676 upimutex->upi_waiter = 0; 677 turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner); 678 mutex_exit(&upibp->upib_lock); 679 return; 680 } else if (ts != NULL) { 681 /* LOCK_NOTRECOVERABLE: wakeup all */ 682 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL); 683 } else { 684 /* 685 * Misleading w bit. Waiters might have been 686 * interrupted. No need to clear the w bit (upimutex 687 * will soon be freed). Re-calculate PI from existing 688 * waiters. 689 */ 690 turnstile_exit(upimutex); 691 turnstile_pi_recalc(); 692 } 693 } 694 /* 695 * no waiters, or LOCK_NOTRECOVERABLE. 696 * remove from the bucket chain of upi mutexes. 697 * de-allocate kernel memory (upimutex). 
698 */ 699 upi_chain_del(upimutex->upi_upibp, upimutex); 700 mutex_exit(&upibp->upib_lock); 701 kmem_free(upimutex, sizeof (upimutex_t)); 702 } 703 704 static int 705 lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp) 706 { 707 label_t ljb; 708 int error = 0; 709 lwpchan_t lwpchan; 710 uint16_t flag; 711 upib_t *upibp; 712 volatile struct upimutex *upimutex = NULL; 713 turnstile_t *ts; 714 uint32_t nupinest; 715 volatile int upilocked = 0; 716 717 if (on_fault(&ljb)) { 718 if (upilocked) 719 upimutex_unlock((upimutex_t *)upimutex, 0); 720 error = EFAULT; 721 goto out; 722 } 723 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 724 &lwpchan, LWPCHAN_MPPOOL)) { 725 error = EFAULT; 726 goto out; 727 } 728 upibp = &UPI_CHAIN(lwpchan); 729 retry: 730 mutex_enter(&upibp->upib_lock); 731 upimutex = upi_get(upibp, &lwpchan); 732 if (upimutex == NULL) { 733 /* lock available since lwpchan has no upimutex */ 734 upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP); 735 upi_chain_add(upibp, (upimutex_t *)upimutex); 736 upimutex->upi_owner = curthread; /* grab lock */ 737 upimutex->upi_upibp = upibp; 738 upimutex->upi_vaddr = lp; 739 upimutex->upi_lwpchan = lwpchan; 740 mutex_exit(&upibp->upib_lock); 741 nupinest = upi_mylist_add((upimutex_t *)upimutex); 742 upilocked = 1; 743 fuword16_noerr(&lp->mutex_flag, &flag); 744 if (nupinest > maxnestupimx && 745 secpolicy_resource(CRED()) != 0) { 746 upimutex_unlock((upimutex_t *)upimutex, flag); 747 error = ENOMEM; 748 goto out; 749 } 750 if (flag & LOCK_NOTRECOVERABLE) { 751 /* 752 * Since the setting of LOCK_NOTRECOVERABLE 753 * was done under the high-level upi mutex, 754 * in lwp_upimutex_unlock(), this flag needs to 755 * be checked while holding the upi mutex. 756 * If set, this thread should return without 757 * the lock held, and with the right error code. 758 */ 759 upimutex_unlock((upimutex_t *)upimutex, flag); 760 upilocked = 0; 761 error = ENOTRECOVERABLE; 762 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 763 if (flag & LOCK_OWNERDEAD) 764 error = EOWNERDEAD; 765 else if (type & USYNC_PROCESS_ROBUST) 766 error = ELOCKUNMAPPED; 767 else 768 error = EOWNERDEAD; 769 } 770 goto out; 771 } 772 /* 773 * If a upimutex object exists, it must have an owner. 774 * This is due to lock hand-off, and release of upimutex when no 775 * waiters are present at unlock time, 776 */ 777 ASSERT(upimutex->upi_owner != NULL); 778 if (upimutex->upi_owner == curthread) { 779 /* 780 * The user wrapper can check if the mutex type is 781 * ERRORCHECK: if not, it should stall at user-level. 782 * If so, it should return the error code. 783 */ 784 mutex_exit(&upibp->upib_lock); 785 error = EDEADLK; 786 goto out; 787 } 788 if (try == UPIMUTEX_TRY) { 789 mutex_exit(&upibp->upib_lock); 790 error = EBUSY; 791 goto out; 792 } 793 /* 794 * Block for the lock. 795 */ 796 if ((error = lwptp->lwpt_time_error) != 0) { 797 /* 798 * The SUSV3 Posix spec is very clear that we 799 * should get no error from validating the 800 * timer until we would actually sleep. 801 */ 802 mutex_exit(&upibp->upib_lock); 803 goto out; 804 } 805 if (lwptp->lwpt_tsp != NULL) { 806 /* 807 * Unlike the protocol for other lwp timedwait operations, 808 * we must drop t_delay_lock before going to sleep in 809 * turnstile_block() for a upi mutex. 
810 * See the comments below and in turnstile.c 811 */ 812 mutex_enter(&curthread->t_delay_lock); 813 (void) lwp_timer_enqueue(lwptp); 814 mutex_exit(&curthread->t_delay_lock); 815 } 816 /* 817 * Now, set the waiter bit and block for the lock in turnstile_block(). 818 * No need to preserve the previous wbit since a lock try is not 819 * attempted after setting the wait bit. Wait bit is set under 820 * the upib_lock, which is not released until the turnstile lock 821 * is acquired. Say, the upimutex is L: 822 * 823 * 1. upib_lock is held so the waiter does not have to retry L after 824 * setting the wait bit: since the owner has to grab the upib_lock 825 * to unlock L, it will certainly see the wait bit set. 826 * 2. upib_lock is not released until the turnstile lock is acquired. 827 * This is the key to preventing a missed wake-up. Otherwise, the 828 * owner could acquire the upib_lock, and the tc_lock, to call 829 * turnstile_wakeup(). All this, before the waiter gets tc_lock 830 * to sleep in turnstile_block(). turnstile_wakeup() will then not 831 * find this waiter, resulting in the missed wakeup. 832 * 3. The upib_lock, being a kernel mutex, cannot be released while 833 * holding the tc_lock (since mutex_exit() could need to acquire 834 * the same tc_lock)...and so is held when calling turnstile_block(). 835 * The address of upib_lock is passed to turnstile_block() which 836 * releases it after releasing all turnstile locks, and before going 837 * to sleep in swtch(). 838 * 4. The waiter value cannot be a count of waiters, because a waiter 839 * can be interrupted. The interrupt occurs under the tc_lock, at 840 * which point, the upib_lock cannot be locked, to decrement waiter 841 * count. So, just treat the waiter state as a bit, not a count. 842 */ 843 ts = turnstile_lookup((upimutex_t *)upimutex); 844 upimutex->upi_waiter = 1; 845 error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex, 846 &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp); 847 /* 848 * Hand-off implies that we wakeup holding the lock, except when: 849 * - deadlock is detected 850 * - lock is not recoverable 851 * - we got an interrupt or timeout 852 * If we wake up due to an interrupt or timeout, we may 853 * or may not be holding the lock due to mutex hand-off. 854 * Use lwp_upimutex_owned() to check if we do hold the lock. 855 */ 856 if (error != 0) { 857 if ((error == EINTR || error == ETIME) && 858 (upimutex = lwp_upimutex_owned(lp, type))) { 859 /* 860 * Unlock and return - the re-startable syscall will 861 * try the lock again if we got EINTR. 862 */ 863 (void) upi_mylist_add((upimutex_t *)upimutex); 864 upimutex_unlock((upimutex_t *)upimutex, 0); 865 } 866 /* 867 * The only other possible error is EDEADLK. If so, upimutex 868 * is valid, since its owner is deadlocked with curthread. 869 */ 870 ASSERT(error == EINTR || error == ETIME || 871 (error == EDEADLK && !upi_owned((upimutex_t *)upimutex))); 872 ASSERT(!lwp_upimutex_owned(lp, type)); 873 goto out; 874 } 875 if (lwp_upimutex_owned(lp, type)) { 876 ASSERT(lwp_upimutex_owned(lp, type) == upimutex); 877 nupinest = upi_mylist_add((upimutex_t *)upimutex); 878 upilocked = 1; 879 } 880 /* 881 * Now, need to read the user-level lp->mutex_flag to do the following: 882 * 883 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED 884 * should be returned. 885 * - if lock isn't held, check if ENOTRECOVERABLE should 886 * be returned. 887 * 888 * Now, either lp->mutex_flag is readable or it's not. 
If not 889 * readable, the on_fault path will cause a return with EFAULT 890 * as it should. If it is readable, the state of the flag 891 * encodes the robustness state of the lock: 892 * 893 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD 894 * or LOCK_UNMAPPED setting will influence the return code 895 * appropriately. If the upimutex is not locked here, this 896 * could be due to a spurious wake-up or a NOTRECOVERABLE 897 * event. The flag's setting can be used to distinguish 898 * between these two events. 899 */ 900 fuword16_noerr(&lp->mutex_flag, &flag); 901 if (upilocked) { 902 /* 903 * If the thread wakes up from turnstile_block with the lock 904 * held, the flag could not be set to LOCK_NOTRECOVERABLE, 905 * since it would not have been handed-off the lock. 906 * So, no need to check for this case. 907 */ 908 if (nupinest > maxnestupimx && 909 secpolicy_resource(CRED()) != 0) { 910 upimutex_unlock((upimutex_t *)upimutex, flag); 911 upilocked = 0; 912 error = ENOMEM; 913 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 914 if (flag & LOCK_OWNERDEAD) 915 error = EOWNERDEAD; 916 else if (type & USYNC_PROCESS_ROBUST) 917 error = ELOCKUNMAPPED; 918 else 919 error = EOWNERDEAD; 920 } 921 } else { 922 /* 923 * Wake-up without the upimutex held. Either this is a 924 * spurious wake-up (due to signals, forkall(), whatever), or 925 * it is a LOCK_NOTRECOVERABLE robustness event. The setting 926 * of the mutex flag can be used to distinguish between the 927 * two events. 928 */ 929 if (flag & LOCK_NOTRECOVERABLE) { 930 error = ENOTRECOVERABLE; 931 } else { 932 /* 933 * Here, the flag could be set to LOCK_OWNERDEAD or 934 * not. In both cases, this is a spurious wakeup, 935 * since the upi lock is not held, but the thread 936 * has returned from turnstile_block(). 937 * 938 * The user flag could be LOCK_OWNERDEAD if, at the 939 * same time as curthread having been woken up 940 * spuriously, the owner (say Tdead) has died, marked 941 * the mutex flag accordingly, and handed off the lock 942 * to some other waiter (say Tnew). curthread just 943 * happened to read the flag while Tnew has yet to deal 944 * with the owner-dead event. 945 * 946 * In this event, curthread should retry the lock. 947 * If Tnew is able to cleanup the lock, curthread 948 * will eventually get the lock with a zero error code, 949 * If Tnew is unable to cleanup, its eventual call to 950 * unlock the lock will result in the mutex flag being 951 * set to LOCK_NOTRECOVERABLE, and the wake-up of 952 * all waiters, including curthread, which will then 953 * eventually return ENOTRECOVERABLE due to the above 954 * check. 955 * 956 * Of course, if the user-flag is not set with 957 * LOCK_OWNERDEAD, retrying is the thing to do, since 958 * this is definitely a spurious wakeup. 
959 */ 960 goto retry; 961 } 962 } 963 964 out: 965 no_fault(); 966 return (error); 967 } 968 969 970 static int 971 lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type) 972 { 973 label_t ljb; 974 int error = 0; 975 lwpchan_t lwpchan; 976 uint16_t flag; 977 upib_t *upibp; 978 volatile struct upimutex *upimutex = NULL; 979 volatile int upilocked = 0; 980 981 if (on_fault(&ljb)) { 982 if (upilocked) 983 upimutex_unlock((upimutex_t *)upimutex, 0); 984 error = EFAULT; 985 goto out; 986 } 987 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 988 &lwpchan, LWPCHAN_MPPOOL)) { 989 error = EFAULT; 990 goto out; 991 } 992 upibp = &UPI_CHAIN(lwpchan); 993 mutex_enter(&upibp->upib_lock); 994 upimutex = upi_get(upibp, &lwpchan); 995 /* 996 * If the lock is not held, or the owner is not curthread, return 997 * error. The user-level wrapper can return this error or stall, 998 * depending on whether mutex is of ERRORCHECK type or not. 999 */ 1000 if (upimutex == NULL || upimutex->upi_owner != curthread) { 1001 mutex_exit(&upibp->upib_lock); 1002 error = EPERM; 1003 goto out; 1004 } 1005 mutex_exit(&upibp->upib_lock); /* release for user memory access */ 1006 upilocked = 1; 1007 fuword16_noerr(&lp->mutex_flag, &flag); 1008 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1009 /* 1010 * transition mutex to the LOCK_NOTRECOVERABLE state. 1011 */ 1012 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 1013 flag |= LOCK_NOTRECOVERABLE; 1014 suword16_noerr(&lp->mutex_flag, flag); 1015 } 1016 set_owner_pid(lp, 0, 0); 1017 upimutex_unlock((upimutex_t *)upimutex, flag); 1018 upilocked = 0; 1019 out: 1020 no_fault(); 1021 return (error); 1022 } 1023 1024 /* 1025 * Set the owner and ownerpid fields of a user-level mutex. 1026 */ 1027 static void 1028 set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid) 1029 { 1030 union { 1031 uint64_t word64; 1032 uint32_t word32[2]; 1033 } un; 1034 1035 un.word64 = (uint64_t)owner; 1036 1037 suword32_noerr(&lp->mutex_ownerpid, pid); 1038 #if defined(_LP64) 1039 if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */ 1040 suword64_noerr(&lp->mutex_owner, un.word64); 1041 return; 1042 } 1043 #endif 1044 /* mutex is unaligned or we are running on a 32-bit kernel */ 1045 suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]); 1046 suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]); 1047 } 1048 1049 /* 1050 * Clear the contents of a user-level mutex; return the flags. 1051 * Used only by upi_dead() and lwp_mutex_cleanup(), below. 1052 */ 1053 static uint16_t 1054 lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg) 1055 { 1056 uint16_t flag; 1057 1058 fuword16_noerr(&lp->mutex_flag, &flag); 1059 if ((flag & 1060 (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) { 1061 flag |= lockflg; 1062 suword16_noerr(&lp->mutex_flag, flag); 1063 } 1064 set_owner_pid(lp, 0, 0); 1065 suword8_noerr(&lp->mutex_rcount, 0); 1066 1067 return (flag); 1068 } 1069 1070 /* 1071 * Mark user mutex state, corresponding to kernel upimutex, 1072 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate 1073 */ 1074 static int 1075 upi_dead(upimutex_t *upip, uint16_t lockflg) 1076 { 1077 label_t ljb; 1078 int error = 0; 1079 lwp_mutex_t *lp; 1080 1081 if (on_fault(&ljb)) { 1082 error = EFAULT; 1083 goto out; 1084 } 1085 1086 lp = upip->upi_vaddr; 1087 (void) lwp_clear_mutex(lp, lockflg); 1088 suword8_noerr(&lp->mutex_lockw, 0); 1089 out: 1090 no_fault(); 1091 return (error); 1092 } 1093 1094 /* 1095 * Unlock all upimutexes held by curthread, since curthread is dying. 
1096 * For each upimutex, attempt to mark its corresponding user mutex object as 1097 * dead. 1098 */ 1099 void 1100 upimutex_cleanup() 1101 { 1102 kthread_t *t = curthread; 1103 uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)? 1104 LOCK_UNMAPPED : LOCK_OWNERDEAD; 1105 struct upimutex *upip; 1106 1107 while ((upip = t->t_upimutex) != NULL) { 1108 if (upi_dead(upip, lockflg) != 0) { 1109 /* 1110 * If the user object associated with this upimutex is 1111 * unmapped, unlock upimutex with the 1112 * LOCK_NOTRECOVERABLE flag, so that all waiters are 1113 * woken up. Since user object is unmapped, it could 1114 * not be marked as dead or notrecoverable. 1115 * The waiters will now all wake up and return 1116 * ENOTRECOVERABLE, since they would find that the lock 1117 * has not been handed-off to them. 1118 * See lwp_upimutex_lock(). 1119 */ 1120 upimutex_unlock(upip, LOCK_NOTRECOVERABLE); 1121 } else { 1122 /* 1123 * The user object has been updated as dead. 1124 * Unlock the upimutex: if no waiters, upip kmem will 1125 * be freed. If there is a waiter, the lock will be 1126 * handed off. If exit() is in progress, each existing 1127 * waiter will successively get the lock, as owners 1128 * die, and each new owner will call this routine as 1129 * it dies. The last owner will free kmem, since 1130 * it will find the upimutex has no waiters. So, 1131 * eventually, the kmem is guaranteed to be freed. 1132 */ 1133 upimutex_unlock(upip, 0); 1134 } 1135 /* 1136 * Note that the call to upimutex_unlock() above will delete 1137 * upimutex from the t_upimutexes chain. And so the 1138 * while loop will eventually terminate. 1139 */ 1140 } 1141 } 1142 1143 int 1144 lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner) 1145 { 1146 kthread_t *t = curthread; 1147 klwp_t *lwp = ttolwp(t); 1148 proc_t *p = ttoproc(t); 1149 lwp_timer_t lwpt; 1150 caddr_t timedwait; 1151 int error = 0; 1152 int time_error; 1153 clock_t tim = -1; 1154 uchar_t waiters; 1155 volatile int locked = 0; 1156 volatile int watched = 0; 1157 label_t ljb; 1158 volatile uint8_t type = 0; 1159 lwpchan_t lwpchan; 1160 sleepq_head_t *sqh; 1161 uint16_t flag; 1162 int imm_timeout = 0; 1163 1164 if ((caddr_t)lp >= p->p_as->a_userlimit) 1165 return (set_errno(EFAULT)); 1166 1167 /* 1168 * Put the lwp in an orderly state for debugging, 1169 * in case we are stopped while sleeping, below. 1170 */ 1171 prstop(PR_REQUESTED, 0); 1172 1173 timedwait = (caddr_t)tsp; 1174 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 1175 lwpt.lwpt_imm_timeout) { 1176 imm_timeout = 1; 1177 timedwait = NULL; 1178 } 1179 1180 /* 1181 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock", 1182 * this micro state is really a run state. If the thread indeed blocks, 1183 * this state becomes valid. If not, the state is converted back to 1184 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just 1185 * when blocking. 1186 */ 1187 (void) new_mstate(t, LMS_USER_LOCK); 1188 if (on_fault(&ljb)) { 1189 if (locked) 1190 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1191 error = EFAULT; 1192 goto out; 1193 } 1194 /* 1195 * Force Copy-on-write if necessary and ensure that the 1196 * synchronization object resides in read/write memory. 1197 * Cause an EFAULT return now if this is not so. 
1198 */ 1199 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1200 suword8_noerr(&lp->mutex_type, type); 1201 if (UPIMUTEX(type)) { 1202 no_fault(); 1203 error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt); 1204 if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED) 1205 set_owner_pid(lp, owner, 1206 (type & USYNC_PROCESS)? p->p_pid : 0); 1207 if (tsp && !time_error) /* copyout the residual time left */ 1208 error = lwp_timer_copyout(&lwpt, error); 1209 if (error) 1210 return (set_errno(error)); 1211 return (0); 1212 } 1213 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1214 &lwpchan, LWPCHAN_MPPOOL)) { 1215 error = EFAULT; 1216 goto out; 1217 } 1218 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1219 locked = 1; 1220 if (type & LOCK_ROBUST) { 1221 fuword16_noerr(&lp->mutex_flag, &flag); 1222 if (flag & LOCK_NOTRECOVERABLE) { 1223 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1224 error = ENOTRECOVERABLE; 1225 goto out; 1226 } 1227 } 1228 fuword8_noerr(&lp->mutex_waiters, &waiters); 1229 suword8_noerr(&lp->mutex_waiters, 1); 1230 1231 /* 1232 * If watchpoints are set, they need to be restored, since 1233 * atomic accesses of memory such as the call to ulock_try() 1234 * below cannot be watched. 1235 */ 1236 1237 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1238 1239 while (!ulock_try(&lp->mutex_lockw)) { 1240 if (time_error) { 1241 /* 1242 * The SUSV3 Posix spec is very clear that we 1243 * should get no error from validating the 1244 * timer until we would actually sleep. 1245 */ 1246 error = time_error; 1247 break; 1248 } 1249 1250 if (watched) { 1251 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1252 watched = 0; 1253 } 1254 1255 if (timedwait) { 1256 /* 1257 * If we successfully queue the timeout, 1258 * then don't drop t_delay_lock until 1259 * we are on the sleep queue (below). 1260 */ 1261 mutex_enter(&t->t_delay_lock); 1262 if (lwp_timer_enqueue(&lwpt) != 0) { 1263 mutex_exit(&t->t_delay_lock); 1264 imm_timeout = 1; 1265 timedwait = NULL; 1266 } 1267 } 1268 lwp_block(&lwpchan); 1269 /* 1270 * Nothing should happen to cause the lwp to go to 1271 * sleep again until after it returns from swtch(). 1272 */ 1273 if (timedwait) 1274 mutex_exit(&t->t_delay_lock); 1275 locked = 0; 1276 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1277 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 1278 setrun(t); 1279 swtch(); 1280 t->t_flag &= ~T_WAKEABLE; 1281 if (timedwait) 1282 tim = lwp_timer_dequeue(&lwpt); 1283 setallwatch(); 1284 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 1285 error = EINTR; 1286 else if (imm_timeout || (timedwait && tim == -1)) 1287 error = ETIME; 1288 if (error) { 1289 lwp->lwp_asleep = 0; 1290 lwp->lwp_sysabort = 0; 1291 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1292 S_WRITE); 1293 1294 /* 1295 * Need to re-compute waiters bit. The waiters field in 1296 * the lock is not reliable. Either of two things could 1297 * have occurred: no lwp may have called lwp_release() 1298 * for me but I have woken up due to a signal or 1299 * timeout. In this case, the waiter bit is incorrect 1300 * since it is still set to 1, set above. 1301 * OR an lwp_release() did occur for some other lwp on 1302 * the same lwpchan. In this case, the waiter bit is 1303 * correct. But which event occurred, one can't tell. 1304 * So, recompute. 
1305 */ 1306 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1307 locked = 1; 1308 sqh = lwpsqhash(&lwpchan); 1309 disp_lock_enter(&sqh->sq_lock); 1310 waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan); 1311 disp_lock_exit(&sqh->sq_lock); 1312 break; 1313 } 1314 lwp->lwp_asleep = 0; 1315 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), 1316 S_WRITE); 1317 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1318 locked = 1; 1319 fuword8_noerr(&lp->mutex_waiters, &waiters); 1320 suword8_noerr(&lp->mutex_waiters, 1); 1321 if (type & LOCK_ROBUST) { 1322 fuword16_noerr(&lp->mutex_flag, &flag); 1323 if (flag & LOCK_NOTRECOVERABLE) { 1324 error = ENOTRECOVERABLE; 1325 break; 1326 } 1327 } 1328 } 1329 1330 if (t->t_mstate == LMS_USER_LOCK) 1331 (void) new_mstate(t, LMS_SYSTEM); 1332 1333 if (error == 0) { 1334 set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0); 1335 if (type & LOCK_ROBUST) { 1336 fuword16_noerr(&lp->mutex_flag, &flag); 1337 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1338 if (flag & LOCK_OWNERDEAD) 1339 error = EOWNERDEAD; 1340 else if (type & USYNC_PROCESS_ROBUST) 1341 error = ELOCKUNMAPPED; 1342 else 1343 error = EOWNERDEAD; 1344 } 1345 } 1346 } 1347 suword8_noerr(&lp->mutex_waiters, waiters); 1348 locked = 0; 1349 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1350 out: 1351 no_fault(); 1352 if (watched) 1353 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1354 if (tsp && !time_error) /* copyout the residual time left */ 1355 error = lwp_timer_copyout(&lwpt, error); 1356 if (error) 1357 return (set_errno(error)); 1358 return (0); 1359 } 1360 1361 static int 1362 iswanted(kthread_t *t, lwpchan_t *lwpchan) 1363 { 1364 /* 1365 * The caller holds the dispatcher lock on the sleep queue. 1366 */ 1367 while (t != NULL) { 1368 if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1369 t->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1370 return (1); 1371 t = t->t_link; 1372 } 1373 return (0); 1374 } 1375 1376 /* 1377 * Return the highest priority thread sleeping on this lwpchan. 1378 */ 1379 static kthread_t * 1380 lwp_queue_waiter(lwpchan_t *lwpchan) 1381 { 1382 sleepq_head_t *sqh; 1383 kthread_t *tp; 1384 1385 sqh = lwpsqhash(lwpchan); 1386 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1387 for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) { 1388 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1389 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) 1390 break; 1391 } 1392 disp_lock_exit(&sqh->sq_lock); 1393 return (tp); 1394 } 1395 1396 static int 1397 lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type) 1398 { 1399 sleepq_head_t *sqh; 1400 kthread_t *tp; 1401 kthread_t **tpp; 1402 1403 sqh = lwpsqhash(lwpchan); 1404 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */ 1405 tpp = &sqh->sq_queue.sq_first; 1406 while ((tp = *tpp) != NULL) { 1407 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1408 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1409 /* 1410 * The following is typically false. It could be true 1411 * only if lwp_release() is called from 1412 * lwp_mutex_wakeup() after reading the waiters field 1413 * from memory in which the lwp lock used to be, but has 1414 * since been re-used to hold a lwp cv or lwp semaphore. 1415 * The thread "tp" found to match the lwp lock's wchan 1416 * is actually sleeping for the cv or semaphore which 1417 * now has the same wchan. In this case, lwp_release() 1418 * should return failure. 
1419 */ 1420 if (sync_type != (tp->t_flag & T_WAITCVSEM)) { 1421 ASSERT(sync_type == 0); 1422 /* 1423 * assert that this can happen only for mutexes 1424 * i.e. sync_type == 0, for correctly written 1425 * user programs. 1426 */ 1427 disp_lock_exit(&sqh->sq_lock); 1428 return (0); 1429 } 1430 *waiters = iswanted(tp->t_link, lwpchan); 1431 sleepq_unlink(tpp, tp); 1432 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1433 tp->t_wchan0 = NULL; 1434 tp->t_wchan = NULL; 1435 tp->t_sobj_ops = NULL; 1436 tp->t_release = 1; 1437 THREAD_TRANSITION(tp); /* drops sleepq lock */ 1438 CL_WAKEUP(tp); 1439 thread_unlock(tp); /* drop run queue lock */ 1440 return (1); 1441 } 1442 tpp = &tp->t_link; 1443 } 1444 *waiters = 0; 1445 disp_lock_exit(&sqh->sq_lock); 1446 return (0); 1447 } 1448 1449 static void 1450 lwp_release_all(lwpchan_t *lwpchan) 1451 { 1452 sleepq_head_t *sqh; 1453 kthread_t *tp; 1454 kthread_t **tpp; 1455 1456 sqh = lwpsqhash(lwpchan); 1457 disp_lock_enter(&sqh->sq_lock); /* lock sleep q queue */ 1458 tpp = &sqh->sq_queue.sq_first; 1459 while ((tp = *tpp) != NULL) { 1460 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 1461 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 1462 sleepq_unlink(tpp, tp); 1463 DTRACE_SCHED1(wakeup, kthread_t *, tp); 1464 tp->t_wchan0 = NULL; 1465 tp->t_wchan = NULL; 1466 tp->t_sobj_ops = NULL; 1467 CL_WAKEUP(tp); 1468 thread_unlock_high(tp); /* release run queue lock */ 1469 } else { 1470 tpp = &tp->t_link; 1471 } 1472 } 1473 disp_lock_exit(&sqh->sq_lock); /* drop sleep q lock */ 1474 } 1475 1476 /* 1477 * unblock a lwp that is trying to acquire this mutex. the blocked 1478 * lwp resumes and retries to acquire the lock. 1479 */ 1480 int 1481 lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all) 1482 { 1483 proc_t *p = ttoproc(curthread); 1484 lwpchan_t lwpchan; 1485 uchar_t waiters; 1486 volatile int locked = 0; 1487 volatile int watched = 0; 1488 volatile uint8_t type = 0; 1489 label_t ljb; 1490 int error = 0; 1491 1492 if ((caddr_t)lp >= p->p_as->a_userlimit) 1493 return (set_errno(EFAULT)); 1494 1495 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1496 1497 if (on_fault(&ljb)) { 1498 if (locked) 1499 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1500 error = EFAULT; 1501 goto out; 1502 } 1503 /* 1504 * Force Copy-on-write if necessary and ensure that the 1505 * synchronization object resides in read/write memory. 1506 * Cause an EFAULT return now if this is not so. 1507 */ 1508 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 1509 suword8_noerr(&lp->mutex_type, type); 1510 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 1511 &lwpchan, LWPCHAN_MPPOOL)) { 1512 error = EFAULT; 1513 goto out; 1514 } 1515 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 1516 locked = 1; 1517 /* 1518 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 1519 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 1520 * may fail. If it fails, do not write into the waiter bit. 1521 * The call to lwp_release() might fail due to one of three reasons: 1522 * 1523 * 1. due to the thread which set the waiter bit not actually 1524 * sleeping since it got the lock on the re-try. The waiter 1525 * bit will then be correctly updated by that thread. This 1526 * window may be closed by reading the wait bit again here 1527 * and not calling lwp_release() at all if it is zero. 1528 * 2. the thread which set the waiter bit and went to sleep 1529 * was woken up by a signal. This time, the waiter recomputes 1530 * the wait bit in the return with EINTR code. 
1531 * 3. the waiter bit read by lwp_mutex_wakeup() was in 1532 * memory that has been re-used after the lock was dropped. 1533 * In this case, writing into the waiter bit would cause data 1534 * corruption. 1535 */ 1536 if (release_all) 1537 lwp_release_all(&lwpchan); 1538 else if (lwp_release(&lwpchan, &waiters, 0)) 1539 suword8_noerr(&lp->mutex_waiters, waiters); 1540 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 1541 out: 1542 no_fault(); 1543 if (watched) 1544 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 1545 if (error) 1546 return (set_errno(error)); 1547 return (0); 1548 } 1549 1550 /* 1551 * lwp_cond_wait() has four arguments, a pointer to a condition variable, 1552 * a pointer to a mutex, a pointer to a timespec for a timed wait and 1553 * a flag telling the kernel whether or not to honor the kernel/user 1554 * schedctl parking protocol (see schedctl_is_park() in schedctl.c). 1555 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an 1556 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL, 1557 * it is used an an in/out parameter. On entry, it contains the relative 1558 * time until timeout. On exit, we copyout the residual time left to it. 1559 */ 1560 int 1561 lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park) 1562 { 1563 kthread_t *t = curthread; 1564 klwp_t *lwp = ttolwp(t); 1565 proc_t *p = ttoproc(t); 1566 lwp_timer_t lwpt; 1567 lwpchan_t cv_lwpchan; 1568 lwpchan_t m_lwpchan; 1569 caddr_t timedwait; 1570 volatile uint16_t type = 0; 1571 volatile uint8_t mtype = 0; 1572 uchar_t waiters; 1573 volatile int error; 1574 clock_t tim = -1; 1575 volatile int locked = 0; 1576 volatile int m_locked = 0; 1577 volatile int cvwatched = 0; 1578 volatile int mpwatched = 0; 1579 label_t ljb; 1580 volatile int no_lwpchan = 1; 1581 int imm_timeout = 0; 1582 int imm_unpark = 0; 1583 1584 if ((caddr_t)cv >= p->p_as->a_userlimit || 1585 (caddr_t)mp >= p->p_as->a_userlimit) 1586 return (set_errno(EFAULT)); 1587 1588 /* 1589 * Put the lwp in an orderly state for debugging, 1590 * in case we are stopped while sleeping, below. 1591 */ 1592 prstop(PR_REQUESTED, 0); 1593 1594 timedwait = (caddr_t)tsp; 1595 if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0) 1596 return (set_errno(error)); 1597 if (lwpt.lwpt_imm_timeout) { 1598 imm_timeout = 1; 1599 timedwait = NULL; 1600 } 1601 1602 (void) new_mstate(t, LMS_USER_LOCK); 1603 1604 if (on_fault(&ljb)) { 1605 if (no_lwpchan) { 1606 error = EFAULT; 1607 goto out; 1608 } 1609 if (m_locked) { 1610 m_locked = 0; 1611 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1612 } 1613 if (locked) { 1614 locked = 0; 1615 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1616 } 1617 /* 1618 * set up another on_fault() for a possible fault 1619 * on the user lock accessed at "efault" 1620 */ 1621 if (on_fault(&ljb)) { 1622 if (m_locked) { 1623 m_locked = 0; 1624 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1625 } 1626 goto out; 1627 } 1628 error = EFAULT; 1629 goto efault; 1630 } 1631 1632 /* 1633 * Force Copy-on-write if necessary and ensure that the 1634 * synchronization object resides in read/write memory. 1635 * Cause an EFAULT return now if this is not so. 
1636 */ 1637 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 1638 suword8_noerr(&mp->mutex_type, mtype); 1639 if (UPIMUTEX(mtype) == 0) { 1640 /* convert user level mutex, "mp", to a unique lwpchan */ 1641 /* check if mtype is ok to use below, instead of type from cv */ 1642 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 1643 &m_lwpchan, LWPCHAN_MPPOOL)) { 1644 error = EFAULT; 1645 goto out; 1646 } 1647 } 1648 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1649 suword16_noerr(&cv->cond_type, type); 1650 /* convert user level condition variable, "cv", to a unique lwpchan */ 1651 if (!get_lwpchan(p->p_as, (caddr_t)cv, type, 1652 &cv_lwpchan, LWPCHAN_CVPOOL)) { 1653 error = EFAULT; 1654 goto out; 1655 } 1656 no_lwpchan = 0; 1657 cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1658 if (UPIMUTEX(mtype) == 0) 1659 mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), 1660 S_WRITE); 1661 1662 /* 1663 * lwpchan_lock ensures that the calling lwp is put to sleep atomically 1664 * with respect to a possible wakeup which is a result of either 1665 * an lwp_cond_signal() or an lwp_cond_broadcast(). 1666 * 1667 * What's misleading, is that the lwp is put to sleep after the 1668 * condition variable's mutex is released. This is OK as long as 1669 * the release operation is also done while holding lwpchan_lock. 1670 * The lwp is then put to sleep when the possibility of pagefaulting 1671 * or sleeping is completely eliminated. 1672 */ 1673 lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL); 1674 locked = 1; 1675 if (UPIMUTEX(mtype) == 0) { 1676 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1677 m_locked = 1; 1678 suword8_noerr(&cv->cond_waiters_kernel, 1); 1679 /* 1680 * unlock the condition variable's mutex. (pagefaults are 1681 * possible here.) 1682 */ 1683 set_owner_pid(mp, 0, 0); 1684 ulock_clear(&mp->mutex_lockw); 1685 fuword8_noerr(&mp->mutex_waiters, &waiters); 1686 if (waiters != 0) { 1687 /* 1688 * Given the locking of lwpchan_lock around the release 1689 * of the mutex and checking for waiters, the following 1690 * call to lwp_release() can fail ONLY if the lock 1691 * acquirer is interrupted after setting the waiter bit, 1692 * calling lwp_block() and releasing lwpchan_lock. 1693 * In this case, it could get pulled off the lwp sleep 1694 * q (via setrun()) before the following call to 1695 * lwp_release() occurs. In this case, the lock 1696 * requestor will update the waiter bit correctly by 1697 * re-evaluating it. 1698 */ 1699 if (lwp_release(&m_lwpchan, &waiters, 0)) 1700 suword8_noerr(&mp->mutex_waiters, waiters); 1701 } 1702 m_locked = 0; 1703 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1704 } else { 1705 suword8_noerr(&cv->cond_waiters_kernel, 1); 1706 error = lwp_upimutex_unlock(mp, mtype); 1707 if (error) { /* if the upimutex unlock failed */ 1708 locked = 0; 1709 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1710 goto out; 1711 } 1712 } 1713 no_fault(); 1714 1715 if (mpwatched) { 1716 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1717 mpwatched = 0; 1718 } 1719 if (cvwatched) { 1720 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1721 cvwatched = 0; 1722 } 1723 1724 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 1725 /* 1726 * We received a signal at user-level before calling here 1727 * or another thread wants us to return immediately 1728 * with EINTR. See lwp_unpark(). 
1729 */ 1730 imm_unpark = 1; 1731 t->t_unpark = 0; 1732 timedwait = NULL; 1733 } else if (timedwait) { 1734 /* 1735 * If we successfully queue the timeout, 1736 * then don't drop t_delay_lock until 1737 * we are on the sleep queue (below). 1738 */ 1739 mutex_enter(&t->t_delay_lock); 1740 if (lwp_timer_enqueue(&lwpt) != 0) { 1741 mutex_exit(&t->t_delay_lock); 1742 imm_timeout = 1; 1743 timedwait = NULL; 1744 } 1745 } 1746 t->t_flag |= T_WAITCVSEM; 1747 lwp_block(&cv_lwpchan); 1748 /* 1749 * Nothing should happen to cause the lwp to go to sleep 1750 * until after it returns from swtch(). 1751 */ 1752 if (timedwait) 1753 mutex_exit(&t->t_delay_lock); 1754 locked = 0; 1755 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL); 1756 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 1757 (imm_timeout | imm_unpark)) 1758 setrun(t); 1759 swtch(); 1760 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 1761 if (timedwait) 1762 tim = lwp_timer_dequeue(&lwpt); 1763 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 1764 MUSTRETURN(p, t) || imm_unpark) 1765 error = EINTR; 1766 else if (imm_timeout || (timedwait && tim == -1)) 1767 error = ETIME; 1768 lwp->lwp_asleep = 0; 1769 lwp->lwp_sysabort = 0; 1770 setallwatch(); 1771 1772 if (t->t_mstate == LMS_USER_LOCK) 1773 (void) new_mstate(t, LMS_SYSTEM); 1774 1775 if (tsp && check_park) /* copyout the residual time left */ 1776 error = lwp_timer_copyout(&lwpt, error); 1777 1778 /* the mutex is reacquired by the caller on return to user level */ 1779 if (error) { 1780 /* 1781 * If we were concurrently lwp_cond_signal()d and we 1782 * received a UNIX signal or got a timeout, then perform 1783 * another lwp_cond_signal() to avoid consuming the wakeup. 1784 */ 1785 if (t->t_release) 1786 (void) lwp_cond_signal(cv); 1787 return (set_errno(error)); 1788 } 1789 return (0); 1790 1791 efault: 1792 /* 1793 * make sure that the user level lock is dropped before 1794 * returning to caller, since the caller always re-acquires it. 1795 */ 1796 if (UPIMUTEX(mtype) == 0) { 1797 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL); 1798 m_locked = 1; 1799 set_owner_pid(mp, 0, 0); 1800 ulock_clear(&mp->mutex_lockw); 1801 fuword8_noerr(&mp->mutex_waiters, &waiters); 1802 if (waiters != 0) { 1803 /* 1804 * See comment above on lock clearing and lwp_release() 1805 * success/failure. 1806 */ 1807 if (lwp_release(&m_lwpchan, &waiters, 0)) 1808 suword8_noerr(&mp->mutex_waiters, waiters); 1809 } 1810 m_locked = 0; 1811 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL); 1812 } else { 1813 (void) lwp_upimutex_unlock(mp, mtype); 1814 } 1815 out: 1816 no_fault(); 1817 if (mpwatched) 1818 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 1819 if (cvwatched) 1820 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1821 if (t->t_mstate == LMS_USER_LOCK) 1822 (void) new_mstate(t, LMS_SYSTEM); 1823 return (set_errno(error)); 1824 } 1825 1826 /* 1827 * wakeup one lwp that's blocked on this condition variable. 
1828 */ 1829 int 1830 lwp_cond_signal(lwp_cond_t *cv) 1831 { 1832 proc_t *p = ttoproc(curthread); 1833 lwpchan_t lwpchan; 1834 uchar_t waiters; 1835 volatile uint16_t type = 0; 1836 volatile int locked = 0; 1837 volatile int watched = 0; 1838 label_t ljb; 1839 int error = 0; 1840 1841 if ((caddr_t)cv >= p->p_as->a_userlimit) 1842 return (set_errno(EFAULT)); 1843 1844 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1845 1846 if (on_fault(&ljb)) { 1847 if (locked) 1848 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1849 error = EFAULT; 1850 goto out; 1851 } 1852 /* 1853 * Force Copy-on-write if necessary and ensure that the 1854 * synchronization object resides in read/write memory. 1855 * Cause an EFAULT return now if this is not so. 1856 */ 1857 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1858 suword16_noerr(&cv->cond_type, type); 1859 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1860 &lwpchan, LWPCHAN_CVPOOL)) { 1861 error = EFAULT; 1862 goto out; 1863 } 1864 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1865 locked = 1; 1866 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1867 if (waiters != 0) { 1868 /* 1869 * The following call to lwp_release() might fail but it is 1870 * OK to write into the waiters bit below, since the memory 1871 * could not have been re-used or unmapped (for correctly 1872 * written user programs) as in the case of lwp_mutex_wakeup(). 1873 * For an incorrect program, we should not care about data 1874 * corruption since this is just one instance of other places 1875 * where corruption can occur for such a program. Of course 1876 * if the memory is unmapped, normal fault recovery occurs. 1877 */ 1878 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1879 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1880 } 1881 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1882 out: 1883 no_fault(); 1884 if (watched) 1885 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1886 if (error) 1887 return (set_errno(error)); 1888 return (0); 1889 } 1890 1891 /* 1892 * wakeup every lwp that's blocked on this condition variable. 1893 */ 1894 int 1895 lwp_cond_broadcast(lwp_cond_t *cv) 1896 { 1897 proc_t *p = ttoproc(curthread); 1898 lwpchan_t lwpchan; 1899 volatile uint16_t type = 0; 1900 volatile int locked = 0; 1901 volatile int watched = 0; 1902 label_t ljb; 1903 uchar_t waiters; 1904 int error = 0; 1905 1906 if ((caddr_t)cv >= p->p_as->a_userlimit) 1907 return (set_errno(EFAULT)); 1908 1909 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1910 1911 if (on_fault(&ljb)) { 1912 if (locked) 1913 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1914 error = EFAULT; 1915 goto out; 1916 } 1917 /* 1918 * Force Copy-on-write if necessary and ensure that the 1919 * synchronization object resides in read/write memory. 1920 * Cause an EFAULT return now if this is not so. 
1921 */ 1922 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1923 suword16_noerr(&cv->cond_type, type); 1924 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1925 &lwpchan, LWPCHAN_CVPOOL)) { 1926 error = EFAULT; 1927 goto out; 1928 } 1929 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1930 locked = 1; 1931 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1932 if (waiters != 0) { 1933 lwp_release_all(&lwpchan); 1934 suword8_noerr(&cv->cond_waiters_kernel, 0); 1935 } 1936 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1937 out: 1938 no_fault(); 1939 if (watched) 1940 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1941 if (error) 1942 return (set_errno(error)); 1943 return (0); 1944 } 1945 1946 int 1947 lwp_sema_trywait(lwp_sema_t *sp) 1948 { 1949 kthread_t *t = curthread; 1950 proc_t *p = ttoproc(t); 1951 label_t ljb; 1952 volatile int locked = 0; 1953 volatile int watched = 0; 1954 volatile uint16_t type = 0; 1955 int count; 1956 lwpchan_t lwpchan; 1957 uchar_t waiters; 1958 int error = 0; 1959 1960 if ((caddr_t)sp >= p->p_as->a_userlimit) 1961 return (set_errno(EFAULT)); 1962 1963 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1964 1965 if (on_fault(&ljb)) { 1966 if (locked) 1967 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1968 error = EFAULT; 1969 goto out; 1970 } 1971 /* 1972 * Force Copy-on-write if necessary and ensure that the 1973 * synchronization object resides in read/write memory. 1974 * Cause an EFAULT return now if this is not so. 1975 */ 1976 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1977 suword16_noerr((void *)&sp->sema_type, type); 1978 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1979 &lwpchan, LWPCHAN_CVPOOL)) { 1980 error = EFAULT; 1981 goto out; 1982 } 1983 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1984 locked = 1; 1985 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 1986 if (count == 0) 1987 error = EBUSY; 1988 else 1989 suword32_noerr((void *)&sp->sema_count, --count); 1990 if (count != 0) { 1991 fuword8_noerr(&sp->sema_waiters, &waiters); 1992 if (waiters != 0) { 1993 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1994 suword8_noerr(&sp->sema_waiters, waiters); 1995 } 1996 } 1997 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1998 out: 1999 no_fault(); 2000 if (watched) 2001 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2002 if (error) 2003 return (set_errno(error)); 2004 return (0); 2005 } 2006 2007 /* 2008 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 2009 */ 2010 int 2011 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2012 { 2013 kthread_t *t = curthread; 2014 klwp_t *lwp = ttolwp(t); 2015 proc_t *p = ttoproc(t); 2016 lwp_timer_t lwpt; 2017 caddr_t timedwait; 2018 clock_t tim = -1; 2019 label_t ljb; 2020 volatile int locked = 0; 2021 volatile int watched = 0; 2022 volatile uint16_t type = 0; 2023 int count; 2024 lwpchan_t lwpchan; 2025 uchar_t waiters; 2026 int error = 0; 2027 int time_error; 2028 int imm_timeout = 0; 2029 int imm_unpark = 0; 2030 2031 if ((caddr_t)sp >= p->p_as->a_userlimit) 2032 return (set_errno(EFAULT)); 2033 2034 /* 2035 * Put the lwp in an orderly state for debugging, 2036 * in case we are stopped while sleeping, below. 
2037 */ 2038 prstop(PR_REQUESTED, 0); 2039 2040 timedwait = (caddr_t)tsp; 2041 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2042 lwpt.lwpt_imm_timeout) { 2043 imm_timeout = 1; 2044 timedwait = NULL; 2045 } 2046 2047 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2048 2049 if (on_fault(&ljb)) { 2050 if (locked) 2051 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2052 error = EFAULT; 2053 goto out; 2054 } 2055 /* 2056 * Force Copy-on-write if necessary and ensure that the 2057 * synchronization object resides in read/write memory. 2058 * Cause an EFAULT return now if this is not so. 2059 */ 2060 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2061 suword16_noerr((void *)&sp->sema_type, type); 2062 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2063 &lwpchan, LWPCHAN_CVPOOL)) { 2064 error = EFAULT; 2065 goto out; 2066 } 2067 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2068 locked = 1; 2069 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2070 while (error == 0 && count == 0) { 2071 if (time_error) { 2072 /* 2073 * The SUSV3 Posix spec is very clear that we 2074 * should get no error from validating the 2075 * timer until we would actually sleep. 2076 */ 2077 error = time_error; 2078 break; 2079 } 2080 suword8_noerr(&sp->sema_waiters, 1); 2081 if (watched) 2082 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2083 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2084 /* 2085 * We received a signal at user-level before calling 2086 * here or another thread wants us to return 2087 * immediately with EINTR. See lwp_unpark(). 2088 */ 2089 imm_unpark = 1; 2090 t->t_unpark = 0; 2091 timedwait = NULL; 2092 } else if (timedwait) { 2093 /* 2094 * If we successfully queue the timeout, 2095 * then don't drop t_delay_lock until 2096 * we are on the sleep queue (below). 2097 */ 2098 mutex_enter(&t->t_delay_lock); 2099 if (lwp_timer_enqueue(&lwpt) != 0) { 2100 mutex_exit(&t->t_delay_lock); 2101 imm_timeout = 1; 2102 timedwait = NULL; 2103 } 2104 } 2105 t->t_flag |= T_WAITCVSEM; 2106 lwp_block(&lwpchan); 2107 /* 2108 * Nothing should happen to cause the lwp to sleep 2109 * again until after it returns from swtch(). 
2110 */ 2111 if (timedwait) 2112 mutex_exit(&t->t_delay_lock); 2113 locked = 0; 2114 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2115 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || 2116 (imm_timeout | imm_unpark)) 2117 setrun(t); 2118 swtch(); 2119 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2120 if (timedwait) 2121 tim = lwp_timer_dequeue(&lwpt); 2122 setallwatch(); 2123 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || 2124 MUSTRETURN(p, t) || imm_unpark) 2125 error = EINTR; 2126 else if (imm_timeout || (timedwait && tim == -1)) 2127 error = ETIME; 2128 lwp->lwp_asleep = 0; 2129 lwp->lwp_sysabort = 0; 2130 watched = watch_disable_addr((caddr_t)sp, 2131 sizeof (*sp), S_WRITE); 2132 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2133 locked = 1; 2134 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2135 } 2136 if (error == 0) 2137 suword32_noerr((void *)&sp->sema_count, --count); 2138 if (count != 0) { 2139 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2140 suword8_noerr(&sp->sema_waiters, waiters); 2141 } 2142 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2143 out: 2144 no_fault(); 2145 if (watched) 2146 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2147 if (tsp && check_park && !time_error) 2148 error = lwp_timer_copyout(&lwpt, error); 2149 if (error) 2150 return (set_errno(error)); 2151 return (0); 2152 } 2153 2154 int 2155 lwp_sema_post(lwp_sema_t *sp) 2156 { 2157 proc_t *p = ttoproc(curthread); 2158 label_t ljb; 2159 volatile int locked = 0; 2160 volatile int watched = 0; 2161 volatile uint16_t type = 0; 2162 int count; 2163 lwpchan_t lwpchan; 2164 uchar_t waiters; 2165 int error = 0; 2166 2167 if ((caddr_t)sp >= p->p_as->a_userlimit) 2168 return (set_errno(EFAULT)); 2169 2170 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2171 2172 if (on_fault(&ljb)) { 2173 if (locked) 2174 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2175 error = EFAULT; 2176 goto out; 2177 } 2178 /* 2179 * Force Copy-on-write if necessary and ensure that the 2180 * synchronization object resides in read/write memory. 2181 * Cause an EFAULT return now if this is not so. 2182 */ 2183 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2184 suword16_noerr(&sp->sema_type, type); 2185 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2186 &lwpchan, LWPCHAN_CVPOOL)) { 2187 error = EFAULT; 2188 goto out; 2189 } 2190 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2191 locked = 1; 2192 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2193 if (count == _SEM_VALUE_MAX) 2194 error = EOVERFLOW; 2195 else 2196 suword32_noerr(&sp->sema_count, ++count); 2197 if (count == 1) { 2198 fuword8_noerr(&sp->sema_waiters, &waiters); 2199 if (waiters) { 2200 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2201 suword8_noerr(&sp->sema_waiters, waiters); 2202 } 2203 } 2204 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2205 out: 2206 no_fault(); 2207 if (watched) 2208 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2209 if (error) 2210 return (set_errno(error)); 2211 return (0); 2212 } 2213 2214 #define TRW_WANT_WRITE 0x1 2215 #define TRW_LOCK_GRANTED 0x2 2216 2217 #define READ_LOCK 0 2218 #define WRITE_LOCK 1 2219 #define TRY_FLAG 0x10 2220 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2221 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2222 2223 /* 2224 * Release one writer or one or more readers. Compute the rwstate word to 2225 * reflect the new state of the queue. For a safe hand-off we copy the new 2226 * rwstate value back to userland before we wake any of the new lock holders. 
2227 * 2228 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2229 * being given precedence over readers of the same priority). 2230 * 2231 * If the first thread is a reader we scan the queue releasing all readers 2232 * until we hit a writer or the end of the queue. If the first thread is a 2233 * writer we still need to check for another writer. 2234 */ 2235 void 2236 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2237 { 2238 sleepq_head_t *sqh; 2239 kthread_t *tp; 2240 kthread_t **tpp; 2241 kthread_t *tpnext; 2242 kthread_t *wakelist = NULL; 2243 uint32_t rwstate = 0; 2244 int wcount = 0; 2245 int rcount = 0; 2246 2247 sqh = lwpsqhash(lwpchan); 2248 disp_lock_enter(&sqh->sq_lock); 2249 tpp = &sqh->sq_queue.sq_first; 2250 while ((tp = *tpp) != NULL) { 2251 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2252 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2253 if (tp->t_writer & TRW_WANT_WRITE) { 2254 if ((wcount++ == 0) && (rcount == 0)) { 2255 rwstate |= URW_WRITE_LOCKED; 2256 2257 /* Just one writer to wake. */ 2258 sleepq_unlink(tpp, tp); 2259 wakelist = tp; 2260 2261 /* tpp already set for next thread. */ 2262 continue; 2263 } else { 2264 rwstate |= URW_HAS_WAITERS; 2265 /* We need look no further. */ 2266 break; 2267 } 2268 } else { 2269 rcount++; 2270 if (wcount == 0) { 2271 rwstate++; 2272 2273 /* Add reader to wake list. */ 2274 sleepq_unlink(tpp, tp); 2275 tp->t_link = wakelist; 2276 wakelist = tp; 2277 2278 /* tpp already set for next thread. */ 2279 continue; 2280 } else { 2281 rwstate |= URW_HAS_WAITERS; 2282 /* We need look no further. */ 2283 break; 2284 } 2285 } 2286 } 2287 tpp = &tp->t_link; 2288 } 2289 2290 /* Copy the new rwstate back to userland. */ 2291 suword32_noerr(&rw->rwlock_readers, rwstate); 2292 2293 /* Wake the new lock holder(s) up. */ 2294 tp = wakelist; 2295 while (tp != NULL) { 2296 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2297 tp->t_wchan0 = NULL; 2298 tp->t_wchan = NULL; 2299 tp->t_sobj_ops = NULL; 2300 tp->t_writer |= TRW_LOCK_GRANTED; 2301 tpnext = tp->t_link; 2302 tp->t_link = NULL; 2303 CL_WAKEUP(tp); 2304 thread_unlock_high(tp); 2305 tp = tpnext; 2306 } 2307 2308 disp_lock_exit(&sqh->sq_lock); 2309 } 2310 2311 /* 2312 * We enter here holding the user-level mutex, which we must release before 2313 * returning or blocking. Based on lwp_cond_wait(). 2314 */ 2315 static int 2316 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2317 { 2318 lwp_mutex_t *mp = NULL; 2319 kthread_t *t = curthread; 2320 kthread_t *tp; 2321 klwp_t *lwp = ttolwp(t); 2322 proc_t *p = ttoproc(t); 2323 lwp_timer_t lwpt; 2324 lwpchan_t lwpchan; 2325 lwpchan_t mlwpchan; 2326 caddr_t timedwait; 2327 volatile uint16_t type = 0; 2328 volatile uint8_t mtype = 0; 2329 uchar_t mwaiters; 2330 volatile int error = 0; 2331 int time_error; 2332 clock_t tim = -1; 2333 volatile int locked = 0; 2334 volatile int mlocked = 0; 2335 volatile int watched = 0; 2336 volatile int mwatched = 0; 2337 label_t ljb; 2338 volatile int no_lwpchan = 1; 2339 int imm_timeout = 0; 2340 int try_flag; 2341 uint32_t rwstate; 2342 int acquired = 0; 2343 2344 /* We only check rw because the mutex is included in it. */ 2345 if ((caddr_t)rw >= p->p_as->a_userlimit) 2346 return (set_errno(EFAULT)); 2347 2348 /* 2349 * Put the lwp in an orderly state for debugging, 2350 * in case we are stopped while sleeping, below. 2351 */ 2352 prstop(PR_REQUESTED, 0); 2353 2354 /* We must only report this error if we are about to sleep (later). 
*/ 2355 timedwait = (caddr_t)tsp; 2356 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2357 lwpt.lwpt_imm_timeout) { 2358 imm_timeout = 1; 2359 timedwait = NULL; 2360 } 2361 2362 (void) new_mstate(t, LMS_USER_LOCK); 2363 2364 if (on_fault(&ljb)) { 2365 if (no_lwpchan) { 2366 error = EFAULT; 2367 goto out_nodrop; 2368 } 2369 if (mlocked) { 2370 mlocked = 0; 2371 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2372 } 2373 if (locked) { 2374 locked = 0; 2375 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2376 } 2377 /* 2378 * Set up another on_fault() for a possible fault 2379 * on the user lock accessed at "out_drop". 2380 */ 2381 if (on_fault(&ljb)) { 2382 if (mlocked) { 2383 mlocked = 0; 2384 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2385 } 2386 error = EFAULT; 2387 goto out_nodrop; 2388 } 2389 error = EFAULT; 2390 goto out_nodrop; 2391 } 2392 2393 /* Process rd_wr (including sanity check). */ 2394 try_flag = (rd_wr & TRY_FLAG); 2395 rd_wr &= ~TRY_FLAG; 2396 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2397 error = EINVAL; 2398 goto out_nodrop; 2399 } 2400 2401 /* 2402 * Force Copy-on-write if necessary and ensure that the 2403 * synchronization object resides in read/write memory. 2404 * Cause an EFAULT return now if this is not so. 2405 */ 2406 mp = &rw->mutex; 2407 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2408 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2409 suword8_noerr(&mp->mutex_type, mtype); 2410 suword16_noerr(&rw->rwlock_type, type); 2411 2412 /* We can only continue for simple USYNC_PROCESS locks. */ 2413 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2414 error = EINVAL; 2415 goto out_nodrop; 2416 } 2417 2418 /* Convert user level mutex, "mp", to a unique lwpchan. */ 2419 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype, 2420 &mlwpchan, LWPCHAN_MPPOOL)) { 2421 error = EFAULT; 2422 goto out_nodrop; 2423 } 2424 2425 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2426 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2427 &lwpchan, LWPCHAN_CVPOOL)) { 2428 error = EFAULT; 2429 goto out_nodrop; 2430 } 2431 2432 no_lwpchan = 0; 2433 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2434 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2435 2436 /* 2437 * lwpchan_lock() ensures that the calling LWP is put to sleep 2438 * atomically with respect to a possible wakeup which is a result 2439 * of lwp_rwlock_unlock(). 2440 * 2441 * What's misleading is that the LWP is put to sleep after the 2442 * rwlock's mutex is released. This is OK as long as the release 2443 * operation is also done while holding mlwpchan. The LWP is then 2444 * put to sleep when the possibility of pagefaulting or sleeping 2445 * has been completely eliminated. 2446 */ 2447 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2448 locked = 1; 2449 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2450 mlocked = 1; 2451 2452 /* 2453 * Fetch the current rwlock state. 2454 * 2455 * The possibility of spurious wake-ups or killed waiters means 2456 * rwstate's URW_HAS_WAITERS bit may indicate false positives. 2457 * We only fix these if they are important to us. 2458 * 2459 * Although various error states can be observed here (e.g. the lock 2460 * is not held, but there are waiters) we assume these are application 2461 * errors and so we take no corrective action. 2462 */ 2463 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2464 /* 2465 * We cannot legitimately get here from user-level 2466 * without URW_HAS_WAITERS being set.
2467 * Set it now to guard against user-level error. 2468 */ 2469 rwstate |= URW_HAS_WAITERS; 2470 2471 /* 2472 * We can try only if the lock isn't held by a writer. 2473 */ 2474 if (!(rwstate & URW_WRITE_LOCKED)) { 2475 tp = lwp_queue_waiter(&lwpchan); 2476 if (tp == NULL) { 2477 /* 2478 * Hmmm, rwstate indicates waiters but there are 2479 * none queued. This could just be the result of a 2480 * spurious wakeup, so let's ignore it. 2481 * 2482 * We now have a chance to acquire the lock 2483 * uncontended, but this is the last chance for 2484 * a writer to acquire the lock without blocking. 2485 */ 2486 if (rd_wr == READ_LOCK) { 2487 rwstate++; 2488 acquired = 1; 2489 } else if ((rwstate & URW_READERS_MASK) == 0) { 2490 rwstate |= URW_WRITE_LOCKED; 2491 acquired = 1; 2492 } 2493 } else if (rd_wr == READ_LOCK) { 2494 /* 2495 * This is the last chance for a reader to acquire 2496 * the lock now, but it can only do so if there is 2497 * no writer of equal or greater priority at the 2498 * head of the queue . 2499 * 2500 * It is also just possible that there is a reader 2501 * at the head of the queue. This may be the result 2502 * of a spurious wakeup or an application failure. 2503 * In this case we only acquire the lock if we have 2504 * equal or greater priority. It is not our job to 2505 * release spurious waiters. 2506 */ 2507 pri_t our_pri = DISP_PRIO(t); 2508 pri_t his_pri = DISP_PRIO(tp); 2509 2510 if ((our_pri > his_pri) || ((our_pri == his_pri) && 2511 !(tp->t_writer & TRW_WANT_WRITE))) { 2512 rwstate++; 2513 acquired = 1; 2514 } 2515 } 2516 } 2517 2518 if (acquired || try_flag || time_error) { 2519 /* 2520 * We're not going to block this time. 2521 */ 2522 suword32_noerr(&rw->rwlock_readers, rwstate); 2523 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2524 locked = 0; 2525 2526 if (acquired) { 2527 /* 2528 * Got the lock! 2529 */ 2530 error = 0; 2531 2532 } else if (try_flag) { 2533 /* 2534 * We didn't get the lock and we're about to block. 2535 * If we're doing a trylock, return EBUSY instead. 2536 */ 2537 error = EBUSY; 2538 2539 } else if (time_error) { 2540 /* 2541 * The SUSV3 POSIX spec is very clear that we should 2542 * get no error from validating the timer (above) 2543 * until we would actually sleep. 2544 */ 2545 error = time_error; 2546 } 2547 2548 goto out_drop; 2549 } 2550 2551 /* 2552 * We're about to block, so indicate what kind of waiter we are. 2553 */ 2554 t->t_writer = 0; 2555 if (rd_wr == WRITE_LOCK) 2556 t->t_writer = TRW_WANT_WRITE; 2557 suword32_noerr(&rw->rwlock_readers, rwstate); 2558 2559 /* 2560 * Unlock the rwlock's mutex (pagefaults are possible here). 2561 */ 2562 set_owner_pid(mp, 0, 0); 2563 ulock_clear(&mp->mutex_lockw); 2564 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2565 if (mwaiters != 0) { 2566 /* 2567 * Given the locking of mlwpchan around the release of 2568 * the mutex and checking for waiters, the following 2569 * call to lwp_release() can fail ONLY if the lock 2570 * acquirer is interrupted after setting the waiter bit, 2571 * calling lwp_block() and releasing mlwpchan. 2572 * In this case, it could get pulled off the LWP sleep 2573 * queue (via setrun()) before the following call to 2574 * lwp_release() occurs, and the lock requestor will 2575 * update the waiter bit correctly by re-evaluating it. 
2576 */ 2577 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2578 suword8_noerr(&mp->mutex_waiters, mwaiters); 2579 } 2580 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2581 mlocked = 0; 2582 no_fault(); 2583 2584 if (mwatched) { 2585 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2586 mwatched = 0; 2587 } 2588 if (watched) { 2589 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2590 watched = 0; 2591 } 2592 2593 if (timedwait) { 2594 /* 2595 * If we successfully queue the timeout, 2596 * then don't drop t_delay_lock until 2597 * we are on the sleep queue (below). 2598 */ 2599 mutex_enter(&t->t_delay_lock); 2600 if (lwp_timer_enqueue(&lwpt) != 0) { 2601 mutex_exit(&t->t_delay_lock); 2602 imm_timeout = 1; 2603 timedwait = NULL; 2604 } 2605 } 2606 t->t_flag |= T_WAITCVSEM; 2607 lwp_block(&lwpchan); 2608 2609 /* 2610 * Nothing should happen to cause the LWp to go to sleep until after 2611 * it returns from swtch(). 2612 */ 2613 if (timedwait) 2614 mutex_exit(&t->t_delay_lock); 2615 locked = 0; 2616 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2617 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout) 2618 setrun(t); 2619 swtch(); 2620 2621 /* 2622 * We're back, but we need to work out why. Were we interrupted? Did 2623 * we timeout? Were we granted the lock? 2624 */ 2625 error = EAGAIN; 2626 acquired = (t->t_writer & TRW_LOCK_GRANTED); 2627 t->t_writer = 0; 2628 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE); 2629 if (timedwait) 2630 tim = lwp_timer_dequeue(&lwpt); 2631 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t)) 2632 error = EINTR; 2633 else if (imm_timeout || (timedwait && tim == -1)) 2634 error = ETIME; 2635 lwp->lwp_asleep = 0; 2636 lwp->lwp_sysabort = 0; 2637 setallwatch(); 2638 2639 /* 2640 * If we were granted the lock we don't care about EINTR or ETIME. 2641 */ 2642 if (acquired) 2643 error = 0; 2644 2645 if (t->t_mstate == LMS_USER_LOCK) 2646 (void) new_mstate(t, LMS_SYSTEM); 2647 2648 if (error) 2649 return (set_errno(error)); 2650 return (0); 2651 2652 out_drop: 2653 /* 2654 * Make sure that the user level lock is dropped before returning 2655 * to the caller. 2656 */ 2657 if (!mlocked) { 2658 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2659 mlocked = 1; 2660 } 2661 set_owner_pid(mp, 0, 0); 2662 ulock_clear(&mp->mutex_lockw); 2663 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2664 if (mwaiters != 0) { 2665 /* 2666 * See comment above on lock clearing and lwp_release() 2667 * success/failure. 2668 */ 2669 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2670 suword8_noerr(&mp->mutex_waiters, mwaiters); 2671 } 2672 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2673 mlocked = 0; 2674 2675 out_nodrop: 2676 no_fault(); 2677 if (mwatched) 2678 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2679 if (watched) 2680 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2681 if (t->t_mstate == LMS_USER_LOCK) 2682 (void) new_mstate(t, LMS_SYSTEM); 2683 if (error) 2684 return (set_errno(error)); 2685 return (0); 2686 } 2687 2688 /* 2689 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2690 * we never drop the lock. 2691 */ 2692 static int 2693 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2694 { 2695 kthread_t *t = curthread; 2696 proc_t *p = ttoproc(t); 2697 lwpchan_t lwpchan; 2698 volatile uint16_t type = 0; 2699 volatile int error = 0; 2700 volatile int locked = 0; 2701 volatile int watched = 0; 2702 label_t ljb; 2703 volatile int no_lwpchan = 1; 2704 uint32_t rwstate; 2705 2706 /* We only check rw because the mutex is included in it. 
*/ 2707 if ((caddr_t)rw >= p->p_as->a_userlimit) 2708 return (set_errno(EFAULT)); 2709 2710 if (on_fault(&ljb)) { 2711 if (no_lwpchan) { 2712 error = EFAULT; 2713 goto out_nodrop; 2714 } 2715 if (locked) { 2716 locked = 0; 2717 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2718 } 2719 error = EFAULT; 2720 goto out_nodrop; 2721 } 2722 2723 /* 2724 * Force Copy-on-write if necessary and ensure that the 2725 * synchronization object resides in read/write memory. 2726 * Cause an EFAULT return now if this is not so. 2727 */ 2728 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2729 suword16_noerr(&rw->rwlock_type, type); 2730 2731 /* We can only continue for simple USYNC_PROCESS locks. */ 2732 if (type != USYNC_PROCESS) { 2733 error = EINVAL; 2734 goto out_nodrop; 2735 } 2736 2737 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2738 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2739 &lwpchan, LWPCHAN_CVPOOL)) { 2740 error = EFAULT; 2741 goto out_nodrop; 2742 } 2743 2744 no_lwpchan = 0; 2745 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2746 2747 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2748 locked = 1; 2749 2750 /* 2751 * We can resolve multiple readers (except the last reader) here. 2752 * For the last reader or a writer we need lwp_rwlock_release(), 2753 * to which we also delegate the task of copying the new rwstate 2754 * back to userland (see the comment there). 2755 */ 2756 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2757 if (rwstate & URW_WRITE_LOCKED) 2758 lwp_rwlock_release(&lwpchan, rw); 2759 else if ((rwstate & URW_READERS_MASK) > 0) { 2760 rwstate--; 2761 if ((rwstate & URW_READERS_MASK) == 0) 2762 lwp_rwlock_release(&lwpchan, rw); 2763 else 2764 suword32_noerr(&rw->rwlock_readers, rwstate); 2765 } 2766 2767 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2768 locked = 0; 2769 error = 0; 2770 2771 out_nodrop: 2772 no_fault(); 2773 if (watched) 2774 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2775 if (error) 2776 return (set_errno(error)); 2777 return (0); 2778 } 2779 2780 int 2781 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2782 { 2783 switch (subcode) { 2784 case 0: 2785 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2786 case 1: 2787 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2788 case 2: 2789 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2790 case 3: 2791 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2792 case 4: 2793 return (lwp_rwlock_unlock(rwlp)); 2794 } 2795 return (set_errno(EINVAL)); 2796 } 2797 2798 /* 2799 * Return the owner of the user-level s-object. 2800 * Since we can't really do this, return NULL. 2801 */ 2802 /* ARGSUSED */ 2803 static kthread_t * 2804 lwpsobj_owner(caddr_t sobj) 2805 { 2806 return ((kthread_t *)NULL); 2807 } 2808 2809 /* 2810 * Wake up a thread asleep on a user-level synchronization 2811 * object. 2812 */ 2813 static void 2814 lwp_unsleep(kthread_t *t) 2815 { 2816 ASSERT(THREAD_LOCK_HELD(t)); 2817 if (t->t_wchan0 != NULL) { 2818 sleepq_head_t *sqh; 2819 sleepq_t *sqp = t->t_sleepq; 2820 2821 if (sqp != NULL) { 2822 sqh = lwpsqhash(&t->t_lwpchan); 2823 ASSERT(&sqh->sq_queue == sqp); 2824 sleepq_unsleep(t); 2825 disp_lock_exit_high(&sqh->sq_lock); 2826 CL_SETRUN(t); 2827 return; 2828 } 2829 } 2830 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2831 } 2832 2833 /* 2834 * Change the priority of a thread asleep on a user-level 2835 * synchronization object. To maintain proper priority order, 2836 * we: 2837 * o dequeue the thread. 2838 * o change its priority. 
2839 * o re-enqueue the thread. 2840 * Assumption: the thread is locked on entry. 2841 */ 2842 static void 2843 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2844 { 2845 ASSERT(THREAD_LOCK_HELD(t)); 2846 if (t->t_wchan0 != NULL) { 2847 sleepq_t *sqp = t->t_sleepq; 2848 2849 sleepq_dequeue(t); 2850 *t_prip = pri; 2851 sleepq_insert(sqp, t); 2852 } else 2853 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2854 } 2855 2856 /* 2857 * Clean up a left-over process-shared robust mutex 2858 */ 2859 static void 2860 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2861 { 2862 uint16_t flag; 2863 uchar_t waiters; 2864 label_t ljb; 2865 pid_t owner_pid; 2866 lwp_mutex_t *lp; 2867 volatile int locked = 0; 2868 volatile int watched = 0; 2869 volatile struct upimutex *upimutex = NULL; 2870 volatile int upilocked = 0; 2871 2872 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2873 != (USYNC_PROCESS | LOCK_ROBUST)) 2874 return; 2875 2876 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2877 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2878 if (on_fault(&ljb)) { 2879 if (locked) 2880 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2881 if (upilocked) 2882 upimutex_unlock((upimutex_t *)upimutex, 0); 2883 goto out; 2884 } 2885 2886 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2887 2888 if (UPIMUTEX(ent->lwpchan_type)) { 2889 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2890 upib_t *upibp = &UPI_CHAIN(lwpchan); 2891 2892 if (owner_pid != curproc->p_pid) 2893 goto out; 2894 mutex_enter(&upibp->upib_lock); 2895 upimutex = upi_get(upibp, &lwpchan); 2896 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2897 mutex_exit(&upibp->upib_lock); 2898 goto out; 2899 } 2900 mutex_exit(&upibp->upib_lock); 2901 upilocked = 1; 2902 flag = lwp_clear_mutex(lp, lockflg); 2903 suword8_noerr(&lp->mutex_lockw, 0); 2904 upimutex_unlock((upimutex_t *)upimutex, flag); 2905 } else { 2906 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2907 locked = 1; 2908 /* 2909 * Clear the spinners count because one of our 2910 * threads could have been spinning for this lock 2911 * at user level when the process was suddenly killed. 2912 * There is no harm in this since user-level libc code 2913 * will adapt to the sudden change in the spinner count. 2914 */ 2915 suword8_noerr(&lp->mutex_spinners, 0); 2916 if (owner_pid != curproc->p_pid) { 2917 /* 2918 * We are not the owner. There may or may not be one. 2919 * If there are waiters, we wake up one or all of them. 2920 * It doesn't hurt to wake them up in error since 2921 * they will just retry the lock and go to sleep 2922 * again if necessary. 2923 */ 2924 fuword8_noerr(&lp->mutex_waiters, &waiters); 2925 if (waiters != 0) { /* there are waiters */ 2926 fuword16_noerr(&lp->mutex_flag, &flag); 2927 if (flag & LOCK_NOTRECOVERABLE) { 2928 lwp_release_all(&ent->lwpchan_lwpchan); 2929 suword8_noerr(&lp->mutex_waiters, 0); 2930 } else if (lwp_release(&ent->lwpchan_lwpchan, 2931 &waiters, 0)) { 2932 suword8_noerr(&lp->mutex_waiters, 2933 waiters); 2934 } 2935 } 2936 } else { 2937 /* 2938 * We are the owner. Release it. 
2939 */ 2940 (void) lwp_clear_mutex(lp, lockflg); 2941 ulock_clear(&lp->mutex_lockw); 2942 fuword8_noerr(&lp->mutex_waiters, &waiters); 2943 if (waiters && 2944 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2945 suword8_noerr(&lp->mutex_waiters, waiters); 2946 } 2947 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2948 } 2949 out: 2950 no_fault(); 2951 if (watched) 2952 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2953 } 2954 2955 /* 2956 * Register a process-shared robust mutex in the lwpchan cache. 2957 */ 2958 int 2959 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr) 2960 { 2961 int error = 0; 2962 volatile int watched; 2963 label_t ljb; 2964 uint8_t type; 2965 lwpchan_t lwpchan; 2966 2967 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2968 return (set_errno(EFAULT)); 2969 2970 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2971 2972 if (on_fault(&ljb)) { 2973 error = EFAULT; 2974 } else { 2975 /* 2976 * Force Copy-on-write if necessary and ensure that the 2977 * synchronization object resides in read/write memory. 2978 * Cause an EFAULT return now if this is not so. 2979 */ 2980 fuword8_noerr(&lp->mutex_type, &type); 2981 suword8_noerr(&lp->mutex_type, type); 2982 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2983 != (USYNC_PROCESS|LOCK_ROBUST)) { 2984 error = EINVAL; 2985 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp, 2986 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) { 2987 error = EFAULT; 2988 } 2989 } 2990 no_fault(); 2991 if (watched) 2992 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2993 if (error) 2994 return (set_errno(error)); 2995 return (0); 2996 } 2997 2998 /* 2999 * There is a user-level robust lock registration in libc. 3000 * Mark it as invalid by storing -1 into the location of the pointer. 3001 */ 3002 static void 3003 lwp_mutex_unregister(void *uaddr) 3004 { 3005 if (get_udatamodel() == DATAMODEL_NATIVE) { 3006 (void) sulword(uaddr, (ulong_t)-1); 3007 #ifdef _SYSCALL32_IMPL 3008 } else { 3009 (void) suword32(uaddr, (uint32_t)-1); 3010 #endif 3011 } 3012 } 3013 3014 int 3015 lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner) 3016 { 3017 kthread_t *t = curthread; 3018 proc_t *p = ttoproc(t); 3019 int error = 0; 3020 volatile int locked = 0; 3021 volatile int watched = 0; 3022 label_t ljb; 3023 volatile uint8_t type = 0; 3024 uint16_t flag; 3025 lwpchan_t lwpchan; 3026 3027 if ((caddr_t)lp >= p->p_as->a_userlimit) 3028 return (set_errno(EFAULT)); 3029 3030 (void) new_mstate(t, LMS_USER_LOCK); 3031 3032 if (on_fault(&ljb)) { 3033 if (locked) 3034 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3035 error = EFAULT; 3036 goto out; 3037 } 3038 /* 3039 * Force Copy-on-write if necessary and ensure that the 3040 * synchronization object resides in read/write memory. 3041 * Cause an EFAULT return now if this is not so. 3042 */ 3043 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3044 suword8_noerr(&lp->mutex_type, type); 3045 if (UPIMUTEX(type)) { 3046 no_fault(); 3047 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3048 if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED) 3049 set_owner_pid(lp, owner, 3050 (type & USYNC_PROCESS)? 
p->p_pid : 0); 3051 if (error) 3052 return (set_errno(error)); 3053 return (0); 3054 } 3055 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3056 &lwpchan, LWPCHAN_MPPOOL)) { 3057 error = EFAULT; 3058 goto out; 3059 } 3060 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3061 locked = 1; 3062 if (type & LOCK_ROBUST) { 3063 fuword16_noerr(&lp->mutex_flag, &flag); 3064 if (flag & LOCK_NOTRECOVERABLE) { 3065 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3066 error = ENOTRECOVERABLE; 3067 goto out; 3068 } 3069 } 3070 3071 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3072 3073 if (!ulock_try(&lp->mutex_lockw)) 3074 error = EBUSY; 3075 else { 3076 set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0); 3077 if (type & LOCK_ROBUST) { 3078 fuword16_noerr(&lp->mutex_flag, &flag); 3079 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3080 if (flag & LOCK_OWNERDEAD) 3081 error = EOWNERDEAD; 3082 else if (type & USYNC_PROCESS_ROBUST) 3083 error = ELOCKUNMAPPED; 3084 else 3085 error = EOWNERDEAD; 3086 } 3087 } 3088 } 3089 locked = 0; 3090 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3091 out: 3092 3093 if (t->t_mstate == LMS_USER_LOCK) 3094 (void) new_mstate(t, LMS_SYSTEM); 3095 3096 no_fault(); 3097 if (watched) 3098 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3099 if (error) 3100 return (set_errno(error)); 3101 return (0); 3102 } 3103 3104 /* 3105 * unlock the mutex and unblock lwps that are trying to acquire this mutex. 3106 * the blocked lwp resumes and retries to acquire the lock. 3107 */ 3108 int 3109 lwp_mutex_unlock(lwp_mutex_t *lp) 3110 { 3111 proc_t *p = ttoproc(curthread); 3112 lwpchan_t lwpchan; 3113 uchar_t waiters; 3114 volatile int locked = 0; 3115 volatile int watched = 0; 3116 volatile uint8_t type = 0; 3117 label_t ljb; 3118 uint16_t flag; 3119 int error = 0; 3120 3121 if ((caddr_t)lp >= p->p_as->a_userlimit) 3122 return (set_errno(EFAULT)); 3123 3124 if (on_fault(&ljb)) { 3125 if (locked) 3126 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3127 error = EFAULT; 3128 goto out; 3129 } 3130 3131 /* 3132 * Force Copy-on-write if necessary and ensure that the 3133 * synchronization object resides in read/write memory. 3134 * Cause an EFAULT return now if this is not so. 3135 */ 3136 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3137 suword8_noerr(&lp->mutex_type, type); 3138 3139 if (UPIMUTEX(type)) { 3140 no_fault(); 3141 error = lwp_upimutex_unlock(lp, type); 3142 if (error) 3143 return (set_errno(error)); 3144 return (0); 3145 } 3146 3147 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3148 3149 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type, 3150 &lwpchan, LWPCHAN_MPPOOL)) { 3151 error = EFAULT; 3152 goto out; 3153 } 3154 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL); 3155 locked = 1; 3156 if (type & LOCK_ROBUST) { 3157 fuword16_noerr(&lp->mutex_flag, &flag); 3158 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 3159 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 3160 flag |= LOCK_NOTRECOVERABLE; 3161 suword16_noerr(&lp->mutex_flag, flag); 3162 } 3163 } 3164 set_owner_pid(lp, 0, 0); 3165 ulock_clear(&lp->mutex_lockw); 3166 /* 3167 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will 3168 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release() 3169 * may fail. If it fails, do not write into the waiter bit. 3170 * The call to lwp_release() might fail due to one of three reasons: 3171 * 3172 * 1. due to the thread which set the waiter bit not actually 3173 * sleeping since it got the lock on the re-try.
The waiter 3174 * bit will then be correctly updated by that thread. This 3175 * window may be closed by reading the wait bit again here 3176 * and not calling lwp_release() at all if it is zero. 3177 * 2. the thread which set the waiter bit and went to sleep 3178 * was woken up by a signal. This time, the waiter recomputes 3179 * the wait bit in the return with EINTR code. 3180 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3181 * memory that has been re-used after the lock was dropped. 3182 * In this case, writing into the waiter bit would cause data 3183 * corruption. 3184 */ 3185 fuword8_noerr(&lp->mutex_waiters, &waiters); 3186 if (waiters) { 3187 if ((type & LOCK_ROBUST) && 3188 (flag & LOCK_NOTRECOVERABLE)) { 3189 lwp_release_all(&lwpchan); 3190 suword8_noerr(&lp->mutex_waiters, 0); 3191 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3192 suword8_noerr(&lp->mutex_waiters, waiters); 3193 } 3194 } 3195 3196 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3197 out: 3198 no_fault(); 3199 if (watched) 3200 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3201 if (error) 3202 return (set_errno(error)); 3203 return (0); 3204 } 3205
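/*
 * Illustrative sketch (editorial addition; not compiled into the kernel and
 * not the actual libc implementation): the user-level acquire loop that the
 * waiter-bit reasoning in lwp_mutex_unlock() above assumes.  The function
 * name user_mutex_lock() and the kernel-entry stub lwp_mutex_block_in_kernel()
 * are hypothetical stand-ins; the real protocol lives in libc and reaches the
 * kernel via lwp_mutex_timedlock().  The field names mutex_lockw and
 * mutex_waiters are the ones used throughout this file, and atomic_swap_8()
 * is assumed as the <atomic.h> primitive for the lock byte.
 *
 *	int
 *	user_mutex_lock(lwp_mutex_t *lp)
 *	{
 *		for (;;) {
 *			uncontended fast path:
 *			if (atomic_swap_8(&lp->mutex_lockw, 1) == 0)
 *				return (0);
 *			advertise a waiter, then re-try; winning here means we
 *			never actually sleep ("reason 1" above):
 *			lp->mutex_waiters = 1;
 *			if (atomic_swap_8(&lp->mutex_lockw, 1) == 0)
 *				return (0);
 *			otherwise sleep until lwp_release() wakes us; a sleep
 *			interrupted by a signal re-evaluates the waiter bit on
 *			the next pass ("reason 2" above):
 *			lwp_mutex_block_in_kernel(lp);
 *		}
 *	}
 */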