/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
static void lwp_mutex_unregister(void *uaddr);
static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
static int iswanted(kthread_t *, lwpchan_t *);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread.  This is a per-thread limit that
 * can be administered on a system-wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps, is added for the number of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of the maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				/*
				 * If there is a user-level robust lock
				 * registration, mark it as invalid.
				 */
				if ((addr = ent->lwpchan_uaddr) != NULL)
					lwp_mutex_unregister(addr);
				kmem_free(ent, sizeof (*ent));
				atomic_dec_32(&lcp->lwpchan_entries);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
			    == (USYNC_PROCESS | LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
    lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
    int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_uaddr = uaddr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_inc_32(&lcp->lwpchan_entries);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list.  Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t.  This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned.  Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any.  upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit.  Waiters might have been
			 * interrupted.  No need to clear the w bit (upimutex
			 * will soon be freed).  Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread;	/* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit.  Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired.  Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up.  Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup().  All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block().  turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted.  The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count.  So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not.  If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.  If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held.  Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event.  The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not.  In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew).  curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error.  The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Set the owner and ownerpid fields of a user-level mutex.  Note, this
 * function uses the suword*_noerr routines which must be called between
 * on_fault/no_fault.  However, this routine itself does not do the
 * on_fault/no_fault and it is assumed all the callers will do so instead!
 */
static void
set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
{
	union {
		uint64_t word64;
		uint32_t word32[2];
	} un;

	un.word64 = (uint64_t)owner;

	suword32_noerr(&lp->mutex_ownerpid, pid);
#if defined(_LP64)
	if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
		suword64_noerr(&lp->mutex_owner, un.word64);
		return;
	}
#endif
	/* mutex is unaligned or we are running on a 32-bit kernel */
	suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
}

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up.  Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed.  If there is a waiter, the lock will be
			 * handed off.  If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies.  The last owner will free kmem, since
			 * it will find the upimutex has no waiters.  So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain.  And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state.  If the thread indeed blocks,
	 * this state becomes valid.  If not, the state is converted back to
	 * LMS_SYSTEM.  So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED) {
			volatile int locked = error != 0;
			if (on_fault(&ljb)) {
				if (locked != 0)
					error = lwp_upimutex_unlock(lp, type);
				else
					error = EFAULT;
				goto upierr;
			}
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
			no_fault();
		}
upierr:
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit.  The waiters field in
			 * the lock is not reliable.  Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false.  It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but
			 * has since been re-used to hold a lwp cv or lwp
			 * semaphore.  The thread "tp" found to match the lwp
			 * lock's wchan is actually sleeping for the cv or
			 * semaphore which now has the same wchan.  In this
			 * case, lwp_release() should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);		/* drop sleep queue lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex.  the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan.  The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock().  The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.  The call to lwp_release() might fail due to one of
	 * three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try.  The waiter
	 *	   bit will then be correctly updated by that thread.  This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal.  This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex.  (pagefaults are
		 * possible here.)
		 */
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs.  In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {		/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)	/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}

/*
 * wakeup one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
1872 */ 1873 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1874 suword16_noerr(&cv->cond_type, type); 1875 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1876 &lwpchan, LWPCHAN_CVPOOL)) { 1877 error = EFAULT; 1878 goto out; 1879 } 1880 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1881 locked = 1; 1882 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1883 if (waiters != 0) { 1884 /* 1885 * The following call to lwp_release() might fail but it is 1886 * OK to write into the waiters bit below, since the memory 1887 * could not have been re-used or unmapped (for correctly 1888 * written user programs) as in the case of lwp_mutex_wakeup(). 1889 * For an incorrect program, we should not care about data 1890 * corruption since this is just one instance of other places 1891 * where corruption can occur for such a program. Of course 1892 * if the memory is unmapped, normal fault recovery occurs. 1893 */ 1894 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 1895 suword8_noerr(&cv->cond_waiters_kernel, waiters); 1896 } 1897 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1898 out: 1899 no_fault(); 1900 if (watched) 1901 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1902 if (error) 1903 return (set_errno(error)); 1904 return (0); 1905 } 1906 1907 /* 1908 * wakeup every lwp that's blocked on this condition variable. 1909 */ 1910 int 1911 lwp_cond_broadcast(lwp_cond_t *cv) 1912 { 1913 proc_t *p = ttoproc(curthread); 1914 lwpchan_t lwpchan; 1915 volatile uint16_t type = 0; 1916 volatile int locked = 0; 1917 volatile int watched = 0; 1918 label_t ljb; 1919 uchar_t waiters; 1920 int error = 0; 1921 1922 if ((caddr_t)cv >= p->p_as->a_userlimit) 1923 return (set_errno(EFAULT)); 1924 1925 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1926 1927 if (on_fault(&ljb)) { 1928 if (locked) 1929 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1930 error = EFAULT; 1931 goto out; 1932 } 1933 /* 1934 * Force Copy-on-write if necessary and ensure that the 1935 * synchronization object resides in read/write memory. 1936 * Cause an EFAULT return now if this is not so. 1937 */ 1938 fuword16_noerr(&cv->cond_type, (uint16_t *)&type); 1939 suword16_noerr(&cv->cond_type, type); 1940 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type, 1941 &lwpchan, LWPCHAN_CVPOOL)) { 1942 error = EFAULT; 1943 goto out; 1944 } 1945 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 1946 locked = 1; 1947 fuword8_noerr(&cv->cond_waiters_kernel, &waiters); 1948 if (waiters != 0) { 1949 lwp_release_all(&lwpchan); 1950 suword8_noerr(&cv->cond_waiters_kernel, 0); 1951 } 1952 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1953 out: 1954 no_fault(); 1955 if (watched) 1956 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE); 1957 if (error) 1958 return (set_errno(error)); 1959 return (0); 1960 } 1961 1962 int 1963 lwp_sema_trywait(lwp_sema_t *sp) 1964 { 1965 kthread_t *t = curthread; 1966 proc_t *p = ttoproc(t); 1967 label_t ljb; 1968 volatile int locked = 0; 1969 volatile int watched = 0; 1970 volatile uint16_t type = 0; 1971 int count; 1972 lwpchan_t lwpchan; 1973 uchar_t waiters; 1974 int error = 0; 1975 1976 if ((caddr_t)sp >= p->p_as->a_userlimit) 1977 return (set_errno(EFAULT)); 1978 1979 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 1980 1981 if (on_fault(&ljb)) { 1982 if (locked) 1983 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 1984 error = EFAULT; 1985 goto out; 1986 } 1987 /* 1988 * Force Copy-on-write if necessary and ensure that the 1989 * synchronization object resides in read/write memory. 
1990 * Cause an EFAULT return now if this is not so. 1991 */ 1992 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 1993 suword16_noerr((void *)&sp->sema_type, type); 1994 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 1995 &lwpchan, LWPCHAN_CVPOOL)) { 1996 error = EFAULT; 1997 goto out; 1998 } 1999 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2000 locked = 1; 2001 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2002 if (count == 0) 2003 error = EBUSY; 2004 else 2005 suword32_noerr((void *)&sp->sema_count, --count); 2006 if (count != 0) { 2007 fuword8_noerr(&sp->sema_waiters, &waiters); 2008 if (waiters != 0) { 2009 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2010 suword8_noerr(&sp->sema_waiters, waiters); 2011 } 2012 } 2013 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2014 out: 2015 no_fault(); 2016 if (watched) 2017 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2018 if (error) 2019 return (set_errno(error)); 2020 return (0); 2021 } 2022 2023 /* 2024 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument. 2025 */ 2026 int 2027 lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park) 2028 { 2029 kthread_t *t = curthread; 2030 klwp_t *lwp = ttolwp(t); 2031 proc_t *p = ttoproc(t); 2032 lwp_timer_t lwpt; 2033 caddr_t timedwait; 2034 clock_t tim = -1; 2035 label_t ljb; 2036 volatile int locked = 0; 2037 volatile int watched = 0; 2038 volatile uint16_t type = 0; 2039 int count; 2040 lwpchan_t lwpchan; 2041 uchar_t waiters; 2042 int error = 0; 2043 int time_error; 2044 int imm_timeout = 0; 2045 int imm_unpark = 0; 2046 2047 if ((caddr_t)sp >= p->p_as->a_userlimit) 2048 return (set_errno(EFAULT)); 2049 2050 /* 2051 * Put the lwp in an orderly state for debugging, 2052 * in case we are stopped while sleeping, below. 2053 */ 2054 prstop(PR_REQUESTED, 0); 2055 2056 timedwait = (caddr_t)tsp; 2057 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2058 lwpt.lwpt_imm_timeout) { 2059 imm_timeout = 1; 2060 timedwait = NULL; 2061 } 2062 2063 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2064 2065 if (on_fault(&ljb)) { 2066 if (locked) 2067 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2068 error = EFAULT; 2069 goto out; 2070 } 2071 /* 2072 * Force Copy-on-write if necessary and ensure that the 2073 * synchronization object resides in read/write memory. 2074 * Cause an EFAULT return now if this is not so. 2075 */ 2076 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type); 2077 suword16_noerr((void *)&sp->sema_type, type); 2078 if (!get_lwpchan(p->p_as, (caddr_t)sp, type, 2079 &lwpchan, LWPCHAN_CVPOOL)) { 2080 error = EFAULT; 2081 goto out; 2082 } 2083 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2084 locked = 1; 2085 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count); 2086 while (error == 0 && count == 0) { 2087 if (time_error) { 2088 /* 2089 * The SUSV3 Posix spec is very clear that we 2090 * should get no error from validating the 2091 * timer until we would actually sleep. 2092 */ 2093 error = time_error; 2094 break; 2095 } 2096 suword8_noerr(&sp->sema_waiters, 1); 2097 if (watched) 2098 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2099 if (check_park && (!schedctl_is_park() || t->t_unpark)) { 2100 /* 2101 * We received a signal at user-level before calling 2102 * here or another thread wants us to return 2103 * immediately with EINTR. See lwp_unpark(). 
2104			 */
2105			imm_unpark = 1;
2106			t->t_unpark = 0;
2107			timedwait = NULL;
2108		} else if (timedwait) {
2109			/*
2110			 * If we successfully queue the timeout,
2111			 * then don't drop t_delay_lock until
2112			 * we are on the sleep queue (below).
2113			 */
2114			mutex_enter(&t->t_delay_lock);
2115			if (lwp_timer_enqueue(&lwpt) != 0) {
2116				mutex_exit(&t->t_delay_lock);
2117				imm_timeout = 1;
2118				timedwait = NULL;
2119			}
2120		}
2121		t->t_flag |= T_WAITCVSEM;
2122		lwp_block(&lwpchan);
2123		/*
2124		 * Nothing should happen to cause the lwp to sleep
2125		 * again until after it returns from swtch().
2126		 */
2127		if (timedwait)
2128			mutex_exit(&t->t_delay_lock);
2129		locked = 0;
2130		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2131		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
2132		    (imm_timeout | imm_unpark))
2133			setrun(t);
2134		swtch();
2135		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2136		if (timedwait)
2137			tim = lwp_timer_dequeue(&lwpt);
2138		setallwatch();
2139		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
2140		    MUSTRETURN(p, t) || imm_unpark)
2141			error = EINTR;
2142		else if (imm_timeout || (timedwait && tim == -1))
2143			error = ETIME;
2144		lwp->lwp_asleep = 0;
2145		lwp->lwp_sysabort = 0;
2146		watched = watch_disable_addr((caddr_t)sp,
2147		    sizeof (*sp), S_WRITE);
2148		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2149		locked = 1;
2150		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
2151	}
2152	if (error == 0)
2153		suword32_noerr((void *)&sp->sema_count, --count);
2154	if (count != 0) {
		/* wake a waiter only if one is recorded (cf. lwp_sema_trywait()) */
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
2155			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
2156			suword8_noerr(&sp->sema_waiters, waiters);
		}
2157	}
2158	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2159 out:
2160	no_fault();
2161	if (watched)
2162		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2163	if (tsp && check_park && !time_error)
2164		error = lwp_timer_copyout(&lwpt, error);
2165	if (error)
2166		return (set_errno(error));
2167	return (0);
2168 }
2169
2170 int
2171 lwp_sema_post(lwp_sema_t *sp)
2172 {
2173	proc_t *p = ttoproc(curthread);
2174	label_t ljb;
2175	volatile int locked = 0;
2176	volatile int watched = 0;
2177	volatile uint16_t type = 0;
2178	int count;
2179	lwpchan_t lwpchan;
2180	uchar_t waiters;
2181	int error = 0;
2182
2183	if ((caddr_t)sp >= p->p_as->a_userlimit)
2184		return (set_errno(EFAULT));
2185
2186	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2187
2188	if (on_fault(&ljb)) {
2189		if (locked)
2190			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2191		error = EFAULT;
2192		goto out;
2193	}
2194	/*
2195	 * Force Copy-on-write if necessary and ensure that the
2196	 * synchronization object resides in read/write memory.
2198 */ 2199 fuword16_noerr(&sp->sema_type, (uint16_t *)&type); 2200 suword16_noerr(&sp->sema_type, type); 2201 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type, 2202 &lwpchan, LWPCHAN_CVPOOL)) { 2203 error = EFAULT; 2204 goto out; 2205 } 2206 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2207 locked = 1; 2208 fuword32_noerr(&sp->sema_count, (uint32_t *)&count); 2209 if (count == _SEM_VALUE_MAX) 2210 error = EOVERFLOW; 2211 else 2212 suword32_noerr(&sp->sema_count, ++count); 2213 if (count == 1) { 2214 fuword8_noerr(&sp->sema_waiters, &waiters); 2215 if (waiters) { 2216 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM); 2217 suword8_noerr(&sp->sema_waiters, waiters); 2218 } 2219 } 2220 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2221 out: 2222 no_fault(); 2223 if (watched) 2224 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE); 2225 if (error) 2226 return (set_errno(error)); 2227 return (0); 2228 } 2229 2230 #define TRW_WANT_WRITE 0x1 2231 #define TRW_LOCK_GRANTED 0x2 2232 2233 #define READ_LOCK 0 2234 #define WRITE_LOCK 1 2235 #define TRY_FLAG 0x10 2236 #define READ_LOCK_TRY (READ_LOCK | TRY_FLAG) 2237 #define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG) 2238 2239 /* 2240 * Release one writer or one or more readers. Compute the rwstate word to 2241 * reflect the new state of the queue. For a safe hand-off we copy the new 2242 * rwstate value back to userland before we wake any of the new lock holders. 2243 * 2244 * Note that sleepq_insert() implements a prioritized FIFO (with writers 2245 * being given precedence over readers of the same priority). 2246 * 2247 * If the first thread is a reader we scan the queue releasing all readers 2248 * until we hit a writer or the end of the queue. If the first thread is a 2249 * writer we still need to check for another writer. 2250 */ 2251 void 2252 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw) 2253 { 2254 sleepq_head_t *sqh; 2255 kthread_t *tp; 2256 kthread_t **tpp; 2257 kthread_t *tpnext; 2258 kthread_t *wakelist = NULL; 2259 uint32_t rwstate = 0; 2260 int wcount = 0; 2261 int rcount = 0; 2262 2263 sqh = lwpsqhash(lwpchan); 2264 disp_lock_enter(&sqh->sq_lock); 2265 tpp = &sqh->sq_queue.sq_first; 2266 while ((tp = *tpp) != NULL) { 2267 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 && 2268 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) { 2269 if (tp->t_writer & TRW_WANT_WRITE) { 2270 if ((wcount++ == 0) && (rcount == 0)) { 2271 rwstate |= URW_WRITE_LOCKED; 2272 2273 /* Just one writer to wake. */ 2274 sleepq_unlink(tpp, tp); 2275 wakelist = tp; 2276 2277 /* tpp already set for next thread. */ 2278 continue; 2279 } else { 2280 rwstate |= URW_HAS_WAITERS; 2281 /* We need look no further. */ 2282 break; 2283 } 2284 } else { 2285 rcount++; 2286 if (wcount == 0) { 2287 rwstate++; 2288 2289 /* Add reader to wake list. */ 2290 sleepq_unlink(tpp, tp); 2291 tp->t_link = wakelist; 2292 wakelist = tp; 2293 2294 /* tpp already set for next thread. */ 2295 continue; 2296 } else { 2297 rwstate |= URW_HAS_WAITERS; 2298 /* We need look no further. */ 2299 break; 2300 } 2301 } 2302 } 2303 tpp = &tp->t_link; 2304 } 2305 2306 /* Copy the new rwstate back to userland. */ 2307 suword32_noerr(&rw->rwlock_readers, rwstate); 2308 2309 /* Wake the new lock holder(s) up. 
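	 * Each thread taken off the sleep queue gets TRW_LOCK_GRANTED set
	 * below, so when it resumes in lwp_rwlock_lock() it sees the lock
	 * as already handed off and does not retry.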
*/ 2310 tp = wakelist; 2311 while (tp != NULL) { 2312 DTRACE_SCHED1(wakeup, kthread_t *, tp); 2313 tp->t_wchan0 = NULL; 2314 tp->t_wchan = NULL; 2315 tp->t_sobj_ops = NULL; 2316 tp->t_writer |= TRW_LOCK_GRANTED; 2317 tpnext = tp->t_link; 2318 tp->t_link = NULL; 2319 CL_WAKEUP(tp); 2320 thread_unlock_high(tp); 2321 tp = tpnext; 2322 } 2323 2324 disp_lock_exit(&sqh->sq_lock); 2325 } 2326 2327 /* 2328 * We enter here holding the user-level mutex, which we must release before 2329 * returning or blocking. Based on lwp_cond_wait(). 2330 */ 2331 static int 2332 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr) 2333 { 2334 lwp_mutex_t *mp = NULL; 2335 kthread_t *t = curthread; 2336 kthread_t *tp; 2337 klwp_t *lwp = ttolwp(t); 2338 proc_t *p = ttoproc(t); 2339 lwp_timer_t lwpt; 2340 lwpchan_t lwpchan; 2341 lwpchan_t mlwpchan; 2342 caddr_t timedwait; 2343 volatile uint16_t type = 0; 2344 volatile uint8_t mtype = 0; 2345 uchar_t mwaiters; 2346 volatile int error = 0; 2347 int time_error; 2348 clock_t tim = -1; 2349 volatile int locked = 0; 2350 volatile int mlocked = 0; 2351 volatile int watched = 0; 2352 volatile int mwatched = 0; 2353 label_t ljb; 2354 volatile int no_lwpchan = 1; 2355 int imm_timeout = 0; 2356 int try_flag; 2357 uint32_t rwstate; 2358 int acquired = 0; 2359 2360 /* We only check rw because the mutex is included in it. */ 2361 if ((caddr_t)rw >= p->p_as->a_userlimit) 2362 return (set_errno(EFAULT)); 2363 2364 /* 2365 * Put the lwp in an orderly state for debugging, 2366 * in case we are stopped while sleeping, below. 2367 */ 2368 prstop(PR_REQUESTED, 0); 2369 2370 /* We must only report this error if we are about to sleep (later). */ 2371 timedwait = (caddr_t)tsp; 2372 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 && 2373 lwpt.lwpt_imm_timeout) { 2374 imm_timeout = 1; 2375 timedwait = NULL; 2376 } 2377 2378 (void) new_mstate(t, LMS_USER_LOCK); 2379 2380 if (on_fault(&ljb)) { 2381 if (no_lwpchan) { 2382 error = EFAULT; 2383 goto out_nodrop; 2384 } 2385 if (mlocked) { 2386 mlocked = 0; 2387 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2388 } 2389 if (locked) { 2390 locked = 0; 2391 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2392 } 2393 /* 2394 * Set up another on_fault() for a possible fault 2395 * on the user lock accessed at "out_drop". 2396 */ 2397 if (on_fault(&ljb)) { 2398 if (mlocked) { 2399 mlocked = 0; 2400 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2401 } 2402 error = EFAULT; 2403 goto out_nodrop; 2404 } 2405 error = EFAULT; 2406 goto out_nodrop; 2407 } 2408 2409 /* Process rd_wr (including sanity check). */ 2410 try_flag = (rd_wr & TRY_FLAG); 2411 rd_wr &= ~TRY_FLAG; 2412 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) { 2413 error = EINVAL; 2414 goto out_nodrop; 2415 } 2416 2417 /* 2418 * Force Copy-on-write if necessary and ensure that the 2419 * synchronization object resides in read/write memory. 2420 * Cause an EFAULT return now if this is not so. 2421 */ 2422 mp = &rw->mutex; 2423 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype); 2424 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2425 suword8_noerr(&mp->mutex_type, mtype); 2426 suword16_noerr(&rw->rwlock_type, type); 2427 2428 /* We can only continue for simple USYNC_PROCESS locks. */ 2429 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) { 2430 error = EINVAL; 2431 goto out_nodrop; 2432 } 2433 2434 /* Convert user level mutex, "mp", to a unique lwpchan. 
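	 * (get_lwpchan() converts the object's user address into the
	 * (lc_wchan0, lc_wchan) identity that the sleep-queue and
	 * lwpchan-lock hashes are keyed on.)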
	 */
2435	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
2436	    &mlwpchan, LWPCHAN_MPPOOL)) {
2437		error = EFAULT;
2438		goto out_nodrop;
2439	}
2440
2441	/* Convert user level rwlock, "rw", to a unique lwpchan. */
2442	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2443	    &lwpchan, LWPCHAN_CVPOOL)) {
2444		error = EFAULT;
2445		goto out_nodrop;
2446	}
2447
2448	no_lwpchan = 0;
2449	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2450	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2451
2452	/*
2453	 * lwpchan_lock() ensures that the calling LWP is put to sleep
2454	 * atomically with respect to a possible wakeup which is a result
2455	 * of lwp_rwlock_unlock().
2456	 *
2457	 * What's misleading is that the LWP is put to sleep after the
2458	 * rwlock's mutex is released. This is OK as long as the release
2459	 * operation is also done while holding mlwpchan. The LWP is then
2460	 * put to sleep when the possibility of pagefaulting or sleeping
2461	 * has been completely eliminated.
2462	 */
2463	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2464	locked = 1;
2465	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2466	mlocked = 1;
2467
2468	/*
2469	 * Fetch the current rwlock state.
2470	 *
2471	 * The possibility of spurious wake-ups or killed waiters means
2472	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
2473	 * We only fix these if they are important to us.
2474	 *
2475	 * Although various error states can be observed here (e.g. the lock
2476	 * is not held, but there are waiters) we assume these are application
2477	 * errors and so we take no corrective action.
2478	 */
2479	fuword32_noerr(&rw->rwlock_readers, &rwstate);
2480	/*
2481	 * We cannot legitimately get here from user-level
2482	 * without URW_HAS_WAITERS being set.
2483	 * Set it now to guard against user-level error.
2484	 */
2485	rwstate |= URW_HAS_WAITERS;
2486
2487	/*
2488	 * We can try only if the lock isn't held by a writer.
2489	 */
2490	if (!(rwstate & URW_WRITE_LOCKED)) {
2491		tp = lwp_queue_waiter(&lwpchan);
2492		if (tp == NULL) {
2493			/*
2494			 * Hmmm, rwstate indicates waiters but there are
2495			 * none queued. This could just be the result of a
2496			 * spurious wakeup, so let's ignore it.
2497			 *
2498			 * We now have a chance to acquire the lock
2499			 * uncontended, but this is the last chance for
2500			 * a writer to acquire the lock without blocking.
2501			 */
2502			if (rd_wr == READ_LOCK) {
2503				rwstate++;
2504				acquired = 1;
2505			} else if ((rwstate & URW_READERS_MASK) == 0) {
2506				rwstate |= URW_WRITE_LOCKED;
2507				acquired = 1;
2508			}
2509		} else if (rd_wr == READ_LOCK) {
2510			/*
2511			 * This is the last chance for a reader to acquire
2512			 * the lock now, but it can only do so if there is
2513			 * no writer of equal or greater priority at the
2514			 * head of the queue.
2515			 *
2516			 * It is also just possible that there is a reader
2517			 * at the head of the queue. This may be the result
2518			 * of a spurious wakeup or an application failure.
2519			 * In this case we only acquire the lock if we have
2520			 * equal or greater priority. It is not our job to
2521			 * release spurious waiters.
2522			 */
2523			pri_t our_pri = DISP_PRIO(t);
2524			pri_t his_pri = DISP_PRIO(tp);
2525
2526			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
2527			    !(tp->t_writer & TRW_WANT_WRITE))) {
2528				rwstate++;
2529				acquired = 1;
2530			}
2531		}
2532	}
2533
2534	if (acquired || try_flag || time_error) {
2535		/*
2536		 * We're not going to block this time.
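		 * (The rwstate computed above, including URW_HAS_WAITERS and
		 * any ownership we just took, is copied back to userland
		 * before the lwpchan lock is dropped.)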
2537		 */
2538		suword32_noerr(&rw->rwlock_readers, rwstate);
2539		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2540		locked = 0;
2541
2542		if (acquired) {
2543			/*
2544			 * Got the lock!
2545			 */
2546			error = 0;
2547
2548		} else if (try_flag) {
2549			/*
2550			 * We didn't get the lock and we're about to block.
2551			 * If we're doing a trylock, return EBUSY instead.
2552			 */
2553			error = EBUSY;
2554
2555		} else if (time_error) {
2556			/*
2557			 * The SUSV3 POSIX spec is very clear that we should
2558			 * get no error from validating the timer (above)
2559			 * until we would actually sleep.
2560			 */
2561			error = time_error;
2562		}
2563
2564		goto out_drop;
2565	}
2566
2567	/*
2568	 * We're about to block, so indicate what kind of waiter we are.
2569	 */
2570	t->t_writer = 0;
2571	if (rd_wr == WRITE_LOCK)
2572		t->t_writer = TRW_WANT_WRITE;
2573	suword32_noerr(&rw->rwlock_readers, rwstate);
2574
2575	/*
2576	 * Unlock the rwlock's mutex (pagefaults are possible here).
2577	 */
2578	set_owner_pid(mp, 0, 0);
2579	ulock_clear(&mp->mutex_lockw);
2580	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2581	if (mwaiters != 0) {
2582		/*
2583		 * Given the locking of mlwpchan around the release of
2584		 * the mutex and checking for waiters, the following
2585		 * call to lwp_release() can fail ONLY if the lock
2586		 * acquirer is interrupted after setting the waiter bit,
2587		 * calling lwp_block() and releasing mlwpchan.
2588		 * In this case, it could get pulled off the LWP sleep
2589		 * queue (via setrun()) before the following call to
2590		 * lwp_release() occurs, and the lock requestor will
2591		 * update the waiter bit correctly by re-evaluating it.
2592		 */
2593		if (lwp_release(&mlwpchan, &mwaiters, 0))
2594			suword8_noerr(&mp->mutex_waiters, mwaiters);
2595	}
2596	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2597	mlocked = 0;
2598	no_fault();
2599
2600	if (mwatched) {
2601		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2602		mwatched = 0;
2603	}
2604	if (watched) {
2605		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2606		watched = 0;
2607	}
2608
2609	if (timedwait) {
2610		/*
2611		 * If we successfully queue the timeout,
2612		 * then don't drop t_delay_lock until
2613		 * we are on the sleep queue (below).
2614		 */
2615		mutex_enter(&t->t_delay_lock);
2616		if (lwp_timer_enqueue(&lwpt) != 0) {
2617			mutex_exit(&t->t_delay_lock);
2618			imm_timeout = 1;
2619			timedwait = NULL;
2620		}
2621	}
2622	t->t_flag |= T_WAITCVSEM;
2623	lwp_block(&lwpchan);
2624
2625	/*
2626	 * Nothing should happen to cause the LWP to go to sleep until after
2627	 * it returns from swtch().
2628	 */
2629	if (timedwait)
2630		mutex_exit(&t->t_delay_lock);
2631	locked = 0;
2632	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2633	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
2634		setrun(t);
2635	swtch();
2636
2637	/*
2638	 * We're back, but we need to work out why. Were we interrupted? Did
2639	 * we time out? Were we granted the lock?
2640	 */
2641	error = EAGAIN;
2642	acquired = (t->t_writer & TRW_LOCK_GRANTED);
2643	t->t_writer = 0;
2644	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2645	if (timedwait)
2646		tim = lwp_timer_dequeue(&lwpt);
2647	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
2648		error = EINTR;
2649	else if (imm_timeout || (timedwait && tim == -1))
2650		error = ETIME;
2651	lwp->lwp_asleep = 0;
2652	lwp->lwp_sysabort = 0;
2653	setallwatch();
2654
2655	/*
2656	 * If we were granted the lock we don't care about EINTR or ETIME.
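	 * (The waker already accounted for us in rwstate when it granted
	 * the lock, so returning EINTR or ETIME here would leave the lock
	 * held but apparently unowned.)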
2657 */ 2658 if (acquired) 2659 error = 0; 2660 2661 if (t->t_mstate == LMS_USER_LOCK) 2662 (void) new_mstate(t, LMS_SYSTEM); 2663 2664 if (error) 2665 return (set_errno(error)); 2666 return (0); 2667 2668 out_drop: 2669 /* 2670 * Make sure that the user level lock is dropped before returning 2671 * to the caller. 2672 */ 2673 if (!mlocked) { 2674 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL); 2675 mlocked = 1; 2676 } 2677 set_owner_pid(mp, 0, 0); 2678 ulock_clear(&mp->mutex_lockw); 2679 fuword8_noerr(&mp->mutex_waiters, &mwaiters); 2680 if (mwaiters != 0) { 2681 /* 2682 * See comment above on lock clearing and lwp_release() 2683 * success/failure. 2684 */ 2685 if (lwp_release(&mlwpchan, &mwaiters, 0)) 2686 suword8_noerr(&mp->mutex_waiters, mwaiters); 2687 } 2688 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL); 2689 mlocked = 0; 2690 2691 out_nodrop: 2692 no_fault(); 2693 if (mwatched) 2694 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE); 2695 if (watched) 2696 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2697 if (t->t_mstate == LMS_USER_LOCK) 2698 (void) new_mstate(t, LMS_SYSTEM); 2699 if (error) 2700 return (set_errno(error)); 2701 return (0); 2702 } 2703 2704 /* 2705 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(), 2706 * we never drop the lock. 2707 */ 2708 static int 2709 lwp_rwlock_unlock(lwp_rwlock_t *rw) 2710 { 2711 kthread_t *t = curthread; 2712 proc_t *p = ttoproc(t); 2713 lwpchan_t lwpchan; 2714 volatile uint16_t type = 0; 2715 volatile int error = 0; 2716 volatile int locked = 0; 2717 volatile int watched = 0; 2718 label_t ljb; 2719 volatile int no_lwpchan = 1; 2720 uint32_t rwstate; 2721 2722 /* We only check rw because the mutex is included in it. */ 2723 if ((caddr_t)rw >= p->p_as->a_userlimit) 2724 return (set_errno(EFAULT)); 2725 2726 if (on_fault(&ljb)) { 2727 if (no_lwpchan) { 2728 error = EFAULT; 2729 goto out_nodrop; 2730 } 2731 if (locked) { 2732 locked = 0; 2733 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2734 } 2735 error = EFAULT; 2736 goto out_nodrop; 2737 } 2738 2739 /* 2740 * Force Copy-on-write if necessary and ensure that the 2741 * synchronization object resides in read/write memory. 2742 * Cause an EFAULT return now if this is not so. 2743 */ 2744 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type); 2745 suword16_noerr(&rw->rwlock_type, type); 2746 2747 /* We can only continue for simple USYNC_PROCESS locks. */ 2748 if (type != USYNC_PROCESS) { 2749 error = EINVAL; 2750 goto out_nodrop; 2751 } 2752 2753 /* Convert user level rwlock, "rw", to a unique lwpchan. */ 2754 if (!get_lwpchan(p->p_as, (caddr_t)rw, type, 2755 &lwpchan, LWPCHAN_CVPOOL)) { 2756 error = EFAULT; 2757 goto out_nodrop; 2758 } 2759 2760 no_lwpchan = 0; 2761 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2762 2763 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL); 2764 locked = 1; 2765 2766 /* 2767 * We can resolve multiple readers (except the last reader) here. 2768 * For the last reader or a writer we need lwp_rwlock_release(), 2769 * to which we also delegate the task of copying the new rwstate 2770 * back to userland (see the comment there). 
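	 * Only the transition to fully unlocked can hand the lock to
	 * waiters, which is why the last reader, like a writer, goes
	 * through lwp_rwlock_release() to wake them and recompute
	 * URW_HAS_WAITERS.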
2771 */ 2772 fuword32_noerr(&rw->rwlock_readers, &rwstate); 2773 if (rwstate & URW_WRITE_LOCKED) 2774 lwp_rwlock_release(&lwpchan, rw); 2775 else if ((rwstate & URW_READERS_MASK) > 0) { 2776 rwstate--; 2777 if ((rwstate & URW_READERS_MASK) == 0) 2778 lwp_rwlock_release(&lwpchan, rw); 2779 else 2780 suword32_noerr(&rw->rwlock_readers, rwstate); 2781 } 2782 2783 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL); 2784 locked = 0; 2785 error = 0; 2786 2787 out_nodrop: 2788 no_fault(); 2789 if (watched) 2790 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE); 2791 if (error) 2792 return (set_errno(error)); 2793 return (0); 2794 } 2795 2796 int 2797 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp) 2798 { 2799 switch (subcode) { 2800 case 0: 2801 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK)); 2802 case 1: 2803 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK)); 2804 case 2: 2805 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY)); 2806 case 3: 2807 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY)); 2808 case 4: 2809 return (lwp_rwlock_unlock(rwlp)); 2810 } 2811 return (set_errno(EINVAL)); 2812 } 2813 2814 /* 2815 * Return the owner of the user-level s-object. 2816 * Since we can't really do this, return NULL. 2817 */ 2818 /* ARGSUSED */ 2819 static kthread_t * 2820 lwpsobj_owner(caddr_t sobj) 2821 { 2822 return ((kthread_t *)NULL); 2823 } 2824 2825 /* 2826 * Wake up a thread asleep on a user-level synchronization 2827 * object. 2828 */ 2829 static void 2830 lwp_unsleep(kthread_t *t) 2831 { 2832 ASSERT(THREAD_LOCK_HELD(t)); 2833 if (t->t_wchan0 != NULL) { 2834 sleepq_head_t *sqh; 2835 sleepq_t *sqp = t->t_sleepq; 2836 2837 if (sqp != NULL) { 2838 sqh = lwpsqhash(&t->t_lwpchan); 2839 ASSERT(&sqh->sq_queue == sqp); 2840 sleepq_unsleep(t); 2841 disp_lock_exit_high(&sqh->sq_lock); 2842 CL_SETRUN(t); 2843 return; 2844 } 2845 } 2846 panic("lwp_unsleep: thread %p not on sleepq", (void *)t); 2847 } 2848 2849 /* 2850 * Change the priority of a thread asleep on a user-level 2851 * synchronization object. To maintain proper priority order, 2852 * we: 2853 * o dequeue the thread. 2854 * o change its priority. 2855 * o re-enqueue the thread. 2856 * Assumption: the thread is locked on entry. 
2857 */ 2858 static void 2859 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 2860 { 2861 ASSERT(THREAD_LOCK_HELD(t)); 2862 if (t->t_wchan0 != NULL) { 2863 sleepq_t *sqp = t->t_sleepq; 2864 2865 sleepq_dequeue(t); 2866 *t_prip = pri; 2867 sleepq_insert(sqp, t); 2868 } else 2869 panic("lwp_change_pri: %p not on a sleep queue", (void *)t); 2870 } 2871 2872 /* 2873 * Clean up a left-over process-shared robust mutex 2874 */ 2875 static void 2876 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg) 2877 { 2878 uint16_t flag; 2879 uchar_t waiters; 2880 label_t ljb; 2881 pid_t owner_pid; 2882 lwp_mutex_t *lp; 2883 volatile int locked = 0; 2884 volatile int watched = 0; 2885 volatile struct upimutex *upimutex = NULL; 2886 volatile int upilocked = 0; 2887 2888 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST)) 2889 != (USYNC_PROCESS | LOCK_ROBUST)) 2890 return; 2891 2892 lp = (lwp_mutex_t *)ent->lwpchan_addr; 2893 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2894 if (on_fault(&ljb)) { 2895 if (locked) 2896 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2897 if (upilocked) 2898 upimutex_unlock((upimutex_t *)upimutex, 0); 2899 goto out; 2900 } 2901 2902 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid); 2903 2904 if (UPIMUTEX(ent->lwpchan_type)) { 2905 lwpchan_t lwpchan = ent->lwpchan_lwpchan; 2906 upib_t *upibp = &UPI_CHAIN(lwpchan); 2907 2908 if (owner_pid != curproc->p_pid) 2909 goto out; 2910 mutex_enter(&upibp->upib_lock); 2911 upimutex = upi_get(upibp, &lwpchan); 2912 if (upimutex == NULL || upimutex->upi_owner != curthread) { 2913 mutex_exit(&upibp->upib_lock); 2914 goto out; 2915 } 2916 mutex_exit(&upibp->upib_lock); 2917 upilocked = 1; 2918 flag = lwp_clear_mutex(lp, lockflg); 2919 suword8_noerr(&lp->mutex_lockw, 0); 2920 upimutex_unlock((upimutex_t *)upimutex, flag); 2921 } else { 2922 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2923 locked = 1; 2924 /* 2925 * Clear the spinners count because one of our 2926 * threads could have been spinning for this lock 2927 * at user level when the process was suddenly killed. 2928 * There is no harm in this since user-level libc code 2929 * will adapt to the sudden change in the spinner count. 2930 */ 2931 suword8_noerr(&lp->mutex_spinners, 0); 2932 if (owner_pid != curproc->p_pid) { 2933 /* 2934 * We are not the owner. There may or may not be one. 2935 * If there are waiters, we wake up one or all of them. 2936 * It doesn't hurt to wake them up in error since 2937 * they will just retry the lock and go to sleep 2938 * again if necessary. 2939 */ 2940 fuword8_noerr(&lp->mutex_waiters, &waiters); 2941 if (waiters != 0) { /* there are waiters */ 2942 fuword16_noerr(&lp->mutex_flag, &flag); 2943 if (flag & LOCK_NOTRECOVERABLE) { 2944 lwp_release_all(&ent->lwpchan_lwpchan); 2945 suword8_noerr(&lp->mutex_waiters, 0); 2946 } else if (lwp_release(&ent->lwpchan_lwpchan, 2947 &waiters, 0)) { 2948 suword8_noerr(&lp->mutex_waiters, 2949 waiters); 2950 } 2951 } 2952 } else { 2953 /* 2954 * We are the owner. Release it. 
2955 */ 2956 (void) lwp_clear_mutex(lp, lockflg); 2957 ulock_clear(&lp->mutex_lockw); 2958 fuword8_noerr(&lp->mutex_waiters, &waiters); 2959 if (waiters && 2960 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0)) 2961 suword8_noerr(&lp->mutex_waiters, waiters); 2962 } 2963 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL); 2964 } 2965 out: 2966 no_fault(); 2967 if (watched) 2968 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2969 } 2970 2971 /* 2972 * Register a process-shared robust mutex in the lwpchan cache. 2973 */ 2974 int 2975 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr) 2976 { 2977 int error = 0; 2978 volatile int watched; 2979 label_t ljb; 2980 uint8_t type; 2981 lwpchan_t lwpchan; 2982 2983 if ((caddr_t)lp >= (caddr_t)USERLIMIT) 2984 return (set_errno(EFAULT)); 2985 2986 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 2987 2988 if (on_fault(&ljb)) { 2989 error = EFAULT; 2990 } else { 2991 /* 2992 * Force Copy-on-write if necessary and ensure that the 2993 * synchronization object resides in read/write memory. 2994 * Cause an EFAULT return now if this is not so. 2995 */ 2996 fuword8_noerr(&lp->mutex_type, &type); 2997 suword8_noerr(&lp->mutex_type, type); 2998 if ((type & (USYNC_PROCESS|LOCK_ROBUST)) 2999 != (USYNC_PROCESS|LOCK_ROBUST)) { 3000 error = EINVAL; 3001 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp, 3002 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) { 3003 error = EFAULT; 3004 } 3005 } 3006 no_fault(); 3007 if (watched) 3008 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3009 if (error) 3010 return (set_errno(error)); 3011 return (0); 3012 } 3013 3014 /* 3015 * There is a user-level robust lock registration in libc. 3016 * Mark it as invalid by storing -1 into the location of the pointer. 3017 */ 3018 static void 3019 lwp_mutex_unregister(void *uaddr) 3020 { 3021 if (get_udatamodel() == DATAMODEL_NATIVE) { 3022 (void) sulword(uaddr, (ulong_t)-1); 3023 #ifdef _SYSCALL32_IMPL 3024 } else { 3025 (void) suword32(uaddr, (uint32_t)-1); 3026 #endif 3027 } 3028 } 3029 3030 int 3031 lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner) 3032 { 3033 kthread_t *t = curthread; 3034 proc_t *p = ttoproc(t); 3035 int error = 0; 3036 volatile int locked = 0; 3037 volatile int watched = 0; 3038 label_t ljb; 3039 volatile uint8_t type = 0; 3040 uint16_t flag; 3041 lwpchan_t lwpchan; 3042 3043 if ((caddr_t)lp >= p->p_as->a_userlimit) 3044 return (set_errno(EFAULT)); 3045 3046 (void) new_mstate(t, LMS_USER_LOCK); 3047 3048 if (on_fault(&ljb)) { 3049 if (locked) 3050 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3051 error = EFAULT; 3052 goto out; 3053 } 3054 /* 3055 * Force Copy-on-write if necessary and ensure that the 3056 * synchronization object resides in read/write memory. 3057 * Cause an EFAULT return now if this is not so. 3058 */ 3059 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type); 3060 suword8_noerr(&lp->mutex_type, type); 3061 if (UPIMUTEX(type)) { 3062 no_fault(); 3063 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL); 3064 if (error == 0 || error == EOWNERDEAD || 3065 error == ELOCKUNMAPPED) { 3066 volatile int locked = error != 0; 3067 if (on_fault(&ljb)) { 3068 if (locked != 0) 3069 error = lwp_upimutex_unlock(lp, type); 3070 else 3071 error = EFAULT; 3072 goto upierr; 3073 } 3074 set_owner_pid(lp, owner, 3075 (type & USYNC_PROCESS)? 
			    p->p_pid : 0);
3076			no_fault();
3077		}
3078
3079 upierr:
3080		if (error)
3081			return (set_errno(error));
3082		return (0);
3083	}
3084	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3085	    &lwpchan, LWPCHAN_MPPOOL)) {
3086		error = EFAULT;
3087		goto out;
3088	}
3089	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3090	locked = 1;
3091	if (type & LOCK_ROBUST) {
3092		fuword16_noerr(&lp->mutex_flag, &flag);
3093		if (flag & LOCK_NOTRECOVERABLE) {
3094			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3095			error = ENOTRECOVERABLE;
3096			goto out;
3097		}
3098	}
3099
3100	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3101
3102	if (!ulock_try(&lp->mutex_lockw))
3103		error = EBUSY;
3104	else {
3105		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
3106		if (type & LOCK_ROBUST) {
3107			fuword16_noerr(&lp->mutex_flag, &flag);
3108			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3109				if (flag & LOCK_OWNERDEAD)
3110					error = EOWNERDEAD;
3111				else if (type & USYNC_PROCESS_ROBUST)
3112					error = ELOCKUNMAPPED;
3113				else
3114					error = EOWNERDEAD;
3115			}
3116		}
3117	}
3118	locked = 0;
3119	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3120 out:
3121
3122	if (t->t_mstate == LMS_USER_LOCK)
3123		(void) new_mstate(t, LMS_SYSTEM);
3124
3125	no_fault();
3126	if (watched)
3127		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3128	if (error)
3129		return (set_errno(error));
3130	return (0);
3131 }
3132
3133 /*
3134  * Unlock the mutex and unblock any lwps that are trying to acquire it.
3135  * The blocked lwps resume and retry acquiring the lock.
3136  */
3137 int
3138 lwp_mutex_unlock(lwp_mutex_t *lp)
3139 {
3140	proc_t *p = ttoproc(curthread);
3141	lwpchan_t lwpchan;
3142	uchar_t waiters;
3143	volatile int locked = 0;
3144	volatile int watched = 0;
3145	volatile uint8_t type = 0;
3146	label_t ljb;
3147	uint16_t flag;
3148	int error = 0;
3149
3150	if ((caddr_t)lp >= p->p_as->a_userlimit)
3151		return (set_errno(EFAULT));
3152
3153	if (on_fault(&ljb)) {
3154		if (locked)
3155			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3156		error = EFAULT;
3157		goto out;
3158	}
3159
3160	/*
3161	 * Force Copy-on-write if necessary and ensure that the
3162	 * synchronization object resides in read/write memory.
3163	 * Cause an EFAULT return now if this is not so.
3164	 */
3165	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
3166	suword8_noerr(&lp->mutex_type, type);
3167
3168	if (UPIMUTEX(type)) {
3169		no_fault();
3170		error = lwp_upimutex_unlock(lp, type);
3171		if (error)
3172			return (set_errno(error));
3173		return (0);
3174	}
3175
3176	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3177
3178	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3179	    &lwpchan, LWPCHAN_MPPOOL)) {
3180		error = EFAULT;
3181		goto out;
3182	}
3183	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3184	locked = 1;
3185	if (type & LOCK_ROBUST) {
3186		fuword16_noerr(&lp->mutex_flag, &flag);
3187		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3188			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
3189			flag |= LOCK_NOTRECOVERABLE;
3190			suword16_noerr(&lp->mutex_flag, flag);
3191		}
3192	}
3193	set_owner_pid(lp, 0, 0);
3194	ulock_clear(&lp->mutex_lockw);
3195	/*
3196	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
3197	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
3198	 * may fail. If it fails, do not write into the waiter bit.
3199	 * The call to lwp_release() might fail due to one of three reasons:
3200	 *
3201	 *	1.
due to the thread which set the waiter bit not actually 3202 * sleeping since it got the lock on the re-try. The waiter 3203 * bit will then be correctly updated by that thread. This 3204 * window may be closed by reading the wait bit again here 3205 * and not calling lwp_release() at all if it is zero. 3206 * 2. the thread which set the waiter bit and went to sleep 3207 * was woken up by a signal. This time, the waiter recomputes 3208 * the wait bit in the return with EINTR code. 3209 * 3. the waiter bit read by lwp_mutex_wakeup() was in 3210 * memory that has been re-used after the lock was dropped. 3211 * In this case, writing into the waiter bit would cause data 3212 * corruption. 3213 */ 3214 fuword8_noerr(&lp->mutex_waiters, &waiters); 3215 if (waiters) { 3216 if ((type & LOCK_ROBUST) && 3217 (flag & LOCK_NOTRECOVERABLE)) { 3218 lwp_release_all(&lwpchan); 3219 suword8_noerr(&lp->mutex_waiters, 0); 3220 } else if (lwp_release(&lwpchan, &waiters, 0)) { 3221 suword8_noerr(&lp->mutex_waiters, waiters); 3222 } 3223 } 3224 3225 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL); 3226 out: 3227 no_fault(); 3228 if (watched) 3229 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE); 3230 if (error) 3231 return (set_errno(error)); 3232 return (0); 3233 } 3234