1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "lint.h" 28 #include "thr_uberdata.h" 29 #include <sys/rtpriocntl.h> 30 #include <sys/sdt.h> 31 #include <atomic.h> 32 33 #if defined(THREAD_DEBUG) 34 #define INCR32(x) (((x) != UINT32_MAX)? (x)++ : 0) 35 #define INCR(x) ((x)++) 36 #define DECR(x) ((x)--) 37 #define MAXINCR(m, x) ((m < ++x)? (m = x) : 0) 38 #else 39 #define INCR32(x) 40 #define INCR(x) 41 #define DECR(x) 42 #define MAXINCR(m, x) 43 #endif 44 45 /* 46 * This mutex is initialized to be held by lwp#1. 47 * It is used to block a thread that has returned from a mutex_lock() 48 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 49 */ 50 mutex_t stall_mutex = DEFAULTMUTEX; 51 52 static int shared_mutex_held(mutex_t *); 53 static int mutex_queuelock_adaptive(mutex_t *); 54 static void mutex_wakeup_all(mutex_t *); 55 56 /* 57 * Lock statistics support functions. 58 */ 59 void 60 record_begin_hold(tdb_mutex_stats_t *msp) 61 { 62 tdb_incr(msp->mutex_lock); 63 msp->mutex_begin_hold = gethrtime(); 64 } 65 66 hrtime_t 67 record_hold_time(tdb_mutex_stats_t *msp) 68 { 69 hrtime_t now = gethrtime(); 70 71 if (msp->mutex_begin_hold) 72 msp->mutex_hold_time += now - msp->mutex_begin_hold; 73 msp->mutex_begin_hold = 0; 74 return (now); 75 } 76 77 /* 78 * Called once at library initialization. 79 */ 80 void 81 mutex_setup(void) 82 { 83 if (set_lock_byte(&stall_mutex.mutex_lockw)) 84 thr_panic("mutex_setup() cannot acquire stall_mutex"); 85 stall_mutex.mutex_owner = (uintptr_t)curthread; 86 } 87 88 /* 89 * The default spin count of 1000 is experimentally determined. 90 * On sun4u machines with any number of processors it could be raised 91 * to 10,000 but that (experimentally) makes almost no difference. 92 * The environment variable: 93 * _THREAD_ADAPTIVE_SPIN=count 94 * can be used to override and set the count in the range [0 .. 1,000,000]. 95 */ 96 int thread_adaptive_spin = 1000; 97 uint_t thread_max_spinners = 100; 98 int thread_queue_verify = 0; 99 static int ncpus; 100 101 /* 102 * Distinguish spinning for queue locks from spinning for regular locks. 103 * We try harder to acquire queue locks by spinning. 104 * The environment variable: 105 * _THREAD_QUEUE_SPIN=count 106 * can be used to override and set the count in the range [0 .. 1,000,000]. 
107 */ 108 int thread_queue_spin = 10000; 109 110 #define ALL_ATTRIBUTES \ 111 (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 112 LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 113 LOCK_ROBUST) 114 115 /* 116 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 117 * augmented by zero or more the flags: 118 * LOCK_RECURSIVE 119 * LOCK_ERRORCHECK 120 * LOCK_PRIO_INHERIT 121 * LOCK_PRIO_PROTECT 122 * LOCK_ROBUST 123 */ 124 #pragma weak _mutex_init = mutex_init 125 /* ARGSUSED2 */ 126 int 127 mutex_init(mutex_t *mp, int type, void *arg) 128 { 129 int basetype = (type & ~ALL_ATTRIBUTES); 130 const pcclass_t *pccp; 131 int error = 0; 132 int ceil; 133 134 if (basetype == USYNC_PROCESS_ROBUST) { 135 /* 136 * USYNC_PROCESS_ROBUST is a deprecated historical type. 137 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 138 * retain the USYNC_PROCESS_ROBUST flag so we can return 139 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 140 * mutexes will ever draw ELOCKUNMAPPED). 141 */ 142 type |= (USYNC_PROCESS | LOCK_ROBUST); 143 basetype = USYNC_PROCESS; 144 } 145 146 if (type & LOCK_PRIO_PROTECT) 147 pccp = get_info_by_policy(SCHED_FIFO); 148 if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) || 149 (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 150 == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) || 151 ((type & LOCK_PRIO_PROTECT) && 152 ((ceil = *(int *)arg) < pccp->pcc_primin || 153 ceil > pccp->pcc_primax))) { 154 error = EINVAL; 155 } else if (type & LOCK_ROBUST) { 156 /* 157 * Callers of mutex_init() with the LOCK_ROBUST attribute 158 * are required to pass an initially all-zero mutex. 159 * Multiple calls to mutex_init() are allowed; all but 160 * the first return EBUSY. A call to mutex_init() is 161 * allowed to make an inconsistent robust lock consistent 162 * (for historical usage, even though the proper interface 163 * for this is mutex_consistent()). Note that we use 164 * atomic_or_16() to set the LOCK_INITED flag so as 165 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 166 */ 167 if (!(mp->mutex_flag & LOCK_INITED)) { 168 mp->mutex_type = (uint8_t)type; 169 atomic_or_16(&mp->mutex_flag, LOCK_INITED); 170 mp->mutex_magic = MUTEX_MAGIC; 171 } else if (type != mp->mutex_type || 172 ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) { 173 error = EINVAL; 174 } else if (mutex_consistent(mp) != 0) { 175 error = EBUSY; 176 } 177 /* register a process robust mutex with the kernel */ 178 if (basetype == USYNC_PROCESS) 179 register_lock(mp); 180 } else { 181 (void) memset(mp, 0, sizeof (*mp)); 182 mp->mutex_type = (uint8_t)type; 183 mp->mutex_flag = LOCK_INITED; 184 mp->mutex_magic = MUTEX_MAGIC; 185 } 186 187 if (error == 0 && (type & LOCK_PRIO_PROTECT)) { 188 mp->mutex_ceiling = ceil; 189 } 190 191 /* 192 * This should be at the beginning of the function, 193 * but for the sake of old broken applications that 194 * do not have proper alignment for their mutexes 195 * (and don't check the return code from mutex_init), 196 * we put it here, after initializing the mutex regardless. 197 */ 198 if (error == 0 && 199 ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 200 curthread->ul_misaligned == 0) 201 error = EINVAL; 202 203 return (error); 204 } 205 206 /* 207 * Delete mp from list of ceiling mutexes owned by curthread. 208 * Return 1 if the head of the chain was updated. 
209 */ 210 int 211 _ceil_mylist_del(mutex_t *mp) 212 { 213 ulwp_t *self = curthread; 214 mxchain_t **mcpp; 215 mxchain_t *mcp; 216 217 for (mcpp = &self->ul_mxchain; 218 (mcp = *mcpp) != NULL; 219 mcpp = &mcp->mxchain_next) { 220 if (mcp->mxchain_mx == mp) { 221 *mcpp = mcp->mxchain_next; 222 lfree(mcp, sizeof (*mcp)); 223 return (mcpp == &self->ul_mxchain); 224 } 225 } 226 return (0); 227 } 228 229 /* 230 * Add mp to the list of ceiling mutexes owned by curthread. 231 * Return ENOMEM if no memory could be allocated. 232 */ 233 int 234 _ceil_mylist_add(mutex_t *mp) 235 { 236 ulwp_t *self = curthread; 237 mxchain_t *mcp; 238 239 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 240 return (ENOMEM); 241 mcp->mxchain_mx = mp; 242 mcp->mxchain_next = self->ul_mxchain; 243 self->ul_mxchain = mcp; 244 return (0); 245 } 246 247 /* 248 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below. 249 */ 250 static void 251 set_rt_priority(ulwp_t *self, int prio) 252 { 253 pcparms_t pcparm; 254 255 pcparm.pc_cid = self->ul_rtclassid; 256 ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; 257 ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; 258 (void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm); 259 } 260 261 /* 262 * Inherit priority from ceiling. 263 * This changes the effective priority, not the assigned priority. 264 */ 265 void 266 _ceil_prio_inherit(int prio) 267 { 268 ulwp_t *self = curthread; 269 270 self->ul_epri = prio; 271 set_rt_priority(self, prio); 272 } 273 274 /* 275 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 276 * if holding at least one ceiling lock. If no ceiling locks are held at this 277 * point, disinherit completely, reverting back to assigned priority. 278 */ 279 void 280 _ceil_prio_waive(void) 281 { 282 ulwp_t *self = curthread; 283 mxchain_t *mcp = self->ul_mxchain; 284 int prio; 285 286 if (mcp == NULL) { 287 prio = self->ul_pri; 288 self->ul_epri = 0; 289 } else { 290 prio = mcp->mxchain_mx->mutex_ceiling; 291 self->ul_epri = prio; 292 } 293 set_rt_priority(self, prio); 294 } 295 296 /* 297 * Clear the lock byte. Retain the waiters byte and the spinners byte. 298 * Return the old value of the lock word. 299 */ 300 static uint32_t 301 clear_lockbyte(volatile uint32_t *lockword) 302 { 303 uint32_t old; 304 uint32_t new; 305 306 do { 307 old = *lockword; 308 new = old & ~LOCKMASK; 309 } while (atomic_cas_32(lockword, old, new) != old); 310 311 return (old); 312 } 313 314 /* 315 * Same as clear_lockbyte(), but operates on mutex_lockword64. 316 * The mutex_ownerpid field is cleared along with the lock byte. 317 */ 318 static uint64_t 319 clear_lockbyte64(volatile uint64_t *lockword64) 320 { 321 uint64_t old; 322 uint64_t new; 323 324 do { 325 old = *lockword64; 326 new = old & ~LOCKMASK64; 327 } while (atomic_cas_64(lockword64, old, new) != old); 328 329 return (old); 330 } 331 332 /* 333 * Similar to set_lock_byte(), which only tries to set the lock byte. 334 * Here, we attempt to set the lock byte AND the mutex_ownerpid, keeping 335 * the remaining bytes constant. This atomic operation is required for the 336 * correctness of process-shared robust locks, otherwise there would be 337 * a window or vulnerability in which the lock byte had been set but the 338 * mutex_ownerpid had not yet been set. 
If the process were to die in 339 * this window of vulnerability (due to some other thread calling exit() 340 * or the process receiving a fatal signal), the mutex would be left locked 341 * but without a process-ID to determine which process was holding the lock. 342 * The kernel would then be unable to mark the robust mutex as LOCK_OWNERDEAD 343 * when the process died. For all other cases of process-shared locks, this 344 * operation is just a convenience, for the sake of common code. 345 * 346 * This operation requires process-shared robust locks to be properly 347 * aligned on an 8-byte boundary, at least on sparc machines, lest the 348 * operation incur an alignment fault. This is automatic when locks 349 * are declared properly using the mutex_t or pthread_mutex_t data types 350 * and the application does not allocate dynamic memory on less than an 351 * 8-byte boundary. See the 'horrible hack' comments below for cases 352 * dealing with such broken applications. 353 */ 354 static int 355 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid) 356 { 357 uint64_t old; 358 uint64_t new; 359 360 old = *lockword64 & ~LOCKMASK64; 361 new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64; 362 if (atomic_cas_64(lockword64, old, new) == old) 363 return (LOCKCLEAR); 364 365 return (LOCKSET); 366 } 367 368 /* 369 * Increment the spinners count in the mutex lock word. 370 * Return 0 on success. Return -1 if the count would overflow. 371 */ 372 static int 373 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners) 374 { 375 uint32_t old; 376 uint32_t new; 377 378 do { 379 old = *lockword; 380 if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners) 381 return (-1); 382 new = old + (1 << SPINNERSHIFT); 383 } while (atomic_cas_32(lockword, old, new) != old); 384 385 return (0); 386 } 387 388 /* 389 * Decrement the spinners count in the mutex lock word. 390 * Return the new value of the lock word. 391 */ 392 static uint32_t 393 spinners_decr(volatile uint32_t *lockword) 394 { 395 uint32_t old; 396 uint32_t new; 397 398 do { 399 new = old = *lockword; 400 if (new & SPINNERMASK) 401 new -= (1 << SPINNERSHIFT); 402 } while (atomic_cas_32(lockword, old, new) != old); 403 404 return (new); 405 } 406 407 /* 408 * Non-preemptive spin locks. Used by queue_lock(). 409 * No lock statistics are gathered for these locks. 410 * No DTrace probes are provided for these locks. 411 */ 412 void 413 spin_lock_set(mutex_t *mp) 414 { 415 ulwp_t *self = curthread; 416 417 no_preempt(self); 418 if (set_lock_byte(&mp->mutex_lockw) == 0) { 419 mp->mutex_owner = (uintptr_t)self; 420 return; 421 } 422 /* 423 * Spin for a while, attempting to acquire the lock. 424 */ 425 INCR32(self->ul_spin_lock_spin); 426 if (mutex_queuelock_adaptive(mp) == 0 || 427 set_lock_byte(&mp->mutex_lockw) == 0) { 428 mp->mutex_owner = (uintptr_t)self; 429 return; 430 } 431 /* 432 * Try harder if we were previously at a no premption level. 433 */ 434 if (self->ul_preempt > 1) { 435 INCR32(self->ul_spin_lock_spin2); 436 if (mutex_queuelock_adaptive(mp) == 0 || 437 set_lock_byte(&mp->mutex_lockw) == 0) { 438 mp->mutex_owner = (uintptr_t)self; 439 return; 440 } 441 } 442 /* 443 * Give up and block in the kernel for the mutex. 
444 */ 445 INCR32(self->ul_spin_lock_sleep); 446 (void) ___lwp_mutex_timedlock(mp, NULL); 447 mp->mutex_owner = (uintptr_t)self; 448 } 449 450 void 451 spin_lock_clear(mutex_t *mp) 452 { 453 ulwp_t *self = curthread; 454 455 mp->mutex_owner = 0; 456 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 457 (void) ___lwp_mutex_wakeup(mp, 0); 458 INCR32(self->ul_spin_lock_wakeup); 459 } 460 preempt(self); 461 } 462 463 /* 464 * Allocate the sleep queue hash table. 465 */ 466 void 467 queue_alloc(void) 468 { 469 ulwp_t *self = curthread; 470 uberdata_t *udp = self->ul_uberdata; 471 queue_head_t *qp; 472 void *data; 473 int i; 474 475 /* 476 * No locks are needed; we call here only when single-threaded. 477 */ 478 ASSERT(self == udp->ulwp_one); 479 ASSERT(!udp->uberflags.uf_mt); 480 if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 481 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 482 == MAP_FAILED) 483 thr_panic("cannot allocate thread queue_head table"); 484 udp->queue_head = qp = (queue_head_t *)data; 485 for (i = 0; i < 2 * QHASHSIZE; qp++, i++) { 486 qp->qh_type = (i < QHASHSIZE)? MX : CV; 487 qp->qh_lock.mutex_flag = LOCK_INITED; 488 qp->qh_lock.mutex_magic = MUTEX_MAGIC; 489 qp->qh_hlist = &qp->qh_def_root; 490 #if defined(THREAD_DEBUG) 491 qp->qh_hlen = 1; 492 qp->qh_hmax = 1; 493 #endif 494 } 495 } 496 497 #if defined(THREAD_DEBUG) 498 499 /* 500 * Debugging: verify correctness of a sleep queue. 501 */ 502 void 503 QVERIFY(queue_head_t *qp) 504 { 505 ulwp_t *self = curthread; 506 uberdata_t *udp = self->ul_uberdata; 507 queue_root_t *qrp; 508 ulwp_t *ulwp; 509 ulwp_t *prev; 510 uint_t index; 511 uint32_t cnt; 512 char qtype; 513 void *wchan; 514 515 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 516 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 517 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 518 cnt++; 519 ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) || 520 (qrp->qr_head == NULL && qrp->qr_tail == NULL)); 521 } 522 ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt); 523 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 524 ASSERT(qp->qh_type == qtype); 525 if (!thread_queue_verify) 526 return; 527 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 528 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 529 for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; 530 prev = ulwp, ulwp = ulwp->ul_link) { 531 cnt++; 532 if (ulwp->ul_writer) 533 ASSERT(prev == NULL || prev->ul_writer); 534 ASSERT(ulwp->ul_qtype == qtype); 535 ASSERT(ulwp->ul_wchan != NULL); 536 ASSERT(ulwp->ul_sleepq == qp); 537 wchan = ulwp->ul_wchan; 538 ASSERT(qrp->qr_wchan == wchan); 539 index = QUEUE_HASH(wchan, qtype); 540 ASSERT(&udp->queue_head[index] == qp); 541 } 542 ASSERT(qrp->qr_tail == prev); 543 } 544 ASSERT(qp->qh_qlen == cnt); 545 } 546 547 #else /* THREAD_DEBUG */ 548 549 #define QVERIFY(qp) 550 551 #endif /* THREAD_DEBUG */ 552 553 /* 554 * Acquire a queue head. 555 */ 556 queue_head_t * 557 queue_lock(void *wchan, int qtype) 558 { 559 uberdata_t *udp = curthread->ul_uberdata; 560 queue_head_t *qp; 561 queue_root_t *qrp; 562 563 ASSERT(qtype == MX || qtype == CV); 564 565 /* 566 * It is possible that we could be called while still single-threaded. 567 * If so, we call queue_alloc() to allocate the queue_head[] array. 
568 */ 569 if ((qp = udp->queue_head) == NULL) { 570 queue_alloc(); 571 qp = udp->queue_head; 572 } 573 qp += QUEUE_HASH(wchan, qtype); 574 spin_lock_set(&qp->qh_lock); 575 for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) 576 if (qrp->qr_wchan == wchan) 577 break; 578 if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { 579 /* the default queue root is available; use it */ 580 qrp = &qp->qh_def_root; 581 qrp->qr_wchan = wchan; 582 ASSERT(qrp->qr_next == NULL); 583 ASSERT(qrp->qr_tail == NULL && 584 qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); 585 } 586 qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ 587 qp->qh_root = qrp; /* valid until queue_unlock() is called */ 588 INCR32(qp->qh_lockcount); 589 QVERIFY(qp); 590 return (qp); 591 } 592 593 /* 594 * Release a queue head. 595 */ 596 void 597 queue_unlock(queue_head_t *qp) 598 { 599 QVERIFY(qp); 600 spin_lock_clear(&qp->qh_lock); 601 } 602 603 /* 604 * For rwlock queueing, we must queue writers ahead of readers of the 605 * same priority. We do this by making writers appear to have a half 606 * point higher priority for purposes of priority comparisons below. 607 */ 608 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 609 610 void 611 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) 612 { 613 queue_root_t *qrp; 614 ulwp_t **ulwpp; 615 ulwp_t *next; 616 int pri = CMP_PRIO(ulwp); 617 618 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 619 ASSERT(ulwp->ul_sleepq != qp); 620 621 if ((qrp = qp->qh_root) == NULL) { 622 /* use the thread's queue root for the linkage */ 623 qrp = &ulwp->ul_queue_root; 624 qrp->qr_next = qp->qh_hlist; 625 qrp->qr_prev = NULL; 626 qrp->qr_head = NULL; 627 qrp->qr_tail = NULL; 628 qrp->qr_wchan = qp->qh_wchan; 629 qrp->qr_rtcount = 0; 630 qrp->qr_qlen = 0; 631 qrp->qr_qmax = 0; 632 qp->qh_hlist->qr_prev = qrp; 633 qp->qh_hlist = qrp; 634 qp->qh_root = qrp; 635 MAXINCR(qp->qh_hmax, qp->qh_hlen); 636 } 637 638 /* 639 * LIFO queue ordering is unfair and can lead to starvation, 640 * but it gives better performance for heavily contended locks. 641 * We use thread_queue_fifo (range is 0..8) to determine 642 * the frequency of FIFO vs LIFO queuing: 643 * 0 : every 256th time (almost always LIFO) 644 * 1 : every 128th time 645 * 2 : every 64th time 646 * 3 : every 32nd time 647 * 4 : every 16th time (the default value, mostly LIFO) 648 * 5 : every 8th time 649 * 6 : every 4th time 650 * 7 : every 2nd time 651 * 8 : every time (never LIFO, always FIFO) 652 * Note that there is always some degree of FIFO ordering. 653 * This breaks live lock conditions that occur in applications 654 * that are written assuming (incorrectly) that threads acquire 655 * locks fairly, that is, in roughly round-robin order. 656 * In any event, the queue is maintained in kernel priority order. 657 * 658 * If force_fifo is non-zero, fifo queueing is forced. 659 * SUSV3 requires this for semaphores. 660 */ 661 if (qrp->qr_head == NULL) { 662 /* 663 * The queue is empty. LIFO/FIFO doesn't matter. 664 */ 665 ASSERT(qrp->qr_tail == NULL); 666 ulwpp = &qrp->qr_head; 667 } else if (force_fifo | 668 (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { 669 /* 670 * Enqueue after the last thread whose priority is greater 671 * than or equal to the priority of the thread being queued. 672 * Attempt first to go directly onto the tail of the queue. 
673 */ 674 if (pri <= CMP_PRIO(qrp->qr_tail)) 675 ulwpp = &qrp->qr_tail->ul_link; 676 else { 677 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 678 ulwpp = &next->ul_link) 679 if (pri > CMP_PRIO(next)) 680 break; 681 } 682 } else { 683 /* 684 * Enqueue before the first thread whose priority is less 685 * than or equal to the priority of the thread being queued. 686 * Hopefully we can go directly onto the head of the queue. 687 */ 688 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 689 ulwpp = &next->ul_link) 690 if (pri >= CMP_PRIO(next)) 691 break; 692 } 693 if ((ulwp->ul_link = *ulwpp) == NULL) 694 qrp->qr_tail = ulwp; 695 *ulwpp = ulwp; 696 697 ulwp->ul_sleepq = qp; 698 ulwp->ul_wchan = qp->qh_wchan; 699 ulwp->ul_qtype = qp->qh_type; 700 if ((ulwp->ul_schedctl != NULL && 701 ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | 702 ulwp->ul_pilocks) { 703 ulwp->ul_rtqueued = 1; 704 qrp->qr_rtcount++; 705 } 706 MAXINCR(qrp->qr_qmax, qrp->qr_qlen); 707 MAXINCR(qp->qh_qmax, qp->qh_qlen); 708 } 709 710 /* 711 * Helper function for queue_slot() and queue_slot_rt(). 712 * Try to find a non-suspended thread on the queue. 713 */ 714 static ulwp_t ** 715 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) 716 { 717 ulwp_t *ulwp; 718 ulwp_t **foundpp = NULL; 719 int priority = -1; 720 ulwp_t *prev; 721 int tpri; 722 723 for (prev = NULL; 724 (ulwp = *ulwpp) != NULL; 725 prev = ulwp, ulwpp = &ulwp->ul_link) { 726 if (ulwp->ul_stop) /* skip suspended threads */ 727 continue; 728 tpri = rt? CMP_PRIO(ulwp) : 0; 729 if (tpri > priority) { 730 foundpp = ulwpp; 731 *prevp = prev; 732 priority = tpri; 733 if (!rt) 734 break; 735 } 736 } 737 return (foundpp); 738 } 739 740 /* 741 * For real-time, we search the entire queue because the dispatch 742 * (kernel) priorities may have changed since enqueueing. 743 */ 744 static ulwp_t ** 745 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) 746 { 747 ulwp_t **ulwpp = ulwpp_org; 748 ulwp_t *ulwp = *ulwpp; 749 ulwp_t **foundpp = ulwpp; 750 int priority = CMP_PRIO(ulwp); 751 ulwp_t *prev; 752 int tpri; 753 754 for (prev = ulwp, ulwpp = &ulwp->ul_link; 755 (ulwp = *ulwpp) != NULL; 756 prev = ulwp, ulwpp = &ulwp->ul_link) { 757 tpri = CMP_PRIO(ulwp); 758 if (tpri > priority) { 759 foundpp = ulwpp; 760 *prevp = prev; 761 priority = tpri; 762 } 763 } 764 ulwp = *foundpp; 765 766 /* 767 * Try not to return a suspended thread. 768 * This mimics the old libthread's behavior. 769 */ 770 if (ulwp->ul_stop && 771 (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { 772 foundpp = ulwpp; 773 ulwp = *foundpp; 774 } 775 ulwp->ul_rt = 1; 776 return (foundpp); 777 } 778 779 ulwp_t ** 780 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) 781 { 782 queue_root_t *qrp; 783 ulwp_t **ulwpp; 784 ulwp_t *ulwp; 785 int rt; 786 787 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 788 789 if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { 790 *more = 0; 791 return (NULL); /* no lwps on the queue */ 792 } 793 rt = (qrp->qr_rtcount != 0); 794 *prevp = NULL; 795 if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ 796 *more = 0; 797 ulwp->ul_rt = rt; 798 return (&qrp->qr_head); 799 } 800 *more = 1; 801 802 if (rt) /* real-time queue */ 803 return (queue_slot_rt(&qrp->qr_head, prevp)); 804 /* 805 * Try not to return a suspended thread. 806 * This mimics the old libthread's behavior. 
807 */ 808 if (ulwp->ul_stop && 809 (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { 810 ulwp = *ulwpp; 811 ulwp->ul_rt = 0; 812 return (ulwpp); 813 } 814 /* 815 * The common case; just pick the first thread on the queue. 816 */ 817 ulwp->ul_rt = 0; 818 return (&qrp->qr_head); 819 } 820 821 /* 822 * Common code for unlinking an lwp from a user-level sleep queue. 823 */ 824 void 825 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 826 { 827 queue_root_t *qrp = qp->qh_root; 828 queue_root_t *nqrp; 829 ulwp_t *ulwp = *ulwpp; 830 ulwp_t *next; 831 832 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 833 ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); 834 835 DECR(qp->qh_qlen); 836 DECR(qrp->qr_qlen); 837 if (ulwp->ul_rtqueued) { 838 ulwp->ul_rtqueued = 0; 839 qrp->qr_rtcount--; 840 } 841 next = ulwp->ul_link; 842 *ulwpp = next; 843 ulwp->ul_link = NULL; 844 if (qrp->qr_tail == ulwp) 845 qrp->qr_tail = prev; 846 if (qrp == &ulwp->ul_queue_root) { 847 /* 848 * We can't continue to use the unlinked thread's 849 * queue root for the linkage. 850 */ 851 queue_root_t *qr_next = qrp->qr_next; 852 queue_root_t *qr_prev = qrp->qr_prev; 853 854 if (qrp->qr_tail) { 855 /* switch to using the last thread's queue root */ 856 ASSERT(qrp->qr_qlen != 0); 857 nqrp = &qrp->qr_tail->ul_queue_root; 858 *nqrp = *qrp; 859 if (qr_next) 860 qr_next->qr_prev = nqrp; 861 if (qr_prev) 862 qr_prev->qr_next = nqrp; 863 else 864 qp->qh_hlist = nqrp; 865 qp->qh_root = nqrp; 866 } else { 867 /* empty queue root; just delete from the hash list */ 868 ASSERT(qrp->qr_qlen == 0); 869 if (qr_next) 870 qr_next->qr_prev = qr_prev; 871 if (qr_prev) 872 qr_prev->qr_next = qr_next; 873 else 874 qp->qh_hlist = qr_next; 875 qp->qh_root = NULL; 876 DECR(qp->qh_hlen); 877 } 878 } 879 } 880 881 ulwp_t * 882 dequeue(queue_head_t *qp, int *more) 883 { 884 ulwp_t **ulwpp; 885 ulwp_t *ulwp; 886 ulwp_t *prev; 887 888 if ((ulwpp = queue_slot(qp, &prev, more)) == NULL) 889 return (NULL); 890 ulwp = *ulwpp; 891 queue_unlink(qp, ulwpp, prev); 892 ulwp->ul_sleepq = NULL; 893 ulwp->ul_wchan = NULL; 894 return (ulwp); 895 } 896 897 /* 898 * Return a pointer to the highest priority thread sleeping on wchan. 899 */ 900 ulwp_t * 901 queue_waiter(queue_head_t *qp) 902 { 903 ulwp_t **ulwpp; 904 ulwp_t *prev; 905 int more; 906 907 if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL) 908 return (NULL); 909 return (*ulwpp); 910 } 911 912 int 913 dequeue_self(queue_head_t *qp) 914 { 915 ulwp_t *self = curthread; 916 queue_root_t *qrp; 917 ulwp_t **ulwpp; 918 ulwp_t *ulwp; 919 ulwp_t *prev; 920 int found = 0; 921 922 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 923 924 /* find self on the sleep queue */ 925 if ((qrp = qp->qh_root) != NULL) { 926 for (prev = NULL, ulwpp = &qrp->qr_head; 927 (ulwp = *ulwpp) != NULL; 928 prev = ulwp, ulwpp = &ulwp->ul_link) { 929 if (ulwp == self) { 930 queue_unlink(qp, ulwpp, prev); 931 self->ul_cvmutex = NULL; 932 self->ul_sleepq = NULL; 933 self->ul_wchan = NULL; 934 found = 1; 935 break; 936 } 937 } 938 } 939 940 if (!found) 941 thr_panic("dequeue_self(): curthread not found on queue"); 942 943 return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL); 944 } 945 946 /* 947 * Called from call_user_handler() and _thrp_suspend() to take 948 * ourself off of our sleep queue so we can grab locks. 949 */ 950 void 951 unsleep_self(void) 952 { 953 ulwp_t *self = curthread; 954 queue_head_t *qp; 955 956 /* 957 * Calling enter_critical()/exit_critical() here would lead 958 * to recursion. 
Just manipulate self->ul_critical directly. 959 */ 960 self->ul_critical++; 961 while (self->ul_sleepq != NULL) { 962 qp = queue_lock(self->ul_wchan, self->ul_qtype); 963 /* 964 * We may have been moved from a CV queue to a 965 * mutex queue while we were attempting queue_lock(). 966 * If so, just loop around and try again. 967 * dequeue_self() clears self->ul_sleepq. 968 */ 969 if (qp == self->ul_sleepq) 970 (void) dequeue_self(qp); 971 queue_unlock(qp); 972 } 973 self->ul_writer = 0; 974 self->ul_critical--; 975 } 976 977 /* 978 * Common code for calling the the ___lwp_mutex_timedlock() system call. 979 * Returns with mutex_owner and mutex_ownerpid set correctly. 980 */ 981 static int 982 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 983 { 984 ulwp_t *self = curthread; 985 uberdata_t *udp = self->ul_uberdata; 986 int mtype = mp->mutex_type; 987 hrtime_t begin_sleep; 988 int acquired; 989 int error; 990 991 self->ul_sp = stkptr(); 992 self->ul_wchan = mp; 993 if (__td_event_report(self, TD_SLEEP, udp)) { 994 self->ul_td_evbuf.eventnum = TD_SLEEP; 995 self->ul_td_evbuf.eventdata = mp; 996 tdb_event(TD_SLEEP, udp); 997 } 998 if (msp) { 999 tdb_incr(msp->mutex_sleep); 1000 begin_sleep = gethrtime(); 1001 } 1002 1003 DTRACE_PROBE1(plockstat, mutex__block, mp); 1004 1005 /* defer signals until the assignment of mp->mutex_owner */ 1006 sigoff(self); 1007 for (;;) { 1008 /* 1009 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1010 * means we successfully acquired the lock. 1011 */ 1012 if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 1013 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1014 acquired = 0; 1015 break; 1016 } 1017 1018 if (mtype & USYNC_PROCESS) { 1019 /* 1020 * Defend against forkall(). We may be the child, 1021 * in which case we don't actually own the mutex. 1022 */ 1023 enter_critical(self); 1024 if (mp->mutex_ownerpid == udp->pid) { 1025 mp->mutex_owner = (uintptr_t)self; 1026 exit_critical(self); 1027 acquired = 1; 1028 break; 1029 } 1030 exit_critical(self); 1031 } else { 1032 mp->mutex_owner = (uintptr_t)self; 1033 acquired = 1; 1034 break; 1035 } 1036 } 1037 sigon(self); 1038 1039 if (msp) 1040 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1041 self->ul_wchan = NULL; 1042 self->ul_sp = 0; 1043 1044 if (acquired) { 1045 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1046 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1047 } else { 1048 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1049 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1050 } 1051 1052 return (error); 1053 } 1054 1055 /* 1056 * Common code for calling the ___lwp_mutex_trylock() system call. 1057 * Returns with mutex_owner and mutex_ownerpid set correctly. 1058 */ 1059 int 1060 mutex_trylock_kernel(mutex_t *mp) 1061 { 1062 ulwp_t *self = curthread; 1063 uberdata_t *udp = self->ul_uberdata; 1064 int mtype = mp->mutex_type; 1065 int error; 1066 int acquired; 1067 1068 sigoff(self); 1069 for (;;) { 1070 /* 1071 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1072 * means we successfully acquired the lock. 1073 */ 1074 if ((error = ___lwp_mutex_trylock(mp)) != 0 && 1075 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1076 acquired = 0; 1077 break; 1078 } 1079 1080 if (mtype & USYNC_PROCESS) { 1081 /* 1082 * Defend against forkall(). We may be the child, 1083 * in which case we don't actually own the mutex. 
1084 */ 1085 enter_critical(self); 1086 if (mp->mutex_ownerpid == udp->pid) { 1087 mp->mutex_owner = (uintptr_t)self; 1088 exit_critical(self); 1089 acquired = 1; 1090 break; 1091 } 1092 exit_critical(self); 1093 } else { 1094 mp->mutex_owner = (uintptr_t)self; 1095 acquired = 1; 1096 break; 1097 } 1098 } 1099 sigon(self); 1100 1101 if (acquired) { 1102 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1103 } else if (error != EBUSY) { 1104 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1105 } 1106 1107 return (error); 1108 } 1109 1110 volatile sc_shared_t * 1111 setup_schedctl(void) 1112 { 1113 ulwp_t *self = curthread; 1114 volatile sc_shared_t *scp; 1115 sc_shared_t *tmp; 1116 1117 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 1118 !self->ul_vfork && /* not a child of vfork() */ 1119 !self->ul_schedctl_called) { /* haven't been called before */ 1120 enter_critical(self); 1121 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 1122 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 1123 self->ul_schedctl = scp = tmp; 1124 exit_critical(self); 1125 } 1126 /* 1127 * Unless the call to setup_schedctl() is surrounded 1128 * by enter_critical()/exit_critical(), the address 1129 * we are returning could be invalid due to a forkall() 1130 * having occurred in another thread. 1131 */ 1132 return (scp); 1133 } 1134 1135 /* 1136 * Interfaces from libsched, incorporated into libc. 1137 * libsched.so.1 is now a filter library onto libc. 1138 */ 1139 #pragma weak schedctl_lookup = schedctl_init 1140 schedctl_t * 1141 schedctl_init(void) 1142 { 1143 volatile sc_shared_t *scp = setup_schedctl(); 1144 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 1145 } 1146 1147 void 1148 schedctl_exit(void) 1149 { 1150 } 1151 1152 /* 1153 * Contract private interface for java. 1154 * Set up the schedctl data if it doesn't exist yet. 1155 * Return a pointer to the pointer to the schedctl data. 1156 */ 1157 volatile sc_shared_t *volatile * 1158 _thr_schedctl(void) 1159 { 1160 ulwp_t *self = curthread; 1161 volatile sc_shared_t *volatile *ptr; 1162 1163 if (self->ul_vfork) 1164 return (NULL); 1165 if (*(ptr = &self->ul_schedctl) == NULL) 1166 (void) setup_schedctl(); 1167 return (ptr); 1168 } 1169 1170 /* 1171 * Block signals and attempt to block preemption. 1172 * no_preempt()/preempt() must be used in pairs but can be nested. 1173 */ 1174 void 1175 no_preempt(ulwp_t *self) 1176 { 1177 volatile sc_shared_t *scp; 1178 1179 if (self->ul_preempt++ == 0) { 1180 enter_critical(self); 1181 if ((scp = self->ul_schedctl) != NULL || 1182 (scp = setup_schedctl()) != NULL) { 1183 /* 1184 * Save the pre-existing preempt value. 1185 */ 1186 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 1187 scp->sc_preemptctl.sc_nopreempt = 1; 1188 } 1189 } 1190 } 1191 1192 /* 1193 * Undo the effects of no_preempt(). 1194 */ 1195 void 1196 preempt(ulwp_t *self) 1197 { 1198 volatile sc_shared_t *scp; 1199 1200 ASSERT(self->ul_preempt > 0); 1201 if (--self->ul_preempt == 0) { 1202 if ((scp = self->ul_schedctl) != NULL) { 1203 /* 1204 * Restore the pre-existing preempt value. 1205 */ 1206 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 1207 if (scp->sc_preemptctl.sc_yield && 1208 scp->sc_preemptctl.sc_nopreempt == 0) { 1209 yield(); 1210 if (scp->sc_preemptctl.sc_yield) { 1211 /* 1212 * Shouldn't happen. This is either 1213 * a race condition or the thread 1214 * just entered the real-time class. 
1215 */ 1216 yield(); 1217 scp->sc_preemptctl.sc_yield = 0; 1218 } 1219 } 1220 } 1221 exit_critical(self); 1222 } 1223 } 1224 1225 /* 1226 * If a call to preempt() would cause the current thread to yield or to 1227 * take deferred actions in exit_critical(), then unpark the specified 1228 * lwp so it can run while we delay. Return the original lwpid if the 1229 * unpark was not performed, else return zero. The tests are a repeat 1230 * of some of the tests in preempt(), above. This is a statistical 1231 * optimization solely for cond_sleep_queue(), below. 1232 */ 1233 static lwpid_t 1234 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 1235 { 1236 volatile sc_shared_t *scp = self->ul_schedctl; 1237 1238 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 1239 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 1240 (self->ul_curplease && self->ul_critical == 1)) { 1241 (void) __lwp_unpark(lwpid); 1242 lwpid = 0; 1243 } 1244 return (lwpid); 1245 } 1246 1247 /* 1248 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1249 * If this fails, return EBUSY and let the caller deal with it. 1250 * If this succeeds, return 0 with mutex_owner set to curthread. 1251 */ 1252 static int 1253 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1254 { 1255 ulwp_t *self = curthread; 1256 int error = EBUSY; 1257 ulwp_t *ulwp; 1258 volatile sc_shared_t *scp; 1259 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1260 volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1261 uint32_t new_lockword; 1262 int count = 0; 1263 int max_count; 1264 uint8_t max_spinners; 1265 1266 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1267 1268 if (MUTEX_OWNED(mp, self)) 1269 return (EBUSY); 1270 1271 enter_critical(self); 1272 1273 /* short-cut, not definitive (see below) */ 1274 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1275 ASSERT(mp->mutex_type & LOCK_ROBUST); 1276 error = ENOTRECOVERABLE; 1277 goto done; 1278 } 1279 1280 /* 1281 * Make one attempt to acquire the lock before 1282 * incurring the overhead of the spin loop. 1283 */ 1284 if (set_lock_byte(lockp) == 0) { 1285 *ownerp = (uintptr_t)self; 1286 error = 0; 1287 goto done; 1288 } 1289 if (!tryhard) 1290 goto done; 1291 if (ncpus == 0) 1292 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1293 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1294 max_spinners = ncpus - 1; 1295 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1296 if (max_count == 0) 1297 goto done; 1298 1299 /* 1300 * This spin loop is unfair to lwps that have already dropped into 1301 * the kernel to sleep. They will starve on a highly-contended mutex. 1302 * This is just too bad. The adaptive spin algorithm is intended 1303 * to allow programs with highly-contended locks (that is, broken 1304 * programs) to execute with reasonable speed despite their contention. 1305 * Being fair would reduce the speed of such programs and well-written 1306 * programs will not suffer in any case. 1307 */ 1308 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) 1309 goto done; 1310 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1311 for (count = 1; ; count++) { 1312 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1313 *ownerp = (uintptr_t)self; 1314 error = 0; 1315 break; 1316 } 1317 if (count == max_count) 1318 break; 1319 SMT_PAUSE(); 1320 /* 1321 * Stop spinning if the mutex owner is not running on 1322 * a processor; it will not drop the lock any time soon 1323 * and we would just be wasting time to keep spinning. 
1324 * 1325 * Note that we are looking at another thread (ulwp_t) 1326 * without ensuring that the other thread does not exit. 1327 * The scheme relies on ulwp_t structures never being 1328 * deallocated by the library (the library employs a free 1329 * list of ulwp_t structs that are reused when new threads 1330 * are created) and on schedctl shared memory never being 1331 * deallocated once created via __schedctl(). 1332 * 1333 * Thus, the worst that can happen when the spinning thread 1334 * looks at the owner's schedctl data is that it is looking 1335 * at some other thread's schedctl data. This almost never 1336 * happens and is benign when it does. 1337 */ 1338 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1339 ((scp = ulwp->ul_schedctl) == NULL || 1340 scp->sc_state != SC_ONPROC)) 1341 break; 1342 } 1343 new_lockword = spinners_decr(&mp->mutex_lockword); 1344 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1345 /* 1346 * We haven't yet acquired the lock, the lock 1347 * is free, and there are no other spinners. 1348 * Make one final attempt to acquire the lock. 1349 * 1350 * This isn't strictly necessary since mutex_lock_queue() 1351 * (the next action this thread will take if it doesn't 1352 * acquire the lock here) makes one attempt to acquire 1353 * the lock before putting the thread to sleep. 1354 * 1355 * If the next action for this thread (on failure here) 1356 * were not to call mutex_lock_queue(), this would be 1357 * necessary for correctness, to avoid ending up with an 1358 * unheld mutex with waiters but no one to wake them up. 1359 */ 1360 if (set_lock_byte(lockp) == 0) { 1361 *ownerp = (uintptr_t)self; 1362 error = 0; 1363 } 1364 count++; 1365 } 1366 1367 done: 1368 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1369 ASSERT(mp->mutex_type & LOCK_ROBUST); 1370 /* 1371 * We shouldn't own the mutex. 1372 * Just clear the lock; everyone has already been waked up. 1373 */ 1374 *ownerp = 0; 1375 (void) clear_lockbyte(&mp->mutex_lockword); 1376 error = ENOTRECOVERABLE; 1377 } 1378 1379 exit_critical(self); 1380 1381 if (error) { 1382 if (count) { 1383 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1384 } 1385 if (error != EBUSY) { 1386 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1387 } 1388 } else { 1389 if (count) { 1390 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1391 } 1392 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1393 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1394 ASSERT(mp->mutex_type & LOCK_ROBUST); 1395 error = EOWNERDEAD; 1396 } 1397 } 1398 1399 return (error); 1400 } 1401 1402 /* 1403 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1404 * The owner field is not set here; the caller (spin_lock_set()) sets it. 
1405 */ 1406 static int 1407 mutex_queuelock_adaptive(mutex_t *mp) 1408 { 1409 ulwp_t *ulwp; 1410 volatile sc_shared_t *scp; 1411 volatile uint8_t *lockp; 1412 volatile uint64_t *ownerp; 1413 int count = curthread->ul_queue_spin; 1414 1415 ASSERT(mp->mutex_type == USYNC_THREAD); 1416 1417 if (count == 0) 1418 return (EBUSY); 1419 1420 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1421 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1422 while (--count >= 0) { 1423 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1424 return (0); 1425 SMT_PAUSE(); 1426 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1427 ((scp = ulwp->ul_schedctl) == NULL || 1428 scp->sc_state != SC_ONPROC)) 1429 break; 1430 } 1431 1432 return (EBUSY); 1433 } 1434 1435 /* 1436 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1437 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1438 * If this fails, return EBUSY and let the caller deal with it. 1439 * If this succeeds, return 0 with mutex_owner set to curthread 1440 * and mutex_ownerpid set to the current pid. 1441 */ 1442 static int 1443 mutex_trylock_process(mutex_t *mp, int tryhard) 1444 { 1445 ulwp_t *self = curthread; 1446 uberdata_t *udp = self->ul_uberdata; 1447 int error = EBUSY; 1448 volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64; 1449 uint32_t new_lockword; 1450 int count = 0; 1451 int max_count; 1452 uint8_t max_spinners; 1453 1454 #if defined(__sparc) && !defined(_LP64) 1455 /* horrible hack, necessary only on 32-bit sparc */ 1456 int fix_alignment_problem = 1457 (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 1458 self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)); 1459 #endif 1460 1461 ASSERT(mp->mutex_type & USYNC_PROCESS); 1462 1463 if (shared_mutex_held(mp)) 1464 return (EBUSY); 1465 1466 enter_critical(self); 1467 1468 /* short-cut, not definitive (see below) */ 1469 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1470 ASSERT(mp->mutex_type & LOCK_ROBUST); 1471 error = ENOTRECOVERABLE; 1472 goto done; 1473 } 1474 1475 /* 1476 * Make one attempt to acquire the lock before 1477 * incurring the overhead of the spin loop. 1478 */ 1479 #if defined(__sparc) && !defined(_LP64) 1480 /* horrible hack, necessary only on 32-bit sparc */ 1481 if (fix_alignment_problem) { 1482 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1483 mp->mutex_ownerpid = udp->pid; 1484 mp->mutex_owner = (uintptr_t)self; 1485 error = 0; 1486 goto done; 1487 } 1488 } else 1489 #endif 1490 if (set_lock_byte64(lockp, udp->pid) == 0) { 1491 mp->mutex_owner = (uintptr_t)self; 1492 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1493 error = 0; 1494 goto done; 1495 } 1496 if (!tryhard) 1497 goto done; 1498 if (ncpus == 0) 1499 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1500 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1501 max_spinners = ncpus - 1; 1502 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1503 if (max_count == 0) 1504 goto done; 1505 1506 /* 1507 * This is a process-shared mutex. 1508 * We cannot know if the owner is running on a processor. 1509 * We just spin and hope that it is on a processor. 
1510 */ 1511 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) 1512 goto done; 1513 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1514 for (count = 1; ; count++) { 1515 #if defined(__sparc) && !defined(_LP64) 1516 /* horrible hack, necessary only on 32-bit sparc */ 1517 if (fix_alignment_problem) { 1518 if ((*lockp & LOCKMASK64) == 0 && 1519 set_lock_byte(&mp->mutex_lockw) == 0) { 1520 mp->mutex_ownerpid = udp->pid; 1521 mp->mutex_owner = (uintptr_t)self; 1522 error = 0; 1523 break; 1524 } 1525 } else 1526 #endif 1527 if ((*lockp & LOCKMASK64) == 0 && 1528 set_lock_byte64(lockp, udp->pid) == 0) { 1529 mp->mutex_owner = (uintptr_t)self; 1530 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1531 error = 0; 1532 break; 1533 } 1534 if (count == max_count) 1535 break; 1536 SMT_PAUSE(); 1537 } 1538 new_lockword = spinners_decr(&mp->mutex_lockword); 1539 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1540 /* 1541 * We haven't yet acquired the lock, the lock 1542 * is free, and there are no other spinners. 1543 * Make one final attempt to acquire the lock. 1544 * 1545 * This isn't strictly necessary since mutex_lock_kernel() 1546 * (the next action this thread will take if it doesn't 1547 * acquire the lock here) makes one attempt to acquire 1548 * the lock before putting the thread to sleep. 1549 * 1550 * If the next action for this thread (on failure here) 1551 * were not to call mutex_lock_kernel(), this would be 1552 * necessary for correctness, to avoid ending up with an 1553 * unheld mutex with waiters but no one to wake them up. 1554 */ 1555 #if defined(__sparc) && !defined(_LP64) 1556 /* horrible hack, necessary only on 32-bit sparc */ 1557 if (fix_alignment_problem) { 1558 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1559 mp->mutex_ownerpid = udp->pid; 1560 mp->mutex_owner = (uintptr_t)self; 1561 error = 0; 1562 } 1563 } else 1564 #endif 1565 if (set_lock_byte64(lockp, udp->pid) == 0) { 1566 mp->mutex_owner = (uintptr_t)self; 1567 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1568 error = 0; 1569 } 1570 count++; 1571 } 1572 1573 done: 1574 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1575 ASSERT(mp->mutex_type & LOCK_ROBUST); 1576 /* 1577 * We shouldn't own the mutex. 1578 * Just clear the lock; everyone has already been waked up. 1579 */ 1580 mp->mutex_owner = 0; 1581 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1582 (void) clear_lockbyte64(&mp->mutex_lockword64); 1583 error = ENOTRECOVERABLE; 1584 } 1585 1586 exit_critical(self); 1587 1588 if (error) { 1589 if (count) { 1590 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1591 } 1592 if (error != EBUSY) { 1593 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1594 } 1595 } else { 1596 if (count) { 1597 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1598 } 1599 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1600 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1601 ASSERT(mp->mutex_type & LOCK_ROBUST); 1602 if (mp->mutex_flag & LOCK_OWNERDEAD) 1603 error = EOWNERDEAD; 1604 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1605 error = ELOCKUNMAPPED; 1606 else 1607 error = EOWNERDEAD; 1608 } 1609 } 1610 1611 return (error); 1612 } 1613 1614 /* 1615 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1616 * Returns the lwpid of the thread that was dequeued, if any. 1617 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1618 * to wake up the specified lwp. 
1619 */ 1620 static lwpid_t 1621 mutex_wakeup(mutex_t *mp) 1622 { 1623 lwpid_t lwpid = 0; 1624 int more; 1625 queue_head_t *qp; 1626 ulwp_t *ulwp; 1627 1628 /* 1629 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1630 * waiters bit if no one was found on the queue because the mutex 1631 * might have been deallocated or reallocated for another purpose. 1632 */ 1633 qp = queue_lock(mp, MX); 1634 if ((ulwp = dequeue(qp, &more)) != NULL) { 1635 lwpid = ulwp->ul_lwpid; 1636 mp->mutex_waiters = more; 1637 } 1638 queue_unlock(qp); 1639 return (lwpid); 1640 } 1641 1642 /* 1643 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 1644 */ 1645 static void 1646 mutex_wakeup_all(mutex_t *mp) 1647 { 1648 queue_head_t *qp; 1649 queue_root_t *qrp; 1650 int nlwpid = 0; 1651 int maxlwps = MAXLWPS; 1652 ulwp_t *ulwp; 1653 lwpid_t buffer[MAXLWPS]; 1654 lwpid_t *lwpid = buffer; 1655 1656 /* 1657 * Walk the list of waiters and prepare to wake up all of them. 1658 * The waiters flag has already been cleared from the mutex. 1659 * 1660 * We keep track of lwpids that are to be unparked in lwpid[]. 1661 * __lwp_unpark_all() is called to unpark all of them after 1662 * they have been removed from the sleep queue and the sleep 1663 * queue lock has been dropped. If we run out of space in our 1664 * on-stack buffer, we need to allocate more but we can't call 1665 * lmalloc() because we are holding a queue lock when the overflow 1666 * occurs and lmalloc() acquires a lock. We can't use alloca() 1667 * either because the application may have allocated a small 1668 * stack and we don't want to overrun the stack. So we call 1669 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1670 * system call directly since that path acquires no locks. 1671 */ 1672 qp = queue_lock(mp, MX); 1673 for (;;) { 1674 if ((qrp = qp->qh_root) == NULL || 1675 (ulwp = qrp->qr_head) == NULL) 1676 break; 1677 ASSERT(ulwp->ul_wchan == mp); 1678 queue_unlink(qp, &qrp->qr_head, NULL); 1679 ulwp->ul_sleepq = NULL; 1680 ulwp->ul_wchan = NULL; 1681 if (nlwpid == maxlwps) 1682 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1683 lwpid[nlwpid++] = ulwp->ul_lwpid; 1684 } 1685 1686 if (nlwpid == 0) { 1687 queue_unlock(qp); 1688 } else { 1689 mp->mutex_waiters = 0; 1690 no_preempt(curthread); 1691 queue_unlock(qp); 1692 if (nlwpid == 1) 1693 (void) __lwp_unpark(lwpid[0]); 1694 else 1695 (void) __lwp_unpark_all(lwpid, nlwpid); 1696 preempt(curthread); 1697 } 1698 1699 if (lwpid != buffer) 1700 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 1701 } 1702 1703 /* 1704 * Release a process-private mutex. 1705 * As an optimization, if there are waiters but there are also spinners 1706 * attempting to acquire the mutex, then don't bother waking up a waiter; 1707 * one of the spinners will acquire the mutex soon and it would be a waste 1708 * of resources to wake up some thread just to have it spin for a while 1709 * and then possibly go back to sleep. See mutex_trylock_adaptive(). 
1710 */ 1711 static lwpid_t 1712 mutex_unlock_queue(mutex_t *mp, int release_all) 1713 { 1714 ulwp_t *self = curthread; 1715 lwpid_t lwpid = 0; 1716 uint32_t old_lockword; 1717 1718 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1719 sigoff(self); 1720 mp->mutex_owner = 0; 1721 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1722 if ((old_lockword & WAITERMASK) && 1723 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1724 no_preempt(self); /* ensure a prompt wakeup */ 1725 if (release_all) 1726 mutex_wakeup_all(mp); 1727 else 1728 lwpid = mutex_wakeup(mp); 1729 if (lwpid == 0) 1730 preempt(self); 1731 } 1732 sigon(self); 1733 return (lwpid); 1734 } 1735 1736 /* 1737 * Like mutex_unlock_queue(), but for process-shared mutexes. 1738 */ 1739 static void 1740 mutex_unlock_process(mutex_t *mp, int release_all) 1741 { 1742 ulwp_t *self = curthread; 1743 uint64_t old_lockword64; 1744 1745 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1746 sigoff(self); 1747 mp->mutex_owner = 0; 1748 #if defined(__sparc) && !defined(_LP64) 1749 /* horrible hack, necessary only on 32-bit sparc */ 1750 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 1751 self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)) { 1752 uint32_t old_lockword; 1753 mp->mutex_ownerpid = 0; 1754 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1755 if ((old_lockword & WAITERMASK) && 1756 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1757 no_preempt(self); /* ensure a prompt wakeup */ 1758 (void) ___lwp_mutex_wakeup(mp, release_all); 1759 preempt(self); 1760 } 1761 sigon(self); 1762 return; 1763 } 1764 #endif 1765 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1766 old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64); 1767 if ((old_lockword64 & WAITERMASK64) && 1768 (release_all || (old_lockword64 & SPINNERMASK64) == 0)) { 1769 no_preempt(self); /* ensure a prompt wakeup */ 1770 (void) ___lwp_mutex_wakeup(mp, release_all); 1771 preempt(self); 1772 } 1773 sigon(self); 1774 } 1775 1776 void 1777 stall(void) 1778 { 1779 for (;;) 1780 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1781 } 1782 1783 /* 1784 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1785 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1786 * If successful, returns with mutex_owner set correctly. 1787 */ 1788 int 1789 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1790 timespec_t *tsp) 1791 { 1792 uberdata_t *udp = curthread->ul_uberdata; 1793 queue_head_t *qp; 1794 hrtime_t begin_sleep; 1795 int error = 0; 1796 1797 self->ul_sp = stkptr(); 1798 if (__td_event_report(self, TD_SLEEP, udp)) { 1799 self->ul_wchan = mp; 1800 self->ul_td_evbuf.eventnum = TD_SLEEP; 1801 self->ul_td_evbuf.eventdata = mp; 1802 tdb_event(TD_SLEEP, udp); 1803 } 1804 if (msp) { 1805 tdb_incr(msp->mutex_sleep); 1806 begin_sleep = gethrtime(); 1807 } 1808 1809 DTRACE_PROBE1(plockstat, mutex__block, mp); 1810 1811 /* 1812 * Put ourself on the sleep queue, and while we are 1813 * unable to grab the lock, go park in the kernel. 1814 * Take ourself off the sleep queue after we acquire the lock. 1815 * The waiter bit can be set/cleared only while holding the queue lock. 
1816 */ 1817 qp = queue_lock(mp, MX); 1818 enqueue(qp, self, 0); 1819 mp->mutex_waiters = 1; 1820 for (;;) { 1821 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1822 mp->mutex_owner = (uintptr_t)self; 1823 mp->mutex_waiters = dequeue_self(qp); 1824 break; 1825 } 1826 set_parking_flag(self, 1); 1827 queue_unlock(qp); 1828 /* 1829 * __lwp_park() will return the residual time in tsp 1830 * if we are unparked before the timeout expires. 1831 */ 1832 error = __lwp_park(tsp, 0); 1833 set_parking_flag(self, 0); 1834 /* 1835 * We could have taken a signal or suspended ourself. 1836 * If we did, then we removed ourself from the queue. 1837 * Someone else may have removed us from the queue 1838 * as a consequence of mutex_unlock(). We may have 1839 * gotten a timeout from __lwp_park(). Or we may still 1840 * be on the queue and this is just a spurious wakeup. 1841 */ 1842 qp = queue_lock(mp, MX); 1843 if (self->ul_sleepq == NULL) { 1844 if (error) { 1845 mp->mutex_waiters = queue_waiter(qp)? 1 : 0; 1846 if (error != EINTR) 1847 break; 1848 error = 0; 1849 } 1850 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1851 mp->mutex_owner = (uintptr_t)self; 1852 break; 1853 } 1854 enqueue(qp, self, 0); 1855 mp->mutex_waiters = 1; 1856 } 1857 ASSERT(self->ul_sleepq == qp && 1858 self->ul_qtype == MX && 1859 self->ul_wchan == mp); 1860 if (error) { 1861 if (error != EINTR) { 1862 mp->mutex_waiters = dequeue_self(qp); 1863 break; 1864 } 1865 error = 0; 1866 } 1867 } 1868 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1869 self->ul_wchan == NULL); 1870 self->ul_sp = 0; 1871 1872 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1873 1874 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1875 ASSERT(mp->mutex_type & LOCK_ROBUST); 1876 /* 1877 * We shouldn't own the mutex. 1878 * Just clear the lock; everyone has already been waked up. 1879 */ 1880 mp->mutex_owner = 0; 1881 (void) clear_lockbyte(&mp->mutex_lockword); 1882 error = ENOTRECOVERABLE; 1883 } 1884 1885 queue_unlock(qp); 1886 1887 if (msp) 1888 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1889 1890 if (error) { 1891 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1892 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1893 } else { 1894 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1895 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1896 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1897 ASSERT(mp->mutex_type & LOCK_ROBUST); 1898 error = EOWNERDEAD; 1899 } 1900 } 1901 1902 return (error); 1903 } 1904 1905 static int 1906 mutex_recursion(mutex_t *mp, int mtype, int try) 1907 { 1908 ASSERT(mutex_held(mp)); 1909 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1910 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1911 1912 if (mtype & LOCK_RECURSIVE) { 1913 if (mp->mutex_rcount == RECURSION_MAX) { 1914 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1915 return (EAGAIN); 1916 } 1917 mp->mutex_rcount++; 1918 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1919 return (0); 1920 } 1921 if (try == MUTEX_LOCK) { 1922 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1923 return (EDEADLK); 1924 } 1925 return (EBUSY); 1926 } 1927 1928 /* 1929 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1930 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1931 * We use tdb_hash_lock here and in the synch object tracking code in 1932 * the tdb_agent.c file. There is no conflict between these two usages. 
1933 */ 1934 void 1935 register_lock(mutex_t *mp) 1936 { 1937 uberdata_t *udp = curthread->ul_uberdata; 1938 uint_t hash = LOCK_HASH(mp); 1939 robust_t *rlp; 1940 robust_t *invalid; 1941 robust_t **rlpp; 1942 robust_t **table; 1943 1944 if ((table = udp->robustlocks) == NULL) { 1945 lmutex_lock(&udp->tdb_hash_lock); 1946 if ((table = udp->robustlocks) == NULL) { 1947 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1948 membar_producer(); 1949 udp->robustlocks = table; 1950 } 1951 lmutex_unlock(&udp->tdb_hash_lock); 1952 } 1953 membar_consumer(); 1954 1955 /* 1956 * First search the registered table with no locks held. 1957 * This is safe because the table never shrinks 1958 * and we can only get a false negative. 1959 */ 1960 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1961 if (rlp->robust_lock == mp) /* already registered */ 1962 return; 1963 } 1964 1965 /* 1966 * The lock was not found. 1967 * Repeat the operation with tdb_hash_lock held. 1968 */ 1969 lmutex_lock(&udp->tdb_hash_lock); 1970 1971 invalid = NULL; 1972 for (rlpp = &table[hash]; 1973 (rlp = *rlpp) != NULL; 1974 rlpp = &rlp->robust_next) { 1975 if (rlp->robust_lock == mp) { /* already registered */ 1976 lmutex_unlock(&udp->tdb_hash_lock); 1977 return; 1978 } 1979 /* remember the first invalid entry, if any */ 1980 if (rlp->robust_lock == INVALID_ADDR && invalid == NULL) 1981 invalid = rlp; 1982 } 1983 1984 /* 1985 * The lock has never been registered. 1986 * Add it to the table and register it now. 1987 */ 1988 if (invalid != NULL) { 1989 /* 1990 * Reuse the invalid entry we found above. 1991 * The linkages are still correct. 1992 */ 1993 invalid->robust_lock = mp; 1994 membar_producer(); 1995 } else { 1996 /* 1997 * Allocate a new entry and add it to 1998 * the hash table and to the global list. 1999 */ 2000 rlp = lmalloc(sizeof (*rlp)); 2001 rlp->robust_lock = mp; 2002 rlp->robust_next = NULL; 2003 rlp->robust_list = udp->robustlist; 2004 udp->robustlist = rlp; 2005 membar_producer(); 2006 *rlpp = rlp; 2007 } 2008 2009 lmutex_unlock(&udp->tdb_hash_lock); 2010 2011 (void) ___lwp_mutex_register(mp); 2012 } 2013 2014 /* 2015 * This is called from mmap(), munmap() and shmdt() to unregister 2016 * all robust locks contained in the mapping that is going away. 2017 * We don't delete the entries in the hash table, since the hash table 2018 * is constrained never to shrink; we just invalidate the addresses. 2019 */ 2020 void 2021 unregister_locks(caddr_t addr, size_t len) 2022 { 2023 static size_t pagesize = 0; 2024 uberdata_t *udp = curthread->ul_uberdata; 2025 robust_t *rlp; 2026 caddr_t eaddr; 2027 caddr_t maddr; 2028 2029 /* 2030 * Round up len to a multiple of pagesize. 2031 */ 2032 if (pagesize == 0) /* do this once */ 2033 pagesize = _sysconf(_SC_PAGESIZE); 2034 eaddr = addr + ((len + pagesize - 1) & -pagesize); 2035 2036 lmutex_lock(&udp->tdb_hash_lock); 2037 2038 /* 2039 * Do this by traversing the global list, not the hash table. 2040 * The hash table is large (32K buckets) and sparsely populated. 2041 * The global list contains all of the registered entries. 2042 */ 2043 for (rlp = udp->robustlist; rlp != NULL; rlp = rlp->robust_list) { 2044 maddr = (caddr_t)rlp->robust_lock; 2045 if (addr <= maddr && maddr < eaddr) 2046 rlp->robust_lock = INVALID_ADDR; 2047 } 2048 2049 lmutex_unlock(&udp->tdb_hash_lock); 2050 } 2051 2052 /* 2053 * This is called in the child of fork()/forkall() to start over 2054 * with a clean slate. (Each process must register its own locks.) 
2055 * No locks are needed because all other threads are suspended or gone. 2056 */ 2057 void 2058 unregister_all_locks(void) 2059 { 2060 uberdata_t *udp = curthread->ul_uberdata; 2061 robust_t **table; 2062 robust_t *rlp; 2063 robust_t *next; 2064 2065 /* 2066 * Do this first, before calling lfree(). 2067 * lfree() may call munmap(), which calls unregister_locks(). 2068 */ 2069 table = udp->robustlocks; 2070 udp->robustlocks = NULL; 2071 rlp = udp->robustlist; 2072 udp->robustlist = NULL; 2073 2074 /* 2075 * As above, do this by traversing the global list, not the hash table. 2076 */ 2077 while (rlp != NULL) { 2078 next = rlp->robust_list; 2079 lfree(rlp, sizeof (*rlp)); 2080 rlp = next; 2081 } 2082 if (table != NULL) 2083 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 2084 } 2085 2086 /* 2087 * Returns with mutex_owner set correctly. 2088 */ 2089 int 2090 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 2091 { 2092 ulwp_t *self = curthread; 2093 uberdata_t *udp = self->ul_uberdata; 2094 int mtype = mp->mutex_type; 2095 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2096 int error = 0; 2097 int noceil = try & MUTEX_NOCEIL; 2098 uint8_t ceil; 2099 int myprio; 2100 2101 try &= ~MUTEX_NOCEIL; 2102 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 2103 2104 if (!self->ul_schedctl_called) 2105 (void) setup_schedctl(); 2106 2107 if (msp && try == MUTEX_TRY) 2108 tdb_incr(msp->mutex_try); 2109 2110 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_held(mp)) 2111 return (mutex_recursion(mp, mtype, try)); 2112 2113 if (self->ul_error_detection && try == MUTEX_LOCK && 2114 tsp == NULL && mutex_held(mp)) 2115 lock_error(mp, "mutex_lock", NULL, NULL); 2116 2117 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2118 update_sched(self); 2119 if (self->ul_cid != self->ul_rtclassid) { 2120 DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM); 2121 return (EPERM); 2122 } 2123 ceil = mp->mutex_ceiling; 2124 myprio = self->ul_epri? self->ul_epri : self->ul_pri; 2125 if (myprio > ceil) { 2126 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 2127 return (EINVAL); 2128 } 2129 if ((error = _ceil_mylist_add(mp)) != 0) { 2130 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 2131 return (error); 2132 } 2133 if (myprio < ceil) 2134 _ceil_prio_inherit(ceil); 2135 } 2136 2137 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 2138 == (USYNC_PROCESS | LOCK_ROBUST)) 2139 register_lock(mp); 2140 2141 if (mtype & LOCK_PRIO_INHERIT) { 2142 /* go straight to the kernel */ 2143 if (try == MUTEX_TRY) 2144 error = mutex_trylock_kernel(mp); 2145 else /* MUTEX_LOCK */ 2146 error = mutex_lock_kernel(mp, tsp, msp); 2147 /* 2148 * The kernel never sets or clears the lock byte 2149 * for LOCK_PRIO_INHERIT mutexes. 2150 * Set it here for consistency. 2151 */ 2152 switch (error) { 2153 case 0: 2154 self->ul_pilocks++; 2155 mp->mutex_lockw = LOCKSET; 2156 break; 2157 case EOWNERDEAD: 2158 case ELOCKUNMAPPED: 2159 self->ul_pilocks++; 2160 mp->mutex_lockw = LOCKSET; 2161 /* FALLTHROUGH */ 2162 case ENOTRECOVERABLE: 2163 ASSERT(mtype & LOCK_ROBUST); 2164 break; 2165 case EDEADLK: 2166 if (try == MUTEX_TRY) { 2167 error = EBUSY; 2168 } else if (tsp != NULL) { /* simulate a timeout */ 2169 /* 2170 * Note: mutex_timedlock() never returns EINTR. 
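 *
 * (In other words, the kernel's EDEADLK is converted into the timeout
 * the caller asked for: we sleep out the remaining time, restarting
 * __nanosleep() whenever it is interrupted precisely because EINTR
 * must not escape from here, and then report ETIME, which
 * pthread_mutex_timedlock() translates to ETIMEDOUT.)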
2171 */ 2172 timespec_t ts = *tsp; 2173 timespec_t rts; 2174 2175 while (__nanosleep(&ts, &rts) == EINTR) 2176 ts = rts; 2177 error = ETIME; 2178 } else { /* simulate a deadlock */ 2179 stall(); 2180 } 2181 break; 2182 } 2183 } else if (mtype & USYNC_PROCESS) { 2184 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 2185 if (error == EBUSY && try == MUTEX_LOCK) 2186 error = mutex_lock_kernel(mp, tsp, msp); 2187 } else { /* USYNC_THREAD */ 2188 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 2189 if (error == EBUSY && try == MUTEX_LOCK) 2190 error = mutex_lock_queue(self, msp, mp, tsp); 2191 } 2192 2193 switch (error) { 2194 case 0: 2195 case EOWNERDEAD: 2196 case ELOCKUNMAPPED: 2197 if (mtype & LOCK_ROBUST) 2198 remember_lock(mp); 2199 if (msp) 2200 record_begin_hold(msp); 2201 break; 2202 default: 2203 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2204 (void) _ceil_mylist_del(mp); 2205 if (myprio < ceil) 2206 _ceil_prio_waive(); 2207 } 2208 if (try == MUTEX_TRY) { 2209 if (msp) 2210 tdb_incr(msp->mutex_try_fail); 2211 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2212 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2213 tdb_event(TD_LOCK_TRY, udp); 2214 } 2215 } 2216 break; 2217 } 2218 2219 return (error); 2220 } 2221 2222 int 2223 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 2224 { 2225 ulwp_t *self = curthread; 2226 uberdata_t *udp = self->ul_uberdata; 2227 2228 /* 2229 * We know that USYNC_PROCESS is set in mtype and that 2230 * zero, one, or both of the flags LOCK_RECURSIVE and 2231 * LOCK_ERRORCHECK are set, and that no other flags are set. 2232 */ 2233 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 2234 enter_critical(self); 2235 #if defined(__sparc) && !defined(_LP64) 2236 /* horrible hack, necessary only on 32-bit sparc */ 2237 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2238 self->ul_misaligned) { 2239 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2240 mp->mutex_ownerpid = udp->pid; 2241 mp->mutex_owner = (uintptr_t)self; 2242 exit_critical(self); 2243 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2244 return (0); 2245 } 2246 } else 2247 #endif 2248 if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) { 2249 mp->mutex_owner = (uintptr_t)self; 2250 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 2251 exit_critical(self); 2252 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2253 return (0); 2254 } 2255 exit_critical(self); 2256 2257 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 2258 return (mutex_recursion(mp, mtype, try)); 2259 2260 if (try == MUTEX_LOCK) { 2261 if (mutex_trylock_process(mp, 1) == 0) 2262 return (0); 2263 return (mutex_lock_kernel(mp, tsp, NULL)); 2264 } 2265 2266 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2267 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2268 tdb_event(TD_LOCK_TRY, udp); 2269 } 2270 return (EBUSY); 2271 } 2272 2273 static int 2274 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 2275 { 2276 ulwp_t *self = curthread; 2277 int mtype = mp->mutex_type; 2278 uberflags_t *gflags; 2279 2280 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2281 self->ul_error_detection && self->ul_misaligned == 0) 2282 lock_error(mp, "mutex_lock", NULL, "mutex is misaligned"); 2283 2284 /* 2285 * Optimize the case of USYNC_THREAD, including 2286 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2287 * no error detection, no lock statistics, 2288 * and the process has only a single thread. 2289 * (Most likely a traditional single-threaded application.) 
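 *
 * Even with only one thread, the plain stores to mutex_lockw and
 * mutex_owner below are bracketed by sigoff()/sigon(): a signal
 * handler running in this thread between the two stores could itself
 * operate on this mutex and see the update only half done, so signals
 * are held off across the pair of stores.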
2290 */ 2291 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2292 self->ul_uberdata->uberflags.uf_all) == 0) { 2293 /* 2294 * Only one thread exists so we don't need an atomic operation. 2295 * We do, however, need to protect against signals. 2296 */ 2297 if (mp->mutex_lockw == 0) { 2298 sigoff(self); 2299 mp->mutex_lockw = LOCKSET; 2300 mp->mutex_owner = (uintptr_t)self; 2301 sigon(self); 2302 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2303 return (0); 2304 } 2305 if (mtype && MUTEX_OWNER(mp) == self) 2306 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2307 /* 2308 * We have reached a deadlock, probably because the 2309 * process is executing non-async-signal-safe code in 2310 * a signal handler and is attempting to acquire a lock 2311 * that it already owns. This is not surprising, given 2312 * bad programming practices over the years that have 2313 * resulted in applications calling printf() and such 2314 * in their signal handlers. Unless the user has told 2315 * us that the signal handlers are safe by setting: 2316 * export _THREAD_ASYNC_SAFE=1 2317 * we return EDEADLK rather than actually deadlocking. 2318 */ 2319 if (tsp == NULL && 2320 MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 2321 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 2322 return (EDEADLK); 2323 } 2324 } 2325 2326 /* 2327 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2328 * no error detection, and no lock statistics. 2329 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2330 */ 2331 if ((gflags = self->ul_schedctl_called) != NULL && 2332 (gflags->uf_trs_ted | 2333 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2334 if (mtype & USYNC_PROCESS) 2335 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 2336 sigoff(self); 2337 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2338 mp->mutex_owner = (uintptr_t)self; 2339 sigon(self); 2340 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2341 return (0); 2342 } 2343 sigon(self); 2344 if (mtype && MUTEX_OWNER(mp) == self) 2345 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2346 if (mutex_trylock_adaptive(mp, 1) != 0) 2347 return (mutex_lock_queue(self, NULL, mp, tsp)); 2348 return (0); 2349 } 2350 2351 /* else do it the long way */ 2352 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 2353 } 2354 2355 #pragma weak pthread_mutex_lock = mutex_lock 2356 #pragma weak _mutex_lock = mutex_lock 2357 int 2358 mutex_lock(mutex_t *mp) 2359 { 2360 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2361 return (mutex_lock_impl(mp, NULL)); 2362 } 2363 2364 int 2365 pthread_mutex_timedlock(pthread_mutex_t *_RESTRICT_KYWD mp, 2366 const struct timespec *_RESTRICT_KYWD abstime) 2367 { 2368 timespec_t tslocal; 2369 int error; 2370 2371 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2372 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 2373 error = mutex_lock_impl((mutex_t *)mp, &tslocal); 2374 if (error == ETIME) 2375 error = ETIMEDOUT; 2376 return (error); 2377 } 2378 2379 int 2380 pthread_mutex_reltimedlock_np(pthread_mutex_t *_RESTRICT_KYWD mp, 2381 const struct timespec *_RESTRICT_KYWD reltime) 2382 { 2383 timespec_t tslocal; 2384 int error; 2385 2386 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2387 tslocal = *reltime; 2388 error = mutex_lock_impl((mutex_t *)mp, &tslocal); 2389 if (error == ETIME) 2390 error = ETIMEDOUT; 2391 return (error); 2392 } 2393 2394 #pragma weak pthread_mutex_trylock = mutex_trylock 2395 int 2396 mutex_trylock(mutex_t *mp) 2397 { 2398 ulwp_t *self =
curthread; 2399 uberdata_t *udp = self->ul_uberdata; 2400 int mtype = mp->mutex_type; 2401 uberflags_t *gflags; 2402 2403 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2404 2405 /* 2406 * Optimize the case of USYNC_THREAD, including 2407 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2408 * no error detection, no lock statistics, 2409 * and the process has only a single thread. 2410 * (Most likely a traditional single-threaded application.) 2411 */ 2412 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2413 udp->uberflags.uf_all) == 0) { 2414 /* 2415 * Only one thread exists so we don't need an atomic operation. 2416 * We do, however, need to protect against signals. 2417 */ 2418 if (mp->mutex_lockw == 0) { 2419 sigoff(self); 2420 mp->mutex_lockw = LOCKSET; 2421 mp->mutex_owner = (uintptr_t)self; 2422 sigon(self); 2423 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2424 return (0); 2425 } 2426 if (mtype && MUTEX_OWNER(mp) == self) 2427 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2428 return (EBUSY); 2429 } 2430 2431 /* 2432 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2433 * no error detection, and no lock statistics. 2434 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2435 */ 2436 if ((gflags = self->ul_schedctl_called) != NULL && 2437 (gflags->uf_trs_ted | 2438 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2439 if (mtype & USYNC_PROCESS) 2440 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2441 sigoff(self); 2442 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2443 mp->mutex_owner = (uintptr_t)self; 2444 sigon(self); 2445 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2446 return (0); 2447 } 2448 sigon(self); 2449 if (mtype && MUTEX_OWNER(mp) == self) 2450 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2451 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2452 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2453 tdb_event(TD_LOCK_TRY, udp); 2454 } 2455 return (EBUSY); 2456 } 2457 2458 /* else do it the long way */ 2459 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2460 } 2461 2462 int 2463 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2464 { 2465 ulwp_t *self = curthread; 2466 uberdata_t *udp = self->ul_uberdata; 2467 int mtype = mp->mutex_type; 2468 tdb_mutex_stats_t *msp; 2469 int error = 0; 2470 int release_all; 2471 lwpid_t lwpid; 2472 2473 if ((mtype & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 2474 !mutex_held(mp)) 2475 return (EPERM); 2476 2477 if (self->ul_error_detection && !mutex_held(mp)) 2478 lock_error(mp, "mutex_unlock", NULL, NULL); 2479 2480 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2481 mp->mutex_rcount--; 2482 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2483 return (0); 2484 } 2485 2486 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2487 (void) record_hold_time(msp); 2488 2489 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2490 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2491 ASSERT(mtype & LOCK_ROBUST); 2492 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2493 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2494 } 2495 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2496 2497 if (mtype & LOCK_PRIO_INHERIT) { 2498 no_preempt(self); 2499 mp->mutex_owner = 0; 2500 /* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */ 2501 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2502 mp->mutex_lockw = LOCKCLEAR; 2503 self->ul_pilocks--; 2504 error = ___lwp_mutex_unlock(mp); 2505 preempt(self); 2506 } else if (mtype & USYNC_PROCESS) { 2507 
mutex_unlock_process(mp, release_all); 2508 } else { /* USYNC_THREAD */ 2509 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2510 (void) __lwp_unpark(lwpid); 2511 preempt(self); 2512 } 2513 } 2514 2515 if (mtype & LOCK_ROBUST) 2516 forget_lock(mp); 2517 2518 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2519 _ceil_prio_waive(); 2520 2521 return (error); 2522 } 2523 2524 #pragma weak pthread_mutex_unlock = mutex_unlock 2525 #pragma weak _mutex_unlock = mutex_unlock 2526 int 2527 mutex_unlock(mutex_t *mp) 2528 { 2529 ulwp_t *self = curthread; 2530 int mtype = mp->mutex_type; 2531 uberflags_t *gflags; 2532 lwpid_t lwpid; 2533 short el; 2534 2535 /* 2536 * Optimize the case of USYNC_THREAD, including 2537 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2538 * no error detection, no lock statistics, 2539 * and the process has only a single thread. 2540 * (Most likely a traditional single-threaded application.) 2541 */ 2542 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2543 self->ul_uberdata->uberflags.uf_all) == 0) { 2544 if (mtype) { 2545 /* 2546 * At this point we know that one or both of the 2547 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2548 */ 2549 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2550 return (EPERM); 2551 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2552 mp->mutex_rcount--; 2553 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2554 return (0); 2555 } 2556 } 2557 /* 2558 * Only one thread exists so we don't need an atomic operation. 2559 * Also, there can be no waiters. 2560 */ 2561 sigoff(self); 2562 mp->mutex_owner = 0; 2563 mp->mutex_lockword = 0; 2564 sigon(self); 2565 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2566 return (0); 2567 } 2568 2569 /* 2570 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2571 * no error detection, and no lock statistics. 2572 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2573 */ 2574 if ((gflags = self->ul_schedctl_called) != NULL) { 2575 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2576 fast_unlock: 2577 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2578 (void) __lwp_unpark(lwpid); 2579 preempt(self); 2580 } 2581 return (0); 2582 } 2583 if (el) /* error detection or lock statistics */ 2584 goto slow_unlock; 2585 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2586 /* 2587 * At this point we know that one or both of the 2588 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2589 */ 2590 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2591 return (EPERM); 2592 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2593 mp->mutex_rcount--; 2594 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2595 return (0); 2596 } 2597 goto fast_unlock; 2598 } 2599 if ((mtype & 2600 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2601 /* 2602 * At this point we know that zero, one, or both of the 2603 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2604 * that the USYNC_PROCESS flag is set. 
2605 */ 2606 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2607 return (EPERM); 2608 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2609 mp->mutex_rcount--; 2610 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2611 return (0); 2612 } 2613 mutex_unlock_process(mp, 0); 2614 return (0); 2615 } 2616 } 2617 2618 /* else do it the long way */ 2619 slow_unlock: 2620 return (mutex_unlock_internal(mp, 0)); 2621 } 2622 2623 /* 2624 * Internally to the library, almost all mutex lock/unlock actions 2625 * go through these lmutex_ functions, to protect critical regions. 2626 * We replicate a bit of code from mutex_lock() and mutex_unlock() 2627 * to make these functions faster since we know that the mutex type 2628 * of all internal locks is USYNC_THREAD. We also know that internal 2629 * locking can never fail, so we panic if it does. 2630 */ 2631 void 2632 lmutex_lock(mutex_t *mp) 2633 { 2634 ulwp_t *self = curthread; 2635 uberdata_t *udp = self->ul_uberdata; 2636 2637 ASSERT(mp->mutex_type == USYNC_THREAD); 2638 2639 enter_critical(self); 2640 /* 2641 * Optimize the case of no lock statistics and only a single thread. 2642 * (Most likely a traditional single-threaded application.) 2643 */ 2644 if (udp->uberflags.uf_all == 0) { 2645 /* 2646 * Only one thread exists; the mutex must be free. 2647 */ 2648 ASSERT(mp->mutex_lockw == 0); 2649 mp->mutex_lockw = LOCKSET; 2650 mp->mutex_owner = (uintptr_t)self; 2651 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2652 } else { 2653 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2654 2655 if (!self->ul_schedctl_called) 2656 (void) setup_schedctl(); 2657 2658 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2659 mp->mutex_owner = (uintptr_t)self; 2660 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2661 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2662 (void) mutex_lock_queue(self, msp, mp, NULL); 2663 } 2664 2665 if (msp) 2666 record_begin_hold(msp); 2667 } 2668 } 2669 2670 void 2671 lmutex_unlock(mutex_t *mp) 2672 { 2673 ulwp_t *self = curthread; 2674 uberdata_t *udp = self->ul_uberdata; 2675 2676 ASSERT(mp->mutex_type == USYNC_THREAD); 2677 2678 /* 2679 * Optimize the case of no lock statistics and only a single thread. 2680 * (Most likely a traditional single-threaded application.) 2681 */ 2682 if (udp->uberflags.uf_all == 0) { 2683 /* 2684 * Only one thread exists so there can be no waiters. 2685 */ 2686 mp->mutex_owner = 0; 2687 mp->mutex_lockword = 0; 2688 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2689 } else { 2690 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2691 lwpid_t lwpid; 2692 2693 if (msp) 2694 (void) record_hold_time(msp); 2695 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2696 (void) __lwp_unpark(lwpid); 2697 preempt(self); 2698 } 2699 } 2700 exit_critical(self); 2701 } 2702 2703 /* 2704 * For specialized code in libc, like the asynchronous i/o code, 2705 * the following sig_*() locking primitives are used in order 2706 * to make the code asynchronous signal safe. Signals are 2707 * deferred while locks acquired by these functions are held. 
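 *
 * A minimal usage sketch (the lock name and the work done under it are
 * invented for illustration; only the sig_mutex_lock() and
 * sig_mutex_unlock() calls themselves are real interfaces):
 *
 *	static mutex_t aio_hash_lock = DEFAULTMUTEX;
 *
 *	sig_mutex_lock(&aio_hash_lock);
 *	(update state that a signal handler must not see half-changed)
 *	sig_mutex_unlock(&aio_hash_lock);
 *
 * Any signal that arrives after the sigoff() in sig_mutex_lock() is
 * deferred and is taken only after sig_mutex_unlock() calls sigon().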
2708 */ 2709 void 2710 sig_mutex_lock(mutex_t *mp) 2711 { 2712 sigoff(curthread); 2713 (void) mutex_lock(mp); 2714 } 2715 2716 void 2717 sig_mutex_unlock(mutex_t *mp) 2718 { 2719 (void) mutex_unlock(mp); 2720 sigon(curthread); 2721 } 2722 2723 int 2724 sig_mutex_trylock(mutex_t *mp) 2725 { 2726 int error; 2727 2728 sigoff(curthread); 2729 if ((error = mutex_trylock(mp)) != 0) 2730 sigon(curthread); 2731 return (error); 2732 } 2733 2734 /* 2735 * sig_cond_wait() is a cancellation point. 2736 */ 2737 int 2738 sig_cond_wait(cond_t *cv, mutex_t *mp) 2739 { 2740 int error; 2741 2742 ASSERT(curthread->ul_sigdefer != 0); 2743 pthread_testcancel(); 2744 error = __cond_wait(cv, mp); 2745 if (error == EINTR && curthread->ul_cursig) { 2746 sig_mutex_unlock(mp); 2747 /* take the deferred signal here */ 2748 sig_mutex_lock(mp); 2749 } 2750 pthread_testcancel(); 2751 return (error); 2752 } 2753 2754 /* 2755 * sig_cond_reltimedwait() is a cancellation point. 2756 */ 2757 int 2758 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2759 { 2760 int error; 2761 2762 ASSERT(curthread->ul_sigdefer != 0); 2763 pthread_testcancel(); 2764 error = __cond_reltimedwait(cv, mp, ts); 2765 if (error == EINTR && curthread->ul_cursig) { 2766 sig_mutex_unlock(mp); 2767 /* take the deferred signal here */ 2768 sig_mutex_lock(mp); 2769 } 2770 pthread_testcancel(); 2771 return (error); 2772 } 2773 2774 /* 2775 * For specialized code in libc, like the stdio code, the 2776 * following cancel_safe_*() locking primitives are used in 2777 * order to make the code cancellation-safe. Cancellation is 2778 * deferred while locks acquired by these functions are held. 2779 */ 2780 void 2781 cancel_safe_mutex_lock(mutex_t *mp) 2782 { 2783 (void) mutex_lock(mp); 2784 curthread->ul_libc_locks++; 2785 } 2786 2787 int 2788 cancel_safe_mutex_trylock(mutex_t *mp) 2789 { 2790 int error; 2791 2792 if ((error = mutex_trylock(mp)) == 0) 2793 curthread->ul_libc_locks++; 2794 return (error); 2795 } 2796 2797 void 2798 cancel_safe_mutex_unlock(mutex_t *mp) 2799 { 2800 ulwp_t *self = curthread; 2801 2802 ASSERT(self->ul_libc_locks != 0); 2803 2804 (void) mutex_unlock(mp); 2805 2806 /* 2807 * Decrement the count of locks held by cancel_safe_mutex_lock(). 2808 * If we are then in a position to terminate cleanly and 2809 * if there is a pending cancellation and cancellation 2810 * is not disabled and we received EINTR from a recent 2811 * system call then perform the cancellation action now. 2812 */ 2813 if (--self->ul_libc_locks == 0 && 2814 !(self->ul_vfork | self->ul_nocancel | 2815 self->ul_critical | self->ul_sigdefer) && 2816 cancel_active()) 2817 pthread_exit(PTHREAD_CANCELED); 2818 } 2819 2820 static int 2821 shared_mutex_held(mutex_t *mparg) 2822 { 2823 /* 2824 * The 'volatile' is necessary to make sure the compiler doesn't 2825 * reorder the tests of the various components of the mutex. 2826 * They must be tested in this order: 2827 * mutex_lockw 2828 * mutex_owner 2829 * mutex_ownerpid 2830 * This relies on the fact that everywhere mutex_lockw is cleared, 2831 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2832 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2833 * and mutex_ownerpid are set after mutex_lockw is set, and that 2834 * mutex_lockw is set or cleared with a memory barrier.
2835 */ 2836 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2837 ulwp_t *self = curthread; 2838 uberdata_t *udp = self->ul_uberdata; 2839 2840 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2841 } 2842 2843 #pragma weak _mutex_held = mutex_held 2844 int 2845 mutex_held(mutex_t *mparg) 2846 { 2847 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2848 2849 if (mparg->mutex_type & USYNC_PROCESS) 2850 return (shared_mutex_held(mparg)); 2851 return (MUTEX_OWNED(mp, curthread)); 2852 } 2853 2854 #pragma weak pthread_mutex_destroy = mutex_destroy 2855 #pragma weak _mutex_destroy = mutex_destroy 2856 int 2857 mutex_destroy(mutex_t *mp) 2858 { 2859 if (mp->mutex_type & USYNC_PROCESS) 2860 forget_lock(mp); 2861 (void) memset(mp, 0, sizeof (*mp)); 2862 tdb_sync_obj_deregister(mp); 2863 return (0); 2864 } 2865 2866 #pragma weak pthread_mutex_consistent_np = mutex_consistent 2867 #pragma weak pthread_mutex_consistent = mutex_consistent 2868 int 2869 mutex_consistent(mutex_t *mp) 2870 { 2871 /* 2872 * Do this only for an inconsistent, initialized robust lock 2873 * that we hold. For all other cases, return EINVAL. 2874 */ 2875 if (mutex_held(mp) && 2876 (mp->mutex_type & LOCK_ROBUST) && 2877 (mp->mutex_flag & LOCK_INITED) && 2878 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2879 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2880 mp->mutex_rcount = 0; 2881 return (0); 2882 } 2883 return (EINVAL); 2884 } 2885 2886 /* 2887 * Spin locks are separate from ordinary mutexes, 2888 * but we use the same data structure for them. 2889 */ 2890 2891 int 2892 pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2893 { 2894 mutex_t *mp = (mutex_t *)lock; 2895 2896 (void) memset(mp, 0, sizeof (*mp)); 2897 if (pshared == PTHREAD_PROCESS_SHARED) 2898 mp->mutex_type = USYNC_PROCESS; 2899 else 2900 mp->mutex_type = USYNC_THREAD; 2901 mp->mutex_flag = LOCK_INITED; 2902 mp->mutex_magic = MUTEX_MAGIC; 2903 2904 /* 2905 * This should be at the beginning of the function, 2906 * but for the sake of old broken applications that 2907 * do not have proper alignment for their mutexes 2908 * (and don't check the return code from pthread_spin_init), 2909 * we put it here, after initializing the mutex regardless. 2910 */ 2911 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2912 curthread->ul_misaligned == 0) 2913 return (EINVAL); 2914 2915 return (0); 2916 } 2917 2918 int 2919 pthread_spin_destroy(pthread_spinlock_t *lock) 2920 { 2921 (void) memset(lock, 0, sizeof (*lock)); 2922 return (0); 2923 } 2924 2925 int 2926 pthread_spin_trylock(pthread_spinlock_t *lock) 2927 { 2928 mutex_t *mp = (mutex_t *)lock; 2929 ulwp_t *self = curthread; 2930 int error = 0; 2931 2932 no_preempt(self); 2933 if (set_lock_byte(&mp->mutex_lockw) != 0) 2934 error = EBUSY; 2935 else { 2936 mp->mutex_owner = (uintptr_t)self; 2937 if (mp->mutex_type == USYNC_PROCESS) 2938 mp->mutex_ownerpid = self->ul_uberdata->pid; 2939 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2940 } 2941 preempt(self); 2942 return (error); 2943 } 2944 2945 int 2946 pthread_spin_lock(pthread_spinlock_t *lock) 2947 { 2948 mutex_t *mp = (mutex_t *)lock; 2949 ulwp_t *self = curthread; 2950 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2951 int count = 0; 2952 2953 ASSERT(!self->ul_critical || self->ul_bindflags); 2954 2955 DTRACE_PROBE1(plockstat, mutex__spin, mp); 2956 2957 /* 2958 * We don't care whether the owner is running on a processor. 2959 * We just spin because that's what this interface requires. 
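 *
 * The loop below attempts the atomic set_lock_byte() only after it
 * has observed the lock byte to be clear, and issues SMT_PAUSE()
 * between probes, so a waiting thread mostly spins on plain reads of
 * the lock byte rather than hammering it with atomic operations.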
2960 */ 2961 for (;;) { 2962 if (*lockp == 0) { /* lock byte appears to be clear */ 2963 no_preempt(self); 2964 if (set_lock_byte(lockp) == 0) 2965 break; 2966 preempt(self); 2967 } 2968 if (count < INT_MAX) 2969 count++; 2970 SMT_PAUSE(); 2971 } 2972 mp->mutex_owner = (uintptr_t)self; 2973 if (mp->mutex_type == USYNC_PROCESS) 2974 mp->mutex_ownerpid = self->ul_uberdata->pid; 2975 preempt(self); 2976 if (count) { 2977 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 2978 } 2979 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2980 return (0); 2981 } 2982 2983 int 2984 pthread_spin_unlock(pthread_spinlock_t *lock) 2985 { 2986 mutex_t *mp = (mutex_t *)lock; 2987 ulwp_t *self = curthread; 2988 2989 no_preempt(self); 2990 mp->mutex_owner = 0; 2991 mp->mutex_ownerpid = 0; 2992 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2993 (void) atomic_swap_32(&mp->mutex_lockword, 0); 2994 preempt(self); 2995 return (0); 2996 } 2997 2998 #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 2999 3000 /* 3001 * Find/allocate an entry for 'lock' in our array of held locks. 3002 */ 3003 static mutex_t ** 3004 find_lock_entry(mutex_t *lock) 3005 { 3006 ulwp_t *self = curthread; 3007 mutex_t **remembered = NULL; 3008 mutex_t **lockptr; 3009 uint_t nlocks; 3010 3011 if ((nlocks = self->ul_heldlockcnt) != 0) 3012 lockptr = self->ul_heldlocks.array; 3013 else { 3014 nlocks = 1; 3015 lockptr = &self->ul_heldlocks.single; 3016 } 3017 3018 for (; nlocks; nlocks--, lockptr++) { 3019 if (*lockptr == lock) 3020 return (lockptr); 3021 if (*lockptr == NULL && remembered == NULL) 3022 remembered = lockptr; 3023 } 3024 if (remembered != NULL) { 3025 *remembered = lock; 3026 return (remembered); 3027 } 3028 3029 /* 3030 * No entry available. Allocate more space, converting 3031 * the single entry into an array of entries if necessary. 3032 */ 3033 if ((nlocks = self->ul_heldlockcnt) == 0) { 3034 /* 3035 * Initial allocation of the array. 3036 * Convert the single entry into an array. 3037 */ 3038 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 3039 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 3040 /* 3041 * The single entry becomes the first entry in the array. 3042 */ 3043 *lockptr = self->ul_heldlocks.single; 3044 self->ul_heldlocks.array = lockptr; 3045 /* 3046 * Return the next available entry in the array. 3047 */ 3048 *++lockptr = lock; 3049 return (lockptr); 3050 } 3051 /* 3052 * Reallocate the array, double the size each time. 3053 */ 3054 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 3055 (void) memcpy(lockptr, self->ul_heldlocks.array, 3056 nlocks * sizeof (mutex_t *)); 3057 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 3058 self->ul_heldlocks.array = lockptr; 3059 self->ul_heldlockcnt *= 2; 3060 /* 3061 * Return the next available entry in the newly allocated array. 3062 */ 3063 *(lockptr += nlocks) = lock; 3064 return (lockptr); 3065 } 3066 3067 /* 3068 * Insert 'lock' into our list of held locks. 3069 * Currently only used for LOCK_ROBUST mutexes. 3070 */ 3071 void 3072 remember_lock(mutex_t *lock) 3073 { 3074 (void) find_lock_entry(lock); 3075 } 3076 3077 /* 3078 * Remove 'lock' from our list of held locks. 3079 * Currently only used for LOCK_ROBUST mutexes. 3080 */ 3081 void 3082 forget_lock(mutex_t *lock) 3083 { 3084 *find_lock_entry(lock) = NULL; 3085 } 3086 3087 /* 3088 * Free the array of held locks. 
3089 */ 3090 void 3091 heldlock_free(ulwp_t *ulwp) 3092 { 3093 uint_t nlocks; 3094 3095 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 3096 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 3097 ulwp->ul_heldlockcnt = 0; 3098 ulwp->ul_heldlocks.array = NULL; 3099 } 3100 3101 /* 3102 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 3103 * Called from _thrp_exit() to deal with abandoned locks. 3104 */ 3105 void 3106 heldlock_exit(void) 3107 { 3108 ulwp_t *self = curthread; 3109 mutex_t **lockptr; 3110 uint_t nlocks; 3111 mutex_t *mp; 3112 3113 if ((nlocks = self->ul_heldlockcnt) != 0) 3114 lockptr = self->ul_heldlocks.array; 3115 else { 3116 nlocks = 1; 3117 lockptr = &self->ul_heldlocks.single; 3118 } 3119 3120 for (; nlocks; nlocks--, lockptr++) { 3121 /* 3122 * The kernel takes care of transitioning held 3123 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 3124 * We avoid that case here. 3125 */ 3126 if ((mp = *lockptr) != NULL && 3127 mutex_held(mp) && 3128 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 3129 LOCK_ROBUST) { 3130 mp->mutex_rcount = 0; 3131 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 3132 mp->mutex_flag |= LOCK_OWNERDEAD; 3133 (void) mutex_unlock_internal(mp, 1); 3134 } 3135 } 3136 3137 heldlock_free(self); 3138 } 3139 3140 #pragma weak _cond_init = cond_init 3141 /* ARGSUSED2 */ 3142 int 3143 cond_init(cond_t *cvp, int type, void *arg) 3144 { 3145 if (type != USYNC_THREAD && type != USYNC_PROCESS) 3146 return (EINVAL); 3147 (void) memset(cvp, 0, sizeof (*cvp)); 3148 cvp->cond_type = (uint16_t)type; 3149 cvp->cond_magic = COND_MAGIC; 3150 3151 /* 3152 * This should be at the beginning of the function, 3153 * but for the sake of old broken applications that 3154 * do not have proper alignment for their condvars 3155 * (and don't check the return code from cond_init), 3156 * we put it here, after initializing the condvar regardless. 3157 */ 3158 if (((uintptr_t)cvp & (_LONG_LONG_ALIGNMENT - 1)) && 3159 curthread->ul_misaligned == 0) 3160 return (EINVAL); 3161 3162 return (0); 3163 } 3164 3165 /* 3166 * cond_sleep_queue(): utility function for cond_wait_queue(). 3167 * 3168 * Go to sleep on a condvar sleep queue, expect to be waked up 3169 * by someone calling cond_signal() or cond_broadcast() or due 3170 * to receiving a UNIX signal or being cancelled, or just simply 3171 * due to a spurious wakeup (like someone calling forkall()). 3172 * 3173 * The associated mutex is *not* reacquired before returning. 3174 * That must be done by the caller of cond_sleep_queue(). 3175 */ 3176 static int 3177 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3178 { 3179 ulwp_t *self = curthread; 3180 queue_head_t *qp; 3181 queue_head_t *mqp; 3182 lwpid_t lwpid; 3183 int signalled; 3184 int error; 3185 int cv_wake; 3186 int release_all; 3187 3188 /* 3189 * Put ourself on the CV sleep queue, unlock the mutex, then 3190 * park ourself and unpark a candidate lwp to grab the mutex. 3191 * We must go onto the CV sleep queue before dropping the 3192 * mutex in order to guarantee atomicity of the operation.
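 *
 * (If the mutex were dropped first, a cond_signal() issued by another
 * thread in the window between the unlock and the enqueue would find
 * an empty sleep queue and be lost, and we could then sleep forever
 * waiting for a wakeup that has already come and gone.)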
3193 */ 3194 self->ul_sp = stkptr(); 3195 qp = queue_lock(cvp, CV); 3196 enqueue(qp, self, 0); 3197 cvp->cond_waiters_user = 1; 3198 self->ul_cvmutex = mp; 3199 self->ul_cv_wake = cv_wake = (tsp != NULL); 3200 self->ul_signalled = 0; 3201 if (mp->mutex_flag & LOCK_OWNERDEAD) { 3202 mp->mutex_flag &= ~LOCK_OWNERDEAD; 3203 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 3204 } 3205 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 3206 lwpid = mutex_unlock_queue(mp, release_all); 3207 for (;;) { 3208 set_parking_flag(self, 1); 3209 queue_unlock(qp); 3210 if (lwpid != 0) { 3211 lwpid = preempt_unpark(self, lwpid); 3212 preempt(self); 3213 } 3214 /* 3215 * We may have a deferred signal present, 3216 * in which case we should return EINTR. 3217 * Also, we may have received a SIGCANCEL; if so 3218 * and we are cancelable we should return EINTR. 3219 * We force an immediate EINTR return from 3220 * __lwp_park() by turning our parking flag off. 3221 */ 3222 if (self->ul_cursig != 0 || 3223 (self->ul_cancelable && self->ul_cancel_pending)) 3224 set_parking_flag(self, 0); 3225 /* 3226 * __lwp_park() will return the residual time in tsp 3227 * if we are unparked before the timeout expires. 3228 */ 3229 error = __lwp_park(tsp, lwpid); 3230 set_parking_flag(self, 0); 3231 lwpid = 0; /* unpark the other lwp only once */ 3232 /* 3233 * We were waked up by cond_signal(), cond_broadcast(), 3234 * by an interrupt or timeout (EINTR or ETIME), 3235 * or we may just have gotten a spurious wakeup. 3236 */ 3237 qp = queue_lock(cvp, CV); 3238 if (!cv_wake) 3239 mqp = queue_lock(mp, MX); 3240 if (self->ul_sleepq == NULL) 3241 break; 3242 /* 3243 * We are on either the condvar sleep queue or the 3244 * mutex sleep queue. Break out of the sleep if we 3245 * were interrupted or we timed out (EINTR or ETIME). 3246 * Else this is a spurious wakeup; continue the loop. 3247 */ 3248 if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ 3249 if (error) { 3250 mp->mutex_waiters = dequeue_self(mqp); 3251 break; 3252 } 3253 tsp = NULL; /* no more timeout */ 3254 } else if (self->ul_sleepq == qp) { /* condvar queue */ 3255 if (error) { 3256 cvp->cond_waiters_user = dequeue_self(qp); 3257 break; 3258 } 3259 /* 3260 * Else a spurious wakeup on the condvar queue. 3261 * __lwp_park() has already adjusted the timeout. 3262 */ 3263 } else { 3264 thr_panic("cond_sleep_queue(): thread not on queue"); 3265 } 3266 if (!cv_wake) 3267 queue_unlock(mqp); 3268 } 3269 3270 self->ul_sp = 0; 3271 self->ul_cv_wake = 0; 3272 ASSERT(self->ul_cvmutex == NULL); 3273 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 3274 self->ul_wchan == NULL); 3275 3276 signalled = self->ul_signalled; 3277 self->ul_signalled = 0; 3278 queue_unlock(qp); 3279 if (!cv_wake) 3280 queue_unlock(mqp); 3281 3282 /* 3283 * If we were concurrently cond_signal()d and any of: 3284 * received a UNIX signal, were cancelled, or got a timeout, 3285 * then perform another cond_signal() to avoid consuming it. 
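 *
 * For example, if cond_signal() selected us just as our timeout
 * expired, we return ETIME but some other waiter was entitled to
 * that signal; re-issuing cond_signal() here passes the wakeup
 * along instead of silently swallowing it.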
3286 */ 3287 if (error && signalled) 3288 (void) cond_signal(cvp); 3289 3290 return (error); 3291 } 3292 3293 static void 3294 cond_wait_check_alignment(cond_t *cvp, mutex_t *mp) 3295 { 3296 if ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) 3297 lock_error(mp, "cond_wait", cvp, "mutex is misaligned"); 3298 if ((uintptr_t)cvp & (_LONG_LONG_ALIGNMENT - 1)) 3299 lock_error(mp, "cond_wait", cvp, "condvar is misaligned"); 3300 } 3301 3302 int 3303 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3304 { 3305 ulwp_t *self = curthread; 3306 int error; 3307 int merror; 3308 3309 if (self->ul_error_detection && self->ul_misaligned == 0) 3310 cond_wait_check_alignment(cvp, mp); 3311 3312 /* 3313 * The old thread library was programmed to defer signals 3314 * while in cond_wait() so that the associated mutex would 3315 * be guaranteed to be held when the application signal 3316 * handler was invoked. 3317 * 3318 * We do not behave this way by default; the state of the 3319 * associated mutex in the signal handler is undefined. 3320 * 3321 * To accommodate applications that depend on the old 3322 * behavior, the _THREAD_COND_WAIT_DEFER environment 3323 * variable can be set to 1 and we will behave in the 3324 * old way with respect to cond_wait(). 3325 */ 3326 if (self->ul_cond_wait_defer) 3327 sigoff(self); 3328 3329 error = cond_sleep_queue(cvp, mp, tsp); 3330 3331 /* 3332 * Reacquire the mutex. 3333 */ 3334 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3335 error = merror; 3336 3337 /* 3338 * Take any deferred signal now, after we have reacquired the mutex. 3339 */ 3340 if (self->ul_cond_wait_defer) 3341 sigon(self); 3342 3343 return (error); 3344 } 3345 3346 /* 3347 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 3348 * See the comment ahead of cond_sleep_queue(), above. 3349 */ 3350 static int 3351 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3352 { 3353 int mtype = mp->mutex_type; 3354 ulwp_t *self = curthread; 3355 int error; 3356 3357 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 3358 _ceil_prio_waive(); 3359 3360 self->ul_sp = stkptr(); 3361 self->ul_wchan = cvp; 3362 sigoff(self); 3363 mp->mutex_owner = 0; 3364 /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ 3365 if (mtype & LOCK_PRIO_INHERIT) { 3366 mp->mutex_lockw = LOCKCLEAR; 3367 self->ul_pilocks--; 3368 } 3369 /* 3370 * ___lwp_cond_wait() returns immediately with EINTR if 3371 * set_parking_flag(self,0) is called on this lwp before it 3372 * goes to sleep in the kernel. sigacthandler() calls this 3373 * when a deferred signal is noted. This assures that we don't 3374 * get stuck in ___lwp_cond_wait() with all signals blocked 3375 * due to taking a deferred signal before going to sleep. 3376 */ 3377 set_parking_flag(self, 1); 3378 if (self->ul_cursig != 0 || 3379 (self->ul_cancelable && self->ul_cancel_pending)) 3380 set_parking_flag(self, 0); 3381 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 3382 set_parking_flag(self, 0); 3383 sigon(self); 3384 self->ul_sp = 0; 3385 self->ul_wchan = NULL; 3386 return (error); 3387 } 3388 3389 int 3390 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3391 { 3392 ulwp_t *self = curthread; 3393 int error; 3394 int merror; 3395 3396 if (self->ul_error_detection && self->ul_misaligned == 0) 3397 cond_wait_check_alignment(cvp, mp); 3398 3399 /* 3400 * See the large comment in cond_wait_queue(), above. 
3401 */ 3402 if (self->ul_cond_wait_defer) 3403 sigoff(self); 3404 3405 error = cond_sleep_kernel(cvp, mp, tsp); 3406 3407 /* 3408 * Override the return code from ___lwp_cond_wait() 3409 * with any non-zero return code from mutex_lock(). 3410 * This addresses robust lock failures in particular; 3411 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 3412 * errors in order to take corrective action. 3413 */ 3414 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3415 error = merror; 3416 3417 /* 3418 * Take any deferred signal now, after we have reacquired the mutex. 3419 */ 3420 if (self->ul_cond_wait_defer) 3421 sigon(self); 3422 3423 return (error); 3424 } 3425 3426 /* 3427 * Common code for cond_wait() and cond_timedwait() 3428 */ 3429 int 3430 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3431 { 3432 int mtype = mp->mutex_type; 3433 hrtime_t begin_sleep = 0; 3434 ulwp_t *self = curthread; 3435 uberdata_t *udp = self->ul_uberdata; 3436 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3437 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 3438 uint8_t rcount; 3439 int error = 0; 3440 3441 /* 3442 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 3443 * Except in the case of [ETIMEDOUT], all these error checks 3444 * shall act as if they were performed immediately at the 3445 * beginning of processing for the function and shall cause 3446 * an error return, in effect, prior to modifying the state 3447 * of the mutex specified by mutex or the condition variable 3448 * specified by cond. 3449 * Therefore, we must return EINVAL now if the timeout is invalid. 3450 */ 3451 if (tsp != NULL && 3452 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 3453 return (EINVAL); 3454 3455 if (__td_event_report(self, TD_SLEEP, udp)) { 3456 self->ul_sp = stkptr(); 3457 self->ul_wchan = cvp; 3458 self->ul_td_evbuf.eventnum = TD_SLEEP; 3459 self->ul_td_evbuf.eventdata = cvp; 3460 tdb_event(TD_SLEEP, udp); 3461 self->ul_sp = 0; 3462 } 3463 if (csp) { 3464 if (tsp) 3465 tdb_incr(csp->cond_timedwait); 3466 else 3467 tdb_incr(csp->cond_wait); 3468 } 3469 if (msp) 3470 begin_sleep = record_hold_time(msp); 3471 else if (csp) 3472 begin_sleep = gethrtime(); 3473 3474 if (self->ul_error_detection) { 3475 if (!mutex_held(mp)) 3476 lock_error(mp, "cond_wait", cvp, NULL); 3477 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3478 lock_error(mp, "recursive mutex in cond_wait", 3479 cvp, NULL); 3480 if (cvp->cond_type & USYNC_PROCESS) { 3481 if (!(mtype & USYNC_PROCESS)) 3482 lock_error(mp, "cond_wait", cvp, 3483 "condvar process-shared, " 3484 "mutex process-private"); 3485 } else { 3486 if (mtype & USYNC_PROCESS) 3487 lock_error(mp, "cond_wait", cvp, 3488 "condvar process-private, " 3489 "mutex process-shared"); 3490 } 3491 } 3492 3493 /* 3494 * We deal with recursive mutexes by completely 3495 * dropping the lock and restoring the recursion 3496 * count after waking up. This is arguably wrong, 3497 * but it obeys the principle of least astonishment.
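 *
 * For example, a thread that has acquired a LOCK_RECURSIVE mutex
 * three times (mutex_rcount == 2) and then waits on a condvar gives
 * the mutex up entirely while it sleeps; on wakeup the mutex is
 * reacquired once and mutex_rcount is restored to 2, so the caller
 * still owes the same three unlocks it owed before the wait.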
3498 */ 3499 rcount = mp->mutex_rcount; 3500 mp->mutex_rcount = 0; 3501 if ((mtype & 3502 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3503 (cvp->cond_type & USYNC_PROCESS)) 3504 error = cond_wait_kernel(cvp, mp, tsp); 3505 else 3506 error = cond_wait_queue(cvp, mp, tsp); 3507 mp->mutex_rcount = rcount; 3508 3509 if (csp) { 3510 hrtime_t lapse = gethrtime() - begin_sleep; 3511 if (tsp == NULL) 3512 csp->cond_wait_sleep_time += lapse; 3513 else { 3514 csp->cond_timedwait_sleep_time += lapse; 3515 if (error == ETIME) 3516 tdb_incr(csp->cond_timedwait_timeout); 3517 } 3518 } 3519 return (error); 3520 } 3521 3522 /* 3523 * cond_wait() is a cancellation point but __cond_wait() is not. 3524 * Internally, libc calls the non-cancellation version. 3525 * Other libraries need to use pthread_setcancelstate(), as appropriate, 3526 * since __cond_wait() is not exported from libc. 3527 */ 3528 int 3529 __cond_wait(cond_t *cvp, mutex_t *mp) 3530 { 3531 ulwp_t *self = curthread; 3532 uberdata_t *udp = self->ul_uberdata; 3533 uberflags_t *gflags; 3534 3535 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3536 !mutex_held(mp)) 3537 return (EPERM); 3538 3539 /* 3540 * Optimize the common case of USYNC_THREAD plus 3541 * no error detection, no lock statistics, and no event tracing. 3542 */ 3543 if ((gflags = self->ul_schedctl_called) != NULL && 3544 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3545 self->ul_td_events_enable | 3546 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3547 return (cond_wait_queue(cvp, mp, NULL)); 3548 3549 /* 3550 * Else do it the long way. 3551 */ 3552 return (cond_wait_common(cvp, mp, NULL)); 3553 } 3554 3555 #pragma weak _cond_wait = cond_wait 3556 int 3557 cond_wait(cond_t *cvp, mutex_t *mp) 3558 { 3559 int error; 3560 3561 _cancelon(); 3562 error = __cond_wait(cvp, mp); 3563 if (error == EINTR) 3564 _canceloff(); 3565 else 3566 _canceloff_nocancel(); 3567 return (error); 3568 } 3569 3570 /* 3571 * pthread_cond_wait() is a cancellation point. 3572 */ 3573 int 3574 pthread_cond_wait(pthread_cond_t *_RESTRICT_KYWD cvp, 3575 pthread_mutex_t *_RESTRICT_KYWD mp) 3576 { 3577 int error; 3578 3579 error = cond_wait((cond_t *)cvp, (mutex_t *)mp); 3580 return ((error == EINTR)? 0 : error); 3581 } 3582 3583 /* 3584 * cond_timedwait() is a cancellation point but __cond_timedwait() is not. 3585 */ 3586 int 3587 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3588 { 3589 clockid_t clock_id = cvp->cond_clockid; 3590 timespec_t reltime; 3591 int error; 3592 3593 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3594 !mutex_held(mp)) 3595 return (EPERM); 3596 3597 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3598 clock_id = CLOCK_REALTIME; 3599 abstime_to_reltime(clock_id, abstime, &reltime); 3600 error = cond_wait_common(cvp, mp, &reltime); 3601 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3602 /* 3603 * Don't return ETIME if we didn't really get a timeout. 3604 * This can happen if we return because someone resets 3605 * the system clock. Just return zero in this case, 3606 * giving a spurious wakeup but not a timeout. 
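 *
 * (The test below recomputes the absolute CLOCK_HIGHRES deadline in
 * nanoseconds and compares it against gethrtime(); if the deadline
 * has not actually been reached, the ETIME is suppressed and the
 * caller sees an ordinary, if spurious, wakeup.)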
3607 */ 3608 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3609 abstime->tv_nsec > gethrtime()) 3610 error = 0; 3611 } 3612 return (error); 3613 } 3614 3615 int 3616 cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3617 { 3618 int error; 3619 3620 _cancelon(); 3621 error = __cond_timedwait(cvp, mp, abstime); 3622 if (error == EINTR) 3623 _canceloff(); 3624 else 3625 _canceloff_nocancel(); 3626 return (error); 3627 } 3628 3629 /* 3630 * pthread_cond_timedwait() is a cancellation point. 3631 */ 3632 int 3633 pthread_cond_timedwait(pthread_cond_t *_RESTRICT_KYWD cvp, 3634 pthread_mutex_t *_RESTRICT_KYWD mp, 3635 const struct timespec *_RESTRICT_KYWD abstime) 3636 { 3637 int error; 3638 3639 error = cond_timedwait((cond_t *)cvp, (mutex_t *)mp, abstime); 3640 if (error == ETIME) 3641 error = ETIMEDOUT; 3642 else if (error == EINTR) 3643 error = 0; 3644 return (error); 3645 } 3646 3647 /* 3648 * cond_reltimedwait() is a cancellation point but __cond_reltimedwait() is not. 3649 */ 3650 int 3651 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3652 { 3653 timespec_t tslocal = *reltime; 3654 3655 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3656 !mutex_held(mp)) 3657 return (EPERM); 3658 3659 return (cond_wait_common(cvp, mp, &tslocal)); 3660 } 3661 3662 int 3663 cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3664 { 3665 int error; 3666 3667 _cancelon(); 3668 error = __cond_reltimedwait(cvp, mp, reltime); 3669 if (error == EINTR) 3670 _canceloff(); 3671 else 3672 _canceloff_nocancel(); 3673 return (error); 3674 } 3675 3676 int 3677 pthread_cond_reltimedwait_np(pthread_cond_t *_RESTRICT_KYWD cvp, 3678 pthread_mutex_t *_RESTRICT_KYWD mp, 3679 const struct timespec *_RESTRICT_KYWD reltime) 3680 { 3681 int error; 3682 3683 error = cond_reltimedwait((cond_t *)cvp, (mutex_t *)mp, reltime); 3684 if (error == ETIME) 3685 error = ETIMEDOUT; 3686 else if (error == EINTR) 3687 error = 0; 3688 return (error); 3689 } 3690 3691 #pragma weak pthread_cond_signal = cond_signal 3692 #pragma weak _cond_signal = cond_signal 3693 int 3694 cond_signal(cond_t *cvp) 3695 { 3696 ulwp_t *self = curthread; 3697 uberdata_t *udp = self->ul_uberdata; 3698 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3699 int error = 0; 3700 int more; 3701 lwpid_t lwpid; 3702 queue_head_t *qp; 3703 mutex_t *mp; 3704 queue_head_t *mqp; 3705 ulwp_t **ulwpp; 3706 ulwp_t *ulwp; 3707 ulwp_t *prev; 3708 3709 if (csp) 3710 tdb_incr(csp->cond_signal); 3711 3712 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3713 error = _lwp_cond_signal(cvp); 3714 3715 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3716 return (error); 3717 3718 /* 3719 * Move someone from the condvar sleep queue to the mutex sleep 3720 * queue for the mutex that he will acquire on being waked up. 3721 * We can do this only if we own the mutex he will acquire. 3722 * If we do not own the mutex, or if his ul_cv_wake flag 3723 * is set, just dequeue and unpark him. 3724 */ 3725 qp = queue_lock(cvp, CV); 3726 ulwpp = queue_slot(qp, &prev, &more); 3727 cvp->cond_waiters_user = more; 3728 if (ulwpp == NULL) { /* no one on the sleep queue */ 3729 queue_unlock(qp); 3730 return (error); 3731 } 3732 ulwp = *ulwpp; 3733 3734 /* 3735 * Inform the thread that he was the recipient of a cond_signal(). 3736 * This lets him deal with cond_signal() and, concurrently, 3737 * one or more of a cancellation, a UNIX signal, or a timeout. 
3738 * These latter conditions must not consume a cond_signal(). 3739 */ 3740 ulwp->ul_signalled = 1; 3741 3742 /* 3743 * Dequeue the waiter but leave his ul_sleepq non-NULL 3744 * while we move him to the mutex queue so that he can 3745 * deal properly with spurious wakeups. 3746 */ 3747 queue_unlink(qp, ulwpp, prev); 3748 3749 mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 3750 ulwp->ul_cvmutex = NULL; 3751 ASSERT(mp != NULL); 3752 3753 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3754 /* just wake him up */ 3755 lwpid = ulwp->ul_lwpid; 3756 no_preempt(self); 3757 ulwp->ul_sleepq = NULL; 3758 ulwp->ul_wchan = NULL; 3759 queue_unlock(qp); 3760 (void) __lwp_unpark(lwpid); 3761 preempt(self); 3762 } else { 3763 /* move him to the mutex queue */ 3764 mqp = queue_lock(mp, MX); 3765 enqueue(mqp, ulwp, 0); 3766 mp->mutex_waiters = 1; 3767 queue_unlock(mqp); 3768 queue_unlock(qp); 3769 } 3770 3771 return (error); 3772 } 3773 3774 /* 3775 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3776 * and rw_queue_release() to (re)allocate a big buffer to hold the 3777 * lwpids of all the threads to be set running after they are removed 3778 * from their sleep queues. Since we are holding a queue lock, we 3779 * cannot call any function that might acquire a lock. mmap(), munmap(), 3780 * lwp_unpark_all() are simple system calls and are safe in this regard. 3781 */ 3782 lwpid_t * 3783 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3784 { 3785 /* 3786 * Allocate NEWLWPS ids on the first overflow. 3787 * Double the allocation each time after that. 3788 */ 3789 int nlwpid = *nlwpid_ptr; 3790 int maxlwps = *maxlwps_ptr; 3791 int first_allocation; 3792 int newlwps; 3793 void *vaddr; 3794 3795 ASSERT(nlwpid == maxlwps); 3796 3797 first_allocation = (maxlwps == MAXLWPS); 3798 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3799 vaddr = mmap(NULL, newlwps * sizeof (lwpid_t), 3800 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3801 3802 if (vaddr == MAP_FAILED) { 3803 /* 3804 * Let's hope this never happens. 3805 * If it does, then we have a terrible 3806 * thundering herd on our hands. 3807 */ 3808 (void) __lwp_unpark_all(lwpid, nlwpid); 3809 *nlwpid_ptr = 0; 3810 } else { 3811 (void) memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3812 if (!first_allocation) 3813 (void) munmap((caddr_t)lwpid, 3814 maxlwps * sizeof (lwpid_t)); 3815 lwpid = vaddr; 3816 *maxlwps_ptr = newlwps; 3817 } 3818 3819 return (lwpid); 3820 } 3821 3822 #pragma weak pthread_cond_broadcast = cond_broadcast 3823 #pragma weak _cond_broadcast = cond_broadcast 3824 int 3825 cond_broadcast(cond_t *cvp) 3826 { 3827 ulwp_t *self = curthread; 3828 uberdata_t *udp = self->ul_uberdata; 3829 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3830 int error = 0; 3831 queue_head_t *qp; 3832 queue_root_t *qrp; 3833 mutex_t *mp; 3834 mutex_t *mp_cache = NULL; 3835 queue_head_t *mqp = NULL; 3836 ulwp_t *ulwp; 3837 int nlwpid = 0; 3838 int maxlwps = MAXLWPS; 3839 lwpid_t buffer[MAXLWPS]; 3840 lwpid_t *lwpid = buffer; 3841 3842 if (csp) 3843 tdb_incr(csp->cond_broadcast); 3844 3845 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3846 error = _lwp_cond_broadcast(cvp); 3847 3848 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3849 return (error); 3850 3851 /* 3852 * Move everyone from the condvar sleep queue to the mutex sleep 3853 * queue for the mutex that they will acquire on being waked up. 3854 * We can do this only if we own the mutex they will acquire. 
3855 * If we do not own the mutex, or if their ul_cv_wake flag 3856 * is set, just dequeue and unpark them. 3857 * 3858 * We keep track of lwpids that are to be unparked in lwpid[]. 3859 * __lwp_unpark_all() is called to unpark all of them after 3860 * they have been removed from the sleep queue and the sleep 3861 * queue lock has been dropped. If we run out of space in our 3862 * on-stack buffer, we need to allocate more but we can't call 3863 * lmalloc() because we are holding a queue lock when the overflow 3864 * occurs and lmalloc() acquires a lock. We can't use alloca() 3865 * either because the application may have allocated a small 3866 * stack and we don't want to overrun the stack. So we call 3867 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3868 * system call directly since that path acquires no locks. 3869 */ 3870 qp = queue_lock(cvp, CV); 3871 cvp->cond_waiters_user = 0; 3872 for (;;) { 3873 if ((qrp = qp->qh_root) == NULL || 3874 (ulwp = qrp->qr_head) == NULL) 3875 break; 3876 ASSERT(ulwp->ul_wchan == cvp); 3877 queue_unlink(qp, &qrp->qr_head, NULL); 3878 mp = ulwp->ul_cvmutex; /* his mutex */ 3879 ulwp->ul_cvmutex = NULL; 3880 ASSERT(mp != NULL); 3881 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3882 /* just wake him up */ 3883 ulwp->ul_sleepq = NULL; 3884 ulwp->ul_wchan = NULL; 3885 if (nlwpid == maxlwps) 3886 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3887 lwpid[nlwpid++] = ulwp->ul_lwpid; 3888 } else { 3889 /* move him to the mutex queue */ 3890 if (mp != mp_cache) { 3891 mp_cache = mp; 3892 if (mqp != NULL) 3893 queue_unlock(mqp); 3894 mqp = queue_lock(mp, MX); 3895 } 3896 enqueue(mqp, ulwp, 0); 3897 mp->mutex_waiters = 1; 3898 } 3899 } 3900 if (mqp != NULL) 3901 queue_unlock(mqp); 3902 if (nlwpid == 0) { 3903 queue_unlock(qp); 3904 } else { 3905 no_preempt(self); 3906 queue_unlock(qp); 3907 if (nlwpid == 1) 3908 (void) __lwp_unpark(lwpid[0]); 3909 else 3910 (void) __lwp_unpark_all(lwpid, nlwpid); 3911 preempt(self); 3912 } 3913 if (lwpid != buffer) 3914 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 3915 return (error); 3916 } 3917 3918 #pragma weak pthread_cond_destroy = cond_destroy 3919 int 3920 cond_destroy(cond_t *cvp) 3921 { 3922 cvp->cond_magic = 0; 3923 tdb_sync_obj_deregister(cvp); 3924 return (0); 3925 } 3926 3927 #if defined(THREAD_DEBUG) 3928 void 3929 assert_no_libc_locks_held(void) 3930 { 3931 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3932 } 3933 3934 /* protected by link_lock */ 3935 uint64_t spin_lock_spin; 3936 uint64_t spin_lock_spin2; 3937 uint64_t spin_lock_sleep; 3938 uint64_t spin_lock_wakeup; 3939 3940 /* 3941 * Record spin lock statistics. 3942 * Called by a thread exiting itself in thrp_exit(). 3943 * Also called via atexit() from the thread calling 3944 * exit() to do all the other threads as well. 3945 */ 3946 void 3947 record_spin_locks(ulwp_t *ulwp) 3948 { 3949 spin_lock_spin += ulwp->ul_spin_lock_spin; 3950 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3951 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3952 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3953 ulwp->ul_spin_lock_spin = 0; 3954 ulwp->ul_spin_lock_spin2 = 0; 3955 ulwp->ul_spin_lock_sleep = 0; 3956 ulwp->ul_spin_lock_wakeup = 0; 3957 } 3958 3959 /* 3960 * atexit function: dump the queue statistics to stderr. 
3961 */ 3962 #include <stdio.h> 3963 void 3964 dump_queue_statistics(void) 3965 { 3966 uberdata_t *udp = curthread->ul_uberdata; 3967 queue_head_t *qp; 3968 int qn; 3969 uint64_t spin_lock_total = 0; 3970 3971 if (udp->queue_head == NULL || thread_queue_dump == 0) 3972 return; 3973 3974 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3975 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3976 return; 3977 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3978 if (qp->qh_lockcount == 0) 3979 continue; 3980 spin_lock_total += qp->qh_lockcount; 3981 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3982 (u_longlong_t)qp->qh_lockcount, 3983 qp->qh_qmax, qp->qh_hmax) < 0) 3984 return; 3985 } 3986 3987 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3988 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3989 return; 3990 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3991 if (qp->qh_lockcount == 0) 3992 continue; 3993 spin_lock_total += qp->qh_lockcount; 3994 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3995 (u_longlong_t)qp->qh_lockcount, 3996 qp->qh_qmax, qp->qh_hmax) < 0) 3997 return; 3998 } 3999 4000 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 4001 (u_longlong_t)spin_lock_total); 4002 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 4003 (u_longlong_t)spin_lock_spin); 4004 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 4005 (u_longlong_t)spin_lock_spin2); 4006 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 4007 (u_longlong_t)spin_lock_sleep); 4008 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 4009 (u_longlong_t)spin_lock_wakeup); 4010 } 4011 #endif 4012