/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/rtpriocntl.h>
#include <sys/sdt.h>
#include <atomic.h>

#if defined(THREAD_DEBUG)
#define INCR32(x)       (((x) != UINT32_MAX)? (x)++ : 0)
#define INCR(x)         ((x)++)
#define DECR(x)         ((x)--)
#define MAXINCR(m, x)   ((m < ++x)? (m = x) : 0)
#else
#define INCR32(x)
#define INCR(x)
#define DECR(x)
#define MAXINCR(m, x)
#endif

/*
 * This mutex is initialized to be held by lwp#1.
 * It is used to block a thread that has returned from a mutex_lock()
 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
 */
mutex_t stall_mutex = DEFAULTMUTEX;

static int shared_mutex_held(mutex_t *);
static int mutex_queuelock_adaptive(mutex_t *);
static void mutex_wakeup_all(mutex_t *);

/*
 * Lock statistics support functions.
 */
void
record_begin_hold(tdb_mutex_stats_t *msp)
{
        tdb_incr(msp->mutex_lock);
        msp->mutex_begin_hold = gethrtime();
}

hrtime_t
record_hold_time(tdb_mutex_stats_t *msp)
{
        hrtime_t now = gethrtime();

        if (msp->mutex_begin_hold)
                msp->mutex_hold_time += now - msp->mutex_begin_hold;
        msp->mutex_begin_hold = 0;
        return (now);
}

/*
 * Called once at library initialization.
 */
void
mutex_setup(void)
{
        if (set_lock_byte(&stall_mutex.mutex_lockw))
                thr_panic("mutex_setup() cannot acquire stall_mutex");
        stall_mutex.mutex_owner = (uintptr_t)curthread;
}

/*
 * The default spin count of 1000 is experimentally determined.
 * On sun4u machines with any number of processors it could be raised
 * to 10,000 but that (experimentally) makes almost no difference.
 * The environment variable:
 *      _THREAD_ADAPTIVE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int thread_adaptive_spin = 1000;
uint_t thread_max_spinners = 100;
int thread_queue_verify = 0;
static int ncpus;

/*
 * Distinguish spinning for queue locks from spinning for regular locks.
 * We try harder to acquire queue locks by spinning.
 * The environment variable:
 *      _THREAD_QUEUE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int thread_queue_spin = 10000;

#define ALL_ATTRIBUTES \
        (LOCK_RECURSIVE | LOCK_ERRORCHECK | \
        LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \
        LOCK_ROBUST)

/*
 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
 * augmented by zero or more of the flags:
 *      LOCK_RECURSIVE
 *      LOCK_ERRORCHECK
 *      LOCK_PRIO_INHERIT
 *      LOCK_PRIO_PROTECT
 *      LOCK_ROBUST
 */
#pragma weak _mutex_init = mutex_init
/* ARGSUSED2 */
int
mutex_init(mutex_t *mp, int type, void *arg)
{
        int basetype = (type & ~ALL_ATTRIBUTES);
        const pcclass_t *pccp;
        int error = 0;
        int ceil;

        if (basetype == USYNC_PROCESS_ROBUST) {
                /*
                 * USYNC_PROCESS_ROBUST is a deprecated historical type.
                 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
                 * retain the USYNC_PROCESS_ROBUST flag so we can return
                 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
                 * mutexes will ever draw ELOCKUNMAPPED).
                 */
                type |= (USYNC_PROCESS | LOCK_ROBUST);
                basetype = USYNC_PROCESS;
        }

        if (type & LOCK_PRIO_PROTECT)
                pccp = get_info_by_policy(SCHED_FIFO);
        if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
            (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
            == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
            ((type & LOCK_PRIO_PROTECT) &&
            ((ceil = *(int *)arg) < pccp->pcc_primin ||
            ceil > pccp->pcc_primax))) {
                error = EINVAL;
        } else if (type & LOCK_ROBUST) {
                /*
                 * Callers of mutex_init() with the LOCK_ROBUST attribute
                 * are required to pass an initially all-zero mutex.
                 * Multiple calls to mutex_init() are allowed; all but
                 * the first return EBUSY. A call to mutex_init() is
                 * allowed to make an inconsistent robust lock consistent
                 * (for historical usage, even though the proper interface
                 * for this is mutex_consistent()). Note that we use
                 * atomic_or_16() to set the LOCK_INITED flag so as
                 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
                 */
                if (!(mp->mutex_flag & LOCK_INITED)) {
                        mp->mutex_type = (uint8_t)type;
                        atomic_or_16(&mp->mutex_flag, LOCK_INITED);
                        mp->mutex_magic = MUTEX_MAGIC;
                } else if (type != mp->mutex_type ||
                    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
                        error = EINVAL;
                } else if (mutex_consistent(mp) != 0) {
                        error = EBUSY;
                }
                /* register a process robust mutex with the kernel */
                if (basetype == USYNC_PROCESS)
                        register_lock(mp);
        } else {
                (void) memset(mp, 0, sizeof (*mp));
                mp->mutex_type = (uint8_t)type;
                mp->mutex_flag = LOCK_INITED;
                mp->mutex_magic = MUTEX_MAGIC;
        }

        if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
                mp->mutex_ceiling = ceil;
        }

        /*
         * This should be at the beginning of the function,
         * but for the sake of old broken applications that
         * do not have proper alignment for their mutexes
         * (and don't check the return code from mutex_init),
         * we put it here, after initializing the mutex regardless.
         */
        if (error == 0 &&
            ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) &&
            curthread->ul_misaligned == 0)
                error = EINVAL;

        return (error);
}

/*
 * Delete mp from list of ceiling mutexes owned by curthread.
 * Return 1 if the head of the chain was updated.
 */
int
_ceil_mylist_del(mutex_t *mp)
{
        ulwp_t *self = curthread;
        mxchain_t **mcpp;
        mxchain_t *mcp;

        for (mcpp = &self->ul_mxchain;
            (mcp = *mcpp) != NULL;
            mcpp = &mcp->mxchain_next) {
                if (mcp->mxchain_mx == mp) {
                        *mcpp = mcp->mxchain_next;
                        lfree(mcp, sizeof (*mcp));
                        return (mcpp == &self->ul_mxchain);
                }
        }
        return (0);
}

/*
 * Add mp to the list of ceiling mutexes owned by curthread.
 * Return ENOMEM if no memory could be allocated.
 */
int
_ceil_mylist_add(mutex_t *mp)
{
        ulwp_t *self = curthread;
        mxchain_t *mcp;

        if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
                return (ENOMEM);
        mcp->mxchain_mx = mp;
        mcp->mxchain_next = self->ul_mxchain;
        self->ul_mxchain = mcp;
        return (0);
}

/*
 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below.
 */
static void
set_rt_priority(ulwp_t *self, int prio)
{
        pcparms_t pcparm;

        pcparm.pc_cid = self->ul_rtclassid;
        ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
        ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
        (void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm);
}

/*
 * Inherit priority from ceiling.
 * This changes the effective priority, not the assigned priority.
 */
void
_ceil_prio_inherit(int prio)
{
        ulwp_t *self = curthread;

        self->ul_epri = prio;
        set_rt_priority(self, prio);
}

/*
 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks
 * if holding at least one ceiling lock. If no ceiling locks are held at this
 * point, disinherit completely, reverting back to assigned priority.
 */
void
_ceil_prio_waive(void)
{
        ulwp_t *self = curthread;
        mxchain_t *mcp = self->ul_mxchain;
        int prio;

        if (mcp == NULL) {
                prio = self->ul_pri;
                self->ul_epri = 0;
        } else {
                prio = mcp->mxchain_mx->mutex_ceiling;
                self->ul_epri = prio;
        }
        set_rt_priority(self, prio);
}

/*
 * Clear the lock byte. Retain the waiters byte and the spinners byte.
 * Return the old value of the lock word.
 */
static uint32_t
clear_lockbyte(volatile uint32_t *lockword)
{
        uint32_t old;
        uint32_t new;

        do {
                old = *lockword;
                new = old & ~LOCKMASK;
        } while (atomic_cas_32(lockword, old, new) != old);

        return (old);
}

/*
 * Same as clear_lockbyte(), but operates on mutex_lockword64.
 * The mutex_ownerpid field is cleared along with the lock byte.
 */
static uint64_t
clear_lockbyte64(volatile uint64_t *lockword64)
{
        uint64_t old;
        uint64_t new;

        do {
                old = *lockword64;
                new = old & ~LOCKMASK64;
        } while (atomic_cas_64(lockword64, old, new) != old);

        return (old);
}

/*
 * Similar to set_lock_byte(), which only tries to set the lock byte.
 * Here, we attempt to set the lock byte AND the mutex_ownerpid, keeping
 * the remaining bytes constant. This atomic operation is required for the
 * correctness of process-shared robust locks, otherwise there would be
 * a window of vulnerability in which the lock byte had been set but the
 * mutex_ownerpid had not yet been set. If the process were to die in
 * this window of vulnerability (due to some other thread calling exit()
 * or the process receiving a fatal signal), the mutex would be left locked
 * but without a process-ID to determine which process was holding the lock.
 * The kernel would then be unable to mark the robust mutex as LOCK_OWNERDEAD
 * when the process died. For all other cases of process-shared locks, this
 * operation is just a convenience, for the sake of common code.
 *
 * This operation requires process-shared robust locks to be properly
 * aligned on an 8-byte boundary, at least on sparc machines, lest the
 * operation incur an alignment fault. This is automatic when locks
 * are declared properly using the mutex_t or pthread_mutex_t data types
 * and the application does not allocate dynamic memory on less than an
 * 8-byte boundary. See the 'horrible hack' comments below for cases
 * dealing with such broken applications.
 */
static int
set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid)
{
        uint64_t old;
        uint64_t new;

        old = *lockword64 & ~LOCKMASK64;
        new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64;
        if (atomic_cas_64(lockword64, old, new) == old)
                return (LOCKCLEAR);

        return (LOCKSET);
}

/*
 * Increment the spinners count in the mutex lock word.
 * Return 0 on success. Return -1 if the count would overflow.
 */
static int
spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
{
        uint32_t old;
        uint32_t new;

        do {
                old = *lockword;
                if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
                        return (-1);
                new = old + (1 << SPINNERSHIFT);
        } while (atomic_cas_32(lockword, old, new) != old);

        return (0);
}

/*
 * Decrement the spinners count in the mutex lock word.
 * Return the new value of the lock word.
 */
static uint32_t
spinners_decr(volatile uint32_t *lockword)
{
        uint32_t old;
        uint32_t new;

        do {
                new = old = *lockword;
                if (new & SPINNERMASK)
                        new -= (1 << SPINNERSHIFT);
        } while (atomic_cas_32(lockword, old, new) != old);

        return (new);
}

/*
 * Non-preemptive spin locks. Used by queue_lock().
 * No lock statistics are gathered for these locks.
 * No DTrace probes are provided for these locks.
 */
void
spin_lock_set(mutex_t *mp)
{
        ulwp_t *self = curthread;

        no_preempt(self);
        if (set_lock_byte(&mp->mutex_lockw) == 0) {
                mp->mutex_owner = (uintptr_t)self;
                return;
        }
        /*
         * Spin for a while, attempting to acquire the lock.
         */
        INCR32(self->ul_spin_lock_spin);
        if (mutex_queuelock_adaptive(mp) == 0 ||
            set_lock_byte(&mp->mutex_lockw) == 0) {
                mp->mutex_owner = (uintptr_t)self;
                return;
        }
        /*
         * Try harder if we were previously at a no preemption level.
         */
        if (self->ul_preempt > 1) {
                INCR32(self->ul_spin_lock_spin2);
                if (mutex_queuelock_adaptive(mp) == 0 ||
                    set_lock_byte(&mp->mutex_lockw) == 0) {
                        mp->mutex_owner = (uintptr_t)self;
                        return;
                }
        }
        /*
         * Give up and block in the kernel for the mutex.
444 */ 445 INCR32(self->ul_spin_lock_sleep); 446 (void) ___lwp_mutex_timedlock(mp, NULL, self); 447 } 448 449 void 450 spin_lock_clear(mutex_t *mp) 451 { 452 ulwp_t *self = curthread; 453 454 mp->mutex_owner = 0; 455 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 456 (void) ___lwp_mutex_wakeup(mp, 0); 457 INCR32(self->ul_spin_lock_wakeup); 458 } 459 preempt(self); 460 } 461 462 /* 463 * Allocate the sleep queue hash table. 464 */ 465 void 466 queue_alloc(void) 467 { 468 ulwp_t *self = curthread; 469 uberdata_t *udp = self->ul_uberdata; 470 queue_head_t *qp; 471 void *data; 472 int i; 473 474 /* 475 * No locks are needed; we call here only when single-threaded. 476 */ 477 ASSERT(self == udp->ulwp_one); 478 ASSERT(!udp->uberflags.uf_mt); 479 if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 480 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 481 == MAP_FAILED) 482 thr_panic("cannot allocate thread queue_head table"); 483 udp->queue_head = qp = (queue_head_t *)data; 484 for (i = 0; i < 2 * QHASHSIZE; qp++, i++) { 485 qp->qh_type = (i < QHASHSIZE)? MX : CV; 486 qp->qh_lock.mutex_flag = LOCK_INITED; 487 qp->qh_lock.mutex_magic = MUTEX_MAGIC; 488 qp->qh_hlist = &qp->qh_def_root; 489 #if defined(THREAD_DEBUG) 490 qp->qh_hlen = 1; 491 qp->qh_hmax = 1; 492 #endif 493 } 494 } 495 496 #if defined(THREAD_DEBUG) 497 498 /* 499 * Debugging: verify correctness of a sleep queue. 500 */ 501 void 502 QVERIFY(queue_head_t *qp) 503 { 504 ulwp_t *self = curthread; 505 uberdata_t *udp = self->ul_uberdata; 506 queue_root_t *qrp; 507 ulwp_t *ulwp; 508 ulwp_t *prev; 509 uint_t index; 510 uint32_t cnt; 511 char qtype; 512 void *wchan; 513 514 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 515 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 516 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 517 cnt++; 518 ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) || 519 (qrp->qr_head == NULL && qrp->qr_tail == NULL)); 520 } 521 ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt); 522 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 523 ASSERT(qp->qh_type == qtype); 524 if (!thread_queue_verify) 525 return; 526 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 527 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 528 for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; 529 prev = ulwp, ulwp = ulwp->ul_link) { 530 cnt++; 531 if (ulwp->ul_writer) 532 ASSERT(prev == NULL || prev->ul_writer); 533 ASSERT(ulwp->ul_qtype == qtype); 534 ASSERT(ulwp->ul_wchan != NULL); 535 ASSERT(ulwp->ul_sleepq == qp); 536 wchan = ulwp->ul_wchan; 537 ASSERT(qrp->qr_wchan == wchan); 538 index = QUEUE_HASH(wchan, qtype); 539 ASSERT(&udp->queue_head[index] == qp); 540 } 541 ASSERT(qrp->qr_tail == prev); 542 } 543 ASSERT(qp->qh_qlen == cnt); 544 } 545 546 #else /* THREAD_DEBUG */ 547 548 #define QVERIFY(qp) 549 550 #endif /* THREAD_DEBUG */ 551 552 /* 553 * Acquire a queue head. 554 */ 555 queue_head_t * 556 queue_lock(void *wchan, int qtype) 557 { 558 uberdata_t *udp = curthread->ul_uberdata; 559 queue_head_t *qp; 560 queue_root_t *qrp; 561 562 ASSERT(qtype == MX || qtype == CV); 563 564 /* 565 * It is possible that we could be called while still single-threaded. 566 * If so, we call queue_alloc() to allocate the queue_head[] array. 
567 */ 568 if ((qp = udp->queue_head) == NULL) { 569 queue_alloc(); 570 qp = udp->queue_head; 571 } 572 qp += QUEUE_HASH(wchan, qtype); 573 spin_lock_set(&qp->qh_lock); 574 for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) 575 if (qrp->qr_wchan == wchan) 576 break; 577 if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { 578 /* the default queue root is available; use it */ 579 qrp = &qp->qh_def_root; 580 qrp->qr_wchan = wchan; 581 ASSERT(qrp->qr_next == NULL); 582 ASSERT(qrp->qr_tail == NULL && 583 qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); 584 } 585 qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ 586 qp->qh_root = qrp; /* valid until queue_unlock() is called */ 587 INCR32(qp->qh_lockcount); 588 QVERIFY(qp); 589 return (qp); 590 } 591 592 /* 593 * Release a queue head. 594 */ 595 void 596 queue_unlock(queue_head_t *qp) 597 { 598 QVERIFY(qp); 599 spin_lock_clear(&qp->qh_lock); 600 } 601 602 /* 603 * For rwlock queueing, we must queue writers ahead of readers of the 604 * same priority. We do this by making writers appear to have a half 605 * point higher priority for purposes of priority comparisons below. 606 */ 607 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 608 609 void 610 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) 611 { 612 queue_root_t *qrp; 613 ulwp_t **ulwpp; 614 ulwp_t *next; 615 int pri = CMP_PRIO(ulwp); 616 617 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 618 ASSERT(ulwp->ul_sleepq != qp); 619 620 if ((qrp = qp->qh_root) == NULL) { 621 /* use the thread's queue root for the linkage */ 622 qrp = &ulwp->ul_queue_root; 623 qrp->qr_next = qp->qh_hlist; 624 qrp->qr_prev = NULL; 625 qrp->qr_head = NULL; 626 qrp->qr_tail = NULL; 627 qrp->qr_wchan = qp->qh_wchan; 628 qrp->qr_rtcount = 0; 629 qrp->qr_qlen = 0; 630 qrp->qr_qmax = 0; 631 qp->qh_hlist->qr_prev = qrp; 632 qp->qh_hlist = qrp; 633 qp->qh_root = qrp; 634 MAXINCR(qp->qh_hmax, qp->qh_hlen); 635 } 636 637 /* 638 * LIFO queue ordering is unfair and can lead to starvation, 639 * but it gives better performance for heavily contended locks. 640 * We use thread_queue_fifo (range is 0..8) to determine 641 * the frequency of FIFO vs LIFO queuing: 642 * 0 : every 256th time (almost always LIFO) 643 * 1 : every 128th time 644 * 2 : every 64th time 645 * 3 : every 32nd time 646 * 4 : every 16th time (the default value, mostly LIFO) 647 * 5 : every 8th time 648 * 6 : every 4th time 649 * 7 : every 2nd time 650 * 8 : every time (never LIFO, always FIFO) 651 * Note that there is always some degree of FIFO ordering. 652 * This breaks live lock conditions that occur in applications 653 * that are written assuming (incorrectly) that threads acquire 654 * locks fairly, that is, in roughly round-robin order. 655 * In any event, the queue is maintained in kernel priority order. 656 * 657 * If force_fifo is non-zero, fifo queueing is forced. 658 * SUSV3 requires this for semaphores. 659 */ 660 if (qrp->qr_head == NULL) { 661 /* 662 * The queue is empty. LIFO/FIFO doesn't matter. 663 */ 664 ASSERT(qrp->qr_tail == NULL); 665 ulwpp = &qrp->qr_head; 666 } else if (force_fifo | 667 (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { 668 /* 669 * Enqueue after the last thread whose priority is greater 670 * than or equal to the priority of the thread being queued. 671 * Attempt first to go directly onto the tail of the queue. 
672 */ 673 if (pri <= CMP_PRIO(qrp->qr_tail)) 674 ulwpp = &qrp->qr_tail->ul_link; 675 else { 676 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 677 ulwpp = &next->ul_link) 678 if (pri > CMP_PRIO(next)) 679 break; 680 } 681 } else { 682 /* 683 * Enqueue before the first thread whose priority is less 684 * than or equal to the priority of the thread being queued. 685 * Hopefully we can go directly onto the head of the queue. 686 */ 687 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 688 ulwpp = &next->ul_link) 689 if (pri >= CMP_PRIO(next)) 690 break; 691 } 692 if ((ulwp->ul_link = *ulwpp) == NULL) 693 qrp->qr_tail = ulwp; 694 *ulwpp = ulwp; 695 696 ulwp->ul_sleepq = qp; 697 ulwp->ul_wchan = qp->qh_wchan; 698 ulwp->ul_qtype = qp->qh_type; 699 if ((ulwp->ul_schedctl != NULL && 700 ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | 701 ulwp->ul_pilocks) { 702 ulwp->ul_rtqueued = 1; 703 qrp->qr_rtcount++; 704 } 705 MAXINCR(qrp->qr_qmax, qrp->qr_qlen); 706 MAXINCR(qp->qh_qmax, qp->qh_qlen); 707 } 708 709 /* 710 * Helper function for queue_slot() and queue_slot_rt(). 711 * Try to find a non-suspended thread on the queue. 712 */ 713 static ulwp_t ** 714 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) 715 { 716 ulwp_t *ulwp; 717 ulwp_t **foundpp = NULL; 718 int priority = -1; 719 ulwp_t *prev; 720 int tpri; 721 722 for (prev = NULL; 723 (ulwp = *ulwpp) != NULL; 724 prev = ulwp, ulwpp = &ulwp->ul_link) { 725 if (ulwp->ul_stop) /* skip suspended threads */ 726 continue; 727 tpri = rt? CMP_PRIO(ulwp) : 0; 728 if (tpri > priority) { 729 foundpp = ulwpp; 730 *prevp = prev; 731 priority = tpri; 732 if (!rt) 733 break; 734 } 735 } 736 return (foundpp); 737 } 738 739 /* 740 * For real-time, we search the entire queue because the dispatch 741 * (kernel) priorities may have changed since enqueueing. 742 */ 743 static ulwp_t ** 744 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) 745 { 746 ulwp_t **ulwpp = ulwpp_org; 747 ulwp_t *ulwp = *ulwpp; 748 ulwp_t **foundpp = ulwpp; 749 int priority = CMP_PRIO(ulwp); 750 ulwp_t *prev; 751 int tpri; 752 753 for (prev = ulwp, ulwpp = &ulwp->ul_link; 754 (ulwp = *ulwpp) != NULL; 755 prev = ulwp, ulwpp = &ulwp->ul_link) { 756 tpri = CMP_PRIO(ulwp); 757 if (tpri > priority) { 758 foundpp = ulwpp; 759 *prevp = prev; 760 priority = tpri; 761 } 762 } 763 ulwp = *foundpp; 764 765 /* 766 * Try not to return a suspended thread. 767 * This mimics the old libthread's behavior. 768 */ 769 if (ulwp->ul_stop && 770 (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { 771 foundpp = ulwpp; 772 ulwp = *foundpp; 773 } 774 ulwp->ul_rt = 1; 775 return (foundpp); 776 } 777 778 ulwp_t ** 779 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) 780 { 781 queue_root_t *qrp; 782 ulwp_t **ulwpp; 783 ulwp_t *ulwp; 784 int rt; 785 786 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 787 788 if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { 789 *more = 0; 790 return (NULL); /* no lwps on the queue */ 791 } 792 rt = (qrp->qr_rtcount != 0); 793 *prevp = NULL; 794 if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ 795 *more = 0; 796 ulwp->ul_rt = rt; 797 return (&qrp->qr_head); 798 } 799 *more = 1; 800 801 if (rt) /* real-time queue */ 802 return (queue_slot_rt(&qrp->qr_head, prevp)); 803 /* 804 * Try not to return a suspended thread. 805 * This mimics the old libthread's behavior. 
806 */ 807 if (ulwp->ul_stop && 808 (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { 809 ulwp = *ulwpp; 810 ulwp->ul_rt = 0; 811 return (ulwpp); 812 } 813 /* 814 * The common case; just pick the first thread on the queue. 815 */ 816 ulwp->ul_rt = 0; 817 return (&qrp->qr_head); 818 } 819 820 /* 821 * Common code for unlinking an lwp from a user-level sleep queue. 822 */ 823 void 824 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 825 { 826 queue_root_t *qrp = qp->qh_root; 827 queue_root_t *nqrp; 828 ulwp_t *ulwp = *ulwpp; 829 ulwp_t *next; 830 831 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 832 ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); 833 834 DECR(qp->qh_qlen); 835 DECR(qrp->qr_qlen); 836 if (ulwp->ul_rtqueued) { 837 ulwp->ul_rtqueued = 0; 838 qrp->qr_rtcount--; 839 } 840 next = ulwp->ul_link; 841 *ulwpp = next; 842 ulwp->ul_link = NULL; 843 if (qrp->qr_tail == ulwp) 844 qrp->qr_tail = prev; 845 if (qrp == &ulwp->ul_queue_root) { 846 /* 847 * We can't continue to use the unlinked thread's 848 * queue root for the linkage. 849 */ 850 queue_root_t *qr_next = qrp->qr_next; 851 queue_root_t *qr_prev = qrp->qr_prev; 852 853 if (qrp->qr_tail) { 854 /* switch to using the last thread's queue root */ 855 ASSERT(qrp->qr_qlen != 0); 856 nqrp = &qrp->qr_tail->ul_queue_root; 857 *nqrp = *qrp; 858 if (qr_next) 859 qr_next->qr_prev = nqrp; 860 if (qr_prev) 861 qr_prev->qr_next = nqrp; 862 else 863 qp->qh_hlist = nqrp; 864 qp->qh_root = nqrp; 865 } else { 866 /* empty queue root; just delete from the hash list */ 867 ASSERT(qrp->qr_qlen == 0); 868 if (qr_next) 869 qr_next->qr_prev = qr_prev; 870 if (qr_prev) 871 qr_prev->qr_next = qr_next; 872 else 873 qp->qh_hlist = qr_next; 874 qp->qh_root = NULL; 875 DECR(qp->qh_hlen); 876 } 877 } 878 } 879 880 ulwp_t * 881 dequeue(queue_head_t *qp, int *more) 882 { 883 ulwp_t **ulwpp; 884 ulwp_t *ulwp; 885 ulwp_t *prev; 886 887 if ((ulwpp = queue_slot(qp, &prev, more)) == NULL) 888 return (NULL); 889 ulwp = *ulwpp; 890 queue_unlink(qp, ulwpp, prev); 891 ulwp->ul_sleepq = NULL; 892 ulwp->ul_wchan = NULL; 893 return (ulwp); 894 } 895 896 /* 897 * Return a pointer to the highest priority thread sleeping on wchan. 898 */ 899 ulwp_t * 900 queue_waiter(queue_head_t *qp) 901 { 902 ulwp_t **ulwpp; 903 ulwp_t *prev; 904 int more; 905 906 if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL) 907 return (NULL); 908 return (*ulwpp); 909 } 910 911 int 912 dequeue_self(queue_head_t *qp) 913 { 914 ulwp_t *self = curthread; 915 queue_root_t *qrp; 916 ulwp_t **ulwpp; 917 ulwp_t *ulwp; 918 ulwp_t *prev; 919 int found = 0; 920 921 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 922 923 /* find self on the sleep queue */ 924 if ((qrp = qp->qh_root) != NULL) { 925 for (prev = NULL, ulwpp = &qrp->qr_head; 926 (ulwp = *ulwpp) != NULL; 927 prev = ulwp, ulwpp = &ulwp->ul_link) { 928 if (ulwp == self) { 929 queue_unlink(qp, ulwpp, prev); 930 self->ul_cvmutex = NULL; 931 self->ul_sleepq = NULL; 932 self->ul_wchan = NULL; 933 found = 1; 934 break; 935 } 936 } 937 } 938 939 if (!found) 940 thr_panic("dequeue_self(): curthread not found on queue"); 941 942 return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL); 943 } 944 945 /* 946 * Called from call_user_handler() and _thrp_suspend() to take 947 * ourself off of our sleep queue so we can grab locks. 948 */ 949 void 950 unsleep_self(void) 951 { 952 ulwp_t *self = curthread; 953 queue_head_t *qp; 954 955 /* 956 * Calling enter_critical()/exit_critical() here would lead 957 * to recursion. 
 * Just manipulate self->ul_critical directly.
 */
        self->ul_critical++;
        while (self->ul_sleepq != NULL) {
                qp = queue_lock(self->ul_wchan, self->ul_qtype);
                /*
                 * We may have been moved from a CV queue to a
                 * mutex queue while we were attempting queue_lock().
                 * If so, just loop around and try again.
                 * dequeue_self() clears self->ul_sleepq.
                 */
                if (qp == self->ul_sleepq)
                        (void) dequeue_self(qp);
                queue_unlock(qp);
        }
        self->ul_writer = 0;
        self->ul_critical--;
}

/*
 * Common code for calling the ___lwp_mutex_timedlock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
static int
mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
{
        ulwp_t *self = curthread;
        uberdata_t *udp = self->ul_uberdata;
        int mtype = mp->mutex_type;
        hrtime_t begin_sleep;
        int acquired;
        int error;

        self->ul_sp = stkptr();
        self->ul_wchan = mp;
        if (__td_event_report(self, TD_SLEEP, udp)) {
                self->ul_td_evbuf.eventnum = TD_SLEEP;
                self->ul_td_evbuf.eventdata = mp;
                tdb_event(TD_SLEEP, udp);
        }
        if (msp) {
                tdb_incr(msp->mutex_sleep);
                begin_sleep = gethrtime();
        }

        DTRACE_PROBE1(plockstat, mutex__block, mp);

        for (;;) {
                /*
                 * A return value of EOWNERDEAD or ELOCKUNMAPPED
                 * means we successfully acquired the lock.
                 */
                if ((error = ___lwp_mutex_timedlock(mp, tsp, self)) != 0 &&
                    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
                        acquired = 0;
                        break;
                }

                if (mtype & USYNC_PROCESS) {
                        /*
                         * Defend against forkall(). We may be the child,
                         * in which case we don't actually own the mutex.
                         */
                        enter_critical(self);
                        if (mp->mutex_ownerpid == udp->pid) {
                                exit_critical(self);
                                acquired = 1;
                                break;
                        }
                        exit_critical(self);
                } else {
                        acquired = 1;
                        break;
                }
        }

        if (msp)
                msp->mutex_sleep_time += gethrtime() - begin_sleep;
        self->ul_wchan = NULL;
        self->ul_sp = 0;

        if (acquired) {
                ASSERT(mp->mutex_owner == (uintptr_t)self);
                DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
                DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
        } else {
                DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
                DTRACE_PROBE2(plockstat, mutex__error, mp, error);
        }

        return (error);
}

/*
 * Common code for calling the ___lwp_mutex_trylock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
int
mutex_trylock_kernel(mutex_t *mp)
{
        ulwp_t *self = curthread;
        uberdata_t *udp = self->ul_uberdata;
        int mtype = mp->mutex_type;
        int error;
        int acquired;

        for (;;) {
                /*
                 * A return value of EOWNERDEAD or ELOCKUNMAPPED
                 * means we successfully acquired the lock.
                 */
                if ((error = ___lwp_mutex_trylock(mp, self)) != 0 &&
                    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
                        acquired = 0;
                        break;
                }

                if (mtype & USYNC_PROCESS) {
                        /*
                         * Defend against forkall(). We may be the child,
                         * in which case we don't actually own the mutex.
1078 */ 1079 enter_critical(self); 1080 if (mp->mutex_ownerpid == udp->pid) { 1081 exit_critical(self); 1082 acquired = 1; 1083 break; 1084 } 1085 exit_critical(self); 1086 } else { 1087 acquired = 1; 1088 break; 1089 } 1090 } 1091 1092 if (acquired) { 1093 ASSERT(mp->mutex_owner == (uintptr_t)self); 1094 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1095 } else if (error != EBUSY) { 1096 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1097 } 1098 1099 return (error); 1100 } 1101 1102 volatile sc_shared_t * 1103 setup_schedctl(void) 1104 { 1105 ulwp_t *self = curthread; 1106 volatile sc_shared_t *scp; 1107 sc_shared_t *tmp; 1108 1109 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 1110 !self->ul_vfork && /* not a child of vfork() */ 1111 !self->ul_schedctl_called) { /* haven't been called before */ 1112 enter_critical(self); 1113 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 1114 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 1115 self->ul_schedctl = scp = tmp; 1116 exit_critical(self); 1117 } 1118 /* 1119 * Unless the call to setup_schedctl() is surrounded 1120 * by enter_critical()/exit_critical(), the address 1121 * we are returning could be invalid due to a forkall() 1122 * having occurred in another thread. 1123 */ 1124 return (scp); 1125 } 1126 1127 /* 1128 * Interfaces from libsched, incorporated into libc. 1129 * libsched.so.1 is now a filter library onto libc. 1130 */ 1131 #pragma weak schedctl_lookup = schedctl_init 1132 schedctl_t * 1133 schedctl_init(void) 1134 { 1135 volatile sc_shared_t *scp = setup_schedctl(); 1136 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 1137 } 1138 1139 void 1140 schedctl_exit(void) 1141 { 1142 } 1143 1144 /* 1145 * Contract private interface for java. 1146 * Set up the schedctl data if it doesn't exist yet. 1147 * Return a pointer to the pointer to the schedctl data. 1148 */ 1149 volatile sc_shared_t *volatile * 1150 _thr_schedctl(void) 1151 { 1152 ulwp_t *self = curthread; 1153 volatile sc_shared_t *volatile *ptr; 1154 1155 if (self->ul_vfork) 1156 return (NULL); 1157 if (*(ptr = &self->ul_schedctl) == NULL) 1158 (void) setup_schedctl(); 1159 return (ptr); 1160 } 1161 1162 /* 1163 * Block signals and attempt to block preemption. 1164 * no_preempt()/preempt() must be used in pairs but can be nested. 1165 */ 1166 void 1167 no_preempt(ulwp_t *self) 1168 { 1169 volatile sc_shared_t *scp; 1170 1171 if (self->ul_preempt++ == 0) { 1172 enter_critical(self); 1173 if ((scp = self->ul_schedctl) != NULL || 1174 (scp = setup_schedctl()) != NULL) { 1175 /* 1176 * Save the pre-existing preempt value. 1177 */ 1178 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 1179 scp->sc_preemptctl.sc_nopreempt = 1; 1180 } 1181 } 1182 } 1183 1184 /* 1185 * Undo the effects of no_preempt(). 1186 */ 1187 void 1188 preempt(ulwp_t *self) 1189 { 1190 volatile sc_shared_t *scp; 1191 1192 ASSERT(self->ul_preempt > 0); 1193 if (--self->ul_preempt == 0) { 1194 if ((scp = self->ul_schedctl) != NULL) { 1195 /* 1196 * Restore the pre-existing preempt value. 1197 */ 1198 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 1199 if (scp->sc_preemptctl.sc_yield && 1200 scp->sc_preemptctl.sc_nopreempt == 0) { 1201 yield(); 1202 if (scp->sc_preemptctl.sc_yield) { 1203 /* 1204 * Shouldn't happen. This is either 1205 * a race condition or the thread 1206 * just entered the real-time class. 
1207 */ 1208 yield(); 1209 scp->sc_preemptctl.sc_yield = 0; 1210 } 1211 } 1212 } 1213 exit_critical(self); 1214 } 1215 } 1216 1217 /* 1218 * If a call to preempt() would cause the current thread to yield or to 1219 * take deferred actions in exit_critical(), then unpark the specified 1220 * lwp so it can run while we delay. Return the original lwpid if the 1221 * unpark was not performed, else return zero. The tests are a repeat 1222 * of some of the tests in preempt(), above. This is a statistical 1223 * optimization solely for cond_sleep_queue(), below. 1224 */ 1225 static lwpid_t 1226 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 1227 { 1228 volatile sc_shared_t *scp = self->ul_schedctl; 1229 1230 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 1231 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 1232 (self->ul_curplease && self->ul_critical == 1)) { 1233 (void) __lwp_unpark(lwpid); 1234 lwpid = 0; 1235 } 1236 return (lwpid); 1237 } 1238 1239 /* 1240 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1241 * If this fails, return EBUSY and let the caller deal with it. 1242 * If this succeeds, return 0 with mutex_owner set to curthread. 1243 */ 1244 static int 1245 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1246 { 1247 ulwp_t *self = curthread; 1248 int error = EBUSY; 1249 ulwp_t *ulwp; 1250 volatile sc_shared_t *scp; 1251 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1252 volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1253 uint32_t new_lockword; 1254 int count = 0; 1255 int max_count; 1256 uint8_t max_spinners; 1257 1258 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1259 1260 if (MUTEX_OWNED(mp, self)) 1261 return (EBUSY); 1262 1263 enter_critical(self); 1264 1265 /* short-cut, not definitive (see below) */ 1266 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1267 ASSERT(mp->mutex_type & LOCK_ROBUST); 1268 error = ENOTRECOVERABLE; 1269 goto done; 1270 } 1271 1272 /* 1273 * Make one attempt to acquire the lock before 1274 * incurring the overhead of the spin loop. 1275 */ 1276 if (set_lock_byte(lockp) == 0) { 1277 *ownerp = (uintptr_t)self; 1278 error = 0; 1279 goto done; 1280 } 1281 if (!tryhard) 1282 goto done; 1283 if (ncpus == 0) 1284 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1285 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1286 max_spinners = ncpus - 1; 1287 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1288 if (max_count == 0) 1289 goto done; 1290 1291 /* 1292 * This spin loop is unfair to lwps that have already dropped into 1293 * the kernel to sleep. They will starve on a highly-contended mutex. 1294 * This is just too bad. The adaptive spin algorithm is intended 1295 * to allow programs with highly-contended locks (that is, broken 1296 * programs) to execute with reasonable speed despite their contention. 1297 * Being fair would reduce the speed of such programs and well-written 1298 * programs will not suffer in any case. 1299 */ 1300 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) 1301 goto done; 1302 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1303 for (count = 1; ; count++) { 1304 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1305 *ownerp = (uintptr_t)self; 1306 error = 0; 1307 break; 1308 } 1309 if (count == max_count) 1310 break; 1311 SMT_PAUSE(); 1312 /* 1313 * Stop spinning if the mutex owner is not running on 1314 * a processor; it will not drop the lock any time soon 1315 * and we would just be wasting time to keep spinning. 
1316 * 1317 * Note that we are looking at another thread (ulwp_t) 1318 * without ensuring that the other thread does not exit. 1319 * The scheme relies on ulwp_t structures never being 1320 * deallocated by the library (the library employs a free 1321 * list of ulwp_t structs that are reused when new threads 1322 * are created) and on schedctl shared memory never being 1323 * deallocated once created via __schedctl(). 1324 * 1325 * Thus, the worst that can happen when the spinning thread 1326 * looks at the owner's schedctl data is that it is looking 1327 * at some other thread's schedctl data. This almost never 1328 * happens and is benign when it does. 1329 */ 1330 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1331 ((scp = ulwp->ul_schedctl) == NULL || 1332 scp->sc_state != SC_ONPROC)) 1333 break; 1334 } 1335 new_lockword = spinners_decr(&mp->mutex_lockword); 1336 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1337 /* 1338 * We haven't yet acquired the lock, the lock 1339 * is free, and there are no other spinners. 1340 * Make one final attempt to acquire the lock. 1341 * 1342 * This isn't strictly necessary since mutex_lock_queue() 1343 * (the next action this thread will take if it doesn't 1344 * acquire the lock here) makes one attempt to acquire 1345 * the lock before putting the thread to sleep. 1346 * 1347 * If the next action for this thread (on failure here) 1348 * were not to call mutex_lock_queue(), this would be 1349 * necessary for correctness, to avoid ending up with an 1350 * unheld mutex with waiters but no one to wake them up. 1351 */ 1352 if (set_lock_byte(lockp) == 0) { 1353 *ownerp = (uintptr_t)self; 1354 error = 0; 1355 } 1356 count++; 1357 } 1358 1359 done: 1360 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1361 ASSERT(mp->mutex_type & LOCK_ROBUST); 1362 /* 1363 * We shouldn't own the mutex. 1364 * Just clear the lock; everyone has already been waked up. 1365 */ 1366 *ownerp = 0; 1367 (void) clear_lockbyte(&mp->mutex_lockword); 1368 error = ENOTRECOVERABLE; 1369 } 1370 1371 exit_critical(self); 1372 1373 if (error) { 1374 if (count) { 1375 DTRACE_PROBE3(plockstat, mutex__spun, mp, 0, count); 1376 } 1377 if (error != EBUSY) { 1378 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1379 } 1380 } else { 1381 if (count) { 1382 DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count); 1383 } 1384 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1385 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1386 ASSERT(mp->mutex_type & LOCK_ROBUST); 1387 error = EOWNERDEAD; 1388 } 1389 } 1390 1391 return (error); 1392 } 1393 1394 /* 1395 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1396 * The owner field is not set here; the caller (spin_lock_set()) sets it. 
1397 */ 1398 static int 1399 mutex_queuelock_adaptive(mutex_t *mp) 1400 { 1401 ulwp_t *ulwp; 1402 volatile sc_shared_t *scp; 1403 volatile uint8_t *lockp; 1404 volatile uint64_t *ownerp; 1405 int count = curthread->ul_queue_spin; 1406 1407 ASSERT(mp->mutex_type == USYNC_THREAD); 1408 1409 if (count == 0) 1410 return (EBUSY); 1411 1412 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1413 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1414 while (--count >= 0) { 1415 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1416 return (0); 1417 SMT_PAUSE(); 1418 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1419 ((scp = ulwp->ul_schedctl) == NULL || 1420 scp->sc_state != SC_ONPROC)) 1421 break; 1422 } 1423 1424 return (EBUSY); 1425 } 1426 1427 /* 1428 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1429 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1430 * If this fails, return EBUSY and let the caller deal with it. 1431 * If this succeeds, return 0 with mutex_owner set to curthread 1432 * and mutex_ownerpid set to the current pid. 1433 */ 1434 static int 1435 mutex_trylock_process(mutex_t *mp, int tryhard) 1436 { 1437 ulwp_t *self = curthread; 1438 uberdata_t *udp = self->ul_uberdata; 1439 int error = EBUSY; 1440 volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64; 1441 uint32_t new_lockword; 1442 int count = 0; 1443 int max_count; 1444 uint8_t max_spinners; 1445 1446 #if defined(__sparc) && !defined(_LP64) 1447 /* horrible hack, necessary only on 32-bit sparc */ 1448 int fix_alignment_problem = 1449 (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 1450 self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)); 1451 #endif 1452 1453 ASSERT(mp->mutex_type & USYNC_PROCESS); 1454 1455 if (shared_mutex_held(mp)) 1456 return (EBUSY); 1457 1458 enter_critical(self); 1459 1460 /* short-cut, not definitive (see below) */ 1461 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1462 ASSERT(mp->mutex_type & LOCK_ROBUST); 1463 error = ENOTRECOVERABLE; 1464 goto done; 1465 } 1466 1467 /* 1468 * Make one attempt to acquire the lock before 1469 * incurring the overhead of the spin loop. 1470 */ 1471 #if defined(__sparc) && !defined(_LP64) 1472 /* horrible hack, necessary only on 32-bit sparc */ 1473 if (fix_alignment_problem) { 1474 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1475 mp->mutex_ownerpid = udp->pid; 1476 mp->mutex_owner = (uintptr_t)self; 1477 error = 0; 1478 goto done; 1479 } 1480 } else 1481 #endif 1482 if (set_lock_byte64(lockp, udp->pid) == 0) { 1483 mp->mutex_owner = (uintptr_t)self; 1484 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1485 error = 0; 1486 goto done; 1487 } 1488 if (!tryhard) 1489 goto done; 1490 if (ncpus == 0) 1491 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1492 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1493 max_spinners = ncpus - 1; 1494 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1495 if (max_count == 0) 1496 goto done; 1497 1498 /* 1499 * This is a process-shared mutex. 1500 * We cannot know if the owner is running on a processor. 1501 * We just spin and hope that it is on a processor. 
1502 */ 1503 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) 1504 goto done; 1505 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1506 for (count = 1; ; count++) { 1507 #if defined(__sparc) && !defined(_LP64) 1508 /* horrible hack, necessary only on 32-bit sparc */ 1509 if (fix_alignment_problem) { 1510 if ((*lockp & LOCKMASK64) == 0 && 1511 set_lock_byte(&mp->mutex_lockw) == 0) { 1512 mp->mutex_ownerpid = udp->pid; 1513 mp->mutex_owner = (uintptr_t)self; 1514 error = 0; 1515 break; 1516 } 1517 } else 1518 #endif 1519 if ((*lockp & LOCKMASK64) == 0 && 1520 set_lock_byte64(lockp, udp->pid) == 0) { 1521 mp->mutex_owner = (uintptr_t)self; 1522 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1523 error = 0; 1524 break; 1525 } 1526 if (count == max_count) 1527 break; 1528 SMT_PAUSE(); 1529 } 1530 new_lockword = spinners_decr(&mp->mutex_lockword); 1531 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1532 /* 1533 * We haven't yet acquired the lock, the lock 1534 * is free, and there are no other spinners. 1535 * Make one final attempt to acquire the lock. 1536 * 1537 * This isn't strictly necessary since mutex_lock_kernel() 1538 * (the next action this thread will take if it doesn't 1539 * acquire the lock here) makes one attempt to acquire 1540 * the lock before putting the thread to sleep. 1541 * 1542 * If the next action for this thread (on failure here) 1543 * were not to call mutex_lock_kernel(), this would be 1544 * necessary for correctness, to avoid ending up with an 1545 * unheld mutex with waiters but no one to wake them up. 1546 */ 1547 #if defined(__sparc) && !defined(_LP64) 1548 /* horrible hack, necessary only on 32-bit sparc */ 1549 if (fix_alignment_problem) { 1550 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1551 mp->mutex_ownerpid = udp->pid; 1552 mp->mutex_owner = (uintptr_t)self; 1553 error = 0; 1554 } 1555 } else 1556 #endif 1557 if (set_lock_byte64(lockp, udp->pid) == 0) { 1558 mp->mutex_owner = (uintptr_t)self; 1559 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1560 error = 0; 1561 } 1562 count++; 1563 } 1564 1565 done: 1566 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1567 ASSERT(mp->mutex_type & LOCK_ROBUST); 1568 /* 1569 * We shouldn't own the mutex. 1570 * Just clear the lock; everyone has already been waked up. 1571 */ 1572 mp->mutex_owner = 0; 1573 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1574 (void) clear_lockbyte64(&mp->mutex_lockword64); 1575 error = ENOTRECOVERABLE; 1576 } 1577 1578 exit_critical(self); 1579 1580 if (error) { 1581 if (count) { 1582 DTRACE_PROBE3(plockstat, mutex__spun, mp, 0, count); 1583 } 1584 if (error != EBUSY) { 1585 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1586 } 1587 } else { 1588 if (count) { 1589 DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count); 1590 } 1591 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1592 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1593 ASSERT(mp->mutex_type & LOCK_ROBUST); 1594 if (mp->mutex_flag & LOCK_OWNERDEAD) 1595 error = EOWNERDEAD; 1596 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1597 error = ELOCKUNMAPPED; 1598 else 1599 error = EOWNERDEAD; 1600 } 1601 } 1602 1603 return (error); 1604 } 1605 1606 /* 1607 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1608 * Returns the lwpid of the thread that was dequeued, if any. 1609 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1610 * to wake up the specified lwp. 
1611 */ 1612 static lwpid_t 1613 mutex_wakeup(mutex_t *mp) 1614 { 1615 lwpid_t lwpid = 0; 1616 int more; 1617 queue_head_t *qp; 1618 ulwp_t *ulwp; 1619 1620 /* 1621 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1622 * waiters bit if no one was found on the queue because the mutex 1623 * might have been deallocated or reallocated for another purpose. 1624 */ 1625 qp = queue_lock(mp, MX); 1626 if ((ulwp = dequeue(qp, &more)) != NULL) { 1627 lwpid = ulwp->ul_lwpid; 1628 mp->mutex_waiters = more; 1629 } 1630 queue_unlock(qp); 1631 return (lwpid); 1632 } 1633 1634 /* 1635 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 1636 */ 1637 static void 1638 mutex_wakeup_all(mutex_t *mp) 1639 { 1640 queue_head_t *qp; 1641 queue_root_t *qrp; 1642 int nlwpid = 0; 1643 int maxlwps = MAXLWPS; 1644 ulwp_t *ulwp; 1645 lwpid_t buffer[MAXLWPS]; 1646 lwpid_t *lwpid = buffer; 1647 1648 /* 1649 * Walk the list of waiters and prepare to wake up all of them. 1650 * The waiters flag has already been cleared from the mutex. 1651 * 1652 * We keep track of lwpids that are to be unparked in lwpid[]. 1653 * __lwp_unpark_all() is called to unpark all of them after 1654 * they have been removed from the sleep queue and the sleep 1655 * queue lock has been dropped. If we run out of space in our 1656 * on-stack buffer, we need to allocate more but we can't call 1657 * lmalloc() because we are holding a queue lock when the overflow 1658 * occurs and lmalloc() acquires a lock. We can't use alloca() 1659 * either because the application may have allocated a small 1660 * stack and we don't want to overrun the stack. So we call 1661 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1662 * system call directly since that path acquires no locks. 1663 */ 1664 qp = queue_lock(mp, MX); 1665 for (;;) { 1666 if ((qrp = qp->qh_root) == NULL || 1667 (ulwp = qrp->qr_head) == NULL) 1668 break; 1669 ASSERT(ulwp->ul_wchan == mp); 1670 queue_unlink(qp, &qrp->qr_head, NULL); 1671 ulwp->ul_sleepq = NULL; 1672 ulwp->ul_wchan = NULL; 1673 if (nlwpid == maxlwps) 1674 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1675 lwpid[nlwpid++] = ulwp->ul_lwpid; 1676 } 1677 1678 if (nlwpid == 0) { 1679 queue_unlock(qp); 1680 } else { 1681 mp->mutex_waiters = 0; 1682 no_preempt(curthread); 1683 queue_unlock(qp); 1684 if (nlwpid == 1) 1685 (void) __lwp_unpark(lwpid[0]); 1686 else 1687 (void) __lwp_unpark_all(lwpid, nlwpid); 1688 preempt(curthread); 1689 } 1690 1691 if (lwpid != buffer) 1692 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 1693 } 1694 1695 /* 1696 * Release a process-private mutex. 1697 * As an optimization, if there are waiters but there are also spinners 1698 * attempting to acquire the mutex, then don't bother waking up a waiter; 1699 * one of the spinners will acquire the mutex soon and it would be a waste 1700 * of resources to wake up some thread just to have it spin for a while 1701 * and then possibly go back to sleep. See mutex_trylock_adaptive(). 
1702 */ 1703 static lwpid_t 1704 mutex_unlock_queue(mutex_t *mp, int release_all) 1705 { 1706 ulwp_t *self = curthread; 1707 lwpid_t lwpid = 0; 1708 uint32_t old_lockword; 1709 1710 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1711 sigoff(self); 1712 mp->mutex_owner = 0; 1713 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1714 if ((old_lockword & WAITERMASK) && 1715 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1716 no_preempt(self); /* ensure a prompt wakeup */ 1717 if (release_all) 1718 mutex_wakeup_all(mp); 1719 else 1720 lwpid = mutex_wakeup(mp); 1721 if (lwpid == 0) 1722 preempt(self); 1723 } 1724 sigon(self); 1725 return (lwpid); 1726 } 1727 1728 /* 1729 * Like mutex_unlock_queue(), but for process-shared mutexes. 1730 */ 1731 static void 1732 mutex_unlock_process(mutex_t *mp, int release_all) 1733 { 1734 ulwp_t *self = curthread; 1735 uint64_t old_lockword64; 1736 1737 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1738 sigoff(self); 1739 mp->mutex_owner = 0; 1740 #if defined(__sparc) && !defined(_LP64) 1741 /* horrible hack, necessary only on 32-bit sparc */ 1742 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 1743 self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)) { 1744 uint32_t old_lockword; 1745 mp->mutex_ownerpid = 0; 1746 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1747 if ((old_lockword & WAITERMASK) && 1748 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1749 no_preempt(self); /* ensure a prompt wakeup */ 1750 (void) ___lwp_mutex_wakeup(mp, release_all); 1751 preempt(self); 1752 } 1753 sigon(self); 1754 return; 1755 } 1756 #endif 1757 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1758 old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64); 1759 if ((old_lockword64 & WAITERMASK64) && 1760 (release_all || (old_lockword64 & SPINNERMASK64) == 0)) { 1761 no_preempt(self); /* ensure a prompt wakeup */ 1762 (void) ___lwp_mutex_wakeup(mp, release_all); 1763 preempt(self); 1764 } 1765 sigon(self); 1766 } 1767 1768 void 1769 stall(void) 1770 { 1771 for (;;) 1772 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1773 } 1774 1775 /* 1776 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1777 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1778 * If successful, returns with mutex_owner set correctly. 1779 */ 1780 int 1781 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1782 timespec_t *tsp) 1783 { 1784 uberdata_t *udp = curthread->ul_uberdata; 1785 queue_head_t *qp; 1786 hrtime_t begin_sleep; 1787 int error = 0; 1788 1789 self->ul_sp = stkptr(); 1790 if (__td_event_report(self, TD_SLEEP, udp)) { 1791 self->ul_wchan = mp; 1792 self->ul_td_evbuf.eventnum = TD_SLEEP; 1793 self->ul_td_evbuf.eventdata = mp; 1794 tdb_event(TD_SLEEP, udp); 1795 } 1796 if (msp) { 1797 tdb_incr(msp->mutex_sleep); 1798 begin_sleep = gethrtime(); 1799 } 1800 1801 DTRACE_PROBE1(plockstat, mutex__block, mp); 1802 1803 /* 1804 * Put ourself on the sleep queue, and while we are 1805 * unable to grab the lock, go park in the kernel. 1806 * Take ourself off the sleep queue after we acquire the lock. 1807 * The waiter bit can be set/cleared only while holding the queue lock. 
1808 */ 1809 qp = queue_lock(mp, MX); 1810 enqueue(qp, self, 0); 1811 mp->mutex_waiters = 1; 1812 for (;;) { 1813 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1814 mp->mutex_owner = (uintptr_t)self; 1815 mp->mutex_waiters = dequeue_self(qp); 1816 break; 1817 } 1818 set_parking_flag(self, 1); 1819 queue_unlock(qp); 1820 /* 1821 * __lwp_park() will return the residual time in tsp 1822 * if we are unparked before the timeout expires. 1823 */ 1824 error = __lwp_park(tsp, 0); 1825 set_parking_flag(self, 0); 1826 /* 1827 * We could have taken a signal or suspended ourself. 1828 * If we did, then we removed ourself from the queue. 1829 * Someone else may have removed us from the queue 1830 * as a consequence of mutex_unlock(). We may have 1831 * gotten a timeout from __lwp_park(). Or we may still 1832 * be on the queue and this is just a spurious wakeup. 1833 */ 1834 qp = queue_lock(mp, MX); 1835 if (self->ul_sleepq == NULL) { 1836 if (error) { 1837 mp->mutex_waiters = queue_waiter(qp)? 1 : 0; 1838 if (error != EINTR) 1839 break; 1840 error = 0; 1841 } 1842 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1843 mp->mutex_owner = (uintptr_t)self; 1844 break; 1845 } 1846 enqueue(qp, self, 0); 1847 mp->mutex_waiters = 1; 1848 } 1849 ASSERT(self->ul_sleepq == qp && 1850 self->ul_qtype == MX && 1851 self->ul_wchan == mp); 1852 if (error) { 1853 if (error != EINTR) { 1854 mp->mutex_waiters = dequeue_self(qp); 1855 break; 1856 } 1857 error = 0; 1858 } 1859 } 1860 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1861 self->ul_wchan == NULL); 1862 self->ul_sp = 0; 1863 1864 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1865 1866 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1867 ASSERT(mp->mutex_type & LOCK_ROBUST); 1868 /* 1869 * We shouldn't own the mutex. 1870 * Just clear the lock; everyone has already been waked up. 1871 */ 1872 mp->mutex_owner = 0; 1873 (void) clear_lockbyte(&mp->mutex_lockword); 1874 error = ENOTRECOVERABLE; 1875 } 1876 1877 queue_unlock(qp); 1878 1879 if (msp) 1880 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1881 1882 if (error) { 1883 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1884 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1885 } else { 1886 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1887 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1888 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1889 ASSERT(mp->mutex_type & LOCK_ROBUST); 1890 error = EOWNERDEAD; 1891 } 1892 } 1893 1894 return (error); 1895 } 1896 1897 static int 1898 mutex_recursion(mutex_t *mp, int mtype, int try) 1899 { 1900 ASSERT(mutex_held(mp)); 1901 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1902 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1903 1904 if (mtype & LOCK_RECURSIVE) { 1905 if (mp->mutex_rcount == RECURSION_MAX) { 1906 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1907 return (EAGAIN); 1908 } 1909 mp->mutex_rcount++; 1910 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1911 return (0); 1912 } 1913 if (try == MUTEX_LOCK) { 1914 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1915 return (EDEADLK); 1916 } 1917 return (EBUSY); 1918 } 1919 1920 /* 1921 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1922 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1923 * We use tdb_hash_lock here and in the synch object tracking code in 1924 * the tdb_agent.c file. There is no conflict between these two usages. 
1925 */ 1926 void 1927 register_lock(mutex_t *mp) 1928 { 1929 uberdata_t *udp = curthread->ul_uberdata; 1930 uint_t hash = LOCK_HASH(mp); 1931 robust_t *rlp; 1932 robust_t *invalid; 1933 robust_t **rlpp; 1934 robust_t **table; 1935 1936 if ((table = udp->robustlocks) == NULL) { 1937 lmutex_lock(&udp->tdb_hash_lock); 1938 if ((table = udp->robustlocks) == NULL) { 1939 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1940 membar_producer(); 1941 udp->robustlocks = table; 1942 } 1943 lmutex_unlock(&udp->tdb_hash_lock); 1944 } 1945 membar_consumer(); 1946 1947 /* 1948 * First search the registered table with no locks held. 1949 * This is safe because the table never shrinks 1950 * and we can only get a false negative. 1951 */ 1952 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1953 if (rlp->robust_lock == mp) /* already registered */ 1954 return; 1955 } 1956 1957 /* 1958 * The lock was not found. 1959 * Repeat the operation with tdb_hash_lock held. 1960 */ 1961 lmutex_lock(&udp->tdb_hash_lock); 1962 1963 invalid = NULL; 1964 for (rlpp = &table[hash]; 1965 (rlp = *rlpp) != NULL; 1966 rlpp = &rlp->robust_next) { 1967 if (rlp->robust_lock == mp) { /* already registered */ 1968 lmutex_unlock(&udp->tdb_hash_lock); 1969 return; 1970 } 1971 /* remember the first invalid entry, if any */ 1972 if (rlp->robust_lock == INVALID_ADDR && invalid == NULL) 1973 invalid = rlp; 1974 } 1975 1976 /* 1977 * The lock has never been registered. 1978 * Add it to the table and register it now. 1979 */ 1980 if ((rlp = invalid) != NULL) { 1981 /* 1982 * Reuse the invalid entry we found above. 1983 * The linkages are still correct. 1984 */ 1985 rlp->robust_lock = mp; 1986 membar_producer(); 1987 } else { 1988 /* 1989 * Allocate a new entry and add it to 1990 * the hash table and to the global list. 1991 */ 1992 rlp = lmalloc(sizeof (*rlp)); 1993 rlp->robust_lock = mp; 1994 rlp->robust_next = NULL; 1995 rlp->robust_list = udp->robustlist; 1996 udp->robustlist = rlp; 1997 membar_producer(); 1998 *rlpp = rlp; 1999 } 2000 2001 lmutex_unlock(&udp->tdb_hash_lock); 2002 2003 (void) ___lwp_mutex_register(mp, &rlp->robust_lock); 2004 } 2005 2006 /* 2007 * This is called in the child of fork()/forkall() to start over 2008 * with a clean slate. (Each process must register its own locks.) 2009 * No locks are needed because all other threads are suspended or gone. 2010 */ 2011 void 2012 unregister_locks(void) 2013 { 2014 uberdata_t *udp = curthread->ul_uberdata; 2015 robust_t **table; 2016 robust_t *rlp; 2017 robust_t *next; 2018 2019 /* 2020 * Do this first, before calling lfree(). 2021 */ 2022 table = udp->robustlocks; 2023 udp->robustlocks = NULL; 2024 rlp = udp->robustlist; 2025 udp->robustlist = NULL; 2026 2027 /* 2028 * Do this by traversing the global list, not the hash table. 2029 */ 2030 while (rlp != NULL) { 2031 next = rlp->robust_list; 2032 lfree(rlp, sizeof (*rlp)); 2033 rlp = next; 2034 } 2035 if (table != NULL) 2036 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 2037 } 2038 2039 /* 2040 * Returns with mutex_owner set correctly. 
2041 */ 2042 int 2043 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 2044 { 2045 ulwp_t *self = curthread; 2046 uberdata_t *udp = self->ul_uberdata; 2047 int mtype = mp->mutex_type; 2048 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2049 int error = 0; 2050 int noceil = try & MUTEX_NOCEIL; 2051 uint8_t ceil; 2052 int myprio; 2053 2054 try &= ~MUTEX_NOCEIL; 2055 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 2056 2057 if (!self->ul_schedctl_called) 2058 (void) setup_schedctl(); 2059 2060 if (msp && try == MUTEX_TRY) 2061 tdb_incr(msp->mutex_try); 2062 2063 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_held(mp)) 2064 return (mutex_recursion(mp, mtype, try)); 2065 2066 if (self->ul_error_detection && try == MUTEX_LOCK && 2067 tsp == NULL && mutex_held(mp)) 2068 lock_error(mp, "mutex_lock", NULL, NULL); 2069 2070 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2071 update_sched(self); 2072 if (self->ul_cid != self->ul_rtclassid) { 2073 DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM); 2074 return (EPERM); 2075 } 2076 ceil = mp->mutex_ceiling; 2077 myprio = self->ul_epri? self->ul_epri : self->ul_pri; 2078 if (myprio > ceil) { 2079 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 2080 return (EINVAL); 2081 } 2082 if ((error = _ceil_mylist_add(mp)) != 0) { 2083 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 2084 return (error); 2085 } 2086 if (myprio < ceil) 2087 _ceil_prio_inherit(ceil); 2088 } 2089 2090 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 2091 == (USYNC_PROCESS | LOCK_ROBUST)) 2092 register_lock(mp); 2093 2094 if (mtype & LOCK_PRIO_INHERIT) { 2095 /* go straight to the kernel */ 2096 if (try == MUTEX_TRY) 2097 error = mutex_trylock_kernel(mp); 2098 else /* MUTEX_LOCK */ 2099 error = mutex_lock_kernel(mp, tsp, msp); 2100 /* 2101 * The kernel never sets or clears the lock byte 2102 * for LOCK_PRIO_INHERIT mutexes. 2103 * Set it here for consistency. 2104 */ 2105 switch (error) { 2106 case 0: 2107 self->ul_pilocks++; 2108 mp->mutex_lockw = LOCKSET; 2109 break; 2110 case EOWNERDEAD: 2111 case ELOCKUNMAPPED: 2112 self->ul_pilocks++; 2113 mp->mutex_lockw = LOCKSET; 2114 /* FALLTHROUGH */ 2115 case ENOTRECOVERABLE: 2116 ASSERT(mtype & LOCK_ROBUST); 2117 break; 2118 case EDEADLK: 2119 if (try == MUTEX_TRY) { 2120 error = EBUSY; 2121 } else if (tsp != NULL) { /* simulate a timeout */ 2122 /* 2123 * Note: mutex_timedlock() never returns EINTR. 
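 *
 * For reference, callers supply the timeout (tsp) through
 * pthread_mutex_timedlock(), which converts its absolute
 * CLOCK_REALTIME deadline to the relative time used here
 * (pthread_mutex_reltimedlock_np() passes a relative time
 * directly).  An illustrative caller, with hypothetical
 * names only:
 *
 *	struct timespec deadline;
 *	(void) clock_gettime(CLOCK_REALTIME, &deadline);
 *	deadline.tv_sec += 5;		(wait at most ~5 seconds)
 *	if (pthread_mutex_timedlock(&m, &deadline) == ETIMEDOUT)
 *		(give up or recover)
 *
 * The ETIME generated here becomes ETIMEDOUT before
 * pthread_mutex_timedlock() returns it to the application.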
2124 */ 2125 timespec_t ts = *tsp; 2126 timespec_t rts; 2127 2128 while (__nanosleep(&ts, &rts) == EINTR) 2129 ts = rts; 2130 error = ETIME; 2131 } else { /* simulate a deadlock */ 2132 stall(); 2133 } 2134 break; 2135 } 2136 } else if (mtype & USYNC_PROCESS) { 2137 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 2138 if (error == EBUSY && try == MUTEX_LOCK) 2139 error = mutex_lock_kernel(mp, tsp, msp); 2140 } else { /* USYNC_THREAD */ 2141 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 2142 if (error == EBUSY && try == MUTEX_LOCK) 2143 error = mutex_lock_queue(self, msp, mp, tsp); 2144 } 2145 2146 switch (error) { 2147 case 0: 2148 case EOWNERDEAD: 2149 case ELOCKUNMAPPED: 2150 if (mtype & LOCK_ROBUST) 2151 remember_lock(mp); 2152 if (msp) 2153 record_begin_hold(msp); 2154 break; 2155 default: 2156 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2157 (void) _ceil_mylist_del(mp); 2158 if (myprio < ceil) 2159 _ceil_prio_waive(); 2160 } 2161 if (try == MUTEX_TRY) { 2162 if (msp) 2163 tdb_incr(msp->mutex_try_fail); 2164 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2165 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2166 tdb_event(TD_LOCK_TRY, udp); 2167 } 2168 } 2169 break; 2170 } 2171 2172 return (error); 2173 } 2174 2175 int 2176 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 2177 { 2178 ulwp_t *self = curthread; 2179 uberdata_t *udp = self->ul_uberdata; 2180 2181 /* 2182 * We know that USYNC_PROCESS is set in mtype and that 2183 * zero, one, or both of the flags LOCK_RECURSIVE and 2184 * LOCK_ERRORCHECK are set, and that no other flags are set. 2185 */ 2186 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 2187 enter_critical(self); 2188 #if defined(__sparc) && !defined(_LP64) 2189 /* horrible hack, necessary only on 32-bit sparc */ 2190 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2191 self->ul_misaligned) { 2192 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2193 mp->mutex_ownerpid = udp->pid; 2194 mp->mutex_owner = (uintptr_t)self; 2195 exit_critical(self); 2196 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2197 return (0); 2198 } 2199 } else 2200 #endif 2201 if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) { 2202 mp->mutex_owner = (uintptr_t)self; 2203 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 2204 exit_critical(self); 2205 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2206 return (0); 2207 } 2208 exit_critical(self); 2209 2210 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 2211 return (mutex_recursion(mp, mtype, try)); 2212 2213 if (try == MUTEX_LOCK) { 2214 if (mutex_trylock_process(mp, 1) == 0) 2215 return (0); 2216 return (mutex_lock_kernel(mp, tsp, NULL)); 2217 } 2218 2219 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2220 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2221 tdb_event(TD_LOCK_TRY, udp); 2222 } 2223 return (EBUSY); 2224 } 2225 2226 static int 2227 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 2228 { 2229 ulwp_t *self = curthread; 2230 int mtype = mp->mutex_type; 2231 uberflags_t *gflags; 2232 2233 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2234 self->ul_error_detection && self->ul_misaligned == 0) 2235 lock_error(mp, "mutex_lock", NULL, "mutex is misaligned"); 2236 2237 /* 2238 * Optimize the case of USYNC_THREAD, including 2239 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2240 * no error detection, no lock statistics, 2241 * and the process has only a single thread. 2242 * (Most likely a traditional single-threaded application.) 
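 *
 * Even without atomics, signals must be deferred around the two
 * stores below: a handler that itself takes this mutex could
 * otherwise run between the store to mutex_lockw and the store to
 * mutex_owner and observe a half-updated lock.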
2243 */ 2244 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2245 self->ul_uberdata->uberflags.uf_all) == 0) { 2246 /* 2247 * Only one thread exists so we don't need an atomic operation. 2248 * We do, however, need to protect against signals. 2249 */ 2250 if (mp->mutex_lockw == 0) { 2251 sigoff(self); 2252 mp->mutex_lockw = LOCKSET; 2253 mp->mutex_owner = (uintptr_t)self; 2254 sigon(self); 2255 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2256 return (0); 2257 } 2258 if (mtype && MUTEX_OWNER(mp) == self) 2259 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2260 /* 2261 * We have reached a deadlock, probably because the 2262 * process is executing non-async-signal-safe code in 2263 * a signal handler and is attempting to acquire a lock 2264 * that it already owns. This is not surprising, given 2265 * bad programming practices over the years that has 2266 * resulted in applications calling printf() and such 2267 * in their signal handlers. Unless the user has told 2268 * us that the signal handlers are safe by setting: 2269 * export _THREAD_ASYNC_SAFE=1 2270 * we return EDEADLK rather than actually deadlocking. 2271 */ 2272 if (tsp == NULL && 2273 MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 2274 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 2275 return (EDEADLK); 2276 } 2277 } 2278 2279 /* 2280 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2281 * no error detection, and no lock statistics. 2282 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2283 */ 2284 if ((gflags = self->ul_schedctl_called) != NULL && 2285 (gflags->uf_trs_ted | 2286 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2287 if (mtype & USYNC_PROCESS) 2288 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 2289 sigoff(self); 2290 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2291 mp->mutex_owner = (uintptr_t)self; 2292 sigon(self); 2293 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2294 return (0); 2295 } 2296 sigon(self); 2297 if (mtype && MUTEX_OWNER(mp) == self) 2298 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2299 if (mutex_trylock_adaptive(mp, 1) != 0) 2300 return (mutex_lock_queue(self, NULL, mp, tsp)); 2301 return (0); 2302 } 2303 2304 /* else do it the long way */ 2305 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 2306 } 2307 2308 #pragma weak pthread_mutex_lock = mutex_lock 2309 #pragma weak _mutex_lock = mutex_lock 2310 int 2311 mutex_lock(mutex_t *mp) 2312 { 2313 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2314 return (mutex_lock_impl(mp, NULL)); 2315 } 2316 2317 int 2318 pthread_mutex_timedlock(pthread_mutex_t *_RESTRICT_KYWD mp, 2319 const struct timespec *_RESTRICT_KYWD abstime) 2320 { 2321 timespec_t tslocal; 2322 int error; 2323 2324 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2325 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 2326 error = mutex_lock_impl((mutex_t *)mp, &tslocal); 2327 if (error == ETIME) 2328 error = ETIMEDOUT; 2329 return (error); 2330 } 2331 2332 int 2333 pthread_mutex_reltimedlock_np(pthread_mutex_t *_RESTRICT_KYWD mp, 2334 const struct timespec *_RESTRICT_KYWD reltime) 2335 { 2336 timespec_t tslocal; 2337 int error; 2338 2339 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2340 tslocal = *reltime; 2341 error = mutex_lock_impl((mutex_t *)mp, &tslocal); 2342 if (error == ETIME) 2343 error = ETIMEDOUT; 2344 return (error); 2345 } 2346 2347 #pragma weak pthread_mutex_trylock = mutex_trylock 2348 int 2349 mutex_trylock(mutex_t *mp) 2350 { 2351 ulwp_t *self = 
curthread; 2352 uberdata_t *udp = self->ul_uberdata; 2353 int mtype = mp->mutex_type; 2354 uberflags_t *gflags; 2355 2356 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2357 2358 /* 2359 * Optimize the case of USYNC_THREAD, including 2360 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2361 * no error detection, no lock statistics, 2362 * and the process has only a single thread. 2363 * (Most likely a traditional single-threaded application.) 2364 */ 2365 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2366 udp->uberflags.uf_all) == 0) { 2367 /* 2368 * Only one thread exists so we don't need an atomic operation. 2369 * We do, however, need to protect against signals. 2370 */ 2371 if (mp->mutex_lockw == 0) { 2372 sigoff(self); 2373 mp->mutex_lockw = LOCKSET; 2374 mp->mutex_owner = (uintptr_t)self; 2375 sigon(self); 2376 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2377 return (0); 2378 } 2379 if (mtype && MUTEX_OWNER(mp) == self) 2380 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2381 return (EBUSY); 2382 } 2383 2384 /* 2385 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2386 * no error detection, and no lock statistics. 2387 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2388 */ 2389 if ((gflags = self->ul_schedctl_called) != NULL && 2390 (gflags->uf_trs_ted | 2391 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2392 if (mtype & USYNC_PROCESS) 2393 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2394 sigoff(self); 2395 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2396 mp->mutex_owner = (uintptr_t)self; 2397 sigon(self); 2398 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2399 return (0); 2400 } 2401 sigon(self); 2402 if (mtype && MUTEX_OWNER(mp) == self) 2403 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2404 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2405 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2406 tdb_event(TD_LOCK_TRY, udp); 2407 } 2408 return (EBUSY); 2409 } 2410 2411 /* else do it the long way */ 2412 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2413 } 2414 2415 int 2416 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2417 { 2418 ulwp_t *self = curthread; 2419 uberdata_t *udp = self->ul_uberdata; 2420 int mtype = mp->mutex_type; 2421 tdb_mutex_stats_t *msp; 2422 int error = 0; 2423 int release_all; 2424 lwpid_t lwpid; 2425 2426 if ((mtype & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 2427 !mutex_held(mp)) 2428 return (EPERM); 2429 2430 if (self->ul_error_detection && !mutex_held(mp)) 2431 lock_error(mp, "mutex_unlock", NULL, NULL); 2432 2433 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2434 mp->mutex_rcount--; 2435 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2436 return (0); 2437 } 2438 2439 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2440 (void) record_hold_time(msp); 2441 2442 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2443 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2444 ASSERT(mtype & LOCK_ROBUST); 2445 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2446 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2447 } 2448 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2449 2450 if (mtype & LOCK_PRIO_INHERIT) { 2451 no_preempt(self); 2452 mp->mutex_owner = 0; 2453 /* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */ 2454 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2455 mp->mutex_lockw = LOCKCLEAR; 2456 self->ul_pilocks--; 2457 error = ___lwp_mutex_unlock(mp); 2458 preempt(self); 2459 } else if (mtype & USYNC_PROCESS) { 2460 
mutex_unlock_process(mp, release_all); 2461 } else { /* USYNC_THREAD */ 2462 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2463 (void) __lwp_unpark(lwpid); 2464 preempt(self); 2465 } 2466 } 2467 2468 if (mtype & LOCK_ROBUST) 2469 forget_lock(mp); 2470 2471 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2472 _ceil_prio_waive(); 2473 2474 return (error); 2475 } 2476 2477 #pragma weak pthread_mutex_unlock = mutex_unlock 2478 #pragma weak _mutex_unlock = mutex_unlock 2479 int 2480 mutex_unlock(mutex_t *mp) 2481 { 2482 ulwp_t *self = curthread; 2483 int mtype = mp->mutex_type; 2484 uberflags_t *gflags; 2485 lwpid_t lwpid; 2486 short el; 2487 2488 /* 2489 * Optimize the case of USYNC_THREAD, including 2490 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2491 * no error detection, no lock statistics, 2492 * and the process has only a single thread. 2493 * (Most likely a traditional single-threaded application.) 2494 */ 2495 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2496 self->ul_uberdata->uberflags.uf_all) == 0) { 2497 if (mtype) { 2498 /* 2499 * At this point we know that one or both of the 2500 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2501 */ 2502 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2503 return (EPERM); 2504 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2505 mp->mutex_rcount--; 2506 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2507 return (0); 2508 } 2509 } 2510 /* 2511 * Only one thread exists so we don't need an atomic operation. 2512 * Also, there can be no waiters. 2513 */ 2514 sigoff(self); 2515 mp->mutex_owner = 0; 2516 mp->mutex_lockword = 0; 2517 sigon(self); 2518 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2519 return (0); 2520 } 2521 2522 /* 2523 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2524 * no error detection, and no lock statistics. 2525 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2526 */ 2527 if ((gflags = self->ul_schedctl_called) != NULL) { 2528 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2529 fast_unlock: 2530 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2531 (void) __lwp_unpark(lwpid); 2532 preempt(self); 2533 } 2534 return (0); 2535 } 2536 if (el) /* error detection or lock statistics */ 2537 goto slow_unlock; 2538 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2539 /* 2540 * At this point we know that one or both of the 2541 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2542 */ 2543 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2544 return (EPERM); 2545 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2546 mp->mutex_rcount--; 2547 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2548 return (0); 2549 } 2550 goto fast_unlock; 2551 } 2552 if ((mtype & 2553 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2554 /* 2555 * At this point we know that zero, one, or both of the 2556 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2557 * that the USYNC_PROCESS flag is set. 
2558 */ 2559 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2560 return (EPERM); 2561 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2562 mp->mutex_rcount--; 2563 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2564 return (0); 2565 } 2566 mutex_unlock_process(mp, 0); 2567 return (0); 2568 } 2569 } 2570 2571 /* else do it the long way */ 2572 slow_unlock: 2573 return (mutex_unlock_internal(mp, 0)); 2574 } 2575 2576 /* 2577 * Internally to the library, almost all mutex lock/unlock actions 2578 * go through these lmutex_ functions, to protect critical regions. 2579 * We replicate a bit of code from mutex_lock() and mutex_unlock() 2580 * to make these functions faster since we know that the mutex type 2581 * of all internal locks is USYNC_THREAD. We also know that internal 2582 * locking can never fail, so we panic if it does. 2583 */ 2584 void 2585 lmutex_lock(mutex_t *mp) 2586 { 2587 ulwp_t *self = curthread; 2588 uberdata_t *udp = self->ul_uberdata; 2589 2590 ASSERT(mp->mutex_type == USYNC_THREAD); 2591 2592 enter_critical(self); 2593 /* 2594 * Optimize the case of no lock statistics and only a single thread. 2595 * (Most likely a traditional single-threaded application.) 2596 */ 2597 if (udp->uberflags.uf_all == 0) { 2598 /* 2599 * Only one thread exists; the mutex must be free. 2600 */ 2601 ASSERT(mp->mutex_lockw == 0); 2602 mp->mutex_lockw = LOCKSET; 2603 mp->mutex_owner = (uintptr_t)self; 2604 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2605 } else { 2606 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2607 2608 if (!self->ul_schedctl_called) 2609 (void) setup_schedctl(); 2610 2611 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2612 mp->mutex_owner = (uintptr_t)self; 2613 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2614 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2615 (void) mutex_lock_queue(self, msp, mp, NULL); 2616 } 2617 2618 if (msp) 2619 record_begin_hold(msp); 2620 } 2621 } 2622 2623 void 2624 lmutex_unlock(mutex_t *mp) 2625 { 2626 ulwp_t *self = curthread; 2627 uberdata_t *udp = self->ul_uberdata; 2628 2629 ASSERT(mp->mutex_type == USYNC_THREAD); 2630 2631 /* 2632 * Optimize the case of no lock statistics and only a single thread. 2633 * (Most likely a traditional single-threaded application.) 2634 */ 2635 if (udp->uberflags.uf_all == 0) { 2636 /* 2637 * Only one thread exists so there can be no waiters. 2638 */ 2639 mp->mutex_owner = 0; 2640 mp->mutex_lockword = 0; 2641 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2642 } else { 2643 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2644 lwpid_t lwpid; 2645 2646 if (msp) 2647 (void) record_hold_time(msp); 2648 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2649 (void) __lwp_unpark(lwpid); 2650 preempt(self); 2651 } 2652 } 2653 exit_critical(self); 2654 } 2655 2656 /* 2657 * For specialized code in libc, like the asynchronous i/o code, 2658 * the following sig_*() locking primitives are used in order 2659 * to make the code asynchronous signal safe. Signals are 2660 * deferred while locks acquired by these functions are held. 
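 *
 * An application-level analogue of the same idea, using only the public
 * pthread_sigmask() interface, is sketched below.  It is illustrative only
 * (hypothetical names, not part of libc; the real sig_*() primitives below
 * use the lighter-weight sigoff()/sigon() mechanism) and is guarded by
 * #if 0 so that it is never compiled.
 */

#if 0	/* illustrative sketch only */
#include <pthread.h>
#include <signal.h>

/* Block all signals for as long as the lock is held. */
static void
example_sig_lock(pthread_mutex_t *mp, sigset_t *saved)
{
	sigset_t all;

	(void) sigfillset(&all);
	(void) pthread_sigmask(SIG_BLOCK, &all, saved);
	(void) pthread_mutex_lock(mp);
}

/* Drop the lock, then restore the caller's signal mask. */
static void
example_sig_unlock(pthread_mutex_t *mp, const sigset_t *saved)
{
	(void) pthread_mutex_unlock(mp);
	(void) pthread_sigmask(SIG_SETMASK, saved, NULL);
}
#endif

/*
 * The libc-internal primitives: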
2661 */ 2662 void 2663 sig_mutex_lock(mutex_t *mp) 2664 { 2665 ulwp_t *self = curthread; 2666 2667 sigoff(self); 2668 (void) mutex_lock(mp); 2669 } 2670 2671 void 2672 sig_mutex_unlock(mutex_t *mp) 2673 { 2674 ulwp_t *self = curthread; 2675 2676 (void) mutex_unlock(mp); 2677 sigon(self); 2678 } 2679 2680 int 2681 sig_mutex_trylock(mutex_t *mp) 2682 { 2683 ulwp_t *self = curthread; 2684 int error; 2685 2686 sigoff(self); 2687 if ((error = mutex_trylock(mp)) != 0) 2688 sigon(self); 2689 return (error); 2690 } 2691 2692 /* 2693 * sig_cond_wait() is a cancellation point. 2694 */ 2695 int 2696 sig_cond_wait(cond_t *cv, mutex_t *mp) 2697 { 2698 int error; 2699 2700 ASSERT(curthread->ul_sigdefer != 0); 2701 pthread_testcancel(); 2702 error = __cond_wait(cv, mp); 2703 if (error == EINTR && curthread->ul_cursig) { 2704 sig_mutex_unlock(mp); 2705 /* take the deferred signal here */ 2706 sig_mutex_lock(mp); 2707 } 2708 pthread_testcancel(); 2709 return (error); 2710 } 2711 2712 /* 2713 * sig_cond_reltimedwait() is a cancellation point. 2714 */ 2715 int 2716 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2717 { 2718 int error; 2719 2720 ASSERT(curthread->ul_sigdefer != 0); 2721 pthread_testcancel(); 2722 error = __cond_reltimedwait(cv, mp, ts); 2723 if (error == EINTR && curthread->ul_cursig) { 2724 sig_mutex_unlock(mp); 2725 /* take the deferred signal here */ 2726 sig_mutex_lock(mp); 2727 } 2728 pthread_testcancel(); 2729 return (error); 2730 } 2731 2732 /* 2733 * For specialized code in libc, like the stdio code. 2734 * the following cancel_safe_*() locking primitives are used in 2735 * order to make the code cancellation-safe. Cancellation is 2736 * deferred while locks acquired by these functions are held. 2737 */ 2738 void 2739 cancel_safe_mutex_lock(mutex_t *mp) 2740 { 2741 (void) mutex_lock(mp); 2742 curthread->ul_libc_locks++; 2743 } 2744 2745 int 2746 cancel_safe_mutex_trylock(mutex_t *mp) 2747 { 2748 int error; 2749 2750 if ((error = mutex_trylock(mp)) == 0) 2751 curthread->ul_libc_locks++; 2752 return (error); 2753 } 2754 2755 void 2756 cancel_safe_mutex_unlock(mutex_t *mp) 2757 { 2758 ulwp_t *self = curthread; 2759 2760 ASSERT(self->ul_libc_locks != 0); 2761 2762 (void) mutex_unlock(mp); 2763 2764 /* 2765 * Decrement the count of locks held by cancel_safe_mutex_lock(). 2766 * If we are then in a position to terminate cleanly and 2767 * if there is a pending cancellation and cancellation 2768 * is not disabled and we received EINTR from a recent 2769 * system call then perform the cancellation action now. 2770 */ 2771 if (--self->ul_libc_locks == 0 && 2772 !(self->ul_vfork | self->ul_nocancel | 2773 self->ul_critical | self->ul_sigdefer) && 2774 cancel_active()) 2775 pthread_exit(PTHREAD_CANCELED); 2776 } 2777 2778 static int 2779 shared_mutex_held(mutex_t *mparg) 2780 { 2781 /* 2782 * The 'volatile' is necessary to make sure the compiler doesn't 2783 * reorder the tests of the various components of the mutex. 2784 * They must be tested in this order: 2785 * mutex_lockw 2786 * mutex_owner 2787 * mutex_ownerpid 2788 * This relies on the fact that everywhere mutex_lockw is cleared, 2789 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2790 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2791 * and mutex_ownerpid are set after mutex_lockw is set, and that 2792 * mutex_lockw is set or cleared with a memory barrier. 
2793 */ 2794 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2795 ulwp_t *self = curthread; 2796 uberdata_t *udp = self->ul_uberdata; 2797 2798 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2799 } 2800 2801 #pragma weak _mutex_held = mutex_held 2802 int 2803 mutex_held(mutex_t *mparg) 2804 { 2805 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2806 2807 if (mparg->mutex_type & USYNC_PROCESS) 2808 return (shared_mutex_held(mparg)); 2809 return (MUTEX_OWNED(mp, curthread)); 2810 } 2811 2812 #pragma weak pthread_mutex_destroy = mutex_destroy 2813 #pragma weak _mutex_destroy = mutex_destroy 2814 int 2815 mutex_destroy(mutex_t *mp) 2816 { 2817 if (mp->mutex_type & USYNC_PROCESS) 2818 forget_lock(mp); 2819 (void) memset(mp, 0, sizeof (*mp)); 2820 tdb_sync_obj_deregister(mp); 2821 return (0); 2822 } 2823 2824 #pragma weak pthread_mutex_consistent_np = mutex_consistent 2825 #pragma weak pthread_mutex_consistent = mutex_consistent 2826 int 2827 mutex_consistent(mutex_t *mp) 2828 { 2829 /* 2830 * Do this only for an inconsistent, initialized robust lock 2831 * that we hold. For all other cases, return EINVAL. 2832 */ 2833 if (mutex_held(mp) && 2834 (mp->mutex_type & LOCK_ROBUST) && 2835 (mp->mutex_flag & LOCK_INITED) && 2836 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2837 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2838 mp->mutex_rcount = 0; 2839 return (0); 2840 } 2841 return (EINVAL); 2842 } 2843 2844 /* 2845 * Spin locks are separate from ordinary mutexes, 2846 * but we use the same data structure for them. 2847 */ 2848 2849 int 2850 pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2851 { 2852 mutex_t *mp = (mutex_t *)lock; 2853 2854 (void) memset(mp, 0, sizeof (*mp)); 2855 if (pshared == PTHREAD_PROCESS_SHARED) 2856 mp->mutex_type = USYNC_PROCESS; 2857 else 2858 mp->mutex_type = USYNC_THREAD; 2859 mp->mutex_flag = LOCK_INITED; 2860 mp->mutex_magic = MUTEX_MAGIC; 2861 2862 /* 2863 * This should be at the beginning of the function, 2864 * but for the sake of old broken applications that 2865 * do not have proper alignment for their mutexes 2866 * (and don't check the return code from pthread_spin_init), 2867 * we put it here, after initializing the mutex regardless. 2868 */ 2869 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2870 curthread->ul_misaligned == 0) 2871 return (EINVAL); 2872 2873 return (0); 2874 } 2875 2876 int 2877 pthread_spin_destroy(pthread_spinlock_t *lock) 2878 { 2879 (void) memset(lock, 0, sizeof (*lock)); 2880 return (0); 2881 } 2882 2883 int 2884 pthread_spin_trylock(pthread_spinlock_t *lock) 2885 { 2886 mutex_t *mp = (mutex_t *)lock; 2887 ulwp_t *self = curthread; 2888 int error = 0; 2889 2890 no_preempt(self); 2891 if (set_lock_byte(&mp->mutex_lockw) != 0) 2892 error = EBUSY; 2893 else { 2894 mp->mutex_owner = (uintptr_t)self; 2895 if (mp->mutex_type == USYNC_PROCESS) 2896 mp->mutex_ownerpid = self->ul_uberdata->pid; 2897 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2898 } 2899 preempt(self); 2900 return (error); 2901 } 2902 2903 int 2904 pthread_spin_lock(pthread_spinlock_t *lock) 2905 { 2906 mutex_t *mp = (mutex_t *)lock; 2907 ulwp_t *self = curthread; 2908 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2909 int count = 0; 2910 2911 ASSERT(!self->ul_critical || self->ul_bindflags); 2912 2913 DTRACE_PROBE1(plockstat, mutex__spin, mp); 2914 2915 /* 2916 * We don't care whether the owner is running on a processor. 2917 * We just spin because that's what this interface requires. 
2918 */ 2919 for (;;) { 2920 if (*lockp == 0) { /* lock byte appears to be clear */ 2921 no_preempt(self); 2922 if (set_lock_byte(lockp) == 0) 2923 break; 2924 preempt(self); 2925 } 2926 if (count < INT_MAX) 2927 count++; 2928 SMT_PAUSE(); 2929 } 2930 mp->mutex_owner = (uintptr_t)self; 2931 if (mp->mutex_type == USYNC_PROCESS) 2932 mp->mutex_ownerpid = self->ul_uberdata->pid; 2933 preempt(self); 2934 if (count) { 2935 DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count); 2936 } 2937 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2938 return (0); 2939 } 2940 2941 int 2942 pthread_spin_unlock(pthread_spinlock_t *lock) 2943 { 2944 mutex_t *mp = (mutex_t *)lock; 2945 ulwp_t *self = curthread; 2946 2947 no_preempt(self); 2948 mp->mutex_owner = 0; 2949 mp->mutex_ownerpid = 0; 2950 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2951 (void) atomic_swap_32(&mp->mutex_lockword, 0); 2952 preempt(self); 2953 return (0); 2954 } 2955 2956 #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 2957 2958 /* 2959 * Find/allocate an entry for 'lock' in our array of held locks. 2960 */ 2961 static mutex_t ** 2962 find_lock_entry(mutex_t *lock) 2963 { 2964 ulwp_t *self = curthread; 2965 mutex_t **remembered = NULL; 2966 mutex_t **lockptr; 2967 uint_t nlocks; 2968 2969 if ((nlocks = self->ul_heldlockcnt) != 0) 2970 lockptr = self->ul_heldlocks.array; 2971 else { 2972 nlocks = 1; 2973 lockptr = &self->ul_heldlocks.single; 2974 } 2975 2976 for (; nlocks; nlocks--, lockptr++) { 2977 if (*lockptr == lock) 2978 return (lockptr); 2979 if (*lockptr == NULL && remembered == NULL) 2980 remembered = lockptr; 2981 } 2982 if (remembered != NULL) { 2983 *remembered = lock; 2984 return (remembered); 2985 } 2986 2987 /* 2988 * No entry available. Allocate more space, converting 2989 * the single entry into an array of entries if necessary. 2990 */ 2991 if ((nlocks = self->ul_heldlockcnt) == 0) { 2992 /* 2993 * Initial allocation of the array. 2994 * Convert the single entry into an array. 2995 */ 2996 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 2997 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 2998 /* 2999 * The single entry becomes the first entry in the array. 3000 */ 3001 *lockptr = self->ul_heldlocks.single; 3002 self->ul_heldlocks.array = lockptr; 3003 /* 3004 * Return the next available entry in the array. 3005 */ 3006 *++lockptr = lock; 3007 return (lockptr); 3008 } 3009 /* 3010 * Reallocate the array, double the size each time. 3011 */ 3012 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 3013 (void) memcpy(lockptr, self->ul_heldlocks.array, 3014 nlocks * sizeof (mutex_t *)); 3015 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 3016 self->ul_heldlocks.array = lockptr; 3017 self->ul_heldlockcnt *= 2; 3018 /* 3019 * Return the next available entry in the newly allocated array. 3020 */ 3021 *(lockptr += nlocks) = lock; 3022 return (lockptr); 3023 } 3024 3025 /* 3026 * Insert 'lock' into our list of held locks. 3027 * Currently only used for LOCK_ROBUST mutexes. 3028 */ 3029 void 3030 remember_lock(mutex_t *lock) 3031 { 3032 (void) find_lock_entry(lock); 3033 } 3034 3035 /* 3036 * Remove 'lock' from our list of held locks. 3037 * Currently only used for LOCK_ROBUST mutexes. 3038 */ 3039 void 3040 forget_lock(mutex_t *lock) 3041 { 3042 *find_lock_entry(lock) = NULL; 3043 } 3044 3045 /* 3046 * Free the array of held locks. 
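 *
 * Aside: the spin-lock interfaces above are used from applications roughly
 * as sketched below.  The sketch is illustrative only (hypothetical names,
 * not part of libc) and is guarded by #if 0 so that it is never compiled.
 */

#if 0	/* illustrative sketch only */
#include <pthread.h>

static pthread_spinlock_t example_spin;
static int example_counter;

/* Spin locks suit very short critical sections; waiters simply burn CPU. */
static void
example_spin_usage(void)
{
	(void) pthread_spin_init(&example_spin, PTHREAD_PROCESS_PRIVATE);

	(void) pthread_spin_lock(&example_spin);
	example_counter++;		/* very short critical section */
	(void) pthread_spin_unlock(&example_spin);

	(void) pthread_spin_destroy(&example_spin);
}
#endif

/*
 * heldlock_free() releases the per-thread held-locks array: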
3047 */ 3048 void 3049 heldlock_free(ulwp_t *ulwp) 3050 { 3051 uint_t nlocks; 3052 3053 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 3054 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 3055 ulwp->ul_heldlockcnt = 0; 3056 ulwp->ul_heldlocks.array = NULL; 3057 } 3058 3059 /* 3060 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 3061 * Called from _thrp_exit() to deal with abandoned locks. 3062 */ 3063 void 3064 heldlock_exit(void) 3065 { 3066 ulwp_t *self = curthread; 3067 mutex_t **lockptr; 3068 uint_t nlocks; 3069 mutex_t *mp; 3070 3071 if ((nlocks = self->ul_heldlockcnt) != 0) 3072 lockptr = self->ul_heldlocks.array; 3073 else { 3074 nlocks = 1; 3075 lockptr = &self->ul_heldlocks.single; 3076 } 3077 3078 for (; nlocks; nlocks--, lockptr++) { 3079 /* 3080 * The kernel takes care of transitioning held 3081 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 3082 * We avoid that case here. 3083 */ 3084 if ((mp = *lockptr) != NULL && 3085 mutex_held(mp) && 3086 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 3087 LOCK_ROBUST) { 3088 mp->mutex_rcount = 0; 3089 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 3090 mp->mutex_flag |= LOCK_OWNERDEAD; 3091 (void) mutex_unlock_internal(mp, 1); 3092 } 3093 } 3094 3095 heldlock_free(self); 3096 } 3097 3098 #pragma weak _cond_init = cond_init 3099 /* ARGSUSED2 */ 3100 int 3101 cond_init(cond_t *cvp, int type, void *arg) 3102 { 3103 if (type != USYNC_THREAD && type != USYNC_PROCESS) 3104 return (EINVAL); 3105 (void) memset(cvp, 0, sizeof (*cvp)); 3106 cvp->cond_type = (uint16_t)type; 3107 cvp->cond_magic = COND_MAGIC; 3108 3109 /* 3110 * This should be at the beginning of the function, 3111 * but for the sake of old broken applications that 3112 * do not have proper alignment for their condvars 3113 * (and don't check the return code from cond_init), 3114 * we put it here, after initializing the condvar regardless. 3115 */ 3116 if (((uintptr_t)cvp & (_LONG_LONG_ALIGNMENT - 1)) && 3117 curthread->ul_misaligned == 0) 3118 return (EINVAL); 3119 3120 return (0); 3121 } 3122 3123 /* 3124 * cond_sleep_queue(): utility function for cond_wait_queue(). 3125 * 3126 * Go to sleep on a condvar sleep queue, expect to be waked up 3127 * by someone calling cond_signal() or cond_broadcast() or due 3128 * to receiving a UNIX signal or being cancelled, or just simply 3129 * due to a spurious wakeup (like someome calling forkall()). 3130 * 3131 * The associated mutex is *not* reacquired before returning. 3132 * That must be done by the caller of cond_sleep_queue(). 3133 */ 3134 static int 3135 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3136 { 3137 ulwp_t *self = curthread; 3138 queue_head_t *qp; 3139 queue_head_t *mqp; 3140 lwpid_t lwpid; 3141 int signalled; 3142 int error; 3143 int cv_wake; 3144 int release_all; 3145 3146 /* 3147 * Put ourself on the CV sleep queue, unlock the mutex, then 3148 * park ourself and unpark a candidate lwp to grab the mutex. 3149 * We must go onto the CV sleep queue before dropping the 3150 * mutex in order to guarantee atomicity of the operation. 
3151 */ 3152 self->ul_sp = stkptr(); 3153 qp = queue_lock(cvp, CV); 3154 enqueue(qp, self, 0); 3155 cvp->cond_waiters_user = 1; 3156 self->ul_cvmutex = mp; 3157 self->ul_cv_wake = cv_wake = (tsp != NULL); 3158 self->ul_signalled = 0; 3159 if (mp->mutex_flag & LOCK_OWNERDEAD) { 3160 mp->mutex_flag &= ~LOCK_OWNERDEAD; 3161 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 3162 } 3163 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 3164 lwpid = mutex_unlock_queue(mp, release_all); 3165 for (;;) { 3166 set_parking_flag(self, 1); 3167 queue_unlock(qp); 3168 if (lwpid != 0) { 3169 lwpid = preempt_unpark(self, lwpid); 3170 preempt(self); 3171 } 3172 /* 3173 * We may have a deferred signal present, 3174 * in which case we should return EINTR. 3175 * Also, we may have received a SIGCANCEL; if so 3176 * and we are cancelable we should return EINTR. 3177 * We force an immediate EINTR return from 3178 * __lwp_park() by turning our parking flag off. 3179 */ 3180 if (self->ul_cursig != 0 || 3181 (self->ul_cancelable && self->ul_cancel_pending)) 3182 set_parking_flag(self, 0); 3183 /* 3184 * __lwp_park() will return the residual time in tsp 3185 * if we are unparked before the timeout expires. 3186 */ 3187 error = __lwp_park(tsp, lwpid); 3188 set_parking_flag(self, 0); 3189 lwpid = 0; /* unpark the other lwp only once */ 3190 /* 3191 * We were waked up by cond_signal(), cond_broadcast(), 3192 * by an interrupt or timeout (EINTR or ETIME), 3193 * or we may just have gotten a spurious wakeup. 3194 */ 3195 qp = queue_lock(cvp, CV); 3196 if (!cv_wake) 3197 mqp = queue_lock(mp, MX); 3198 if (self->ul_sleepq == NULL) 3199 break; 3200 /* 3201 * We are on either the condvar sleep queue or the 3202 * mutex sleep queue. Break out of the sleep if we 3203 * were interrupted or we timed out (EINTR or ETIME). 3204 * Else this is a spurious wakeup; continue the loop. 3205 */ 3206 if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ 3207 if (error) { 3208 mp->mutex_waiters = dequeue_self(mqp); 3209 break; 3210 } 3211 tsp = NULL; /* no more timeout */ 3212 } else if (self->ul_sleepq == qp) { /* condvar queue */ 3213 if (error) { 3214 cvp->cond_waiters_user = dequeue_self(qp); 3215 break; 3216 } 3217 /* 3218 * Else a spurious wakeup on the condvar queue. 3219 * __lwp_park() has already adjusted the timeout. 3220 */ 3221 } else { 3222 thr_panic("cond_sleep_queue(): thread not on queue"); 3223 } 3224 if (!cv_wake) 3225 queue_unlock(mqp); 3226 } 3227 3228 self->ul_sp = 0; 3229 self->ul_cv_wake = 0; 3230 ASSERT(self->ul_cvmutex == NULL); 3231 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 3232 self->ul_wchan == NULL); 3233 3234 signalled = self->ul_signalled; 3235 self->ul_signalled = 0; 3236 queue_unlock(qp); 3237 if (!cv_wake) 3238 queue_unlock(mqp); 3239 3240 /* 3241 * If we were concurrently cond_signal()d and any of: 3242 * received a UNIX signal, were cancelled, or got a timeout, 3243 * then perform another cond_signal() to avoid consuming it. 
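 *
 * Concretely: if this thread was the one chosen by cond_signal()
 * but it is about to return EINTR or ETIME anyway, the wakeup
 * would otherwise be lost and another waiter that could have
 * consumed it would stay asleep.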
3244 */ 3245 if (error && signalled) 3246 (void) cond_signal(cvp); 3247 3248 return (error); 3249 } 3250 3251 static void 3252 cond_wait_check_alignment(cond_t *cvp, mutex_t *mp) 3253 { 3254 if ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) 3255 lock_error(mp, "cond_wait", cvp, "mutex is misaligned"); 3256 if ((uintptr_t)cvp & (_LONG_LONG_ALIGNMENT - 1)) 3257 lock_error(mp, "cond_wait", cvp, "condvar is misaligned"); 3258 } 3259 3260 int 3261 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3262 { 3263 ulwp_t *self = curthread; 3264 int error; 3265 int merror; 3266 3267 if (self->ul_error_detection && self->ul_misaligned == 0) 3268 cond_wait_check_alignment(cvp, mp); 3269 3270 /* 3271 * The old thread library was programmed to defer signals 3272 * while in cond_wait() so that the associated mutex would 3273 * be guaranteed to be held when the application signal 3274 * handler was invoked. 3275 * 3276 * We do not behave this way by default; the state of the 3277 * associated mutex in the signal handler is undefined. 3278 * 3279 * To accommodate applications that depend on the old 3280 * behavior, the _THREAD_COND_WAIT_DEFER environment 3281 * variable can be set to 1 and we will behave in the 3282 * old way with respect to cond_wait(). 3283 */ 3284 if (self->ul_cond_wait_defer) 3285 sigoff(self); 3286 3287 error = cond_sleep_queue(cvp, mp, tsp); 3288 3289 /* 3290 * Reacquire the mutex. 3291 */ 3292 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3293 error = merror; 3294 3295 /* 3296 * Take any deferred signal now, after we have reacquired the mutex. 3297 */ 3298 if (self->ul_cond_wait_defer) 3299 sigon(self); 3300 3301 return (error); 3302 } 3303 3304 /* 3305 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 3306 * See the comment ahead of cond_sleep_queue(), above. 3307 */ 3308 static int 3309 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3310 { 3311 int mtype = mp->mutex_type; 3312 ulwp_t *self = curthread; 3313 int error; 3314 3315 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 3316 _ceil_prio_waive(); 3317 3318 self->ul_sp = stkptr(); 3319 self->ul_wchan = cvp; 3320 sigoff(self); 3321 mp->mutex_owner = 0; 3322 /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ 3323 if (mtype & LOCK_PRIO_INHERIT) { 3324 mp->mutex_lockw = LOCKCLEAR; 3325 self->ul_pilocks--; 3326 } 3327 /* 3328 * ___lwp_cond_wait() returns immediately with EINTR if 3329 * set_parking_flag(self,0) is called on this lwp before it 3330 * goes to sleep in the kernel. sigacthandler() calls this 3331 * when a deferred signal is noted. This assures that we don't 3332 * get stuck in ___lwp_cond_wait() with all signals blocked 3333 * due to taking a deferred signal before going to sleep. 3334 */ 3335 set_parking_flag(self, 1); 3336 if (self->ul_cursig != 0 || 3337 (self->ul_cancelable && self->ul_cancel_pending)) 3338 set_parking_flag(self, 0); 3339 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 3340 set_parking_flag(self, 0); 3341 sigon(self); 3342 self->ul_sp = 0; 3343 self->ul_wchan = NULL; 3344 return (error); 3345 } 3346 3347 int 3348 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3349 { 3350 ulwp_t *self = curthread; 3351 int error; 3352 int merror; 3353 3354 if (self->ul_error_detection && self->ul_misaligned == 0) 3355 cond_wait_check_alignment(cvp, mp); 3356 3357 /* 3358 * See the large comment in cond_wait_queue(), above. 
3359 */ 3360 if (self->ul_cond_wait_defer) 3361 sigoff(self); 3362 3363 error = cond_sleep_kernel(cvp, mp, tsp); 3364 3365 /* 3366 * Override the return code from ___lwp_cond_wait() 3367 * with any non-zero return code from mutex_lock(). 3368 * This addresses robust lock failures in particular; 3369 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 3370 * errors in order to take corrective action. 3371 */ 3372 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3373 error = merror; 3374 3375 /* 3376 * Take any deferred signal now, after we have reacquired the mutex. 3377 */ 3378 if (self->ul_cond_wait_defer) 3379 sigon(self); 3380 3381 return (error); 3382 } 3383 3384 /* 3385 * Common code for cond_wait() and cond_timedwait() 3386 */ 3387 int 3388 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3389 { 3390 int mtype = mp->mutex_type; 3391 hrtime_t begin_sleep = 0; 3392 ulwp_t *self = curthread; 3393 uberdata_t *udp = self->ul_uberdata; 3394 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3395 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 3396 uint8_t rcount; 3397 int error = 0; 3398 3399 /* 3400 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 3401 * Except in the case of [ETIMEDOUT], all these error checks 3402 * shall act as if they were performed immediately at the 3403 * beginning of processing for the function and shall cause 3404 * an error return, in effect, prior to modifying the state 3405 * of the mutex specified by mutex or the condition variable 3406 * specified by cond. 3407 * Therefore, we must return EINVAL now if the timout is invalid. 3408 */ 3409 if (tsp != NULL && 3410 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 3411 return (EINVAL); 3412 3413 if (__td_event_report(self, TD_SLEEP, udp)) { 3414 self->ul_sp = stkptr(); 3415 self->ul_wchan = cvp; 3416 self->ul_td_evbuf.eventnum = TD_SLEEP; 3417 self->ul_td_evbuf.eventdata = cvp; 3418 tdb_event(TD_SLEEP, udp); 3419 self->ul_sp = 0; 3420 } 3421 if (csp) { 3422 if (tsp) 3423 tdb_incr(csp->cond_timedwait); 3424 else 3425 tdb_incr(csp->cond_wait); 3426 } 3427 if (msp) 3428 begin_sleep = record_hold_time(msp); 3429 else if (csp) 3430 begin_sleep = gethrtime(); 3431 3432 if (self->ul_error_detection) { 3433 if (!mutex_held(mp)) 3434 lock_error(mp, "cond_wait", cvp, NULL); 3435 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3436 lock_error(mp, "recursive mutex in cond_wait", 3437 cvp, NULL); 3438 if (cvp->cond_type & USYNC_PROCESS) { 3439 if (!(mtype & USYNC_PROCESS)) 3440 lock_error(mp, "cond_wait", cvp, 3441 "condvar process-shared, " 3442 "mutex process-private"); 3443 } else { 3444 if (mtype & USYNC_PROCESS) 3445 lock_error(mp, "cond_wait", cvp, 3446 "condvar process-private, " 3447 "mutex process-shared"); 3448 } 3449 } 3450 3451 /* 3452 * We deal with recursive mutexes by completely 3453 * dropping the lock and restoring the recursion 3454 * count after waking up. This is arguably wrong, 3455 * but it obeys the principle of least astonishment. 
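 *
 * In other words, a thread that has acquired a recursive mutex
 * several times over and then waits on a condition variable gives
 * the mutex up entirely, so that another thread can acquire it and
 * signal the condition; the full recursion count is restored below
 * once the wait returns.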
3456 */ 3457 rcount = mp->mutex_rcount; 3458 mp->mutex_rcount = 0; 3459 if ((mtype & 3460 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3461 (cvp->cond_type & USYNC_PROCESS)) 3462 error = cond_wait_kernel(cvp, mp, tsp); 3463 else 3464 error = cond_wait_queue(cvp, mp, tsp); 3465 mp->mutex_rcount = rcount; 3466 3467 if (csp) { 3468 hrtime_t lapse = gethrtime() - begin_sleep; 3469 if (tsp == NULL) 3470 csp->cond_wait_sleep_time += lapse; 3471 else { 3472 csp->cond_timedwait_sleep_time += lapse; 3473 if (error == ETIME) 3474 tdb_incr(csp->cond_timedwait_timeout); 3475 } 3476 } 3477 return (error); 3478 } 3479 3480 /* 3481 * cond_wait() is a cancellation point but __cond_wait() is not. 3482 * Internally, libc calls the non-cancellation version. 3483 * Other libraries need to use pthread_setcancelstate(), as appropriate, 3484 * since __cond_wait() is not exported from libc. 3485 */ 3486 int 3487 __cond_wait(cond_t *cvp, mutex_t *mp) 3488 { 3489 ulwp_t *self = curthread; 3490 uberdata_t *udp = self->ul_uberdata; 3491 uberflags_t *gflags; 3492 3493 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3494 !mutex_held(mp)) 3495 return (EPERM); 3496 3497 /* 3498 * Optimize the common case of USYNC_THREAD plus 3499 * no error detection, no lock statistics, and no event tracing. 3500 */ 3501 if ((gflags = self->ul_schedctl_called) != NULL && 3502 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3503 self->ul_td_events_enable | 3504 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3505 return (cond_wait_queue(cvp, mp, NULL)); 3506 3507 /* 3508 * Else do it the long way. 3509 */ 3510 return (cond_wait_common(cvp, mp, NULL)); 3511 } 3512 3513 #pragma weak _cond_wait = cond_wait 3514 int 3515 cond_wait(cond_t *cvp, mutex_t *mp) 3516 { 3517 int error; 3518 3519 _cancelon(); 3520 error = __cond_wait(cvp, mp); 3521 if (error == EINTR) 3522 _canceloff(); 3523 else 3524 _canceloff_nocancel(); 3525 return (error); 3526 } 3527 3528 /* 3529 * pthread_cond_wait() is a cancellation point. 3530 */ 3531 int 3532 pthread_cond_wait(pthread_cond_t *_RESTRICT_KYWD cvp, 3533 pthread_mutex_t *_RESTRICT_KYWD mp) 3534 { 3535 int error; 3536 3537 error = cond_wait((cond_t *)cvp, (mutex_t *)mp); 3538 return ((error == EINTR)? 0 : error); 3539 } 3540 3541 /* 3542 * cond_timedwait() is a cancellation point but __cond_timedwait() is not. 3543 */ 3544 int 3545 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3546 { 3547 clockid_t clock_id = cvp->cond_clockid; 3548 timespec_t reltime; 3549 int error; 3550 3551 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3552 !mutex_held(mp)) 3553 return (EPERM); 3554 3555 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3556 clock_id = CLOCK_REALTIME; 3557 abstime_to_reltime(clock_id, abstime, &reltime); 3558 error = cond_wait_common(cvp, mp, &reltime); 3559 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3560 /* 3561 * Don't return ETIME if we didn't really get a timeout. 3562 * This can happen if we return because someone resets 3563 * the system clock. Just return zero in this case, 3564 * giving a spurious wakeup but not a timeout. 
3565 */ 3566 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3567 abstime->tv_nsec > gethrtime()) 3568 error = 0; 3569 } 3570 return (error); 3571 } 3572 3573 int 3574 cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3575 { 3576 int error; 3577 3578 _cancelon(); 3579 error = __cond_timedwait(cvp, mp, abstime); 3580 if (error == EINTR) 3581 _canceloff(); 3582 else 3583 _canceloff_nocancel(); 3584 return (error); 3585 } 3586 3587 /* 3588 * pthread_cond_timedwait() is a cancellation point. 3589 */ 3590 int 3591 pthread_cond_timedwait(pthread_cond_t *_RESTRICT_KYWD cvp, 3592 pthread_mutex_t *_RESTRICT_KYWD mp, 3593 const struct timespec *_RESTRICT_KYWD abstime) 3594 { 3595 int error; 3596 3597 error = cond_timedwait((cond_t *)cvp, (mutex_t *)mp, abstime); 3598 if (error == ETIME) 3599 error = ETIMEDOUT; 3600 else if (error == EINTR) 3601 error = 0; 3602 return (error); 3603 } 3604 3605 /* 3606 * cond_reltimedwait() is a cancellation point but __cond_reltimedwait() is not. 3607 */ 3608 int 3609 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3610 { 3611 timespec_t tslocal = *reltime; 3612 3613 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3614 !mutex_held(mp)) 3615 return (EPERM); 3616 3617 return (cond_wait_common(cvp, mp, &tslocal)); 3618 } 3619 3620 int 3621 cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3622 { 3623 int error; 3624 3625 _cancelon(); 3626 error = __cond_reltimedwait(cvp, mp, reltime); 3627 if (error == EINTR) 3628 _canceloff(); 3629 else 3630 _canceloff_nocancel(); 3631 return (error); 3632 } 3633 3634 int 3635 pthread_cond_reltimedwait_np(pthread_cond_t *_RESTRICT_KYWD cvp, 3636 pthread_mutex_t *_RESTRICT_KYWD mp, 3637 const struct timespec *_RESTRICT_KYWD reltime) 3638 { 3639 int error; 3640 3641 error = cond_reltimedwait((cond_t *)cvp, (mutex_t *)mp, reltime); 3642 if (error == ETIME) 3643 error = ETIMEDOUT; 3644 else if (error == EINTR) 3645 error = 0; 3646 return (error); 3647 } 3648 3649 #pragma weak pthread_cond_signal = cond_signal 3650 #pragma weak _cond_signal = cond_signal 3651 int 3652 cond_signal(cond_t *cvp) 3653 { 3654 ulwp_t *self = curthread; 3655 uberdata_t *udp = self->ul_uberdata; 3656 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3657 int error = 0; 3658 int more; 3659 lwpid_t lwpid; 3660 queue_head_t *qp; 3661 mutex_t *mp; 3662 queue_head_t *mqp; 3663 ulwp_t **ulwpp; 3664 ulwp_t *ulwp; 3665 ulwp_t *prev; 3666 3667 if (csp) 3668 tdb_incr(csp->cond_signal); 3669 3670 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3671 error = _lwp_cond_signal(cvp); 3672 3673 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3674 return (error); 3675 3676 /* 3677 * Move someone from the condvar sleep queue to the mutex sleep 3678 * queue for the mutex that he will acquire on being waked up. 3679 * We can do this only if we own the mutex he will acquire. 3680 * If we do not own the mutex, or if his ul_cv_wake flag 3681 * is set, just dequeue and unpark him. 3682 */ 3683 qp = queue_lock(cvp, CV); 3684 ulwpp = queue_slot(qp, &prev, &more); 3685 cvp->cond_waiters_user = more; 3686 if (ulwpp == NULL) { /* no one on the sleep queue */ 3687 queue_unlock(qp); 3688 return (error); 3689 } 3690 ulwp = *ulwpp; 3691 3692 /* 3693 * Inform the thread that he was the recipient of a cond_signal(). 3694 * This lets him deal with cond_signal() and, concurrently, 3695 * one or more of a cancellation, a UNIX signal, or a timeout. 
3696 * These latter conditions must not consume a cond_signal(). 3697 */ 3698 ulwp->ul_signalled = 1; 3699 3700 /* 3701 * Dequeue the waiter but leave his ul_sleepq non-NULL 3702 * while we move him to the mutex queue so that he can 3703 * deal properly with spurious wakeups. 3704 */ 3705 queue_unlink(qp, ulwpp, prev); 3706 3707 mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 3708 ulwp->ul_cvmutex = NULL; 3709 ASSERT(mp != NULL); 3710 3711 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3712 /* just wake him up */ 3713 lwpid = ulwp->ul_lwpid; 3714 no_preempt(self); 3715 ulwp->ul_sleepq = NULL; 3716 ulwp->ul_wchan = NULL; 3717 queue_unlock(qp); 3718 (void) __lwp_unpark(lwpid); 3719 preempt(self); 3720 } else { 3721 /* move him to the mutex queue */ 3722 mqp = queue_lock(mp, MX); 3723 enqueue(mqp, ulwp, 0); 3724 mp->mutex_waiters = 1; 3725 queue_unlock(mqp); 3726 queue_unlock(qp); 3727 } 3728 3729 return (error); 3730 } 3731 3732 /* 3733 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3734 * and rw_queue_release() to (re)allocate a big buffer to hold the 3735 * lwpids of all the threads to be set running after they are removed 3736 * from their sleep queues. Since we are holding a queue lock, we 3737 * cannot call any function that might acquire a lock. mmap(), munmap(), 3738 * lwp_unpark_all() are simple system calls and are safe in this regard. 3739 */ 3740 lwpid_t * 3741 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3742 { 3743 /* 3744 * Allocate NEWLWPS ids on the first overflow. 3745 * Double the allocation each time after that. 3746 */ 3747 int nlwpid = *nlwpid_ptr; 3748 int maxlwps = *maxlwps_ptr; 3749 int first_allocation; 3750 int newlwps; 3751 void *vaddr; 3752 3753 ASSERT(nlwpid == maxlwps); 3754 3755 first_allocation = (maxlwps == MAXLWPS); 3756 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3757 vaddr = mmap(NULL, newlwps * sizeof (lwpid_t), 3758 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3759 3760 if (vaddr == MAP_FAILED) { 3761 /* 3762 * Let's hope this never happens. 3763 * If it does, then we have a terrible 3764 * thundering herd on our hands. 3765 */ 3766 (void) __lwp_unpark_all(lwpid, nlwpid); 3767 *nlwpid_ptr = 0; 3768 } else { 3769 (void) memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3770 if (!first_allocation) 3771 (void) munmap((caddr_t)lwpid, 3772 maxlwps * sizeof (lwpid_t)); 3773 lwpid = vaddr; 3774 *maxlwps_ptr = newlwps; 3775 } 3776 3777 return (lwpid); 3778 } 3779 3780 #pragma weak pthread_cond_broadcast = cond_broadcast 3781 #pragma weak _cond_broadcast = cond_broadcast 3782 int 3783 cond_broadcast(cond_t *cvp) 3784 { 3785 ulwp_t *self = curthread; 3786 uberdata_t *udp = self->ul_uberdata; 3787 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3788 int error = 0; 3789 queue_head_t *qp; 3790 queue_root_t *qrp; 3791 mutex_t *mp; 3792 mutex_t *mp_cache = NULL; 3793 queue_head_t *mqp = NULL; 3794 ulwp_t *ulwp; 3795 int nlwpid = 0; 3796 int maxlwps = MAXLWPS; 3797 lwpid_t buffer[MAXLWPS]; 3798 lwpid_t *lwpid = buffer; 3799 3800 if (csp) 3801 tdb_incr(csp->cond_broadcast); 3802 3803 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3804 error = _lwp_cond_broadcast(cvp); 3805 3806 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3807 return (error); 3808 3809 /* 3810 * Move everyone from the condvar sleep queue to the mutex sleep 3811 * queue for the mutex that they will acquire on being waked up. 3812 * We can do this only if we own the mutex they will acquire. 
3813 * If we do not own the mutex, or if their ul_cv_wake flag 3814 * is set, just dequeue and unpark them. 3815 * 3816 * We keep track of lwpids that are to be unparked in lwpid[]. 3817 * __lwp_unpark_all() is called to unpark all of them after 3818 * they have been removed from the sleep queue and the sleep 3819 * queue lock has been dropped. If we run out of space in our 3820 * on-stack buffer, we need to allocate more but we can't call 3821 * lmalloc() because we are holding a queue lock when the overflow 3822 * occurs and lmalloc() acquires a lock. We can't use alloca() 3823 * either because the application may have allocated a small 3824 * stack and we don't want to overrun the stack. So we call 3825 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3826 * system call directly since that path acquires no locks. 3827 */ 3828 qp = queue_lock(cvp, CV); 3829 cvp->cond_waiters_user = 0; 3830 for (;;) { 3831 if ((qrp = qp->qh_root) == NULL || 3832 (ulwp = qrp->qr_head) == NULL) 3833 break; 3834 ASSERT(ulwp->ul_wchan == cvp); 3835 queue_unlink(qp, &qrp->qr_head, NULL); 3836 mp = ulwp->ul_cvmutex; /* his mutex */ 3837 ulwp->ul_cvmutex = NULL; 3838 ASSERT(mp != NULL); 3839 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3840 /* just wake him up */ 3841 ulwp->ul_sleepq = NULL; 3842 ulwp->ul_wchan = NULL; 3843 if (nlwpid == maxlwps) 3844 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3845 lwpid[nlwpid++] = ulwp->ul_lwpid; 3846 } else { 3847 /* move him to the mutex queue */ 3848 if (mp != mp_cache) { 3849 mp_cache = mp; 3850 if (mqp != NULL) 3851 queue_unlock(mqp); 3852 mqp = queue_lock(mp, MX); 3853 } 3854 enqueue(mqp, ulwp, 0); 3855 mp->mutex_waiters = 1; 3856 } 3857 } 3858 if (mqp != NULL) 3859 queue_unlock(mqp); 3860 if (nlwpid == 0) { 3861 queue_unlock(qp); 3862 } else { 3863 no_preempt(self); 3864 queue_unlock(qp); 3865 if (nlwpid == 1) 3866 (void) __lwp_unpark(lwpid[0]); 3867 else 3868 (void) __lwp_unpark_all(lwpid, nlwpid); 3869 preempt(self); 3870 } 3871 if (lwpid != buffer) 3872 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 3873 return (error); 3874 } 3875 3876 #pragma weak pthread_cond_destroy = cond_destroy 3877 int 3878 cond_destroy(cond_t *cvp) 3879 { 3880 cvp->cond_magic = 0; 3881 tdb_sync_obj_deregister(cvp); 3882 return (0); 3883 } 3884 3885 #if defined(THREAD_DEBUG) 3886 void 3887 assert_no_libc_locks_held(void) 3888 { 3889 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3890 } 3891 3892 /* protected by link_lock */ 3893 uint64_t spin_lock_spin; 3894 uint64_t spin_lock_spin2; 3895 uint64_t spin_lock_sleep; 3896 uint64_t spin_lock_wakeup; 3897 3898 /* 3899 * Record spin lock statistics. 3900 * Called by a thread exiting itself in thrp_exit(). 3901 * Also called via atexit() from the thread calling 3902 * exit() to do all the other threads as well. 3903 */ 3904 void 3905 record_spin_locks(ulwp_t *ulwp) 3906 { 3907 spin_lock_spin += ulwp->ul_spin_lock_spin; 3908 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3909 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3910 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3911 ulwp->ul_spin_lock_spin = 0; 3912 ulwp->ul_spin_lock_spin2 = 0; 3913 ulwp->ul_spin_lock_sleep = 0; 3914 ulwp->ul_spin_lock_wakeup = 0; 3915 } 3916 3917 /* 3918 * atexit function: dump the queue statistics to stderr. 
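 *
 * Aside: the canonical application-level use of the condition-variable
 * interfaces above always re-checks its predicate in a loop, because
 * spurious wakeups are possible.  The sketch below is illustrative only
 * (hypothetical names, not part of libc) and is guarded by #if 0 so that
 * it is never compiled.
 */

#if 0	/* illustrative sketch only */
#include <pthread.h>

static pthread_mutex_t example_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t example_cv = PTHREAD_COND_INITIALIZER;
static int example_ready;

/* Wait until example_ready is set; tolerate spurious wakeups. */
static void
example_wait_for_ready(void)
{
	(void) pthread_mutex_lock(&example_lock);
	while (!example_ready)
		(void) pthread_cond_wait(&example_cv, &example_lock);
	(void) pthread_mutex_unlock(&example_lock);
}

/* Set the predicate and wake every waiter. */
static void
example_announce_ready(void)
{
	(void) pthread_mutex_lock(&example_lock);
	example_ready = 1;
	(void) pthread_cond_broadcast(&example_cv);
	(void) pthread_mutex_unlock(&example_lock);
}
#endif

/*
 * dump_queue_statistics():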
3919 */ 3920 #include <stdio.h> 3921 void 3922 dump_queue_statistics(void) 3923 { 3924 uberdata_t *udp = curthread->ul_uberdata; 3925 queue_head_t *qp; 3926 int qn; 3927 uint64_t spin_lock_total = 0; 3928 3929 if (udp->queue_head == NULL || thread_queue_dump == 0) 3930 return; 3931 3932 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3933 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3934 return; 3935 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3936 if (qp->qh_lockcount == 0) 3937 continue; 3938 spin_lock_total += qp->qh_lockcount; 3939 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3940 (u_longlong_t)qp->qh_lockcount, 3941 qp->qh_qmax, qp->qh_hmax) < 0) 3942 return; 3943 } 3944 3945 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3946 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3947 return; 3948 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3949 if (qp->qh_lockcount == 0) 3950 continue; 3951 spin_lock_total += qp->qh_lockcount; 3952 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3953 (u_longlong_t)qp->qh_lockcount, 3954 qp->qh_qmax, qp->qh_hmax) < 0) 3955 return; 3956 } 3957 3958 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3959 (u_longlong_t)spin_lock_total); 3960 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3961 (u_longlong_t)spin_lock_spin); 3962 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3963 (u_longlong_t)spin_lock_spin2); 3964 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3965 (u_longlong_t)spin_lock_sleep); 3966 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3967 (u_longlong_t)spin_lock_wakeup); 3968 } 3969 #endif 3970
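
/*
 * Appendix (illustrative only, not part of libc): a typical timed wait
 * against the condition-variable interfaces implemented above.  The
 * deadline is an absolute CLOCK_REALTIME value, and the internal ETIME
 * is reported to the application as ETIMEDOUT, as pthread_cond_timedwait()
 * above arranges.  Names here are hypothetical; the sketch is guarded by
 * #if 0 so that it is never compiled.
 */
#if 0	/* illustrative sketch only */
#include <pthread.h>
#include <errno.h>
#include <time.h>

static pthread_mutex_t example_m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t example_c = PTHREAD_COND_INITIALIZER;
static int example_done;

/* Wait up to 'seconds' for example_done to become nonzero. */
static int
example_timedwait(int seconds)
{
	struct timespec deadline;
	int error = 0;

	(void) clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += seconds;

	(void) pthread_mutex_lock(&example_m);
	while (!example_done && error != ETIMEDOUT)
		error = pthread_cond_timedwait(&example_c, &example_m,
		    &deadline);
	(void) pthread_mutex_unlock(&example_m);

	return (example_done? 0 : error);
}
#endif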