1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/sdt.h> 30 31 #include "lint.h" 32 #include "thr_uberdata.h" 33 34 /* 35 * This mutex is initialized to be held by lwp#1. 36 * It is used to block a thread that has returned from a mutex_lock() 37 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 38 */ 39 mutex_t stall_mutex = DEFAULTMUTEX; 40 41 static int shared_mutex_held(mutex_t *); 42 static int mutex_unlock_internal(mutex_t *, int); 43 static int mutex_queuelock_adaptive(mutex_t *); 44 static void mutex_wakeup_all(mutex_t *); 45 46 /* 47 * Lock statistics support functions. 48 */ 49 void 50 record_begin_hold(tdb_mutex_stats_t *msp) 51 { 52 tdb_incr(msp->mutex_lock); 53 msp->mutex_begin_hold = gethrtime(); 54 } 55 56 hrtime_t 57 record_hold_time(tdb_mutex_stats_t *msp) 58 { 59 hrtime_t now = gethrtime(); 60 61 if (msp->mutex_begin_hold) 62 msp->mutex_hold_time += now - msp->mutex_begin_hold; 63 msp->mutex_begin_hold = 0; 64 return (now); 65 } 66 67 /* 68 * Called once at library initialization. 69 */ 70 void 71 mutex_setup(void) 72 { 73 if (set_lock_byte(&stall_mutex.mutex_lockw)) 74 thr_panic("mutex_setup() cannot acquire stall_mutex"); 75 stall_mutex.mutex_owner = (uintptr_t)curthread; 76 } 77 78 /* 79 * The default spin count of 1000 is experimentally determined. 80 * On sun4u machines with any number of processors it could be raised 81 * to 10,000 but that (experimentally) makes almost no difference. 82 * The environment variable: 83 * _THREAD_ADAPTIVE_SPIN=count 84 * can be used to override and set the count in the range [0 .. 1,000,000]. 85 */ 86 int thread_adaptive_spin = 1000; 87 uint_t thread_max_spinners = 100; 88 int thread_queue_verify = 0; 89 static int ncpus; 90 91 /* 92 * Distinguish spinning for queue locks from spinning for regular locks. 93 * We try harder to acquire queue locks by spinning. 94 * The environment variable: 95 * _THREAD_QUEUE_SPIN=count 96 * can be used to override and set the count in the range [0 .. 1,000,000]. 
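 * For example (an illustrative invocation, not anything defined in this
 * file), starting a process with
 *	_THREAD_ADAPTIVE_SPIN=5000 _THREAD_QUEUE_SPIN=20000
 * in its environment substitutes those values for the compiled-in defaults.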
97 */ 98 int thread_queue_spin = 10000; 99 100 #define ALL_ATTRIBUTES \ 101 (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 102 LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 103 LOCK_ROBUST) 104 105 /* 106 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 107 * augmented by zero or more of the flags: 108 * LOCK_RECURSIVE 109 * LOCK_ERRORCHECK 110 * LOCK_PRIO_INHERIT 111 * LOCK_PRIO_PROTECT 112 * LOCK_ROBUST 113 */ 114 #pragma weak _private_mutex_init = __mutex_init 115 #pragma weak mutex_init = __mutex_init 116 #pragma weak _mutex_init = __mutex_init 117 /* ARGSUSED2 */ 118 int 119 __mutex_init(mutex_t *mp, int type, void *arg) 120 { 121 int basetype = (type & ~ALL_ATTRIBUTES); 122 int error = 0; 123 124 if (basetype == USYNC_PROCESS_ROBUST) { 125 /* 126 * USYNC_PROCESS_ROBUST is a deprecated historical type. 127 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 128 * retain the USYNC_PROCESS_ROBUST flag so we can return 129 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 130 * mutexes will ever draw ELOCKUNMAPPED). 131 */ 132 type |= (USYNC_PROCESS | LOCK_ROBUST); 133 basetype = USYNC_PROCESS; 134 } 135 136 if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) || 137 (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 138 == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) { 139 error = EINVAL; 140 } else if (type & LOCK_ROBUST) { 141 /* 142 * Callers of mutex_init() with the LOCK_ROBUST attribute 143 * are required to pass an initially all-zero mutex. 144 * Multiple calls to mutex_init() are allowed; all but 145 * the first return EBUSY. A call to mutex_init() is 146 * allowed to make an inconsistent robust lock consistent 147 * (for historical usage, even though the proper interface 148 * for this is mutex_consistent()). Note that we use 149 * atomic_or_16() to set the LOCK_INITED flag so as 150 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 151 */ 152 extern void _atomic_or_16(volatile uint16_t *, uint16_t); 153 if (!(mp->mutex_flag & LOCK_INITED)) { 154 mp->mutex_type = (uint8_t)type; 155 _atomic_or_16(&mp->mutex_flag, LOCK_INITED); 156 mp->mutex_magic = MUTEX_MAGIC; 157 } else if (type != mp->mutex_type || 158 ((type & LOCK_PRIO_PROTECT) && 159 mp->mutex_ceiling != (*(int *)arg))) { 160 error = EINVAL; 161 } else if (__mutex_consistent(mp) != 0) { 162 error = EBUSY; 163 } 164 /* register a process robust mutex with the kernel */ 165 if (basetype == USYNC_PROCESS) 166 register_lock(mp); 167 } else { 168 (void) _memset(mp, 0, sizeof (*mp)); 169 mp->mutex_type = (uint8_t)type; 170 mp->mutex_flag = LOCK_INITED; 171 mp->mutex_magic = MUTEX_MAGIC; 172 } 173 174 if (error == 0 && (type & LOCK_PRIO_PROTECT)) 175 mp->mutex_ceiling = (uint8_t)(*(int *)arg); 176 177 return (error); 178 } 179 180 /* 181 * Delete mp from list of ceil mutexes owned by curthread. 182 * Return 1 if the head of the chain was updated. 183 */ 184 int 185 _ceil_mylist_del(mutex_t *mp) 186 { 187 ulwp_t *self = curthread; 188 mxchain_t **mcpp; 189 mxchain_t *mcp; 190 191 mcpp = &self->ul_mxchain; 192 while ((*mcpp)->mxchain_mx != mp) 193 mcpp = &(*mcpp)->mxchain_next; 194 mcp = *mcpp; 195 *mcpp = mcp->mxchain_next; 196 lfree(mcp, sizeof (*mcp)); 197 return (mcpp == &self->ul_mxchain); 198 } 199 200 /* 201 * Add mp to head of list of ceil mutexes owned by curthread. 202 * Return ENOMEM if no memory could be allocated. 
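 *
 * Aside, illustrating __mutex_init() above rather than this function:
 * a non-normative sketch of initializing a robust, process-shared lock,
 * where 'shared_region' is a hypothetical pointer into memory mapped by
 * all of the cooperating processes (LOCK_ROBUST requires zeroed memory):
 *
 *	mutex_t *mp = (mutex_t *)shared_region;
 *	(void) memset(mp, 0, sizeof (*mp));
 *	if (mutex_init(mp, USYNC_PROCESS | LOCK_ROBUST, NULL) != 0)
 *		abort();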
203 */ 204 int 205 _ceil_mylist_add(mutex_t *mp) 206 { 207 ulwp_t *self = curthread; 208 mxchain_t *mcp; 209 210 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 211 return (ENOMEM); 212 mcp->mxchain_mx = mp; 213 mcp->mxchain_next = self->ul_mxchain; 214 self->ul_mxchain = mcp; 215 return (0); 216 } 217 218 /* 219 * Inherit priority from ceiling. The inheritance impacts the effective 220 * priority, not the assigned priority. See _thread_setschedparam_main(). 221 */ 222 void 223 _ceil_prio_inherit(int ceil) 224 { 225 ulwp_t *self = curthread; 226 struct sched_param param; 227 228 (void) _memset(¶m, 0, sizeof (param)); 229 param.sched_priority = ceil; 230 if (_thread_setschedparam_main(self->ul_lwpid, 231 self->ul_policy, ¶m, PRIO_INHERIT)) { 232 /* 233 * Panic since unclear what error code to return. 234 * If we do return the error codes returned by above 235 * called routine, update the man page... 236 */ 237 thr_panic("_thread_setschedparam_main() fails"); 238 } 239 } 240 241 /* 242 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 243 * if holding at least one ceiling lock. If no ceiling locks are held at this 244 * point, disinherit completely, reverting back to assigned priority. 245 */ 246 void 247 _ceil_prio_waive(void) 248 { 249 ulwp_t *self = curthread; 250 struct sched_param param; 251 252 (void) _memset(¶m, 0, sizeof (param)); 253 if (self->ul_mxchain == NULL) { 254 /* 255 * No ceil locks held. Zero the epri, revert back to ul_pri. 256 * Since thread's hash lock is not held, one cannot just 257 * read ul_pri here...do it in the called routine... 258 */ 259 param.sched_priority = self->ul_pri; /* ignored */ 260 if (_thread_setschedparam_main(self->ul_lwpid, 261 self->ul_policy, ¶m, PRIO_DISINHERIT)) 262 thr_panic("_thread_setschedparam_main() fails"); 263 } else { 264 /* 265 * Set priority to that of the mutex at the head 266 * of the ceilmutex chain. 267 */ 268 param.sched_priority = 269 self->ul_mxchain->mxchain_mx->mutex_ceiling; 270 if (_thread_setschedparam_main(self->ul_lwpid, 271 self->ul_policy, ¶m, PRIO_INHERIT)) 272 thr_panic("_thread_setschedparam_main() fails"); 273 } 274 } 275 276 /* 277 * Clear the lock byte. Retain the waiters byte and the spinners byte. 278 * Return the old value of the lock word. 279 */ 280 static uint32_t 281 clear_lockbyte(volatile uint32_t *lockword) 282 { 283 uint32_t old; 284 uint32_t new; 285 286 do { 287 old = *lockword; 288 new = old & ~LOCKMASK; 289 } while (atomic_cas_32(lockword, old, new) != old); 290 291 return (old); 292 } 293 294 /* 295 * Increment the spinners count in the mutex lock word. 296 * Return 0 on success. Return -1 if the count would overflow. 297 */ 298 static int 299 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners) 300 { 301 uint32_t old; 302 uint32_t new; 303 304 do { 305 old = *lockword; 306 if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners) 307 return (-1); 308 new = old + (1 << SPINNERSHIFT); 309 } while (atomic_cas_32(lockword, old, new) != old); 310 311 return (0); 312 } 313 314 /* 315 * Decrement the spinners count in the mutex lock word. 316 * Return the new value of the lock word. 317 */ 318 static uint32_t 319 spinners_decr(volatile uint32_t *lockword) 320 { 321 uint32_t old; 322 uint32_t new; 323 324 do { 325 new = old = *lockword; 326 if (new & SPINNERMASK) 327 new -= (1 << SPINNERSHIFT); 328 } while (atomic_cas_32(lockword, old, new) != old); 329 330 return (new); 331 } 332 333 /* 334 * Non-preemptive spin locks. Used by queue_lock(). 
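 * They protect the sleep queue heads (qh_lock) and are held only briefly,
 * with preemption disabled via no_preempt()/preempt().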
335 * No lock statistics are gathered for these locks. 336 * No DTrace probes are provided for these locks. 337 */ 338 void 339 spin_lock_set(mutex_t *mp) 340 { 341 ulwp_t *self = curthread; 342 343 no_preempt(self); 344 if (set_lock_byte(&mp->mutex_lockw) == 0) { 345 mp->mutex_owner = (uintptr_t)self; 346 return; 347 } 348 /* 349 * Spin for a while, attempting to acquire the lock. 350 */ 351 if (self->ul_spin_lock_spin != UINT_MAX) 352 self->ul_spin_lock_spin++; 353 if (mutex_queuelock_adaptive(mp) == 0 || 354 set_lock_byte(&mp->mutex_lockw) == 0) { 355 mp->mutex_owner = (uintptr_t)self; 356 return; 357 } 358 /* 359 * Try harder if we were previously at a no preemption level. 360 */ 361 if (self->ul_preempt > 1) { 362 if (self->ul_spin_lock_spin2 != UINT_MAX) 363 self->ul_spin_lock_spin2++; 364 if (mutex_queuelock_adaptive(mp) == 0 || 365 set_lock_byte(&mp->mutex_lockw) == 0) { 366 mp->mutex_owner = (uintptr_t)self; 367 return; 368 } 369 } 370 /* 371 * Give up and block in the kernel for the mutex. 372 */ 373 if (self->ul_spin_lock_sleep != UINT_MAX) 374 self->ul_spin_lock_sleep++; 375 (void) ___lwp_mutex_timedlock(mp, NULL); 376 mp->mutex_owner = (uintptr_t)self; 377 } 378 379 void 380 spin_lock_clear(mutex_t *mp) 381 { 382 ulwp_t *self = curthread; 383 384 mp->mutex_owner = 0; 385 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 386 (void) ___lwp_mutex_wakeup(mp, 0); 387 if (self->ul_spin_lock_wakeup != UINT_MAX) 388 self->ul_spin_lock_wakeup++; 389 } 390 preempt(self); 391 } 392 393 /* 394 * Allocate the sleep queue hash table. 395 */ 396 void 397 queue_alloc(void) 398 { 399 ulwp_t *self = curthread; 400 uberdata_t *udp = self->ul_uberdata; 401 mutex_t *mp; 402 void *data; 403 int i; 404 405 /* 406 * No locks are needed; we call here only when single-threaded. 407 */ 408 ASSERT(self == udp->ulwp_one); 409 ASSERT(!udp->uberflags.uf_mt); 410 if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 411 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 412 == MAP_FAILED) 413 thr_panic("cannot allocate thread queue_head table"); 414 udp->queue_head = (queue_head_t *)data; 415 for (i = 0; i < 2 * QHASHSIZE; i++) { 416 mp = &udp->queue_head[i].qh_lock; 417 mp->mutex_flag = LOCK_INITED; 418 mp->mutex_magic = MUTEX_MAGIC; 419 } 420 } 421 422 #if defined(THREAD_DEBUG) 423 424 /* 425 * Debugging: verify correctness of a sleep queue. 426 */ 427 void 428 QVERIFY(queue_head_t *qp) 429 { 430 ulwp_t *self = curthread; 431 uberdata_t *udp = self->ul_uberdata; 432 ulwp_t *ulwp; 433 ulwp_t *prev; 434 uint_t index; 435 uint32_t cnt = 0; 436 char qtype; 437 void *wchan; 438 439 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 440 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 441 ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || 442 (qp->qh_head == NULL && qp->qh_tail == NULL)); 443 if (!thread_queue_verify) 444 return; 445 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 446 qtype = ((qp - udp->queue_head) < QHASHSIZE)? 
MX : CV; 447 for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; 448 prev = ulwp, ulwp = ulwp->ul_link, cnt++) { 449 ASSERT(ulwp->ul_qtype == qtype); 450 ASSERT(ulwp->ul_wchan != NULL); 451 ASSERT(ulwp->ul_sleepq == qp); 452 wchan = ulwp->ul_wchan; 453 index = QUEUE_HASH(wchan, qtype); 454 ASSERT(&udp->queue_head[index] == qp); 455 } 456 ASSERT(qp->qh_tail == prev); 457 ASSERT(qp->qh_qlen == cnt); 458 } 459 460 #else /* THREAD_DEBUG */ 461 462 #define QVERIFY(qp) 463 464 #endif /* THREAD_DEBUG */ 465 466 /* 467 * Acquire a queue head. 468 */ 469 queue_head_t * 470 queue_lock(void *wchan, int qtype) 471 { 472 uberdata_t *udp = curthread->ul_uberdata; 473 queue_head_t *qp; 474 475 ASSERT(qtype == MX || qtype == CV); 476 477 /* 478 * It is possible that we could be called while still single-threaded. 479 * If so, we call queue_alloc() to allocate the queue_head[] array. 480 */ 481 if ((qp = udp->queue_head) == NULL) { 482 queue_alloc(); 483 qp = udp->queue_head; 484 } 485 qp += QUEUE_HASH(wchan, qtype); 486 spin_lock_set(&qp->qh_lock); 487 /* 488 * At once per nanosecond, qh_lockcount will wrap after 512 years. 489 * Were we to care about this, we could peg the value at UINT64_MAX. 490 */ 491 qp->qh_lockcount++; 492 QVERIFY(qp); 493 return (qp); 494 } 495 496 /* 497 * Release a queue head. 498 */ 499 void 500 queue_unlock(queue_head_t *qp) 501 { 502 QVERIFY(qp); 503 spin_lock_clear(&qp->qh_lock); 504 } 505 506 /* 507 * For rwlock queueing, we must queue writers ahead of readers of the 508 * same priority. We do this by making writers appear to have a half 509 * point higher priority for purposes of priority comparisons below. 510 */ 511 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 512 513 void 514 enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) 515 { 516 ulwp_t **ulwpp; 517 ulwp_t *next; 518 int pri = CMP_PRIO(ulwp); 519 int force_fifo = (qtype & FIFOQ); 520 int do_fifo; 521 522 qtype &= ~FIFOQ; 523 ASSERT(qtype == MX || qtype == CV); 524 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 525 ASSERT(ulwp->ul_sleepq != qp); 526 527 /* 528 * LIFO queue ordering is unfair and can lead to starvation, 529 * but it gives better performance for heavily contended locks. 530 * We use thread_queue_fifo (range is 0..8) to determine 531 * the frequency of FIFO vs LIFO queuing: 532 * 0 : every 256th time (almost always LIFO) 533 * 1 : every 128th time 534 * 2 : every 64th time 535 * 3 : every 32nd time 536 * 4 : every 16th time (the default value, mostly LIFO) 537 * 5 : every 8th time 538 * 6 : every 4th time 539 * 7 : every 2nd time 540 * 8 : every time (never LIFO, always FIFO) 541 * Note that there is always some degree of FIFO ordering. 542 * This breaks live lock conditions that occur in applications 543 * that are written assuming (incorrectly) that threads acquire 544 * locks fairly, that is, in roughly round-robin order. 545 * In any event, the queue is maintained in priority order. 546 * 547 * If we are given the FIFOQ flag in qtype, fifo queueing is forced. 548 * SUSV3 requires this for semaphores. 549 */ 550 do_fifo = (force_fifo || 551 ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); 552 553 if (qp->qh_head == NULL) { 554 /* 555 * The queue is empty. LIFO/FIFO doesn't matter. 556 */ 557 ASSERT(qp->qh_tail == NULL); 558 ulwpp = &qp->qh_head; 559 } else if (do_fifo) { 560 /* 561 * Enqueue after the last thread whose priority is greater 562 * than or equal to the priority of the thread being queued. 
563 * Attempt first to go directly onto the tail of the queue. 564 */ 565 if (pri <= CMP_PRIO(qp->qh_tail)) 566 ulwpp = &qp->qh_tail->ul_link; 567 else { 568 for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 569 ulwpp = &next->ul_link) 570 if (pri > CMP_PRIO(next)) 571 break; 572 } 573 } else { 574 /* 575 * Enqueue before the first thread whose priority is less 576 * than or equal to the priority of the thread being queued. 577 * Hopefully we can go directly onto the head of the queue. 578 */ 579 for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 580 ulwpp = &next->ul_link) 581 if (pri >= CMP_PRIO(next)) 582 break; 583 } 584 if ((ulwp->ul_link = *ulwpp) == NULL) 585 qp->qh_tail = ulwp; 586 *ulwpp = ulwp; 587 588 ulwp->ul_sleepq = qp; 589 ulwp->ul_wchan = wchan; 590 ulwp->ul_qtype = qtype; 591 if (qp->qh_qmax < ++qp->qh_qlen) 592 qp->qh_qmax = qp->qh_qlen; 593 } 594 595 /* 596 * Return a pointer to the queue slot of the 597 * highest priority thread on the queue. 598 * On return, prevp, if not NULL, will contain a pointer 599 * to the thread's predecessor on the queue 600 */ 601 static ulwp_t ** 602 queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) 603 { 604 ulwp_t **ulwpp; 605 ulwp_t *ulwp; 606 ulwp_t *prev = NULL; 607 ulwp_t **suspp = NULL; 608 ulwp_t *susprev; 609 610 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 611 612 /* 613 * Find a waiter on the sleep queue. 614 */ 615 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 616 prev = ulwp, ulwpp = &ulwp->ul_link) { 617 if (ulwp->ul_wchan == wchan) { 618 if (!ulwp->ul_stop) 619 break; 620 /* 621 * Try not to return a suspended thread. 622 * This mimics the old libthread's behavior. 623 */ 624 if (suspp == NULL) { 625 suspp = ulwpp; 626 susprev = prev; 627 } 628 } 629 } 630 631 if (ulwp == NULL && suspp != NULL) { 632 ulwp = *(ulwpp = suspp); 633 prev = susprev; 634 suspp = NULL; 635 } 636 if (ulwp == NULL) { 637 if (more != NULL) 638 *more = 0; 639 return (NULL); 640 } 641 642 if (prevp != NULL) 643 *prevp = prev; 644 if (more == NULL) 645 return (ulwpp); 646 647 /* 648 * Scan the remainder of the queue for another waiter. 649 */ 650 if (suspp != NULL) { 651 *more = 1; 652 return (ulwpp); 653 } 654 for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { 655 if (ulwp->ul_wchan == wchan) { 656 *more = 1; 657 return (ulwpp); 658 } 659 } 660 661 *more = 0; 662 return (ulwpp); 663 } 664 665 ulwp_t * 666 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 667 { 668 ulwp_t *ulwp; 669 670 ulwp = *ulwpp; 671 *ulwpp = ulwp->ul_link; 672 ulwp->ul_link = NULL; 673 if (qp->qh_tail == ulwp) 674 qp->qh_tail = prev; 675 qp->qh_qlen--; 676 ulwp->ul_sleepq = NULL; 677 ulwp->ul_wchan = NULL; 678 679 return (ulwp); 680 } 681 682 ulwp_t * 683 dequeue(queue_head_t *qp, void *wchan, int *more) 684 { 685 ulwp_t **ulwpp; 686 ulwp_t *prev; 687 688 if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) 689 return (NULL); 690 return (queue_unlink(qp, ulwpp, prev)); 691 } 692 693 /* 694 * Return a pointer to the highest priority thread sleeping on wchan. 
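 * Return NULL if no thread is sleeping on wchan.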
695 */ 696 ulwp_t * 697 queue_waiter(queue_head_t *qp, void *wchan) 698 { 699 ulwp_t **ulwpp; 700 701 if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) 702 return (NULL); 703 return (*ulwpp); 704 } 705 706 uint8_t 707 dequeue_self(queue_head_t *qp, void *wchan) 708 { 709 ulwp_t *self = curthread; 710 ulwp_t **ulwpp; 711 ulwp_t *ulwp; 712 ulwp_t *prev = NULL; 713 int found = 0; 714 int more = 0; 715 716 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 717 718 /* find self on the sleep queue */ 719 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 720 prev = ulwp, ulwpp = &ulwp->ul_link) { 721 if (ulwp == self) { 722 /* dequeue ourself */ 723 ASSERT(self->ul_wchan == wchan); 724 (void) queue_unlink(qp, ulwpp, prev); 725 self->ul_cvmutex = NULL; 726 self->ul_cv_wake = 0; 727 found = 1; 728 break; 729 } 730 if (ulwp->ul_wchan == wchan) 731 more = 1; 732 } 733 734 if (!found) 735 thr_panic("dequeue_self(): curthread not found on queue"); 736 737 if (more) 738 return (1); 739 740 /* scan the remainder of the queue for another waiter */ 741 for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { 742 if (ulwp->ul_wchan == wchan) 743 return (1); 744 } 745 746 return (0); 747 } 748 749 /* 750 * Called from call_user_handler() and _thrp_suspend() to take 751 * ourself off of our sleep queue so we can grab locks. 752 */ 753 void 754 unsleep_self(void) 755 { 756 ulwp_t *self = curthread; 757 queue_head_t *qp; 758 759 /* 760 * Calling enter_critical()/exit_critical() here would lead 761 * to recursion. Just manipulate self->ul_critical directly. 762 */ 763 self->ul_critical++; 764 while (self->ul_sleepq != NULL) { 765 qp = queue_lock(self->ul_wchan, self->ul_qtype); 766 /* 767 * We may have been moved from a CV queue to a 768 * mutex queue while we were attempting queue_lock(). 769 * If so, just loop around and try again. 770 * dequeue_self() clears self->ul_sleepq. 771 */ 772 if (qp == self->ul_sleepq) { 773 (void) dequeue_self(qp, self->ul_wchan); 774 self->ul_writer = 0; 775 } 776 queue_unlock(qp); 777 } 778 self->ul_critical--; 779 } 780 781 /* 782 * Common code for calling the ___lwp_mutex_timedlock() system call. 783 * Returns with mutex_owner and mutex_ownerpid set correctly. 784 */ 785 static int 786 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 787 { 788 ulwp_t *self = curthread; 789 uberdata_t *udp = self->ul_uberdata; 790 int mtype = mp->mutex_type; 791 hrtime_t begin_sleep; 792 int acquired; 793 int error; 794 795 self->ul_sp = stkptr(); 796 self->ul_wchan = mp; 797 if (__td_event_report(self, TD_SLEEP, udp)) { 798 self->ul_td_evbuf.eventnum = TD_SLEEP; 799 self->ul_td_evbuf.eventdata = mp; 800 tdb_event(TD_SLEEP, udp); 801 } 802 if (msp) { 803 tdb_incr(msp->mutex_sleep); 804 begin_sleep = gethrtime(); 805 } 806 807 DTRACE_PROBE1(plockstat, mutex__block, mp); 808 809 for (;;) { 810 /* 811 * A return value of EOWNERDEAD or ELOCKUNMAPPED 812 * means we successfully acquired the lock. 813 */ 814 if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 815 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 816 acquired = 0; 817 break; 818 } 819 820 if (mtype & USYNC_PROCESS) { 821 /* 822 * Defend against forkall(). We may be the child, 823 * in which case we don't actually own the mutex. 
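 * If mutex_ownerpid does not match our pid, the lock belongs to the
 * parent process, not to us; we just loop around and call
 * ___lwp_mutex_timedlock() again to acquire it for ourselves.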
824 */ 825 enter_critical(self); 826 if (mp->mutex_ownerpid == udp->pid) { 827 mp->mutex_owner = (uintptr_t)self; 828 exit_critical(self); 829 acquired = 1; 830 break; 831 } 832 exit_critical(self); 833 } else { 834 mp->mutex_owner = (uintptr_t)self; 835 acquired = 1; 836 break; 837 } 838 } 839 if (msp) 840 msp->mutex_sleep_time += gethrtime() - begin_sleep; 841 self->ul_wchan = NULL; 842 self->ul_sp = 0; 843 844 if (acquired) { 845 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 846 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 847 } else { 848 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 849 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 850 } 851 852 return (error); 853 } 854 855 /* 856 * Common code for calling the ___lwp_mutex_trylock() system call. 857 * Returns with mutex_owner and mutex_ownerpid set correctly. 858 */ 859 int 860 mutex_trylock_kernel(mutex_t *mp) 861 { 862 ulwp_t *self = curthread; 863 uberdata_t *udp = self->ul_uberdata; 864 int mtype = mp->mutex_type; 865 int error; 866 int acquired; 867 868 for (;;) { 869 /* 870 * A return value of EOWNERDEAD or ELOCKUNMAPPED 871 * means we successfully acquired the lock. 872 */ 873 if ((error = ___lwp_mutex_trylock(mp)) != 0 && 874 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 875 acquired = 0; 876 break; 877 } 878 879 if (mtype & USYNC_PROCESS) { 880 /* 881 * Defend against forkall(). We may be the child, 882 * in which case we don't actually own the mutex. 883 */ 884 enter_critical(self); 885 if (mp->mutex_ownerpid == udp->pid) { 886 mp->mutex_owner = (uintptr_t)self; 887 exit_critical(self); 888 acquired = 1; 889 break; 890 } 891 exit_critical(self); 892 } else { 893 mp->mutex_owner = (uintptr_t)self; 894 acquired = 1; 895 break; 896 } 897 } 898 899 if (acquired) { 900 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 901 } else if (error != EBUSY) { 902 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 903 } 904 905 return (error); 906 } 907 908 volatile sc_shared_t * 909 setup_schedctl(void) 910 { 911 ulwp_t *self = curthread; 912 volatile sc_shared_t *scp; 913 sc_shared_t *tmp; 914 915 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 916 !self->ul_vfork && /* not a child of vfork() */ 917 !self->ul_schedctl_called) { /* haven't been called before */ 918 enter_critical(self); 919 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 920 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 921 self->ul_schedctl = scp = tmp; 922 exit_critical(self); 923 } 924 /* 925 * Unless the call to setup_schedctl() is surrounded 926 * by enter_critical()/exit_critical(), the address 927 * we are returning could be invalid due to a forkall() 928 * having occurred in another thread. 929 */ 930 return (scp); 931 } 932 933 /* 934 * Interfaces from libsched, incorporated into libc. 935 * libsched.so.1 is now a filter library onto libc. 936 */ 937 #pragma weak schedctl_lookup = _schedctl_init 938 #pragma weak _schedctl_lookup = _schedctl_init 939 #pragma weak schedctl_init = _schedctl_init 940 schedctl_t * 941 _schedctl_init(void) 942 { 943 volatile sc_shared_t *scp = setup_schedctl(); 944 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 945 } 946 947 #pragma weak schedctl_exit = _schedctl_exit 948 void 949 _schedctl_exit(void) 950 { 951 } 952 953 /* 954 * Contract private interface for java. 955 * Set up the schedctl data if it doesn't exist yet. 956 * Return a pointer to the pointer to the schedctl data. 
957 */ 958 volatile sc_shared_t *volatile * 959 _thr_schedctl(void) 960 { 961 ulwp_t *self = curthread; 962 volatile sc_shared_t *volatile *ptr; 963 964 if (self->ul_vfork) 965 return (NULL); 966 if (*(ptr = &self->ul_schedctl) == NULL) 967 (void) setup_schedctl(); 968 return (ptr); 969 } 970 971 /* 972 * Block signals and attempt to block preemption. 973 * no_preempt()/preempt() must be used in pairs but can be nested. 974 */ 975 void 976 no_preempt(ulwp_t *self) 977 { 978 volatile sc_shared_t *scp; 979 980 if (self->ul_preempt++ == 0) { 981 enter_critical(self); 982 if ((scp = self->ul_schedctl) != NULL || 983 (scp = setup_schedctl()) != NULL) { 984 /* 985 * Save the pre-existing preempt value. 986 */ 987 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 988 scp->sc_preemptctl.sc_nopreempt = 1; 989 } 990 } 991 } 992 993 /* 994 * Undo the effects of no_preempt(). 995 */ 996 void 997 preempt(ulwp_t *self) 998 { 999 volatile sc_shared_t *scp; 1000 1001 ASSERT(self->ul_preempt > 0); 1002 if (--self->ul_preempt == 0) { 1003 if ((scp = self->ul_schedctl) != NULL) { 1004 /* 1005 * Restore the pre-existing preempt value. 1006 */ 1007 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 1008 if (scp->sc_preemptctl.sc_yield && 1009 scp->sc_preemptctl.sc_nopreempt == 0) { 1010 lwp_yield(); 1011 if (scp->sc_preemptctl.sc_yield) { 1012 /* 1013 * Shouldn't happen. This is either 1014 * a race condition or the thread 1015 * just entered the real-time class. 1016 */ 1017 lwp_yield(); 1018 scp->sc_preemptctl.sc_yield = 0; 1019 } 1020 } 1021 } 1022 exit_critical(self); 1023 } 1024 } 1025 1026 /* 1027 * If a call to preempt() would cause the current thread to yield or to 1028 * take deferred actions in exit_critical(), then unpark the specified 1029 * lwp so it can run while we delay. Return the original lwpid if the 1030 * unpark was not performed, else return zero. The tests are a repeat 1031 * of some of the tests in preempt(), above. This is a statistical 1032 * optimization solely for cond_sleep_queue(), below. 1033 */ 1034 static lwpid_t 1035 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 1036 { 1037 volatile sc_shared_t *scp = self->ul_schedctl; 1038 1039 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 1040 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 1041 (self->ul_curplease && self->ul_critical == 1)) { 1042 (void) __lwp_unpark(lwpid); 1043 lwpid = 0; 1044 } 1045 return (lwpid); 1046 } 1047 1048 /* 1049 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1050 * If this fails, return EBUSY and let the caller deal with it. 1051 * If this succeeds, return 0 with mutex_owner set to curthread. 1052 */ 1053 static int 1054 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1055 { 1056 ulwp_t *self = curthread; 1057 int error = EBUSY; 1058 ulwp_t *ulwp; 1059 volatile sc_shared_t *scp; 1060 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1061 volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1062 uint32_t new_lockword; 1063 int count = 0; 1064 int max_count; 1065 uint8_t max_spinners; 1066 1067 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1068 1069 if (MUTEX_OWNER(mp) == self) 1070 return (EBUSY); 1071 1072 /* short-cut, not definitive (see below) */ 1073 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1074 ASSERT(mp->mutex_type & LOCK_ROBUST); 1075 error = ENOTRECOVERABLE; 1076 goto done; 1077 } 1078 1079 /* 1080 * Make one attempt to acquire the lock before 1081 * incurring the overhead of the spin loop. 
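 * (set_lock_byte() returns the previous value of the lock byte, so a
 * zero return means the lock was free and is now held by the caller.)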
1082 */ 1083 if (set_lock_byte(lockp) == 0) { 1084 *ownerp = (uintptr_t)self; 1085 error = 0; 1086 goto done; 1087 } 1088 if (!tryhard) 1089 goto done; 1090 if (ncpus == 0) 1091 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1092 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1093 max_spinners = ncpus - 1; 1094 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1095 if (max_count == 0) 1096 goto done; 1097 1098 /* 1099 * This spin loop is unfair to lwps that have already dropped into 1100 * the kernel to sleep. They will starve on a highly-contended mutex. 1101 * This is just too bad. The adaptive spin algorithm is intended 1102 * to allow programs with highly-contended locks (that is, broken 1103 * programs) to execute with reasonable speed despite their contention. 1104 * Being fair would reduce the speed of such programs and well-written 1105 * programs will not suffer in any case. 1106 */ 1107 enter_critical(self); 1108 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1109 exit_critical(self); 1110 goto done; 1111 } 1112 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1113 for (count = 1; ; count++) { 1114 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1115 *ownerp = (uintptr_t)self; 1116 error = 0; 1117 break; 1118 } 1119 if (count == max_count) 1120 break; 1121 SMT_PAUSE(); 1122 /* 1123 * Stop spinning if the mutex owner is not running on 1124 * a processor; it will not drop the lock any time soon 1125 * and we would just be wasting time to keep spinning. 1126 * 1127 * Note that we are looking at another thread (ulwp_t) 1128 * without ensuring that the other thread does not exit. 1129 * The scheme relies on ulwp_t structures never being 1130 * deallocated by the library (the library employs a free 1131 * list of ulwp_t structs that are reused when new threads 1132 * are created) and on schedctl shared memory never being 1133 * deallocated once created via __schedctl(). 1134 * 1135 * Thus, the worst that can happen when the spinning thread 1136 * looks at the owner's schedctl data is that it is looking 1137 * at some other thread's schedctl data. This almost never 1138 * happens and is benign when it does. 1139 */ 1140 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1141 ((scp = ulwp->ul_schedctl) == NULL || 1142 scp->sc_state != SC_ONPROC)) 1143 break; 1144 } 1145 new_lockword = spinners_decr(&mp->mutex_lockword); 1146 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1147 /* 1148 * We haven't yet acquired the lock, the lock 1149 * is free, and there are no other spinners. 1150 * Make one final attempt to acquire the lock. 1151 * 1152 * This isn't strictly necessary since mutex_lock_queue() 1153 * (the next action this thread will take if it doesn't 1154 * acquire the lock here) makes one attempt to acquire 1155 * the lock before putting the thread to sleep. 1156 * 1157 * If the next action for this thread (on failure here) 1158 * were not to call mutex_lock_queue(), this would be 1159 * necessary for correctness, to avoid ending up with an 1160 * unheld mutex with waiters but no one to wake them up. 1161 */ 1162 if (set_lock_byte(lockp) == 0) { 1163 *ownerp = (uintptr_t)self; 1164 error = 0; 1165 } 1166 count++; 1167 } 1168 exit_critical(self); 1169 1170 done: 1171 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1172 ASSERT(mp->mutex_type & LOCK_ROBUST); 1173 /* 1174 * We shouldn't own the mutex; clear the lock. 
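 * We hold the lock byte but the mutex has become not recoverable;
 * drop it and wake any waiters so that they too can observe the state
 * and return ENOTRECOVERABLE.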
1175 */ 1176 mp->mutex_owner = 0; 1177 if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) 1178 mutex_wakeup_all(mp); 1179 error = ENOTRECOVERABLE; 1180 } 1181 1182 if (error) { 1183 if (count) { 1184 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1185 } 1186 if (error != EBUSY) { 1187 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1188 } 1189 } else { 1190 if (count) { 1191 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1192 } 1193 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1194 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1195 ASSERT(mp->mutex_type & LOCK_ROBUST); 1196 error = EOWNERDEAD; 1197 } 1198 } 1199 1200 return (error); 1201 } 1202 1203 /* 1204 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1205 * The owner field is not set here; the caller (spin_lock_set()) sets it. 1206 */ 1207 static int 1208 mutex_queuelock_adaptive(mutex_t *mp) 1209 { 1210 ulwp_t *ulwp; 1211 volatile sc_shared_t *scp; 1212 volatile uint8_t *lockp; 1213 volatile uint64_t *ownerp; 1214 int count = curthread->ul_queue_spin; 1215 1216 ASSERT(mp->mutex_type == USYNC_THREAD); 1217 1218 if (count == 0) 1219 return (EBUSY); 1220 1221 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1222 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1223 while (--count >= 0) { 1224 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1225 return (0); 1226 SMT_PAUSE(); 1227 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1228 ((scp = ulwp->ul_schedctl) == NULL || 1229 scp->sc_state != SC_ONPROC)) 1230 break; 1231 } 1232 1233 return (EBUSY); 1234 } 1235 1236 /* 1237 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1238 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1239 * If this fails, return EBUSY and let the caller deal with it. 1240 * If this succeeds, return 0 with mutex_owner set to curthread 1241 * and mutex_ownerpid set to the current pid. 1242 */ 1243 static int 1244 mutex_trylock_process(mutex_t *mp, int tryhard) 1245 { 1246 ulwp_t *self = curthread; 1247 uberdata_t *udp = self->ul_uberdata; 1248 int error = EBUSY; 1249 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1250 uint32_t new_lockword; 1251 int count = 0; 1252 int max_count; 1253 uint8_t max_spinners; 1254 1255 ASSERT(mp->mutex_type & USYNC_PROCESS); 1256 1257 if (shared_mutex_held(mp)) 1258 return (EBUSY); 1259 1260 /* short-cut, not definitive (see below) */ 1261 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1262 ASSERT(mp->mutex_type & LOCK_ROBUST); 1263 error = ENOTRECOVERABLE; 1264 goto done; 1265 } 1266 1267 /* 1268 * Make one attempt to acquire the lock before 1269 * incurring the overhead of the spin loop. 1270 */ 1271 enter_critical(self); 1272 if (set_lock_byte(lockp) == 0) { 1273 mp->mutex_owner = (uintptr_t)self; 1274 mp->mutex_ownerpid = udp->pid; 1275 exit_critical(self); 1276 error = 0; 1277 goto done; 1278 } 1279 exit_critical(self); 1280 if (!tryhard) 1281 goto done; 1282 if (ncpus == 0) 1283 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1284 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1285 max_spinners = ncpus - 1; 1286 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1287 if (max_count == 0) 1288 goto done; 1289 1290 /* 1291 * This is a process-shared mutex. 1292 * We cannot know if the owner is running on a processor. 1293 * We just spin and hope that it is on a processor. 
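 * (Contrast this with mutex_trylock_adaptive(), above, which watches the
 * owner's schedctl state and stops spinning once the owner is no longer
 * running on a processor; that state is not available for threads in
 * other processes.)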
1294 */ 1295 enter_critical(self); 1296 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1297 exit_critical(self); 1298 goto done; 1299 } 1300 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1301 for (count = 1; ; count++) { 1302 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1303 mp->mutex_owner = (uintptr_t)self; 1304 mp->mutex_ownerpid = udp->pid; 1305 error = 0; 1306 break; 1307 } 1308 if (count == max_count) 1309 break; 1310 SMT_PAUSE(); 1311 } 1312 new_lockword = spinners_decr(&mp->mutex_lockword); 1313 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1314 /* 1315 * We haven't yet acquired the lock, the lock 1316 * is free, and there are no other spinners. 1317 * Make one final attempt to acquire the lock. 1318 * 1319 * This isn't strictly necessary since mutex_lock_kernel() 1320 * (the next action this thread will take if it doesn't 1321 * acquire the lock here) makes one attempt to acquire 1322 * the lock before putting the thread to sleep. 1323 * 1324 * If the next action for this thread (on failure here) 1325 * were not to call mutex_lock_kernel(), this would be 1326 * necessary for correctness, to avoid ending up with an 1327 * unheld mutex with waiters but no one to wake them up. 1328 */ 1329 if (set_lock_byte(lockp) == 0) { 1330 mp->mutex_owner = (uintptr_t)self; 1331 mp->mutex_ownerpid = udp->pid; 1332 error = 0; 1333 } 1334 count++; 1335 } 1336 exit_critical(self); 1337 1338 done: 1339 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1340 ASSERT(mp->mutex_type & LOCK_ROBUST); 1341 /* 1342 * We shouldn't own the mutex; clear the lock. 1343 */ 1344 mp->mutex_owner = 0; 1345 mp->mutex_ownerpid = 0; 1346 if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) { 1347 no_preempt(self); 1348 (void) ___lwp_mutex_wakeup(mp, 1); 1349 preempt(self); 1350 } 1351 error = ENOTRECOVERABLE; 1352 } 1353 1354 if (error) { 1355 if (count) { 1356 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1357 } 1358 if (error != EBUSY) { 1359 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1360 } 1361 } else { 1362 if (count) { 1363 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1364 } 1365 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1366 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1367 ASSERT(mp->mutex_type & LOCK_ROBUST); 1368 if (mp->mutex_flag & LOCK_OWNERDEAD) 1369 error = EOWNERDEAD; 1370 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1371 error = ELOCKUNMAPPED; 1372 else 1373 error = EOWNERDEAD; 1374 } 1375 } 1376 1377 return (error); 1378 } 1379 1380 /* 1381 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1382 * Returns the lwpid of the thread that was dequeued, if any. 1383 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1384 * to wake up the specified lwp. 1385 */ 1386 static lwpid_t 1387 mutex_wakeup(mutex_t *mp) 1388 { 1389 lwpid_t lwpid = 0; 1390 queue_head_t *qp; 1391 ulwp_t *ulwp; 1392 int more; 1393 1394 /* 1395 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1396 * waiters bit if no one was found on the queue because the mutex 1397 * might have been deallocated or reallocated for another purpose. 1398 */ 1399 qp = queue_lock(mp, MX); 1400 if ((ulwp = dequeue(qp, mp, &more)) != NULL) { 1401 lwpid = ulwp->ul_lwpid; 1402 mp->mutex_waiters = (more? 1 : 0); 1403 } 1404 queue_unlock(qp); 1405 return (lwpid); 1406 } 1407 1408 /* 1409 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 
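 * This is done when a robust mutex has become LOCK_NOTRECOVERABLE, so that
 * every waiter wakes up and can observe the unrecoverable state rather
 * than continue to sleep.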
1410 */ 1411 static void 1412 mutex_wakeup_all(mutex_t *mp) 1413 { 1414 queue_head_t *qp; 1415 int nlwpid = 0; 1416 int maxlwps = MAXLWPS; 1417 ulwp_t **ulwpp; 1418 ulwp_t *ulwp; 1419 ulwp_t *prev = NULL; 1420 lwpid_t buffer[MAXLWPS]; 1421 lwpid_t *lwpid = buffer; 1422 1423 /* 1424 * Walk the list of waiters and prepare to wake up all of them. 1425 * The waiters flag has already been cleared from the mutex. 1426 * 1427 * We keep track of lwpids that are to be unparked in lwpid[]. 1428 * __lwp_unpark_all() is called to unpark all of them after 1429 * they have been removed from the sleep queue and the sleep 1430 * queue lock has been dropped. If we run out of space in our 1431 * on-stack buffer, we need to allocate more but we can't call 1432 * lmalloc() because we are holding a queue lock when the overflow 1433 * occurs and lmalloc() acquires a lock. We can't use alloca() 1434 * either because the application may have allocated a small 1435 * stack and we don't want to overrun the stack. So we call 1436 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1437 * system call directly since that path acquires no locks. 1438 */ 1439 qp = queue_lock(mp, MX); 1440 ulwpp = &qp->qh_head; 1441 while ((ulwp = *ulwpp) != NULL) { 1442 if (ulwp->ul_wchan != mp) { 1443 prev = ulwp; 1444 ulwpp = &ulwp->ul_link; 1445 } else { 1446 if (nlwpid == maxlwps) 1447 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1448 (void) queue_unlink(qp, ulwpp, prev); 1449 lwpid[nlwpid++] = ulwp->ul_lwpid; 1450 } 1451 } 1452 1453 if (nlwpid == 0) { 1454 queue_unlock(qp); 1455 } else { 1456 mp->mutex_waiters = 0; 1457 no_preempt(curthread); 1458 queue_unlock(qp); 1459 if (nlwpid == 1) 1460 (void) __lwp_unpark(lwpid[0]); 1461 else 1462 (void) __lwp_unpark_all(lwpid, nlwpid); 1463 preempt(curthread); 1464 } 1465 1466 if (lwpid != buffer) 1467 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 1468 } 1469 1470 /* 1471 * Release a process-private mutex. 1472 * As an optimization, if there are waiters but there are also spinners 1473 * attempting to acquire the mutex, then don't bother waking up a waiter; 1474 * one of the spinners will acquire the mutex soon and it would be a waste 1475 * of resources to wake up some thread just to have it spin for a while 1476 * and then possibly go back to sleep. See mutex_trylock_adaptive(). 1477 */ 1478 static lwpid_t 1479 mutex_unlock_queue(mutex_t *mp, int release_all) 1480 { 1481 lwpid_t lwpid = 0; 1482 uint32_t old_lockword; 1483 1484 mp->mutex_owner = 0; 1485 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1486 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1487 if ((old_lockword & WAITERMASK) && 1488 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1489 ulwp_t *self = curthread; 1490 no_preempt(self); /* ensure a prompt wakeup */ 1491 if (release_all) 1492 mutex_wakeup_all(mp); 1493 else 1494 lwpid = mutex_wakeup(mp); 1495 if (lwpid == 0) 1496 preempt(self); 1497 } 1498 return (lwpid); 1499 } 1500 1501 /* 1502 * Like mutex_unlock_queue(), but for process-shared mutexes. 
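 * The wakeup is performed by the kernel (___lwp_mutex_wakeup()) because
 * the waiters may be threads in other processes, blocked in the kernel
 * rather than parked on our user-level sleep queues.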
1503 */ 1504 static void 1505 mutex_unlock_process(mutex_t *mp, int release_all) 1506 { 1507 uint32_t old_lockword; 1508 1509 mp->mutex_owner = 0; 1510 mp->mutex_ownerpid = 0; 1511 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1512 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1513 if ((old_lockword & WAITERMASK) && 1514 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1515 ulwp_t *self = curthread; 1516 no_preempt(self); /* ensure a prompt wakeup */ 1517 (void) ___lwp_mutex_wakeup(mp, release_all); 1518 preempt(self); 1519 } 1520 } 1521 1522 /* 1523 * Return the real priority of a thread. 1524 */ 1525 int 1526 real_priority(ulwp_t *ulwp) 1527 { 1528 if (ulwp->ul_epri == 0) 1529 return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); 1530 return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); 1531 } 1532 1533 void 1534 stall(void) 1535 { 1536 for (;;) 1537 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1538 } 1539 1540 /* 1541 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1542 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1543 * If successful, returns with mutex_owner set correctly. 1544 */ 1545 int 1546 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1547 timespec_t *tsp) 1548 { 1549 uberdata_t *udp = curthread->ul_uberdata; 1550 queue_head_t *qp; 1551 hrtime_t begin_sleep; 1552 int error = 0; 1553 1554 self->ul_sp = stkptr(); 1555 if (__td_event_report(self, TD_SLEEP, udp)) { 1556 self->ul_wchan = mp; 1557 self->ul_td_evbuf.eventnum = TD_SLEEP; 1558 self->ul_td_evbuf.eventdata = mp; 1559 tdb_event(TD_SLEEP, udp); 1560 } 1561 if (msp) { 1562 tdb_incr(msp->mutex_sleep); 1563 begin_sleep = gethrtime(); 1564 } 1565 1566 DTRACE_PROBE1(plockstat, mutex__block, mp); 1567 1568 /* 1569 * Put ourself on the sleep queue, and while we are 1570 * unable to grab the lock, go park in the kernel. 1571 * Take ourself off the sleep queue after we acquire the lock. 1572 * The waiter bit can be set/cleared only while holding the queue lock. 1573 */ 1574 qp = queue_lock(mp, MX); 1575 enqueue(qp, self, mp, MX); 1576 mp->mutex_waiters = 1; 1577 for (;;) { 1578 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1579 mp->mutex_owner = (uintptr_t)self; 1580 mp->mutex_waiters = dequeue_self(qp, mp); 1581 break; 1582 } 1583 set_parking_flag(self, 1); 1584 queue_unlock(qp); 1585 /* 1586 * __lwp_park() will return the residual time in tsp 1587 * if we are unparked before the timeout expires. 1588 */ 1589 error = __lwp_park(tsp, 0); 1590 set_parking_flag(self, 0); 1591 /* 1592 * We could have taken a signal or suspended ourself. 1593 * If we did, then we removed ourself from the queue. 1594 * Someone else may have removed us from the queue 1595 * as a consequence of mutex_unlock(). We may have 1596 * gotten a timeout from __lwp_park(). Or we may still 1597 * be on the queue and this is just a spurious wakeup. 1598 */ 1599 qp = queue_lock(mp, MX); 1600 if (self->ul_sleepq == NULL) { 1601 if (error) { 1602 mp->mutex_waiters = queue_waiter(qp, mp)? 
1 : 0; 1603 if (error != EINTR) 1604 break; 1605 error = 0; 1606 } 1607 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1608 mp->mutex_owner = (uintptr_t)self; 1609 break; 1610 } 1611 enqueue(qp, self, mp, MX); 1612 mp->mutex_waiters = 1; 1613 } 1614 ASSERT(self->ul_sleepq == qp && 1615 self->ul_qtype == MX && 1616 self->ul_wchan == mp); 1617 if (error) { 1618 if (error != EINTR) { 1619 mp->mutex_waiters = dequeue_self(qp, mp); 1620 break; 1621 } 1622 error = 0; 1623 } 1624 } 1625 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1626 self->ul_wchan == NULL); 1627 self->ul_sp = 0; 1628 queue_unlock(qp); 1629 1630 if (msp) 1631 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1632 1633 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1634 1635 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1636 ASSERT(mp->mutex_type & LOCK_ROBUST); 1637 /* 1638 * We shouldn't own the mutex; clear the lock. 1639 */ 1640 mp->mutex_owner = 0; 1641 if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) 1642 mutex_wakeup_all(mp); 1643 error = ENOTRECOVERABLE; 1644 } 1645 1646 if (error) { 1647 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1648 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1649 } else { 1650 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1651 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1652 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1653 ASSERT(mp->mutex_type & LOCK_ROBUST); 1654 error = EOWNERDEAD; 1655 } 1656 } 1657 1658 return (error); 1659 } 1660 1661 static int 1662 mutex_recursion(mutex_t *mp, int mtype, int try) 1663 { 1664 ASSERT(mutex_is_held(mp)); 1665 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1666 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1667 1668 if (mtype & LOCK_RECURSIVE) { 1669 if (mp->mutex_rcount == RECURSION_MAX) { 1670 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1671 return (EAGAIN); 1672 } 1673 mp->mutex_rcount++; 1674 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1675 return (0); 1676 } 1677 if (try == MUTEX_LOCK) { 1678 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1679 return (EDEADLK); 1680 } 1681 return (EBUSY); 1682 } 1683 1684 /* 1685 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1686 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1687 * We use tdb_hash_lock here and in the synch object tracking code in 1688 * the tdb_agent.c file. There is no conflict between these two usages. 1689 */ 1690 void 1691 register_lock(mutex_t *mp) 1692 { 1693 uberdata_t *udp = curthread->ul_uberdata; 1694 uint_t hash = LOCK_HASH(mp); 1695 robust_t *rlp; 1696 robust_t **rlpp; 1697 robust_t **table; 1698 1699 if ((table = udp->robustlocks) == NULL) { 1700 lmutex_lock(&udp->tdb_hash_lock); 1701 if ((table = udp->robustlocks) == NULL) { 1702 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1703 _membar_producer(); 1704 udp->robustlocks = table; 1705 } 1706 lmutex_unlock(&udp->tdb_hash_lock); 1707 } 1708 _membar_consumer(); 1709 1710 /* 1711 * First search the registered table with no locks held. 1712 * This is safe because the table never shrinks 1713 * and we can only get a false negative. 1714 */ 1715 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1716 if (rlp->robust_lock == mp) /* already registered */ 1717 return; 1718 } 1719 1720 /* 1721 * The lock was not found. 1722 * Repeat the operation with tdb_hash_lock held. 
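 * (This is the usual check, lock, and re-check pattern; the lock-free
 * search above is paired with the _membar_producer()/_membar_consumer()
 * calls used when the table and new entries are published.)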
1723 */ 1724 lmutex_lock(&udp->tdb_hash_lock); 1725 1726 for (rlpp = &table[hash]; 1727 (rlp = *rlpp) != NULL; 1728 rlpp = &rlp->robust_next) { 1729 if (rlp->robust_lock == mp) { /* already registered */ 1730 lmutex_unlock(&udp->tdb_hash_lock); 1731 return; 1732 } 1733 } 1734 1735 /* 1736 * The lock has never been registered. 1737 * Register it now and add it to the table. 1738 */ 1739 (void) ___lwp_mutex_register(mp); 1740 rlp = lmalloc(sizeof (*rlp)); 1741 rlp->robust_lock = mp; 1742 _membar_producer(); 1743 *rlpp = rlp; 1744 1745 lmutex_unlock(&udp->tdb_hash_lock); 1746 } 1747 1748 /* 1749 * This is called in the child of fork()/forkall() to start over 1750 * with a clean slate. (Each process must register its own locks.) 1751 * No locks are needed because all other threads are suspended or gone. 1752 */ 1753 void 1754 unregister_locks(void) 1755 { 1756 uberdata_t *udp = curthread->ul_uberdata; 1757 uint_t hash; 1758 robust_t **table; 1759 robust_t *rlp; 1760 robust_t *next; 1761 1762 if ((table = udp->robustlocks) != NULL) { 1763 for (hash = 0; hash < LOCKHASHSZ; hash++) { 1764 rlp = table[hash]; 1765 while (rlp != NULL) { 1766 next = rlp->robust_next; 1767 lfree(rlp, sizeof (*rlp)); 1768 rlp = next; 1769 } 1770 } 1771 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 1772 udp->robustlocks = NULL; 1773 } 1774 } 1775 1776 /* 1777 * Returns with mutex_owner set correctly. 1778 */ 1779 static int 1780 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 1781 { 1782 ulwp_t *self = curthread; 1783 uberdata_t *udp = self->ul_uberdata; 1784 int mtype = mp->mutex_type; 1785 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 1786 int error = 0; 1787 uint8_t ceil; 1788 int myprio; 1789 1790 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1791 1792 if (!self->ul_schedctl_called) 1793 (void) setup_schedctl(); 1794 1795 if (msp && try == MUTEX_TRY) 1796 tdb_incr(msp->mutex_try); 1797 1798 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 1799 return (mutex_recursion(mp, mtype, try)); 1800 1801 if (self->ul_error_detection && try == MUTEX_LOCK && 1802 tsp == NULL && mutex_is_held(mp)) 1803 lock_error(mp, "mutex_lock", NULL, NULL); 1804 1805 if (mtype & LOCK_PRIO_PROTECT) { 1806 ceil = mp->mutex_ceiling; 1807 ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); 1808 myprio = real_priority(self); 1809 if (myprio > ceil) { 1810 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 1811 return (EINVAL); 1812 } 1813 if ((error = _ceil_mylist_add(mp)) != 0) { 1814 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1815 return (error); 1816 } 1817 if (myprio < ceil) 1818 _ceil_prio_inherit(ceil); 1819 } 1820 1821 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 1822 == (USYNC_PROCESS | LOCK_ROBUST)) 1823 register_lock(mp); 1824 1825 if (mtype & LOCK_PRIO_INHERIT) { 1826 /* go straight to the kernel */ 1827 if (try == MUTEX_TRY) 1828 error = mutex_trylock_kernel(mp); 1829 else /* MUTEX_LOCK */ 1830 error = mutex_lock_kernel(mp, tsp, msp); 1831 /* 1832 * The kernel never sets or clears the lock byte 1833 * for LOCK_PRIO_INHERIT mutexes. 1834 * Set it here for consistency. 
1835 */ 1836 switch (error) { 1837 case 0: 1838 mp->mutex_lockw = LOCKSET; 1839 break; 1840 case EOWNERDEAD: 1841 case ELOCKUNMAPPED: 1842 mp->mutex_lockw = LOCKSET; 1843 /* FALLTHROUGH */ 1844 case ENOTRECOVERABLE: 1845 ASSERT(mtype & LOCK_ROBUST); 1846 break; 1847 case EDEADLK: 1848 if (try == MUTEX_LOCK) 1849 stall(); 1850 error = EBUSY; 1851 break; 1852 } 1853 } else if (mtype & USYNC_PROCESS) { 1854 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 1855 if (error == EBUSY && try == MUTEX_LOCK) 1856 error = mutex_lock_kernel(mp, tsp, msp); 1857 } else { /* USYNC_THREAD */ 1858 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 1859 if (error == EBUSY && try == MUTEX_LOCK) 1860 error = mutex_lock_queue(self, msp, mp, tsp); 1861 } 1862 1863 switch (error) { 1864 case 0: 1865 case EOWNERDEAD: 1866 case ELOCKUNMAPPED: 1867 if (mtype & LOCK_ROBUST) 1868 remember_lock(mp); 1869 if (msp) 1870 record_begin_hold(msp); 1871 break; 1872 default: 1873 if (mtype & LOCK_PRIO_PROTECT) { 1874 (void) _ceil_mylist_del(mp); 1875 if (myprio < ceil) 1876 _ceil_prio_waive(); 1877 } 1878 if (try == MUTEX_TRY) { 1879 if (msp) 1880 tdb_incr(msp->mutex_try_fail); 1881 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 1882 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 1883 tdb_event(TD_LOCK_TRY, udp); 1884 } 1885 } 1886 break; 1887 } 1888 1889 return (error); 1890 } 1891 1892 int 1893 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 1894 { 1895 ulwp_t *self = curthread; 1896 uberdata_t *udp = self->ul_uberdata; 1897 1898 /* 1899 * We know that USYNC_PROCESS is set in mtype and that 1900 * zero, one, or both of the flags LOCK_RECURSIVE and 1901 * LOCK_ERRORCHECK are set, and that no other flags are set. 1902 */ 1903 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 1904 enter_critical(self); 1905 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1906 mp->mutex_owner = (uintptr_t)self; 1907 mp->mutex_ownerpid = udp->pid; 1908 exit_critical(self); 1909 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1910 return (0); 1911 } 1912 exit_critical(self); 1913 1914 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 1915 return (mutex_recursion(mp, mtype, try)); 1916 1917 if (try == MUTEX_LOCK) { 1918 if (mutex_trylock_process(mp, 1) == 0) 1919 return (0); 1920 return (mutex_lock_kernel(mp, tsp, NULL)); 1921 } 1922 1923 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 1924 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 1925 tdb_event(TD_LOCK_TRY, udp); 1926 } 1927 return (EBUSY); 1928 } 1929 1930 static int 1931 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 1932 { 1933 ulwp_t *self = curthread; 1934 uberdata_t *udp = self->ul_uberdata; 1935 uberflags_t *gflags; 1936 int mtype; 1937 1938 /* 1939 * Optimize the case of USYNC_THREAD, including 1940 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 1941 * no error detection, no lock statistics, 1942 * and the process has only a single thread. 1943 * (Most likely a traditional single-threaded application.) 1944 */ 1945 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 1946 udp->uberflags.uf_all) == 0) { 1947 /* 1948 * Only one thread exists so we don't need an atomic operation. 
1949 */ 1950 if (mp->mutex_lockw == 0) { 1951 mp->mutex_lockw = LOCKSET; 1952 mp->mutex_owner = (uintptr_t)self; 1953 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1954 return (0); 1955 } 1956 if (mtype && MUTEX_OWNER(mp) == self) 1957 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 1958 /* 1959 * We have reached a deadlock, probably because the 1960 * process is executing non-async-signal-safe code in 1961 * a signal handler and is attempting to acquire a lock 1962 * that it already owns. This is not surprising, given 1963 * bad programming practices over the years that have 1964 * resulted in applications calling printf() and such 1965 * in their signal handlers. Unless the user has told 1966 * us that the signal handlers are safe by setting: 1967 * export _THREAD_ASYNC_SAFE=1 1968 * we return EDEADLK rather than actually deadlocking. 1969 */ 1970 if (tsp == NULL && 1971 MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 1972 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1973 return (EDEADLK); 1974 } 1975 } 1976 1977 /* 1978 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 1979 * no error detection, and no lock statistics. 1980 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 1981 */ 1982 if ((gflags = self->ul_schedctl_called) != NULL && 1983 (gflags->uf_trs_ted | 1984 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 1985 if (mtype & USYNC_PROCESS) 1986 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 1987 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1988 mp->mutex_owner = (uintptr_t)self; 1989 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1990 return (0); 1991 } 1992 if (mtype && MUTEX_OWNER(mp) == self) 1993 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 1994 if (mutex_trylock_adaptive(mp, 1) != 0) 1995 return (mutex_lock_queue(self, NULL, mp, tsp)); 1996 return (0); 1997 } 1998 1999 /* else do it the long way */ 2000 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 2001 } 2002 2003 /* 2004 * Of the following function names (all the same function, of course), 2005 * only _private_mutex_lock() is not exported from libc. This means 2006 * that calling _private_mutex_lock() within libc will not invoke the 2007 * dynamic linker. This is critical for any code called in the child 2008 * of vfork() (via posix_spawn()) because invoking the dynamic linker 2009 * in such a case would corrupt the parent's address space. There are 2010 * other places in libc where avoiding the dynamic linker is necessary. 2011 * Of course, _private_mutex_lock() can be called in cases not requiring 2012 * the avoidance of the dynamic linker too, and often is. 
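 *
 * For illustration only (not part of this file): an ordinary application
 * call such as
 *
 *	pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
 *	(void) pthread_mutex_lock(&m);
 *	(void) pthread_mutex_unlock(&m);
 *
 * resolves through the weak aliases below to __mutex_lock() and, later in
 * this file, to __mutex_unlock().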
2013 */ 2014 #pragma weak _private_mutex_lock = __mutex_lock 2015 #pragma weak mutex_lock = __mutex_lock 2016 #pragma weak _mutex_lock = __mutex_lock 2017 #pragma weak pthread_mutex_lock = __mutex_lock 2018 #pragma weak _pthread_mutex_lock = __mutex_lock 2019 int 2020 __mutex_lock(mutex_t *mp) 2021 { 2022 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2023 return (mutex_lock_impl(mp, NULL)); 2024 } 2025 2026 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 2027 int 2028 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 2029 { 2030 timespec_t tslocal; 2031 int error; 2032 2033 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2034 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 2035 error = mutex_lock_impl(mp, &tslocal); 2036 if (error == ETIME) 2037 error = ETIMEDOUT; 2038 return (error); 2039 } 2040 2041 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 2042 int 2043 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 2044 { 2045 timespec_t tslocal; 2046 int error; 2047 2048 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2049 tslocal = *reltime; 2050 error = mutex_lock_impl(mp, &tslocal); 2051 if (error == ETIME) 2052 error = ETIMEDOUT; 2053 return (error); 2054 } 2055 2056 #pragma weak _private_mutex_trylock = __mutex_trylock 2057 #pragma weak mutex_trylock = __mutex_trylock 2058 #pragma weak _mutex_trylock = __mutex_trylock 2059 #pragma weak pthread_mutex_trylock = __mutex_trylock 2060 #pragma weak _pthread_mutex_trylock = __mutex_trylock 2061 int 2062 __mutex_trylock(mutex_t *mp) 2063 { 2064 ulwp_t *self = curthread; 2065 uberdata_t *udp = self->ul_uberdata; 2066 uberflags_t *gflags; 2067 int mtype; 2068 2069 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2070 /* 2071 * Optimize the case of USYNC_THREAD, including 2072 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2073 * no error detection, no lock statistics, 2074 * and the process has only a single thread. 2075 * (Most likely a traditional single-threaded application.) 2076 */ 2077 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2078 udp->uberflags.uf_all) == 0) { 2079 /* 2080 * Only one thread exists so we don't need an atomic operation. 2081 */ 2082 if (mp->mutex_lockw == 0) { 2083 mp->mutex_lockw = LOCKSET; 2084 mp->mutex_owner = (uintptr_t)self; 2085 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2086 return (0); 2087 } 2088 if (mtype && MUTEX_OWNER(mp) == self) 2089 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2090 return (EBUSY); 2091 } 2092 2093 /* 2094 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2095 * no error detection, and no lock statistics. 2096 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
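	 *
	 * (Added note, grounded in the code just below: the single test
	 * there does all of this at once.  gflags->uf_trs_ted is
	 * non-zero only when error detection or lock statistics are in
	 * effect, and masking mtype with ~(USYNC_PROCESS |
	 * LOCK_RECURSIVE | LOCK_ERRORCHECK) leaves zero only for the
	 * simple lock types, so OR-ing the two yields zero exactly in
	 * the cases this fast path is meant to handle.)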
2097 */ 2098 if ((gflags = self->ul_schedctl_called) != NULL && 2099 (gflags->uf_trs_ted | 2100 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2101 if (mtype & USYNC_PROCESS) 2102 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2103 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2104 mp->mutex_owner = (uintptr_t)self; 2105 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2106 return (0); 2107 } 2108 if (mtype && MUTEX_OWNER(mp) == self) 2109 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2110 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2111 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2112 tdb_event(TD_LOCK_TRY, udp); 2113 } 2114 return (EBUSY); 2115 } 2116 2117 /* else do it the long way */ 2118 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2119 } 2120 2121 int 2122 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2123 { 2124 ulwp_t *self = curthread; 2125 uberdata_t *udp = self->ul_uberdata; 2126 int mtype = mp->mutex_type; 2127 tdb_mutex_stats_t *msp; 2128 int error = 0; 2129 int release_all; 2130 lwpid_t lwpid; 2131 2132 if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 2133 return (EPERM); 2134 2135 if (self->ul_error_detection && !mutex_is_held(mp)) 2136 lock_error(mp, "mutex_unlock", NULL, NULL); 2137 2138 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2139 mp->mutex_rcount--; 2140 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2141 return (0); 2142 } 2143 2144 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2145 (void) record_hold_time(msp); 2146 2147 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2148 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2149 ASSERT(mp->mutex_type & LOCK_ROBUST); 2150 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2151 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2152 } 2153 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2154 2155 if (mtype & LOCK_PRIO_INHERIT) { 2156 no_preempt(self); 2157 mp->mutex_owner = 0; 2158 mp->mutex_ownerpid = 0; 2159 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2160 mp->mutex_lockw = LOCKCLEAR; 2161 error = ___lwp_mutex_unlock(mp); 2162 preempt(self); 2163 } else if (mtype & USYNC_PROCESS) { 2164 mutex_unlock_process(mp, release_all); 2165 } else { /* USYNC_THREAD */ 2166 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2167 (void) __lwp_unpark(lwpid); 2168 preempt(self); 2169 } 2170 } 2171 2172 if (mtype & LOCK_ROBUST) 2173 forget_lock(mp); 2174 2175 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2176 _ceil_prio_waive(); 2177 2178 return (error); 2179 } 2180 2181 #pragma weak _private_mutex_unlock = __mutex_unlock 2182 #pragma weak mutex_unlock = __mutex_unlock 2183 #pragma weak _mutex_unlock = __mutex_unlock 2184 #pragma weak pthread_mutex_unlock = __mutex_unlock 2185 #pragma weak _pthread_mutex_unlock = __mutex_unlock 2186 int 2187 __mutex_unlock(mutex_t *mp) 2188 { 2189 ulwp_t *self = curthread; 2190 uberdata_t *udp = self->ul_uberdata; 2191 uberflags_t *gflags; 2192 lwpid_t lwpid; 2193 int mtype; 2194 short el; 2195 2196 /* 2197 * Optimize the case of USYNC_THREAD, including 2198 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2199 * no error detection, no lock statistics, 2200 * and the process has only a single thread. 2201 * (Most likely a traditional single-threaded application.) 
2202 */ 2203 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2204 udp->uberflags.uf_all) == 0) { 2205 if (mtype) { 2206 /* 2207 * At this point we know that one or both of the 2208 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2209 */ 2210 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2211 return (EPERM); 2212 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2213 mp->mutex_rcount--; 2214 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2215 return (0); 2216 } 2217 } 2218 /* 2219 * Only one thread exists so we don't need an atomic operation. 2220 * Also, there can be no waiters. 2221 */ 2222 mp->mutex_owner = 0; 2223 mp->mutex_lockword = 0; 2224 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2225 return (0); 2226 } 2227 2228 /* 2229 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2230 * no error detection, and no lock statistics. 2231 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2232 */ 2233 if ((gflags = self->ul_schedctl_called) != NULL) { 2234 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2235 fast_unlock: 2236 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2237 (void) __lwp_unpark(lwpid); 2238 preempt(self); 2239 } 2240 return (0); 2241 } 2242 if (el) /* error detection or lock statistics */ 2243 goto slow_unlock; 2244 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2245 /* 2246 * At this point we know that one or both of the 2247 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2248 */ 2249 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2250 return (EPERM); 2251 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2252 mp->mutex_rcount--; 2253 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2254 return (0); 2255 } 2256 goto fast_unlock; 2257 } 2258 if ((mtype & 2259 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2260 /* 2261 * At this point we know that zero, one, or both of the 2262 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2263 * that the USYNC_PROCESS flag is set. 2264 */ 2265 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2266 return (EPERM); 2267 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2268 mp->mutex_rcount--; 2269 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2270 return (0); 2271 } 2272 mutex_unlock_process(mp, 0); 2273 return (0); 2274 } 2275 } 2276 2277 /* else do it the long way */ 2278 slow_unlock: 2279 return (mutex_unlock_internal(mp, 0)); 2280 } 2281 2282 /* 2283 * Internally to the library, almost all mutex lock/unlock actions 2284 * go through these lmutex_ functions, to protect critical regions. 2285 * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 2286 * to make these functions faster since we know that the mutex type 2287 * of all internal locks is USYNC_THREAD. We also know that internal 2288 * locking can never fail, so we panic if it does. 2289 */ 2290 void 2291 lmutex_lock(mutex_t *mp) 2292 { 2293 ulwp_t *self = curthread; 2294 uberdata_t *udp = self->ul_uberdata; 2295 2296 ASSERT(mp->mutex_type == USYNC_THREAD); 2297 2298 enter_critical(self); 2299 /* 2300 * Optimize the case of no lock statistics and only a single thread. 2301 * (Most likely a traditional single-threaded application.) 2302 */ 2303 if (udp->uberflags.uf_all == 0) { 2304 /* 2305 * Only one thread exists; the mutex must be free. 
2306 */ 2307 ASSERT(mp->mutex_lockw == 0); 2308 mp->mutex_lockw = LOCKSET; 2309 mp->mutex_owner = (uintptr_t)self; 2310 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2311 } else { 2312 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2313 2314 if (!self->ul_schedctl_called) 2315 (void) setup_schedctl(); 2316 2317 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2318 mp->mutex_owner = (uintptr_t)self; 2319 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2320 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2321 (void) mutex_lock_queue(self, msp, mp, NULL); 2322 } 2323 2324 if (msp) 2325 record_begin_hold(msp); 2326 } 2327 } 2328 2329 void 2330 lmutex_unlock(mutex_t *mp) 2331 { 2332 ulwp_t *self = curthread; 2333 uberdata_t *udp = self->ul_uberdata; 2334 2335 ASSERT(mp->mutex_type == USYNC_THREAD); 2336 2337 /* 2338 * Optimize the case of no lock statistics and only a single thread. 2339 * (Most likely a traditional single-threaded application.) 2340 */ 2341 if (udp->uberflags.uf_all == 0) { 2342 /* 2343 * Only one thread exists so there can be no waiters. 2344 */ 2345 mp->mutex_owner = 0; 2346 mp->mutex_lockword = 0; 2347 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2348 } else { 2349 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2350 lwpid_t lwpid; 2351 2352 if (msp) 2353 (void) record_hold_time(msp); 2354 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2355 (void) __lwp_unpark(lwpid); 2356 preempt(self); 2357 } 2358 } 2359 exit_critical(self); 2360 } 2361 2362 /* 2363 * For specialized code in libc, like the asynchronous i/o code, 2364 * the following sig_*() locking primitives are used in order 2365 * to make the code asynchronous signal safe. Signals are 2366 * deferred while locks acquired by these functions are held. 2367 */ 2368 void 2369 sig_mutex_lock(mutex_t *mp) 2370 { 2371 sigoff(curthread); 2372 (void) _private_mutex_lock(mp); 2373 } 2374 2375 void 2376 sig_mutex_unlock(mutex_t *mp) 2377 { 2378 (void) _private_mutex_unlock(mp); 2379 sigon(curthread); 2380 } 2381 2382 int 2383 sig_mutex_trylock(mutex_t *mp) 2384 { 2385 int error; 2386 2387 sigoff(curthread); 2388 if ((error = _private_mutex_trylock(mp)) != 0) 2389 sigon(curthread); 2390 return (error); 2391 } 2392 2393 /* 2394 * sig_cond_wait() is a cancellation point. 2395 */ 2396 int 2397 sig_cond_wait(cond_t *cv, mutex_t *mp) 2398 { 2399 int error; 2400 2401 ASSERT(curthread->ul_sigdefer != 0); 2402 _private_testcancel(); 2403 error = __cond_wait(cv, mp); 2404 if (error == EINTR && curthread->ul_cursig) { 2405 sig_mutex_unlock(mp); 2406 /* take the deferred signal here */ 2407 sig_mutex_lock(mp); 2408 } 2409 _private_testcancel(); 2410 return (error); 2411 } 2412 2413 /* 2414 * sig_cond_reltimedwait() is a cancellation point. 2415 */ 2416 int 2417 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2418 { 2419 int error; 2420 2421 ASSERT(curthread->ul_sigdefer != 0); 2422 _private_testcancel(); 2423 error = __cond_reltimedwait(cv, mp, ts); 2424 if (error == EINTR && curthread->ul_cursig) { 2425 sig_mutex_unlock(mp); 2426 /* take the deferred signal here */ 2427 sig_mutex_lock(mp); 2428 } 2429 _private_testcancel(); 2430 return (error); 2431 } 2432 2433 /* 2434 * For specialized code in libc, like the stdio code. 2435 * the following cancel_safe_*() locking primitives are used in 2436 * order to make the code cancellation-safe. Cancellation is 2437 * deferred while locks acquired by these functions are held. 
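 *
 * As an illustrative sketch only (a hypothetical caller, not code that
 * appears in libc), usage is a simple bracketing of the critical
 * region; 'example_lock' is an imaginary USYNC_THREAD mutex:
 *
 *	static mutex_t example_lock = DEFAULTMUTEX;
 *
 *	cancel_safe_mutex_lock(&example_lock);
 *	... operate on the state protected by example_lock ...
 *	cancel_safe_mutex_unlock(&example_lock);
 *
 * A cancellation that becomes pending while the lock is held may be
 * acted upon in cancel_safe_mutex_unlock(), but only after the count
 * of libc locks held by the thread has dropped back to zero.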
2438 */ 2439 void 2440 cancel_safe_mutex_lock(mutex_t *mp) 2441 { 2442 (void) _private_mutex_lock(mp); 2443 curthread->ul_libc_locks++; 2444 } 2445 2446 int 2447 cancel_safe_mutex_trylock(mutex_t *mp) 2448 { 2449 int error; 2450 2451 if ((error = _private_mutex_trylock(mp)) == 0) 2452 curthread->ul_libc_locks++; 2453 return (error); 2454 } 2455 2456 void 2457 cancel_safe_mutex_unlock(mutex_t *mp) 2458 { 2459 ulwp_t *self = curthread; 2460 2461 ASSERT(self->ul_libc_locks != 0); 2462 2463 (void) _private_mutex_unlock(mp); 2464 2465 /* 2466 * Decrement the count of locks held by cancel_safe_mutex_lock(). 2467 * If we are then in a position to terminate cleanly and 2468 * if there is a pending cancellation and cancellation 2469 * is not disabled and we received EINTR from a recent 2470 * system call then perform the cancellation action now. 2471 */ 2472 if (--self->ul_libc_locks == 0 && 2473 !(self->ul_vfork | self->ul_nocancel | 2474 self->ul_critical | self->ul_sigdefer) && 2475 cancel_active()) 2476 _pthread_exit(PTHREAD_CANCELED); 2477 } 2478 2479 static int 2480 shared_mutex_held(mutex_t *mparg) 2481 { 2482 /* 2483 * The 'volatile' is necessary to make sure the compiler doesn't 2484 * reorder the tests of the various components of the mutex. 2485 * They must be tested in this order: 2486 * mutex_lockw 2487 * mutex_owner 2488 * mutex_ownerpid 2489 * This relies on the fact that everywhere mutex_lockw is cleared, 2490 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2491 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2492 * and mutex_ownerpid are set after mutex_lockw is set, and that 2493 * mutex_lockw is set or cleared with a memory barrier. 2494 */ 2495 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2496 ulwp_t *self = curthread; 2497 uberdata_t *udp = self->ul_uberdata; 2498 2499 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2500 } 2501 2502 /* 2503 * Some crufty old programs define their own version of _mutex_held() 2504 * to be simply return(1). This breaks internal libc logic, so we 2505 * define a private version for exclusive use by libc, mutex_is_held(), 2506 * and also a new public function, __mutex_held(), to be used in new 2507 * code to circumvent these crufty old programs. 
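 *
 * (Added note: within libc the private name is what gets used; for
 * example, the error-detection checks in mutex_unlock_internal() and
 * the abandoned-lock sweep in heldlock_exit() both call
 * mutex_is_held() directly.  New application code wanting a reliable
 * answer should call the public __mutex_held(), which maps onto the
 * same function and is not affected by those legacy _mutex_held()
 * definitions.)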
2508 */ 2509 #pragma weak mutex_held = mutex_is_held 2510 #pragma weak _mutex_held = mutex_is_held 2511 #pragma weak __mutex_held = mutex_is_held 2512 int 2513 mutex_is_held(mutex_t *mparg) 2514 { 2515 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2516 2517 if (mparg->mutex_type & USYNC_PROCESS) 2518 return (shared_mutex_held(mparg)); 2519 return (MUTEX_OWNED(mp, curthread)); 2520 } 2521 2522 #pragma weak _private_mutex_destroy = __mutex_destroy 2523 #pragma weak mutex_destroy = __mutex_destroy 2524 #pragma weak _mutex_destroy = __mutex_destroy 2525 #pragma weak pthread_mutex_destroy = __mutex_destroy 2526 #pragma weak _pthread_mutex_destroy = __mutex_destroy 2527 int 2528 __mutex_destroy(mutex_t *mp) 2529 { 2530 if (mp->mutex_type & USYNC_PROCESS) 2531 forget_lock(mp); 2532 (void) _memset(mp, 0, sizeof (*mp)); 2533 tdb_sync_obj_deregister(mp); 2534 return (0); 2535 } 2536 2537 #pragma weak mutex_consistent = __mutex_consistent 2538 #pragma weak _mutex_consistent = __mutex_consistent 2539 #pragma weak pthread_mutex_consistent_np = __mutex_consistent 2540 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 2541 int 2542 __mutex_consistent(mutex_t *mp) 2543 { 2544 /* 2545 * Do this only for an inconsistent, initialized robust lock 2546 * that we hold. For all other cases, return EINVAL. 2547 */ 2548 if (mutex_is_held(mp) && 2549 (mp->mutex_type & LOCK_ROBUST) && 2550 (mp->mutex_flag & LOCK_INITED) && 2551 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2552 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2553 mp->mutex_rcount = 0; 2554 return (0); 2555 } 2556 return (EINVAL); 2557 } 2558 2559 /* 2560 * Spin locks are separate from ordinary mutexes, 2561 * but we use the same data structure for them. 2562 */ 2563 2564 #pragma weak pthread_spin_init = _pthread_spin_init 2565 int 2566 _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2567 { 2568 mutex_t *mp = (mutex_t *)lock; 2569 2570 (void) _memset(mp, 0, sizeof (*mp)); 2571 if (pshared == PTHREAD_PROCESS_SHARED) 2572 mp->mutex_type = USYNC_PROCESS; 2573 else 2574 mp->mutex_type = USYNC_THREAD; 2575 mp->mutex_flag = LOCK_INITED; 2576 mp->mutex_magic = MUTEX_MAGIC; 2577 return (0); 2578 } 2579 2580 #pragma weak pthread_spin_destroy = _pthread_spin_destroy 2581 int 2582 _pthread_spin_destroy(pthread_spinlock_t *lock) 2583 { 2584 (void) _memset(lock, 0, sizeof (*lock)); 2585 return (0); 2586 } 2587 2588 #pragma weak pthread_spin_trylock = _pthread_spin_trylock 2589 int 2590 _pthread_spin_trylock(pthread_spinlock_t *lock) 2591 { 2592 mutex_t *mp = (mutex_t *)lock; 2593 ulwp_t *self = curthread; 2594 int error = 0; 2595 2596 no_preempt(self); 2597 if (set_lock_byte(&mp->mutex_lockw) != 0) 2598 error = EBUSY; 2599 else { 2600 mp->mutex_owner = (uintptr_t)self; 2601 if (mp->mutex_type == USYNC_PROCESS) 2602 mp->mutex_ownerpid = self->ul_uberdata->pid; 2603 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2604 } 2605 preempt(self); 2606 return (error); 2607 } 2608 2609 #pragma weak pthread_spin_lock = _pthread_spin_lock 2610 int 2611 _pthread_spin_lock(pthread_spinlock_t *lock) 2612 { 2613 mutex_t *mp = (mutex_t *)lock; 2614 ulwp_t *self = curthread; 2615 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2616 int count = 0; 2617 2618 ASSERT(!self->ul_critical || self->ul_bindflags); 2619 2620 DTRACE_PROBE1(plockstat, mutex__spin, mp); 2621 2622 /* 2623 * We don't care whether the owner is running on a processor. 2624 * We just spin because that's what this interface requires. 
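	 *
	 * The loop below first peeks at the lock byte with an ordinary
	 * load and only attempts the atomic set_lock_byte() while
	 * preemption is disabled, issuing SMT_PAUSE() between attempts
	 * so that a sibling hardware strand sharing the pipeline is
	 * not starved while we spin.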
2625 */ 2626 for (;;) { 2627 if (*lockp == 0) { /* lock byte appears to be clear */ 2628 no_preempt(self); 2629 if (set_lock_byte(lockp) == 0) 2630 break; 2631 preempt(self); 2632 } 2633 if (count < INT_MAX) 2634 count++; 2635 SMT_PAUSE(); 2636 } 2637 mp->mutex_owner = (uintptr_t)self; 2638 if (mp->mutex_type == USYNC_PROCESS) 2639 mp->mutex_ownerpid = self->ul_uberdata->pid; 2640 preempt(self); 2641 if (count) { 2642 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 2643 } 2644 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2645 return (0); 2646 } 2647 2648 #pragma weak pthread_spin_unlock = _pthread_spin_unlock 2649 int 2650 _pthread_spin_unlock(pthread_spinlock_t *lock) 2651 { 2652 mutex_t *mp = (mutex_t *)lock; 2653 ulwp_t *self = curthread; 2654 2655 no_preempt(self); 2656 mp->mutex_owner = 0; 2657 mp->mutex_ownerpid = 0; 2658 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2659 (void) atomic_swap_32(&mp->mutex_lockword, 0); 2660 preempt(self); 2661 return (0); 2662 } 2663 2664 #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 2665 2666 /* 2667 * Find/allocate an entry for 'lock' in our array of held locks. 2668 */ 2669 static mutex_t ** 2670 find_lock_entry(mutex_t *lock) 2671 { 2672 ulwp_t *self = curthread; 2673 mutex_t **remembered = NULL; 2674 mutex_t **lockptr; 2675 uint_t nlocks; 2676 2677 if ((nlocks = self->ul_heldlockcnt) != 0) 2678 lockptr = self->ul_heldlocks.array; 2679 else { 2680 nlocks = 1; 2681 lockptr = &self->ul_heldlocks.single; 2682 } 2683 2684 for (; nlocks; nlocks--, lockptr++) { 2685 if (*lockptr == lock) 2686 return (lockptr); 2687 if (*lockptr == NULL && remembered == NULL) 2688 remembered = lockptr; 2689 } 2690 if (remembered != NULL) { 2691 *remembered = lock; 2692 return (remembered); 2693 } 2694 2695 /* 2696 * No entry available. Allocate more space, converting 2697 * the single entry into an array of entries if necessary. 2698 */ 2699 if ((nlocks = self->ul_heldlockcnt) == 0) { 2700 /* 2701 * Initial allocation of the array. 2702 * Convert the single entry into an array. 2703 */ 2704 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 2705 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 2706 /* 2707 * The single entry becomes the first entry in the array. 2708 */ 2709 *lockptr = self->ul_heldlocks.single; 2710 self->ul_heldlocks.array = lockptr; 2711 /* 2712 * Return the next available entry in the array. 2713 */ 2714 *++lockptr = lock; 2715 return (lockptr); 2716 } 2717 /* 2718 * Reallocate the array, double the size each time. 2719 */ 2720 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 2721 (void) _memcpy(lockptr, self->ul_heldlocks.array, 2722 nlocks * sizeof (mutex_t *)); 2723 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2724 self->ul_heldlocks.array = lockptr; 2725 self->ul_heldlockcnt *= 2; 2726 /* 2727 * Return the next available entry in the newly allocated array. 2728 */ 2729 *(lockptr += nlocks) = lock; 2730 return (lockptr); 2731 } 2732 2733 /* 2734 * Insert 'lock' into our list of held locks. 2735 * Currently only used for LOCK_ROBUST mutexes. 2736 */ 2737 void 2738 remember_lock(mutex_t *lock) 2739 { 2740 (void) find_lock_entry(lock); 2741 } 2742 2743 /* 2744 * Remove 'lock' from our list of held locks. 2745 * Currently only used for LOCK_ROBUST mutexes. 2746 */ 2747 void 2748 forget_lock(mutex_t *lock) 2749 { 2750 *find_lock_entry(lock) = NULL; 2751 } 2752 2753 /* 2754 * Free the array of held locks. 
2755 */ 2756 void 2757 heldlock_free(ulwp_t *ulwp) 2758 { 2759 uint_t nlocks; 2760 2761 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 2762 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2763 ulwp->ul_heldlockcnt = 0; 2764 ulwp->ul_heldlocks.array = NULL; 2765 } 2766 2767 /* 2768 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 2769 * Called from _thrp_exit() to deal with abandoned locks. 2770 */ 2771 void 2772 heldlock_exit(void) 2773 { 2774 ulwp_t *self = curthread; 2775 mutex_t **lockptr; 2776 uint_t nlocks; 2777 mutex_t *mp; 2778 2779 if ((nlocks = self->ul_heldlockcnt) != 0) 2780 lockptr = self->ul_heldlocks.array; 2781 else { 2782 nlocks = 1; 2783 lockptr = &self->ul_heldlocks.single; 2784 } 2785 2786 for (; nlocks; nlocks--, lockptr++) { 2787 /* 2788 * The kernel takes care of transitioning held 2789 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 2790 * We avoid that case here. 2791 */ 2792 if ((mp = *lockptr) != NULL && 2793 mutex_is_held(mp) && 2794 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 2795 LOCK_ROBUST) { 2796 mp->mutex_rcount = 0; 2797 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 2798 mp->mutex_flag |= LOCK_OWNERDEAD; 2799 (void) mutex_unlock_internal(mp, 1); 2800 } 2801 } 2802 2803 heldlock_free(self); 2804 } 2805 2806 #pragma weak cond_init = _cond_init 2807 /* ARGSUSED2 */ 2808 int 2809 _cond_init(cond_t *cvp, int type, void *arg) 2810 { 2811 if (type != USYNC_THREAD && type != USYNC_PROCESS) 2812 return (EINVAL); 2813 (void) _memset(cvp, 0, sizeof (*cvp)); 2814 cvp->cond_type = (uint16_t)type; 2815 cvp->cond_magic = COND_MAGIC; 2816 return (0); 2817 } 2818 2819 /* 2820 * cond_sleep_queue(): utility function for cond_wait_queue(). 2821 * 2822 * Go to sleep on a condvar sleep queue, expect to be waked up 2823 * by someone calling cond_signal() or cond_broadcast() or due 2824 * to receiving a UNIX signal or being cancelled, or just simply 2825 * due to a spurious wakeup (like someome calling forkall()). 2826 * 2827 * The associated mutex is *not* reacquired before returning. 2828 * That must be done by the caller of cond_sleep_queue(). 2829 */ 2830 static int 2831 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2832 { 2833 ulwp_t *self = curthread; 2834 queue_head_t *qp; 2835 queue_head_t *mqp; 2836 lwpid_t lwpid; 2837 int signalled; 2838 int error; 2839 int release_all; 2840 2841 /* 2842 * Put ourself on the CV sleep queue, unlock the mutex, then 2843 * park ourself and unpark a candidate lwp to grab the mutex. 2844 * We must go onto the CV sleep queue before dropping the 2845 * mutex in order to guarantee atomicity of the operation. 2846 */ 2847 self->ul_sp = stkptr(); 2848 qp = queue_lock(cvp, CV); 2849 enqueue(qp, self, cvp, CV); 2850 cvp->cond_waiters_user = 1; 2851 self->ul_cvmutex = mp; 2852 self->ul_cv_wake = (tsp != NULL); 2853 self->ul_signalled = 0; 2854 if (mp->mutex_flag & LOCK_OWNERDEAD) { 2855 mp->mutex_flag &= ~LOCK_OWNERDEAD; 2856 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2857 } 2858 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2859 lwpid = mutex_unlock_queue(mp, release_all); 2860 for (;;) { 2861 set_parking_flag(self, 1); 2862 queue_unlock(qp); 2863 if (lwpid != 0) { 2864 lwpid = preempt_unpark(self, lwpid); 2865 preempt(self); 2866 } 2867 /* 2868 * We may have a deferred signal present, 2869 * in which case we should return EINTR. 2870 * Also, we may have received a SIGCANCEL; if so 2871 * and we are cancelable we should return EINTR. 
2872 * We force an immediate EINTR return from 2873 * __lwp_park() by turning our parking flag off. 2874 */ 2875 if (self->ul_cursig != 0 || 2876 (self->ul_cancelable && self->ul_cancel_pending)) 2877 set_parking_flag(self, 0); 2878 /* 2879 * __lwp_park() will return the residual time in tsp 2880 * if we are unparked before the timeout expires. 2881 */ 2882 error = __lwp_park(tsp, lwpid); 2883 set_parking_flag(self, 0); 2884 lwpid = 0; /* unpark the other lwp only once */ 2885 /* 2886 * We were waked up by cond_signal(), cond_broadcast(), 2887 * by an interrupt or timeout (EINTR or ETIME), 2888 * or we may just have gotten a spurious wakeup. 2889 */ 2890 qp = queue_lock(cvp, CV); 2891 mqp = queue_lock(mp, MX); 2892 if (self->ul_sleepq == NULL) 2893 break; 2894 /* 2895 * We are on either the condvar sleep queue or the 2896 * mutex sleep queue. Break out of the sleep if we 2897 * were interrupted or we timed out (EINTR or ETIME). 2898 * Else this is a spurious wakeup; continue the loop. 2899 */ 2900 if (self->ul_sleepq == mqp) { /* mutex queue */ 2901 if (error) { 2902 mp->mutex_waiters = dequeue_self(mqp, mp); 2903 break; 2904 } 2905 tsp = NULL; /* no more timeout */ 2906 } else if (self->ul_sleepq == qp) { /* condvar queue */ 2907 if (error) { 2908 cvp->cond_waiters_user = dequeue_self(qp, cvp); 2909 break; 2910 } 2911 /* 2912 * Else a spurious wakeup on the condvar queue. 2913 * __lwp_park() has already adjusted the timeout. 2914 */ 2915 } else { 2916 thr_panic("cond_sleep_queue(): thread not on queue"); 2917 } 2918 queue_unlock(mqp); 2919 } 2920 2921 self->ul_sp = 0; 2922 ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); 2923 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 2924 self->ul_wchan == NULL); 2925 2926 signalled = self->ul_signalled; 2927 self->ul_signalled = 0; 2928 queue_unlock(qp); 2929 queue_unlock(mqp); 2930 2931 /* 2932 * If we were concurrently cond_signal()d and any of: 2933 * received a UNIX signal, were cancelled, or got a timeout, 2934 * then perform another cond_signal() to avoid consuming it. 2935 */ 2936 if (error && signalled) 2937 (void) cond_signal_internal(cvp); 2938 2939 return (error); 2940 } 2941 2942 int 2943 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2944 { 2945 ulwp_t *self = curthread; 2946 int error; 2947 int merror; 2948 2949 /* 2950 * The old thread library was programmed to defer signals 2951 * while in cond_wait() so that the associated mutex would 2952 * be guaranteed to be held when the application signal 2953 * handler was invoked. 2954 * 2955 * We do not behave this way by default; the state of the 2956 * associated mutex in the signal handler is undefined. 2957 * 2958 * To accommodate applications that depend on the old 2959 * behavior, the _THREAD_COND_WAIT_DEFER environment 2960 * variable can be set to 1 and we will behave in the 2961 * old way with respect to cond_wait(). 2962 */ 2963 if (self->ul_cond_wait_defer) 2964 sigoff(self); 2965 2966 error = cond_sleep_queue(cvp, mp, tsp); 2967 2968 /* 2969 * Reacquire the mutex. 2970 */ 2971 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 2972 error = merror; 2973 2974 /* 2975 * Take any deferred signal now, after we have reacquired the mutex. 2976 */ 2977 if (self->ul_cond_wait_defer) 2978 sigon(self); 2979 2980 return (error); 2981 } 2982 2983 /* 2984 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 2985 * See the comment ahead of cond_sleep_queue(), above. 
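 *
 * Unlike cond_sleep_queue(), which parks the thread on a user-level
 * sleep queue, this path hands the entire wait to the kernel via
 * ___lwp_cond_wait().  cond_wait_common(), below, selects it when the
 * mutex is process-shared, PRIO_INHERIT, or PRIO_PROTECT, or when the
 * condvar itself is process-shared.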
2986 */ 2987 static int 2988 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2989 { 2990 int mtype = mp->mutex_type; 2991 ulwp_t *self = curthread; 2992 int error; 2993 2994 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2995 _ceil_prio_waive(); 2996 2997 self->ul_sp = stkptr(); 2998 self->ul_wchan = cvp; 2999 mp->mutex_owner = 0; 3000 mp->mutex_ownerpid = 0; 3001 if (mtype & LOCK_PRIO_INHERIT) 3002 mp->mutex_lockw = LOCKCLEAR; 3003 /* 3004 * ___lwp_cond_wait() returns immediately with EINTR if 3005 * set_parking_flag(self,0) is called on this lwp before it 3006 * goes to sleep in the kernel. sigacthandler() calls this 3007 * when a deferred signal is noted. This assures that we don't 3008 * get stuck in ___lwp_cond_wait() with all signals blocked 3009 * due to taking a deferred signal before going to sleep. 3010 */ 3011 set_parking_flag(self, 1); 3012 if (self->ul_cursig != 0 || 3013 (self->ul_cancelable && self->ul_cancel_pending)) 3014 set_parking_flag(self, 0); 3015 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 3016 set_parking_flag(self, 0); 3017 self->ul_sp = 0; 3018 self->ul_wchan = NULL; 3019 return (error); 3020 } 3021 3022 int 3023 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3024 { 3025 ulwp_t *self = curthread; 3026 int error; 3027 int merror; 3028 3029 /* 3030 * See the large comment in cond_wait_queue(), above. 3031 */ 3032 if (self->ul_cond_wait_defer) 3033 sigoff(self); 3034 3035 error = cond_sleep_kernel(cvp, mp, tsp); 3036 3037 /* 3038 * Override the return code from ___lwp_cond_wait() 3039 * with any non-zero return code from mutex_lock(). 3040 * This addresses robust lock failures in particular; 3041 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 3042 * errors in order to take corrective action. 3043 */ 3044 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3045 error = merror; 3046 3047 /* 3048 * Take any deferred signal now, after we have reacquired the mutex. 3049 */ 3050 if (self->ul_cond_wait_defer) 3051 sigon(self); 3052 3053 return (error); 3054 } 3055 3056 /* 3057 * Common code for _cond_wait() and _cond_timedwait() 3058 */ 3059 int 3060 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3061 { 3062 int mtype = mp->mutex_type; 3063 hrtime_t begin_sleep = 0; 3064 ulwp_t *self = curthread; 3065 uberdata_t *udp = self->ul_uberdata; 3066 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3067 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 3068 uint8_t rcount; 3069 int error = 0; 3070 3071 /* 3072 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 3073 * Except in the case of [ETIMEDOUT], all these error checks 3074 * shall act as if they were performed immediately at the 3075 * beginning of processing for the function and shall cause 3076 * an error return, in effect, prior to modifying the state 3077 * of the mutex specified by mutex or the condition variable 3078 * specified by cond. 3079 * Therefore, we must return EINVAL now if the timout is invalid. 
3080 */ 3081 if (tsp != NULL && 3082 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 3083 return (EINVAL); 3084 3085 if (__td_event_report(self, TD_SLEEP, udp)) { 3086 self->ul_sp = stkptr(); 3087 self->ul_wchan = cvp; 3088 self->ul_td_evbuf.eventnum = TD_SLEEP; 3089 self->ul_td_evbuf.eventdata = cvp; 3090 tdb_event(TD_SLEEP, udp); 3091 self->ul_sp = 0; 3092 } 3093 if (csp) { 3094 if (tsp) 3095 tdb_incr(csp->cond_timedwait); 3096 else 3097 tdb_incr(csp->cond_wait); 3098 } 3099 if (msp) 3100 begin_sleep = record_hold_time(msp); 3101 else if (csp) 3102 begin_sleep = gethrtime(); 3103 3104 if (self->ul_error_detection) { 3105 if (!mutex_is_held(mp)) 3106 lock_error(mp, "cond_wait", cvp, NULL); 3107 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3108 lock_error(mp, "recursive mutex in cond_wait", 3109 cvp, NULL); 3110 if (cvp->cond_type & USYNC_PROCESS) { 3111 if (!(mtype & USYNC_PROCESS)) 3112 lock_error(mp, "cond_wait", cvp, 3113 "condvar process-shared, " 3114 "mutex process-private"); 3115 } else { 3116 if (mtype & USYNC_PROCESS) 3117 lock_error(mp, "cond_wait", cvp, 3118 "condvar process-private, " 3119 "mutex process-shared"); 3120 } 3121 } 3122 3123 /* 3124 * We deal with recursive mutexes by completely 3125 * dropping the lock and restoring the recursion 3126 * count after waking up. This is arguably wrong, 3127 * but it obeys the principle of least astonishment. 3128 */ 3129 rcount = mp->mutex_rcount; 3130 mp->mutex_rcount = 0; 3131 if ((mtype & 3132 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3133 (cvp->cond_type & USYNC_PROCESS)) 3134 error = cond_wait_kernel(cvp, mp, tsp); 3135 else 3136 error = cond_wait_queue(cvp, mp, tsp); 3137 mp->mutex_rcount = rcount; 3138 3139 if (csp) { 3140 hrtime_t lapse = gethrtime() - begin_sleep; 3141 if (tsp == NULL) 3142 csp->cond_wait_sleep_time += lapse; 3143 else { 3144 csp->cond_timedwait_sleep_time += lapse; 3145 if (error == ETIME) 3146 tdb_incr(csp->cond_timedwait_timeout); 3147 } 3148 } 3149 return (error); 3150 } 3151 3152 /* 3153 * cond_wait() and _cond_wait() are cancellation points but __cond_wait() 3154 * is not. Internally, libc calls the non-cancellation version. 3155 * Other libraries need to use pthread_setcancelstate(), as appropriate, 3156 * since __cond_wait() is not exported from libc. 3157 */ 3158 int 3159 __cond_wait(cond_t *cvp, mutex_t *mp) 3160 { 3161 ulwp_t *self = curthread; 3162 uberdata_t *udp = self->ul_uberdata; 3163 uberflags_t *gflags; 3164 3165 /* 3166 * Optimize the common case of USYNC_THREAD plus 3167 * no error detection, no lock statistics, and no event tracing. 3168 */ 3169 if ((gflags = self->ul_schedctl_called) != NULL && 3170 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3171 self->ul_td_events_enable | 3172 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3173 return (cond_wait_queue(cvp, mp, NULL)); 3174 3175 /* 3176 * Else do it the long way. 3177 */ 3178 return (cond_wait_common(cvp, mp, NULL)); 3179 } 3180 3181 #pragma weak cond_wait = _cond_wait 3182 int 3183 _cond_wait(cond_t *cvp, mutex_t *mp) 3184 { 3185 int error; 3186 3187 _cancelon(); 3188 error = __cond_wait(cvp, mp); 3189 if (error == EINTR) 3190 _canceloff(); 3191 else 3192 _canceloff_nocancel(); 3193 return (error); 3194 } 3195 3196 /* 3197 * pthread_cond_wait() is a cancellation point. 3198 */ 3199 #pragma weak pthread_cond_wait = _pthread_cond_wait 3200 int 3201 _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 3202 { 3203 int error; 3204 3205 error = _cond_wait(cvp, mp); 3206 return ((error == EINTR)? 
0 : error); 3207 } 3208 3209 /* 3210 * cond_timedwait() and _cond_timedwait() are cancellation points 3211 * but __cond_timedwait() is not. 3212 */ 3213 int 3214 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3215 { 3216 clockid_t clock_id = cvp->cond_clockid; 3217 timespec_t reltime; 3218 int error; 3219 3220 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3221 clock_id = CLOCK_REALTIME; 3222 abstime_to_reltime(clock_id, abstime, &reltime); 3223 error = cond_wait_common(cvp, mp, &reltime); 3224 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3225 /* 3226 * Don't return ETIME if we didn't really get a timeout. 3227 * This can happen if we return because someone resets 3228 * the system clock. Just return zero in this case, 3229 * giving a spurious wakeup but not a timeout. 3230 */ 3231 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3232 abstime->tv_nsec > gethrtime()) 3233 error = 0; 3234 } 3235 return (error); 3236 } 3237 3238 #pragma weak cond_timedwait = _cond_timedwait 3239 int 3240 _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3241 { 3242 int error; 3243 3244 _cancelon(); 3245 error = __cond_timedwait(cvp, mp, abstime); 3246 if (error == EINTR) 3247 _canceloff(); 3248 else 3249 _canceloff_nocancel(); 3250 return (error); 3251 } 3252 3253 /* 3254 * pthread_cond_timedwait() is a cancellation point. 3255 */ 3256 #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 3257 int 3258 _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3259 { 3260 int error; 3261 3262 error = _cond_timedwait(cvp, mp, abstime); 3263 if (error == ETIME) 3264 error = ETIMEDOUT; 3265 else if (error == EINTR) 3266 error = 0; 3267 return (error); 3268 } 3269 3270 /* 3271 * cond_reltimedwait() and _cond_reltimedwait() are cancellation points 3272 * but __cond_reltimedwait() is not. 
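 *
 * For illustration only (a hypothetical caller, not code from libc):
 * the relative-time interface takes a countdown rather than an
 * absolute deadline, so a bounded five-second wait on a condvar 'cv'
 * protected by mutex 'm' (both imaginary names) looks like
 *
 *	timespec_t delay = { 5, 0 };
 *	int err = pthread_cond_reltimedwait_np(&cv, &m, &delay);
 *
 * where pthread_cond_timedwait() would instead be handed the current
 * CLOCK_REALTIME time plus five seconds.  A timeout is reported as
 * ETIMEDOUT, and as with any condition wait the caller should
 * re-check its predicate in a loop, since a return of zero can be a
 * spurious wakeup.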
3273 */ 3274 int 3275 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3276 { 3277 timespec_t tslocal = *reltime; 3278 3279 return (cond_wait_common(cvp, mp, &tslocal)); 3280 } 3281 3282 #pragma weak cond_reltimedwait = _cond_reltimedwait 3283 int 3284 _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3285 { 3286 int error; 3287 3288 _cancelon(); 3289 error = __cond_reltimedwait(cvp, mp, reltime); 3290 if (error == EINTR) 3291 _canceloff(); 3292 else 3293 _canceloff_nocancel(); 3294 return (error); 3295 } 3296 3297 #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 3298 int 3299 _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 3300 const timespec_t *reltime) 3301 { 3302 int error; 3303 3304 error = _cond_reltimedwait(cvp, mp, reltime); 3305 if (error == ETIME) 3306 error = ETIMEDOUT; 3307 else if (error == EINTR) 3308 error = 0; 3309 return (error); 3310 } 3311 3312 #pragma weak pthread_cond_signal = cond_signal_internal 3313 #pragma weak _pthread_cond_signal = cond_signal_internal 3314 #pragma weak cond_signal = cond_signal_internal 3315 #pragma weak _cond_signal = cond_signal_internal 3316 int 3317 cond_signal_internal(cond_t *cvp) 3318 { 3319 ulwp_t *self = curthread; 3320 uberdata_t *udp = self->ul_uberdata; 3321 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3322 int error = 0; 3323 queue_head_t *qp; 3324 mutex_t *mp; 3325 queue_head_t *mqp; 3326 ulwp_t **ulwpp; 3327 ulwp_t *ulwp; 3328 ulwp_t *prev = NULL; 3329 ulwp_t *next; 3330 ulwp_t **suspp = NULL; 3331 ulwp_t *susprev; 3332 3333 if (csp) 3334 tdb_incr(csp->cond_signal); 3335 3336 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3337 error = __lwp_cond_signal(cvp); 3338 3339 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3340 return (error); 3341 3342 /* 3343 * Move someone from the condvar sleep queue to the mutex sleep 3344 * queue for the mutex that he will acquire on being waked up. 3345 * We can do this only if we own the mutex he will acquire. 3346 * If we do not own the mutex, or if his ul_cv_wake flag 3347 * is set, just dequeue and unpark him. 3348 */ 3349 qp = queue_lock(cvp, CV); 3350 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 3351 prev = ulwp, ulwpp = &ulwp->ul_link) { 3352 if (ulwp->ul_wchan == cvp) { 3353 if (!ulwp->ul_stop) 3354 break; 3355 /* 3356 * Try not to dequeue a suspended thread. 3357 * This mimics the old libthread's behavior. 3358 */ 3359 if (suspp == NULL) { 3360 suspp = ulwpp; 3361 susprev = prev; 3362 } 3363 } 3364 } 3365 if (ulwp == NULL && suspp != NULL) { 3366 ulwp = *(ulwpp = suspp); 3367 prev = susprev; 3368 suspp = NULL; 3369 } 3370 if (ulwp == NULL) { /* no one on the sleep queue */ 3371 cvp->cond_waiters_user = 0; 3372 queue_unlock(qp); 3373 return (error); 3374 } 3375 /* 3376 * Scan the remainder of the CV queue for another waiter. 3377 */ 3378 if (suspp != NULL) { 3379 next = *suspp; 3380 } else { 3381 for (next = ulwp->ul_link; next != NULL; next = next->ul_link) 3382 if (next->ul_wchan == cvp) 3383 break; 3384 } 3385 if (next == NULL) 3386 cvp->cond_waiters_user = 0; 3387 3388 /* 3389 * Inform the thread that he was the recipient of a cond_signal(). 3390 * This lets him deal with cond_signal() and, concurrently, 3391 * one or more of a cancellation, a UNIX signal, or a timeout. 3392 * These latter conditions must not consume a cond_signal(). 
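	 *
	 * cond_sleep_queue() honors this: a waiter that finds
	 * ul_signalled set but is returning for one of those other
	 * reasons re-issues cond_signal_internal() on its way out,
	 * so the wakeup is passed on rather than lost.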
3393 */ 3394 ulwp->ul_signalled = 1; 3395 3396 /* 3397 * Dequeue the waiter but leave his ul_sleepq non-NULL 3398 * while we move him to the mutex queue so that he can 3399 * deal properly with spurious wakeups. 3400 */ 3401 *ulwpp = ulwp->ul_link; 3402 ulwp->ul_link = NULL; 3403 if (qp->qh_tail == ulwp) 3404 qp->qh_tail = prev; 3405 qp->qh_qlen--; 3406 3407 mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 3408 ulwp->ul_cvmutex = NULL; 3409 ASSERT(mp != NULL); 3410 3411 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3412 lwpid_t lwpid = ulwp->ul_lwpid; 3413 3414 no_preempt(self); 3415 ulwp->ul_sleepq = NULL; 3416 ulwp->ul_wchan = NULL; 3417 ulwp->ul_cv_wake = 0; 3418 queue_unlock(qp); 3419 (void) __lwp_unpark(lwpid); 3420 preempt(self); 3421 } else { 3422 mqp = queue_lock(mp, MX); 3423 enqueue(mqp, ulwp, mp, MX); 3424 mp->mutex_waiters = 1; 3425 queue_unlock(mqp); 3426 queue_unlock(qp); 3427 } 3428 3429 return (error); 3430 } 3431 3432 /* 3433 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3434 * and rw_queue_release() to (re)allocate a big buffer to hold the 3435 * lwpids of all the threads to be set running after they are removed 3436 * from their sleep queues. Since we are holding a queue lock, we 3437 * cannot call any function that might acquire a lock. mmap(), munmap(), 3438 * lwp_unpark_all() are simple system calls and are safe in this regard. 3439 */ 3440 lwpid_t * 3441 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3442 { 3443 /* 3444 * Allocate NEWLWPS ids on the first overflow. 3445 * Double the allocation each time after that. 3446 */ 3447 int nlwpid = *nlwpid_ptr; 3448 int maxlwps = *maxlwps_ptr; 3449 int first_allocation; 3450 int newlwps; 3451 void *vaddr; 3452 3453 ASSERT(nlwpid == maxlwps); 3454 3455 first_allocation = (maxlwps == MAXLWPS); 3456 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3457 vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t), 3458 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3459 3460 if (vaddr == MAP_FAILED) { 3461 /* 3462 * Let's hope this never happens. 3463 * If it does, then we have a terrible 3464 * thundering herd on our hands. 3465 */ 3466 (void) __lwp_unpark_all(lwpid, nlwpid); 3467 *nlwpid_ptr = 0; 3468 } else { 3469 (void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3470 if (!first_allocation) 3471 (void) _private_munmap(lwpid, 3472 maxlwps * sizeof (lwpid_t)); 3473 lwpid = vaddr; 3474 *maxlwps_ptr = newlwps; 3475 } 3476 3477 return (lwpid); 3478 } 3479 3480 #pragma weak pthread_cond_broadcast = cond_broadcast_internal 3481 #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 3482 #pragma weak cond_broadcast = cond_broadcast_internal 3483 #pragma weak _cond_broadcast = cond_broadcast_internal 3484 int 3485 cond_broadcast_internal(cond_t *cvp) 3486 { 3487 ulwp_t *self = curthread; 3488 uberdata_t *udp = self->ul_uberdata; 3489 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3490 int error = 0; 3491 queue_head_t *qp; 3492 mutex_t *mp; 3493 mutex_t *mp_cache = NULL; 3494 queue_head_t *mqp = NULL; 3495 ulwp_t **ulwpp; 3496 ulwp_t *ulwp; 3497 ulwp_t *prev = NULL; 3498 int nlwpid = 0; 3499 int maxlwps = MAXLWPS; 3500 lwpid_t buffer[MAXLWPS]; 3501 lwpid_t *lwpid = buffer; 3502 3503 if (csp) 3504 tdb_incr(csp->cond_broadcast); 3505 3506 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? 
*/ 3507 error = __lwp_cond_broadcast(cvp); 3508 3509 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3510 return (error); 3511 3512 /* 3513 * Move everyone from the condvar sleep queue to the mutex sleep 3514 * queue for the mutex that they will acquire on being waked up. 3515 * We can do this only if we own the mutex they will acquire. 3516 * If we do not own the mutex, or if their ul_cv_wake flag 3517 * is set, just dequeue and unpark them. 3518 * 3519 * We keep track of lwpids that are to be unparked in lwpid[]. 3520 * __lwp_unpark_all() is called to unpark all of them after 3521 * they have been removed from the sleep queue and the sleep 3522 * queue lock has been dropped. If we run out of space in our 3523 * on-stack buffer, we need to allocate more but we can't call 3524 * lmalloc() because we are holding a queue lock when the overflow 3525 * occurs and lmalloc() acquires a lock. We can't use alloca() 3526 * either because the application may have allocated a small 3527 * stack and we don't want to overrun the stack. So we call 3528 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3529 * system call directly since that path acquires no locks. 3530 */ 3531 qp = queue_lock(cvp, CV); 3532 cvp->cond_waiters_user = 0; 3533 ulwpp = &qp->qh_head; 3534 while ((ulwp = *ulwpp) != NULL) { 3535 if (ulwp->ul_wchan != cvp) { 3536 prev = ulwp; 3537 ulwpp = &ulwp->ul_link; 3538 continue; 3539 } 3540 *ulwpp = ulwp->ul_link; 3541 ulwp->ul_link = NULL; 3542 if (qp->qh_tail == ulwp) 3543 qp->qh_tail = prev; 3544 qp->qh_qlen--; 3545 mp = ulwp->ul_cvmutex; /* his mutex */ 3546 ulwp->ul_cvmutex = NULL; 3547 ASSERT(mp != NULL); 3548 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3549 ulwp->ul_sleepq = NULL; 3550 ulwp->ul_wchan = NULL; 3551 ulwp->ul_cv_wake = 0; 3552 if (nlwpid == maxlwps) 3553 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3554 lwpid[nlwpid++] = ulwp->ul_lwpid; 3555 } else { 3556 if (mp != mp_cache) { 3557 mp_cache = mp; 3558 if (mqp != NULL) 3559 queue_unlock(mqp); 3560 mqp = queue_lock(mp, MX); 3561 } 3562 enqueue(mqp, ulwp, mp, MX); 3563 mp->mutex_waiters = 1; 3564 } 3565 } 3566 if (mqp != NULL) 3567 queue_unlock(mqp); 3568 if (nlwpid == 0) { 3569 queue_unlock(qp); 3570 } else { 3571 no_preempt(self); 3572 queue_unlock(qp); 3573 if (nlwpid == 1) 3574 (void) __lwp_unpark(lwpid[0]); 3575 else 3576 (void) __lwp_unpark_all(lwpid, nlwpid); 3577 preempt(self); 3578 } 3579 if (lwpid != buffer) 3580 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 3581 return (error); 3582 } 3583 3584 #pragma weak pthread_cond_destroy = _cond_destroy 3585 #pragma weak _pthread_cond_destroy = _cond_destroy 3586 #pragma weak cond_destroy = _cond_destroy 3587 int 3588 _cond_destroy(cond_t *cvp) 3589 { 3590 cvp->cond_magic = 0; 3591 tdb_sync_obj_deregister(cvp); 3592 return (0); 3593 } 3594 3595 #if defined(THREAD_DEBUG) 3596 void 3597 assert_no_libc_locks_held(void) 3598 { 3599 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3600 } 3601 #endif 3602 3603 /* protected by link_lock */ 3604 uint64_t spin_lock_spin; 3605 uint64_t spin_lock_spin2; 3606 uint64_t spin_lock_sleep; 3607 uint64_t spin_lock_wakeup; 3608 3609 /* 3610 * Record spin lock statistics. 3611 * Called by a thread exiting itself in thrp_exit(). 3612 * Also called via atexit() from the thread calling 3613 * exit() to do all the other threads as well. 
3614 */ 3615 void 3616 record_spin_locks(ulwp_t *ulwp) 3617 { 3618 spin_lock_spin += ulwp->ul_spin_lock_spin; 3619 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3620 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3621 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3622 ulwp->ul_spin_lock_spin = 0; 3623 ulwp->ul_spin_lock_spin2 = 0; 3624 ulwp->ul_spin_lock_sleep = 0; 3625 ulwp->ul_spin_lock_wakeup = 0; 3626 } 3627 3628 /* 3629 * atexit function: dump the queue statistics to stderr. 3630 */ 3631 #if !defined(__lint) 3632 #define fprintf _fprintf 3633 #endif 3634 #include <stdio.h> 3635 void 3636 dump_queue_statistics(void) 3637 { 3638 uberdata_t *udp = curthread->ul_uberdata; 3639 queue_head_t *qp; 3640 int qn; 3641 uint64_t spin_lock_total = 0; 3642 3643 if (udp->queue_head == NULL || thread_queue_dump == 0) 3644 return; 3645 3646 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3647 fprintf(stderr, "queue# lockcount max qlen\n") < 0) 3648 return; 3649 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3650 if (qp->qh_lockcount == 0) 3651 continue; 3652 spin_lock_total += qp->qh_lockcount; 3653 if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3654 (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3655 return; 3656 } 3657 3658 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3659 fprintf(stderr, "queue# lockcount max qlen\n") < 0) 3660 return; 3661 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3662 if (qp->qh_lockcount == 0) 3663 continue; 3664 spin_lock_total += qp->qh_lockcount; 3665 if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3666 (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3667 return; 3668 } 3669 3670 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3671 (u_longlong_t)spin_lock_total); 3672 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3673 (u_longlong_t)spin_lock_spin); 3674 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3675 (u_longlong_t)spin_lock_spin2); 3676 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3677 (u_longlong_t)spin_lock_sleep); 3678 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3679 (u_longlong_t)spin_lock_wakeup); 3680 } 3681
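
/*
 * Illustrative caller-side sketch (not part of libc): the condvar
 * implementation above permits spurious wakeups and may return zero
 * before the awaited condition is true, so a waiter is expected to
 * re-test its predicate in a loop.  The names 'cv', 'm', 'ready' and
 * 'deadline' below are hypothetical.
 *
 *	(void) mutex_lock(&m);
 *	while (!ready) {
 *		if (cond_timedwait(&cv, &m, &deadline) == ETIME)
 *			break;		(deadline passed, predicate false)
 *	}
 *	(void) mutex_unlock(&m);
 *
 * At this level a timeout is reported as ETIME; the
 * pthread_cond_timedwait() wrapper above maps it to ETIMEDOUT.
 */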