1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/sdt.h> 30 31 #include "lint.h" 32 #include "thr_uberdata.h" 33 34 /* 35 * This mutex is initialized to be held by lwp#1. 36 * It is used to block a thread that has returned from a mutex_lock() 37 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 38 */ 39 mutex_t stall_mutex = DEFAULTMUTEX; 40 41 static int shared_mutex_held(mutex_t *); 42 static int mutex_unlock_internal(mutex_t *, int); 43 static int mutex_queuelock_adaptive(mutex_t *); 44 static void mutex_wakeup_all(mutex_t *); 45 46 /* 47 * Lock statistics support functions. 48 */ 49 void 50 record_begin_hold(tdb_mutex_stats_t *msp) 51 { 52 tdb_incr(msp->mutex_lock); 53 msp->mutex_begin_hold = gethrtime(); 54 } 55 56 hrtime_t 57 record_hold_time(tdb_mutex_stats_t *msp) 58 { 59 hrtime_t now = gethrtime(); 60 61 if (msp->mutex_begin_hold) 62 msp->mutex_hold_time += now - msp->mutex_begin_hold; 63 msp->mutex_begin_hold = 0; 64 return (now); 65 } 66 67 /* 68 * Called once at library initialization. 69 */ 70 void 71 mutex_setup(void) 72 { 73 if (set_lock_byte(&stall_mutex.mutex_lockw)) 74 thr_panic("mutex_setup() cannot acquire stall_mutex"); 75 stall_mutex.mutex_owner = (uintptr_t)curthread; 76 } 77 78 /* 79 * The default spin counts of 1000 and 500 are experimentally determined. 80 * On sun4u machines with any number of processors they could be raised 81 * to 10,000 but that (experimentally) makes almost no difference. 82 * The environment variables: 83 * _THREAD_ADAPTIVE_SPIN=count 84 * _THREAD_RELEASE_SPIN=count 85 * can be used to override and set the counts in the range [0 .. 1,000,000]. 86 */ 87 int thread_adaptive_spin = 1000; 88 uint_t thread_max_spinners = 100; 89 int thread_release_spin = 500; 90 int thread_queue_verify = 0; 91 static int ncpus; 92 93 /* 94 * Distinguish spinning for queue locks from spinning for regular locks. 95 * The environment variable: 96 * _THREAD_QUEUE_SPIN=count 97 * can be used to override and set the count in the range [0 .. 1,000,000]. 98 * There is no release spin concept for queue locks. 99 */ 100 int thread_queue_spin = 1000; 101 102 /* 103 * Use the otherwise-unused 'mutex_ownerpid' field of a USYNC_THREAD 104 * mutex to be a count of adaptive spins in progress. 
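 *
 * Illustrative sketch (not part of the original comment): the adaptive
 * spin code in mutex_trylock_adaptive() below uses this alias roughly
 * as follows to cap the number of concurrent spinners:
 *
 *	if (mp->mutex_spinners >= self->ul_max_spinners)
 *		max = 1;		(then spin only once)
 *	atomic_inc_32(&mp->mutex_spinners);
 *	... spin up to 'max' times, retrying set_lock_byte() ...
 *	atomic_dec_32(&mp->mutex_spinners);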
 */
#define	mutex_spinners	mutex_ownerpid

#define	ALL_ATTRIBUTES				\
	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
	LOCK_ROBUST)

/*
 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
 * augmented by zero or more of the flags:
 *	LOCK_RECURSIVE
 *	LOCK_ERRORCHECK
 *	LOCK_PRIO_INHERIT
 *	LOCK_PRIO_PROTECT
 *	LOCK_ROBUST
 */
#pragma weak _private_mutex_init = __mutex_init
#pragma weak mutex_init = __mutex_init
#pragma weak _mutex_init = __mutex_init
/* ARGSUSED2 */
int
__mutex_init(mutex_t *mp, int type, void *arg)
{
	int basetype = (type & ~ALL_ATTRIBUTES);
	int error = 0;

	if (basetype == USYNC_PROCESS_ROBUST) {
		/*
		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
		 * retain the USYNC_PROCESS_ROBUST flag so we can return
		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
		 * mutexes will ever draw ELOCKUNMAPPED).
		 */
		type |= (USYNC_PROCESS | LOCK_ROBUST);
		basetype = USYNC_PROCESS;
	}

	if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) ||
	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) {
		error = EINVAL;
	} else if (type & LOCK_ROBUST) {
		/*
		 * Callers of mutex_init() with the LOCK_ROBUST attribute
		 * are required to pass an initially all-zero mutex.
		 * Multiple calls to mutex_init() are allowed; all but
		 * the first return EBUSY.  A call to mutex_init() is
		 * allowed to make an inconsistent robust lock consistent
		 * (for historical usage, even though the proper interface
		 * for this is mutex_consistent()).  Note that we use
		 * atomic_or_16() to set the LOCK_INITED flag so as
		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
		 */
		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
		if (!(mp->mutex_flag & LOCK_INITED)) {
			mp->mutex_type = (uint8_t)type;
			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
			mp->mutex_magic = MUTEX_MAGIC;
		} else if (type != mp->mutex_type ||
		    ((type & LOCK_PRIO_PROTECT) &&
		    mp->mutex_ceiling != (*(int *)arg))) {
			error = EINVAL;
		} else if (__mutex_consistent(mp) != 0) {
			error = EBUSY;
		}
		/* register a process robust mutex with the kernel */
		if (basetype == USYNC_PROCESS)
			register_lock(mp);
	} else {
		(void) _memset(mp, 0, sizeof (*mp));
		mp->mutex_type = (uint8_t)type;
		mp->mutex_flag = LOCK_INITED;
		mp->mutex_magic = MUTEX_MAGIC;
	}

	if (error == 0 && (type & LOCK_PRIO_PROTECT))
		mp->mutex_ceiling = (uint8_t)(*(int *)arg);

	return (error);
}

/*
 * Delete mp from list of ceil mutexes owned by curthread.
 * Return 1 if the head of the chain was updated.
 */
int
_ceil_mylist_del(mutex_t *mp)
{
	ulwp_t *self = curthread;
	mxchain_t **mcpp;
	mxchain_t *mcp;

	mcpp = &self->ul_mxchain;
	while ((*mcpp)->mxchain_mx != mp)
		mcpp = &(*mcpp)->mxchain_next;
	mcp = *mcpp;
	*mcpp = mcp->mxchain_next;
	lfree(mcp, sizeof (*mcp));
	return (mcpp == &self->ul_mxchain);
}

/*
 * Add mp to head of list of ceil mutexes owned by curthread.
 * Return ENOMEM if no memory could be allocated.
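 *
 * For context, an illustrative (hypothetical) caller creates the kind of
 * ceiling mutex that ends up on this chain roughly as follows; the value
 * 30 is an arbitrary assumption:
 *
 *	int ceil = 30;
 *	mutex_t m;
 *	(void) mutex_init(&m, USYNC_THREAD | LOCK_PRIO_PROTECT, &ceil);
 *
 * __mutex_init() above copies the ceiling into m.mutex_ceiling, and each
 * acquisition of such a mutex pushes it onto ul_mxchain via this function.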
 */
int
_ceil_mylist_add(mutex_t *mp)
{
	ulwp_t *self = curthread;
	mxchain_t *mcp;

	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
		return (ENOMEM);
	mcp->mxchain_mx = mp;
	mcp->mxchain_next = self->ul_mxchain;
	self->ul_mxchain = mcp;
	return (0);
}

/*
 * Inherit priority from ceiling.  The inheritance impacts the effective
 * priority, not the assigned priority.  See _thread_setschedparam_main().
 */
void
_ceil_prio_inherit(int ceil)
{
	ulwp_t *self = curthread;
	struct sched_param param;

	(void) _memset(&param, 0, sizeof (param));
	param.sched_priority = ceil;
	if (_thread_setschedparam_main(self->ul_lwpid,
	    self->ul_policy, &param, PRIO_INHERIT)) {
		/*
		 * Panic since unclear what error code to return.
		 * If we do return the error codes returned by above
		 * called routine, update the man page...
		 */
		thr_panic("_thread_setschedparam_main() fails");
	}
}

/*
 * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
 * if holding at least one ceiling lock.  If no ceiling locks are held at this
 * point, disinherit completely, reverting back to assigned priority.
 */
void
_ceil_prio_waive(void)
{
	ulwp_t *self = curthread;
	struct sched_param param;

	(void) _memset(&param, 0, sizeof (param));
	if (self->ul_mxchain == NULL) {
		/*
		 * No ceil locks held.  Zero the epri, revert back to ul_pri.
		 * Since thread's hash lock is not held, one cannot just
		 * read ul_pri here...do it in the called routine...
		 */
		param.sched_priority = self->ul_pri;	/* ignored */
		if (_thread_setschedparam_main(self->ul_lwpid,
		    self->ul_policy, &param, PRIO_DISINHERIT))
			thr_panic("_thread_setschedparam_main() fails");
	} else {
		/*
		 * Set priority to that of the mutex at the head
		 * of the ceilmutex chain.
		 */
		param.sched_priority =
		    self->ul_mxchain->mxchain_mx->mutex_ceiling;
		if (_thread_setschedparam_main(self->ul_lwpid,
		    self->ul_policy, &param, PRIO_INHERIT))
			thr_panic("_thread_setschedparam_main() fails");
	}
}

/*
 * Non-preemptive spin locks.  Used by queue_lock().
 * No lock statistics are gathered for these locks.
 */
void
spin_lock_set(mutex_t *mp)
{
	ulwp_t *self = curthread;

	no_preempt(self);
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Spin for a while, attempting to acquire the lock.
	 */
	if (self->ul_spin_lock_spin != UINT_MAX)
		self->ul_spin_lock_spin++;
	if (mutex_queuelock_adaptive(mp) == 0 ||
	    set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Try harder if we were previously at a no preemption level.
	 */
	if (self->ul_preempt > 1) {
		if (self->ul_spin_lock_spin2 != UINT_MAX)
			self->ul_spin_lock_spin2++;
		if (mutex_queuelock_adaptive(mp) == 0 ||
		    set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			return;
		}
	}
	/*
	 * Give up and block in the kernel for the mutex.
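	 *
	 * Taken as a whole, this function follows the usual spin-then-block
	 * pattern; an illustrative outline, not a separate interface:
	 *
	 *	fast path:	set_lock_byte(&mp->mutex_lockw) == 0, done;
	 *	spin:		mutex_queuelock_adaptive(mp), then retry the
	 *			lock byte (twice if at a no-preemption level);
	 *	block:		___lwp_mutex_timedlock(mp, NULL) in the kernel.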
322 */ 323 if (self->ul_spin_lock_sleep != UINT_MAX) 324 self->ul_spin_lock_sleep++; 325 (void) ___lwp_mutex_timedlock(mp, NULL); 326 mp->mutex_owner = (uintptr_t)self; 327 } 328 329 void 330 spin_lock_clear(mutex_t *mp) 331 { 332 ulwp_t *self = curthread; 333 334 mp->mutex_owner = 0; 335 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 336 (void) ___lwp_mutex_wakeup(mp, 0); 337 if (self->ul_spin_lock_wakeup != UINT_MAX) 338 self->ul_spin_lock_wakeup++; 339 } 340 preempt(self); 341 } 342 343 /* 344 * Allocate the sleep queue hash table. 345 */ 346 void 347 queue_alloc(void) 348 { 349 ulwp_t *self = curthread; 350 uberdata_t *udp = self->ul_uberdata; 351 mutex_t *mp; 352 void *data; 353 int i; 354 355 /* 356 * No locks are needed; we call here only when single-threaded. 357 */ 358 ASSERT(self == udp->ulwp_one); 359 ASSERT(!udp->uberflags.uf_mt); 360 if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 361 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 362 == MAP_FAILED) 363 thr_panic("cannot allocate thread queue_head table"); 364 udp->queue_head = (queue_head_t *)data; 365 for (i = 0; i < 2 * QHASHSIZE; i++) { 366 mp = &udp->queue_head[i].qh_lock; 367 mp->mutex_flag = LOCK_INITED; 368 mp->mutex_magic = MUTEX_MAGIC; 369 } 370 } 371 372 #if defined(THREAD_DEBUG) 373 374 /* 375 * Debugging: verify correctness of a sleep queue. 376 */ 377 void 378 QVERIFY(queue_head_t *qp) 379 { 380 ulwp_t *self = curthread; 381 uberdata_t *udp = self->ul_uberdata; 382 ulwp_t *ulwp; 383 ulwp_t *prev; 384 uint_t index; 385 uint32_t cnt = 0; 386 char qtype; 387 void *wchan; 388 389 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 390 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 391 ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || 392 (qp->qh_head == NULL && qp->qh_tail == NULL)); 393 if (!thread_queue_verify) 394 return; 395 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 396 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 397 for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; 398 prev = ulwp, ulwp = ulwp->ul_link, cnt++) { 399 ASSERT(ulwp->ul_qtype == qtype); 400 ASSERT(ulwp->ul_wchan != NULL); 401 ASSERT(ulwp->ul_sleepq == qp); 402 wchan = ulwp->ul_wchan; 403 index = QUEUE_HASH(wchan, qtype); 404 ASSERT(&udp->queue_head[index] == qp); 405 } 406 ASSERT(qp->qh_tail == prev); 407 ASSERT(qp->qh_qlen == cnt); 408 } 409 410 #else /* THREAD_DEBUG */ 411 412 #define QVERIFY(qp) 413 414 #endif /* THREAD_DEBUG */ 415 416 /* 417 * Acquire a queue head. 418 */ 419 queue_head_t * 420 queue_lock(void *wchan, int qtype) 421 { 422 uberdata_t *udp = curthread->ul_uberdata; 423 queue_head_t *qp; 424 425 ASSERT(qtype == MX || qtype == CV); 426 427 /* 428 * It is possible that we could be called while still single-threaded. 429 * If so, we call queue_alloc() to allocate the queue_head[] array. 430 */ 431 if ((qp = udp->queue_head) == NULL) { 432 queue_alloc(); 433 qp = udp->queue_head; 434 } 435 qp += QUEUE_HASH(wchan, qtype); 436 spin_lock_set(&qp->qh_lock); 437 /* 438 * At once per nanosecond, qh_lockcount will wrap after 512 years. 439 * Were we to care about this, we could peg the value at UINT64_MAX. 440 */ 441 qp->qh_lockcount++; 442 QVERIFY(qp); 443 return (qp); 444 } 445 446 /* 447 * Release a queue head. 448 */ 449 void 450 queue_unlock(queue_head_t *qp) 451 { 452 QVERIFY(qp); 453 spin_lock_clear(&qp->qh_lock); 454 } 455 456 /* 457 * For rwlock queueing, we must queue writers ahead of readers of the 458 * same priority. 
We do this by making writers appear to have a half 459 * point higher priority for purposes of priority comparisons below. 460 */ 461 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 462 463 void 464 enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) 465 { 466 ulwp_t **ulwpp; 467 ulwp_t *next; 468 int pri = CMP_PRIO(ulwp); 469 int force_fifo = (qtype & FIFOQ); 470 int do_fifo; 471 472 qtype &= ~FIFOQ; 473 ASSERT(qtype == MX || qtype == CV); 474 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 475 ASSERT(ulwp->ul_sleepq != qp); 476 477 /* 478 * LIFO queue ordering is unfair and can lead to starvation, 479 * but it gives better performance for heavily contended locks. 480 * We use thread_queue_fifo (range is 0..8) to determine 481 * the frequency of FIFO vs LIFO queuing: 482 * 0 : every 256th time (almost always LIFO) 483 * 1 : every 128th time 484 * 2 : every 64th time 485 * 3 : every 32nd time 486 * 4 : every 16th time (the default value, mostly LIFO) 487 * 5 : every 8th time 488 * 6 : every 4th time 489 * 7 : every 2nd time 490 * 8 : every time (never LIFO, always FIFO) 491 * Note that there is always some degree of FIFO ordering. 492 * This breaks live lock conditions that occur in applications 493 * that are written assuming (incorrectly) that threads acquire 494 * locks fairly, that is, in roughly round-robin order. 495 * In any event, the queue is maintained in priority order. 496 * 497 * If we are given the FIFOQ flag in qtype, fifo queueing is forced. 498 * SUSV3 requires this for semaphores. 499 */ 500 do_fifo = (force_fifo || 501 ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); 502 503 if (qp->qh_head == NULL) { 504 /* 505 * The queue is empty. LIFO/FIFO doesn't matter. 506 */ 507 ASSERT(qp->qh_tail == NULL); 508 ulwpp = &qp->qh_head; 509 } else if (do_fifo) { 510 /* 511 * Enqueue after the last thread whose priority is greater 512 * than or equal to the priority of the thread being queued. 513 * Attempt first to go directly onto the tail of the queue. 514 */ 515 if (pri <= CMP_PRIO(qp->qh_tail)) 516 ulwpp = &qp->qh_tail->ul_link; 517 else { 518 for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 519 ulwpp = &next->ul_link) 520 if (pri > CMP_PRIO(next)) 521 break; 522 } 523 } else { 524 /* 525 * Enqueue before the first thread whose priority is less 526 * than or equal to the priority of the thread being queued. 527 * Hopefully we can go directly onto the head of the queue. 528 */ 529 for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 530 ulwpp = &next->ul_link) 531 if (pri >= CMP_PRIO(next)) 532 break; 533 } 534 if ((ulwp->ul_link = *ulwpp) == NULL) 535 qp->qh_tail = ulwp; 536 *ulwpp = ulwp; 537 538 ulwp->ul_sleepq = qp; 539 ulwp->ul_wchan = wchan; 540 ulwp->ul_qtype = qtype; 541 if (qp->qh_qmax < ++qp->qh_qlen) 542 qp->qh_qmax = qp->qh_qlen; 543 } 544 545 /* 546 * Return a pointer to the queue slot of the 547 * highest priority thread on the queue. 548 * On return, prevp, if not NULL, will contain a pointer 549 * to the thread's predecessor on the queue 550 */ 551 static ulwp_t ** 552 queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) 553 { 554 ulwp_t **ulwpp; 555 ulwp_t *ulwp; 556 ulwp_t *prev = NULL; 557 ulwp_t **suspp = NULL; 558 ulwp_t *susprev; 559 560 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 561 562 /* 563 * Find a waiter on the sleep queue. 
564 */ 565 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 566 prev = ulwp, ulwpp = &ulwp->ul_link) { 567 if (ulwp->ul_wchan == wchan) { 568 if (!ulwp->ul_stop) 569 break; 570 /* 571 * Try not to return a suspended thread. 572 * This mimics the old libthread's behavior. 573 */ 574 if (suspp == NULL) { 575 suspp = ulwpp; 576 susprev = prev; 577 } 578 } 579 } 580 581 if (ulwp == NULL && suspp != NULL) { 582 ulwp = *(ulwpp = suspp); 583 prev = susprev; 584 suspp = NULL; 585 } 586 if (ulwp == NULL) { 587 if (more != NULL) 588 *more = 0; 589 return (NULL); 590 } 591 592 if (prevp != NULL) 593 *prevp = prev; 594 if (more == NULL) 595 return (ulwpp); 596 597 /* 598 * Scan the remainder of the queue for another waiter. 599 */ 600 if (suspp != NULL) { 601 *more = 1; 602 return (ulwpp); 603 } 604 for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { 605 if (ulwp->ul_wchan == wchan) { 606 *more = 1; 607 return (ulwpp); 608 } 609 } 610 611 *more = 0; 612 return (ulwpp); 613 } 614 615 ulwp_t * 616 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 617 { 618 ulwp_t *ulwp; 619 620 ulwp = *ulwpp; 621 *ulwpp = ulwp->ul_link; 622 ulwp->ul_link = NULL; 623 if (qp->qh_tail == ulwp) 624 qp->qh_tail = prev; 625 qp->qh_qlen--; 626 ulwp->ul_sleepq = NULL; 627 ulwp->ul_wchan = NULL; 628 629 return (ulwp); 630 } 631 632 ulwp_t * 633 dequeue(queue_head_t *qp, void *wchan, int *more) 634 { 635 ulwp_t **ulwpp; 636 ulwp_t *prev; 637 638 if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) 639 return (NULL); 640 return (queue_unlink(qp, ulwpp, prev)); 641 } 642 643 /* 644 * Return a pointer to the highest priority thread sleeping on wchan. 645 */ 646 ulwp_t * 647 queue_waiter(queue_head_t *qp, void *wchan) 648 { 649 ulwp_t **ulwpp; 650 651 if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) 652 return (NULL); 653 return (*ulwpp); 654 } 655 656 uint8_t 657 dequeue_self(queue_head_t *qp, void *wchan) 658 { 659 ulwp_t *self = curthread; 660 ulwp_t **ulwpp; 661 ulwp_t *ulwp; 662 ulwp_t *prev = NULL; 663 int found = 0; 664 int more = 0; 665 666 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 667 668 /* find self on the sleep queue */ 669 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 670 prev = ulwp, ulwpp = &ulwp->ul_link) { 671 if (ulwp == self) { 672 /* dequeue ourself */ 673 ASSERT(self->ul_wchan == wchan); 674 (void) queue_unlink(qp, ulwpp, prev); 675 self->ul_cvmutex = NULL; 676 self->ul_cv_wake = 0; 677 found = 1; 678 break; 679 } 680 if (ulwp->ul_wchan == wchan) 681 more = 1; 682 } 683 684 if (!found) 685 thr_panic("dequeue_self(): curthread not found on queue"); 686 687 if (more) 688 return (1); 689 690 /* scan the remainder of the queue for another waiter */ 691 for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { 692 if (ulwp->ul_wchan == wchan) 693 return (1); 694 } 695 696 return (0); 697 } 698 699 /* 700 * Called from call_user_handler() and _thrp_suspend() to take 701 * ourself off of our sleep queue so we can grab locks. 702 */ 703 void 704 unsleep_self(void) 705 { 706 ulwp_t *self = curthread; 707 queue_head_t *qp; 708 709 /* 710 * Calling enter_critical()/exit_critical() here would lead 711 * to recursion. Just manipulate self->ul_critical directly. 712 */ 713 self->ul_critical++; 714 while (self->ul_sleepq != NULL) { 715 qp = queue_lock(self->ul_wchan, self->ul_qtype); 716 /* 717 * We may have been moved from a CV queue to a 718 * mutex queue while we were attempting queue_lock(). 719 * If so, just loop around and try again. 720 * dequeue_self() clears self->ul_sleepq. 
		 */
		if (qp == self->ul_sleepq) {
			(void) dequeue_self(qp, self->ul_wchan);
			self->ul_writer = 0;
		}
		queue_unlock(qp);
	}
	self->ul_critical--;
}

/*
 * Common code for calling the ___lwp_mutex_timedlock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
static int
mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	hrtime_t begin_sleep;
	int acquired;
	int error;

	self->ul_sp = stkptr();
	self->ul_wchan = mp;
	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = mp;
		tdb_event(TD_SLEEP, udp);
	}
	if (msp) {
		tdb_incr(msp->mutex_sleep);
		begin_sleep = gethrtime();
	}

	DTRACE_PROBE1(plockstat, mutex__block, mp);

	for (;;) {
		/*
		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
		 * means we successfully acquired the lock.
		 */
		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 &&
		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
			acquired = 0;
			break;
		}

		if (mtype & USYNC_PROCESS) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				acquired = 1;
				break;
			}
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			acquired = 1;
			break;
		}
	}
	if (msp)
		msp->mutex_sleep_time += gethrtime() - begin_sleep;
	self->ul_wchan = NULL;
	self->ul_sp = 0;

	if (acquired) {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	}

	return (error);
}

/*
 * Common code for calling the ___lwp_mutex_trylock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
int
mutex_trylock_kernel(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	int error;
	int acquired;

	for (;;) {
		/*
		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
		 * means we successfully acquired the lock.
		 */
		if ((error = ___lwp_mutex_trylock(mp)) != 0 &&
		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
			acquired = 0;
			break;
		}

		if (mtype & USYNC_PROCESS) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
833 */ 834 enter_critical(self); 835 if (mp->mutex_ownerpid == udp->pid) { 836 mp->mutex_owner = (uintptr_t)self; 837 exit_critical(self); 838 acquired = 1; 839 break; 840 } 841 exit_critical(self); 842 } else { 843 mp->mutex_owner = (uintptr_t)self; 844 acquired = 1; 845 break; 846 } 847 } 848 849 if (acquired) { 850 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 851 } else if (error != EBUSY) { 852 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 853 } 854 855 return (error); 856 } 857 858 volatile sc_shared_t * 859 setup_schedctl(void) 860 { 861 ulwp_t *self = curthread; 862 volatile sc_shared_t *scp; 863 sc_shared_t *tmp; 864 865 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 866 !self->ul_vfork && /* not a child of vfork() */ 867 !self->ul_schedctl_called) { /* haven't been called before */ 868 enter_critical(self); 869 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 870 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 871 self->ul_schedctl = scp = tmp; 872 exit_critical(self); 873 } 874 /* 875 * Unless the call to setup_schedctl() is surrounded 876 * by enter_critical()/exit_critical(), the address 877 * we are returning could be invalid due to a forkall() 878 * having occurred in another thread. 879 */ 880 return (scp); 881 } 882 883 /* 884 * Interfaces from libsched, incorporated into libc. 885 * libsched.so.1 is now a filter library onto libc. 886 */ 887 #pragma weak schedctl_lookup = _schedctl_init 888 #pragma weak _schedctl_lookup = _schedctl_init 889 #pragma weak schedctl_init = _schedctl_init 890 schedctl_t * 891 _schedctl_init(void) 892 { 893 volatile sc_shared_t *scp = setup_schedctl(); 894 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 895 } 896 897 #pragma weak schedctl_exit = _schedctl_exit 898 void 899 _schedctl_exit(void) 900 { 901 } 902 903 /* 904 * Contract private interface for java. 905 * Set up the schedctl data if it doesn't exist yet. 906 * Return a pointer to the pointer to the schedctl data. 907 */ 908 volatile sc_shared_t *volatile * 909 _thr_schedctl(void) 910 { 911 ulwp_t *self = curthread; 912 volatile sc_shared_t *volatile *ptr; 913 914 if (self->ul_vfork) 915 return (NULL); 916 if (*(ptr = &self->ul_schedctl) == NULL) 917 (void) setup_schedctl(); 918 return (ptr); 919 } 920 921 /* 922 * Block signals and attempt to block preemption. 923 * no_preempt()/preempt() must be used in pairs but can be nested. 924 */ 925 void 926 no_preempt(ulwp_t *self) 927 { 928 volatile sc_shared_t *scp; 929 930 if (self->ul_preempt++ == 0) { 931 enter_critical(self); 932 if ((scp = self->ul_schedctl) != NULL || 933 (scp = setup_schedctl()) != NULL) { 934 /* 935 * Save the pre-existing preempt value. 936 */ 937 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 938 scp->sc_preemptctl.sc_nopreempt = 1; 939 } 940 } 941 } 942 943 /* 944 * Undo the effects of no_preempt(). 945 */ 946 void 947 preempt(ulwp_t *self) 948 { 949 volatile sc_shared_t *scp; 950 951 ASSERT(self->ul_preempt > 0); 952 if (--self->ul_preempt == 0) { 953 if ((scp = self->ul_schedctl) != NULL) { 954 /* 955 * Restore the pre-existing preempt value. 956 */ 957 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 958 if (scp->sc_preemptctl.sc_yield && 959 scp->sc_preemptctl.sc_nopreempt == 0) { 960 lwp_yield(); 961 if (scp->sc_preemptctl.sc_yield) { 962 /* 963 * Shouldn't happen. This is either 964 * a race condition or the thread 965 * just entered the real-time class. 
966 */ 967 lwp_yield(); 968 scp->sc_preemptctl.sc_yield = 0; 969 } 970 } 971 } 972 exit_critical(self); 973 } 974 } 975 976 /* 977 * If a call to preempt() would cause the current thread to yield or to 978 * take deferred actions in exit_critical(), then unpark the specified 979 * lwp so it can run while we delay. Return the original lwpid if the 980 * unpark was not performed, else return zero. The tests are a repeat 981 * of some of the tests in preempt(), above. This is a statistical 982 * optimization solely for cond_sleep_queue(), below. 983 */ 984 static lwpid_t 985 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 986 { 987 volatile sc_shared_t *scp = self->ul_schedctl; 988 989 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 990 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 991 (self->ul_curplease && self->ul_critical == 1)) { 992 (void) __lwp_unpark(lwpid); 993 lwpid = 0; 994 } 995 return (lwpid); 996 } 997 998 /* 999 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1000 * If this fails, return EBUSY and let the caller deal with it. 1001 * If this succeeds, return 0 with mutex_owner set to curthread. 1002 */ 1003 static int 1004 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1005 { 1006 ulwp_t *self = curthread; 1007 int error = EBUSY; 1008 ulwp_t *ulwp; 1009 volatile sc_shared_t *scp; 1010 volatile uint8_t *lockp; 1011 volatile uint64_t *ownerp; 1012 int count; 1013 int max; 1014 1015 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1016 1017 if (MUTEX_OWNER(mp) == self) 1018 return (EBUSY); 1019 1020 /* short-cut, not definitive (see below) */ 1021 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1022 ASSERT(mp->mutex_type & LOCK_ROBUST); 1023 DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); 1024 return (ENOTRECOVERABLE); 1025 } 1026 1027 if (!tryhard || 1028 (max = self->ul_adaptive_spin) == 0 || 1029 mp->mutex_spinners >= self->ul_max_spinners) 1030 max = 1; /* try at least once */ 1031 1032 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1033 1034 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1035 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1036 /* 1037 * This spin loop is unfair to lwps that have already dropped into 1038 * the kernel to sleep. They will starve on a highly-contended mutex. 1039 * This is just too bad. The adaptive spin algorithm is intended 1040 * to allow programs with highly-contended locks (that is, broken 1041 * programs) to execute with reasonable speed despite their contention. 1042 * Being fair would reduce the speed of such programs and well-written 1043 * programs will not suffer in any case. 1044 */ 1045 enter_critical(self); /* protects ul_schedctl */ 1046 atomic_inc_32(&mp->mutex_spinners); 1047 for (count = 1; count <= max; count++) { 1048 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1049 *ownerp = (uintptr_t)self; 1050 error = 0; 1051 break; 1052 } 1053 SMT_PAUSE(); 1054 /* 1055 * Stop spinning if the mutex owner is not running on 1056 * a processor; it will not drop the lock any time soon 1057 * and we would just be wasting time to keep spinning. 1058 * 1059 * Note that we are looking at another thread (ulwp_t) 1060 * without ensuring that the other thread does not exit. 1061 * The scheme relies on ulwp_t structures never being 1062 * deallocated by the library (the library employs a free 1063 * list of ulwp_t structs that are reused when new threads 1064 * are created) and on schedctl shared memory never being 1065 * deallocated once created via __schedctl(). 
1066 * 1067 * Thus, the worst that can happen when the spinning thread 1068 * looks at the owner's schedctl data is that it is looking 1069 * at some other thread's schedctl data. This almost never 1070 * happens and is benign when it does. 1071 */ 1072 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1073 ((scp = ulwp->ul_schedctl) == NULL || 1074 scp->sc_state != SC_ONPROC)) 1075 break; 1076 } 1077 atomic_dec_32(&mp->mutex_spinners); 1078 exit_critical(self); 1079 1080 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1081 ASSERT(mp->mutex_type & LOCK_ROBUST); 1082 /* 1083 * We shouldn't own the mutex; clear the lock. 1084 */ 1085 mp->mutex_owner = 0; 1086 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) 1087 mutex_wakeup_all(mp); 1088 error = ENOTRECOVERABLE; 1089 } 1090 1091 if (error) { 1092 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1093 if (error != EBUSY) { 1094 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1095 } 1096 } else { 1097 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1098 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1099 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1100 ASSERT(mp->mutex_type & LOCK_ROBUST); 1101 error = EOWNERDEAD; 1102 } 1103 } 1104 1105 return (error); 1106 } 1107 1108 /* 1109 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1110 * The owner field is not set here; the caller (spin_lock_set()) sets it. 1111 */ 1112 static int 1113 mutex_queuelock_adaptive(mutex_t *mp) 1114 { 1115 ulwp_t *ulwp; 1116 volatile sc_shared_t *scp; 1117 volatile uint8_t *lockp; 1118 volatile uint64_t *ownerp; 1119 int count = curthread->ul_queue_spin; 1120 1121 ASSERT(mp->mutex_type == USYNC_THREAD); 1122 1123 if (count == 0) 1124 return (EBUSY); 1125 1126 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1127 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1128 while (--count >= 0) { 1129 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1130 return (0); 1131 SMT_PAUSE(); 1132 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1133 ((scp = ulwp->ul_schedctl) == NULL || 1134 scp->sc_state != SC_ONPROC)) 1135 break; 1136 } 1137 1138 return (EBUSY); 1139 } 1140 1141 /* 1142 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1143 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1144 * If this fails, return EBUSY and let the caller deal with it. 1145 * If this succeeds, return 0 with mutex_owner set to curthread 1146 * and mutex_ownerpid set to the current pid. 1147 */ 1148 static int 1149 mutex_trylock_process(mutex_t *mp, int tryhard) 1150 { 1151 ulwp_t *self = curthread; 1152 int error = EBUSY; 1153 volatile uint8_t *lockp; 1154 int count; 1155 int max; 1156 1157 ASSERT(mp->mutex_type & USYNC_PROCESS); 1158 1159 if (shared_mutex_held(mp)) 1160 return (EBUSY); 1161 1162 /* short-cut, not definitive (see below) */ 1163 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1164 ASSERT(mp->mutex_type & LOCK_ROBUST); 1165 DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); 1166 return (ENOTRECOVERABLE); 1167 } 1168 1169 if (ncpus == 0) 1170 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1171 max = (tryhard && ncpus > 1)? self->ul_adaptive_spin : 1; 1172 if (max == 0) 1173 max = 1; /* try at least once */ 1174 1175 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1176 1177 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1178 /* 1179 * This is a process-shared mutex. 1180 * We cannot know if the owner is running on a processor. 1181 * We just spin and hope that it is on a processor. 
1182 */ 1183 enter_critical(self); 1184 for (count = 1; count <= max; count++) { 1185 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1186 mp->mutex_owner = (uintptr_t)self; 1187 mp->mutex_ownerpid = self->ul_uberdata->pid; 1188 error = 0; 1189 break; 1190 } 1191 SMT_PAUSE(); 1192 } 1193 exit_critical(self); 1194 1195 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1196 ASSERT(mp->mutex_type & LOCK_ROBUST); 1197 /* 1198 * We shouldn't own the mutex; clear the lock. 1199 */ 1200 mp->mutex_owner = 0; 1201 mp->mutex_ownerpid = 0; 1202 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 1203 no_preempt(self); 1204 (void) ___lwp_mutex_wakeup(mp, 1); 1205 preempt(self); 1206 } 1207 error = ENOTRECOVERABLE; 1208 } 1209 1210 if (error) { 1211 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1212 if (error != EBUSY) { 1213 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1214 } 1215 } else { 1216 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1217 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1218 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1219 ASSERT(mp->mutex_type & LOCK_ROBUST); 1220 if (mp->mutex_flag & LOCK_OWNERDEAD) 1221 error = EOWNERDEAD; 1222 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1223 error = ELOCKUNMAPPED; 1224 else 1225 error = EOWNERDEAD; 1226 } 1227 } 1228 1229 return (error); 1230 } 1231 1232 /* 1233 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1234 * Returns the lwpid of the thread that was dequeued, if any. 1235 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1236 * to wake up the specified lwp. 1237 */ 1238 static lwpid_t 1239 mutex_wakeup(mutex_t *mp) 1240 { 1241 lwpid_t lwpid = 0; 1242 queue_head_t *qp; 1243 ulwp_t *ulwp; 1244 int more; 1245 1246 /* 1247 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1248 * waiters bit if no one was found on the queue because the mutex 1249 * might have been deallocated or reallocated for another purpose. 1250 */ 1251 qp = queue_lock(mp, MX); 1252 if ((ulwp = dequeue(qp, mp, &more)) != NULL) { 1253 lwpid = ulwp->ul_lwpid; 1254 mp->mutex_waiters = (more? 1 : 0); 1255 } 1256 queue_unlock(qp); 1257 return (lwpid); 1258 } 1259 1260 /* 1261 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 1262 */ 1263 static void 1264 mutex_wakeup_all(mutex_t *mp) 1265 { 1266 queue_head_t *qp; 1267 int nlwpid = 0; 1268 int maxlwps = MAXLWPS; 1269 ulwp_t **ulwpp; 1270 ulwp_t *ulwp; 1271 ulwp_t *prev = NULL; 1272 lwpid_t buffer[MAXLWPS]; 1273 lwpid_t *lwpid = buffer; 1274 1275 /* 1276 * Walk the list of waiters and prepare to wake up all of them. 1277 * The waiters flag has already been cleared from the mutex. 1278 * 1279 * We keep track of lwpids that are to be unparked in lwpid[]. 1280 * __lwp_unpark_all() is called to unpark all of them after 1281 * they have been removed from the sleep queue and the sleep 1282 * queue lock has been dropped. If we run out of space in our 1283 * on-stack buffer, we need to allocate more but we can't call 1284 * lmalloc() because we are holding a queue lock when the overflow 1285 * occurs and lmalloc() acquires a lock. We can't use alloca() 1286 * either because the application may have allocated a small 1287 * stack and we don't want to overrun the stack. So we call 1288 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1289 * system call directly since that path acquires no locks. 
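	 *
	 * For reference, an illustrative (not libc-mandated) way in which
	 * an application creates the kind of mutex served here:
	 *
	 *	mutex_t *mp = mmap(NULL, sizeof (mutex_t),
	 *	    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	 *	if (mp != MAP_FAILED)
	 *		(void) mutex_init(mp, USYNC_PROCESS, NULL);
	 *
	 * Both the parent and the child of fork() then call mutex_lock()
	 * and mutex_unlock() on the same mapping, which is what brings
	 * them through this function.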
1290 */ 1291 qp = queue_lock(mp, MX); 1292 ulwpp = &qp->qh_head; 1293 while ((ulwp = *ulwpp) != NULL) { 1294 if (ulwp->ul_wchan != mp) { 1295 prev = ulwp; 1296 ulwpp = &ulwp->ul_link; 1297 } else { 1298 if (nlwpid == maxlwps) 1299 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1300 (void) queue_unlink(qp, ulwpp, prev); 1301 lwpid[nlwpid++] = ulwp->ul_lwpid; 1302 } 1303 } 1304 mp->mutex_waiters = 0; 1305 1306 if (nlwpid == 0) { 1307 queue_unlock(qp); 1308 } else { 1309 no_preempt(curthread); 1310 queue_unlock(qp); 1311 if (nlwpid == 1) 1312 (void) __lwp_unpark(lwpid[0]); 1313 else 1314 (void) __lwp_unpark_all(lwpid, nlwpid); 1315 preempt(curthread); 1316 } 1317 1318 if (lwpid != buffer) 1319 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 1320 } 1321 1322 /* 1323 * Spin for a while, testing to see if the lock has been grabbed. 1324 * If this fails, call mutex_wakeup() to release a waiter. 1325 */ 1326 static lwpid_t 1327 mutex_unlock_queue(mutex_t *mp, int release_all) 1328 { 1329 ulwp_t *self = curthread; 1330 uint32_t *lockw = &mp->mutex_lockword; 1331 lwpid_t lwpid; 1332 volatile uint8_t *lockp; 1333 volatile uint32_t *spinp; 1334 int count; 1335 1336 /* 1337 * We use the swap primitive to clear the lock, but we must 1338 * atomically retain the waiters bit for the remainder of this 1339 * code to work. We first check to see if the waiters bit is 1340 * set and if so clear the lock by swapping in a word containing 1341 * only the waiters bit. This could produce a false positive test 1342 * for whether there are waiters that need to be waked up, but 1343 * this just causes an extra call to mutex_wakeup() to do nothing. 1344 * The opposite case is more delicate: If there are no waiters, 1345 * we swap in a zero lock byte and a zero waiters bit. The result 1346 * of the swap could indicate that there really was a waiter so in 1347 * this case we go directly to mutex_wakeup() without performing 1348 * any of the adaptive code because the waiter bit has been cleared 1349 * and the adaptive code is unreliable in this case. 1350 */ 1351 if (release_all || !(*lockw & WAITERMASK)) { 1352 mp->mutex_owner = 0; 1353 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1354 if (!(atomic_swap_32(lockw, 0) & WAITERMASK)) 1355 return (0); /* no waiters */ 1356 no_preempt(self); /* ensure a prompt wakeup */ 1357 } else { 1358 no_preempt(self); /* ensure a prompt wakeup */ 1359 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1360 spinp = (volatile uint32_t *)&mp->mutex_spinners; 1361 mp->mutex_owner = 0; 1362 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1363 /* clear lock, retain waiter */ 1364 (void) atomic_swap_32(lockw, WAITER); 1365 1366 /* 1367 * We spin here fewer times than mutex_trylock_adaptive(). 1368 * We are trying to balance two conflicting goals: 1369 * 1. Avoid waking up anyone if a spinning thread 1370 * grabs the lock. 1371 * 2. Wake up a sleeping thread promptly to get on 1372 * with useful work. 1373 * We don't spin at all if there is no acquiring spinner; 1374 * (mp->mutex_spinners is non-zero if there are spinners). 1375 */ 1376 for (count = self->ul_release_spin; 1377 *spinp && count > 0; count--) { 1378 /* 1379 * There is a waiter that we will have to wake 1380 * up unless someone else grabs the lock while 1381 * we are busy spinning. Like the spin loop in 1382 * mutex_trylock_adaptive(), this spin loop is 1383 * unfair to lwps that have already dropped into 1384 * the kernel to sleep. They will starve on a 1385 * highly-contended mutex. Too bad. 
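	 *
	 * A minimal sketch of such a grow-with-mmap helper, assuming the
	 * private mmap wrappers used elsewhere in this file; the real
	 * alloc_lwpids() is defined elsewhere and may differ:
	 *
	 *	lwpid_t *
	 *	grow_lwpids(lwpid_t *oldbuf, lwpid_t *stackbuf,
	 *	    int nlwpid, int *maxlwps)
	 *	{
	 *		int newmax = 2 * *maxlwps;
	 *		lwpid_t *newbuf = _private_mmap(NULL,
	 *		    newmax * sizeof (lwpid_t), PROT_READ | PROT_WRITE,
	 *		    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
	 *
	 *		if (newbuf == MAP_FAILED)
	 *			return (oldbuf);	(simplified error path)
	 *		(void) memcpy(newbuf, oldbuf, nlwpid * sizeof (lwpid_t));
	 *		if (oldbuf != stackbuf)
	 *			(void) _private_munmap(oldbuf,
	 *			    *maxlwps * sizeof (lwpid_t));
	 *		*maxlwps = newmax;
	 *		return (newbuf);
	 *	}
	 *
	 * The property that matters, as described above, is that nothing
	 * on this path acquires a lock.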
1386 */ 1387 if (*lockp != 0) { /* somebody grabbed the lock */ 1388 preempt(self); 1389 return (0); 1390 } 1391 SMT_PAUSE(); 1392 } 1393 1394 /* 1395 * No one grabbed the lock. 1396 * Wake up some lwp that is waiting for it. 1397 */ 1398 mp->mutex_waiters = 0; 1399 } 1400 1401 if (release_all) { 1402 mutex_wakeup_all(mp); 1403 lwpid = 0; 1404 } else { 1405 lwpid = mutex_wakeup(mp); 1406 } 1407 if (lwpid == 0) 1408 preempt(self); 1409 return (lwpid); 1410 } 1411 1412 /* 1413 * Like mutex_unlock_queue(), but for process-shared mutexes. 1414 * We tested the waiters field before calling here and it was non-zero. 1415 */ 1416 static void 1417 mutex_unlock_process(mutex_t *mp, int release_all) 1418 { 1419 ulwp_t *self = curthread; 1420 int count; 1421 volatile uint8_t *lockp; 1422 1423 /* 1424 * See the comments in mutex_unlock_queue(), above. 1425 */ 1426 if (ncpus == 0) 1427 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1428 count = (ncpus > 1)? self->ul_release_spin : 0; 1429 no_preempt(self); 1430 mp->mutex_owner = 0; 1431 mp->mutex_ownerpid = 0; 1432 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1433 if (release_all || count == 0) { 1434 /* clear lock, test waiter */ 1435 if (!(atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK)) { 1436 /* no waiters now */ 1437 preempt(self); 1438 return; 1439 } 1440 } else { 1441 /* clear lock, retain waiter */ 1442 (void) atomic_swap_32(&mp->mutex_lockword, WAITER); 1443 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1444 while (--count >= 0) { 1445 if (*lockp != 0) { 1446 /* somebody grabbed the lock */ 1447 preempt(self); 1448 return; 1449 } 1450 SMT_PAUSE(); 1451 } 1452 /* 1453 * We must clear the waiters field before going 1454 * to the kernel, else it could remain set forever. 1455 */ 1456 mp->mutex_waiters = 0; 1457 } 1458 (void) ___lwp_mutex_wakeup(mp, release_all); 1459 preempt(self); 1460 } 1461 1462 /* 1463 * Return the real priority of a thread. 1464 */ 1465 int 1466 real_priority(ulwp_t *ulwp) 1467 { 1468 if (ulwp->ul_epri == 0) 1469 return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); 1470 return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); 1471 } 1472 1473 void 1474 stall(void) 1475 { 1476 for (;;) 1477 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1478 } 1479 1480 /* 1481 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1482 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1483 * If successful, returns with mutex_owner set correctly. 1484 */ 1485 int 1486 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1487 timespec_t *tsp) 1488 { 1489 uberdata_t *udp = curthread->ul_uberdata; 1490 queue_head_t *qp; 1491 hrtime_t begin_sleep; 1492 int error = 0; 1493 1494 self->ul_sp = stkptr(); 1495 if (__td_event_report(self, TD_SLEEP, udp)) { 1496 self->ul_wchan = mp; 1497 self->ul_td_evbuf.eventnum = TD_SLEEP; 1498 self->ul_td_evbuf.eventdata = mp; 1499 tdb_event(TD_SLEEP, udp); 1500 } 1501 if (msp) { 1502 tdb_incr(msp->mutex_sleep); 1503 begin_sleep = gethrtime(); 1504 } 1505 1506 DTRACE_PROBE1(plockstat, mutex__block, mp); 1507 1508 /* 1509 * Put ourself on the sleep queue, and while we are 1510 * unable to grab the lock, go park in the kernel. 1511 * Take ourself off the sleep queue after we acquire the lock. 1512 * The waiter bit can be set/cleared only while holding the queue lock. 
1513 */ 1514 qp = queue_lock(mp, MX); 1515 enqueue(qp, self, mp, MX); 1516 mp->mutex_waiters = 1; 1517 for (;;) { 1518 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1519 mp->mutex_owner = (uintptr_t)self; 1520 mp->mutex_waiters = dequeue_self(qp, mp); 1521 break; 1522 } 1523 set_parking_flag(self, 1); 1524 queue_unlock(qp); 1525 /* 1526 * __lwp_park() will return the residual time in tsp 1527 * if we are unparked before the timeout expires. 1528 */ 1529 if ((error = __lwp_park(tsp, 0)) == EINTR) 1530 error = 0; 1531 set_parking_flag(self, 0); 1532 /* 1533 * We could have taken a signal or suspended ourself. 1534 * If we did, then we removed ourself from the queue. 1535 * Someone else may have removed us from the queue 1536 * as a consequence of mutex_unlock(). We may have 1537 * gotten a timeout from __lwp_park(). Or we may still 1538 * be on the queue and this is just a spurious wakeup. 1539 */ 1540 qp = queue_lock(mp, MX); 1541 if (self->ul_sleepq == NULL) { 1542 if (error) 1543 break; 1544 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1545 mp->mutex_owner = (uintptr_t)self; 1546 break; 1547 } 1548 enqueue(qp, self, mp, MX); 1549 mp->mutex_waiters = 1; 1550 } 1551 ASSERT(self->ul_sleepq == qp && 1552 self->ul_qtype == MX && 1553 self->ul_wchan == mp); 1554 if (error) { 1555 mp->mutex_waiters = dequeue_self(qp, mp); 1556 break; 1557 } 1558 } 1559 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1560 self->ul_wchan == NULL); 1561 self->ul_sp = 0; 1562 queue_unlock(qp); 1563 1564 if (msp) 1565 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1566 1567 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1568 1569 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1570 ASSERT(mp->mutex_type & LOCK_ROBUST); 1571 /* 1572 * We shouldn't own the mutex; clear the lock. 1573 */ 1574 mp->mutex_owner = 0; 1575 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) 1576 mutex_wakeup_all(mp); 1577 error = ENOTRECOVERABLE; 1578 } 1579 1580 if (error) { 1581 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1582 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1583 } else { 1584 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1585 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1586 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1587 ASSERT(mp->mutex_type & LOCK_ROBUST); 1588 error = EOWNERDEAD; 1589 } 1590 } 1591 1592 return (error); 1593 } 1594 1595 static int 1596 mutex_recursion(mutex_t *mp, int mtype, int try) 1597 { 1598 ASSERT(mutex_is_held(mp)); 1599 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1600 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1601 1602 if (mtype & LOCK_RECURSIVE) { 1603 if (mp->mutex_rcount == RECURSION_MAX) { 1604 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1605 return (EAGAIN); 1606 } 1607 mp->mutex_rcount++; 1608 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1609 return (0); 1610 } 1611 if (try == MUTEX_LOCK) { 1612 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1613 return (EDEADLK); 1614 } 1615 return (EBUSY); 1616 } 1617 1618 /* 1619 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1620 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1621 * We use tdb_hash_lock here and in the synch object tracking code in 1622 * the tdb_agent.c file. There is no conflict between these two usages. 
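 *
 * For reference, an application consumes the robust-mutex machinery
 * registered here roughly as follows; recover_shared_state() is a
 * made-up application function, not something defined in libc:
 *
 *	int error = mutex_lock(mp);
 *	if (error == EOWNERDEAD || error == ELOCKUNMAPPED) {
 *		recover_shared_state();		(previous owner died)
 *		(void) mutex_consistent(mp);
 *	} else if (error == ENOTRECOVERABLE) {
 *		return (error);			(no one recovered the data)
 *	}
 *	... critical section ...
 *	(void) mutex_unlock(mp);
 *
 * Unlocking without first calling mutex_consistent() marks the lock
 * LOCK_NOTRECOVERABLE, as mutex_unlock_internal() below shows.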
1623 */ 1624 void 1625 register_lock(mutex_t *mp) 1626 { 1627 uberdata_t *udp = curthread->ul_uberdata; 1628 uint_t hash = LOCK_HASH(mp); 1629 robust_t *rlp; 1630 robust_t **rlpp; 1631 robust_t **table; 1632 1633 if ((table = udp->robustlocks) == NULL) { 1634 lmutex_lock(&udp->tdb_hash_lock); 1635 if ((table = udp->robustlocks) == NULL) { 1636 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1637 _membar_producer(); 1638 udp->robustlocks = table; 1639 } 1640 lmutex_unlock(&udp->tdb_hash_lock); 1641 } 1642 _membar_consumer(); 1643 1644 /* 1645 * First search the registered table with no locks held. 1646 * This is safe because the table never shrinks 1647 * and we can only get a false negative. 1648 */ 1649 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1650 if (rlp->robust_lock == mp) /* already registered */ 1651 return; 1652 } 1653 1654 /* 1655 * The lock was not found. 1656 * Repeat the operation with tdb_hash_lock held. 1657 */ 1658 lmutex_lock(&udp->tdb_hash_lock); 1659 1660 for (rlpp = &table[hash]; 1661 (rlp = *rlpp) != NULL; 1662 rlpp = &rlp->robust_next) { 1663 if (rlp->robust_lock == mp) { /* already registered */ 1664 lmutex_unlock(&udp->tdb_hash_lock); 1665 return; 1666 } 1667 } 1668 1669 /* 1670 * The lock has never been registered. 1671 * Register it now and add it to the table. 1672 */ 1673 (void) ___lwp_mutex_register(mp); 1674 rlp = lmalloc(sizeof (*rlp)); 1675 rlp->robust_lock = mp; 1676 _membar_producer(); 1677 *rlpp = rlp; 1678 1679 lmutex_unlock(&udp->tdb_hash_lock); 1680 } 1681 1682 /* 1683 * This is called in the child of fork()/forkall() to start over 1684 * with a clean slate. (Each process must register its own locks.) 1685 * No locks are needed because all other threads are suspended or gone. 1686 */ 1687 void 1688 unregister_locks(void) 1689 { 1690 uberdata_t *udp = curthread->ul_uberdata; 1691 uint_t hash; 1692 robust_t **table; 1693 robust_t *rlp; 1694 robust_t *next; 1695 1696 if ((table = udp->robustlocks) != NULL) { 1697 for (hash = 0; hash < LOCKHASHSZ; hash++) { 1698 rlp = table[hash]; 1699 while (rlp != NULL) { 1700 next = rlp->robust_next; 1701 lfree(rlp, sizeof (*rlp)); 1702 rlp = next; 1703 } 1704 } 1705 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 1706 udp->robustlocks = NULL; 1707 } 1708 } 1709 1710 /* 1711 * Returns with mutex_owner set correctly. 
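 *
 * For orientation, mutex_lock_internal() dispatches on the lock type
 * roughly as follows:
 *
 *	LOCK_PRIO_INHERIT	go straight to the kernel, via
 *				mutex_trylock_kernel() or mutex_lock_kernel();
 *	USYNC_PROCESS		mutex_trylock_process(), falling back to
 *				mutex_lock_kernel() on EBUSY;
 *	USYNC_THREAD		mutex_trylock_adaptive(), falling back to
 *				mutex_lock_queue() on EBUSY.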
1712 */ 1713 static int 1714 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 1715 { 1716 ulwp_t *self = curthread; 1717 uberdata_t *udp = self->ul_uberdata; 1718 int mtype = mp->mutex_type; 1719 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 1720 int error = 0; 1721 uint8_t ceil; 1722 int myprio; 1723 1724 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1725 1726 if (!self->ul_schedctl_called) 1727 (void) setup_schedctl(); 1728 1729 if (msp && try == MUTEX_TRY) 1730 tdb_incr(msp->mutex_try); 1731 1732 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 1733 return (mutex_recursion(mp, mtype, try)); 1734 1735 if (self->ul_error_detection && try == MUTEX_LOCK && 1736 tsp == NULL && mutex_is_held(mp)) 1737 lock_error(mp, "mutex_lock", NULL, NULL); 1738 1739 if (mtype & LOCK_PRIO_PROTECT) { 1740 ceil = mp->mutex_ceiling; 1741 ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); 1742 myprio = real_priority(self); 1743 if (myprio > ceil) { 1744 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 1745 return (EINVAL); 1746 } 1747 if ((error = _ceil_mylist_add(mp)) != 0) { 1748 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1749 return (error); 1750 } 1751 if (myprio < ceil) 1752 _ceil_prio_inherit(ceil); 1753 } 1754 1755 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 1756 == (USYNC_PROCESS | LOCK_ROBUST)) 1757 register_lock(mp); 1758 1759 if (mtype & LOCK_PRIO_INHERIT) { 1760 /* go straight to the kernel */ 1761 if (try == MUTEX_TRY) 1762 error = mutex_trylock_kernel(mp); 1763 else /* MUTEX_LOCK */ 1764 error = mutex_lock_kernel(mp, tsp, msp); 1765 /* 1766 * The kernel never sets or clears the lock byte 1767 * for LOCK_PRIO_INHERIT mutexes. 1768 * Set it here for consistency. 1769 */ 1770 switch (error) { 1771 case 0: 1772 mp->mutex_lockw = LOCKSET; 1773 break; 1774 case EOWNERDEAD: 1775 case ELOCKUNMAPPED: 1776 mp->mutex_lockw = LOCKSET; 1777 /* FALLTHROUGH */ 1778 case ENOTRECOVERABLE: 1779 ASSERT(mtype & LOCK_ROBUST); 1780 break; 1781 case EDEADLK: 1782 if (try == MUTEX_LOCK) 1783 stall(); 1784 error = EBUSY; 1785 break; 1786 } 1787 } else if (mtype & USYNC_PROCESS) { 1788 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 1789 if (error == EBUSY && try == MUTEX_LOCK) 1790 error = mutex_lock_kernel(mp, tsp, msp); 1791 } else { /* USYNC_THREAD */ 1792 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 1793 if (error == EBUSY && try == MUTEX_LOCK) 1794 error = mutex_lock_queue(self, msp, mp, tsp); 1795 } 1796 1797 switch (error) { 1798 case 0: 1799 case EOWNERDEAD: 1800 case ELOCKUNMAPPED: 1801 if (mtype & LOCK_ROBUST) 1802 remember_lock(mp); 1803 if (msp) 1804 record_begin_hold(msp); 1805 break; 1806 default: 1807 if (mtype & LOCK_PRIO_PROTECT) { 1808 (void) _ceil_mylist_del(mp); 1809 if (myprio < ceil) 1810 _ceil_prio_waive(); 1811 } 1812 if (try == MUTEX_TRY) { 1813 if (msp) 1814 tdb_incr(msp->mutex_try_fail); 1815 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 1816 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 1817 tdb_event(TD_LOCK_TRY, udp); 1818 } 1819 } 1820 break; 1821 } 1822 1823 return (error); 1824 } 1825 1826 int 1827 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 1828 { 1829 ulwp_t *self = curthread; 1830 uberdata_t *udp = self->ul_uberdata; 1831 1832 /* 1833 * We know that USYNC_PROCESS is set in mtype and that 1834 * zero, one, or both of the flags LOCK_RECURSIVE and 1835 * LOCK_ERRORCHECK are set, and that no other flags are set. 
	 */
	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
	enter_critical(self);
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		mp->mutex_ownerpid = udp->pid;
		exit_critical(self);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		return (0);
	}
	exit_critical(self);

	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
		return (mutex_recursion(mp, mtype, try));

	if (try == MUTEX_LOCK) {
		if (mutex_trylock_process(mp, 1) == 0)
			return (0);
		return (mutex_lock_kernel(mp, tsp, NULL));
	}

	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
		tdb_event(TD_LOCK_TRY, udp);
	}
	return (EBUSY);
}

static int
mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	uberflags_t *gflags;
	int mtype;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    udp->uberflags.uf_all) == 0) {
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 */
		if (mp->mutex_lockw == 0) {
			mp->mutex_lockw = LOCKSET;
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		/*
		 * We have reached a deadlock, probably because the
		 * process is executing non-async-signal-safe code in
		 * a signal handler and is attempting to acquire a lock
		 * that it already owns.  This is not surprising, given
		 * bad programming practices over the years that have
		 * resulted in applications calling printf() and such
		 * in their signal handlers.  Unless the user has told
		 * us that the signal handlers are safe by setting:
		 *	export _THREAD_ASYNC_SAFE=1
		 * we return EDEADLK rather than actually deadlocking.
		 */
		if (tsp == NULL &&
		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
			return (EDEADLK);
		}
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
1915 */ 1916 if ((gflags = self->ul_schedctl_called) != NULL && 1917 (gflags->uf_trs_ted | 1918 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 1919 if (mtype & USYNC_PROCESS) 1920 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 1921 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1922 mp->mutex_owner = (uintptr_t)self; 1923 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1924 return (0); 1925 } 1926 if (mtype && MUTEX_OWNER(mp) == self) 1927 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 1928 if (mutex_trylock_adaptive(mp, 1) != 0) 1929 return (mutex_lock_queue(self, NULL, mp, tsp)); 1930 return (0); 1931 } 1932 1933 /* else do it the long way */ 1934 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 1935 } 1936 1937 #pragma weak _private_mutex_lock = __mutex_lock 1938 #pragma weak mutex_lock = __mutex_lock 1939 #pragma weak _mutex_lock = __mutex_lock 1940 #pragma weak pthread_mutex_lock = __mutex_lock 1941 #pragma weak _pthread_mutex_lock = __mutex_lock 1942 int 1943 __mutex_lock(mutex_t *mp) 1944 { 1945 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1946 return (mutex_lock_impl(mp, NULL)); 1947 } 1948 1949 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 1950 int 1951 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 1952 { 1953 timespec_t tslocal; 1954 int error; 1955 1956 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1957 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 1958 error = mutex_lock_impl(mp, &tslocal); 1959 if (error == ETIME) 1960 error = ETIMEDOUT; 1961 return (error); 1962 } 1963 1964 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 1965 int 1966 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 1967 { 1968 timespec_t tslocal; 1969 int error; 1970 1971 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1972 tslocal = *reltime; 1973 error = mutex_lock_impl(mp, &tslocal); 1974 if (error == ETIME) 1975 error = ETIMEDOUT; 1976 return (error); 1977 } 1978 1979 #pragma weak _private_mutex_trylock = __mutex_trylock 1980 #pragma weak mutex_trylock = __mutex_trylock 1981 #pragma weak _mutex_trylock = __mutex_trylock 1982 #pragma weak pthread_mutex_trylock = __mutex_trylock 1983 #pragma weak _pthread_mutex_trylock = __mutex_trylock 1984 int 1985 __mutex_trylock(mutex_t *mp) 1986 { 1987 ulwp_t *self = curthread; 1988 uberdata_t *udp = self->ul_uberdata; 1989 uberflags_t *gflags; 1990 int mtype; 1991 1992 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1993 /* 1994 * Optimize the case of USYNC_THREAD, including 1995 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 1996 * no error detection, no lock statistics, 1997 * and the process has only a single thread. 1998 * (Most likely a traditional single-threaded application.) 1999 */ 2000 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2001 udp->uberflags.uf_all) == 0) { 2002 /* 2003 * Only one thread exists so we don't need an atomic operation. 2004 */ 2005 if (mp->mutex_lockw == 0) { 2006 mp->mutex_lockw = LOCKSET; 2007 mp->mutex_owner = (uintptr_t)self; 2008 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2009 return (0); 2010 } 2011 if (mtype && MUTEX_OWNER(mp) == self) 2012 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2013 return (EBUSY); 2014 } 2015 2016 /* 2017 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2018 * no error detection, and no lock statistics. 2019 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
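	 *
	 * The caller-visible contract implemented here is the usual trylock
	 * contract; an illustrative sketch, not additional library code:
	 *
	 *	if (mutex_trylock(mp) == 0) {
	 *		... got the lock without blocking ...
	 *		(void) mutex_unlock(mp);
	 *	} else {
	 *		... EBUSY: do other work, or fall back to mutex_lock()
	 *		or pthread_mutex_timedlock(), which reports ETIMEDOUT
	 *		on expiry as shown above ...
	 *	}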
2020 */ 2021 if ((gflags = self->ul_schedctl_called) != NULL && 2022 (gflags->uf_trs_ted | 2023 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2024 if (mtype & USYNC_PROCESS) 2025 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2026 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2027 mp->mutex_owner = (uintptr_t)self; 2028 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2029 return (0); 2030 } 2031 if (mtype && MUTEX_OWNER(mp) == self) 2032 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2033 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2034 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2035 tdb_event(TD_LOCK_TRY, udp); 2036 } 2037 return (EBUSY); 2038 } 2039 2040 /* else do it the long way */ 2041 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2042 } 2043 2044 int 2045 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2046 { 2047 ulwp_t *self = curthread; 2048 uberdata_t *udp = self->ul_uberdata; 2049 int mtype = mp->mutex_type; 2050 tdb_mutex_stats_t *msp; 2051 int error = 0; 2052 int release_all; 2053 lwpid_t lwpid; 2054 2055 if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 2056 return (EPERM); 2057 2058 if (self->ul_error_detection && !mutex_is_held(mp)) 2059 lock_error(mp, "mutex_unlock", NULL, NULL); 2060 2061 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2062 mp->mutex_rcount--; 2063 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2064 return (0); 2065 } 2066 2067 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2068 (void) record_hold_time(msp); 2069 2070 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2071 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2072 ASSERT(mp->mutex_type & LOCK_ROBUST); 2073 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2074 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2075 } 2076 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2077 2078 if (mtype & LOCK_PRIO_INHERIT) { 2079 no_preempt(self); 2080 mp->mutex_owner = 0; 2081 mp->mutex_ownerpid = 0; 2082 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2083 mp->mutex_lockw = LOCKCLEAR; 2084 error = ___lwp_mutex_unlock(mp); 2085 preempt(self); 2086 } else if (mtype & USYNC_PROCESS) { 2087 if (mp->mutex_lockword & WAITERMASK) { 2088 mutex_unlock_process(mp, release_all); 2089 } else { 2090 mp->mutex_owner = 0; 2091 mp->mutex_ownerpid = 0; 2092 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2093 if (atomic_swap_32(&mp->mutex_lockword, 0) & 2094 WAITERMASK) { /* a waiter suddenly appeared */ 2095 no_preempt(self); 2096 (void) ___lwp_mutex_wakeup(mp, release_all); 2097 preempt(self); 2098 } 2099 } 2100 } else { /* USYNC_THREAD */ 2101 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2102 (void) __lwp_unpark(lwpid); 2103 preempt(self); 2104 } 2105 } 2106 2107 if (mtype & LOCK_ROBUST) 2108 forget_lock(mp); 2109 2110 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2111 _ceil_prio_waive(); 2112 2113 return (error); 2114 } 2115 2116 #pragma weak _private_mutex_unlock = __mutex_unlock 2117 #pragma weak mutex_unlock = __mutex_unlock 2118 #pragma weak _mutex_unlock = __mutex_unlock 2119 #pragma weak pthread_mutex_unlock = __mutex_unlock 2120 #pragma weak _pthread_mutex_unlock = __mutex_unlock 2121 int 2122 __mutex_unlock(mutex_t *mp) 2123 { 2124 ulwp_t *self = curthread; 2125 uberdata_t *udp = self->ul_uberdata; 2126 uberflags_t *gflags; 2127 lwpid_t lwpid; 2128 int mtype; 2129 short el; 2130 2131 /* 2132 * Optimize the case of USYNC_THREAD, including 2133 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2134 * no 
error detection, no lock statistics, 2135 * and the process has only a single thread. 2136 * (Most likely a traditional single-threaded application.) 2137 */ 2138 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2139 udp->uberflags.uf_all) == 0) { 2140 if (mtype) { 2141 /* 2142 * At this point we know that one or both of the 2143 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2144 */ 2145 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2146 return (EPERM); 2147 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2148 mp->mutex_rcount--; 2149 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2150 return (0); 2151 } 2152 } 2153 /* 2154 * Only one thread exists so we don't need an atomic operation. 2155 * Also, there can be no waiters. 2156 */ 2157 mp->mutex_owner = 0; 2158 mp->mutex_lockword = 0; 2159 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2160 return (0); 2161 } 2162 2163 /* 2164 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2165 * no error detection, and no lock statistics. 2166 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2167 */ 2168 if ((gflags = self->ul_schedctl_called) != NULL) { 2169 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2170 fast_unlock: 2171 if (!(mp->mutex_lockword & WAITERMASK)) { 2172 /* no waiter exists right now */ 2173 mp->mutex_owner = 0; 2174 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2175 if (atomic_swap_32(&mp->mutex_lockword, 0) & 2176 WAITERMASK) { 2177 /* a waiter suddenly appeared */ 2178 no_preempt(self); 2179 if ((lwpid = mutex_wakeup(mp)) != 0) 2180 (void) __lwp_unpark(lwpid); 2181 preempt(self); 2182 } 2183 } else if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2184 (void) __lwp_unpark(lwpid); 2185 preempt(self); 2186 } 2187 return (0); 2188 } 2189 if (el) /* error detection or lock statistics */ 2190 goto slow_unlock; 2191 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2192 /* 2193 * At this point we know that one or both of the 2194 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2195 */ 2196 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2197 return (EPERM); 2198 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2199 mp->mutex_rcount--; 2200 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2201 return (0); 2202 } 2203 goto fast_unlock; 2204 } 2205 if ((mtype & 2206 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2207 /* 2208 * At this point we know that zero, one, or both of the 2209 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2210 * that the USYNC_PROCESS flag is set. 2211 */ 2212 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2213 return (EPERM); 2214 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2215 mp->mutex_rcount--; 2216 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2217 return (0); 2218 } 2219 if (mp->mutex_lockword & WAITERMASK) { 2220 mutex_unlock_process(mp, 0); 2221 } else { 2222 mp->mutex_owner = 0; 2223 mp->mutex_ownerpid = 0; 2224 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2225 if (atomic_swap_32(&mp->mutex_lockword, 0) & 2226 WAITERMASK) { 2227 no_preempt(self); 2228 (void) ___lwp_mutex_wakeup(mp, 0); 2229 preempt(self); 2230 } 2231 } 2232 return (0); 2233 } 2234 } 2235 2236 /* else do it the long way */ 2237 slow_unlock: 2238 return (mutex_unlock_internal(mp, 0)); 2239 } 2240 2241 /* 2242 * Internally to the library, almost all mutex lock/unlock actions 2243 * go through these lmutex_ functions, to protect critical regions. 
2244 * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 2245 * to make these functions faster since we know that the mutex type 2246 * of all internal locks is USYNC_THREAD. We also know that internal 2247 * locking can never fail, so we panic if it does. 2248 */ 2249 void 2250 lmutex_lock(mutex_t *mp) 2251 { 2252 ulwp_t *self = curthread; 2253 uberdata_t *udp = self->ul_uberdata; 2254 2255 ASSERT(mp->mutex_type == USYNC_THREAD); 2256 2257 enter_critical(self); 2258 /* 2259 * Optimize the case of no lock statistics and only a single thread. 2260 * (Most likely a traditional single-threaded application.) 2261 */ 2262 if (udp->uberflags.uf_all == 0) { 2263 /* 2264 * Only one thread exists; the mutex must be free. 2265 */ 2266 ASSERT(mp->mutex_lockw == 0); 2267 mp->mutex_lockw = LOCKSET; 2268 mp->mutex_owner = (uintptr_t)self; 2269 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2270 } else { 2271 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2272 2273 if (!self->ul_schedctl_called) 2274 (void) setup_schedctl(); 2275 2276 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2277 mp->mutex_owner = (uintptr_t)self; 2278 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2279 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2280 (void) mutex_lock_queue(self, msp, mp, NULL); 2281 } 2282 2283 if (msp) 2284 record_begin_hold(msp); 2285 } 2286 } 2287 2288 void 2289 lmutex_unlock(mutex_t *mp) 2290 { 2291 ulwp_t *self = curthread; 2292 uberdata_t *udp = self->ul_uberdata; 2293 2294 ASSERT(mp->mutex_type == USYNC_THREAD); 2295 2296 /* 2297 * Optimize the case of no lock statistics and only a single thread. 2298 * (Most likely a traditional single-threaded application.) 2299 */ 2300 if (udp->uberflags.uf_all == 0) { 2301 /* 2302 * Only one thread exists so there can be no waiters. 2303 */ 2304 mp->mutex_owner = 0; 2305 mp->mutex_lockword = 0; 2306 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2307 } else { 2308 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2309 lwpid_t lwpid; 2310 2311 if (msp) 2312 (void) record_hold_time(msp); 2313 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2314 (void) __lwp_unpark(lwpid); 2315 preempt(self); 2316 } 2317 } 2318 exit_critical(self); 2319 } 2320 2321 /* 2322 * For specialized code in libc, like the asynchronous i/o code, 2323 * the following sig_*() locking primitives are used in order 2324 * to make the code asynchronous signal safe. Signals are 2325 * deferred while locks acquired by these functions are held. 2326 */ 2327 void 2328 sig_mutex_lock(mutex_t *mp) 2329 { 2330 sigoff(curthread); 2331 (void) _private_mutex_lock(mp); 2332 } 2333 2334 void 2335 sig_mutex_unlock(mutex_t *mp) 2336 { 2337 (void) _private_mutex_unlock(mp); 2338 sigon(curthread); 2339 } 2340 2341 int 2342 sig_mutex_trylock(mutex_t *mp) 2343 { 2344 int error; 2345 2346 sigoff(curthread); 2347 if ((error = _private_mutex_trylock(mp)) != 0) 2348 sigon(curthread); 2349 return (error); 2350 } 2351 2352 /* 2353 * sig_cond_wait() is a cancellation point. 2354 */ 2355 int 2356 sig_cond_wait(cond_t *cv, mutex_t *mp) 2357 { 2358 int error; 2359 2360 ASSERT(curthread->ul_sigdefer != 0); 2361 _private_testcancel(); 2362 error = _cond_wait(cv, mp); 2363 if (error == EINTR && curthread->ul_cursig) { 2364 sig_mutex_unlock(mp); 2365 /* take the deferred signal here */ 2366 sig_mutex_lock(mp); 2367 } 2368 _private_testcancel(); 2369 return (error); 2370 } 2371 2372 /* 2373 * sig_cond_reltimedwait() is a cancellation point. 
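 * (A sketch of the signal-deferral idea behind these sig_*() wrappers
 * follows this comment.)
 */

/*
 * Sketch: sig_mutex_lock()/sig_mutex_unlock() above defer signals for as
 * long as the lock is held, so a signal handler can never run inside the
 * critical section.  An application can approximate the same effect with
 * pthread_sigmask(); this is only an analogy, not libc's sigoff()/sigon()
 * mechanism, and the helper names deferred_lock()/deferred_unlock() are
 * hypothetical.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>
#include <signal.h>

static void
deferred_lock(pthread_mutex_t *mp, sigset_t *saved)
{
	sigset_t all;

	(void) sigfillset(&all);
	(void) pthread_sigmask(SIG_BLOCK, &all, saved);
	(void) pthread_mutex_lock(mp);
}

static void
deferred_unlock(pthread_mutex_t *mp, const sigset_t *saved)
{
	(void) pthread_mutex_unlock(mp);
	(void) pthread_sigmask(SIG_SETMASK, saved, NULL);
}
#endif

/*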
2374 */ 2375 int 2376 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2377 { 2378 int error; 2379 2380 ASSERT(curthread->ul_sigdefer != 0); 2381 _private_testcancel(); 2382 error = _cond_reltimedwait(cv, mp, ts); 2383 if (error == EINTR && curthread->ul_cursig) { 2384 sig_mutex_unlock(mp); 2385 /* take the deferred signal here */ 2386 sig_mutex_lock(mp); 2387 } 2388 _private_testcancel(); 2389 return (error); 2390 } 2391 2392 static int 2393 shared_mutex_held(mutex_t *mparg) 2394 { 2395 /* 2396 * The 'volatile' is necessary to make sure the compiler doesn't 2397 * reorder the tests of the various components of the mutex. 2398 * They must be tested in this order: 2399 * mutex_lockw 2400 * mutex_owner 2401 * mutex_ownerpid 2402 * This relies on the fact that everywhere mutex_lockw is cleared, 2403 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2404 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2405 * and mutex_ownerpid are set after mutex_lockw is set, and that 2406 * mutex_lockw is set or cleared with a memory barrier. 2407 */ 2408 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2409 ulwp_t *self = curthread; 2410 uberdata_t *udp = self->ul_uberdata; 2411 2412 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2413 } 2414 2415 /* 2416 * Some crufty old programs define their own version of _mutex_held() 2417 * to be simply return(1). This breaks internal libc logic, so we 2418 * define a private version for exclusive use by libc, mutex_is_held(), 2419 * and also a new public function, __mutex_held(), to be used in new 2420 * code to circumvent these crufty old programs. 2421 */ 2422 #pragma weak mutex_held = mutex_is_held 2423 #pragma weak _mutex_held = mutex_is_held 2424 #pragma weak __mutex_held = mutex_is_held 2425 int 2426 mutex_is_held(mutex_t *mparg) 2427 { 2428 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2429 2430 if (mparg->mutex_type & USYNC_PROCESS) 2431 return (shared_mutex_held(mparg)); 2432 return (MUTEX_OWNED(mp, curthread)); 2433 } 2434 2435 #pragma weak _private_mutex_destroy = __mutex_destroy 2436 #pragma weak mutex_destroy = __mutex_destroy 2437 #pragma weak _mutex_destroy = __mutex_destroy 2438 #pragma weak pthread_mutex_destroy = __mutex_destroy 2439 #pragma weak _pthread_mutex_destroy = __mutex_destroy 2440 int 2441 __mutex_destroy(mutex_t *mp) 2442 { 2443 if (mp->mutex_type & USYNC_PROCESS) 2444 forget_lock(mp); 2445 (void) _memset(mp, 0, sizeof (*mp)); 2446 tdb_sync_obj_deregister(mp); 2447 return (0); 2448 } 2449 2450 #pragma weak mutex_consistent = __mutex_consistent 2451 #pragma weak _mutex_consistent = __mutex_consistent 2452 #pragma weak pthread_mutex_consistent_np = __mutex_consistent 2453 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 2454 int 2455 __mutex_consistent(mutex_t *mp) 2456 { 2457 /* 2458 * Do this only for an inconsistent, initialized robust lock 2459 * that we hold. For all other cases, return EINVAL. 2460 */ 2461 if (mutex_is_held(mp) && 2462 (mp->mutex_type & LOCK_ROBUST) && 2463 (mp->mutex_flag & LOCK_INITED) && 2464 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2465 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2466 mp->mutex_rcount = 0; 2467 return (0); 2468 } 2469 return (EINVAL); 2470 } 2471 2472 /* 2473 * Spin locks are separate from ordinary mutexes, 2474 * but we use the same data structure for them. 
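 * (Before that, a sketch of the robust-lock recovery sequence that
 * __mutex_consistent() above supports.)
 */

/*
 * Sketch: __mutex_consistent() is reached through the public
 * pthread_mutex_consistent_np() entry point above.  The usual caller
 * sequence when a robust lock's previous owner died is: acquire the lock
 * (getting EOWNERDEAD), repair the data it protects, mark the lock
 * consistent, and continue.  A minimal sketch; robust_lock() is a
 * hypothetical helper name.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>
#include <errno.h>

static int
robust_lock(pthread_mutex_t *mp)
{
	int error = pthread_mutex_lock(mp);

	if (error == EOWNERDEAD) {
		/*
		 * The previous owner died holding the lock; we hold it now.
		 * Repair the protected data here, then mark it consistent.
		 */
		error = pthread_mutex_consistent_np(mp);
	}
	return (error);	/* 0 on success; ENOTRECOVERABLE if beyond repair */
}
#endif

/*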
2475 */ 2476 2477 #pragma weak pthread_spin_init = _pthread_spin_init 2478 int 2479 _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2480 { 2481 mutex_t *mp = (mutex_t *)lock; 2482 2483 (void) _memset(mp, 0, sizeof (*mp)); 2484 if (pshared == PTHREAD_PROCESS_SHARED) 2485 mp->mutex_type = USYNC_PROCESS; 2486 else 2487 mp->mutex_type = USYNC_THREAD; 2488 mp->mutex_flag = LOCK_INITED; 2489 mp->mutex_magic = MUTEX_MAGIC; 2490 return (0); 2491 } 2492 2493 #pragma weak pthread_spin_destroy = _pthread_spin_destroy 2494 int 2495 _pthread_spin_destroy(pthread_spinlock_t *lock) 2496 { 2497 (void) _memset(lock, 0, sizeof (*lock)); 2498 return (0); 2499 } 2500 2501 #pragma weak pthread_spin_trylock = _pthread_spin_trylock 2502 int 2503 _pthread_spin_trylock(pthread_spinlock_t *lock) 2504 { 2505 mutex_t *mp = (mutex_t *)lock; 2506 ulwp_t *self = curthread; 2507 int error = 0; 2508 2509 no_preempt(self); 2510 if (set_lock_byte(&mp->mutex_lockw) != 0) 2511 error = EBUSY; 2512 else { 2513 mp->mutex_owner = (uintptr_t)self; 2514 if (mp->mutex_type == USYNC_PROCESS) 2515 mp->mutex_ownerpid = self->ul_uberdata->pid; 2516 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2517 } 2518 preempt(self); 2519 return (error); 2520 } 2521 2522 #pragma weak pthread_spin_lock = _pthread_spin_lock 2523 int 2524 _pthread_spin_lock(pthread_spinlock_t *lock) 2525 { 2526 mutex_t *mp = (mutex_t *)lock; 2527 ulwp_t *self = curthread; 2528 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2529 int count = 0; 2530 2531 ASSERT(!self->ul_critical || self->ul_bindflags); 2532 2533 DTRACE_PROBE1(plockstat, mutex__spin, mp); 2534 2535 /* 2536 * We don't care whether the owner is running on a processor. 2537 * We just spin because that's what this interface requires. 2538 */ 2539 for (;;) { 2540 if (count < INT_MAX) 2541 count++; 2542 if (*lockp == 0) { /* lock byte appears to be clear */ 2543 no_preempt(self); 2544 if (set_lock_byte(lockp) == 0) 2545 break; 2546 preempt(self); 2547 } 2548 SMT_PAUSE(); 2549 } 2550 mp->mutex_owner = (uintptr_t)self; 2551 if (mp->mutex_type == USYNC_PROCESS) 2552 mp->mutex_ownerpid = self->ul_uberdata->pid; 2553 preempt(self); 2554 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 2555 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2556 return (0); 2557 } 2558 2559 #pragma weak pthread_spin_unlock = _pthread_spin_unlock 2560 int 2561 _pthread_spin_unlock(pthread_spinlock_t *lock) 2562 { 2563 mutex_t *mp = (mutex_t *)lock; 2564 ulwp_t *self = curthread; 2565 2566 no_preempt(self); 2567 mp->mutex_owner = 0; 2568 mp->mutex_ownerpid = 0; 2569 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2570 (void) atomic_swap_32(&mp->mutex_lockword, 0); 2571 preempt(self); 2572 return (0); 2573 } 2574 2575 #define INITIAL_LOCKS 8 /* initialial size of ul_heldlocks.array */ 2576 2577 /* 2578 * Find/allocate an entry for 'lock' in our array of held locks. 
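 * (But first, a usage sketch of the spin-lock interfaces implemented above.)
 */

/*
 * Sketch: the pthread_spin_*() entry points above busy-wait rather than
 * sleep, so callers are expected to hold them only around very short
 * critical sections.  A minimal usage sketch; the names counter_lock,
 * counter_init() and counter_bump() are hypothetical.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>

static pthread_spinlock_t counter_lock;
static long counter;

static void
counter_init(void)
{
	(void) pthread_spin_init(&counter_lock, PTHREAD_PROCESS_PRIVATE);
}

static void
counter_bump(void)
{
	(void) pthread_spin_lock(&counter_lock);	/* spins, never sleeps */
	counter++;					/* keep this region tiny */
	(void) pthread_spin_unlock(&counter_lock);
}
#endif

/*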
2579 */ 2580 static mutex_t ** 2581 find_lock_entry(mutex_t *lock) 2582 { 2583 ulwp_t *self = curthread; 2584 mutex_t **remembered = NULL; 2585 mutex_t **lockptr; 2586 uint_t nlocks; 2587 2588 if ((nlocks = self->ul_heldlockcnt) != 0) 2589 lockptr = self->ul_heldlocks.array; 2590 else { 2591 nlocks = 1; 2592 lockptr = &self->ul_heldlocks.single; 2593 } 2594 2595 for (; nlocks; nlocks--, lockptr++) { 2596 if (*lockptr == lock) 2597 return (lockptr); 2598 if (*lockptr == NULL && remembered == NULL) 2599 remembered = lockptr; 2600 } 2601 if (remembered != NULL) { 2602 *remembered = lock; 2603 return (remembered); 2604 } 2605 2606 /* 2607 * No entry available. Allocate more space, converting 2608 * the single entry into an array of entries if necessary. 2609 */ 2610 if ((nlocks = self->ul_heldlockcnt) == 0) { 2611 /* 2612 * Initial allocation of the array. 2613 * Convert the single entry into an array. 2614 */ 2615 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 2616 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 2617 /* 2618 * The single entry becomes the first entry in the array. 2619 */ 2620 *lockptr = self->ul_heldlocks.single; 2621 self->ul_heldlocks.array = lockptr; 2622 /* 2623 * Return the next available entry in the array. 2624 */ 2625 *++lockptr = lock; 2626 return (lockptr); 2627 } 2628 /* 2629 * Reallocate the array, double the size each time. 2630 */ 2631 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 2632 (void) _memcpy(lockptr, self->ul_heldlocks.array, 2633 nlocks * sizeof (mutex_t *)); 2634 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2635 self->ul_heldlocks.array = lockptr; 2636 self->ul_heldlockcnt *= 2; 2637 /* 2638 * Return the next available entry in the newly allocated array. 2639 */ 2640 *(lockptr += nlocks) = lock; 2641 return (lockptr); 2642 } 2643 2644 /* 2645 * Insert 'lock' into our list of held locks. 2646 * Currently only used for LOCK_ROBUST mutexes. 2647 */ 2648 void 2649 remember_lock(mutex_t *lock) 2650 { 2651 (void) find_lock_entry(lock); 2652 } 2653 2654 /* 2655 * Remove 'lock' from our list of held locks. 2656 * Currently only used for LOCK_ROBUST mutexes. 2657 */ 2658 void 2659 forget_lock(mutex_t *lock) 2660 { 2661 *find_lock_entry(lock) = NULL; 2662 } 2663 2664 /* 2665 * Free the array of held locks. 2666 */ 2667 void 2668 heldlock_free(ulwp_t *ulwp) 2669 { 2670 uint_t nlocks; 2671 2672 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 2673 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2674 ulwp->ul_heldlockcnt = 0; 2675 ulwp->ul_heldlocks.array = NULL; 2676 } 2677 2678 /* 2679 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 2680 * Called from _thrp_exit() to deal with abandoned locks. 2681 */ 2682 void 2683 heldlock_exit(void) 2684 { 2685 ulwp_t *self = curthread; 2686 mutex_t **lockptr; 2687 uint_t nlocks; 2688 mutex_t *mp; 2689 2690 if ((nlocks = self->ul_heldlockcnt) != 0) 2691 lockptr = self->ul_heldlocks.array; 2692 else { 2693 nlocks = 1; 2694 lockptr = &self->ul_heldlocks.single; 2695 } 2696 2697 for (; nlocks; nlocks--, lockptr++) { 2698 /* 2699 * The kernel takes care of transitioning held 2700 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 2701 * We avoid that case here. 
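 * (A sketch of the application-visible effect follows this comment.)
 */

/*
 * Sketch: because heldlock_exit() marks abandoned LOCK_ROBUST mutexes
 * LOCK_OWNERDEAD, a thread that exits while holding a robust mutex causes
 * the next locker to see EOWNERDEAD.  This sketch assumes the Solaris-era
 * robust attribute names (pthread_mutexattr_setrobust_np() and
 * PTHREAD_MUTEX_ROBUST_NP); the names robust_mx, die_holding_lock() and
 * demo_ownerdead() are hypothetical.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>
#include <errno.h>

static pthread_mutex_t robust_mx;

static void *
die_holding_lock(void *arg)
{
	(void) pthread_mutex_lock(&robust_mx);
	return (arg);		/* the thread exits still holding the mutex */
}

static void
demo_ownerdead(void)
{
	pthread_mutexattr_t attr;
	pthread_t tid;
	int error;

	(void) pthread_mutexattr_init(&attr);
	(void) pthread_mutexattr_setrobust_np(&attr, PTHREAD_MUTEX_ROBUST_NP);
	(void) pthread_mutex_init(&robust_mx, &attr);
	(void) pthread_mutexattr_destroy(&attr);

	(void) pthread_create(&tid, NULL, die_holding_lock, NULL);
	(void) pthread_join(tid, NULL);

	/* the abandoned mutex was flagged at thread exit (see above) */
	error = pthread_mutex_lock(&robust_mx);
	if (error == EOWNERDEAD) {
		/* repair the state the dead thread left behind ... */
		(void) pthread_mutex_consistent_np(&robust_mx);
		error = 0;
	}
	if (error == 0)
		(void) pthread_mutex_unlock(&robust_mx);
}
#endif

/*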
2702 */ 2703 if ((mp = *lockptr) != NULL && 2704 mutex_is_held(mp) && 2705 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 2706 LOCK_ROBUST) { 2707 mp->mutex_rcount = 0; 2708 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 2709 mp->mutex_flag |= LOCK_OWNERDEAD; 2710 (void) mutex_unlock_internal(mp, 1); 2711 } 2712 } 2713 2714 heldlock_free(self); 2715 } 2716 2717 #pragma weak cond_init = _cond_init 2718 /* ARGSUSED2 */ 2719 int 2720 _cond_init(cond_t *cvp, int type, void *arg) 2721 { 2722 if (type != USYNC_THREAD && type != USYNC_PROCESS) 2723 return (EINVAL); 2724 (void) _memset(cvp, 0, sizeof (*cvp)); 2725 cvp->cond_type = (uint16_t)type; 2726 cvp->cond_magic = COND_MAGIC; 2727 return (0); 2728 } 2729 2730 /* 2731 * cond_sleep_queue(): utility function for cond_wait_queue(). 2732 * 2733 * Go to sleep on a condvar sleep queue, expect to be waked up 2734 * by someone calling cond_signal() or cond_broadcast() or due 2735 * to receiving a UNIX signal or being cancelled, or just simply 2736 * due to a spurious wakeup (like someome calling forkall()). 2737 * 2738 * The associated mutex is *not* reacquired before returning. 2739 * That must be done by the caller of cond_sleep_queue(). 2740 */ 2741 static int 2742 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2743 { 2744 ulwp_t *self = curthread; 2745 queue_head_t *qp; 2746 queue_head_t *mqp; 2747 lwpid_t lwpid; 2748 int signalled; 2749 int error; 2750 int release_all; 2751 2752 /* 2753 * Put ourself on the CV sleep queue, unlock the mutex, then 2754 * park ourself and unpark a candidate lwp to grab the mutex. 2755 * We must go onto the CV sleep queue before dropping the 2756 * mutex in order to guarantee atomicity of the operation. 2757 */ 2758 self->ul_sp = stkptr(); 2759 qp = queue_lock(cvp, CV); 2760 enqueue(qp, self, cvp, CV); 2761 cvp->cond_waiters_user = 1; 2762 self->ul_cvmutex = mp; 2763 self->ul_cv_wake = (tsp != NULL); 2764 self->ul_signalled = 0; 2765 if (mp->mutex_flag & LOCK_OWNERDEAD) { 2766 mp->mutex_flag &= ~LOCK_OWNERDEAD; 2767 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2768 } 2769 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2770 lwpid = mutex_unlock_queue(mp, release_all); 2771 for (;;) { 2772 set_parking_flag(self, 1); 2773 queue_unlock(qp); 2774 if (lwpid != 0) { 2775 lwpid = preempt_unpark(self, lwpid); 2776 preempt(self); 2777 } 2778 /* 2779 * We may have a deferred signal present, 2780 * in which case we should return EINTR. 2781 * Also, we may have received a SIGCANCEL; if so 2782 * and we are cancelable we should return EINTR. 2783 * We force an immediate EINTR return from 2784 * __lwp_park() by turning our parking flag off. 2785 */ 2786 if (self->ul_cursig != 0 || 2787 (self->ul_cancelable && self->ul_cancel_pending)) 2788 set_parking_flag(self, 0); 2789 /* 2790 * __lwp_park() will return the residual time in tsp 2791 * if we are unparked before the timeout expires. 2792 */ 2793 error = __lwp_park(tsp, lwpid); 2794 set_parking_flag(self, 0); 2795 lwpid = 0; /* unpark the other lwp only once */ 2796 /* 2797 * We were waked up by cond_signal(), cond_broadcast(), 2798 * by an interrupt or timeout (EINTR or ETIME), 2799 * or we may just have gotten a spurious wakeup. 2800 */ 2801 qp = queue_lock(cvp, CV); 2802 mqp = queue_lock(mp, MX); 2803 if (self->ul_sleepq == NULL) 2804 break; 2805 /* 2806 * We are on either the condvar sleep queue or the 2807 * mutex sleep queue. Break out of the sleep if we 2808 * were interrupted or we timed out (EINTR or ETIME). 
2809 * Else this is a spurious wakeup; continue the loop. 2810 */ 2811 if (self->ul_sleepq == mqp) { /* mutex queue */ 2812 if (error) { 2813 mp->mutex_waiters = dequeue_self(mqp, mp); 2814 break; 2815 } 2816 tsp = NULL; /* no more timeout */ 2817 } else if (self->ul_sleepq == qp) { /* condvar queue */ 2818 if (error) { 2819 cvp->cond_waiters_user = dequeue_self(qp, cvp); 2820 break; 2821 } 2822 /* 2823 * Else a spurious wakeup on the condvar queue. 2824 * __lwp_park() has already adjusted the timeout. 2825 */ 2826 } else { 2827 thr_panic("cond_sleep_queue(): thread not on queue"); 2828 } 2829 queue_unlock(mqp); 2830 } 2831 2832 self->ul_sp = 0; 2833 ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0); 2834 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 2835 self->ul_wchan == NULL); 2836 2837 signalled = self->ul_signalled; 2838 self->ul_signalled = 0; 2839 queue_unlock(qp); 2840 queue_unlock(mqp); 2841 2842 /* 2843 * If we were concurrently cond_signal()d and any of: 2844 * received a UNIX signal, were cancelled, or got a timeout, 2845 * then perform another cond_signal() to avoid consuming it. 2846 */ 2847 if (error && signalled) 2848 (void) cond_signal_internal(cvp); 2849 2850 return (error); 2851 } 2852 2853 int 2854 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp, 2855 tdb_mutex_stats_t *msp) 2856 { 2857 ulwp_t *self = curthread; 2858 int error; 2859 int merror; 2860 2861 /* 2862 * The old thread library was programmed to defer signals 2863 * while in cond_wait() so that the associated mutex would 2864 * be guaranteed to be held when the application signal 2865 * handler was invoked. 2866 * 2867 * We do not behave this way by default; the state of the 2868 * associated mutex in the signal handler is undefined. 2869 * 2870 * To accommodate applications that depend on the old 2871 * behavior, the _THREAD_COND_WAIT_DEFER environment 2872 * variable can be set to 1 and we will behave in the 2873 * old way with respect to cond_wait(). 2874 */ 2875 if (self->ul_cond_wait_defer) 2876 sigoff(self); 2877 2878 error = cond_sleep_queue(cvp, mp, tsp); 2879 2880 /* 2881 * Reacquire the mutex. 2882 */ 2883 if ((merror = mutex_trylock_adaptive(mp, 1)) == EBUSY) 2884 merror = mutex_lock_queue(self, msp, mp, NULL); 2885 if (merror) 2886 error = merror; 2887 if (msp && (merror == 0 || merror == EOWNERDEAD)) 2888 record_begin_hold(msp); 2889 2890 /* 2891 * Take any deferred signal now, after we have reacquired the mutex. 2892 */ 2893 if (self->ul_cond_wait_defer) 2894 sigon(self); 2895 2896 return (error); 2897 } 2898 2899 /* 2900 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 2901 * See the comment ahead of cond_sleep_queue(), above. 2902 */ 2903 static int 2904 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2905 { 2906 int mtype = mp->mutex_type; 2907 ulwp_t *self = curthread; 2908 int error; 2909 2910 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2911 _ceil_prio_waive(); 2912 2913 self->ul_sp = stkptr(); 2914 self->ul_wchan = cvp; 2915 mp->mutex_owner = 0; 2916 mp->mutex_ownerpid = 0; 2917 if (mtype & LOCK_PRIO_INHERIT) 2918 mp->mutex_lockw = LOCKCLEAR; 2919 /* 2920 * ___lwp_cond_wait() returns immediately with EINTR if 2921 * set_parking_flag(self,0) is called on this lwp before it 2922 * goes to sleep in the kernel. sigacthandler() calls this 2923 * when a deferred signal is noted. 
This assures that we don't 2924 * get stuck in ___lwp_cond_wait() with all signals blocked 2925 * due to taking a deferred signal before going to sleep. 2926 */ 2927 set_parking_flag(self, 1); 2928 if (self->ul_cursig != 0 || 2929 (self->ul_cancelable && self->ul_cancel_pending)) 2930 set_parking_flag(self, 0); 2931 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 2932 set_parking_flag(self, 0); 2933 self->ul_sp = 0; 2934 self->ul_wchan = NULL; 2935 return (error); 2936 } 2937 2938 int 2939 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2940 { 2941 ulwp_t *self = curthread; 2942 int error; 2943 int merror; 2944 2945 /* 2946 * See the large comment in cond_wait_queue(), above. 2947 */ 2948 if (self->ul_cond_wait_defer) 2949 sigoff(self); 2950 2951 error = cond_sleep_kernel(cvp, mp, tsp); 2952 2953 /* 2954 * Override the return code from ___lwp_cond_wait() 2955 * with any non-zero return code from mutex_lock(). 2956 * This addresses robust lock failures in particular; 2957 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 2958 * errors in order to take corrective action. 2959 */ 2960 if ((merror = _private_mutex_lock(mp)) != 0) 2961 error = merror; 2962 2963 /* 2964 * Take any deferred signal now, after we have reacquired the mutex. 2965 */ 2966 if (self->ul_cond_wait_defer) 2967 sigon(self); 2968 2969 return (error); 2970 } 2971 2972 /* 2973 * Common code for _cond_wait() and _cond_timedwait() 2974 */ 2975 int 2976 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2977 { 2978 int mtype = mp->mutex_type; 2979 hrtime_t begin_sleep = 0; 2980 ulwp_t *self = curthread; 2981 uberdata_t *udp = self->ul_uberdata; 2982 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 2983 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2984 uint8_t rcount; 2985 int error = 0; 2986 2987 /* 2988 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 2989 * Except in the case of [ETIMEDOUT], all these error checks 2990 * shall act as if they were performed immediately at the 2991 * beginning of processing for the function and shall cause 2992 * an error return, in effect, prior to modifying the state 2993 * of the mutex specified by mutex or the condition variable 2994 * specified by cond. 2995 * Therefore, we must return EINVAL now if the timout is invalid. 
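 * (A sketch of that validation, together with the absolute-to-relative
 * conversion used by the timed waits, follows this comment.)
 */

/*
 * Sketch: the timed paths above validate the caller's absolute timeout
 * and convert it to the relative form the sleep primitives want (via
 * abstime_to_reltime()).  This is a minimal user-level analogue assuming
 * only clock_gettime(); the helper name abstime_check_and_convert() is
 * hypothetical and it is not the libc routine.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <time.h>
#include <errno.h>

#define	NSEC_PER_SEC	1000000000L

static int
abstime_check_and_convert(const struct timespec *abstime,
    struct timespec *reltime)
{
	struct timespec now;

	if (abstime->tv_sec < 0 ||
	    abstime->tv_nsec < 0 || abstime->tv_nsec >= NSEC_PER_SEC)
		return (EINVAL);	/* reject before touching any state */

	(void) clock_gettime(CLOCK_REALTIME, &now);
	reltime->tv_sec = abstime->tv_sec - now.tv_sec;
	reltime->tv_nsec = abstime->tv_nsec - now.tv_nsec;
	if (reltime->tv_nsec < 0) {
		reltime->tv_sec--;
		reltime->tv_nsec += NSEC_PER_SEC;
	}
	if (reltime->tv_sec < 0) {
		reltime->tv_sec = 0;	/* deadline has already passed */
		reltime->tv_nsec = 0;
	}
	return (0);
}
#endif

/*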
2996 */ 2997 if (tsp != NULL && 2998 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 2999 return (EINVAL); 3000 3001 if (__td_event_report(self, TD_SLEEP, udp)) { 3002 self->ul_sp = stkptr(); 3003 self->ul_wchan = cvp; 3004 self->ul_td_evbuf.eventnum = TD_SLEEP; 3005 self->ul_td_evbuf.eventdata = cvp; 3006 tdb_event(TD_SLEEP, udp); 3007 self->ul_sp = 0; 3008 } 3009 if (csp) { 3010 if (tsp) 3011 tdb_incr(csp->cond_timedwait); 3012 else 3013 tdb_incr(csp->cond_wait); 3014 } 3015 if (msp) 3016 begin_sleep = record_hold_time(msp); 3017 else if (csp) 3018 begin_sleep = gethrtime(); 3019 3020 if (self->ul_error_detection) { 3021 if (!mutex_is_held(mp)) 3022 lock_error(mp, "cond_wait", cvp, NULL); 3023 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3024 lock_error(mp, "recursive mutex in cond_wait", 3025 cvp, NULL); 3026 if (cvp->cond_type & USYNC_PROCESS) { 3027 if (!(mtype & USYNC_PROCESS)) 3028 lock_error(mp, "cond_wait", cvp, 3029 "condvar process-shared, " 3030 "mutex process-private"); 3031 } else { 3032 if (mtype & USYNC_PROCESS) 3033 lock_error(mp, "cond_wait", cvp, 3034 "condvar process-private, " 3035 "mutex process-shared"); 3036 } 3037 } 3038 3039 /* 3040 * We deal with recursive mutexes by completely 3041 * dropping the lock and restoring the recursion 3042 * count after waking up. This is arguably wrong, 3043 * but it obeys the principle of least astonishment. 3044 */ 3045 rcount = mp->mutex_rcount; 3046 mp->mutex_rcount = 0; 3047 if ((mtype & 3048 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3049 (cvp->cond_type & USYNC_PROCESS)) 3050 error = cond_wait_kernel(cvp, mp, tsp); 3051 else 3052 error = cond_wait_queue(cvp, mp, tsp, msp); 3053 mp->mutex_rcount = rcount; 3054 3055 if (csp) { 3056 hrtime_t lapse = gethrtime() - begin_sleep; 3057 if (tsp == NULL) 3058 csp->cond_wait_sleep_time += lapse; 3059 else { 3060 csp->cond_timedwait_sleep_time += lapse; 3061 if (error == ETIME) 3062 tdb_incr(csp->cond_timedwait_timeout); 3063 } 3064 } 3065 return (error); 3066 } 3067 3068 /* 3069 * cond_wait() is a cancellation point but _cond_wait() is not. 3070 * System libraries call the non-cancellation version. 3071 * It is expected that only applications call the cancellation version. 3072 */ 3073 int 3074 _cond_wait(cond_t *cvp, mutex_t *mp) 3075 { 3076 ulwp_t *self = curthread; 3077 uberdata_t *udp = self->ul_uberdata; 3078 uberflags_t *gflags; 3079 3080 /* 3081 * Optimize the common case of USYNC_THREAD plus 3082 * no error detection, no lock statistics, and no event tracing. 3083 */ 3084 if ((gflags = self->ul_schedctl_called) != NULL && 3085 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3086 self->ul_td_events_enable | 3087 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3088 return (cond_wait_queue(cvp, mp, NULL, NULL)); 3089 3090 /* 3091 * Else do it the long way. 3092 */ 3093 return (cond_wait_common(cvp, mp, NULL)); 3094 } 3095 3096 int 3097 cond_wait(cond_t *cvp, mutex_t *mp) 3098 { 3099 int error; 3100 3101 _cancelon(); 3102 error = _cond_wait(cvp, mp); 3103 if (error == EINTR) 3104 _canceloff(); 3105 else 3106 _canceloff_nocancel(); 3107 return (error); 3108 } 3109 3110 #pragma weak pthread_cond_wait = _pthread_cond_wait 3111 int 3112 _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 3113 { 3114 int error; 3115 3116 error = cond_wait(cvp, mp); 3117 return ((error == EINTR)? 0 : error); 3118 } 3119 3120 /* 3121 * cond_timedwait() is a cancellation point but _cond_timedwait() is not. 3122 * System libraries call the non-cancellation version. 
3123 * It is expected that only applications call the cancellation version. 3124 */ 3125 int 3126 _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3127 { 3128 clockid_t clock_id = cvp->cond_clockid; 3129 timespec_t reltime; 3130 int error; 3131 3132 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3133 clock_id = CLOCK_REALTIME; 3134 abstime_to_reltime(clock_id, abstime, &reltime); 3135 error = cond_wait_common(cvp, mp, &reltime); 3136 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3137 /* 3138 * Don't return ETIME if we didn't really get a timeout. 3139 * This can happen if we return because someone resets 3140 * the system clock. Just return zero in this case, 3141 * giving a spurious wakeup but not a timeout. 3142 */ 3143 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3144 abstime->tv_nsec > gethrtime()) 3145 error = 0; 3146 } 3147 return (error); 3148 } 3149 3150 int 3151 cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3152 { 3153 int error; 3154 3155 _cancelon(); 3156 error = _cond_timedwait(cvp, mp, abstime); 3157 if (error == EINTR) 3158 _canceloff(); 3159 else 3160 _canceloff_nocancel(); 3161 return (error); 3162 } 3163 3164 #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 3165 int 3166 _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3167 { 3168 int error; 3169 3170 error = cond_timedwait(cvp, mp, abstime); 3171 if (error == ETIME) 3172 error = ETIMEDOUT; 3173 else if (error == EINTR) 3174 error = 0; 3175 return (error); 3176 } 3177 3178 /* 3179 * cond_reltimedwait() is a cancellation point but _cond_reltimedwait() 3180 * is not. System libraries call the non-cancellation version. 3181 * It is expected that only applications call the cancellation version. 3182 */ 3183 int 3184 _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3185 { 3186 timespec_t tslocal = *reltime; 3187 3188 return (cond_wait_common(cvp, mp, &tslocal)); 3189 } 3190 3191 #pragma weak cond_reltimedwait = _cond_reltimedwait_cancel 3192 int 3193 _cond_reltimedwait_cancel(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3194 { 3195 int error; 3196 3197 _cancelon(); 3198 error = _cond_reltimedwait(cvp, mp, reltime); 3199 if (error == EINTR) 3200 _canceloff(); 3201 else 3202 _canceloff_nocancel(); 3203 return (error); 3204 } 3205 3206 #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 3207 int 3208 _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 3209 const timespec_t *reltime) 3210 { 3211 int error; 3212 3213 error = _cond_reltimedwait_cancel(cvp, mp, reltime); 3214 if (error == ETIME) 3215 error = ETIMEDOUT; 3216 else if (error == EINTR) 3217 error = 0; 3218 return (error); 3219 } 3220 3221 #pragma weak pthread_cond_signal = cond_signal_internal 3222 #pragma weak _pthread_cond_signal = cond_signal_internal 3223 #pragma weak cond_signal = cond_signal_internal 3224 #pragma weak _cond_signal = cond_signal_internal 3225 int 3226 cond_signal_internal(cond_t *cvp) 3227 { 3228 ulwp_t *self = curthread; 3229 uberdata_t *udp = self->ul_uberdata; 3230 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3231 int error = 0; 3232 queue_head_t *qp; 3233 mutex_t *mp; 3234 queue_head_t *mqp; 3235 ulwp_t **ulwpp; 3236 ulwp_t *ulwp; 3237 ulwp_t *prev = NULL; 3238 ulwp_t *next; 3239 ulwp_t **suspp = NULL; 3240 ulwp_t *susprev; 3241 3242 if (csp) 3243 tdb_incr(csp->cond_signal); 3244 3245 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? 
*/ 3246 error = __lwp_cond_signal(cvp); 3247 3248 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3249 return (error); 3250 3251 /* 3252 * Move someone from the condvar sleep queue to the mutex sleep 3253 * queue for the mutex that he will acquire on being waked up. 3254 * We can do this only if we own the mutex he will acquire. 3255 * If we do not own the mutex, or if his ul_cv_wake flag 3256 * is set, just dequeue and unpark him. 3257 */ 3258 qp = queue_lock(cvp, CV); 3259 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 3260 prev = ulwp, ulwpp = &ulwp->ul_link) { 3261 if (ulwp->ul_wchan == cvp) { 3262 if (!ulwp->ul_stop) 3263 break; 3264 /* 3265 * Try not to dequeue a suspended thread. 3266 * This mimics the old libthread's behavior. 3267 */ 3268 if (suspp == NULL) { 3269 suspp = ulwpp; 3270 susprev = prev; 3271 } 3272 } 3273 } 3274 if (ulwp == NULL && suspp != NULL) { 3275 ulwp = *(ulwpp = suspp); 3276 prev = susprev; 3277 suspp = NULL; 3278 } 3279 if (ulwp == NULL) { /* no one on the sleep queue */ 3280 cvp->cond_waiters_user = 0; 3281 queue_unlock(qp); 3282 return (error); 3283 } 3284 /* 3285 * Scan the remainder of the CV queue for another waiter. 3286 */ 3287 if (suspp != NULL) { 3288 next = *suspp; 3289 } else { 3290 for (next = ulwp->ul_link; next != NULL; next = next->ul_link) 3291 if (next->ul_wchan == cvp) 3292 break; 3293 } 3294 if (next == NULL) 3295 cvp->cond_waiters_user = 0; 3296 3297 /* 3298 * Inform the thread that he was the recipient of a cond_signal(). 3299 * This lets him deal with cond_signal() and, concurrently, 3300 * one or more of a cancellation, a UNIX signal, or a timeout. 3301 * These latter conditions must not consume a cond_signal(). 3302 */ 3303 ulwp->ul_signalled = 1; 3304 3305 /* 3306 * Dequeue the waiter but leave his ul_sleepq non-NULL 3307 * while we move him to the mutex queue so that he can 3308 * deal properly with spurious wakeups. 3309 */ 3310 *ulwpp = ulwp->ul_link; 3311 ulwp->ul_link = NULL; 3312 if (qp->qh_tail == ulwp) 3313 qp->qh_tail = prev; 3314 qp->qh_qlen--; 3315 3316 mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 3317 ulwp->ul_cvmutex = NULL; 3318 ASSERT(mp != NULL); 3319 3320 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3321 lwpid_t lwpid = ulwp->ul_lwpid; 3322 3323 no_preempt(self); 3324 ulwp->ul_sleepq = NULL; 3325 ulwp->ul_wchan = NULL; 3326 ulwp->ul_cv_wake = 0; 3327 queue_unlock(qp); 3328 (void) __lwp_unpark(lwpid); 3329 preempt(self); 3330 } else { 3331 mqp = queue_lock(mp, MX); 3332 enqueue(mqp, ulwp, mp, MX); 3333 mp->mutex_waiters = 1; 3334 queue_unlock(mqp); 3335 queue_unlock(qp); 3336 } 3337 3338 return (error); 3339 } 3340 3341 /* 3342 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3343 * and rw_queue_release() to (re)allocate a big buffer to hold the 3344 * lwpids of all the threads to be set running after they are removed 3345 * from their sleep queues. Since we are holding a queue lock, we 3346 * cannot call any function that might acquire a lock. mmap(), munmap(), 3347 * lwp_unpark_all() are simple system calls and are safe in this regard. 3348 */ 3349 lwpid_t * 3350 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3351 { 3352 /* 3353 * Allocate NEWLWPS ids on the first overflow. 3354 * Double the allocation each time after that. 
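 * (An application-level sketch of the cond_signal() usage pattern
 * discussed above follows this comment.)
 */

/*
 * Sketch: the transfer to the mutex sleep queue above happens only when
 * the signalling thread owns the mutex the waiter will reacquire, which
 * is the normal application pattern of signalling with the lock held.
 * A minimal sketch; the names q_lock, q_cv, q_ready, producer_post()
 * and consumer_wait() are hypothetical.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_cv = PTHREAD_COND_INITIALIZER;
static int q_ready;

static void
producer_post(void)
{
	(void) pthread_mutex_lock(&q_lock);
	q_ready = 1;
	/*
	 * Signalling with q_lock held lets the implementation move the
	 * waiter straight to the mutex sleep queue (see above) instead
	 * of waking it only to have it block again on q_lock.
	 */
	(void) pthread_cond_signal(&q_cv);
	(void) pthread_mutex_unlock(&q_lock);
}

static void
consumer_wait(void)
{
	(void) pthread_mutex_lock(&q_lock);
	while (!q_ready)		/* guards against spurious wakeups */
		(void) pthread_cond_wait(&q_cv, &q_lock);
	q_ready = 0;
	(void) pthread_mutex_unlock(&q_lock);
}
#endif

/*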
3355 */ 3356 int nlwpid = *nlwpid_ptr; 3357 int maxlwps = *maxlwps_ptr; 3358 int first_allocation; 3359 int newlwps; 3360 void *vaddr; 3361 3362 ASSERT(nlwpid == maxlwps); 3363 3364 first_allocation = (maxlwps == MAXLWPS); 3365 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3366 vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t), 3367 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3368 3369 if (vaddr == MAP_FAILED) { 3370 /* 3371 * Let's hope this never happens. 3372 * If it does, then we have a terrible 3373 * thundering herd on our hands. 3374 */ 3375 (void) __lwp_unpark_all(lwpid, nlwpid); 3376 *nlwpid_ptr = 0; 3377 } else { 3378 (void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3379 if (!first_allocation) 3380 (void) _private_munmap(lwpid, 3381 maxlwps * sizeof (lwpid_t)); 3382 lwpid = vaddr; 3383 *maxlwps_ptr = newlwps; 3384 } 3385 3386 return (lwpid); 3387 } 3388 3389 #pragma weak pthread_cond_broadcast = cond_broadcast_internal 3390 #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 3391 #pragma weak cond_broadcast = cond_broadcast_internal 3392 #pragma weak _cond_broadcast = cond_broadcast_internal 3393 int 3394 cond_broadcast_internal(cond_t *cvp) 3395 { 3396 ulwp_t *self = curthread; 3397 uberdata_t *udp = self->ul_uberdata; 3398 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3399 int error = 0; 3400 queue_head_t *qp; 3401 mutex_t *mp; 3402 mutex_t *mp_cache = NULL; 3403 queue_head_t *mqp = NULL; 3404 ulwp_t **ulwpp; 3405 ulwp_t *ulwp; 3406 ulwp_t *prev = NULL; 3407 int nlwpid = 0; 3408 int maxlwps = MAXLWPS; 3409 lwpid_t buffer[MAXLWPS]; 3410 lwpid_t *lwpid = buffer; 3411 3412 if (csp) 3413 tdb_incr(csp->cond_broadcast); 3414 3415 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3416 error = __lwp_cond_broadcast(cvp); 3417 3418 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3419 return (error); 3420 3421 /* 3422 * Move everyone from the condvar sleep queue to the mutex sleep 3423 * queue for the mutex that they will acquire on being waked up. 3424 * We can do this only if we own the mutex they will acquire. 3425 * If we do not own the mutex, or if their ul_cv_wake flag 3426 * is set, just dequeue and unpark them. 3427 * 3428 * We keep track of lwpids that are to be unparked in lwpid[]. 3429 * __lwp_unpark_all() is called to unpark all of them after 3430 * they have been removed from the sleep queue and the sleep 3431 * queue lock has been dropped. If we run out of space in our 3432 * on-stack buffer, we need to allocate more but we can't call 3433 * lmalloc() because we are holding a queue lock when the overflow 3434 * occurs and lmalloc() acquires a lock. We can't use alloca() 3435 * either because the application may have allocated a small 3436 * stack and we don't want to overrun the stack. So we call 3437 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3438 * system call directly since that path acquires no locks. 
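 * (A sketch of the corresponding application-side broadcast pattern
 * follows this comment.)
 */

/*
 * Sketch: from the application's side, cond_broadcast() wakes every
 * waiter and each one re-evaluates its predicate under the mutex; the
 * implementation above batches the lwpid wakeups so the queue lock is
 * not held while the threads are set running.  A minimal sketch; the
 * names gate_lock, gate_cv, gate_open, gate_release_all() and
 * gate_wait() are hypothetical.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>

static pthread_mutex_t gate_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gate_cv = PTHREAD_COND_INITIALIZER;
static int gate_open;

static void
gate_release_all(void)
{
	(void) pthread_mutex_lock(&gate_lock);
	gate_open = 1;
	(void) pthread_cond_broadcast(&gate_cv);	/* wake every waiter */
	(void) pthread_mutex_unlock(&gate_lock);
}

static void
gate_wait(void)
{
	(void) pthread_mutex_lock(&gate_lock);
	while (!gate_open)		/* each waiter rechecks after waking */
		(void) pthread_cond_wait(&gate_cv, &gate_lock);
	(void) pthread_mutex_unlock(&gate_lock);
}
#endif

/*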
3439 */ 3440 qp = queue_lock(cvp, CV); 3441 cvp->cond_waiters_user = 0; 3442 ulwpp = &qp->qh_head; 3443 while ((ulwp = *ulwpp) != NULL) { 3444 if (ulwp->ul_wchan != cvp) { 3445 prev = ulwp; 3446 ulwpp = &ulwp->ul_link; 3447 continue; 3448 } 3449 *ulwpp = ulwp->ul_link; 3450 ulwp->ul_link = NULL; 3451 if (qp->qh_tail == ulwp) 3452 qp->qh_tail = prev; 3453 qp->qh_qlen--; 3454 mp = ulwp->ul_cvmutex; /* his mutex */ 3455 ulwp->ul_cvmutex = NULL; 3456 ASSERT(mp != NULL); 3457 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3458 ulwp->ul_sleepq = NULL; 3459 ulwp->ul_wchan = NULL; 3460 ulwp->ul_cv_wake = 0; 3461 if (nlwpid == maxlwps) 3462 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3463 lwpid[nlwpid++] = ulwp->ul_lwpid; 3464 } else { 3465 if (mp != mp_cache) { 3466 mp_cache = mp; 3467 if (mqp != NULL) 3468 queue_unlock(mqp); 3469 mqp = queue_lock(mp, MX); 3470 } 3471 enqueue(mqp, ulwp, mp, MX); 3472 mp->mutex_waiters = 1; 3473 } 3474 } 3475 if (mqp != NULL) 3476 queue_unlock(mqp); 3477 if (nlwpid == 0) { 3478 queue_unlock(qp); 3479 } else { 3480 no_preempt(self); 3481 queue_unlock(qp); 3482 if (nlwpid == 1) 3483 (void) __lwp_unpark(lwpid[0]); 3484 else 3485 (void) __lwp_unpark_all(lwpid, nlwpid); 3486 preempt(self); 3487 } 3488 if (lwpid != buffer) 3489 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 3490 return (error); 3491 } 3492 3493 #pragma weak pthread_cond_destroy = _cond_destroy 3494 #pragma weak _pthread_cond_destroy = _cond_destroy 3495 #pragma weak cond_destroy = _cond_destroy 3496 int 3497 _cond_destroy(cond_t *cvp) 3498 { 3499 cvp->cond_magic = 0; 3500 tdb_sync_obj_deregister(cvp); 3501 return (0); 3502 } 3503 3504 #if defined(THREAD_DEBUG) 3505 void 3506 assert_no_libc_locks_held(void) 3507 { 3508 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3509 } 3510 #endif 3511 3512 /* protected by link_lock */ 3513 uint64_t spin_lock_spin; 3514 uint64_t spin_lock_spin2; 3515 uint64_t spin_lock_sleep; 3516 uint64_t spin_lock_wakeup; 3517 3518 /* 3519 * Record spin lock statistics. 3520 * Called by a thread exiting itself in thrp_exit(). 3521 * Also called via atexit() from the thread calling 3522 * exit() to do all the other threads as well. 3523 */ 3524 void 3525 record_spin_locks(ulwp_t *ulwp) 3526 { 3527 spin_lock_spin += ulwp->ul_spin_lock_spin; 3528 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3529 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3530 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3531 ulwp->ul_spin_lock_spin = 0; 3532 ulwp->ul_spin_lock_spin2 = 0; 3533 ulwp->ul_spin_lock_sleep = 0; 3534 ulwp->ul_spin_lock_wakeup = 0; 3535 } 3536 3537 /* 3538 * atexit function: dump the queue statistics to stderr. 
3539 */ 3540 #if !defined(__lint) 3541 #define fprintf _fprintf 3542 #endif 3543 #include <stdio.h> 3544 void 3545 dump_queue_statistics(void) 3546 { 3547 uberdata_t *udp = curthread->ul_uberdata; 3548 queue_head_t *qp; 3549 int qn; 3550 uint64_t spin_lock_total = 0; 3551 3552 if (udp->queue_head == NULL || thread_queue_dump == 0) 3553 return; 3554 3555 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3556 fprintf(stderr, "queue# lockcount max qlen\n") < 0) 3557 return; 3558 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3559 if (qp->qh_lockcount == 0) 3560 continue; 3561 spin_lock_total += qp->qh_lockcount; 3562 if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3563 (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3564 return; 3565 } 3566 3567 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3568 fprintf(stderr, "queue# lockcount max qlen\n") < 0) 3569 return; 3570 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3571 if (qp->qh_lockcount == 0) 3572 continue; 3573 spin_lock_total += qp->qh_lockcount; 3574 if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3575 (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3576 return; 3577 } 3578 3579 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3580 (u_longlong_t)spin_lock_total); 3581 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3582 (u_longlong_t)spin_lock_spin); 3583 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3584 (u_longlong_t)spin_lock_spin2); 3585 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3586 (u_longlong_t)spin_lock_sleep); 3587 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3588 (u_longlong_t)spin_lock_wakeup); 3589 } 3590
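/*
 * Sketch: the timed condvar path above (cond_timedwait() and its pthread
 * wrappers) takes an absolute CLOCK_REALTIME deadline and reports
 * ETIMEDOUT, with the usual caller looping on its predicate.  A minimal
 * usage sketch; the names m, cv, done and wait_for_done() are
 * hypothetical.
 */
#if 0	/* illustrative sketch only, not built as part of libc */
#include <pthread.h>
#include <time.h>
#include <errno.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int done;

/* wait up to 'seconds' for 'done'; returns 0 or ETIMEDOUT */
static int
wait_for_done(int seconds)
{
	struct timespec deadline;
	int error = 0;

	(void) clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += seconds;

	(void) pthread_mutex_lock(&m);
	while (!done && error == 0)
		error = pthread_cond_timedwait(&cv, &m, &deadline);
	(void) pthread_mutex_unlock(&m);
	return (done ? 0 : error);
}
#endif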