1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/sdt.h> 30 31 #include "lint.h" 32 #include "thr_uberdata.h" 33 34 /* 35 * This mutex is initialized to be held by lwp#1. 36 * It is used to block a thread that has returned from a mutex_lock() 37 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 38 */ 39 mutex_t stall_mutex = DEFAULTMUTEX; 40 41 static int shared_mutex_held(mutex_t *); 42 static int mutex_unlock_internal(mutex_t *, int); 43 static int mutex_queuelock_adaptive(mutex_t *); 44 static void mutex_wakeup_all(mutex_t *); 45 46 /* 47 * Lock statistics support functions. 
48 */ 49 void 50 record_begin_hold(tdb_mutex_stats_t *msp) 51 { 52 tdb_incr(msp->mutex_lock); 53 msp->mutex_begin_hold = gethrtime(); 54 } 55 56 hrtime_t 57 record_hold_time(tdb_mutex_stats_t *msp) 58 { 59 hrtime_t now = gethrtime(); 60 61 if (msp->mutex_begin_hold) 62 msp->mutex_hold_time += now - msp->mutex_begin_hold; 63 msp->mutex_begin_hold = 0; 64 return (now); 65 } 66 67 /* 68 * Called once at library initialization. 69 */ 70 void 71 mutex_setup(void) 72 { 73 if (set_lock_byte(&stall_mutex.mutex_lockw)) 74 thr_panic("mutex_setup() cannot acquire stall_mutex"); 75 stall_mutex.mutex_owner = (uintptr_t)curthread; 76 } 77 78 /* 79 * The default spin counts of 1000 and 500 are experimentally determined. 80 * On sun4u machines with any number of processors they could be raised 81 * to 10,000 but that (experimentally) makes almost no difference. 82 * The environment variables: 83 * _THREAD_ADAPTIVE_SPIN=count 84 * _THREAD_RELEASE_SPIN=count 85 * can be used to override and set the counts in the range [0 .. 1,000,000]. 86 */ 87 int thread_adaptive_spin = 1000; 88 uint_t thread_max_spinners = 100; 89 int thread_release_spin = 500; 90 int thread_queue_verify = 0; 91 static int ncpus; 92 93 /* 94 * Distinguish spinning for queue locks from spinning for regular locks. 95 * The environment variable: 96 * _THREAD_QUEUE_SPIN=count 97 * can be used to override and set the count in the range [0 .. 1,000,000]. 98 * There is no release spin concept for queue locks. 99 */ 100 int thread_queue_spin = 1000; 101 102 /* 103 * Use the otherwise-unused 'mutex_ownerpid' field of a USYNC_THREAD 104 * mutex to be a count of adaptive spins in progress. 
105 */ 106 #define mutex_spinners mutex_ownerpid 107 108 #define ALL_ATTRIBUTES \ 109 (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 110 LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 111 LOCK_ROBUST) 112 113 /* 114 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 115 * augmented by zero or more the flags: 116 * LOCK_RECURSIVE 117 * LOCK_ERRORCHECK 118 * LOCK_PRIO_INHERIT 119 * LOCK_PRIO_PROTECT 120 * LOCK_ROBUST 121 */ 122 #pragma weak _private_mutex_init = __mutex_init 123 #pragma weak mutex_init = __mutex_init 124 #pragma weak _mutex_init = __mutex_init 125 /* ARGSUSED2 */ 126 int 127 __mutex_init(mutex_t *mp, int type, void *arg) 128 { 129 int basetype = (type & ~ALL_ATTRIBUTES); 130 int error = 0; 131 132 if (basetype == USYNC_PROCESS_ROBUST) { 133 /* 134 * USYNC_PROCESS_ROBUST is a deprecated historical type. 135 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 136 * retain the USYNC_PROCESS_ROBUST flag so we can return 137 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 138 * mutexes will ever draw ELOCKUNMAPPED). 139 */ 140 type |= (USYNC_PROCESS | LOCK_ROBUST); 141 basetype = USYNC_PROCESS; 142 } 143 144 if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) || 145 (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 146 == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) { 147 error = EINVAL; 148 } else if (type & LOCK_ROBUST) { 149 /* 150 * Callers of mutex_init() with the LOCK_ROBUST attribute 151 * are required to pass an initially all-zero mutex. 152 * Multiple calls to mutex_init() are allowed; all but 153 * the first return EBUSY. A call to mutex_init() is 154 * allowed to make an inconsistent robust lock consistent 155 * (for historical usage, even though the proper interface 156 * for this is mutex_consistent()). Note that we use 157 * atomic_or_16() to set the LOCK_INITED flag so as 158 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 
159 */ 160 extern void _atomic_or_16(volatile uint16_t *, uint16_t); 161 if (!(mp->mutex_flag & LOCK_INITED)) { 162 mp->mutex_type = (uint8_t)type; 163 _atomic_or_16(&mp->mutex_flag, LOCK_INITED); 164 mp->mutex_magic = MUTEX_MAGIC; 165 } else if (type != mp->mutex_type || 166 ((type & LOCK_PRIO_PROTECT) && 167 mp->mutex_ceiling != (*(int *)arg))) { 168 error = EINVAL; 169 } else if (__mutex_consistent(mp) != 0) { 170 error = EBUSY; 171 } 172 /* register a process robust mutex with the kernel */ 173 if (basetype == USYNC_PROCESS) 174 register_lock(mp); 175 } else { 176 (void) _memset(mp, 0, sizeof (*mp)); 177 mp->mutex_type = (uint8_t)type; 178 mp->mutex_flag = LOCK_INITED; 179 mp->mutex_magic = MUTEX_MAGIC; 180 } 181 182 if (error == 0 && (type & LOCK_PRIO_PROTECT)) 183 mp->mutex_ceiling = (uint8_t)(*(int *)arg); 184 185 return (error); 186 } 187 188 /* 189 * Delete mp from list of ceil mutexes owned by curthread. 190 * Return 1 if the head of the chain was updated. 191 */ 192 int 193 _ceil_mylist_del(mutex_t *mp) 194 { 195 ulwp_t *self = curthread; 196 mxchain_t **mcpp; 197 mxchain_t *mcp; 198 199 mcpp = &self->ul_mxchain; 200 while ((*mcpp)->mxchain_mx != mp) 201 mcpp = &(*mcpp)->mxchain_next; 202 mcp = *mcpp; 203 *mcpp = mcp->mxchain_next; 204 lfree(mcp, sizeof (*mcp)); 205 return (mcpp == &self->ul_mxchain); 206 } 207 208 /* 209 * Add mp to head of list of ceil mutexes owned by curthread. 210 * Return ENOMEM if no memory could be allocated. 211 */ 212 int 213 _ceil_mylist_add(mutex_t *mp) 214 { 215 ulwp_t *self = curthread; 216 mxchain_t *mcp; 217 218 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 219 return (ENOMEM); 220 mcp->mxchain_mx = mp; 221 mcp->mxchain_next = self->ul_mxchain; 222 self->ul_mxchain = mcp; 223 return (0); 224 } 225 226 /* 227 * Inherit priority from ceiling. The inheritance impacts the effective 228 * priority, not the assigned priority. See _thread_setschedparam_main(). 
229 */ 230 void 231 _ceil_prio_inherit(int ceil) 232 { 233 ulwp_t *self = curthread; 234 struct sched_param param; 235 236 (void) _memset(¶m, 0, sizeof (param)); 237 param.sched_priority = ceil; 238 if (_thread_setschedparam_main(self->ul_lwpid, 239 self->ul_policy, ¶m, PRIO_INHERIT)) { 240 /* 241 * Panic since unclear what error code to return. 242 * If we do return the error codes returned by above 243 * called routine, update the man page... 244 */ 245 thr_panic("_thread_setschedparam_main() fails"); 246 } 247 } 248 249 /* 250 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 251 * if holding at least one ceiling lock. If no ceiling locks are held at this 252 * point, disinherit completely, reverting back to assigned priority. 253 */ 254 void 255 _ceil_prio_waive(void) 256 { 257 ulwp_t *self = curthread; 258 struct sched_param param; 259 260 (void) _memset(¶m, 0, sizeof (param)); 261 if (self->ul_mxchain == NULL) { 262 /* 263 * No ceil locks held. Zero the epri, revert back to ul_pri. 264 * Since thread's hash lock is not held, one cannot just 265 * read ul_pri here...do it in the called routine... 266 */ 267 param.sched_priority = self->ul_pri; /* ignored */ 268 if (_thread_setschedparam_main(self->ul_lwpid, 269 self->ul_policy, ¶m, PRIO_DISINHERIT)) 270 thr_panic("_thread_setschedparam_main() fails"); 271 } else { 272 /* 273 * Set priority to that of the mutex at the head 274 * of the ceilmutex chain. 275 */ 276 param.sched_priority = 277 self->ul_mxchain->mxchain_mx->mutex_ceiling; 278 if (_thread_setschedparam_main(self->ul_lwpid, 279 self->ul_policy, ¶m, PRIO_INHERIT)) 280 thr_panic("_thread_setschedparam_main() fails"); 281 } 282 } 283 284 /* 285 * Non-preemptive spin locks. Used by queue_lock(). 286 * No lock statistics are gathered for these locks. 
287 */ 288 void 289 spin_lock_set(mutex_t *mp) 290 { 291 ulwp_t *self = curthread; 292 293 no_preempt(self); 294 if (set_lock_byte(&mp->mutex_lockw) == 0) { 295 mp->mutex_owner = (uintptr_t)self; 296 return; 297 } 298 /* 299 * Spin for a while, attempting to acquire the lock. 300 */ 301 if (self->ul_spin_lock_spin != UINT_MAX) 302 self->ul_spin_lock_spin++; 303 if (mutex_queuelock_adaptive(mp) == 0 || 304 set_lock_byte(&mp->mutex_lockw) == 0) { 305 mp->mutex_owner = (uintptr_t)self; 306 return; 307 } 308 /* 309 * Try harder if we were previously at a no premption level. 310 */ 311 if (self->ul_preempt > 1) { 312 if (self->ul_spin_lock_spin2 != UINT_MAX) 313 self->ul_spin_lock_spin2++; 314 if (mutex_queuelock_adaptive(mp) == 0 || 315 set_lock_byte(&mp->mutex_lockw) == 0) { 316 mp->mutex_owner = (uintptr_t)self; 317 return; 318 } 319 } 320 /* 321 * Give up and block in the kernel for the mutex. 322 */ 323 if (self->ul_spin_lock_sleep != UINT_MAX) 324 self->ul_spin_lock_sleep++; 325 (void) ___lwp_mutex_timedlock(mp, NULL); 326 mp->mutex_owner = (uintptr_t)self; 327 } 328 329 void 330 spin_lock_clear(mutex_t *mp) 331 { 332 ulwp_t *self = curthread; 333 334 mp->mutex_owner = 0; 335 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 336 (void) ___lwp_mutex_wakeup(mp, 0); 337 if (self->ul_spin_lock_wakeup != UINT_MAX) 338 self->ul_spin_lock_wakeup++; 339 } 340 preempt(self); 341 } 342 343 /* 344 * Allocate the sleep queue hash table. 345 */ 346 void 347 queue_alloc(void) 348 { 349 ulwp_t *self = curthread; 350 uberdata_t *udp = self->ul_uberdata; 351 mutex_t *mp; 352 void *data; 353 int i; 354 355 /* 356 * No locks are needed; we call here only when single-threaded. 
357 */ 358 ASSERT(self == udp->ulwp_one); 359 ASSERT(!udp->uberflags.uf_mt); 360 if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 361 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 362 == MAP_FAILED) 363 thr_panic("cannot allocate thread queue_head table"); 364 udp->queue_head = (queue_head_t *)data; 365 for (i = 0; i < 2 * QHASHSIZE; i++) { 366 mp = &udp->queue_head[i].qh_lock; 367 mp->mutex_flag = LOCK_INITED; 368 mp->mutex_magic = MUTEX_MAGIC; 369 } 370 } 371 372 #if defined(THREAD_DEBUG) 373 374 /* 375 * Debugging: verify correctness of a sleep queue. 376 */ 377 void 378 QVERIFY(queue_head_t *qp) 379 { 380 ulwp_t *self = curthread; 381 uberdata_t *udp = self->ul_uberdata; 382 ulwp_t *ulwp; 383 ulwp_t *prev; 384 uint_t index; 385 uint32_t cnt = 0; 386 char qtype; 387 void *wchan; 388 389 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 390 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 391 ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) || 392 (qp->qh_head == NULL && qp->qh_tail == NULL)); 393 if (!thread_queue_verify) 394 return; 395 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 396 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 397 for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL; 398 prev = ulwp, ulwp = ulwp->ul_link, cnt++) { 399 ASSERT(ulwp->ul_qtype == qtype); 400 ASSERT(ulwp->ul_wchan != NULL); 401 ASSERT(ulwp->ul_sleepq == qp); 402 wchan = ulwp->ul_wchan; 403 index = QUEUE_HASH(wchan, qtype); 404 ASSERT(&udp->queue_head[index] == qp); 405 } 406 ASSERT(qp->qh_tail == prev); 407 ASSERT(qp->qh_qlen == cnt); 408 } 409 410 #else /* THREAD_DEBUG */ 411 412 #define QVERIFY(qp) 413 414 #endif /* THREAD_DEBUG */ 415 416 /* 417 * Acquire a queue head. 
418 */ 419 queue_head_t * 420 queue_lock(void *wchan, int qtype) 421 { 422 uberdata_t *udp = curthread->ul_uberdata; 423 queue_head_t *qp; 424 425 ASSERT(qtype == MX || qtype == CV); 426 427 /* 428 * It is possible that we could be called while still single-threaded. 429 * If so, we call queue_alloc() to allocate the queue_head[] array. 430 */ 431 if ((qp = udp->queue_head) == NULL) { 432 queue_alloc(); 433 qp = udp->queue_head; 434 } 435 qp += QUEUE_HASH(wchan, qtype); 436 spin_lock_set(&qp->qh_lock); 437 /* 438 * At once per nanosecond, qh_lockcount will wrap after 512 years. 439 * Were we to care about this, we could peg the value at UINT64_MAX. 440 */ 441 qp->qh_lockcount++; 442 QVERIFY(qp); 443 return (qp); 444 } 445 446 /* 447 * Release a queue head. 448 */ 449 void 450 queue_unlock(queue_head_t *qp) 451 { 452 QVERIFY(qp); 453 spin_lock_clear(&qp->qh_lock); 454 } 455 456 /* 457 * For rwlock queueing, we must queue writers ahead of readers of the 458 * same priority. We do this by making writers appear to have a half 459 * point higher priority for purposes of priority comparisons below. 460 */ 461 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 462 463 void 464 enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype) 465 { 466 ulwp_t **ulwpp; 467 ulwp_t *next; 468 int pri = CMP_PRIO(ulwp); 469 int force_fifo = (qtype & FIFOQ); 470 int do_fifo; 471 472 qtype &= ~FIFOQ; 473 ASSERT(qtype == MX || qtype == CV); 474 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 475 ASSERT(ulwp->ul_sleepq != qp); 476 477 /* 478 * LIFO queue ordering is unfair and can lead to starvation, 479 * but it gives better performance for heavily contended locks. 
480 * We use thread_queue_fifo (range is 0..8) to determine 481 * the frequency of FIFO vs LIFO queuing: 482 * 0 : every 256th time (almost always LIFO) 483 * 1 : every 128th time 484 * 2 : every 64th time 485 * 3 : every 32nd time 486 * 4 : every 16th time (the default value, mostly LIFO) 487 * 5 : every 8th time 488 * 6 : every 4th time 489 * 7 : every 2nd time 490 * 8 : every time (never LIFO, always FIFO) 491 * Note that there is always some degree of FIFO ordering. 492 * This breaks live lock conditions that occur in applications 493 * that are written assuming (incorrectly) that threads acquire 494 * locks fairly, that is, in roughly round-robin order. 495 * In any event, the queue is maintained in priority order. 496 * 497 * If we are given the FIFOQ flag in qtype, fifo queueing is forced. 498 * SUSV3 requires this for semaphores. 499 */ 500 do_fifo = (force_fifo || 501 ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0); 502 503 if (qp->qh_head == NULL) { 504 /* 505 * The queue is empty. LIFO/FIFO doesn't matter. 506 */ 507 ASSERT(qp->qh_tail == NULL); 508 ulwpp = &qp->qh_head; 509 } else if (do_fifo) { 510 /* 511 * Enqueue after the last thread whose priority is greater 512 * than or equal to the priority of the thread being queued. 513 * Attempt first to go directly onto the tail of the queue. 514 */ 515 if (pri <= CMP_PRIO(qp->qh_tail)) 516 ulwpp = &qp->qh_tail->ul_link; 517 else { 518 for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 519 ulwpp = &next->ul_link) 520 if (pri > CMP_PRIO(next)) 521 break; 522 } 523 } else { 524 /* 525 * Enqueue before the first thread whose priority is less 526 * than or equal to the priority of the thread being queued. 527 * Hopefully we can go directly onto the head of the queue. 
528 */ 529 for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL; 530 ulwpp = &next->ul_link) 531 if (pri >= CMP_PRIO(next)) 532 break; 533 } 534 if ((ulwp->ul_link = *ulwpp) == NULL) 535 qp->qh_tail = ulwp; 536 *ulwpp = ulwp; 537 538 ulwp->ul_sleepq = qp; 539 ulwp->ul_wchan = wchan; 540 ulwp->ul_qtype = qtype; 541 if (qp->qh_qmax < ++qp->qh_qlen) 542 qp->qh_qmax = qp->qh_qlen; 543 } 544 545 /* 546 * Return a pointer to the queue slot of the 547 * highest priority thread on the queue. 548 * On return, prevp, if not NULL, will contain a pointer 549 * to the thread's predecessor on the queue 550 */ 551 static ulwp_t ** 552 queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp) 553 { 554 ulwp_t **ulwpp; 555 ulwp_t *ulwp; 556 ulwp_t *prev = NULL; 557 ulwp_t **suspp = NULL; 558 ulwp_t *susprev; 559 560 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 561 562 /* 563 * Find a waiter on the sleep queue. 564 */ 565 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 566 prev = ulwp, ulwpp = &ulwp->ul_link) { 567 if (ulwp->ul_wchan == wchan) { 568 if (!ulwp->ul_stop) 569 break; 570 /* 571 * Try not to return a suspended thread. 572 * This mimics the old libthread's behavior. 573 */ 574 if (suspp == NULL) { 575 suspp = ulwpp; 576 susprev = prev; 577 } 578 } 579 } 580 581 if (ulwp == NULL && suspp != NULL) { 582 ulwp = *(ulwpp = suspp); 583 prev = susprev; 584 suspp = NULL; 585 } 586 if (ulwp == NULL) { 587 if (more != NULL) 588 *more = 0; 589 return (NULL); 590 } 591 592 if (prevp != NULL) 593 *prevp = prev; 594 if (more == NULL) 595 return (ulwpp); 596 597 /* 598 * Scan the remainder of the queue for another waiter. 
599 */ 600 if (suspp != NULL) { 601 *more = 1; 602 return (ulwpp); 603 } 604 for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) { 605 if (ulwp->ul_wchan == wchan) { 606 *more = 1; 607 return (ulwpp); 608 } 609 } 610 611 *more = 0; 612 return (ulwpp); 613 } 614 615 ulwp_t * 616 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 617 { 618 ulwp_t *ulwp; 619 620 ulwp = *ulwpp; 621 *ulwpp = ulwp->ul_link; 622 ulwp->ul_link = NULL; 623 if (qp->qh_tail == ulwp) 624 qp->qh_tail = prev; 625 qp->qh_qlen--; 626 ulwp->ul_sleepq = NULL; 627 ulwp->ul_wchan = NULL; 628 629 return (ulwp); 630 } 631 632 ulwp_t * 633 dequeue(queue_head_t *qp, void *wchan, int *more) 634 { 635 ulwp_t **ulwpp; 636 ulwp_t *prev; 637 638 if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL) 639 return (NULL); 640 return (queue_unlink(qp, ulwpp, prev)); 641 } 642 643 /* 644 * Return a pointer to the highest priority thread sleeping on wchan. 645 */ 646 ulwp_t * 647 queue_waiter(queue_head_t *qp, void *wchan) 648 { 649 ulwp_t **ulwpp; 650 651 if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL) 652 return (NULL); 653 return (*ulwpp); 654 } 655 656 uint8_t 657 dequeue_self(queue_head_t *qp, void *wchan) 658 { 659 ulwp_t *self = curthread; 660 ulwp_t **ulwpp; 661 ulwp_t *ulwp; 662 ulwp_t *prev = NULL; 663 int found = 0; 664 int more = 0; 665 666 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 667 668 /* find self on the sleep queue */ 669 for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL; 670 prev = ulwp, ulwpp = &ulwp->ul_link) { 671 if (ulwp == self) { 672 /* dequeue ourself */ 673 ASSERT(self->ul_wchan == wchan); 674 (void) queue_unlink(qp, ulwpp, prev); 675 self->ul_cvmutex = NULL; 676 self->ul_cv_wake = 0; 677 found = 1; 678 break; 679 } 680 if (ulwp->ul_wchan == wchan) 681 more = 1; 682 } 683 684 if (!found) 685 thr_panic("dequeue_self(): curthread not found on queue"); 686 687 if (more) 688 return (1); 689 690 /* scan the remainder of the queue for another waiter */ 691 for 
(ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) { 692 if (ulwp->ul_wchan == wchan) 693 return (1); 694 } 695 696 return (0); 697 } 698 699 /* 700 * Called from call_user_handler() and _thrp_suspend() to take 701 * ourself off of our sleep queue so we can grab locks. 702 */ 703 void 704 unsleep_self(void) 705 { 706 ulwp_t *self = curthread; 707 queue_head_t *qp; 708 709 /* 710 * Calling enter_critical()/exit_critical() here would lead 711 * to recursion. Just manipulate self->ul_critical directly. 712 */ 713 self->ul_critical++; 714 while (self->ul_sleepq != NULL) { 715 qp = queue_lock(self->ul_wchan, self->ul_qtype); 716 /* 717 * We may have been moved from a CV queue to a 718 * mutex queue while we were attempting queue_lock(). 719 * If so, just loop around and try again. 720 * dequeue_self() clears self->ul_sleepq. 721 */ 722 if (qp == self->ul_sleepq) { 723 (void) dequeue_self(qp, self->ul_wchan); 724 self->ul_writer = 0; 725 } 726 queue_unlock(qp); 727 } 728 self->ul_critical--; 729 } 730 731 /* 732 * Common code for calling the the ___lwp_mutex_timedlock() system call. 733 * Returns with mutex_owner and mutex_ownerpid set correctly. 734 */ 735 static int 736 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 737 { 738 ulwp_t *self = curthread; 739 uberdata_t *udp = self->ul_uberdata; 740 int mtype = mp->mutex_type; 741 hrtime_t begin_sleep; 742 int acquired; 743 int error; 744 745 self->ul_sp = stkptr(); 746 self->ul_wchan = mp; 747 if (__td_event_report(self, TD_SLEEP, udp)) { 748 self->ul_td_evbuf.eventnum = TD_SLEEP; 749 self->ul_td_evbuf.eventdata = mp; 750 tdb_event(TD_SLEEP, udp); 751 } 752 if (msp) { 753 tdb_incr(msp->mutex_sleep); 754 begin_sleep = gethrtime(); 755 } 756 757 DTRACE_PROBE1(plockstat, mutex__block, mp); 758 759 for (;;) { 760 /* 761 * A return value of EOWNERDEAD or ELOCKUNMAPPED 762 * means we successfully acquired the lock. 
763 */ 764 if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 765 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 766 acquired = 0; 767 break; 768 } 769 770 if (mtype & USYNC_PROCESS) { 771 /* 772 * Defend against forkall(). We may be the child, 773 * in which case we don't actually own the mutex. 774 */ 775 enter_critical(self); 776 if (mp->mutex_ownerpid == udp->pid) { 777 mp->mutex_owner = (uintptr_t)self; 778 exit_critical(self); 779 acquired = 1; 780 break; 781 } 782 exit_critical(self); 783 } else { 784 mp->mutex_owner = (uintptr_t)self; 785 acquired = 1; 786 break; 787 } 788 } 789 if (msp) 790 msp->mutex_sleep_time += gethrtime() - begin_sleep; 791 self->ul_wchan = NULL; 792 self->ul_sp = 0; 793 794 if (acquired) { 795 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 796 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 797 } else { 798 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 799 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 800 } 801 802 return (error); 803 } 804 805 /* 806 * Common code for calling the ___lwp_mutex_trylock() system call. 807 * Returns with mutex_owner and mutex_ownerpid set correctly. 808 */ 809 int 810 mutex_trylock_kernel(mutex_t *mp) 811 { 812 ulwp_t *self = curthread; 813 uberdata_t *udp = self->ul_uberdata; 814 int mtype = mp->mutex_type; 815 int error; 816 int acquired; 817 818 for (;;) { 819 /* 820 * A return value of EOWNERDEAD or ELOCKUNMAPPED 821 * means we successfully acquired the lock. 822 */ 823 if ((error = ___lwp_mutex_trylock(mp)) != 0 && 824 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 825 acquired = 0; 826 break; 827 } 828 829 if (mtype & USYNC_PROCESS) { 830 /* 831 * Defend against forkall(). We may be the child, 832 * in which case we don't actually own the mutex. 
833 */ 834 enter_critical(self); 835 if (mp->mutex_ownerpid == udp->pid) { 836 mp->mutex_owner = (uintptr_t)self; 837 exit_critical(self); 838 acquired = 1; 839 break; 840 } 841 exit_critical(self); 842 } else { 843 mp->mutex_owner = (uintptr_t)self; 844 acquired = 1; 845 break; 846 } 847 } 848 849 if (acquired) { 850 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 851 } else if (error != EBUSY) { 852 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 853 } 854 855 return (error); 856 } 857 858 volatile sc_shared_t * 859 setup_schedctl(void) 860 { 861 ulwp_t *self = curthread; 862 volatile sc_shared_t *scp; 863 sc_shared_t *tmp; 864 865 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 866 !self->ul_vfork && /* not a child of vfork() */ 867 !self->ul_schedctl_called) { /* haven't been called before */ 868 enter_critical(self); 869 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 870 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 871 self->ul_schedctl = scp = tmp; 872 exit_critical(self); 873 } 874 /* 875 * Unless the call to setup_schedctl() is surrounded 876 * by enter_critical()/exit_critical(), the address 877 * we are returning could be invalid due to a forkall() 878 * having occurred in another thread. 879 */ 880 return (scp); 881 } 882 883 /* 884 * Interfaces from libsched, incorporated into libc. 885 * libsched.so.1 is now a filter library onto libc. 886 */ 887 #pragma weak schedctl_lookup = _schedctl_init 888 #pragma weak _schedctl_lookup = _schedctl_init 889 #pragma weak schedctl_init = _schedctl_init 890 schedctl_t * 891 _schedctl_init(void) 892 { 893 volatile sc_shared_t *scp = setup_schedctl(); 894 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 895 } 896 897 #pragma weak schedctl_exit = _schedctl_exit 898 void 899 _schedctl_exit(void) 900 { 901 } 902 903 /* 904 * Contract private interface for java. 905 * Set up the schedctl data if it doesn't exist yet. 
906 * Return a pointer to the pointer to the schedctl data. 907 */ 908 volatile sc_shared_t *volatile * 909 _thr_schedctl(void) 910 { 911 ulwp_t *self = curthread; 912 volatile sc_shared_t *volatile *ptr; 913 914 if (self->ul_vfork) 915 return (NULL); 916 if (*(ptr = &self->ul_schedctl) == NULL) 917 (void) setup_schedctl(); 918 return (ptr); 919 } 920 921 /* 922 * Block signals and attempt to block preemption. 923 * no_preempt()/preempt() must be used in pairs but can be nested. 924 */ 925 void 926 no_preempt(ulwp_t *self) 927 { 928 volatile sc_shared_t *scp; 929 930 if (self->ul_preempt++ == 0) { 931 enter_critical(self); 932 if ((scp = self->ul_schedctl) != NULL || 933 (scp = setup_schedctl()) != NULL) { 934 /* 935 * Save the pre-existing preempt value. 936 */ 937 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 938 scp->sc_preemptctl.sc_nopreempt = 1; 939 } 940 } 941 } 942 943 /* 944 * Undo the effects of no_preempt(). 945 */ 946 void 947 preempt(ulwp_t *self) 948 { 949 volatile sc_shared_t *scp; 950 951 ASSERT(self->ul_preempt > 0); 952 if (--self->ul_preempt == 0) { 953 if ((scp = self->ul_schedctl) != NULL) { 954 /* 955 * Restore the pre-existing preempt value. 956 */ 957 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 958 if (scp->sc_preemptctl.sc_yield && 959 scp->sc_preemptctl.sc_nopreempt == 0) { 960 lwp_yield(); 961 if (scp->sc_preemptctl.sc_yield) { 962 /* 963 * Shouldn't happen. This is either 964 * a race condition or the thread 965 * just entered the real-time class. 966 */ 967 lwp_yield(); 968 scp->sc_preemptctl.sc_yield = 0; 969 } 970 } 971 } 972 exit_critical(self); 973 } 974 } 975 976 /* 977 * If a call to preempt() would cause the current thread to yield or to 978 * take deferred actions in exit_critical(), then unpark the specified 979 * lwp so it can run while we delay. Return the original lwpid if the 980 * unpark was not performed, else return zero. The tests are a repeat 981 * of some of the tests in preempt(), above. 
This is a statistical 982 * optimization solely for cond_sleep_queue(), below. 983 */ 984 static lwpid_t 985 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 986 { 987 volatile sc_shared_t *scp = self->ul_schedctl; 988 989 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 990 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 991 (self->ul_curplease && self->ul_critical == 1)) { 992 (void) __lwp_unpark(lwpid); 993 lwpid = 0; 994 } 995 return (lwpid); 996 } 997 998 /* 999 * Spin for a while, trying to grab the lock. 1000 * If this fails, return EBUSY and let the caller deal with it. 1001 * If this succeeds, return 0 with mutex_owner set to curthread. 1002 */ 1003 static int 1004 mutex_trylock_adaptive(mutex_t *mp) 1005 { 1006 ulwp_t *self = curthread; 1007 int error = EBUSY; 1008 ulwp_t *ulwp; 1009 volatile sc_shared_t *scp; 1010 volatile uint8_t *lockp; 1011 volatile uint64_t *ownerp; 1012 int count; 1013 int max; 1014 1015 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1016 1017 if (MUTEX_OWNER(mp) == self) 1018 return (EBUSY); 1019 1020 /* short-cut, not definitive (see below) */ 1021 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1022 ASSERT(mp->mutex_type & LOCK_ROBUST); 1023 DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); 1024 return (ENOTRECOVERABLE); 1025 } 1026 1027 if ((max = self->ul_adaptive_spin) == 0 || 1028 mp->mutex_spinners >= self->ul_max_spinners) 1029 max = 1; /* try at least once */ 1030 1031 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1032 1033 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1034 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1035 /* 1036 * This spin loop is unfair to lwps that have already dropped into 1037 * the kernel to sleep. They will starve on a highly-contended mutex. 1038 * This is just too bad. The adaptive spin algorithm is intended 1039 * to allow programs with highly-contended locks (that is, broken 1040 * programs) to execute with reasonable speed despite their contention. 
1041 * Being fair would reduce the speed of such programs and well-written 1042 * programs will not suffer in any case. 1043 */ 1044 enter_critical(self); /* protects ul_schedctl */ 1045 atomic_inc_32(&mp->mutex_spinners); 1046 for (count = 1; count <= max; count++) { 1047 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1048 *ownerp = (uintptr_t)self; 1049 error = 0; 1050 break; 1051 } 1052 SMT_PAUSE(); 1053 /* 1054 * Stop spinning if the mutex owner is not running on 1055 * a processor; it will not drop the lock any time soon 1056 * and we would just be wasting time to keep spinning. 1057 * 1058 * Note that we are looking at another thread (ulwp_t) 1059 * without ensuring that the other thread does not exit. 1060 * The scheme relies on ulwp_t structures never being 1061 * deallocated by the library (the library employs a free 1062 * list of ulwp_t structs that are reused when new threads 1063 * are created) and on schedctl shared memory never being 1064 * deallocated once created via __schedctl(). 1065 * 1066 * Thus, the worst that can happen when the spinning thread 1067 * looks at the owner's schedctl data is that it is looking 1068 * at some other thread's schedctl data. This almost never 1069 * happens and is benign when it does. 1070 */ 1071 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1072 ((scp = ulwp->ul_schedctl) == NULL || 1073 scp->sc_state != SC_ONPROC)) 1074 break; 1075 } 1076 atomic_dec_32(&mp->mutex_spinners); 1077 exit_critical(self); 1078 1079 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1080 ASSERT(mp->mutex_type & LOCK_ROBUST); 1081 /* 1082 * We shouldn't own the mutex; clear the lock. 
1083 */ 1084 mp->mutex_owner = 0; 1085 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) 1086 mutex_wakeup_all(mp); 1087 error = ENOTRECOVERABLE; 1088 } 1089 1090 if (error) { 1091 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1092 if (error != EBUSY) { 1093 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1094 } 1095 } else { 1096 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1097 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1098 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1099 ASSERT(mp->mutex_type & LOCK_ROBUST); 1100 error = EOWNERDEAD; 1101 } 1102 } 1103 1104 return (error); 1105 } 1106 1107 /* 1108 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1109 * The owner field is not set here; the caller (spin_lock_set()) sets it. 1110 */ 1111 static int 1112 mutex_queuelock_adaptive(mutex_t *mp) 1113 { 1114 ulwp_t *ulwp; 1115 volatile sc_shared_t *scp; 1116 volatile uint8_t *lockp; 1117 volatile uint64_t *ownerp; 1118 int count = curthread->ul_queue_spin; 1119 1120 ASSERT(mp->mutex_type == USYNC_THREAD); 1121 1122 if (count == 0) 1123 return (EBUSY); 1124 1125 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1126 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1127 while (--count >= 0) { 1128 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1129 return (0); 1130 SMT_PAUSE(); 1131 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1132 ((scp = ulwp->ul_schedctl) == NULL || 1133 scp->sc_state != SC_ONPROC)) 1134 break; 1135 } 1136 1137 return (EBUSY); 1138 } 1139 1140 /* 1141 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1142 * Spin for a while, trying to grab the lock. 1143 * If this fails, return EBUSY and let the caller deal with it. 1144 * If this succeeds, return 0 with mutex_owner set to curthread 1145 * and mutex_ownerpid set to the current pid. 
1146 */ 1147 static int 1148 mutex_trylock_process(mutex_t *mp) 1149 { 1150 ulwp_t *self = curthread; 1151 int error = EBUSY; 1152 volatile uint8_t *lockp; 1153 int count; 1154 int max; 1155 1156 ASSERT(mp->mutex_type & USYNC_PROCESS); 1157 1158 if (shared_mutex_held(mp)) 1159 return (EBUSY); 1160 1161 /* short-cut, not definitive (see below) */ 1162 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1163 ASSERT(mp->mutex_type & LOCK_ROBUST); 1164 DTRACE_PROBE2(plockstat, mutex__error, mp, ENOTRECOVERABLE); 1165 return (ENOTRECOVERABLE); 1166 } 1167 1168 if (ncpus == 0) 1169 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1170 max = (ncpus > 1)? self->ul_adaptive_spin : 1; 1171 if (max == 0) 1172 max = 1; /* try at least once */ 1173 1174 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1175 1176 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1177 /* 1178 * This is a process-shared mutex. 1179 * We cannot know if the owner is running on a processor. 1180 * We just spin and hope that it is on a processor. 1181 */ 1182 enter_critical(self); 1183 for (count = 1; count <= max; count++) { 1184 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1185 mp->mutex_owner = (uintptr_t)self; 1186 mp->mutex_ownerpid = self->ul_uberdata->pid; 1187 error = 0; 1188 break; 1189 } 1190 SMT_PAUSE(); 1191 } 1192 exit_critical(self); 1193 1194 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1195 ASSERT(mp->mutex_type & LOCK_ROBUST); 1196 /* 1197 * We shouldn't own the mutex; clear the lock. 
1198 */ 1199 mp->mutex_owner = 0; 1200 mp->mutex_ownerpid = 0; 1201 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 1202 no_preempt(self); 1203 (void) ___lwp_mutex_wakeup(mp, 1); 1204 preempt(self); 1205 } 1206 error = ENOTRECOVERABLE; 1207 } 1208 1209 if (error) { 1210 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1211 if (error != EBUSY) { 1212 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1213 } 1214 } else { 1215 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1216 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1217 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1218 ASSERT(mp->mutex_type & LOCK_ROBUST); 1219 if (mp->mutex_flag & LOCK_OWNERDEAD) 1220 error = EOWNERDEAD; 1221 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1222 error = ELOCKUNMAPPED; 1223 else 1224 error = EOWNERDEAD; 1225 } 1226 } 1227 1228 return (error); 1229 } 1230 1231 /* 1232 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1233 * Returns the lwpid of the thread that was dequeued, if any. 1234 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1235 * to wake up the specified lwp. 1236 */ 1237 static lwpid_t 1238 mutex_wakeup(mutex_t *mp) 1239 { 1240 lwpid_t lwpid = 0; 1241 queue_head_t *qp; 1242 ulwp_t *ulwp; 1243 int more; 1244 1245 /* 1246 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1247 * waiters bit if no one was found on the queue because the mutex 1248 * might have been deallocated or reallocated for another purpose. 1249 */ 1250 qp = queue_lock(mp, MX); 1251 if ((ulwp = dequeue(qp, mp, &more)) != NULL) { 1252 lwpid = ulwp->ul_lwpid; 1253 mp->mutex_waiters = (more? 1 : 0); 1254 } 1255 queue_unlock(qp); 1256 return (lwpid); 1257 } 1258 1259 /* 1260 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 
1261 */ 1262 static void 1263 mutex_wakeup_all(mutex_t *mp) 1264 { 1265 queue_head_t *qp; 1266 int nlwpid = 0; 1267 int maxlwps = MAXLWPS; 1268 ulwp_t **ulwpp; 1269 ulwp_t *ulwp; 1270 ulwp_t *prev = NULL; 1271 lwpid_t buffer[MAXLWPS]; 1272 lwpid_t *lwpid = buffer; 1273 1274 /* 1275 * Walk the list of waiters and prepare to wake up all of them. 1276 * The waiters flag has already been cleared from the mutex. 1277 * 1278 * We keep track of lwpids that are to be unparked in lwpid[]. 1279 * __lwp_unpark_all() is called to unpark all of them after 1280 * they have been removed from the sleep queue and the sleep 1281 * queue lock has been dropped. If we run out of space in our 1282 * on-stack buffer, we need to allocate more but we can't call 1283 * lmalloc() because we are holding a queue lock when the overflow 1284 * occurs and lmalloc() acquires a lock. We can't use alloca() 1285 * either because the application may have allocated a small 1286 * stack and we don't want to overrun the stack. So we call 1287 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1288 * system call directly since that path acquires no locks. 1289 */ 1290 qp = queue_lock(mp, MX); 1291 ulwpp = &qp->qh_head; 1292 while ((ulwp = *ulwpp) != NULL) { 1293 if (ulwp->ul_wchan != mp) { 1294 prev = ulwp; 1295 ulwpp = &ulwp->ul_link; 1296 } else { 1297 if (nlwpid == maxlwps) 1298 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1299 (void) queue_unlink(qp, ulwpp, prev); 1300 lwpid[nlwpid++] = ulwp->ul_lwpid; 1301 } 1302 } 1303 mp->mutex_waiters = 0; 1304 1305 if (nlwpid == 0) { 1306 queue_unlock(qp); 1307 } else { 1308 no_preempt(curthread); 1309 queue_unlock(qp); 1310 if (nlwpid == 1) 1311 (void) __lwp_unpark(lwpid[0]); 1312 else 1313 (void) __lwp_unpark_all(lwpid, nlwpid); 1314 preempt(curthread); 1315 } 1316 1317 if (lwpid != buffer) 1318 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 1319 } 1320 1321 /* 1322 * Spin for a while, testing to see if the lock has been grabbed. 
1323 * If this fails, call mutex_wakeup() to release a waiter. 1324 */ 1325 static lwpid_t 1326 mutex_unlock_queue(mutex_t *mp, int release_all) 1327 { 1328 ulwp_t *self = curthread; 1329 uint32_t *lockw = &mp->mutex_lockword; 1330 lwpid_t lwpid; 1331 volatile uint8_t *lockp; 1332 volatile uint32_t *spinp; 1333 int count; 1334 1335 /* 1336 * We use the swap primitive to clear the lock, but we must 1337 * atomically retain the waiters bit for the remainder of this 1338 * code to work. We first check to see if the waiters bit is 1339 * set and if so clear the lock by swapping in a word containing 1340 * only the waiters bit. This could produce a false positive test 1341 * for whether there are waiters that need to be waked up, but 1342 * this just causes an extra call to mutex_wakeup() to do nothing. 1343 * The opposite case is more delicate: If there are no waiters, 1344 * we swap in a zero lock byte and a zero waiters bit. The result 1345 * of the swap could indicate that there really was a waiter so in 1346 * this case we go directly to mutex_wakeup() without performing 1347 * any of the adaptive code because the waiter bit has been cleared 1348 * and the adaptive code is unreliable in this case. 1349 */ 1350 if (release_all || !(*lockw & WAITERMASK)) { 1351 mp->mutex_owner = 0; 1352 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1353 if (!(atomic_swap_32(lockw, 0) & WAITERMASK)) 1354 return (0); /* no waiters */ 1355 no_preempt(self); /* ensure a prompt wakeup */ 1356 } else { 1357 no_preempt(self); /* ensure a prompt wakeup */ 1358 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1359 spinp = (volatile uint32_t *)&mp->mutex_spinners; 1360 mp->mutex_owner = 0; 1361 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1362 /* clear lock, retain waiter */ 1363 (void) atomic_swap_32(lockw, WAITER); 1364 1365 /* 1366 * We spin here fewer times than mutex_trylock_adaptive(). 1367 * We are trying to balance two conflicting goals: 1368 * 1. 
Avoid waking up anyone if a spinning thread 1369 * grabs the lock. 1370 * 2. Wake up a sleeping thread promptly to get on 1371 * with useful work. 1372 * We don't spin at all if there is no acquiring spinner; 1373 * (mp->mutex_spinners is non-zero if there are spinners). 1374 */ 1375 for (count = self->ul_release_spin; 1376 *spinp && count > 0; count--) { 1377 /* 1378 * There is a waiter that we will have to wake 1379 * up unless someone else grabs the lock while 1380 * we are busy spinning. Like the spin loop in 1381 * mutex_trylock_adaptive(), this spin loop is 1382 * unfair to lwps that have already dropped into 1383 * the kernel to sleep. They will starve on a 1384 * highly-contended mutex. Too bad. 1385 */ 1386 if (*lockp != 0) { /* somebody grabbed the lock */ 1387 preempt(self); 1388 return (0); 1389 } 1390 SMT_PAUSE(); 1391 } 1392 1393 /* 1394 * No one grabbed the lock. 1395 * Wake up some lwp that is waiting for it. 1396 */ 1397 mp->mutex_waiters = 0; 1398 } 1399 1400 if (release_all) { 1401 mutex_wakeup_all(mp); 1402 lwpid = 0; 1403 } else { 1404 lwpid = mutex_wakeup(mp); 1405 } 1406 if (lwpid == 0) 1407 preempt(self); 1408 return (lwpid); 1409 } 1410 1411 /* 1412 * Like mutex_unlock_queue(), but for process-shared mutexes. 1413 * We tested the waiters field before calling here and it was non-zero. 1414 */ 1415 static void 1416 mutex_unlock_process(mutex_t *mp, int release_all) 1417 { 1418 ulwp_t *self = curthread; 1419 int count; 1420 volatile uint8_t *lockp; 1421 1422 /* 1423 * See the comments in mutex_unlock_queue(), above. 1424 */ 1425 if (ncpus == 0) 1426 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1427 count = (ncpus > 1)? 
self->ul_release_spin : 0; 1428 no_preempt(self); 1429 mp->mutex_owner = 0; 1430 mp->mutex_ownerpid = 0; 1431 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1432 if (release_all || count == 0) { 1433 /* clear lock, test waiter */ 1434 if (!(atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK)) { 1435 /* no waiters now */ 1436 preempt(self); 1437 return; 1438 } 1439 } else { 1440 /* clear lock, retain waiter */ 1441 (void) atomic_swap_32(&mp->mutex_lockword, WAITER); 1442 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1443 while (--count >= 0) { 1444 if (*lockp != 0) { 1445 /* somebody grabbed the lock */ 1446 preempt(self); 1447 return; 1448 } 1449 SMT_PAUSE(); 1450 } 1451 /* 1452 * We must clear the waiters field before going 1453 * to the kernel, else it could remain set forever. 1454 */ 1455 mp->mutex_waiters = 0; 1456 } 1457 (void) ___lwp_mutex_wakeup(mp, release_all); 1458 preempt(self); 1459 } 1460 1461 /* 1462 * Return the real priority of a thread. 1463 */ 1464 int 1465 real_priority(ulwp_t *ulwp) 1466 { 1467 if (ulwp->ul_epri == 0) 1468 return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri); 1469 return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri); 1470 } 1471 1472 void 1473 stall(void) 1474 { 1475 for (;;) 1476 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1477 } 1478 1479 /* 1480 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1481 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1482 * If successful, returns with mutex_owner set correctly. 
1483 */ 1484 int 1485 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1486 timespec_t *tsp) 1487 { 1488 uberdata_t *udp = curthread->ul_uberdata; 1489 queue_head_t *qp; 1490 hrtime_t begin_sleep; 1491 int error = 0; 1492 1493 self->ul_sp = stkptr(); 1494 if (__td_event_report(self, TD_SLEEP, udp)) { 1495 self->ul_wchan = mp; 1496 self->ul_td_evbuf.eventnum = TD_SLEEP; 1497 self->ul_td_evbuf.eventdata = mp; 1498 tdb_event(TD_SLEEP, udp); 1499 } 1500 if (msp) { 1501 tdb_incr(msp->mutex_sleep); 1502 begin_sleep = gethrtime(); 1503 } 1504 1505 DTRACE_PROBE1(plockstat, mutex__block, mp); 1506 1507 /* 1508 * Put ourself on the sleep queue, and while we are 1509 * unable to grab the lock, go park in the kernel. 1510 * Take ourself off the sleep queue after we acquire the lock. 1511 * The waiter bit can be set/cleared only while holding the queue lock. 1512 */ 1513 qp = queue_lock(mp, MX); 1514 enqueue(qp, self, mp, MX); 1515 mp->mutex_waiters = 1; 1516 for (;;) { 1517 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1518 mp->mutex_owner = (uintptr_t)self; 1519 mp->mutex_waiters = dequeue_self(qp, mp); 1520 break; 1521 } 1522 set_parking_flag(self, 1); 1523 queue_unlock(qp); 1524 /* 1525 * __lwp_park() will return the residual time in tsp 1526 * if we are unparked before the timeout expires. 1527 */ 1528 if ((error = __lwp_park(tsp, 0)) == EINTR) 1529 error = 0; 1530 set_parking_flag(self, 0); 1531 /* 1532 * We could have taken a signal or suspended ourself. 1533 * If we did, then we removed ourself from the queue. 1534 * Someone else may have removed us from the queue 1535 * as a consequence of mutex_unlock(). We may have 1536 * gotten a timeout from __lwp_park(). Or we may still 1537 * be on the queue and this is just a spurious wakeup. 
1538 */ 1539 qp = queue_lock(mp, MX); 1540 if (self->ul_sleepq == NULL) { 1541 if (error) 1542 break; 1543 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1544 mp->mutex_owner = (uintptr_t)self; 1545 break; 1546 } 1547 enqueue(qp, self, mp, MX); 1548 mp->mutex_waiters = 1; 1549 } 1550 ASSERT(self->ul_sleepq == qp && 1551 self->ul_qtype == MX && 1552 self->ul_wchan == mp); 1553 if (error) { 1554 mp->mutex_waiters = dequeue_self(qp, mp); 1555 break; 1556 } 1557 } 1558 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1559 self->ul_wchan == NULL); 1560 self->ul_sp = 0; 1561 queue_unlock(qp); 1562 1563 if (msp) 1564 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1565 1566 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1567 1568 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1569 ASSERT(mp->mutex_type & LOCK_ROBUST); 1570 /* 1571 * We shouldn't own the mutex; clear the lock. 1572 */ 1573 mp->mutex_owner = 0; 1574 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) 1575 mutex_wakeup_all(mp); 1576 error = ENOTRECOVERABLE; 1577 } 1578 1579 if (error) { 1580 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1581 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1582 } else { 1583 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1584 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1585 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1586 ASSERT(mp->mutex_type & LOCK_ROBUST); 1587 error = EOWNERDEAD; 1588 } 1589 } 1590 1591 return (error); 1592 } 1593 1594 static int 1595 mutex_recursion(mutex_t *mp, int mtype, int try) 1596 { 1597 ASSERT(mutex_is_held(mp)); 1598 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1599 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1600 1601 if (mtype & LOCK_RECURSIVE) { 1602 if (mp->mutex_rcount == RECURSION_MAX) { 1603 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1604 return (EAGAIN); 1605 } 1606 mp->mutex_rcount++; 1607 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1608 return (0); 1609 } 1610 if 
(try == MUTEX_LOCK) { 1611 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1612 return (EDEADLK); 1613 } 1614 return (EBUSY); 1615 } 1616 1617 /* 1618 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1619 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1620 * We use tdb_hash_lock here and in the synch object tracking code in 1621 * the tdb_agent.c file. There is no conflict between these two usages. 1622 */ 1623 void 1624 register_lock(mutex_t *mp) 1625 { 1626 uberdata_t *udp = curthread->ul_uberdata; 1627 uint_t hash = LOCK_HASH(mp); 1628 robust_t *rlp; 1629 robust_t **rlpp; 1630 robust_t **table; 1631 1632 if ((table = udp->robustlocks) == NULL) { 1633 lmutex_lock(&udp->tdb_hash_lock); 1634 if ((table = udp->robustlocks) == NULL) { 1635 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1636 _membar_producer(); 1637 udp->robustlocks = table; 1638 } 1639 lmutex_unlock(&udp->tdb_hash_lock); 1640 } 1641 _membar_consumer(); 1642 1643 /* 1644 * First search the registered table with no locks held. 1645 * This is safe because the table never shrinks 1646 * and we can only get a false negative. 1647 */ 1648 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1649 if (rlp->robust_lock == mp) /* already registered */ 1650 return; 1651 } 1652 1653 /* 1654 * The lock was not found. 1655 * Repeat the operation with tdb_hash_lock held. 1656 */ 1657 lmutex_lock(&udp->tdb_hash_lock); 1658 1659 for (rlpp = &table[hash]; 1660 (rlp = *rlpp) != NULL; 1661 rlpp = &rlp->robust_next) { 1662 if (rlp->robust_lock == mp) { /* already registered */ 1663 lmutex_unlock(&udp->tdb_hash_lock); 1664 return; 1665 } 1666 } 1667 1668 /* 1669 * The lock has never been registered. 1670 * Register it now and add it to the table. 
1671 */ 1672 (void) ___lwp_mutex_register(mp); 1673 rlp = lmalloc(sizeof (*rlp)); 1674 rlp->robust_lock = mp; 1675 _membar_producer(); 1676 *rlpp = rlp; 1677 1678 lmutex_unlock(&udp->tdb_hash_lock); 1679 } 1680 1681 /* 1682 * This is called in the child of fork()/forkall() to start over 1683 * with a clean slate. (Each process must register its own locks.) 1684 * No locks are needed because all other threads are suspended or gone. 1685 */ 1686 void 1687 unregister_locks(void) 1688 { 1689 uberdata_t *udp = curthread->ul_uberdata; 1690 uint_t hash; 1691 robust_t **table; 1692 robust_t *rlp; 1693 robust_t *next; 1694 1695 if ((table = udp->robustlocks) != NULL) { 1696 for (hash = 0; hash < LOCKHASHSZ; hash++) { 1697 rlp = table[hash]; 1698 while (rlp != NULL) { 1699 next = rlp->robust_next; 1700 lfree(rlp, sizeof (*rlp)); 1701 rlp = next; 1702 } 1703 } 1704 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 1705 udp->robustlocks = NULL; 1706 } 1707 } 1708 1709 /* 1710 * Returns with mutex_owner set correctly. 
1711 */ 1712 static int 1713 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 1714 { 1715 ulwp_t *self = curthread; 1716 uberdata_t *udp = self->ul_uberdata; 1717 int mtype = mp->mutex_type; 1718 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 1719 int error = 0; 1720 uint8_t ceil; 1721 int myprio; 1722 1723 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1724 1725 if (!self->ul_schedctl_called) 1726 (void) setup_schedctl(); 1727 1728 if (msp && try == MUTEX_TRY) 1729 tdb_incr(msp->mutex_try); 1730 1731 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 1732 return (mutex_recursion(mp, mtype, try)); 1733 1734 if (self->ul_error_detection && try == MUTEX_LOCK && 1735 tsp == NULL && mutex_is_held(mp)) 1736 lock_error(mp, "mutex_lock", NULL, NULL); 1737 1738 if (mtype & LOCK_PRIO_PROTECT) { 1739 ceil = mp->mutex_ceiling; 1740 ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0); 1741 myprio = real_priority(self); 1742 if (myprio > ceil) { 1743 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 1744 return (EINVAL); 1745 } 1746 if ((error = _ceil_mylist_add(mp)) != 0) { 1747 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1748 return (error); 1749 } 1750 if (myprio < ceil) 1751 _ceil_prio_inherit(ceil); 1752 } 1753 1754 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 1755 == (USYNC_PROCESS | LOCK_ROBUST)) 1756 register_lock(mp); 1757 1758 if (mtype & LOCK_PRIO_INHERIT) { 1759 /* go straight to the kernel */ 1760 if (try == MUTEX_TRY) 1761 error = mutex_trylock_kernel(mp); 1762 else /* MUTEX_LOCK */ 1763 error = mutex_lock_kernel(mp, tsp, msp); 1764 /* 1765 * The kernel never sets or clears the lock byte 1766 * for LOCK_PRIO_INHERIT mutexes. 1767 * Set it here for consistency. 
1768 */ 1769 switch (error) { 1770 case 0: 1771 mp->mutex_lockw = LOCKSET; 1772 break; 1773 case EOWNERDEAD: 1774 case ELOCKUNMAPPED: 1775 mp->mutex_lockw = LOCKSET; 1776 /* FALLTHROUGH */ 1777 case ENOTRECOVERABLE: 1778 ASSERT(mtype & LOCK_ROBUST); 1779 break; 1780 case EDEADLK: 1781 if (try == MUTEX_LOCK) 1782 stall(); 1783 error = EBUSY; 1784 break; 1785 } 1786 } else if (mtype & USYNC_PROCESS) { 1787 error = mutex_trylock_process(mp); 1788 if (error == EBUSY && try == MUTEX_LOCK) 1789 error = mutex_lock_kernel(mp, tsp, msp); 1790 } else { /* USYNC_THREAD */ 1791 error = mutex_trylock_adaptive(mp); 1792 if (error == EBUSY && try == MUTEX_LOCK) 1793 error = mutex_lock_queue(self, msp, mp, tsp); 1794 } 1795 1796 switch (error) { 1797 case 0: 1798 case EOWNERDEAD: 1799 case ELOCKUNMAPPED: 1800 if (mtype & LOCK_ROBUST) 1801 remember_lock(mp); 1802 if (msp) 1803 record_begin_hold(msp); 1804 break; 1805 default: 1806 if (mtype & LOCK_PRIO_PROTECT) { 1807 (void) _ceil_mylist_del(mp); 1808 if (myprio < ceil) 1809 _ceil_prio_waive(); 1810 } 1811 if (try == MUTEX_TRY) { 1812 if (msp) 1813 tdb_incr(msp->mutex_try_fail); 1814 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 1815 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 1816 tdb_event(TD_LOCK_TRY, udp); 1817 } 1818 } 1819 break; 1820 } 1821 1822 return (error); 1823 } 1824 1825 int 1826 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 1827 { 1828 ulwp_t *self = curthread; 1829 uberdata_t *udp = self->ul_uberdata; 1830 1831 /* 1832 * We know that USYNC_PROCESS is set in mtype and that 1833 * zero, one, or both of the flags LOCK_RECURSIVE and 1834 * LOCK_ERRORCHECK are set, and that no other flags are set. 
1835 */ 1836 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 1837 enter_critical(self); 1838 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1839 mp->mutex_owner = (uintptr_t)self; 1840 mp->mutex_ownerpid = udp->pid; 1841 exit_critical(self); 1842 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1843 return (0); 1844 } 1845 exit_critical(self); 1846 1847 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 1848 return (mutex_recursion(mp, mtype, try)); 1849 1850 /* try a little harder */ 1851 if (mutex_trylock_process(mp) == 0) 1852 return (0); 1853 1854 if (try == MUTEX_LOCK) 1855 return (mutex_lock_kernel(mp, tsp, NULL)); 1856 1857 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 1858 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 1859 tdb_event(TD_LOCK_TRY, udp); 1860 } 1861 return (EBUSY); 1862 } 1863 1864 static int 1865 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 1866 { 1867 ulwp_t *self = curthread; 1868 uberdata_t *udp = self->ul_uberdata; 1869 uberflags_t *gflags; 1870 int mtype; 1871 1872 /* 1873 * Optimize the case of USYNC_THREAD, including 1874 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 1875 * no error detection, no lock statistics, 1876 * and the process has only a single thread. 1877 * (Most likely a traditional single-threaded application.) 1878 */ 1879 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 1880 udp->uberflags.uf_all) == 0) { 1881 /* 1882 * Only one thread exists so we don't need an atomic operation. 
1883 */ 1884 if (mp->mutex_lockw == 0) { 1885 mp->mutex_lockw = LOCKSET; 1886 mp->mutex_owner = (uintptr_t)self; 1887 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1888 return (0); 1889 } 1890 if (mtype && MUTEX_OWNER(mp) == self) 1891 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 1892 /* 1893 * We have reached a deadlock, probably because the 1894 * process is executing non-async-signal-safe code in 1895 * a signal handler and is attempting to acquire a lock 1896 * that it already owns. This is not surprising, given 1897 * bad programming practices over the years that has 1898 * resulted in applications calling printf() and such 1899 * in their signal handlers. Unless the user has told 1900 * us that the signal handlers are safe by setting: 1901 * export _THREAD_ASYNC_SAFE=1 1902 * we return EDEADLK rather than actually deadlocking. 1903 */ 1904 if (tsp == NULL && 1905 MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 1906 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1907 return (EDEADLK); 1908 } 1909 } 1910 1911 /* 1912 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 1913 * no error detection, and no lock statistics. 1914 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
1915 */ 1916 if ((gflags = self->ul_schedctl_called) != NULL && 1917 (gflags->uf_trs_ted | 1918 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 1919 if (mtype & USYNC_PROCESS) 1920 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 1921 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1922 mp->mutex_owner = (uintptr_t)self; 1923 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1924 return (0); 1925 } 1926 if (mtype && MUTEX_OWNER(mp) == self) 1927 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 1928 if (mutex_trylock_adaptive(mp) != 0) 1929 return (mutex_lock_queue(self, NULL, mp, tsp)); 1930 return (0); 1931 } 1932 1933 /* else do it the long way */ 1934 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 1935 } 1936 1937 #pragma weak _private_mutex_lock = __mutex_lock 1938 #pragma weak mutex_lock = __mutex_lock 1939 #pragma weak _mutex_lock = __mutex_lock 1940 #pragma weak pthread_mutex_lock = __mutex_lock 1941 #pragma weak _pthread_mutex_lock = __mutex_lock 1942 int 1943 __mutex_lock(mutex_t *mp) 1944 { 1945 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1946 return (mutex_lock_impl(mp, NULL)); 1947 } 1948 1949 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 1950 int 1951 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 1952 { 1953 timespec_t tslocal; 1954 int error; 1955 1956 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1957 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 1958 error = mutex_lock_impl(mp, &tslocal); 1959 if (error == ETIME) 1960 error = ETIMEDOUT; 1961 return (error); 1962 } 1963 1964 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 1965 int 1966 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 1967 { 1968 timespec_t tslocal; 1969 int error; 1970 1971 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1972 tslocal = *reltime; 1973 error = mutex_lock_impl(mp, &tslocal); 1974 if (error == ETIME) 1975 
error = ETIMEDOUT; 1976 return (error); 1977 } 1978 1979 #pragma weak _private_mutex_trylock = __mutex_trylock 1980 #pragma weak mutex_trylock = __mutex_trylock 1981 #pragma weak _mutex_trylock = __mutex_trylock 1982 #pragma weak pthread_mutex_trylock = __mutex_trylock 1983 #pragma weak _pthread_mutex_trylock = __mutex_trylock 1984 int 1985 __mutex_trylock(mutex_t *mp) 1986 { 1987 ulwp_t *self = curthread; 1988 uberdata_t *udp = self->ul_uberdata; 1989 uberflags_t *gflags; 1990 int mtype; 1991 1992 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 1993 /* 1994 * Optimize the case of USYNC_THREAD, including 1995 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 1996 * no error detection, no lock statistics, 1997 * and the process has only a single thread. 1998 * (Most likely a traditional single-threaded application.) 1999 */ 2000 if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2001 udp->uberflags.uf_all) == 0) { 2002 /* 2003 * Only one thread exists so we don't need an atomic operation. 2004 */ 2005 if (mp->mutex_lockw == 0) { 2006 mp->mutex_lockw = LOCKSET; 2007 mp->mutex_owner = (uintptr_t)self; 2008 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2009 return (0); 2010 } 2011 if (mtype && MUTEX_OWNER(mp) == self) 2012 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2013 return (EBUSY); 2014 } 2015 2016 /* 2017 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2018 * no error detection, and no lock statistics. 2019 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
2020 */ 2021 if ((gflags = self->ul_schedctl_called) != NULL && 2022 (gflags->uf_trs_ted | 2023 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2024 if (mtype & USYNC_PROCESS) 2025 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2026 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2027 mp->mutex_owner = (uintptr_t)self; 2028 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2029 return (0); 2030 } 2031 if (mtype && MUTEX_OWNER(mp) == self) 2032 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2033 if (mutex_trylock_adaptive(mp) != 0) { 2034 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2035 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2036 tdb_event(TD_LOCK_TRY, udp); 2037 } 2038 return (EBUSY); 2039 } 2040 return (0); 2041 } 2042 2043 /* else do it the long way */ 2044 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2045 } 2046 2047 int 2048 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2049 { 2050 ulwp_t *self = curthread; 2051 uberdata_t *udp = self->ul_uberdata; 2052 int mtype = mp->mutex_type; 2053 tdb_mutex_stats_t *msp; 2054 int error = 0; 2055 int release_all; 2056 lwpid_t lwpid; 2057 2058 if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 2059 return (EPERM); 2060 2061 if (self->ul_error_detection && !mutex_is_held(mp)) 2062 lock_error(mp, "mutex_unlock", NULL, NULL); 2063 2064 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2065 mp->mutex_rcount--; 2066 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2067 return (0); 2068 } 2069 2070 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2071 (void) record_hold_time(msp); 2072 2073 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2074 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2075 ASSERT(mp->mutex_type & LOCK_ROBUST); 2076 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2077 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2078 } 2079 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2080 2081 if (mtype & LOCK_PRIO_INHERIT) { 2082 
no_preempt(self);
		mp->mutex_owner = 0;
		mp->mutex_ownerpid = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		mp->mutex_lockw = LOCKCLEAR;
		error = ___lwp_mutex_unlock(mp);
		preempt(self);
	} else if (mtype & USYNC_PROCESS) {
		if (mp->mutex_lockword & WAITERMASK) {
			mutex_unlock_process(mp, release_all);
		} else {
			mp->mutex_owner = 0;
			mp->mutex_ownerpid = 0;
			DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
			if (atomic_swap_32(&mp->mutex_lockword, 0) &
			    WAITERMASK) {	/* a waiter suddenly appeared */
				no_preempt(self);
				(void) ___lwp_mutex_wakeup(mp, release_all);
				preempt(self);
			}
		}
	} else {	/* USYNC_THREAD */
		if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
	}

	if (mtype & LOCK_ROBUST)
		forget_lock(mp);

	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
		_ceil_prio_waive();

	return (error);
}

/*
 * Release the mutex 'mp'.
 *
 * Three progressively more general paths are tried:
 *   1. single-threaded process, USYNC_THREAD mutex: plain stores;
 *   2. USYNC_THREAD or USYNC_PROCESS mutex with no error detection and
 *      no lock statistics: inline release, waking a waiter if one exists;
 *   3. everything else: mutex_unlock_internal(mp, 0).
 *
 * On the two inline paths:
 *   - a LOCK_ERRORCHECK mutex not owned by the caller yields EPERM;
 *   - a LOCK_RECURSIVE mutex with a non-zero mutex_rcount only has the
 *     count decremented and stays held (returns 0).
 *
 * Returns 0 on success, EPERM as described above, or whatever
 * mutex_unlock_internal() returns on the slow path.
 */
#pragma weak _private_mutex_unlock = __mutex_unlock
#pragma weak mutex_unlock = __mutex_unlock
#pragma weak _mutex_unlock = __mutex_unlock
#pragma weak pthread_mutex_unlock = __mutex_unlock
#pragma weak _pthread_mutex_unlock = __mutex_unlock
int
__mutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	uberflags_t *gflags;
	lwpid_t lwpid;
	int mtype;
	short el;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    udp->uberflags.uf_all) == 0) {
		if (mtype) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
		}
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 * Also, there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		return (0);
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL) {
		/*
		 * 'el' caches gflags->uf_trs_ted; when it and the mutex
		 * type are both zero we fall straight into the plain
		 * USYNC_THREAD release below.
		 */
		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
fast_unlock:
			if (!(mp->mutex_lockword & WAITERMASK)) {
				/* no waiter exists right now */
				mp->mutex_owner = 0;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
				/*
				 * The owner is cleared before the lock word;
				 * the swap returns the old word so a waiter
				 * that arrived in between is still noticed.
				 */
				if (atomic_swap_32(&mp->mutex_lockword, 0) &
				    WAITERMASK) {
					/* a waiter suddenly appeared */
					no_preempt(self);
					if ((lwpid = mutex_wakeup(mp)) != 0)
						(void) __lwp_unpark(lwpid);
					preempt(self);
				}
			} else if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
				(void) __lwp_unpark(lwpid);
				preempt(self);
			}
			return (0);
		}
		if (el)		/* error detection or lock statistics */
			goto slow_unlock;
		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			goto fast_unlock;
		}
		if ((mtype &
		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that zero, one, or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
			 * that the USYNC_PROCESS flag is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			if (mp->mutex_lockword & WAITERMASK) {
				mutex_unlock_process(mp, 0);
			} else {
				/*
				 * Process-shared release: mutex_ownerpid is
				 * cleared along with mutex_owner, and any
				 * late waiter is woken through
				 * ___lwp_mutex_wakeup().
				 */
				mp->mutex_owner = 0;
				mp->mutex_ownerpid = 0;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
				if (atomic_swap_32(&mp->mutex_lockword, 0) &
				    WAITERMASK) {
					no_preempt(self);
					(void) ___lwp_mutex_wakeup(mp, 0);
					preempt(self);
				}
			}
			return (0);
		}
	}

	/* else do it the long way */
slow_unlock:
	return (mutex_unlock_internal(mp, 0));
}

/*
 * Internally to the library, almost all mutex lock/unlock actions
 * go through these lmutex_ functions, to protect critical regions.
 * We replicate a bit of code from __mutex_lock() and __mutex_unlock()
 * to make these functions faster since we know that the mutex type
 * of all internal locks is USYNC_THREAD.  We also know that internal
 * locking can never fail, so we panic if it does.
*/
void
lmutex_lock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	/*
	 * The critical region entered here is not left in this function;
	 * the matching exit_critical() is in lmutex_unlock().
	 */
	enter_critical(self);
	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists; the mutex must be free.
		 */
		ASSERT(mp->mutex_lockw == 0);
		mp->mutex_lockw = LOCKSET;
		mp->mutex_owner = (uintptr_t)self;
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else {
		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);

		if (!self->ul_schedctl_called)
			(void) setup_schedctl();

		/*
		 * Try the lock byte first; if it is taken fall back to
		 * mutex_trylock_adaptive() and, if that fails too, to
		 * mutex_lock_queue().  The result of mutex_lock_queue()
		 * is deliberately discarded here.
		 */
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		} else if (mutex_trylock_adaptive(mp) != 0) {
			(void) mutex_lock_queue(self, msp, mp, NULL);
		}

		if (msp)
			record_begin_hold(msp);
	}
}

/*
 * Release an internal (USYNC_THREAD) mutex acquired with lmutex_lock()
 * and leave the critical region that lmutex_lock() entered.
 */
void
lmutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists so there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	} else {
		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
		lwpid_t lwpid;

		if (msp)
			(void) record_hold_time(msp);
		/* a non-zero lwpid is a waiter that must be unparked */
		if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
	}
	exit_critical(self);
}

/*
 * For specialized code in libc, like the asynchronous i/o code,
 * the following sig_*() locking primitives are used in order
 * to make the code asynchronous signal safe.  Signals are
 * deferred while locks acquired by these functions are held.
 */
void
sig_mutex_lock(mutex_t *mp)
{
	/* sigoff() first; the matching sigon() is in sig_mutex_unlock() */
	sigoff(curthread);
	(void) _private_mutex_lock(mp);
}

/*
 * Counterpart of sig_mutex_lock(): drop the mutex, then call sigon().
 */
void
sig_mutex_unlock(mutex_t *mp)
{
	(void) _private_mutex_unlock(mp);
	sigon(curthread);
}

/*
 * Non-blocking variant of sig_mutex_lock().
 * Returns the result of _private_mutex_trylock().  When that result is
 * non-zero no lock is held, so sigon() is called before returning;
 * on success the caller is expected to use sig_mutex_unlock().
 */
int
sig_mutex_trylock(mutex_t *mp)
{
	int error;

	sigoff(curthread);
	if ((error = _private_mutex_trylock(mp)) != 0)
		sigon(curthread);
	return (error);
}

/*
 * sig_cond_wait() is a cancellation point.
 *
 * The caller must already have signals deferred (ul_sigdefer != 0 is
 * asserted).  Returns the result of _cond_wait().
 */
int
sig_cond_wait(cond_t *cv, mutex_t *mp)
{
	int error;

	ASSERT(curthread->ul_sigdefer != 0);
	_private_testcancel();
	error = _cond_wait(cv, mp);
	if (error == EINTR && curthread->ul_cursig) {
		/*
		 * Cycle the mutex through sig_mutex_unlock() and
		 * sig_mutex_lock(), i.e. through a sigon()/sigoff() pair.
		 */
		sig_mutex_unlock(mp);
		/* take the deferred signal here */
		sig_mutex_lock(mp);
	}
	_private_testcancel();
	return (error);
}

/*
 * sig_cond_reltimedwait() is a cancellation point.
*/
int
sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts)
{
	int error;

	/* the caller must already have signals deferred */
	ASSERT(curthread->ul_sigdefer != 0);
	_private_testcancel();
	error = _cond_reltimedwait(cv, mp, ts);
	if (error == EINTR && curthread->ul_cursig) {
		/*
		 * Cycle the mutex through sig_mutex_unlock() and
		 * sig_mutex_lock(), i.e. through a sigon()/sigoff() pair.
		 */
		sig_mutex_unlock(mp);
		/* take the deferred signal here */
		sig_mutex_lock(mp);
	}
	_private_testcancel();
	return (error);
}

/*
 * Return non-zero if the mutex is owned by the calling thread
 * (MUTEX_OWNED) and its mutex_ownerpid equals this process's pid;
 * return zero otherwise.  Used for USYNC_PROCESS mutexes.
 */
static int
shared_mutex_held(mutex_t *mparg)
{
	/*
	 * The 'volatile' is necessary to make sure the compiler doesn't
	 * reorder the tests of the various components of the mutex.
	 * They must be tested in this order:
	 *	mutex_lockw
	 *	mutex_owner
	 *	mutex_ownerpid
	 * This relies on the fact that everywhere mutex_lockw is cleared,
	 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw
	 * is cleared, and that everywhere mutex_lockw is set, mutex_owner
	 * and mutex_ownerpid are set after mutex_lockw is set, and that
	 * mutex_lockw is set or cleared with a memory barrier.
	 */
	volatile mutex_t *mp = (volatile mutex_t *)mparg;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid);
}

/*
 * Some crufty old programs define their own version of _mutex_held()
 * to be simply return(1).  This breaks internal libc logic, so we
 * define a private version for exclusive use by libc, mutex_is_held(),
 * and also a new public function, __mutex_held(), to be used in new
 * code to circumvent these crufty old programs.
2424 */ 2425 #pragma weak mutex_held = mutex_is_held 2426 #pragma weak _mutex_held = mutex_is_held 2427 #pragma weak __mutex_held = mutex_is_held 2428 int 2429 mutex_is_held(mutex_t *mparg) 2430 { 2431 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2432 2433 if (mparg->mutex_type & USYNC_PROCESS) 2434 return (shared_mutex_held(mparg)); 2435 return (MUTEX_OWNED(mp, curthread)); 2436 } 2437 2438 #pragma weak _private_mutex_destroy = __mutex_destroy 2439 #pragma weak mutex_destroy = __mutex_destroy 2440 #pragma weak _mutex_destroy = __mutex_destroy 2441 #pragma weak pthread_mutex_destroy = __mutex_destroy 2442 #pragma weak _pthread_mutex_destroy = __mutex_destroy 2443 int 2444 __mutex_destroy(mutex_t *mp) 2445 { 2446 if (mp->mutex_type & USYNC_PROCESS) 2447 forget_lock(mp); 2448 (void) _memset(mp, 0, sizeof (*mp)); 2449 tdb_sync_obj_deregister(mp); 2450 return (0); 2451 } 2452 2453 #pragma weak mutex_consistent = __mutex_consistent 2454 #pragma weak _mutex_consistent = __mutex_consistent 2455 #pragma weak pthread_mutex_consistent_np = __mutex_consistent 2456 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 2457 int 2458 __mutex_consistent(mutex_t *mp) 2459 { 2460 /* 2461 * Do this only for an inconsistent, initialized robust lock 2462 * that we hold. For all other cases, return EINVAL. 2463 */ 2464 if (mutex_is_held(mp) && 2465 (mp->mutex_type & LOCK_ROBUST) && 2466 (mp->mutex_flag & LOCK_INITED) && 2467 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2468 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2469 mp->mutex_rcount = 0; 2470 return (0); 2471 } 2472 return (EINVAL); 2473 } 2474 2475 /* 2476 * Spin locks are separate from ordinary mutexes, 2477 * but we use the same data structure for them. 
*/

/*
 * Initialize a spin lock: zero it, set the type to USYNC_PROCESS when
 * pshared is PTHREAD_PROCESS_SHARED and to USYNC_THREAD otherwise, and
 * mark it initialized.  Always returns 0.
 */
#pragma weak pthread_spin_init = _pthread_spin_init
int
_pthread_spin_init(pthread_spinlock_t *lock, int pshared)
{
	mutex_t *mp = (mutex_t *)lock;

	(void) _memset(mp, 0, sizeof (*mp));
	if (pshared == PTHREAD_PROCESS_SHARED)
		mp->mutex_type = USYNC_PROCESS;
	else
		mp->mutex_type = USYNC_THREAD;
	mp->mutex_flag = LOCK_INITED;
	mp->mutex_magic = MUTEX_MAGIC;
	return (0);
}

/*
 * Destroy a spin lock by zeroing it.  Always returns 0.
 */
#pragma weak pthread_spin_destroy = _pthread_spin_destroy
int
_pthread_spin_destroy(pthread_spinlock_t *lock)
{
	(void) _memset(lock, 0, sizeof (*lock));
	return (0);
}

/*
 * Make a single attempt to take the spin lock.
 * Returns 0 when the lock was acquired, EBUSY when it was already set.
 */
#pragma weak pthread_spin_trylock = _pthread_spin_trylock
int
_pthread_spin_trylock(pthread_spinlock_t *lock)
{
	mutex_t *mp = (mutex_t *)lock;
	ulwp_t *self = curthread;
	int error = 0;

	no_preempt(self);
	if (set_lock_byte(&mp->mutex_lockw) != 0)
		error = EBUSY;
	else {
		/* owner fields are filled in only after the byte is set */
		mp->mutex_owner = (uintptr_t)self;
		if (mp->mutex_type == USYNC_PROCESS)
			mp->mutex_ownerpid = self->ul_uberdata->pid;
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	}
	preempt(self);
	return (error);
}

/*
 * Spin until the lock is acquired.  Always returns 0.
 */
#pragma weak pthread_spin_lock = _pthread_spin_lock
int
_pthread_spin_lock(pthread_spinlock_t *lock)
{
	mutex_t *mp = (mutex_t *)lock;
	ulwp_t *self = curthread;
	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
	int count = 0;

	ASSERT(!self->ul_critical || self->ul_bindflags);

	DTRACE_PROBE1(plockstat, mutex__spin, mp);

	/*
	 * We don't care whether the owner is running on a processor.
	 * We just spin because that's what this interface requires.
	 */
	for (;;) {
		/* 'count' saturates at INT_MAX; it is only reported to DTrace */
		if (count < INT_MAX)
			count++;
		if (*lockp == 0) {	/* lock byte appears to be clear */
			no_preempt(self);
			/* on success we leave the loop with preemption off */
			if (set_lock_byte(lockp) == 0)
				break;
			preempt(self);
		}
		SMT_PAUSE();
	}
	mp->mutex_owner = (uintptr_t)self;
	if (mp->mutex_type == USYNC_PROCESS)
		mp->mutex_ownerpid = self->ul_uberdata->pid;
	preempt(self);	/* pairs with the no_preempt() before the break */
	DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
	DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
	return (0);
}

/*
 * Release a spin lock: clear the owner fields, then clear the lock
 * word with an atomic swap.  Always returns 0.
 */
#pragma weak pthread_spin_unlock = _pthread_spin_unlock
int
_pthread_spin_unlock(pthread_spinlock_t *lock)
{
	mutex_t *mp = (mutex_t *)lock;
	ulwp_t *self = curthread;

	no_preempt(self);
	mp->mutex_owner = 0;
	mp->mutex_ownerpid = 0;
	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	(void) atomic_swap_32(&mp->mutex_lockword, 0);
	preempt(self);
	return (0);
}

#define	INITIAL_LOCKS	8	/* initial size of ul_heldlocks.array */

/*
 * Find/allocate an entry for 'lock' in our array of held locks.
 *
 * While ul_heldlockcnt is zero the thread uses the single inline slot
 * ul_heldlocks.single; otherwise ul_heldlocks.array holds
 * ul_heldlockcnt slots.  The returned slot always contains 'lock':
 * either it was already there, or it has just been stored in the first
 * empty slot or in newly allocated space.
 *
 * NOTE(review): the lmalloc() results below are used without a NULL
 * check -- confirm that lmalloc() cannot fail.
 */
static mutex_t **
find_lock_entry(mutex_t *lock)
{
	ulwp_t *self = curthread;
	mutex_t **remembered = NULL;
	mutex_t **lockptr;
	uint_t nlocks;

	if ((nlocks = self->ul_heldlockcnt) != 0)
		lockptr = self->ul_heldlocks.array;
	else {
		nlocks = 1;
		lockptr = &self->ul_heldlocks.single;
	}

	/* look for 'lock', noting the first empty slot on the way */
	for (; nlocks; nlocks--, lockptr++) {
		if (*lockptr == lock)
			return (lockptr);
		if (*lockptr == NULL && remembered == NULL)
			remembered = lockptr;
	}
	if (remembered != NULL) {
		*remembered = lock;
		return (remembered);
	}

	/*
	 * No entry available.  Allocate more space, converting
	 * the single entry into an array of entries if necessary.
	 */
	if ((nlocks = self->ul_heldlockcnt) == 0) {
		/*
		 * Initial allocation of the array.
		 * Convert the single entry into an array.
		 */
		self->ul_heldlockcnt = nlocks = INITIAL_LOCKS;
		lockptr = lmalloc(nlocks * sizeof (mutex_t *));
		/*
		 * The single entry becomes the first entry in the array.
		 */
		*lockptr = self->ul_heldlocks.single;
		self->ul_heldlocks.array = lockptr;
		/*
		 * Return the next available entry in the array.
		 */
		*++lockptr = lock;
		return (lockptr);
	}
	/*
	 * Reallocate the array, double the size each time.
	 */
	lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *));
	(void) _memcpy(lockptr, self->ul_heldlocks.array,
	    nlocks * sizeof (mutex_t *));
	lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
	self->ul_heldlocks.array = lockptr;
	self->ul_heldlockcnt *= 2;
	/*
	 * Return the next available entry in the newly allocated array.
	 */
	*(lockptr += nlocks) = lock;
	return (lockptr);
}

/*
 * Insert 'lock' into our list of held locks.
 * Currently only used for LOCK_ROBUST mutexes.
 */
void
remember_lock(mutex_t *lock)
{
	(void) find_lock_entry(lock);
}

/*
 * Remove 'lock' from our list of held locks.
 * Currently only used for LOCK_ROBUST mutexes.
 *
 * find_lock_entry() inserts 'lock' when it is not already present, so
 * forgetting an unknown lock claims (and possibly allocates) a slot
 * only to clear it again.
 */
void
forget_lock(mutex_t *lock)
{
	*find_lock_entry(lock) = NULL;
}

/*
 * Free the array of held locks.
 * Nothing is freed while the inline single slot is in use
 * (ul_heldlockcnt == 0); the count and pointer are reset either way.
 */
void
heldlock_free(ulwp_t *ulwp)
{
	uint_t nlocks;

	if ((nlocks = ulwp->ul_heldlockcnt) != 0)
		lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
	ulwp->ul_heldlockcnt = 0;
	ulwp->ul_heldlocks.array = NULL;
}

/*
 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD.
 * Called from _thrp_exit() to deal with abandoned locks.
2684 */ 2685 void 2686 heldlock_exit(void) 2687 { 2688 ulwp_t *self = curthread; 2689 mutex_t **lockptr; 2690 uint_t nlocks; 2691 mutex_t *mp; 2692 2693 if ((nlocks = self->ul_heldlockcnt) != 0) 2694 lockptr = self->ul_heldlocks.array; 2695 else { 2696 nlocks = 1; 2697 lockptr = &self->ul_heldlocks.single; 2698 } 2699 2700 for (; nlocks; nlocks--, lockptr++) { 2701 /* 2702 * The kernel takes care of transitioning held 2703 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 2704 * We avoid that case here. 2705 */ 2706 if ((mp = *lockptr) != NULL && 2707 mutex_is_held(mp) && 2708 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 2709 LOCK_ROBUST) { 2710 mp->mutex_rcount = 0; 2711 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 2712 mp->mutex_flag |= LOCK_OWNERDEAD; 2713 (void) mutex_unlock_internal(mp, 1); 2714 } 2715 } 2716 2717 heldlock_free(self); 2718 } 2719 2720 #pragma weak cond_init = _cond_init 2721 /* ARGSUSED2 */ 2722 int 2723 _cond_init(cond_t *cvp, int type, void *arg) 2724 { 2725 if (type != USYNC_THREAD && type != USYNC_PROCESS) 2726 return (EINVAL); 2727 (void) _memset(cvp, 0, sizeof (*cvp)); 2728 cvp->cond_type = (uint16_t)type; 2729 cvp->cond_magic = COND_MAGIC; 2730 return (0); 2731 } 2732 2733 /* 2734 * cond_sleep_queue(): utility function for cond_wait_queue(). 2735 * 2736 * Go to sleep on a condvar sleep queue, expect to be waked up 2737 * by someone calling cond_signal() or cond_broadcast() or due 2738 * to receiving a UNIX signal or being cancelled, or just simply 2739 * due to a spurious wakeup (like someome calling forkall()). 2740 * 2741 * The associated mutex is *not* reacquired before returning. 2742 * That must be done by the caller of cond_sleep_queue(). 
*/
static int
cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;
	queue_head_t *mqp;
	lwpid_t lwpid;
	int signalled;
	int error;
	int release_all;

	/*
	 * Put ourself on the CV sleep queue, unlock the mutex, then
	 * park ourself and unpark a candidate lwp to grab the mutex.
	 * We must go onto the CV sleep queue before dropping the
	 * mutex in order to guarantee atomicity of the operation.
	 */
	self->ul_sp = stkptr();
	qp = queue_lock(cvp, CV);
	enqueue(qp, self, cvp, CV);
	cvp->cond_waiters_user = 1;
	self->ul_cvmutex = mp;
	/*
	 * ul_cv_wake is set for timed waits; cond_signal_internal()
	 * unparks such a waiter directly instead of moving it to the
	 * mutex sleep queue.
	 */
	self->ul_cv_wake = (tsp != NULL);
	self->ul_signalled = 0;
	/*
	 * A mutex still flagged LOCK_OWNERDEAD at this point is turned
	 * into LOCK_NOTRECOVERABLE before it is released, and a
	 * not-recoverable mutex is released with release_all set.
	 */
	if (mp->mutex_flag & LOCK_OWNERDEAD) {
		mp->mutex_flag &= ~LOCK_OWNERDEAD;
		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
	}
	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
	lwpid = mutex_unlock_queue(mp, release_all);
	for (;;) {
		/* qp is locked on every entry to the loop body */
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if (lwpid != 0) {
			lwpid = preempt_unpark(self, lwpid);
			preempt(self);
		}
		/*
		 * We may have a deferred signal present,
		 * in which case we should return EINTR.
		 * Also, we may have received a SIGCANCEL; if so
		 * and we are cancelable we should return EINTR.
		 * We force an immediate EINTR return from
		 * __lwp_park() by turning our parking flag off.
		 */
		if (self->ul_cursig != 0 ||
		    (self->ul_cancelable && self->ul_cancel_pending))
			set_parking_flag(self, 0);
		/*
		 * __lwp_park() will return the residual time in tsp
		 * if we are unparked before the timeout expires.
		 */
		error = __lwp_park(tsp, lwpid);
		set_parking_flag(self, 0);
		lwpid = 0;	/* unpark the other lwp only once */
		/*
		 * We were waked up by cond_signal(), cond_broadcast(),
		 * by an interrupt or timeout (EINTR or ETIME),
		 * or we may just have gotten a spurious wakeup.
		 */
		qp = queue_lock(cvp, CV);
		mqp = queue_lock(mp, MX);
		/* off every sleep queue: we were really awakened */
		if (self->ul_sleepq == NULL)
			break;
		/*
		 * We are on either the condvar sleep queue or the
		 * mutex sleep queue.  Break out of the sleep if we
		 * were interrupted or we timed out (EINTR or ETIME).
		 * Else this is a spurious wakeup; continue the loop.
		 */
		if (self->ul_sleepq == mqp) {		/* mutex queue */
			if (error) {
				mp->mutex_waiters = dequeue_self(mqp, mp);
				break;
			}
			tsp = NULL;	/* no more timeout */
		} else if (self->ul_sleepq == qp) {	/* condvar queue */
			if (error) {
				cvp->cond_waiters_user = dequeue_self(qp, cvp);
				break;
			}
			/*
			 * Else a spurious wakeup on the condvar queue.
			 * __lwp_park() has already adjusted the timeout.
			 */
		} else {
			thr_panic("cond_sleep_queue(): thread not on queue");
		}
		/*
		 * Going around again: only the mutex queue lock is
		 * dropped here; qp is released at the top of the loop.
		 */
		queue_unlock(mqp);
	}

	/* every break above leaves both qp and mqp locked */
	self->ul_sp = 0;
	ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0);
	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
	    self->ul_wchan == NULL);

	signalled = self->ul_signalled;
	self->ul_signalled = 0;
	queue_unlock(qp);
	queue_unlock(mqp);

	/*
	 * If we were concurrently cond_signal()d and any of:
	 * received a UNIX signal, were cancelled, or got a timeout,
	 * then perform another cond_signal() to avoid consuming it.
	 */
	if (error && signalled)
		(void) cond_signal_internal(cvp);

	return (error);
}

/*
 * Wait on 'cvp' using the user-level sleep queues, then reacquire 'mp'.
 *
 *	tsp	relative timeout, or NULL for an untimed wait
 *	msp	mutex statistics block, or NULL when none is kept
 *
 * Returns the result of cond_sleep_queue(), unless reacquiring the
 * mutex yields a non-zero error, in which case that error is returned.
 */
int
cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp,
	tdb_mutex_stats_t *msp)
{
	ulwp_t *self = curthread;
	int error;
	int merror;

	/*
	 * The old thread library was programmed to defer signals
	 * while in cond_wait() so that the associated mutex would
	 * be guaranteed to be held when the application signal
	 * handler was invoked.
	 *
	 * We do not behave this way by default; the state of the
	 * associated mutex in the signal handler is undefined.
	 *
	 * To accommodate applications that depend on the old
	 * behavior, the _THREAD_COND_WAIT_DEFER environment
	 * variable can be set to 1 and we will behave in the
	 * old way with respect to cond_wait().
	 */
	if (self->ul_cond_wait_defer)
		sigoff(self);

	error = cond_sleep_queue(cvp, mp, tsp);

	/*
	 * Reacquire the mutex.
	 */
	if ((merror = mutex_trylock_adaptive(mp)) == EBUSY)
		merror = mutex_lock_queue(self, msp, mp, NULL);
	if (merror)
		error = merror;
	/* EOWNERDEAD is treated as an acquisition for hold-time accounting */
	if (msp && (merror == 0 || merror == EOWNERDEAD))
		record_begin_hold(msp);

	/*
	 * Take any deferred signal now, after we have reacquired the mutex.
	 */
	if (self->ul_cond_wait_defer)
		sigon(self);

	return (error);
}

/*
 * cond_sleep_kernel(): utility function for cond_wait_kernel().
 * See the comment ahead of cond_sleep_queue(), above.
*/
static int
cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	int mtype = mp->mutex_type;
	ulwp_t *self = curthread;
	int error;

	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
		_ceil_prio_waive();

	self->ul_sp = stkptr();
	self->ul_wchan = cvp;
	/*
	 * Only the ownership fields are cleared here (plus the lock byte
	 * for LOCK_PRIO_INHERIT); the mutex itself is handed to
	 * ___lwp_cond_wait() below.
	 */
	mp->mutex_owner = 0;
	mp->mutex_ownerpid = 0;
	if (mtype & LOCK_PRIO_INHERIT)
		mp->mutex_lockw = LOCKCLEAR;
	/*
	 * ___lwp_cond_wait() returns immediately with EINTR if
	 * set_parking_flag(self,0) is called on this lwp before it
	 * goes to sleep in the kernel.  sigacthandler() calls this
	 * when a deferred signal is noted.  This assures that we don't
	 * get stuck in ___lwp_cond_wait() with all signals blocked
	 * due to taking a deferred signal before going to sleep.
	 */
	set_parking_flag(self, 1);
	if (self->ul_cursig != 0 ||
	    (self->ul_cancelable && self->ul_cancel_pending))
		set_parking_flag(self, 0);
	error = ___lwp_cond_wait(cvp, mp, tsp, 1);
	set_parking_flag(self, 0);
	self->ul_sp = 0;
	self->ul_wchan = NULL;
	return (error);
}

/*
 * Wait on 'cvp' by sleeping in the kernel (cond_sleep_kernel()), then
 * reacquire 'mp' with _private_mutex_lock().  'tsp' is a relative
 * timeout or NULL.  A non-zero mutex error replaces the wait result.
 */
int
cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	int error;
	int merror;

	/*
	 * See the large comment in cond_wait_queue(), above.
	 */
	if (self->ul_cond_wait_defer)
		sigoff(self);

	error = cond_sleep_kernel(cvp, mp, tsp);

	/*
	 * Override the return code from ___lwp_cond_wait()
	 * with any non-zero return code from mutex_lock().
	 * This addresses robust lock failures in particular;
	 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE
	 * errors in order to take corrective action.
	 */
	if ((merror = _private_mutex_lock(mp)) != 0)
		error = merror;

	/*
	 * Take any deferred signal now, after we have reacquired the mutex.
	 */
	if (self->ul_cond_wait_defer)
		sigon(self);

	return (error);
}

/*
 * Common code for _cond_wait() and _cond_timedwait()
 *
 *	cvp	condition variable to wait on
 *	mp	mutex held by the caller; released for the wait and
 *		reacquired before returning
 *	tsp	relative timeout, or NULL for an untimed wait
 *
 * Returns EINVAL for an invalid timeout (before any state is changed),
 * otherwise the result of cond_wait_kernel() or cond_wait_queue().
 */
int
cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	int mtype = mp->mutex_type;
	hrtime_t begin_sleep = 0;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
	uint8_t rcount;
	int error = 0;

	/*
	 * The SUSV3 Posix spec for pthread_cond_timedwait() states:
	 *	Except in the case of [ETIMEDOUT], all these error checks
	 *	shall act as if they were performed immediately at the
	 *	beginning of processing for the function and shall cause
	 *	an error return, in effect, prior to modifying the state
	 *	of the mutex specified by mutex or the condition variable
	 *	specified by cond.
	 * Therefore, we must return EINVAL now if the timeout is invalid.
	 */
	if (tsp != NULL &&
	    (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC))
		return (EINVAL);

	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_sp = stkptr();
		self->ul_wchan = cvp;
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = cvp;
		tdb_event(TD_SLEEP, udp);
		self->ul_sp = 0;
	}
	if (csp) {
		if (tsp)
			tdb_incr(csp->cond_timedwait);
		else
			tdb_incr(csp->cond_wait);
	}
	/*
	 * record_hold_time() returns the current time, so it doubles as
	 * the start-of-sleep timestamp when mutex statistics are kept.
	 */
	if (msp)
		begin_sleep = record_hold_time(msp);
	else if (csp)
		begin_sleep = gethrtime();

	if (self->ul_error_detection) {
		if (!mutex_is_held(mp))
			lock_error(mp, "cond_wait", cvp, NULL);
		if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0)
			lock_error(mp, "recursive mutex in cond_wait",
			    cvp, NULL);
		if (cvp->cond_type & USYNC_PROCESS) {
			if (!(mtype & USYNC_PROCESS))
				lock_error(mp, "cond_wait", cvp,
				    "condvar process-shared, "
				    "mutex process-private");
		} else {
			if (mtype & USYNC_PROCESS)
				lock_error(mp, "cond_wait", cvp,
				    "condvar process-private, "
				    "mutex process-shared");
		}
	}

	/*
	 * We deal with recursive mutexes by completely
	 * dropping the lock and restoring the recursion
	 * count after waking up.  This is arguably wrong,
	 * but it obeys the principle of least astonishment.
	 */
	rcount = mp->mutex_rcount;
	mp->mutex_rcount = 0;
	/*
	 * Sleep in the kernel when the mutex is process-shared or uses
	 * priority inheritance/protection, or when the condvar is
	 * process-shared; otherwise use the user-level sleep queues.
	 */
	if ((mtype &
	    (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) |
	    (cvp->cond_type & USYNC_PROCESS))
		error = cond_wait_kernel(cvp, mp, tsp);
	else
		error = cond_wait_queue(cvp, mp, tsp, msp);
	mp->mutex_rcount = rcount;

	if (csp) {
		hrtime_t lapse = gethrtime() - begin_sleep;
		if (tsp == NULL)
			csp->cond_wait_sleep_time += lapse;
		else {
			csp->cond_timedwait_sleep_time += lapse;
			if (error == ETIME)
				tdb_incr(csp->cond_timedwait_timeout);
		}
	}
	return (error);
}

/*
 * cond_wait() is a cancellation point but _cond_wait() is not.
 * System libraries call the non-cancellation version.
 * It is expected that only applications call the cancellation version.
 */
int
_cond_wait(cond_t *cvp, mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	uberflags_t *gflags;

	/*
	 * Optimize the common case of USYNC_THREAD plus
	 * no error detection, no lock statistics, and no event tracing.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL &&
	    (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted |
	    self->ul_td_events_enable |
	    udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0)
		return (cond_wait_queue(cvp, mp, NULL, NULL));

	/*
	 * Else do it the long way.
	 */
	return (cond_wait_common(cvp, mp, NULL));
}

/*
 * Cancellation-point wrapper around _cond_wait().
 * _canceloff() is used after an EINTR return and _canceloff_nocancel()
 * after any other return.  The result of _cond_wait() is passed through.
 */
int
cond_wait(cond_t *cvp, mutex_t *mp)
{
	int error;

	_cancelon();
	error = _cond_wait(cvp, mp);
	if (error == EINTR)
		_canceloff();
	else
		_canceloff_nocancel();
	return (error);
}

/*
 * POSIX flavor of cond_wait(): EINTR is reported to the caller as 0.
 */
#pragma weak pthread_cond_wait = _pthread_cond_wait
int
_pthread_cond_wait(cond_t *cvp, mutex_t *mp)
{
	int error;

	error = cond_wait(cvp, mp);
	return ((error == EINTR)? 0 : error);
}

/*
 * cond_timedwait() is a cancellation point but _cond_timedwait() is not.
 * System libraries call the non-cancellation version.
 * It is expected that only applications call the cancellation version.
 *
 * 'abstime' is an absolute time on the condvar's clock
 * (cvp->cond_clockid); any clock other than CLOCK_REALTIME or
 * CLOCK_HIGHRES is treated as CLOCK_REALTIME.
 */
int
_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
{
	clockid_t clock_id = cvp->cond_clockid;
	timespec_t reltime;
	int error;

	if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES)
		clock_id = CLOCK_REALTIME;
	abstime_to_reltime(clock_id, abstime, &reltime);
	error = cond_wait_common(cvp, mp, &reltime);
	if (error == ETIME && clock_id == CLOCK_HIGHRES) {
		/*
		 * Don't return ETIME if we didn't really get a timeout.
		 * This can happen if we return because someone resets
		 * the system clock.  Just return zero in this case,
		 * giving a spurious wakeup but not a timeout.
		 */
		/*
		 * NOTE(review): tv_sec is narrowed to uint32_t before
		 * the multiply -- confirm this is intended.
		 */
		if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC +
		    abstime->tv_nsec > gethrtime())
			error = 0;
	}
	return (error);
}

/*
 * Cancellation-point wrapper around _cond_timedwait().
 * _canceloff() is used after an EINTR return and _canceloff_nocancel()
 * after any other return.  The result is passed through.
 */
int
cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
{
	int error;

	_cancelon();
	error = _cond_timedwait(cvp, mp, abstime);
	if (error == EINTR)
		_canceloff();
	else
		_canceloff_nocancel();
	return (error);
}

/*
 * POSIX flavor of cond_timedwait(): ETIME is reported as ETIMEDOUT
 * and EINTR as 0.
 */
#pragma weak pthread_cond_timedwait = _pthread_cond_timedwait
int
_pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
{
	int error;

	error = cond_timedwait(cvp, mp, abstime);
	if (error == ETIME)
		error = ETIMEDOUT;
	else if (error == EINTR)
		error = 0;
	return (error);
}

/*
 * cond_reltimedwait() is a cancellation point but _cond_reltimedwait()
 * is not.  System libraries call the non-cancellation version.
 * It is expected that only applications call the cancellation version.
*/
int
_cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
{
	/*
	 * Work on a private copy: cond_wait_common() takes a writable
	 * timespec_t, while the caller's 'reltime' is const.
	 */
	timespec_t tslocal = *reltime;

	return (cond_wait_common(cvp, mp, &tslocal));
}

/*
 * Cancellation-point wrapper around _cond_reltimedwait().
 * _canceloff() is used after an EINTR return and _canceloff_nocancel()
 * after any other return.  The result is passed through.
 */
#pragma weak cond_reltimedwait = _cond_reltimedwait_cancel
int
_cond_reltimedwait_cancel(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
{
	int error;

	_cancelon();
	error = _cond_reltimedwait(cvp, mp, reltime);
	if (error == EINTR)
		_canceloff();
	else
		_canceloff_nocancel();
	return (error);
}

/*
 * pthread-style relative timed wait: ETIME is reported as ETIMEDOUT
 * and EINTR as 0.
 */
#pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np
int
_pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp,
	const timespec_t *reltime)
{
	int error;

	error = _cond_reltimedwait_cancel(cvp, mp, reltime);
	if (error == ETIME)
		error = ETIMEDOUT;
	else if (error == EINTR)
		error = 0;
	return (error);
}

/*
 * Wake one waiter on 'cvp'.
 *
 * Kernel-level waiters are signalled through __lwp_cond_signal().
 * One user-level waiter, if any, is then taken off the condvar sleep
 * queue and either unparked or moved to the sleep queue of the mutex
 * it will reacquire.
 *
 * Returns 0, or the result of __lwp_cond_signal() when that was called.
 */
#pragma weak pthread_cond_signal = cond_signal_internal
#pragma weak _pthread_cond_signal = cond_signal_internal
#pragma weak cond_signal = cond_signal_internal
#pragma weak _cond_signal = cond_signal_internal
int
cond_signal_internal(cond_t *cvp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
	int error = 0;
	queue_head_t *qp;
	mutex_t *mp;
	queue_head_t *mqp;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	ulwp_t *next;
	ulwp_t **suspp = NULL;
	ulwp_t *susprev;

	if (csp)
		tdb_incr(csp->cond_signal);

	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
		error = __lwp_cond_signal(cvp);

	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
		return (error);

	/*
	 * Move someone from the condvar sleep queue to the mutex sleep
	 * queue for the mutex that he will acquire on being waked up.
	 * We can do this only if we own the mutex he will acquire.
	 * If we do not own the mutex, or if his ul_cv_wake flag
	 * is set, just dequeue and unpark him.
	 */
	qp = queue_lock(cvp, CV);
	/*
	 * Only entries whose ul_wchan is cvp are considered.  'ulwpp'
	 * tracks the link that points at the current entry and 'prev'
	 * the entry before it, for the unlinking done further down.
	 */
	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		if (ulwp->ul_wchan == cvp) {
			if (!ulwp->ul_stop)
				break;
			/*
			 * Try not to dequeue a suspended thread.
			 * This mimics the old libthread's behavior.
			 */
			if (suspp == NULL) {
				suspp = ulwpp;
				susprev = prev;
			}
		}
	}
	/* only suspended waiters were found: take the first of them */
	if (ulwp == NULL && suspp != NULL) {
		ulwp = *(ulwpp = suspp);
		prev = susprev;
		suspp = NULL;
	}
	if (ulwp == NULL) {	/* no one on the sleep queue */
		cvp->cond_waiters_user = 0;
		queue_unlock(qp);
		return (error);
	}
	/*
	 * Scan the remainder of the CV queue for another waiter.
	 */
	if (suspp != NULL) {
		/* a suspended waiter was skipped earlier and is still queued */
		next = *suspp;
	} else {
		for (next = ulwp->ul_link; next != NULL; next = next->ul_link)
			if (next->ul_wchan == cvp)
				break;
	}
	if (next == NULL)
		cvp->cond_waiters_user = 0;

	/*
	 * Inform the thread that he was the recipient of a cond_signal().
	 * This lets him deal with cond_signal() and, concurrently,
	 * one or more of a cancellation, a UNIX signal, or a timeout.
	 * These latter conditions must not consume a cond_signal().
	 */
	ulwp->ul_signalled = 1;

	/*
	 * Dequeue the waiter but leave his ul_sleepq non-NULL
	 * while we move him to the mutex queue so that he can
	 * deal properly with spurious wakeups.
	 */
	*ulwpp = ulwp->ul_link;
	ulwp->ul_link = NULL;
	if (qp->qh_tail == ulwp)
		qp->qh_tail = prev;
	qp->qh_qlen--;

	mp = ulwp->ul_cvmutex;		/* the mutex he will acquire */
	ulwp->ul_cvmutex = NULL;
	ASSERT(mp != NULL);

	if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
		/* the lwpid is read before the queue lock is dropped */
		lwpid_t lwpid = ulwp->ul_lwpid;

		no_preempt(self);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		ulwp->ul_cv_wake = 0;
		queue_unlock(qp);
		(void) __lwp_unpark(lwpid);
		preempt(self);
	} else {
		/* we own the mutex: requeue the waiter on its sleep queue */
		mqp = queue_lock(mp, MX);
		enqueue(mqp, ulwp, mp, MX);
		mp->mutex_waiters = 1;
		queue_unlock(mqp);
		queue_unlock(qp);
	}

	return (error);
}

/*
 * Utility function called by mutex_wakeup_all(), cond_broadcast(),
 * and rw_queue_release() to (re)allocate a big buffer to hold the
 * lwpids of all the threads to be set running after they are removed
 * from their sleep queues.  Since we are holding a queue lock, we
 * cannot call any function that might acquire a lock.  mmap(), munmap(),
 * lwp_unpark_all() are simple system calls and are safe in this regard.
 *
 *	lwpid		current buffer, holding *nlwpid_ptr entries
 *	nlwpid_ptr	in: number of entries in use (must equal the
 *			capacity); out: reset to 0 if allocation fails
 *	maxlwps_ptr	in: current capacity; out: new capacity on success
 *
 * Returns the buffer to use from now on: the new mapping on success,
 * or the unchanged 'lwpid' when mmap fails, in which case every lwp
 * gathered so far has already been unparked.
 */
lwpid_t *
alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr)
{
	/*
	 * Allocate NEWLWPS ids on the first overflow.
	 * Double the allocation each time after that.
	 */
	int nlwpid = *nlwpid_ptr;
	int maxlwps = *maxlwps_ptr;
	int first_allocation;
	int newlwps;
	void *vaddr;

	ASSERT(nlwpid == maxlwps);

	/*
	 * A capacity of exactly MAXLWPS marks the caller's original
	 * buffer, which was not obtained from mmap and so is never
	 * unmapped below.
	 */
	first_allocation = (maxlwps == MAXLWPS);
	newlwps = first_allocation? NEWLWPS : 2 * maxlwps;
	vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t),
	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0);

	if (vaddr == MAP_FAILED) {
		/*
		 * Let's hope this never happens.
		 * If it does, then we have a terrible
		 * thundering herd on our hands.
		 */
		(void) __lwp_unpark_all(lwpid, nlwpid);
		*nlwpid_ptr = 0;
	} else {
		(void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t));
		if (!first_allocation)
			(void) _private_munmap(lwpid,
			    maxlwps * sizeof (lwpid_t));
		lwpid = vaddr;
		*maxlwps_ptr = newlwps;
	}

	return (lwpid);
}

#pragma weak pthread_cond_broadcast = cond_broadcast_internal
#pragma weak _pthread_cond_broadcast = cond_broadcast_internal
#pragma weak cond_broadcast = cond_broadcast_internal
#pragma weak _cond_broadcast = cond_broadcast_internal
int
cond_broadcast_internal(cond_t *cvp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
	int error = 0;
	queue_head_t *qp;
	mutex_t *mp;
	mutex_t *mp_cache = NULL;
	queue_head_t *mqp = NULL;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	int nlwpid = 0;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	if (csp)
		tdb_incr(csp->cond_broadcast);

	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
		error = __lwp_cond_broadcast(cvp);

	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
		return (error);

	/*
	 * Move everyone from the condvar sleep queue to the mutex sleep
	 * queue for the mutex that they will acquire on being waked up.
	 * We can do this only if we own the mutex they will acquire.
	 * If we do not own the mutex, or if their ul_cv_wake flag
	 * is set, just dequeue and unpark them.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.
If we run out of space in our 3435 * on-stack buffer, we need to allocate more but we can't call 3436 * lmalloc() because we are holding a queue lock when the overflow 3437 * occurs and lmalloc() acquires a lock. We can't use alloca() 3438 * either because the application may have allocated a small 3439 * stack and we don't want to overrun the stack. So we call 3440 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3441 * system call directly since that path acquires no locks. 3442 */ 3443 qp = queue_lock(cvp, CV); 3444 cvp->cond_waiters_user = 0; 3445 ulwpp = &qp->qh_head; 3446 while ((ulwp = *ulwpp) != NULL) { 3447 if (ulwp->ul_wchan != cvp) { 3448 prev = ulwp; 3449 ulwpp = &ulwp->ul_link; 3450 continue; 3451 } 3452 *ulwpp = ulwp->ul_link; 3453 ulwp->ul_link = NULL; 3454 if (qp->qh_tail == ulwp) 3455 qp->qh_tail = prev; 3456 qp->qh_qlen--; 3457 mp = ulwp->ul_cvmutex; /* his mutex */ 3458 ulwp->ul_cvmutex = NULL; 3459 ASSERT(mp != NULL); 3460 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3461 ulwp->ul_sleepq = NULL; 3462 ulwp->ul_wchan = NULL; 3463 ulwp->ul_cv_wake = 0; 3464 if (nlwpid == maxlwps) 3465 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3466 lwpid[nlwpid++] = ulwp->ul_lwpid; 3467 } else { 3468 if (mp != mp_cache) { 3469 mp_cache = mp; 3470 if (mqp != NULL) 3471 queue_unlock(mqp); 3472 mqp = queue_lock(mp, MX); 3473 } 3474 enqueue(mqp, ulwp, mp, MX); 3475 mp->mutex_waiters = 1; 3476 } 3477 } 3478 if (mqp != NULL) 3479 queue_unlock(mqp); 3480 if (nlwpid == 0) { 3481 queue_unlock(qp); 3482 } else { 3483 no_preempt(self); 3484 queue_unlock(qp); 3485 if (nlwpid == 1) 3486 (void) __lwp_unpark(lwpid[0]); 3487 else 3488 (void) __lwp_unpark_all(lwpid, nlwpid); 3489 preempt(self); 3490 } 3491 if (lwpid != buffer) 3492 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 3493 return (error); 3494 } 3495 3496 #pragma weak pthread_cond_destroy = _cond_destroy 3497 #pragma weak _pthread_cond_destroy = _cond_destroy 3498 #pragma weak 
cond_destroy = _cond_destroy 3499 int 3500 _cond_destroy(cond_t *cvp) 3501 { 3502 cvp->cond_magic = 0; 3503 tdb_sync_obj_deregister(cvp); 3504 return (0); 3505 } 3506 3507 #if defined(THREAD_DEBUG) 3508 void 3509 assert_no_libc_locks_held(void) 3510 { 3511 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3512 } 3513 #endif 3514 3515 /* protected by link_lock */ 3516 uint64_t spin_lock_spin; 3517 uint64_t spin_lock_spin2; 3518 uint64_t spin_lock_sleep; 3519 uint64_t spin_lock_wakeup; 3520 3521 /* 3522 * Record spin lock statistics. 3523 * Called by a thread exiting itself in thrp_exit(). 3524 * Also called via atexit() from the thread calling 3525 * exit() to do all the other threads as well. 3526 */ 3527 void 3528 record_spin_locks(ulwp_t *ulwp) 3529 { 3530 spin_lock_spin += ulwp->ul_spin_lock_spin; 3531 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3532 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3533 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3534 ulwp->ul_spin_lock_spin = 0; 3535 ulwp->ul_spin_lock_spin2 = 0; 3536 ulwp->ul_spin_lock_sleep = 0; 3537 ulwp->ul_spin_lock_wakeup = 0; 3538 } 3539 3540 /* 3541 * atexit function: dump the queue statistics to stderr. 
3542 */ 3543 #if !defined(__lint) 3544 #define fprintf _fprintf 3545 #endif 3546 #include <stdio.h> 3547 void 3548 dump_queue_statistics(void) 3549 { 3550 uberdata_t *udp = curthread->ul_uberdata; 3551 queue_head_t *qp; 3552 int qn; 3553 uint64_t spin_lock_total = 0; 3554 3555 if (udp->queue_head == NULL || thread_queue_dump == 0) 3556 return; 3557 3558 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3559 fprintf(stderr, "queue# lockcount max qlen\n") < 0) 3560 return; 3561 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3562 if (qp->qh_lockcount == 0) 3563 continue; 3564 spin_lock_total += qp->qh_lockcount; 3565 if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3566 (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3567 return; 3568 } 3569 3570 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3571 fprintf(stderr, "queue# lockcount max qlen\n") < 0) 3572 return; 3573 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3574 if (qp->qh_lockcount == 0) 3575 continue; 3576 spin_lock_total += qp->qh_lockcount; 3577 if (fprintf(stderr, "%5d %12llu%12u\n", qn, 3578 (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0) 3579 return; 3580 } 3581 3582 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3583 (u_longlong_t)spin_lock_total); 3584 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3585 (u_longlong_t)spin_lock_spin); 3586 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3587 (u_longlong_t)spin_lock_spin2); 3588 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3589 (u_longlong_t)spin_lock_sleep); 3590 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3591 (u_longlong_t)spin_lock_wakeup); 3592 } 3593