1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #define atomic_cas_64 _atomic_cas_64 30 31 #include "lint.h" 32 #include "thr_uberdata.h" 33 #include <sys/rtpriocntl.h> 34 #include <sys/sdt.h> 35 #include <atomic.h> 36 37 #if defined(THREAD_DEBUG) 38 #define INCR32(x) (((x) != UINT32_MAX)? (x)++ : 0) 39 #define INCR(x) ((x)++) 40 #define DECR(x) ((x)--) 41 #define MAXINCR(m, x) ((m < ++x)? (m = x) : 0) 42 #else 43 #define INCR32(x) 44 #define INCR(x) 45 #define DECR(x) 46 #define MAXINCR(m, x) 47 #endif 48 49 /* 50 * This mutex is initialized to be held by lwp#1. 51 * It is used to block a thread that has returned from a mutex_lock() 52 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 53 */ 54 mutex_t stall_mutex = DEFAULTMUTEX; 55 56 static int shared_mutex_held(mutex_t *); 57 static int mutex_queuelock_adaptive(mutex_t *); 58 static void mutex_wakeup_all(mutex_t *); 59 60 /* 61 * Lock statistics support functions. 62 */ 63 void 64 record_begin_hold(tdb_mutex_stats_t *msp) 65 { 66 tdb_incr(msp->mutex_lock); 67 msp->mutex_begin_hold = gethrtime(); 68 } 69 70 hrtime_t 71 record_hold_time(tdb_mutex_stats_t *msp) 72 { 73 hrtime_t now = gethrtime(); 74 75 if (msp->mutex_begin_hold) 76 msp->mutex_hold_time += now - msp->mutex_begin_hold; 77 msp->mutex_begin_hold = 0; 78 return (now); 79 } 80 81 /* 82 * Called once at library initialization. 83 */ 84 void 85 mutex_setup(void) 86 { 87 if (set_lock_byte(&stall_mutex.mutex_lockw)) 88 thr_panic("mutex_setup() cannot acquire stall_mutex"); 89 stall_mutex.mutex_owner = (uintptr_t)curthread; 90 } 91 92 /* 93 * The default spin count of 1000 is experimentally determined. 94 * On sun4u machines with any number of processors it could be raised 95 * to 10,000 but that (experimentally) makes almost no difference. 96 * The environment variable: 97 * _THREAD_ADAPTIVE_SPIN=count 98 * can be used to override and set the count in the range [0 .. 1,000,000]. 99 */ 100 int thread_adaptive_spin = 1000; 101 uint_t thread_max_spinners = 100; 102 int thread_queue_verify = 0; 103 static int ncpus; 104 105 /* 106 * Distinguish spinning for queue locks from spinning for regular locks. 107 * We try harder to acquire queue locks by spinning. 108 * The environment variable: 109 * _THREAD_QUEUE_SPIN=count 110 * can be used to override and set the count in the range [0 .. 1,000,000]. 
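 *	For example, a test run might (hypothetically) be started as:
 *		_THREAD_QUEUE_SPIN=50000 ./a.out
 *	to make queue-lock spinning five times more persistent than the default.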
111  */
112 int	thread_queue_spin = 10000;
113
114 #define	ALL_ATTRIBUTES \
115 	(LOCK_RECURSIVE | LOCK_ERRORCHECK | \
116 	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \
117 	LOCK_ROBUST)
118
119 /*
120  * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
121  * augmented by zero or more of the flags:
122  *	LOCK_RECURSIVE
123  *	LOCK_ERRORCHECK
124  *	LOCK_PRIO_INHERIT
125  *	LOCK_PRIO_PROTECT
126  *	LOCK_ROBUST
127  */
128 #pragma weak mutex_init = __mutex_init
129 #pragma weak _mutex_init = __mutex_init
130 /* ARGSUSED2 */
131 int
132 __mutex_init(mutex_t *mp, int type, void *arg)
133 {
134 	int basetype = (type & ~ALL_ATTRIBUTES);
135 	const pcclass_t *pccp;
136 	int error = 0;
137 	int ceil;
138
139 	if (basetype == USYNC_PROCESS_ROBUST) {
140 		/*
141 		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
142 		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
143 		 * retain the USYNC_PROCESS_ROBUST flag so we can return
144 		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
145 		 * mutexes will ever draw ELOCKUNMAPPED).
146 		 */
147 		type |= (USYNC_PROCESS | LOCK_ROBUST);
148 		basetype = USYNC_PROCESS;
149 	}
150
151 	if (type & LOCK_PRIO_PROTECT)
152 		pccp = get_info_by_policy(SCHED_FIFO);
153 	if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
154 	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
155 	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
156 	    ((type & LOCK_PRIO_PROTECT) &&
157 	    ((ceil = *(int *)arg) < pccp->pcc_primin ||
158 	    ceil > pccp->pcc_primax))) {
159 		error = EINVAL;
160 	} else if (type & LOCK_ROBUST) {
161 		/*
162 		 * Callers of mutex_init() with the LOCK_ROBUST attribute
163 		 * are required to pass an initially all-zero mutex.
164 		 * Multiple calls to mutex_init() are allowed; all but
165 		 * the first return EBUSY. A call to mutex_init() is
166 		 * allowed to make an inconsistent robust lock consistent
167 		 * (for historical usage, even though the proper interface
168 		 * for this is mutex_consistent()). Note that we use
169 		 * atomic_or_16() to set the LOCK_INITED flag so as
170 		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
171 		 */
172 		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
173 		if (!(mp->mutex_flag & LOCK_INITED)) {
174 			mp->mutex_type = (uint8_t)type;
175 			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
176 			mp->mutex_magic = MUTEX_MAGIC;
177 		} else if (type != mp->mutex_type ||
178 		    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
179 			error = EINVAL;
180 		} else if (__mutex_consistent(mp) != 0) {
181 			error = EBUSY;
182 		}
183 		/* register a process robust mutex with the kernel */
184 		if (basetype == USYNC_PROCESS)
185 			register_lock(mp);
186 	} else {
187 		(void) memset(mp, 0, sizeof (*mp));
188 		mp->mutex_type = (uint8_t)type;
189 		mp->mutex_flag = LOCK_INITED;
190 		mp->mutex_magic = MUTEX_MAGIC;
191 	}
192
193 	if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
194 		mp->mutex_ceiling = ceil;
195 	}
196
197 	return (error);
198 }
199
200 /*
201  * Delete mp from list of ceiling mutexes owned by curthread.
202  * Return 1 if the head of the chain was updated.
203  */
204 int
205 _ceil_mylist_del(mutex_t *mp)
206 {
207 	ulwp_t *self = curthread;
208 	mxchain_t **mcpp;
209 	mxchain_t *mcp;
210
211 	for (mcpp = &self->ul_mxchain;
212 	    (mcp = *mcpp) != NULL;
213 	    mcpp = &mcp->mxchain_next) {
214 		if (mcp->mxchain_mx == mp) {
215 			*mcpp = mcp->mxchain_next;
216 			lfree(mcp, sizeof (*mcp));
217 			return (mcpp == &self->ul_mxchain);
218 		}
219 	}
220 	return (0);
221 }
222
223 /*
224  * Add mp to the list of ceiling mutexes owned by curthread.
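 * New entries are pushed onto the head of self->ul_mxchain, so the most
 * recently acquired ceiling mutex is the one consulted by _ceil_prio_waive().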
225 * Return ENOMEM if no memory could be allocated. 226 */ 227 int 228 _ceil_mylist_add(mutex_t *mp) 229 { 230 ulwp_t *self = curthread; 231 mxchain_t *mcp; 232 233 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 234 return (ENOMEM); 235 mcp->mxchain_mx = mp; 236 mcp->mxchain_next = self->ul_mxchain; 237 self->ul_mxchain = mcp; 238 return (0); 239 } 240 241 /* 242 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below. 243 */ 244 static void 245 set_rt_priority(ulwp_t *self, int prio) 246 { 247 pcparms_t pcparm; 248 249 pcparm.pc_cid = self->ul_rtclassid; 250 ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; 251 ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; 252 (void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm); 253 } 254 255 /* 256 * Inherit priority from ceiling. 257 * This changes the effective priority, not the assigned priority. 258 */ 259 void 260 _ceil_prio_inherit(int prio) 261 { 262 ulwp_t *self = curthread; 263 264 self->ul_epri = prio; 265 set_rt_priority(self, prio); 266 } 267 268 /* 269 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 270 * if holding at least one ceiling lock. If no ceiling locks are held at this 271 * point, disinherit completely, reverting back to assigned priority. 272 */ 273 void 274 _ceil_prio_waive(void) 275 { 276 ulwp_t *self = curthread; 277 mxchain_t *mcp = self->ul_mxchain; 278 int prio; 279 280 if (mcp == NULL) { 281 prio = self->ul_pri; 282 self->ul_epri = 0; 283 } else { 284 prio = mcp->mxchain_mx->mutex_ceiling; 285 self->ul_epri = prio; 286 } 287 set_rt_priority(self, prio); 288 } 289 290 /* 291 * Clear the lock byte. Retain the waiters byte and the spinners byte. 292 * Return the old value of the lock word. 293 */ 294 static uint32_t 295 clear_lockbyte(volatile uint32_t *lockword) 296 { 297 uint32_t old; 298 uint32_t new; 299 300 do { 301 old = *lockword; 302 new = old & ~LOCKMASK; 303 } while (atomic_cas_32(lockword, old, new) != old); 304 305 return (old); 306 } 307 308 /* 309 * Same as clear_lockbyte(), but operates on mutex_lockword64. 310 * The mutex_ownerpid field is cleared along with the lock byte. 311 */ 312 static uint64_t 313 clear_lockbyte64(volatile uint64_t *lockword64) 314 { 315 uint64_t old; 316 uint64_t new; 317 318 do { 319 old = *lockword64; 320 new = old & ~LOCKMASK64; 321 } while (atomic_cas_64(lockword64, old, new) != old); 322 323 return (old); 324 } 325 326 /* 327 * Similar to set_lock_byte(), which only tries to set the lock byte. 328 * Here, we attempt to set the lock byte AND the mutex_ownerpid, 329 * keeping the remaining bytes constant. 330 */ 331 static int 332 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid) 333 { 334 uint64_t old; 335 uint64_t new; 336 337 old = *lockword64 & ~LOCKMASK64; 338 new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64; 339 if (atomic_cas_64(lockword64, old, new) == old) 340 return (LOCKCLEAR); 341 342 return (LOCKSET); 343 } 344 345 /* 346 * Increment the spinners count in the mutex lock word. 347 * Return 0 on success. Return -1 if the count would overflow. 348 */ 349 static int 350 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners) 351 { 352 uint32_t old; 353 uint32_t new; 354 355 do { 356 old = *lockword; 357 if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners) 358 return (-1); 359 new = old + (1 << SPINNERSHIFT); 360 } while (atomic_cas_32(lockword, old, new) != old); 361 362 return (0); 363 } 364 365 /* 366 * Decrement the spinners count in the mutex lock word. 
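 * If the count is already zero, the lock word is left unchanged.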
367  * Return the new value of the lock word.
368  */
369 static uint32_t
370 spinners_decr(volatile uint32_t *lockword)
371 {
372 	uint32_t old;
373 	uint32_t new;
374
375 	do {
376 		new = old = *lockword;
377 		if (new & SPINNERMASK)
378 			new -= (1 << SPINNERSHIFT);
379 	} while (atomic_cas_32(lockword, old, new) != old);
380
381 	return (new);
382 }
383
384 /*
385  * Non-preemptive spin locks. Used by queue_lock().
386  * No lock statistics are gathered for these locks.
387  * No DTrace probes are provided for these locks.
388  */
389 void
390 spin_lock_set(mutex_t *mp)
391 {
392 	ulwp_t *self = curthread;
393
394 	no_preempt(self);
395 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
396 		mp->mutex_owner = (uintptr_t)self;
397 		return;
398 	}
399 	/*
400 	 * Spin for a while, attempting to acquire the lock.
401 	 */
402 	INCR32(self->ul_spin_lock_spin);
403 	if (mutex_queuelock_adaptive(mp) == 0 ||
404 	    set_lock_byte(&mp->mutex_lockw) == 0) {
405 		mp->mutex_owner = (uintptr_t)self;
406 		return;
407 	}
408 	/*
409 	 * Try harder if we were previously at a no preemption level.
410 	 */
411 	if (self->ul_preempt > 1) {
412 		INCR32(self->ul_spin_lock_spin2);
413 		if (mutex_queuelock_adaptive(mp) == 0 ||
414 		    set_lock_byte(&mp->mutex_lockw) == 0) {
415 			mp->mutex_owner = (uintptr_t)self;
416 			return;
417 		}
418 	}
419 	/*
420 	 * Give up and block in the kernel for the mutex.
421 	 */
422 	INCR32(self->ul_spin_lock_sleep);
423 	(void) ___lwp_mutex_timedlock(mp, NULL);
424 	mp->mutex_owner = (uintptr_t)self;
425 }
426
427 void
428 spin_lock_clear(mutex_t *mp)
429 {
430 	ulwp_t *self = curthread;
431
432 	mp->mutex_owner = 0;
433 	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
434 		(void) ___lwp_mutex_wakeup(mp, 0);
435 		INCR32(self->ul_spin_lock_wakeup);
436 	}
437 	preempt(self);
438 }
439
440 /*
441  * Allocate the sleep queue hash table.
442  */
443 void
444 queue_alloc(void)
445 {
446 	ulwp_t *self = curthread;
447 	uberdata_t *udp = self->ul_uberdata;
448 	queue_head_t *qp;
449 	void *data;
450 	int i;
451
452 	/*
453 	 * No locks are needed; we call here only when single-threaded.
454 	 */
455 	ASSERT(self == udp->ulwp_one);
456 	ASSERT(!udp->uberflags.uf_mt);
457 	if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
458 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
459 	    == MAP_FAILED)
460 		thr_panic("cannot allocate thread queue_head table");
461 	udp->queue_head = qp = (queue_head_t *)data;
462 	for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
463 		qp->qh_type = (i < QHASHSIZE)? MX : CV;
464 		qp->qh_lock.mutex_flag = LOCK_INITED;
465 		qp->qh_lock.mutex_magic = MUTEX_MAGIC;
466 		qp->qh_hlist = &qp->qh_def_root;
467 #if defined(THREAD_DEBUG)
468 		qp->qh_hlen = 1;
469 		qp->qh_hmax = 1;
470 #endif
471 	}
472 }
473
474 #if defined(THREAD_DEBUG)
475
476 /*
477  * Debugging: verify correctness of a sleep queue.
478  */
479 void
480 QVERIFY(queue_head_t *qp)
481 {
482 	ulwp_t *self = curthread;
483 	uberdata_t *udp = self->ul_uberdata;
484 	queue_root_t *qrp;
485 	ulwp_t *ulwp;
486 	ulwp_t *prev;
487 	uint_t index;
488 	uint32_t cnt;
489 	char qtype;
490 	void *wchan;
491
492 	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
493 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
494 	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
495 		cnt++;
496 		ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) ||
497 		    (qrp->qr_head == NULL && qrp->qr_tail == NULL));
498 	}
499 	ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt);
500 	qtype = ((qp - udp->queue_head) < QHASHSIZE)?
MX : CV; 501 ASSERT(qp->qh_type == qtype); 502 if (!thread_queue_verify) 503 return; 504 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 505 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 506 for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; 507 prev = ulwp, ulwp = ulwp->ul_link) { 508 cnt++; 509 if (ulwp->ul_writer) 510 ASSERT(prev == NULL || prev->ul_writer); 511 ASSERT(ulwp->ul_qtype == qtype); 512 ASSERT(ulwp->ul_wchan != NULL); 513 ASSERT(ulwp->ul_sleepq == qp); 514 wchan = ulwp->ul_wchan; 515 ASSERT(qrp->qr_wchan == wchan); 516 index = QUEUE_HASH(wchan, qtype); 517 ASSERT(&udp->queue_head[index] == qp); 518 } 519 ASSERT(qrp->qr_tail == prev); 520 } 521 ASSERT(qp->qh_qlen == cnt); 522 } 523 524 #else /* THREAD_DEBUG */ 525 526 #define QVERIFY(qp) 527 528 #endif /* THREAD_DEBUG */ 529 530 /* 531 * Acquire a queue head. 532 */ 533 queue_head_t * 534 queue_lock(void *wchan, int qtype) 535 { 536 uberdata_t *udp = curthread->ul_uberdata; 537 queue_head_t *qp; 538 queue_root_t *qrp; 539 540 ASSERT(qtype == MX || qtype == CV); 541 542 /* 543 * It is possible that we could be called while still single-threaded. 544 * If so, we call queue_alloc() to allocate the queue_head[] array. 545 */ 546 if ((qp = udp->queue_head) == NULL) { 547 queue_alloc(); 548 qp = udp->queue_head; 549 } 550 qp += QUEUE_HASH(wchan, qtype); 551 spin_lock_set(&qp->qh_lock); 552 for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) 553 if (qrp->qr_wchan == wchan) 554 break; 555 if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { 556 /* the default queue root is available; use it */ 557 qrp = &qp->qh_def_root; 558 qrp->qr_wchan = wchan; 559 ASSERT(qrp->qr_next == NULL); 560 ASSERT(qrp->qr_tail == NULL && 561 qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); 562 } 563 qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ 564 qp->qh_root = qrp; /* valid until queue_unlock() is called */ 565 INCR32(qp->qh_lockcount); 566 QVERIFY(qp); 567 return (qp); 568 } 569 570 /* 571 * Release a queue head. 572 */ 573 void 574 queue_unlock(queue_head_t *qp) 575 { 576 QVERIFY(qp); 577 spin_lock_clear(&qp->qh_lock); 578 } 579 580 /* 581 * For rwlock queueing, we must queue writers ahead of readers of the 582 * same priority. We do this by making writers appear to have a half 583 * point higher priority for purposes of priority comparisons below. 584 */ 585 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 586 587 void 588 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) 589 { 590 queue_root_t *qrp; 591 ulwp_t **ulwpp; 592 ulwp_t *next; 593 int pri = CMP_PRIO(ulwp); 594 595 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 596 ASSERT(ulwp->ul_sleepq != qp); 597 598 if ((qrp = qp->qh_root) == NULL) { 599 /* use the thread's queue root for the linkage */ 600 qrp = &ulwp->ul_queue_root; 601 qrp->qr_next = qp->qh_hlist; 602 qrp->qr_prev = NULL; 603 qrp->qr_head = NULL; 604 qrp->qr_tail = NULL; 605 qrp->qr_wchan = qp->qh_wchan; 606 qrp->qr_rtcount = 0; 607 qrp->qr_qlen = 0; 608 qrp->qr_qmax = 0; 609 qp->qh_hlist->qr_prev = qrp; 610 qp->qh_hlist = qrp; 611 qp->qh_root = qrp; 612 MAXINCR(qp->qh_hmax, qp->qh_hlen); 613 } 614 615 /* 616 * LIFO queue ordering is unfair and can lead to starvation, 617 * but it gives better performance for heavily contended locks. 
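 * (The most recently blocked thread is likely to still have a warm cache,
 * so waking it first tends to be cheaper than strict FIFO ordering.)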
618 * We use thread_queue_fifo (range is 0..8) to determine 619 * the frequency of FIFO vs LIFO queuing: 620 * 0 : every 256th time (almost always LIFO) 621 * 1 : every 128th time 622 * 2 : every 64th time 623 * 3 : every 32nd time 624 * 4 : every 16th time (the default value, mostly LIFO) 625 * 5 : every 8th time 626 * 6 : every 4th time 627 * 7 : every 2nd time 628 * 8 : every time (never LIFO, always FIFO) 629 * Note that there is always some degree of FIFO ordering. 630 * This breaks live lock conditions that occur in applications 631 * that are written assuming (incorrectly) that threads acquire 632 * locks fairly, that is, in roughly round-robin order. 633 * In any event, the queue is maintained in kernel priority order. 634 * 635 * If force_fifo is non-zero, fifo queueing is forced. 636 * SUSV3 requires this for semaphores. 637 */ 638 if (qrp->qr_head == NULL) { 639 /* 640 * The queue is empty. LIFO/FIFO doesn't matter. 641 */ 642 ASSERT(qrp->qr_tail == NULL); 643 ulwpp = &qrp->qr_head; 644 } else if (force_fifo | 645 (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { 646 /* 647 * Enqueue after the last thread whose priority is greater 648 * than or equal to the priority of the thread being queued. 649 * Attempt first to go directly onto the tail of the queue. 650 */ 651 if (pri <= CMP_PRIO(qrp->qr_tail)) 652 ulwpp = &qrp->qr_tail->ul_link; 653 else { 654 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 655 ulwpp = &next->ul_link) 656 if (pri > CMP_PRIO(next)) 657 break; 658 } 659 } else { 660 /* 661 * Enqueue before the first thread whose priority is less 662 * than or equal to the priority of the thread being queued. 663 * Hopefully we can go directly onto the head of the queue. 664 */ 665 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 666 ulwpp = &next->ul_link) 667 if (pri >= CMP_PRIO(next)) 668 break; 669 } 670 if ((ulwp->ul_link = *ulwpp) == NULL) 671 qrp->qr_tail = ulwp; 672 *ulwpp = ulwp; 673 674 ulwp->ul_sleepq = qp; 675 ulwp->ul_wchan = qp->qh_wchan; 676 ulwp->ul_qtype = qp->qh_type; 677 if ((ulwp->ul_schedctl != NULL && 678 ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | 679 ulwp->ul_pilocks) { 680 ulwp->ul_rtqueued = 1; 681 qrp->qr_rtcount++; 682 } 683 MAXINCR(qrp->qr_qmax, qrp->qr_qlen); 684 MAXINCR(qp->qh_qmax, qp->qh_qlen); 685 } 686 687 /* 688 * Helper function for queue_slot() and queue_slot_rt(). 689 * Try to find a non-suspended thread on the queue. 690 */ 691 static ulwp_t ** 692 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) 693 { 694 ulwp_t *ulwp; 695 ulwp_t **foundpp = NULL; 696 int priority = -1; 697 ulwp_t *prev; 698 int tpri; 699 700 for (prev = NULL; 701 (ulwp = *ulwpp) != NULL; 702 prev = ulwp, ulwpp = &ulwp->ul_link) { 703 if (ulwp->ul_stop) /* skip suspended threads */ 704 continue; 705 tpri = rt? CMP_PRIO(ulwp) : 0; 706 if (tpri > priority) { 707 foundpp = ulwpp; 708 *prevp = prev; 709 priority = tpri; 710 if (!rt) 711 break; 712 } 713 } 714 return (foundpp); 715 } 716 717 /* 718 * For real-time, we search the entire queue because the dispatch 719 * (kernel) priorities may have changed since enqueueing. 
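 * The linear scan below picks the highest-priority waiter and then falls
 * back to queue_slot_runnable() so as not to return a suspended thread.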
720 */ 721 static ulwp_t ** 722 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) 723 { 724 ulwp_t **ulwpp = ulwpp_org; 725 ulwp_t *ulwp = *ulwpp; 726 ulwp_t **foundpp = ulwpp; 727 int priority = CMP_PRIO(ulwp); 728 ulwp_t *prev; 729 int tpri; 730 731 for (prev = ulwp, ulwpp = &ulwp->ul_link; 732 (ulwp = *ulwpp) != NULL; 733 prev = ulwp, ulwpp = &ulwp->ul_link) { 734 tpri = CMP_PRIO(ulwp); 735 if (tpri > priority) { 736 foundpp = ulwpp; 737 *prevp = prev; 738 priority = tpri; 739 } 740 } 741 ulwp = *foundpp; 742 743 /* 744 * Try not to return a suspended thread. 745 * This mimics the old libthread's behavior. 746 */ 747 if (ulwp->ul_stop && 748 (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { 749 foundpp = ulwpp; 750 ulwp = *foundpp; 751 } 752 ulwp->ul_rt = 1; 753 return (foundpp); 754 } 755 756 ulwp_t ** 757 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) 758 { 759 queue_root_t *qrp; 760 ulwp_t **ulwpp; 761 ulwp_t *ulwp; 762 int rt; 763 764 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 765 766 if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { 767 *more = 0; 768 return (NULL); /* no lwps on the queue */ 769 } 770 rt = (qrp->qr_rtcount != 0); 771 *prevp = NULL; 772 if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ 773 *more = 0; 774 ulwp->ul_rt = rt; 775 return (&qrp->qr_head); 776 } 777 *more = 1; 778 779 if (rt) /* real-time queue */ 780 return (queue_slot_rt(&qrp->qr_head, prevp)); 781 /* 782 * Try not to return a suspended thread. 783 * This mimics the old libthread's behavior. 784 */ 785 if (ulwp->ul_stop && 786 (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { 787 ulwp = *ulwpp; 788 ulwp->ul_rt = 0; 789 return (ulwpp); 790 } 791 /* 792 * The common case; just pick the first thread on the queue. 793 */ 794 ulwp->ul_rt = 0; 795 return (&qrp->qr_head); 796 } 797 798 /* 799 * Common code for unlinking an lwp from a user-level sleep queue. 800 */ 801 void 802 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 803 { 804 queue_root_t *qrp = qp->qh_root; 805 queue_root_t *nqrp; 806 ulwp_t *ulwp = *ulwpp; 807 ulwp_t *next; 808 809 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 810 ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); 811 812 DECR(qp->qh_qlen); 813 DECR(qrp->qr_qlen); 814 if (ulwp->ul_rtqueued) { 815 ulwp->ul_rtqueued = 0; 816 qrp->qr_rtcount--; 817 } 818 next = ulwp->ul_link; 819 *ulwpp = next; 820 ulwp->ul_link = NULL; 821 if (qrp->qr_tail == ulwp) 822 qrp->qr_tail = prev; 823 if (qrp == &ulwp->ul_queue_root) { 824 /* 825 * We can't continue to use the unlinked thread's 826 * queue root for the linkage. 
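 * Either migrate the root into the tail thread's ul_queue_root or,
 * if the queue is now empty, unlink the root from the hash list.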
827 		 */
828 		queue_root_t *qr_next = qrp->qr_next;
829 		queue_root_t *qr_prev = qrp->qr_prev;
830
831 		if (qrp->qr_tail) {
832 			/* switch to using the last thread's queue root */
833 			ASSERT(qrp->qr_qlen != 0);
834 			nqrp = &qrp->qr_tail->ul_queue_root;
835 			*nqrp = *qrp;
836 			if (qr_next)
837 				qr_next->qr_prev = nqrp;
838 			if (qr_prev)
839 				qr_prev->qr_next = nqrp;
840 			else
841 				qp->qh_hlist = nqrp;
842 			qp->qh_root = nqrp;
843 		} else {
844 			/* empty queue root; just delete from the hash list */
845 			ASSERT(qrp->qr_qlen == 0);
846 			if (qr_next)
847 				qr_next->qr_prev = qr_prev;
848 			if (qr_prev)
849 				qr_prev->qr_next = qr_next;
850 			else
851 				qp->qh_hlist = qr_next;
852 			qp->qh_root = NULL;
853 			DECR(qp->qh_hlen);
854 		}
855 	}
856 }
857
858 ulwp_t *
859 dequeue(queue_head_t *qp, int *more)
860 {
861 	ulwp_t **ulwpp;
862 	ulwp_t *ulwp;
863 	ulwp_t *prev;
864
865 	if ((ulwpp = queue_slot(qp, &prev, more)) == NULL)
866 		return (NULL);
867 	ulwp = *ulwpp;
868 	queue_unlink(qp, ulwpp, prev);
869 	ulwp->ul_sleepq = NULL;
870 	ulwp->ul_wchan = NULL;
871 	return (ulwp);
872 }
873
874 /*
875  * Return a pointer to the highest priority thread sleeping on wchan.
876  */
877 ulwp_t *
878 queue_waiter(queue_head_t *qp)
879 {
880 	ulwp_t **ulwpp;
881 	ulwp_t *prev;
882 	int more;
883
884 	if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL)
885 		return (NULL);
886 	return (*ulwpp);
887 }
888
889 int
890 dequeue_self(queue_head_t *qp)
891 {
892 	ulwp_t *self = curthread;
893 	queue_root_t *qrp;
894 	ulwp_t **ulwpp;
895 	ulwp_t *ulwp;
896 	ulwp_t *prev;
897 	int found = 0;
898
899 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
900
901 	/* find self on the sleep queue */
902 	if ((qrp = qp->qh_root) != NULL) {
903 		for (prev = NULL, ulwpp = &qrp->qr_head;
904 		    (ulwp = *ulwpp) != NULL;
905 		    prev = ulwp, ulwpp = &ulwp->ul_link) {
906 			if (ulwp == self) {
907 				queue_unlink(qp, ulwpp, prev);
908 				self->ul_cvmutex = NULL;
909 				self->ul_sleepq = NULL;
910 				self->ul_wchan = NULL;
911 				found = 1;
912 				break;
913 			}
914 		}
915 	}
916
917 	if (!found)
918 		thr_panic("dequeue_self(): curthread not found on queue");
919
920 	return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL);
921 }
922
923 /*
924  * Called from call_user_handler() and _thrp_suspend() to take
925  * ourself off of our sleep queue so we can grab locks.
926  */
927 void
928 unsleep_self(void)
929 {
930 	ulwp_t *self = curthread;
931 	queue_head_t *qp;
932
933 	/*
934 	 * Calling enter_critical()/exit_critical() here would lead
935 	 * to recursion. Just manipulate self->ul_critical directly.
936 	 */
937 	self->ul_critical++;
938 	while (self->ul_sleepq != NULL) {
939 		qp = queue_lock(self->ul_wchan, self->ul_qtype);
940 		/*
941 		 * We may have been moved from a CV queue to a
942 		 * mutex queue while we were attempting queue_lock().
943 		 * If so, just loop around and try again.
944 		 * dequeue_self() clears self->ul_sleepq.
945 		 */
946 		if (qp == self->ul_sleepq)
947 			(void) dequeue_self(qp);
948 		queue_unlock(qp);
949 	}
950 	self->ul_writer = 0;
951 	self->ul_critical--;
952 }
953
954 /*
955  * Common code for calling the ___lwp_mutex_timedlock() system call.
956  * Returns with mutex_owner and mutex_ownerpid set correctly.
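 * A return value of EOWNERDEAD or ELOCKUNMAPPED still means that the
 * lock was acquired; see the robust mutex handling in the loop below.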
957 */ 958 static int 959 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 960 { 961 ulwp_t *self = curthread; 962 uberdata_t *udp = self->ul_uberdata; 963 int mtype = mp->mutex_type; 964 hrtime_t begin_sleep; 965 int acquired; 966 int error; 967 968 self->ul_sp = stkptr(); 969 self->ul_wchan = mp; 970 if (__td_event_report(self, TD_SLEEP, udp)) { 971 self->ul_td_evbuf.eventnum = TD_SLEEP; 972 self->ul_td_evbuf.eventdata = mp; 973 tdb_event(TD_SLEEP, udp); 974 } 975 if (msp) { 976 tdb_incr(msp->mutex_sleep); 977 begin_sleep = gethrtime(); 978 } 979 980 DTRACE_PROBE1(plockstat, mutex__block, mp); 981 982 for (;;) { 983 /* 984 * A return value of EOWNERDEAD or ELOCKUNMAPPED 985 * means we successfully acquired the lock. 986 */ 987 if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 988 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 989 acquired = 0; 990 break; 991 } 992 993 if (mtype & USYNC_PROCESS) { 994 /* 995 * Defend against forkall(). We may be the child, 996 * in which case we don't actually own the mutex. 997 */ 998 enter_critical(self); 999 if (mp->mutex_ownerpid == udp->pid) { 1000 mp->mutex_owner = (uintptr_t)self; 1001 exit_critical(self); 1002 acquired = 1; 1003 break; 1004 } 1005 exit_critical(self); 1006 } else { 1007 mp->mutex_owner = (uintptr_t)self; 1008 acquired = 1; 1009 break; 1010 } 1011 } 1012 if (msp) 1013 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1014 self->ul_wchan = NULL; 1015 self->ul_sp = 0; 1016 1017 if (acquired) { 1018 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1019 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1020 } else { 1021 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1022 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1023 } 1024 1025 return (error); 1026 } 1027 1028 /* 1029 * Common code for calling the ___lwp_mutex_trylock() system call. 1030 * Returns with mutex_owner and mutex_ownerpid set correctly. 1031 */ 1032 int 1033 mutex_trylock_kernel(mutex_t *mp) 1034 { 1035 ulwp_t *self = curthread; 1036 uberdata_t *udp = self->ul_uberdata; 1037 int mtype = mp->mutex_type; 1038 int error; 1039 int acquired; 1040 1041 for (;;) { 1042 /* 1043 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1044 * means we successfully acquired the lock. 1045 */ 1046 if ((error = ___lwp_mutex_trylock(mp)) != 0 && 1047 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1048 acquired = 0; 1049 break; 1050 } 1051 1052 if (mtype & USYNC_PROCESS) { 1053 /* 1054 * Defend against forkall(). We may be the child, 1055 * in which case we don't actually own the mutex. 
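 * Claim ownership only if mutex_ownerpid matches our own pid.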
1056 */ 1057 enter_critical(self); 1058 if (mp->mutex_ownerpid == udp->pid) { 1059 mp->mutex_owner = (uintptr_t)self; 1060 exit_critical(self); 1061 acquired = 1; 1062 break; 1063 } 1064 exit_critical(self); 1065 } else { 1066 mp->mutex_owner = (uintptr_t)self; 1067 acquired = 1; 1068 break; 1069 } 1070 } 1071 1072 if (acquired) { 1073 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1074 } else if (error != EBUSY) { 1075 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1076 } 1077 1078 return (error); 1079 } 1080 1081 volatile sc_shared_t * 1082 setup_schedctl(void) 1083 { 1084 ulwp_t *self = curthread; 1085 volatile sc_shared_t *scp; 1086 sc_shared_t *tmp; 1087 1088 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 1089 !self->ul_vfork && /* not a child of vfork() */ 1090 !self->ul_schedctl_called) { /* haven't been called before */ 1091 enter_critical(self); 1092 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 1093 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 1094 self->ul_schedctl = scp = tmp; 1095 exit_critical(self); 1096 } 1097 /* 1098 * Unless the call to setup_schedctl() is surrounded 1099 * by enter_critical()/exit_critical(), the address 1100 * we are returning could be invalid due to a forkall() 1101 * having occurred in another thread. 1102 */ 1103 return (scp); 1104 } 1105 1106 /* 1107 * Interfaces from libsched, incorporated into libc. 1108 * libsched.so.1 is now a filter library onto libc. 1109 */ 1110 #pragma weak schedctl_lookup = _schedctl_init 1111 #pragma weak _schedctl_lookup = _schedctl_init 1112 #pragma weak schedctl_init = _schedctl_init 1113 schedctl_t * 1114 _schedctl_init(void) 1115 { 1116 volatile sc_shared_t *scp = setup_schedctl(); 1117 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 1118 } 1119 1120 #pragma weak schedctl_exit = _schedctl_exit 1121 void 1122 _schedctl_exit(void) 1123 { 1124 } 1125 1126 /* 1127 * Contract private interface for java. 1128 * Set up the schedctl data if it doesn't exist yet. 1129 * Return a pointer to the pointer to the schedctl data. 1130 */ 1131 volatile sc_shared_t *volatile * 1132 _thr_schedctl(void) 1133 { 1134 ulwp_t *self = curthread; 1135 volatile sc_shared_t *volatile *ptr; 1136 1137 if (self->ul_vfork) 1138 return (NULL); 1139 if (*(ptr = &self->ul_schedctl) == NULL) 1140 (void) setup_schedctl(); 1141 return (ptr); 1142 } 1143 1144 /* 1145 * Block signals and attempt to block preemption. 1146 * no_preempt()/preempt() must be used in pairs but can be nested. 1147 */ 1148 void 1149 no_preempt(ulwp_t *self) 1150 { 1151 volatile sc_shared_t *scp; 1152 1153 if (self->ul_preempt++ == 0) { 1154 enter_critical(self); 1155 if ((scp = self->ul_schedctl) != NULL || 1156 (scp = setup_schedctl()) != NULL) { 1157 /* 1158 * Save the pre-existing preempt value. 1159 */ 1160 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 1161 scp->sc_preemptctl.sc_nopreempt = 1; 1162 } 1163 } 1164 } 1165 1166 /* 1167 * Undo the effects of no_preempt(). 1168 */ 1169 void 1170 preempt(ulwp_t *self) 1171 { 1172 volatile sc_shared_t *scp; 1173 1174 ASSERT(self->ul_preempt > 0); 1175 if (--self->ul_preempt == 0) { 1176 if ((scp = self->ul_schedctl) != NULL) { 1177 /* 1178 * Restore the pre-existing preempt value. 1179 */ 1180 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 1181 if (scp->sc_preemptctl.sc_yield && 1182 scp->sc_preemptctl.sc_nopreempt == 0) { 1183 yield(); 1184 if (scp->sc_preemptctl.sc_yield) { 1185 /* 1186 * Shouldn't happen. 
This is either 1187 * a race condition or the thread 1188 * just entered the real-time class. 1189 */ 1190 yield(); 1191 scp->sc_preemptctl.sc_yield = 0; 1192 } 1193 } 1194 } 1195 exit_critical(self); 1196 } 1197 } 1198 1199 /* 1200 * If a call to preempt() would cause the current thread to yield or to 1201 * take deferred actions in exit_critical(), then unpark the specified 1202 * lwp so it can run while we delay. Return the original lwpid if the 1203 * unpark was not performed, else return zero. The tests are a repeat 1204 * of some of the tests in preempt(), above. This is a statistical 1205 * optimization solely for cond_sleep_queue(), below. 1206 */ 1207 static lwpid_t 1208 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 1209 { 1210 volatile sc_shared_t *scp = self->ul_schedctl; 1211 1212 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 1213 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 1214 (self->ul_curplease && self->ul_critical == 1)) { 1215 (void) __lwp_unpark(lwpid); 1216 lwpid = 0; 1217 } 1218 return (lwpid); 1219 } 1220 1221 /* 1222 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1223 * If this fails, return EBUSY and let the caller deal with it. 1224 * If this succeeds, return 0 with mutex_owner set to curthread. 1225 */ 1226 static int 1227 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1228 { 1229 ulwp_t *self = curthread; 1230 int error = EBUSY; 1231 ulwp_t *ulwp; 1232 volatile sc_shared_t *scp; 1233 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1234 volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1235 uint32_t new_lockword; 1236 int count = 0; 1237 int max_count; 1238 uint8_t max_spinners; 1239 1240 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1241 1242 if (MUTEX_OWNER(mp) == self) 1243 return (EBUSY); 1244 1245 /* short-cut, not definitive (see below) */ 1246 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1247 ASSERT(mp->mutex_type & LOCK_ROBUST); 1248 error = ENOTRECOVERABLE; 1249 goto done; 1250 } 1251 1252 /* 1253 * Make one attempt to acquire the lock before 1254 * incurring the overhead of the spin loop. 1255 */ 1256 if (set_lock_byte(lockp) == 0) { 1257 *ownerp = (uintptr_t)self; 1258 error = 0; 1259 goto done; 1260 } 1261 if (!tryhard) 1262 goto done; 1263 if (ncpus == 0) 1264 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1265 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1266 max_spinners = ncpus - 1; 1267 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1268 if (max_count == 0) 1269 goto done; 1270 1271 /* 1272 * This spin loop is unfair to lwps that have already dropped into 1273 * the kernel to sleep. They will starve on a highly-contended mutex. 1274 * This is just too bad. The adaptive spin algorithm is intended 1275 * to allow programs with highly-contended locks (that is, broken 1276 * programs) to execute with reasonable speed despite their contention. 1277 * Being fair would reduce the speed of such programs and well-written 1278 * programs will not suffer in any case. 
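 * The number of concurrent spinners is bounded by max_spinners
 * (at most ncpus - 1); see spinners_incr().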
1279 */ 1280 enter_critical(self); 1281 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1282 exit_critical(self); 1283 goto done; 1284 } 1285 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1286 for (count = 1; ; count++) { 1287 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1288 *ownerp = (uintptr_t)self; 1289 error = 0; 1290 break; 1291 } 1292 if (count == max_count) 1293 break; 1294 SMT_PAUSE(); 1295 /* 1296 * Stop spinning if the mutex owner is not running on 1297 * a processor; it will not drop the lock any time soon 1298 * and we would just be wasting time to keep spinning. 1299 * 1300 * Note that we are looking at another thread (ulwp_t) 1301 * without ensuring that the other thread does not exit. 1302 * The scheme relies on ulwp_t structures never being 1303 * deallocated by the library (the library employs a free 1304 * list of ulwp_t structs that are reused when new threads 1305 * are created) and on schedctl shared memory never being 1306 * deallocated once created via __schedctl(). 1307 * 1308 * Thus, the worst that can happen when the spinning thread 1309 * looks at the owner's schedctl data is that it is looking 1310 * at some other thread's schedctl data. This almost never 1311 * happens and is benign when it does. 1312 */ 1313 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1314 ((scp = ulwp->ul_schedctl) == NULL || 1315 scp->sc_state != SC_ONPROC)) 1316 break; 1317 } 1318 new_lockword = spinners_decr(&mp->mutex_lockword); 1319 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1320 /* 1321 * We haven't yet acquired the lock, the lock 1322 * is free, and there are no other spinners. 1323 * Make one final attempt to acquire the lock. 1324 * 1325 * This isn't strictly necessary since mutex_lock_queue() 1326 * (the next action this thread will take if it doesn't 1327 * acquire the lock here) makes one attempt to acquire 1328 * the lock before putting the thread to sleep. 1329 * 1330 * If the next action for this thread (on failure here) 1331 * were not to call mutex_lock_queue(), this would be 1332 * necessary for correctness, to avoid ending up with an 1333 * unheld mutex with waiters but no one to wake them up. 1334 */ 1335 if (set_lock_byte(lockp) == 0) { 1336 *ownerp = (uintptr_t)self; 1337 error = 0; 1338 } 1339 count++; 1340 } 1341 exit_critical(self); 1342 1343 done: 1344 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1345 ASSERT(mp->mutex_type & LOCK_ROBUST); 1346 /* 1347 * We shouldn't own the mutex. 1348 * Just clear the lock; everyone has already been waked up. 1349 */ 1350 mp->mutex_owner = 0; 1351 (void) clear_lockbyte(&mp->mutex_lockword); 1352 error = ENOTRECOVERABLE; 1353 } 1354 1355 if (error) { 1356 if (count) { 1357 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1358 } 1359 if (error != EBUSY) { 1360 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1361 } 1362 } else { 1363 if (count) { 1364 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1365 } 1366 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1367 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1368 ASSERT(mp->mutex_type & LOCK_ROBUST); 1369 error = EOWNERDEAD; 1370 } 1371 } 1372 1373 return (error); 1374 } 1375 1376 /* 1377 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1378 * The owner field is not set here; the caller (spin_lock_set()) sets it. 
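 * The spin count comes from ul_queue_spin (thread_queue_spin, default
 * 10,000) rather than from ul_adaptive_spin.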
1379 */ 1380 static int 1381 mutex_queuelock_adaptive(mutex_t *mp) 1382 { 1383 ulwp_t *ulwp; 1384 volatile sc_shared_t *scp; 1385 volatile uint8_t *lockp; 1386 volatile uint64_t *ownerp; 1387 int count = curthread->ul_queue_spin; 1388 1389 ASSERT(mp->mutex_type == USYNC_THREAD); 1390 1391 if (count == 0) 1392 return (EBUSY); 1393 1394 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1395 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1396 while (--count >= 0) { 1397 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1398 return (0); 1399 SMT_PAUSE(); 1400 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1401 ((scp = ulwp->ul_schedctl) == NULL || 1402 scp->sc_state != SC_ONPROC)) 1403 break; 1404 } 1405 1406 return (EBUSY); 1407 } 1408 1409 /* 1410 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1411 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1412 * If this fails, return EBUSY and let the caller deal with it. 1413 * If this succeeds, return 0 with mutex_owner set to curthread 1414 * and mutex_ownerpid set to the current pid. 1415 */ 1416 static int 1417 mutex_trylock_process(mutex_t *mp, int tryhard) 1418 { 1419 ulwp_t *self = curthread; 1420 uberdata_t *udp = self->ul_uberdata; 1421 int error = EBUSY; 1422 volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64; 1423 uint32_t new_lockword; 1424 int count = 0; 1425 int max_count; 1426 uint8_t max_spinners; 1427 1428 ASSERT(mp->mutex_type & USYNC_PROCESS); 1429 1430 if (shared_mutex_held(mp)) 1431 return (EBUSY); 1432 1433 /* short-cut, not definitive (see below) */ 1434 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1435 ASSERT(mp->mutex_type & LOCK_ROBUST); 1436 error = ENOTRECOVERABLE; 1437 goto done; 1438 } 1439 1440 /* 1441 * Make one attempt to acquire the lock before 1442 * incurring the overhead of the spin loop. 1443 */ 1444 enter_critical(self); 1445 if (set_lock_byte64(lockp, udp->pid) == 0) { 1446 mp->mutex_owner = (uintptr_t)self; 1447 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1448 exit_critical(self); 1449 error = 0; 1450 goto done; 1451 } 1452 exit_critical(self); 1453 if (!tryhard) 1454 goto done; 1455 if (ncpus == 0) 1456 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1457 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1458 max_spinners = ncpus - 1; 1459 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1460 if (max_count == 0) 1461 goto done; 1462 1463 /* 1464 * This is a process-shared mutex. 1465 * We cannot know if the owner is running on a processor. 1466 * We just spin and hope that it is on a processor. 1467 */ 1468 enter_critical(self); 1469 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1470 exit_critical(self); 1471 goto done; 1472 } 1473 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1474 for (count = 1; ; count++) { 1475 if ((*lockp & LOCKMASK64) == 0 && 1476 set_lock_byte64(lockp, udp->pid) == 0) { 1477 mp->mutex_owner = (uintptr_t)self; 1478 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1479 error = 0; 1480 break; 1481 } 1482 if (count == max_count) 1483 break; 1484 SMT_PAUSE(); 1485 } 1486 new_lockword = spinners_decr(&mp->mutex_lockword); 1487 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1488 /* 1489 * We haven't yet acquired the lock, the lock 1490 * is free, and there are no other spinners. 1491 * Make one final attempt to acquire the lock. 
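 * (This mirrors the corresponding final attempt in mutex_trylock_adaptive().)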
1492 * 1493 * This isn't strictly necessary since mutex_lock_kernel() 1494 * (the next action this thread will take if it doesn't 1495 * acquire the lock here) makes one attempt to acquire 1496 * the lock before putting the thread to sleep. 1497 * 1498 * If the next action for this thread (on failure here) 1499 * were not to call mutex_lock_kernel(), this would be 1500 * necessary for correctness, to avoid ending up with an 1501 * unheld mutex with waiters but no one to wake them up. 1502 */ 1503 if (set_lock_byte64(lockp, udp->pid) == 0) { 1504 mp->mutex_owner = (uintptr_t)self; 1505 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1506 error = 0; 1507 } 1508 count++; 1509 } 1510 exit_critical(self); 1511 1512 done: 1513 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1514 ASSERT(mp->mutex_type & LOCK_ROBUST); 1515 /* 1516 * We shouldn't own the mutex. 1517 * Just clear the lock; everyone has already been waked up. 1518 */ 1519 mp->mutex_owner = 0; 1520 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1521 (void) clear_lockbyte64(&mp->mutex_lockword64); 1522 error = ENOTRECOVERABLE; 1523 } 1524 1525 if (error) { 1526 if (count) { 1527 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1528 } 1529 if (error != EBUSY) { 1530 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1531 } 1532 } else { 1533 if (count) { 1534 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1535 } 1536 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1537 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1538 ASSERT(mp->mutex_type & LOCK_ROBUST); 1539 if (mp->mutex_flag & LOCK_OWNERDEAD) 1540 error = EOWNERDEAD; 1541 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1542 error = ELOCKUNMAPPED; 1543 else 1544 error = EOWNERDEAD; 1545 } 1546 } 1547 1548 return (error); 1549 } 1550 1551 /* 1552 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1553 * Returns the lwpid of the thread that was dequeued, if any. 1554 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1555 * to wake up the specified lwp. 1556 */ 1557 static lwpid_t 1558 mutex_wakeup(mutex_t *mp) 1559 { 1560 lwpid_t lwpid = 0; 1561 int more; 1562 queue_head_t *qp; 1563 ulwp_t *ulwp; 1564 1565 /* 1566 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1567 * waiters bit if no one was found on the queue because the mutex 1568 * might have been deallocated or reallocated for another purpose. 1569 */ 1570 qp = queue_lock(mp, MX); 1571 if ((ulwp = dequeue(qp, &more)) != NULL) { 1572 lwpid = ulwp->ul_lwpid; 1573 mp->mutex_waiters = more; 1574 } 1575 queue_unlock(qp); 1576 return (lwpid); 1577 } 1578 1579 /* 1580 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 1581 */ 1582 static void 1583 mutex_wakeup_all(mutex_t *mp) 1584 { 1585 queue_head_t *qp; 1586 queue_root_t *qrp; 1587 int nlwpid = 0; 1588 int maxlwps = MAXLWPS; 1589 ulwp_t *ulwp; 1590 lwpid_t buffer[MAXLWPS]; 1591 lwpid_t *lwpid = buffer; 1592 1593 /* 1594 * Walk the list of waiters and prepare to wake up all of them. 1595 * The waiters flag has already been cleared from the mutex. 1596 * 1597 * We keep track of lwpids that are to be unparked in lwpid[]. 1598 * __lwp_unpark_all() is called to unpark all of them after 1599 * they have been removed from the sleep queue and the sleep 1600 * queue lock has been dropped. 
If we run out of space in our 1601 * on-stack buffer, we need to allocate more but we can't call 1602 * lmalloc() because we are holding a queue lock when the overflow 1603 * occurs and lmalloc() acquires a lock. We can't use alloca() 1604 * either because the application may have allocated a small 1605 * stack and we don't want to overrun the stack. So we call 1606 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1607 * system call directly since that path acquires no locks. 1608 */ 1609 qp = queue_lock(mp, MX); 1610 for (;;) { 1611 if ((qrp = qp->qh_root) == NULL || 1612 (ulwp = qrp->qr_head) == NULL) 1613 break; 1614 ASSERT(ulwp->ul_wchan == mp); 1615 queue_unlink(qp, &qrp->qr_head, NULL); 1616 ulwp->ul_sleepq = NULL; 1617 ulwp->ul_wchan = NULL; 1618 if (nlwpid == maxlwps) 1619 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1620 lwpid[nlwpid++] = ulwp->ul_lwpid; 1621 } 1622 1623 if (nlwpid == 0) { 1624 queue_unlock(qp); 1625 } else { 1626 mp->mutex_waiters = 0; 1627 no_preempt(curthread); 1628 queue_unlock(qp); 1629 if (nlwpid == 1) 1630 (void) __lwp_unpark(lwpid[0]); 1631 else 1632 (void) __lwp_unpark_all(lwpid, nlwpid); 1633 preempt(curthread); 1634 } 1635 1636 if (lwpid != buffer) 1637 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 1638 } 1639 1640 /* 1641 * Release a process-private mutex. 1642 * As an optimization, if there are waiters but there are also spinners 1643 * attempting to acquire the mutex, then don't bother waking up a waiter; 1644 * one of the spinners will acquire the mutex soon and it would be a waste 1645 * of resources to wake up some thread just to have it spin for a while 1646 * and then possibly go back to sleep. See mutex_trylock_adaptive(). 1647 */ 1648 static lwpid_t 1649 mutex_unlock_queue(mutex_t *mp, int release_all) 1650 { 1651 lwpid_t lwpid = 0; 1652 uint32_t old_lockword; 1653 1654 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1655 mp->mutex_owner = 0; 1656 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1657 if ((old_lockword & WAITERMASK) && 1658 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1659 ulwp_t *self = curthread; 1660 no_preempt(self); /* ensure a prompt wakeup */ 1661 if (release_all) 1662 mutex_wakeup_all(mp); 1663 else 1664 lwpid = mutex_wakeup(mp); 1665 if (lwpid == 0) 1666 preempt(self); 1667 } 1668 return (lwpid); 1669 } 1670 1671 /* 1672 * Like mutex_unlock_queue(), but for process-shared mutexes. 1673 */ 1674 static void 1675 mutex_unlock_process(mutex_t *mp, int release_all) 1676 { 1677 uint64_t old_lockword64; 1678 1679 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1680 mp->mutex_owner = 0; 1681 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1682 old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64); 1683 if ((old_lockword64 & WAITERMASK64) && 1684 (release_all || (old_lockword64 & SPINNERMASK64) == 0)) { 1685 ulwp_t *self = curthread; 1686 no_preempt(self); /* ensure a prompt wakeup */ 1687 (void) ___lwp_mutex_wakeup(mp, release_all); 1688 preempt(self); 1689 } 1690 } 1691 1692 void 1693 stall(void) 1694 { 1695 for (;;) 1696 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1697 } 1698 1699 /* 1700 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1701 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1702 * If successful, returns with mutex_owner set correctly. 
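 * The protocol: enqueue ourself, set the waiters bit, retry the lock
 * byte, and park in the kernel until we are unparked or time out.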
1703 */ 1704 int 1705 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1706 timespec_t *tsp) 1707 { 1708 uberdata_t *udp = curthread->ul_uberdata; 1709 queue_head_t *qp; 1710 hrtime_t begin_sleep; 1711 int error = 0; 1712 1713 self->ul_sp = stkptr(); 1714 if (__td_event_report(self, TD_SLEEP, udp)) { 1715 self->ul_wchan = mp; 1716 self->ul_td_evbuf.eventnum = TD_SLEEP; 1717 self->ul_td_evbuf.eventdata = mp; 1718 tdb_event(TD_SLEEP, udp); 1719 } 1720 if (msp) { 1721 tdb_incr(msp->mutex_sleep); 1722 begin_sleep = gethrtime(); 1723 } 1724 1725 DTRACE_PROBE1(plockstat, mutex__block, mp); 1726 1727 /* 1728 * Put ourself on the sleep queue, and while we are 1729 * unable to grab the lock, go park in the kernel. 1730 * Take ourself off the sleep queue after we acquire the lock. 1731 * The waiter bit can be set/cleared only while holding the queue lock. 1732 */ 1733 qp = queue_lock(mp, MX); 1734 enqueue(qp, self, 0); 1735 mp->mutex_waiters = 1; 1736 for (;;) { 1737 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1738 mp->mutex_owner = (uintptr_t)self; 1739 mp->mutex_waiters = dequeue_self(qp); 1740 break; 1741 } 1742 set_parking_flag(self, 1); 1743 queue_unlock(qp); 1744 /* 1745 * __lwp_park() will return the residual time in tsp 1746 * if we are unparked before the timeout expires. 1747 */ 1748 error = __lwp_park(tsp, 0); 1749 set_parking_flag(self, 0); 1750 /* 1751 * We could have taken a signal or suspended ourself. 1752 * If we did, then we removed ourself from the queue. 1753 * Someone else may have removed us from the queue 1754 * as a consequence of mutex_unlock(). We may have 1755 * gotten a timeout from __lwp_park(). Or we may still 1756 * be on the queue and this is just a spurious wakeup. 1757 */ 1758 qp = queue_lock(mp, MX); 1759 if (self->ul_sleepq == NULL) { 1760 if (error) { 1761 mp->mutex_waiters = queue_waiter(qp)? 1 : 0; 1762 if (error != EINTR) 1763 break; 1764 error = 0; 1765 } 1766 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1767 mp->mutex_owner = (uintptr_t)self; 1768 break; 1769 } 1770 enqueue(qp, self, 0); 1771 mp->mutex_waiters = 1; 1772 } 1773 ASSERT(self->ul_sleepq == qp && 1774 self->ul_qtype == MX && 1775 self->ul_wchan == mp); 1776 if (error) { 1777 if (error != EINTR) { 1778 mp->mutex_waiters = dequeue_self(qp); 1779 break; 1780 } 1781 error = 0; 1782 } 1783 } 1784 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1785 self->ul_wchan == NULL); 1786 self->ul_sp = 0; 1787 queue_unlock(qp); 1788 1789 if (msp) 1790 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1791 1792 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1793 1794 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1795 ASSERT(mp->mutex_type & LOCK_ROBUST); 1796 /* 1797 * We shouldn't own the mutex. 1798 * Just clear the lock; everyone has already been waked up. 
1799 */ 1800 mp->mutex_owner = 0; 1801 (void) clear_lockbyte(&mp->mutex_lockword); 1802 error = ENOTRECOVERABLE; 1803 } 1804 1805 if (error) { 1806 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1807 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1808 } else { 1809 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1810 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1811 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1812 ASSERT(mp->mutex_type & LOCK_ROBUST); 1813 error = EOWNERDEAD; 1814 } 1815 } 1816 1817 return (error); 1818 } 1819 1820 static int 1821 mutex_recursion(mutex_t *mp, int mtype, int try) 1822 { 1823 ASSERT(mutex_is_held(mp)); 1824 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1825 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1826 1827 if (mtype & LOCK_RECURSIVE) { 1828 if (mp->mutex_rcount == RECURSION_MAX) { 1829 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1830 return (EAGAIN); 1831 } 1832 mp->mutex_rcount++; 1833 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1834 return (0); 1835 } 1836 if (try == MUTEX_LOCK) { 1837 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1838 return (EDEADLK); 1839 } 1840 return (EBUSY); 1841 } 1842 1843 /* 1844 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1845 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1846 * We use tdb_hash_lock here and in the synch object tracking code in 1847 * the tdb_agent.c file. There is no conflict between these two usages. 1848 */ 1849 void 1850 register_lock(mutex_t *mp) 1851 { 1852 uberdata_t *udp = curthread->ul_uberdata; 1853 uint_t hash = LOCK_HASH(mp); 1854 robust_t *rlp; 1855 robust_t **rlpp; 1856 robust_t **table; 1857 1858 if ((table = udp->robustlocks) == NULL) { 1859 lmutex_lock(&udp->tdb_hash_lock); 1860 if ((table = udp->robustlocks) == NULL) { 1861 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1862 _membar_producer(); 1863 udp->robustlocks = table; 1864 } 1865 lmutex_unlock(&udp->tdb_hash_lock); 1866 } 1867 _membar_consumer(); 1868 1869 /* 1870 * First search the registered table with no locks held. 1871 * This is safe because the table never shrinks 1872 * and we can only get a false negative. 1873 */ 1874 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1875 if (rlp->robust_lock == mp) /* already registered */ 1876 return; 1877 } 1878 1879 /* 1880 * The lock was not found. 1881 * Repeat the operation with tdb_hash_lock held. 1882 */ 1883 lmutex_lock(&udp->tdb_hash_lock); 1884 1885 for (rlpp = &table[hash]; 1886 (rlp = *rlpp) != NULL; 1887 rlpp = &rlp->robust_next) { 1888 if (rlp->robust_lock == mp) { /* already registered */ 1889 lmutex_unlock(&udp->tdb_hash_lock); 1890 return; 1891 } 1892 } 1893 1894 /* 1895 * The lock has never been registered. 1896 * Register it now and add it to the table. 1897 */ 1898 (void) ___lwp_mutex_register(mp); 1899 rlp = lmalloc(sizeof (*rlp)); 1900 rlp->robust_lock = mp; 1901 _membar_producer(); 1902 *rlpp = rlp; 1903 1904 lmutex_unlock(&udp->tdb_hash_lock); 1905 } 1906 1907 /* 1908 * This is called in the child of fork()/forkall() to start over 1909 * with a clean slate. (Each process must register its own locks.) 1910 * No locks are needed because all other threads are suspended or gone. 
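 * Only the user-level hash table is freed here; each robust lock is
 * re-registered by register_lock() the next time it is locked.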
1911 */ 1912 void 1913 unregister_locks(void) 1914 { 1915 uberdata_t *udp = curthread->ul_uberdata; 1916 uint_t hash; 1917 robust_t **table; 1918 robust_t *rlp; 1919 robust_t *next; 1920 1921 if ((table = udp->robustlocks) != NULL) { 1922 for (hash = 0; hash < LOCKHASHSZ; hash++) { 1923 rlp = table[hash]; 1924 while (rlp != NULL) { 1925 next = rlp->robust_next; 1926 lfree(rlp, sizeof (*rlp)); 1927 rlp = next; 1928 } 1929 } 1930 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 1931 udp->robustlocks = NULL; 1932 } 1933 } 1934 1935 /* 1936 * Returns with mutex_owner set correctly. 1937 */ 1938 int 1939 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 1940 { 1941 ulwp_t *self = curthread; 1942 uberdata_t *udp = self->ul_uberdata; 1943 int mtype = mp->mutex_type; 1944 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 1945 int error = 0; 1946 int noceil = try & MUTEX_NOCEIL; 1947 uint8_t ceil; 1948 int myprio; 1949 1950 try &= ~MUTEX_NOCEIL; 1951 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1952 1953 if (!self->ul_schedctl_called) 1954 (void) setup_schedctl(); 1955 1956 if (msp && try == MUTEX_TRY) 1957 tdb_incr(msp->mutex_try); 1958 1959 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 1960 return (mutex_recursion(mp, mtype, try)); 1961 1962 if (self->ul_error_detection && try == MUTEX_LOCK && 1963 tsp == NULL && mutex_is_held(mp)) 1964 lock_error(mp, "mutex_lock", NULL, NULL); 1965 1966 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 1967 update_sched(self); 1968 if (self->ul_cid != self->ul_rtclassid) { 1969 DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM); 1970 return (EPERM); 1971 } 1972 ceil = mp->mutex_ceiling; 1973 myprio = self->ul_epri? self->ul_epri : self->ul_pri; 1974 if (myprio > ceil) { 1975 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 1976 return (EINVAL); 1977 } 1978 if ((error = _ceil_mylist_add(mp)) != 0) { 1979 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1980 return (error); 1981 } 1982 if (myprio < ceil) 1983 _ceil_prio_inherit(ceil); 1984 } 1985 1986 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 1987 == (USYNC_PROCESS | LOCK_ROBUST)) 1988 register_lock(mp); 1989 1990 if (mtype & LOCK_PRIO_INHERIT) { 1991 /* go straight to the kernel */ 1992 if (try == MUTEX_TRY) 1993 error = mutex_trylock_kernel(mp); 1994 else /* MUTEX_LOCK */ 1995 error = mutex_lock_kernel(mp, tsp, msp); 1996 /* 1997 * The kernel never sets or clears the lock byte 1998 * for LOCK_PRIO_INHERIT mutexes. 1999 * Set it here for consistency. 
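 * We also count held PI locks in self->ul_pilocks; enqueue() consults
 * that count when deciding whether a thread needs real-time queueing.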
2000 */ 2001 switch (error) { 2002 case 0: 2003 self->ul_pilocks++; 2004 mp->mutex_lockw = LOCKSET; 2005 break; 2006 case EOWNERDEAD: 2007 case ELOCKUNMAPPED: 2008 self->ul_pilocks++; 2009 mp->mutex_lockw = LOCKSET; 2010 /* FALLTHROUGH */ 2011 case ENOTRECOVERABLE: 2012 ASSERT(mtype & LOCK_ROBUST); 2013 break; 2014 case EDEADLK: 2015 if (try == MUTEX_LOCK) 2016 stall(); 2017 error = EBUSY; 2018 break; 2019 } 2020 } else if (mtype & USYNC_PROCESS) { 2021 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 2022 if (error == EBUSY && try == MUTEX_LOCK) 2023 error = mutex_lock_kernel(mp, tsp, msp); 2024 } else { /* USYNC_THREAD */ 2025 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 2026 if (error == EBUSY && try == MUTEX_LOCK) 2027 error = mutex_lock_queue(self, msp, mp, tsp); 2028 } 2029 2030 switch (error) { 2031 case 0: 2032 case EOWNERDEAD: 2033 case ELOCKUNMAPPED: 2034 if (mtype & LOCK_ROBUST) 2035 remember_lock(mp); 2036 if (msp) 2037 record_begin_hold(msp); 2038 break; 2039 default: 2040 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2041 (void) _ceil_mylist_del(mp); 2042 if (myprio < ceil) 2043 _ceil_prio_waive(); 2044 } 2045 if (try == MUTEX_TRY) { 2046 if (msp) 2047 tdb_incr(msp->mutex_try_fail); 2048 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2049 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2050 tdb_event(TD_LOCK_TRY, udp); 2051 } 2052 } 2053 break; 2054 } 2055 2056 return (error); 2057 } 2058 2059 int 2060 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 2061 { 2062 ulwp_t *self = curthread; 2063 uberdata_t *udp = self->ul_uberdata; 2064 2065 /* 2066 * We know that USYNC_PROCESS is set in mtype and that 2067 * zero, one, or both of the flags LOCK_RECURSIVE and 2068 * LOCK_ERRORCHECK are set, and that no other flags are set. 2069 */ 2070 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 2071 enter_critical(self); 2072 if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) { 2073 mp->mutex_owner = (uintptr_t)self; 2074 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 2075 exit_critical(self); 2076 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2077 return (0); 2078 } 2079 exit_critical(self); 2080 2081 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 2082 return (mutex_recursion(mp, mtype, try)); 2083 2084 if (try == MUTEX_LOCK) { 2085 if (mutex_trylock_process(mp, 1) == 0) 2086 return (0); 2087 return (mutex_lock_kernel(mp, tsp, NULL)); 2088 } 2089 2090 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2091 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2092 tdb_event(TD_LOCK_TRY, udp); 2093 } 2094 return (EBUSY); 2095 } 2096 2097 static int 2098 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 2099 { 2100 ulwp_t *self = curthread; 2101 int mtype = mp->mutex_type; 2102 uberflags_t *gflags; 2103 2104 /* 2105 * Optimize the case of USYNC_THREAD, including 2106 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2107 * no error detection, no lock statistics, 2108 * and the process has only a single thread. 2109 * (Most likely a traditional single-threaded application.) 2110 */ 2111 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2112 self->ul_uberdata->uberflags.uf_all) == 0) { 2113 /* 2114 * Only one thread exists so we don't need an atomic operation. 
2115 */ 2116 if (mp->mutex_lockw == 0) { 2117 mp->mutex_lockw = LOCKSET; 2118 mp->mutex_owner = (uintptr_t)self; 2119 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2120 return (0); 2121 } 2122 if (mtype && MUTEX_OWNER(mp) == self) 2123 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2124 /* 2125 * We have reached a deadlock, probably because the 2126 * process is executing non-async-signal-safe code in 2127 * a signal handler and is attempting to acquire a lock 2128 * that it already owns. This is not surprising, given 2129 * bad programming practices over the years that have 2130 * resulted in applications calling printf() and such 2131 * in their signal handlers. Unless the user has told 2132 * us that the signal handlers are safe by setting: 2133 * export _THREAD_ASYNC_SAFE=1 2134 * we return EDEADLK rather than actually deadlocking. 2135 */ 2136 if (tsp == NULL && 2137 MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 2138 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 2139 return (EDEADLK); 2140 } 2141 } 2142 2143 /* 2144 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2145 * no error detection, and no lock statistics. 2146 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2147 */ 2148 if ((gflags = self->ul_schedctl_called) != NULL && 2149 (gflags->uf_trs_ted | 2150 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2151 if (mtype & USYNC_PROCESS) 2152 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 2153 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2154 mp->mutex_owner = (uintptr_t)self; 2155 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2156 return (0); 2157 } 2158 if (mtype && MUTEX_OWNER(mp) == self) 2159 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2160 if (mutex_trylock_adaptive(mp, 1) != 0) 2161 return (mutex_lock_queue(self, NULL, mp, tsp)); 2162 return (0); 2163 } 2164 2165 /* else do it the long way */ 2166 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 2167 } 2168 2169 #pragma weak mutex_lock = __mutex_lock 2170 #pragma weak _mutex_lock = __mutex_lock 2171 #pragma weak pthread_mutex_lock = __mutex_lock 2172 #pragma weak _pthread_mutex_lock = __mutex_lock 2173 int 2174 __mutex_lock(mutex_t *mp) 2175 { 2176 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2177 return (mutex_lock_impl(mp, NULL)); 2178 } 2179 2180 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 2181 int 2182 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 2183 { 2184 timespec_t tslocal; 2185 int error; 2186 2187 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2188 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 2189 error = mutex_lock_impl(mp, &tslocal); 2190 if (error == ETIME) 2191 error = ETIMEDOUT; 2192 return (error); 2193 } 2194 2195 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 2196 int 2197 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 2198 { 2199 timespec_t tslocal; 2200 int error; 2201 2202 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2203 tslocal = *reltime; 2204 error = mutex_lock_impl(mp, &tslocal); 2205 if (error == ETIME) 2206 error = ETIMEDOUT; 2207 return (error); 2208 } 2209 2210 #pragma weak mutex_trylock = __mutex_trylock 2211 #pragma weak _mutex_trylock = __mutex_trylock 2212 #pragma weak pthread_mutex_trylock = __mutex_trylock 2213 #pragma weak _pthread_mutex_trylock = __mutex_trylock 2214 int 2215 __mutex_trylock(mutex_t *mp) 2216 { 2217 ulwp_t *self = curthread; 2218 uberdata_t *udp = 
self->ul_uberdata; 2219 int mtype = mp->mutex_type; 2220 uberflags_t *gflags; 2221 2222 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2223 2224 /* 2225 * Optimize the case of USYNC_THREAD, including 2226 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2227 * no error detection, no lock statistics, 2228 * and the process has only a single thread. 2229 * (Most likely a traditional single-threaded application.) 2230 */ 2231 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2232 udp->uberflags.uf_all) == 0) { 2233 /* 2234 * Only one thread exists so we don't need an atomic operation. 2235 */ 2236 if (mp->mutex_lockw == 0) { 2237 mp->mutex_lockw = LOCKSET; 2238 mp->mutex_owner = (uintptr_t)self; 2239 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2240 return (0); 2241 } 2242 if (mtype && MUTEX_OWNER(mp) == self) 2243 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2244 return (EBUSY); 2245 } 2246 2247 /* 2248 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2249 * no error detection, and no lock statistics. 2250 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2251 */ 2252 if ((gflags = self->ul_schedctl_called) != NULL && 2253 (gflags->uf_trs_ted | 2254 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2255 if (mtype & USYNC_PROCESS) 2256 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2257 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2258 mp->mutex_owner = (uintptr_t)self; 2259 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2260 return (0); 2261 } 2262 if (mtype && MUTEX_OWNER(mp) == self) 2263 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2264 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2265 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2266 tdb_event(TD_LOCK_TRY, udp); 2267 } 2268 return (EBUSY); 2269 } 2270 2271 /* else do it the long way */ 2272 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2273 } 2274 2275 int 2276 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2277 { 2278 ulwp_t *self = curthread; 2279 uberdata_t *udp = self->ul_uberdata; 2280 int mtype = mp->mutex_type; 2281 tdb_mutex_stats_t *msp; 2282 int error = 0; 2283 int release_all; 2284 lwpid_t lwpid; 2285 2286 if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 2287 return (EPERM); 2288 2289 if (self->ul_error_detection && !mutex_is_held(mp)) 2290 lock_error(mp, "mutex_unlock", NULL, NULL); 2291 2292 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2293 mp->mutex_rcount--; 2294 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2295 return (0); 2296 } 2297 2298 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2299 (void) record_hold_time(msp); 2300 2301 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2302 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2303 ASSERT(mp->mutex_type & LOCK_ROBUST); 2304 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2305 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2306 } 2307 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2308 2309 if (mtype & LOCK_PRIO_INHERIT) { 2310 no_preempt(self); 2311 mp->mutex_owner = 0; 2312 /* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */ 2313 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2314 mp->mutex_lockw = LOCKCLEAR; 2315 self->ul_pilocks--; 2316 error = ___lwp_mutex_unlock(mp); 2317 preempt(self); 2318 } else if (mtype & USYNC_PROCESS) { 2319 mutex_unlock_process(mp, release_all); 2320 } else { /* USYNC_THREAD */ 2321 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2322 (void) __lwp_unpark(lwpid); 2323 preempt(self); 2324 
} 2325 } 2326 2327 if (mtype & LOCK_ROBUST) 2328 forget_lock(mp); 2329 2330 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2331 _ceil_prio_waive(); 2332 2333 return (error); 2334 } 2335 2336 #pragma weak mutex_unlock = __mutex_unlock 2337 #pragma weak _mutex_unlock = __mutex_unlock 2338 #pragma weak pthread_mutex_unlock = __mutex_unlock 2339 #pragma weak _pthread_mutex_unlock = __mutex_unlock 2340 int 2341 __mutex_unlock(mutex_t *mp) 2342 { 2343 ulwp_t *self = curthread; 2344 int mtype = mp->mutex_type; 2345 uberflags_t *gflags; 2346 lwpid_t lwpid; 2347 short el; 2348 2349 /* 2350 * Optimize the case of USYNC_THREAD, including 2351 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2352 * no error detection, no lock statistics, 2353 * and the process has only a single thread. 2354 * (Most likely a traditional single-threaded application.) 2355 */ 2356 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2357 self->ul_uberdata->uberflags.uf_all) == 0) { 2358 if (mtype) { 2359 /* 2360 * At this point we know that one or both of the 2361 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2362 */ 2363 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2364 return (EPERM); 2365 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2366 mp->mutex_rcount--; 2367 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2368 return (0); 2369 } 2370 } 2371 /* 2372 * Only one thread exists so we don't need an atomic operation. 2373 * Also, there can be no waiters. 2374 */ 2375 mp->mutex_owner = 0; 2376 mp->mutex_lockword = 0; 2377 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2378 return (0); 2379 } 2380 2381 /* 2382 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2383 * no error detection, and no lock statistics. 2384 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2385 */ 2386 if ((gflags = self->ul_schedctl_called) != NULL) { 2387 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2388 fast_unlock: 2389 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2390 (void) __lwp_unpark(lwpid); 2391 preempt(self); 2392 } 2393 return (0); 2394 } 2395 if (el) /* error detection or lock statistics */ 2396 goto slow_unlock; 2397 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2398 /* 2399 * At this point we know that one or both of the 2400 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2401 */ 2402 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2403 return (EPERM); 2404 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2405 mp->mutex_rcount--; 2406 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2407 return (0); 2408 } 2409 goto fast_unlock; 2410 } 2411 if ((mtype & 2412 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2413 /* 2414 * At this point we know that zero, one, or both of the 2415 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2416 * that the USYNC_PROCESS flag is set. 2417 */ 2418 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2419 return (EPERM); 2420 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2421 mp->mutex_rcount--; 2422 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2423 return (0); 2424 } 2425 mutex_unlock_process(mp, 0); 2426 return (0); 2427 } 2428 } 2429 2430 /* else do it the long way */ 2431 slow_unlock: 2432 return (mutex_unlock_internal(mp, 0)); 2433 } 2434 2435 /* 2436 * Internally to the library, almost all mutex lock/unlock actions 2437 * go through these lmutex_ functions, to protect critical regions. 
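 * A typical internal caller simply brackets a short critical region, for
 * example (illustrative sketch; link_lock is one of the library's internal
 * locks, mentioned with the spin-lock statistics below):
 *	lmutex_lock(&udp->link_lock);
 *	... update library-private data ...
 *	lmutex_unlock(&udp->link_lock);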
2438 * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 2439 * to make these functions faster since we know that the mutex type 2440 * of all internal locks is USYNC_THREAD. We also know that internal 2441 * locking can never fail, so we panic if it does. 2442 */ 2443 void 2444 lmutex_lock(mutex_t *mp) 2445 { 2446 ulwp_t *self = curthread; 2447 uberdata_t *udp = self->ul_uberdata; 2448 2449 ASSERT(mp->mutex_type == USYNC_THREAD); 2450 2451 enter_critical(self); 2452 /* 2453 * Optimize the case of no lock statistics and only a single thread. 2454 * (Most likely a traditional single-threaded application.) 2455 */ 2456 if (udp->uberflags.uf_all == 0) { 2457 /* 2458 * Only one thread exists; the mutex must be free. 2459 */ 2460 ASSERT(mp->mutex_lockw == 0); 2461 mp->mutex_lockw = LOCKSET; 2462 mp->mutex_owner = (uintptr_t)self; 2463 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2464 } else { 2465 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2466 2467 if (!self->ul_schedctl_called) 2468 (void) setup_schedctl(); 2469 2470 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2471 mp->mutex_owner = (uintptr_t)self; 2472 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2473 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2474 (void) mutex_lock_queue(self, msp, mp, NULL); 2475 } 2476 2477 if (msp) 2478 record_begin_hold(msp); 2479 } 2480 } 2481 2482 void 2483 lmutex_unlock(mutex_t *mp) 2484 { 2485 ulwp_t *self = curthread; 2486 uberdata_t *udp = self->ul_uberdata; 2487 2488 ASSERT(mp->mutex_type == USYNC_THREAD); 2489 2490 /* 2491 * Optimize the case of no lock statistics and only a single thread. 2492 * (Most likely a traditional single-threaded application.) 2493 */ 2494 if (udp->uberflags.uf_all == 0) { 2495 /* 2496 * Only one thread exists so there can be no waiters. 2497 */ 2498 mp->mutex_owner = 0; 2499 mp->mutex_lockword = 0; 2500 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2501 } else { 2502 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2503 lwpid_t lwpid; 2504 2505 if (msp) 2506 (void) record_hold_time(msp); 2507 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2508 (void) __lwp_unpark(lwpid); 2509 preempt(self); 2510 } 2511 } 2512 exit_critical(self); 2513 } 2514 2515 /* 2516 * For specialized code in libc, like the asynchronous i/o code, 2517 * the following sig_*() locking primitives are used in order 2518 * to make the code asynchronous signal safe. Signals are 2519 * deferred while locks acquired by these functions are held. 2520 */ 2521 void 2522 sig_mutex_lock(mutex_t *mp) 2523 { 2524 sigoff(curthread); 2525 (void) mutex_lock(mp); 2526 } 2527 2528 void 2529 sig_mutex_unlock(mutex_t *mp) 2530 { 2531 (void) mutex_unlock(mp); 2532 sigon(curthread); 2533 } 2534 2535 int 2536 sig_mutex_trylock(mutex_t *mp) 2537 { 2538 int error; 2539 2540 sigoff(curthread); 2541 if ((error = mutex_trylock(mp)) != 0) 2542 sigon(curthread); 2543 return (error); 2544 } 2545 2546 /* 2547 * sig_cond_wait() is a cancellation point. 2548 */ 2549 int 2550 sig_cond_wait(cond_t *cv, mutex_t *mp) 2551 { 2552 int error; 2553 2554 ASSERT(curthread->ul_sigdefer != 0); 2555 pthread_testcancel(); 2556 error = __cond_wait(cv, mp); 2557 if (error == EINTR && curthread->ul_cursig) { 2558 sig_mutex_unlock(mp); 2559 /* take the deferred signal here */ 2560 sig_mutex_lock(mp); 2561 } 2562 pthread_testcancel(); 2563 return (error); 2564 } 2565 2566 /* 2567 * sig_cond_reltimedwait() is a cancellation point. 
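 * As with sig_cond_wait(), the caller must already hold the mutex, having
 * acquired it with sig_mutex_lock(), so signals are still deferred here
 * (the ASSERT on ul_sigdefer below checks exactly that).  A minimal usage
 * sketch, with a hypothetical predicate:
 *	sig_mutex_lock(&mp);
 *	while (!predicate) {
 *		if (sig_cond_reltimedwait(&cv, &mp, &ts) == ETIME)
 *			break;
 *	}
 *	sig_mutex_unlock(&mp);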
2568 */ 2569 int 2570 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2571 { 2572 int error; 2573 2574 ASSERT(curthread->ul_sigdefer != 0); 2575 pthread_testcancel(); 2576 error = __cond_reltimedwait(cv, mp, ts); 2577 if (error == EINTR && curthread->ul_cursig) { 2578 sig_mutex_unlock(mp); 2579 /* take the deferred signal here */ 2580 sig_mutex_lock(mp); 2581 } 2582 pthread_testcancel(); 2583 return (error); 2584 } 2585 2586 /* 2587 * For specialized code in libc, like the stdio code, 2588 * the following cancel_safe_*() locking primitives are used in 2589 * order to make the code cancellation-safe. Cancellation is 2590 * deferred while locks acquired by these functions are held. 2591 */ 2592 void 2593 cancel_safe_mutex_lock(mutex_t *mp) 2594 { 2595 (void) mutex_lock(mp); 2596 curthread->ul_libc_locks++; 2597 } 2598 2599 int 2600 cancel_safe_mutex_trylock(mutex_t *mp) 2601 { 2602 int error; 2603 2604 if ((error = mutex_trylock(mp)) == 0) 2605 curthread->ul_libc_locks++; 2606 return (error); 2607 } 2608 2609 void 2610 cancel_safe_mutex_unlock(mutex_t *mp) 2611 { 2612 ulwp_t *self = curthread; 2613 2614 ASSERT(self->ul_libc_locks != 0); 2615 2616 (void) mutex_unlock(mp); 2617 2618 /* 2619 * Decrement the count of locks held by cancel_safe_mutex_lock(). 2620 * If we are then in a position to terminate cleanly and 2621 * if there is a pending cancellation and cancellation 2622 * is not disabled and we received EINTR from a recent 2623 * system call then perform the cancellation action now. 2624 */ 2625 if (--self->ul_libc_locks == 0 && 2626 !(self->ul_vfork | self->ul_nocancel | 2627 self->ul_critical | self->ul_sigdefer) && 2628 cancel_active()) 2629 _pthread_exit(PTHREAD_CANCELED); 2630 } 2631 2632 static int 2633 shared_mutex_held(mutex_t *mparg) 2634 { 2635 /* 2636 * The 'volatile' is necessary to make sure the compiler doesn't 2637 * reorder the tests of the various components of the mutex. 2638 * They must be tested in this order: 2639 * mutex_lockw 2640 * mutex_owner 2641 * mutex_ownerpid 2642 * This relies on the fact that everywhere mutex_lockw is cleared, 2643 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2644 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2645 * and mutex_ownerpid are set after mutex_lockw is set, and that 2646 * mutex_lockw is set or cleared with a memory barrier. 2647 */ 2648 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2649 ulwp_t *self = curthread; 2650 uberdata_t *udp = self->ul_uberdata; 2651 2652 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2653 } 2654 2655 /* 2656 * Some crufty old programs define their own version of _mutex_held() 2657 * to be simply return(1). This breaks internal libc logic, so we 2658 * define a private version for exclusive use by libc, mutex_is_held(), 2659 * and also a new public function, __mutex_held(), to be used in new 2660 * code to circumvent these crufty old programs. 
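 * For example (illustrative only), new code that wants the real answer
 * can assert
 *	assert(__mutex_held(&some_lock));
 * rather than calling the interposable mutex_held().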
2661 */ 2662 #pragma weak mutex_held = mutex_is_held 2663 #pragma weak _mutex_held = mutex_is_held 2664 #pragma weak __mutex_held = mutex_is_held 2665 int 2666 mutex_is_held(mutex_t *mparg) 2667 { 2668 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2669 2670 if (mparg->mutex_type & USYNC_PROCESS) 2671 return (shared_mutex_held(mparg)); 2672 return (MUTEX_OWNED(mp, curthread)); 2673 } 2674 2675 #pragma weak mutex_destroy = __mutex_destroy 2676 #pragma weak _mutex_destroy = __mutex_destroy 2677 #pragma weak pthread_mutex_destroy = __mutex_destroy 2678 #pragma weak _pthread_mutex_destroy = __mutex_destroy 2679 int 2680 __mutex_destroy(mutex_t *mp) 2681 { 2682 if (mp->mutex_type & USYNC_PROCESS) 2683 forget_lock(mp); 2684 (void) memset(mp, 0, sizeof (*mp)); 2685 tdb_sync_obj_deregister(mp); 2686 return (0); 2687 } 2688 2689 #pragma weak mutex_consistent = __mutex_consistent 2690 #pragma weak _mutex_consistent = __mutex_consistent 2691 #pragma weak pthread_mutex_consistent_np = __mutex_consistent 2692 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 2693 int 2694 __mutex_consistent(mutex_t *mp) 2695 { 2696 /* 2697 * Do this only for an inconsistent, initialized robust lock 2698 * that we hold. For all other cases, return EINVAL. 2699 */ 2700 if (mutex_is_held(mp) && 2701 (mp->mutex_type & LOCK_ROBUST) && 2702 (mp->mutex_flag & LOCK_INITED) && 2703 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2704 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2705 mp->mutex_rcount = 0; 2706 return (0); 2707 } 2708 return (EINVAL); 2709 } 2710 2711 /* 2712 * Spin locks are separate from ordinary mutexes, 2713 * but we use the same data structure for them. 2714 */ 2715 2716 #pragma weak pthread_spin_init = _pthread_spin_init 2717 int 2718 _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2719 { 2720 mutex_t *mp = (mutex_t *)lock; 2721 2722 (void) memset(mp, 0, sizeof (*mp)); 2723 if (pshared == PTHREAD_PROCESS_SHARED) 2724 mp->mutex_type = USYNC_PROCESS; 2725 else 2726 mp->mutex_type = USYNC_THREAD; 2727 mp->mutex_flag = LOCK_INITED; 2728 mp->mutex_magic = MUTEX_MAGIC; 2729 return (0); 2730 } 2731 2732 #pragma weak pthread_spin_destroy = _pthread_spin_destroy 2733 int 2734 _pthread_spin_destroy(pthread_spinlock_t *lock) 2735 { 2736 (void) memset(lock, 0, sizeof (*lock)); 2737 return (0); 2738 } 2739 2740 #pragma weak pthread_spin_trylock = _pthread_spin_trylock 2741 int 2742 _pthread_spin_trylock(pthread_spinlock_t *lock) 2743 { 2744 mutex_t *mp = (mutex_t *)lock; 2745 ulwp_t *self = curthread; 2746 int error = 0; 2747 2748 no_preempt(self); 2749 if (set_lock_byte(&mp->mutex_lockw) != 0) 2750 error = EBUSY; 2751 else { 2752 mp->mutex_owner = (uintptr_t)self; 2753 if (mp->mutex_type == USYNC_PROCESS) 2754 mp->mutex_ownerpid = self->ul_uberdata->pid; 2755 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2756 } 2757 preempt(self); 2758 return (error); 2759 } 2760 2761 #pragma weak pthread_spin_lock = _pthread_spin_lock 2762 int 2763 _pthread_spin_lock(pthread_spinlock_t *lock) 2764 { 2765 mutex_t *mp = (mutex_t *)lock; 2766 ulwp_t *self = curthread; 2767 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2768 int count = 0; 2769 2770 ASSERT(!self->ul_critical || self->ul_bindflags); 2771 2772 DTRACE_PROBE1(plockstat, mutex__spin, mp); 2773 2774 /* 2775 * We don't care whether the owner is running on a processor. 2776 * We just spin because that's what this interface requires. 
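 * The loop below is a test-and-test-and-set spin: we read the lock byte
 * until it appears to be clear and only then attempt the atomic
 * set_lock_byte(), issuing SMT_PAUSE() between probes so as not to
 * monopolize a hardware-threaded core.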
2777 */ 2778 for (;;) { 2779 if (*lockp == 0) { /* lock byte appears to be clear */ 2780 no_preempt(self); 2781 if (set_lock_byte(lockp) == 0) 2782 break; 2783 preempt(self); 2784 } 2785 if (count < INT_MAX) 2786 count++; 2787 SMT_PAUSE(); 2788 } 2789 mp->mutex_owner = (uintptr_t)self; 2790 if (mp->mutex_type == USYNC_PROCESS) 2791 mp->mutex_ownerpid = self->ul_uberdata->pid; 2792 preempt(self); 2793 if (count) { 2794 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 2795 } 2796 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2797 return (0); 2798 } 2799 2800 #pragma weak pthread_spin_unlock = _pthread_spin_unlock 2801 int 2802 _pthread_spin_unlock(pthread_spinlock_t *lock) 2803 { 2804 mutex_t *mp = (mutex_t *)lock; 2805 ulwp_t *self = curthread; 2806 2807 no_preempt(self); 2808 mp->mutex_owner = 0; 2809 mp->mutex_ownerpid = 0; 2810 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2811 (void) atomic_swap_32(&mp->mutex_lockword, 0); 2812 preempt(self); 2813 return (0); 2814 } 2815 2816 #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 2817 2818 /* 2819 * Find/allocate an entry for 'lock' in our array of held locks. 2820 */ 2821 static mutex_t ** 2822 find_lock_entry(mutex_t *lock) 2823 { 2824 ulwp_t *self = curthread; 2825 mutex_t **remembered = NULL; 2826 mutex_t **lockptr; 2827 uint_t nlocks; 2828 2829 if ((nlocks = self->ul_heldlockcnt) != 0) 2830 lockptr = self->ul_heldlocks.array; 2831 else { 2832 nlocks = 1; 2833 lockptr = &self->ul_heldlocks.single; 2834 } 2835 2836 for (; nlocks; nlocks--, lockptr++) { 2837 if (*lockptr == lock) 2838 return (lockptr); 2839 if (*lockptr == NULL && remembered == NULL) 2840 remembered = lockptr; 2841 } 2842 if (remembered != NULL) { 2843 *remembered = lock; 2844 return (remembered); 2845 } 2846 2847 /* 2848 * No entry available. Allocate more space, converting 2849 * the single entry into an array of entries if necessary. 2850 */ 2851 if ((nlocks = self->ul_heldlockcnt) == 0) { 2852 /* 2853 * Initial allocation of the array. 2854 * Convert the single entry into an array. 2855 */ 2856 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 2857 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 2858 /* 2859 * The single entry becomes the first entry in the array. 2860 */ 2861 *lockptr = self->ul_heldlocks.single; 2862 self->ul_heldlocks.array = lockptr; 2863 /* 2864 * Return the next available entry in the array. 2865 */ 2866 *++lockptr = lock; 2867 return (lockptr); 2868 } 2869 /* 2870 * Reallocate the array, double the size each time. 2871 */ 2872 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 2873 (void) memcpy(lockptr, self->ul_heldlocks.array, 2874 nlocks * sizeof (mutex_t *)); 2875 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2876 self->ul_heldlocks.array = lockptr; 2877 self->ul_heldlockcnt *= 2; 2878 /* 2879 * Return the next available entry in the newly allocated array. 2880 */ 2881 *(lockptr += nlocks) = lock; 2882 return (lockptr); 2883 } 2884 2885 /* 2886 * Insert 'lock' into our list of held locks. 2887 * Currently only used for LOCK_ROBUST mutexes. 2888 */ 2889 void 2890 remember_lock(mutex_t *lock) 2891 { 2892 (void) find_lock_entry(lock); 2893 } 2894 2895 /* 2896 * Remove 'lock' from our list of held locks. 2897 * Currently only used for LOCK_ROBUST mutexes. 2898 */ 2899 void 2900 forget_lock(mutex_t *lock) 2901 { 2902 *find_lock_entry(lock) = NULL; 2903 } 2904 2905 /* 2906 * Free the array of held locks. 
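 * (heldlock_exit(), below, is one caller; it frees the array after marking
 * any abandoned robust locks LOCK_OWNERDEAD.)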
2907 */ 2908 void 2909 heldlock_free(ulwp_t *ulwp) 2910 { 2911 uint_t nlocks; 2912 2913 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 2914 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2915 ulwp->ul_heldlockcnt = 0; 2916 ulwp->ul_heldlocks.array = NULL; 2917 } 2918 2919 /* 2920 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 2921 * Called from _thrp_exit() to deal with abandoned locks. 2922 */ 2923 void 2924 heldlock_exit(void) 2925 { 2926 ulwp_t *self = curthread; 2927 mutex_t **lockptr; 2928 uint_t nlocks; 2929 mutex_t *mp; 2930 2931 if ((nlocks = self->ul_heldlockcnt) != 0) 2932 lockptr = self->ul_heldlocks.array; 2933 else { 2934 nlocks = 1; 2935 lockptr = &self->ul_heldlocks.single; 2936 } 2937 2938 for (; nlocks; nlocks--, lockptr++) { 2939 /* 2940 * The kernel takes care of transitioning held 2941 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 2942 * We avoid that case here. 2943 */ 2944 if ((mp = *lockptr) != NULL && 2945 mutex_is_held(mp) && 2946 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 2947 LOCK_ROBUST) { 2948 mp->mutex_rcount = 0; 2949 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 2950 mp->mutex_flag |= LOCK_OWNERDEAD; 2951 (void) mutex_unlock_internal(mp, 1); 2952 } 2953 } 2954 2955 heldlock_free(self); 2956 } 2957 2958 #pragma weak cond_init = _cond_init 2959 /* ARGSUSED2 */ 2960 int 2961 _cond_init(cond_t *cvp, int type, void *arg) 2962 { 2963 if (type != USYNC_THREAD && type != USYNC_PROCESS) 2964 return (EINVAL); 2965 (void) memset(cvp, 0, sizeof (*cvp)); 2966 cvp->cond_type = (uint16_t)type; 2967 cvp->cond_magic = COND_MAGIC; 2968 return (0); 2969 } 2970 2971 /* 2972 * cond_sleep_queue(): utility function for cond_wait_queue(). 2973 * 2974 * Go to sleep on a condvar sleep queue, expect to be waked up 2975 * by someone calling cond_signal() or cond_broadcast() or due 2976 * to receiving a UNIX signal or being cancelled, or just simply 2977 * due to a spurious wakeup (like someone calling forkall()). 2978 * 2979 * The associated mutex is *not* reacquired before returning. 2980 * That must be done by the caller of cond_sleep_queue(). 2981 */ 2982 static int 2983 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 2984 { 2985 ulwp_t *self = curthread; 2986 queue_head_t *qp; 2987 queue_head_t *mqp; 2988 lwpid_t lwpid; 2989 int signalled; 2990 int error; 2991 int cv_wake; 2992 int release_all; 2993 2994 /* 2995 * Put ourself on the CV sleep queue, unlock the mutex, then 2996 * park ourself and unpark a candidate lwp to grab the mutex. 2997 * We must go onto the CV sleep queue before dropping the 2998 * mutex in order to guarantee atomicity of the operation. 2999 */ 3000 self->ul_sp = stkptr(); 3001 qp = queue_lock(cvp, CV); 3002 enqueue(qp, self, 0); 3003 cvp->cond_waiters_user = 1; 3004 self->ul_cvmutex = mp; 3005 self->ul_cv_wake = cv_wake = (tsp != NULL); 3006 self->ul_signalled = 0; 3007 if (mp->mutex_flag & LOCK_OWNERDEAD) { 3008 mp->mutex_flag &= ~LOCK_OWNERDEAD; 3009 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 3010 } 3011 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 3012 lwpid = mutex_unlock_queue(mp, release_all); 3013 for (;;) { 3014 set_parking_flag(self, 1); 3015 queue_unlock(qp); 3016 if (lwpid != 0) { 3017 lwpid = preempt_unpark(self, lwpid); 3018 preempt(self); 3019 } 3020 /* 3021 * We may have a deferred signal present, 3022 * in which case we should return EINTR. 3023 * Also, we may have received a SIGCANCEL; if so 3024 * and we are cancelable we should return EINTR. 
3025 * We force an immediate EINTR return from 3026 * __lwp_park() by turning our parking flag off. 3027 */ 3028 if (self->ul_cursig != 0 || 3029 (self->ul_cancelable && self->ul_cancel_pending)) 3030 set_parking_flag(self, 0); 3031 /* 3032 * __lwp_park() will return the residual time in tsp 3033 * if we are unparked before the timeout expires. 3034 */ 3035 error = __lwp_park(tsp, lwpid); 3036 set_parking_flag(self, 0); 3037 lwpid = 0; /* unpark the other lwp only once */ 3038 /* 3039 * We were waked up by cond_signal(), cond_broadcast(), 3040 * by an interrupt or timeout (EINTR or ETIME), 3041 * or we may just have gotten a spurious wakeup. 3042 */ 3043 qp = queue_lock(cvp, CV); 3044 if (!cv_wake) 3045 mqp = queue_lock(mp, MX); 3046 if (self->ul_sleepq == NULL) 3047 break; 3048 /* 3049 * We are on either the condvar sleep queue or the 3050 * mutex sleep queue. Break out of the sleep if we 3051 * were interrupted or we timed out (EINTR or ETIME). 3052 * Else this is a spurious wakeup; continue the loop. 3053 */ 3054 if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ 3055 if (error) { 3056 mp->mutex_waiters = dequeue_self(mqp); 3057 break; 3058 } 3059 tsp = NULL; /* no more timeout */ 3060 } else if (self->ul_sleepq == qp) { /* condvar queue */ 3061 if (error) { 3062 cvp->cond_waiters_user = dequeue_self(qp); 3063 break; 3064 } 3065 /* 3066 * Else a spurious wakeup on the condvar queue. 3067 * __lwp_park() has already adjusted the timeout. 3068 */ 3069 } else { 3070 thr_panic("cond_sleep_queue(): thread not on queue"); 3071 } 3072 if (!cv_wake) 3073 queue_unlock(mqp); 3074 } 3075 3076 self->ul_sp = 0; 3077 self->ul_cv_wake = 0; 3078 ASSERT(self->ul_cvmutex == NULL); 3079 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 3080 self->ul_wchan == NULL); 3081 3082 signalled = self->ul_signalled; 3083 self->ul_signalled = 0; 3084 queue_unlock(qp); 3085 if (!cv_wake) 3086 queue_unlock(mqp); 3087 3088 /* 3089 * If we were concurrently cond_signal()d and any of: 3090 * received a UNIX signal, were cancelled, or got a timeout, 3091 * then perform another cond_signal() to avoid consuming it. 3092 */ 3093 if (error && signalled) 3094 (void) cond_signal_internal(cvp); 3095 3096 return (error); 3097 } 3098 3099 int 3100 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3101 { 3102 ulwp_t *self = curthread; 3103 int error; 3104 int merror; 3105 3106 /* 3107 * The old thread library was programmed to defer signals 3108 * while in cond_wait() so that the associated mutex would 3109 * be guaranteed to be held when the application signal 3110 * handler was invoked. 3111 * 3112 * We do not behave this way by default; the state of the 3113 * associated mutex in the signal handler is undefined. 3114 * 3115 * To accommodate applications that depend on the old 3116 * behavior, the _THREAD_COND_WAIT_DEFER environment 3117 * variable can be set to 1 and we will behave in the 3118 * old way with respect to cond_wait(). 3119 */ 3120 if (self->ul_cond_wait_defer) 3121 sigoff(self); 3122 3123 error = cond_sleep_queue(cvp, mp, tsp); 3124 3125 /* 3126 * Reacquire the mutex. 3127 */ 3128 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3129 error = merror; 3130 3131 /* 3132 * Take any deferred signal now, after we have reacquired the mutex. 3133 */ 3134 if (self->ul_cond_wait_defer) 3135 sigon(self); 3136 3137 return (error); 3138 } 3139 3140 /* 3141 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 3142 * See the comment ahead of cond_sleep_queue(), above. 
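 * The difference is that the mutex hand-off happens in the kernel:
 * ___lwp_cond_wait() releases the mutex and blocks in a single operation,
 * so only the user-level bookkeeping (ceiling, owner, and the PI lock
 * byte) is cleaned up here beforehand.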
3143 */ 3144 static int 3145 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3146 { 3147 int mtype = mp->mutex_type; 3148 ulwp_t *self = curthread; 3149 int error; 3150 3151 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 3152 _ceil_prio_waive(); 3153 3154 self->ul_sp = stkptr(); 3155 self->ul_wchan = cvp; 3156 mp->mutex_owner = 0; 3157 /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ 3158 if (mtype & LOCK_PRIO_INHERIT) { 3159 mp->mutex_lockw = LOCKCLEAR; 3160 self->ul_pilocks--; 3161 } 3162 /* 3163 * ___lwp_cond_wait() returns immediately with EINTR if 3164 * set_parking_flag(self,0) is called on this lwp before it 3165 * goes to sleep in the kernel. sigacthandler() calls this 3166 * when a deferred signal is noted. This assures that we don't 3167 * get stuck in ___lwp_cond_wait() with all signals blocked 3168 * due to taking a deferred signal before going to sleep. 3169 */ 3170 set_parking_flag(self, 1); 3171 if (self->ul_cursig != 0 || 3172 (self->ul_cancelable && self->ul_cancel_pending)) 3173 set_parking_flag(self, 0); 3174 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 3175 set_parking_flag(self, 0); 3176 self->ul_sp = 0; 3177 self->ul_wchan = NULL; 3178 return (error); 3179 } 3180 3181 int 3182 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3183 { 3184 ulwp_t *self = curthread; 3185 int error; 3186 int merror; 3187 3188 /* 3189 * See the large comment in cond_wait_queue(), above. 3190 */ 3191 if (self->ul_cond_wait_defer) 3192 sigoff(self); 3193 3194 error = cond_sleep_kernel(cvp, mp, tsp); 3195 3196 /* 3197 * Override the return code from ___lwp_cond_wait() 3198 * with any non-zero return code from mutex_lock(). 3199 * This addresses robust lock failures in particular; 3200 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 3201 * errors in order to take corrective action. 3202 */ 3203 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3204 error = merror; 3205 3206 /* 3207 * Take any deferred signal now, after we have reacquired the mutex. 3208 */ 3209 if (self->ul_cond_wait_defer) 3210 sigon(self); 3211 3212 return (error); 3213 } 3214 3215 /* 3216 * Common code for _cond_wait() and _cond_timedwait() 3217 */ 3218 int 3219 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3220 { 3221 int mtype = mp->mutex_type; 3222 hrtime_t begin_sleep = 0; 3223 ulwp_t *self = curthread; 3224 uberdata_t *udp = self->ul_uberdata; 3225 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3226 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 3227 uint8_t rcount; 3228 int error = 0; 3229 3230 /* 3231 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 3232 * Except in the case of [ETIMEDOUT], all these error checks 3233 * shall act as if they were performed immediately at the 3234 * beginning of processing for the function and shall cause 3235 * an error return, in effect, prior to modifying the state 3236 * of the mutex specified by mutex or the condition variable 3237 * specified by cond. 3238 * Therefore, we must return EINVAL now if the timeout is invalid. 
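 * For example, a timespec with tv_sec < 0 or with tv_nsec outside the
 * range [0, NANOSEC) draws EINVAL here, before the mutex is dropped.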
3239 */ 3240 if (tsp != NULL && 3241 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 3242 return (EINVAL); 3243 3244 if (__td_event_report(self, TD_SLEEP, udp)) { 3245 self->ul_sp = stkptr(); 3246 self->ul_wchan = cvp; 3247 self->ul_td_evbuf.eventnum = TD_SLEEP; 3248 self->ul_td_evbuf.eventdata = cvp; 3249 tdb_event(TD_SLEEP, udp); 3250 self->ul_sp = 0; 3251 } 3252 if (csp) { 3253 if (tsp) 3254 tdb_incr(csp->cond_timedwait); 3255 else 3256 tdb_incr(csp->cond_wait); 3257 } 3258 if (msp) 3259 begin_sleep = record_hold_time(msp); 3260 else if (csp) 3261 begin_sleep = gethrtime(); 3262 3263 if (self->ul_error_detection) { 3264 if (!mutex_is_held(mp)) 3265 lock_error(mp, "cond_wait", cvp, NULL); 3266 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3267 lock_error(mp, "recursive mutex in cond_wait", 3268 cvp, NULL); 3269 if (cvp->cond_type & USYNC_PROCESS) { 3270 if (!(mtype & USYNC_PROCESS)) 3271 lock_error(mp, "cond_wait", cvp, 3272 "condvar process-shared, " 3273 "mutex process-private"); 3274 } else { 3275 if (mtype & USYNC_PROCESS) 3276 lock_error(mp, "cond_wait", cvp, 3277 "condvar process-private, " 3278 "mutex process-shared"); 3279 } 3280 } 3281 3282 /* 3283 * We deal with recursive mutexes by completely 3284 * dropping the lock and restoring the recursion 3285 * count after waking up. This is arguably wrong, 3286 * but it obeys the principle of least astonishment. 3287 */ 3288 rcount = mp->mutex_rcount; 3289 mp->mutex_rcount = 0; 3290 if ((mtype & 3291 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3292 (cvp->cond_type & USYNC_PROCESS)) 3293 error = cond_wait_kernel(cvp, mp, tsp); 3294 else 3295 error = cond_wait_queue(cvp, mp, tsp); 3296 mp->mutex_rcount = rcount; 3297 3298 if (csp) { 3299 hrtime_t lapse = gethrtime() - begin_sleep; 3300 if (tsp == NULL) 3301 csp->cond_wait_sleep_time += lapse; 3302 else { 3303 csp->cond_timedwait_sleep_time += lapse; 3304 if (error == ETIME) 3305 tdb_incr(csp->cond_timedwait_timeout); 3306 } 3307 } 3308 return (error); 3309 } 3310 3311 /* 3312 * cond_wait() and _cond_wait() are cancellation points but __cond_wait() 3313 * is not. Internally, libc calls the non-cancellation version. 3314 * Other libraries need to use pthread_setcancelstate(), as appropriate, 3315 * since __cond_wait() is not exported from libc. 3316 */ 3317 int 3318 __cond_wait(cond_t *cvp, mutex_t *mp) 3319 { 3320 ulwp_t *self = curthread; 3321 uberdata_t *udp = self->ul_uberdata; 3322 uberflags_t *gflags; 3323 3324 /* 3325 * Optimize the common case of USYNC_THREAD plus 3326 * no error detection, no lock statistics, and no event tracing. 3327 */ 3328 if ((gflags = self->ul_schedctl_called) != NULL && 3329 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3330 self->ul_td_events_enable | 3331 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3332 return (cond_wait_queue(cvp, mp, NULL)); 3333 3334 /* 3335 * Else do it the long way. 3336 */ 3337 return (cond_wait_common(cvp, mp, NULL)); 3338 } 3339 3340 #pragma weak cond_wait = _cond_wait 3341 int 3342 _cond_wait(cond_t *cvp, mutex_t *mp) 3343 { 3344 int error; 3345 3346 _cancelon(); 3347 error = __cond_wait(cvp, mp); 3348 if (error == EINTR) 3349 _canceloff(); 3350 else 3351 _canceloff_nocancel(); 3352 return (error); 3353 } 3354 3355 /* 3356 * pthread_cond_wait() is a cancellation point. 3357 */ 3358 #pragma weak pthread_cond_wait = _pthread_cond_wait 3359 int 3360 _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 3361 { 3362 int error; 3363 3364 error = _cond_wait(cvp, mp); 3365 return ((error == EINTR)? 
0 : error); 3366 } 3367 3368 /* 3369 * cond_timedwait() and _cond_timedwait() are cancellation points 3370 * but __cond_timedwait() is not. 3371 */ 3372 int 3373 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3374 { 3375 clockid_t clock_id = cvp->cond_clockid; 3376 timespec_t reltime; 3377 int error; 3378 3379 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3380 clock_id = CLOCK_REALTIME; 3381 abstime_to_reltime(clock_id, abstime, &reltime); 3382 error = cond_wait_common(cvp, mp, &reltime); 3383 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3384 /* 3385 * Don't return ETIME if we didn't really get a timeout. 3386 * This can happen if we return because someone resets 3387 * the system clock. Just return zero in this case, 3388 * giving a spurious wakeup but not a timeout. 3389 */ 3390 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3391 abstime->tv_nsec > gethrtime()) 3392 error = 0; 3393 } 3394 return (error); 3395 } 3396 3397 #pragma weak cond_timedwait = _cond_timedwait 3398 int 3399 _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3400 { 3401 int error; 3402 3403 _cancelon(); 3404 error = __cond_timedwait(cvp, mp, abstime); 3405 if (error == EINTR) 3406 _canceloff(); 3407 else 3408 _canceloff_nocancel(); 3409 return (error); 3410 } 3411 3412 /* 3413 * pthread_cond_timedwait() is a cancellation point. 3414 */ 3415 #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 3416 int 3417 _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3418 { 3419 int error; 3420 3421 error = _cond_timedwait(cvp, mp, abstime); 3422 if (error == ETIME) 3423 error = ETIMEDOUT; 3424 else if (error == EINTR) 3425 error = 0; 3426 return (error); 3427 } 3428 3429 /* 3430 * cond_reltimedwait() and _cond_reltimedwait() are cancellation points 3431 * but __cond_reltimedwait() is not. 3432 */ 3433 int 3434 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3435 { 3436 timespec_t tslocal = *reltime; 3437 3438 return (cond_wait_common(cvp, mp, &tslocal)); 3439 } 3440 3441 #pragma weak cond_reltimedwait = _cond_reltimedwait 3442 int 3443 _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3444 { 3445 int error; 3446 3447 _cancelon(); 3448 error = __cond_reltimedwait(cvp, mp, reltime); 3449 if (error == EINTR) 3450 _canceloff(); 3451 else 3452 _canceloff_nocancel(); 3453 return (error); 3454 } 3455 3456 #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 3457 int 3458 _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 3459 const timespec_t *reltime) 3460 { 3461 int error; 3462 3463 error = _cond_reltimedwait(cvp, mp, reltime); 3464 if (error == ETIME) 3465 error = ETIMEDOUT; 3466 else if (error == EINTR) 3467 error = 0; 3468 return (error); 3469 } 3470 3471 #pragma weak pthread_cond_signal = cond_signal_internal 3472 #pragma weak _pthread_cond_signal = cond_signal_internal 3473 #pragma weak cond_signal = cond_signal_internal 3474 #pragma weak _cond_signal = cond_signal_internal 3475 int 3476 cond_signal_internal(cond_t *cvp) 3477 { 3478 ulwp_t *self = curthread; 3479 uberdata_t *udp = self->ul_uberdata; 3480 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3481 int error = 0; 3482 int more; 3483 lwpid_t lwpid; 3484 queue_head_t *qp; 3485 mutex_t *mp; 3486 queue_head_t *mqp; 3487 ulwp_t **ulwpp; 3488 ulwp_t *ulwp; 3489 ulwp_t *prev; 3490 3491 if (csp) 3492 tdb_incr(csp->cond_signal); 3493 3494 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? 
*/ 3495 error = __lwp_cond_signal(cvp); 3496 3497 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3498 return (error); 3499 3500 /* 3501 * Move someone from the condvar sleep queue to the mutex sleep 3502 * queue for the mutex that he will acquire on being waked up. 3503 * We can do this only if we own the mutex he will acquire. 3504 * If we do not own the mutex, or if his ul_cv_wake flag 3505 * is set, just dequeue and unpark him. 3506 */ 3507 qp = queue_lock(cvp, CV); 3508 ulwpp = queue_slot(qp, &prev, &more); 3509 cvp->cond_waiters_user = more; 3510 if (ulwpp == NULL) { /* no one on the sleep queue */ 3511 queue_unlock(qp); 3512 return (error); 3513 } 3514 ulwp = *ulwpp; 3515 3516 /* 3517 * Inform the thread that he was the recipient of a cond_signal(). 3518 * This lets him deal with cond_signal() and, concurrently, 3519 * one or more of a cancellation, a UNIX signal, or a timeout. 3520 * These latter conditions must not consume a cond_signal(). 3521 */ 3522 ulwp->ul_signalled = 1; 3523 3524 /* 3525 * Dequeue the waiter but leave his ul_sleepq non-NULL 3526 * while we move him to the mutex queue so that he can 3527 * deal properly with spurious wakeups. 3528 */ 3529 queue_unlink(qp, ulwpp, prev); 3530 3531 mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 3532 ulwp->ul_cvmutex = NULL; 3533 ASSERT(mp != NULL); 3534 3535 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3536 /* just wake him up */ 3537 lwpid = ulwp->ul_lwpid; 3538 no_preempt(self); 3539 ulwp->ul_sleepq = NULL; 3540 ulwp->ul_wchan = NULL; 3541 queue_unlock(qp); 3542 (void) __lwp_unpark(lwpid); 3543 preempt(self); 3544 } else { 3545 /* move him to the mutex queue */ 3546 mqp = queue_lock(mp, MX); 3547 enqueue(mqp, ulwp, 0); 3548 mp->mutex_waiters = 1; 3549 queue_unlock(mqp); 3550 queue_unlock(qp); 3551 } 3552 3553 return (error); 3554 } 3555 3556 /* 3557 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3558 * and rw_queue_release() to (re)allocate a big buffer to hold the 3559 * lwpids of all the threads to be set running after they are removed 3560 * from their sleep queues. Since we are holding a queue lock, we 3561 * cannot call any function that might acquire a lock. mmap(), munmap(), 3562 * lwp_unpark_all() are simple system calls and are safe in this regard. 3563 */ 3564 lwpid_t * 3565 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3566 { 3567 /* 3568 * Allocate NEWLWPS ids on the first overflow. 3569 * Double the allocation each time after that. 3570 */ 3571 int nlwpid = *nlwpid_ptr; 3572 int maxlwps = *maxlwps_ptr; 3573 int first_allocation; 3574 int newlwps; 3575 void *vaddr; 3576 3577 ASSERT(nlwpid == maxlwps); 3578 3579 first_allocation = (maxlwps == MAXLWPS); 3580 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3581 vaddr = mmap(NULL, newlwps * sizeof (lwpid_t), 3582 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3583 3584 if (vaddr == MAP_FAILED) { 3585 /* 3586 * Let's hope this never happens. 3587 * If it does, then we have a terrible 3588 * thundering herd on our hands. 
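 * (Unparking the lwpids collected so far, while our caller still holds
 * the queue lock and before the remaining waiters have been dequeued,
 * means all of those threads wake up and contend at once.)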
3589 */ 3590 (void) __lwp_unpark_all(lwpid, nlwpid); 3591 *nlwpid_ptr = 0; 3592 } else { 3593 (void) memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3594 if (!first_allocation) 3595 (void) munmap((caddr_t)lwpid, 3596 maxlwps * sizeof (lwpid_t)); 3597 lwpid = vaddr; 3598 *maxlwps_ptr = newlwps; 3599 } 3600 3601 return (lwpid); 3602 } 3603 3604 #pragma weak pthread_cond_broadcast = cond_broadcast_internal 3605 #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 3606 #pragma weak cond_broadcast = cond_broadcast_internal 3607 #pragma weak _cond_broadcast = cond_broadcast_internal 3608 int 3609 cond_broadcast_internal(cond_t *cvp) 3610 { 3611 ulwp_t *self = curthread; 3612 uberdata_t *udp = self->ul_uberdata; 3613 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3614 int error = 0; 3615 queue_head_t *qp; 3616 queue_root_t *qrp; 3617 mutex_t *mp; 3618 mutex_t *mp_cache = NULL; 3619 queue_head_t *mqp = NULL; 3620 ulwp_t *ulwp; 3621 int nlwpid = 0; 3622 int maxlwps = MAXLWPS; 3623 lwpid_t buffer[MAXLWPS]; 3624 lwpid_t *lwpid = buffer; 3625 3626 if (csp) 3627 tdb_incr(csp->cond_broadcast); 3628 3629 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3630 error = __lwp_cond_broadcast(cvp); 3631 3632 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3633 return (error); 3634 3635 /* 3636 * Move everyone from the condvar sleep queue to the mutex sleep 3637 * queue for the mutex that they will acquire on being waked up. 3638 * We can do this only if we own the mutex they will acquire. 3639 * If we do not own the mutex, or if their ul_cv_wake flag 3640 * is set, just dequeue and unpark them. 3641 * 3642 * We keep track of lwpids that are to be unparked in lwpid[]. 3643 * __lwp_unpark_all() is called to unpark all of them after 3644 * they have been removed from the sleep queue and the sleep 3645 * queue lock has been dropped. If we run out of space in our 3646 * on-stack buffer, we need to allocate more but we can't call 3647 * lmalloc() because we are holding a queue lock when the overflow 3648 * occurs and lmalloc() acquires a lock. We can't use alloca() 3649 * either because the application may have allocated a small 3650 * stack and we don't want to overrun the stack. So we call 3651 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3652 * system call directly since that path acquires no locks. 
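 * The loop below drains the condvar queue completely: waiters we cannot
 * hand off directly are batched into lwpid[] and unparked only after the
 * queue lock is dropped, while the others are moved straight to their
 * mutex's sleep queue (mp_cache/mqp avoid re-locking the same mutex queue
 * for consecutive waiters on the same mutex).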
3653 */ 3654 qp = queue_lock(cvp, CV); 3655 cvp->cond_waiters_user = 0; 3656 for (;;) { 3657 if ((qrp = qp->qh_root) == NULL || 3658 (ulwp = qrp->qr_head) == NULL) 3659 break; 3660 ASSERT(ulwp->ul_wchan == cvp); 3661 queue_unlink(qp, &qrp->qr_head, NULL); 3662 mp = ulwp->ul_cvmutex; /* his mutex */ 3663 ulwp->ul_cvmutex = NULL; 3664 ASSERT(mp != NULL); 3665 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3666 /* just wake him up */ 3667 ulwp->ul_sleepq = NULL; 3668 ulwp->ul_wchan = NULL; 3669 if (nlwpid == maxlwps) 3670 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3671 lwpid[nlwpid++] = ulwp->ul_lwpid; 3672 } else { 3673 /* move him to the mutex queue */ 3674 if (mp != mp_cache) { 3675 mp_cache = mp; 3676 if (mqp != NULL) 3677 queue_unlock(mqp); 3678 mqp = queue_lock(mp, MX); 3679 } 3680 enqueue(mqp, ulwp, 0); 3681 mp->mutex_waiters = 1; 3682 } 3683 } 3684 if (mqp != NULL) 3685 queue_unlock(mqp); 3686 if (nlwpid == 0) { 3687 queue_unlock(qp); 3688 } else { 3689 no_preempt(self); 3690 queue_unlock(qp); 3691 if (nlwpid == 1) 3692 (void) __lwp_unpark(lwpid[0]); 3693 else 3694 (void) __lwp_unpark_all(lwpid, nlwpid); 3695 preempt(self); 3696 } 3697 if (lwpid != buffer) 3698 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 3699 return (error); 3700 } 3701 3702 #pragma weak pthread_cond_destroy = _cond_destroy 3703 #pragma weak _pthread_cond_destroy = _cond_destroy 3704 #pragma weak cond_destroy = _cond_destroy 3705 int 3706 _cond_destroy(cond_t *cvp) 3707 { 3708 cvp->cond_magic = 0; 3709 tdb_sync_obj_deregister(cvp); 3710 return (0); 3711 } 3712 3713 #if defined(THREAD_DEBUG) 3714 void 3715 assert_no_libc_locks_held(void) 3716 { 3717 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3718 } 3719 3720 /* protected by link_lock */ 3721 uint64_t spin_lock_spin; 3722 uint64_t spin_lock_spin2; 3723 uint64_t spin_lock_sleep; 3724 uint64_t spin_lock_wakeup; 3725 3726 /* 3727 * Record spin lock statistics. 3728 * Called by a thread exiting itself in thrp_exit(). 3729 * Also called via atexit() from the thread calling 3730 * exit() to do all the other threads as well. 3731 */ 3732 void 3733 record_spin_locks(ulwp_t *ulwp) 3734 { 3735 spin_lock_spin += ulwp->ul_spin_lock_spin; 3736 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3737 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3738 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3739 ulwp->ul_spin_lock_spin = 0; 3740 ulwp->ul_spin_lock_spin2 = 0; 3741 ulwp->ul_spin_lock_sleep = 0; 3742 ulwp->ul_spin_lock_wakeup = 0; 3743 } 3744 3745 /* 3746 * atexit function: dump the queue statistics to stderr. 
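 * The dump is produced only when the thread_queue_dump tunable is nonzero
 * (see the test at the top of the function).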
3747 */ 3748 #if !defined(__lint) 3749 #define fprintf _fprintf 3750 #endif 3751 #include <stdio.h> 3752 void 3753 dump_queue_statistics(void) 3754 { 3755 uberdata_t *udp = curthread->ul_uberdata; 3756 queue_head_t *qp; 3757 int qn; 3758 uint64_t spin_lock_total = 0; 3759 3760 if (udp->queue_head == NULL || thread_queue_dump == 0) 3761 return; 3762 3763 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3764 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3765 return; 3766 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3767 if (qp->qh_lockcount == 0) 3768 continue; 3769 spin_lock_total += qp->qh_lockcount; 3770 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3771 (u_longlong_t)qp->qh_lockcount, 3772 qp->qh_qmax, qp->qh_hmax) < 0) 3773 return; 3774 } 3775 3776 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3777 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3778 return; 3779 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3780 if (qp->qh_lockcount == 0) 3781 continue; 3782 spin_lock_total += qp->qh_lockcount; 3783 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3784 (u_longlong_t)qp->qh_lockcount, 3785 qp->qh_qmax, qp->qh_hmax) < 0) 3786 return; 3787 } 3788 3789 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3790 (u_longlong_t)spin_lock_total); 3791 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3792 (u_longlong_t)spin_lock_spin); 3793 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3794 (u_longlong_t)spin_lock_spin2); 3795 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3796 (u_longlong_t)spin_lock_sleep); 3797 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3798 (u_longlong_t)spin_lock_wakeup); 3799 } 3800 #endif 3801
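
/*
 * Illustrative example (not part of libc): one way an application might
 * drive the robust-mutex machinery implemented above.  pthread_mutex_lock()
 * on a robust mutex returns EOWNERDEAD when the previous owner died while
 * holding it; the new owner repairs the protected data and then calls
 * pthread_mutex_consistent_np() (__mutex_consistent(), above).  If the data
 * cannot be repaired, unlocking without making the mutex consistent marks
 * it LOCK_NOTRECOVERABLE and later lockers see ENOTRECOVERABLE.  The names
 * repair_state() and shared_state below are hypothetical.
 *
 *	int
 *	lock_shared_state(pthread_mutex_t *mp)
 *	{
 *		int error = pthread_mutex_lock(mp);
 *
 *		if (error == EOWNERDEAD) {
 *			if (repair_state(shared_state) == 0) {
 *				(void) pthread_mutex_consistent_np(mp);
 *				return (0);
 *			}
 *			(void) pthread_mutex_unlock(mp);
 *			return (ENOTRECOVERABLE);
 *		}
 *		return (error);
 *	}
 */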