/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#define	atomic_cas_64	_atomic_cas_64

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/rtpriocntl.h>
#include <sys/sdt.h>
#include <atomic.h>

#if defined(THREAD_DEBUG)
#define	INCR32(x)	(((x) != UINT32_MAX)? (x)++ : 0)
#define	INCR(x)		((x)++)
#define	DECR(x)		((x)--)
#define	MAXINCR(m, x)	((m < ++x)? (m = x) : 0)
#else
#define	INCR32(x)
#define	INCR(x)
#define	DECR(x)
#define	MAXINCR(m, x)
#endif

/*
 * This mutex is initialized to be held by lwp#1.
 * It is used to block a thread that has returned from a mutex_lock()
 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
 */
mutex_t	stall_mutex = DEFAULTMUTEX;

static int shared_mutex_held(mutex_t *);
static int mutex_queuelock_adaptive(mutex_t *);
static void mutex_wakeup_all(mutex_t *);

/*
 * Lock statistics support functions.
 */
void
record_begin_hold(tdb_mutex_stats_t *msp)
{
	tdb_incr(msp->mutex_lock);
	msp->mutex_begin_hold = gethrtime();
}

hrtime_t
record_hold_time(tdb_mutex_stats_t *msp)
{
	hrtime_t now = gethrtime();

	if (msp->mutex_begin_hold)
		msp->mutex_hold_time += now - msp->mutex_begin_hold;
	msp->mutex_begin_hold = 0;
	return (now);
}

/*
 * Called once at library initialization.
 */
void
mutex_setup(void)
{
	if (set_lock_byte(&stall_mutex.mutex_lockw))
		thr_panic("mutex_setup() cannot acquire stall_mutex");
	stall_mutex.mutex_owner = (uintptr_t)curthread;
}

/*
 * The default spin count of 1000 is experimentally determined.
 * On sun4u machines with any number of processors it could be raised
 * to 10,000 but that (experimentally) makes almost no difference.
 * The environment variable:
 *	_THREAD_ADAPTIVE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int	thread_adaptive_spin = 1000;
uint_t	thread_max_spinners = 100;
int	thread_queue_verify = 0;
static	int	ncpus;

/*
 * Distinguish spinning for queue locks from spinning for regular locks.
 * We try harder to acquire queue locks by spinning.
 * The environment variable:
 *	_THREAD_QUEUE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int	thread_queue_spin = 10000;

#define	ALL_ATTRIBUTES				\
	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
	LOCK_ROBUST)

/*
 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
 * augmented by zero or more of the flags:
 *	LOCK_RECURSIVE
 *	LOCK_ERRORCHECK
 *	LOCK_PRIO_INHERIT
 *	LOCK_PRIO_PROTECT
 *	LOCK_ROBUST
 */
#pragma weak _private_mutex_init = __mutex_init
#pragma weak mutex_init = __mutex_init
#pragma weak _mutex_init = __mutex_init
/* ARGSUSED2 */
int
__mutex_init(mutex_t *mp, int type, void *arg)
{
	int basetype = (type & ~ALL_ATTRIBUTES);
	const pcclass_t *pccp;
	int error = 0;
	int ceil;

	if (basetype == USYNC_PROCESS_ROBUST) {
		/*
		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
		 * retain the USYNC_PROCESS_ROBUST flag so we can return
		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
		 * mutexes will ever draw ELOCKUNMAPPED).
		 */
		type |= (USYNC_PROCESS | LOCK_ROBUST);
		basetype = USYNC_PROCESS;
	}

	if (type & LOCK_PRIO_PROTECT)
		pccp = get_info_by_policy(SCHED_FIFO);
	if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
	    ((type & LOCK_PRIO_PROTECT) &&
	    ((ceil = *(int *)arg) < pccp->pcc_primin ||
	    ceil > pccp->pcc_primax))) {
		error = EINVAL;
	} else if (type & LOCK_ROBUST) {
		/*
		 * Callers of mutex_init() with the LOCK_ROBUST attribute
		 * are required to pass an initially all-zero mutex.
		 * Multiple calls to mutex_init() are allowed; all but
		 * the first return EBUSY.  A call to mutex_init() is
		 * allowed to make an inconsistent robust lock consistent
		 * (for historical usage, even though the proper interface
		 * for this is mutex_consistent()).  Note that we use
		 * atomic_or_16() to set the LOCK_INITED flag so as
		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
		 */
		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
		if (!(mp->mutex_flag & LOCK_INITED)) {
			mp->mutex_type = (uint8_t)type;
			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
			mp->mutex_magic = MUTEX_MAGIC;
		} else if (type != mp->mutex_type ||
		    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
			error = EINVAL;
		} else if (__mutex_consistent(mp) != 0) {
			error = EBUSY;
		}
		/* register a process robust mutex with the kernel */
		if (basetype == USYNC_PROCESS)
			register_lock(mp);
	} else {
		(void) _memset(mp, 0, sizeof (*mp));
		mp->mutex_type = (uint8_t)type;
		mp->mutex_flag = LOCK_INITED;
		mp->mutex_magic = MUTEX_MAGIC;
	}

	if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
		mp->mutex_ceiling = ceil;
	}

	return (error);
}

/*
 * Delete mp from list of ceiling mutexes owned by curthread.
 * Return 1 if the head of the chain was updated.
204 */ 205 int 206 _ceil_mylist_del(mutex_t *mp) 207 { 208 ulwp_t *self = curthread; 209 mxchain_t **mcpp; 210 mxchain_t *mcp; 211 212 for (mcpp = &self->ul_mxchain; 213 (mcp = *mcpp) != NULL; 214 mcpp = &mcp->mxchain_next) { 215 if (mcp->mxchain_mx == mp) { 216 *mcpp = mcp->mxchain_next; 217 lfree(mcp, sizeof (*mcp)); 218 return (mcpp == &self->ul_mxchain); 219 } 220 } 221 return (0); 222 } 223 224 /* 225 * Add mp to the list of ceiling mutexes owned by curthread. 226 * Return ENOMEM if no memory could be allocated. 227 */ 228 int 229 _ceil_mylist_add(mutex_t *mp) 230 { 231 ulwp_t *self = curthread; 232 mxchain_t *mcp; 233 234 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 235 return (ENOMEM); 236 mcp->mxchain_mx = mp; 237 mcp->mxchain_next = self->ul_mxchain; 238 self->ul_mxchain = mcp; 239 return (0); 240 } 241 242 /* 243 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below. 244 */ 245 static void 246 set_rt_priority(ulwp_t *self, int prio) 247 { 248 pcparms_t pcparm; 249 250 pcparm.pc_cid = self->ul_rtclassid; 251 ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; 252 ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; 253 (void) _private_priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm); 254 } 255 256 /* 257 * Inherit priority from ceiling. 258 * This changes the effective priority, not the assigned priority. 259 */ 260 void 261 _ceil_prio_inherit(int prio) 262 { 263 ulwp_t *self = curthread; 264 265 self->ul_epri = prio; 266 set_rt_priority(self, prio); 267 } 268 269 /* 270 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 271 * if holding at least one ceiling lock. If no ceiling locks are held at this 272 * point, disinherit completely, reverting back to assigned priority. 273 */ 274 void 275 _ceil_prio_waive(void) 276 { 277 ulwp_t *self = curthread; 278 mxchain_t *mcp = self->ul_mxchain; 279 int prio; 280 281 if (mcp == NULL) { 282 prio = self->ul_pri; 283 self->ul_epri = 0; 284 } else { 285 prio = mcp->mxchain_mx->mutex_ceiling; 286 self->ul_epri = prio; 287 } 288 set_rt_priority(self, prio); 289 } 290 291 /* 292 * Clear the lock byte. Retain the waiters byte and the spinners byte. 293 * Return the old value of the lock word. 294 */ 295 static uint32_t 296 clear_lockbyte(volatile uint32_t *lockword) 297 { 298 uint32_t old; 299 uint32_t new; 300 301 do { 302 old = *lockword; 303 new = old & ~LOCKMASK; 304 } while (atomic_cas_32(lockword, old, new) != old); 305 306 return (old); 307 } 308 309 /* 310 * Same as clear_lockbyte(), but operates on mutex_lockword64. 311 * The mutex_ownerpid field is cleared along with the lock byte. 312 */ 313 static uint64_t 314 clear_lockbyte64(volatile uint64_t *lockword64) 315 { 316 uint64_t old; 317 uint64_t new; 318 319 do { 320 old = *lockword64; 321 new = old & ~LOCKMASK64; 322 } while (atomic_cas_64(lockword64, old, new) != old); 323 324 return (old); 325 } 326 327 /* 328 * Similar to set_lock_byte(), which only tries to set the lock byte. 329 * Here, we attempt to set the lock byte AND the mutex_ownerpid, 330 * keeping the remaining bytes constant. 331 */ 332 static int 333 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid) 334 { 335 uint64_t old; 336 uint64_t new; 337 338 old = *lockword64 & ~LOCKMASK64; 339 new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64; 340 if (atomic_cas_64(lockword64, old, new) == old) 341 return (LOCKCLEAR); 342 343 return (LOCKSET); 344 } 345 346 /* 347 * Increment the spinners count in the mutex lock word. 348 * Return 0 on success. 
 * Return -1 if the count would overflow.
 */
static int
spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
{
	uint32_t old;
	uint32_t new;

	do {
		old = *lockword;
		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
			return (-1);
		new = old + (1 << SPINNERSHIFT);
	} while (atomic_cas_32(lockword, old, new) != old);

	return (0);
}

/*
 * Decrement the spinners count in the mutex lock word.
 * Return the new value of the lock word.
 */
static uint32_t
spinners_decr(volatile uint32_t *lockword)
{
	uint32_t old;
	uint32_t new;

	do {
		new = old = *lockword;
		if (new & SPINNERMASK)
			new -= (1 << SPINNERSHIFT);
	} while (atomic_cas_32(lockword, old, new) != old);

	return (new);
}

/*
 * Non-preemptive spin locks.  Used by queue_lock().
 * No lock statistics are gathered for these locks.
 * No DTrace probes are provided for these locks.
 */
void
spin_lock_set(mutex_t *mp)
{
	ulwp_t *self = curthread;

	no_preempt(self);
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Spin for a while, attempting to acquire the lock.
	 */
	INCR32(self->ul_spin_lock_spin);
	if (mutex_queuelock_adaptive(mp) == 0 ||
	    set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Try harder if we were previously at a no preemption level.
	 */
	if (self->ul_preempt > 1) {
		INCR32(self->ul_spin_lock_spin2);
		if (mutex_queuelock_adaptive(mp) == 0 ||
		    set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			return;
		}
	}
	/*
	 * Give up and block in the kernel for the mutex.
	 */
	INCR32(self->ul_spin_lock_sleep);
	(void) ___lwp_mutex_timedlock(mp, NULL);
	mp->mutex_owner = (uintptr_t)self;
}

void
spin_lock_clear(mutex_t *mp)
{
	ulwp_t *self = curthread;

	mp->mutex_owner = 0;
	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
		(void) ___lwp_mutex_wakeup(mp, 0);
		INCR32(self->ul_spin_lock_wakeup);
	}
	preempt(self);
}

/*
 * Allocate the sleep queue hash table.
 */
void
queue_alloc(void)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_head_t *qp;
	void *data;
	int i;

	/*
	 * No locks are needed; we call here only when single-threaded.
	 */
	ASSERT(self == udp->ulwp_one);
	ASSERT(!udp->uberflags.uf_mt);
	if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
	    == MAP_FAILED)
		thr_panic("cannot allocate thread queue_head table");
	udp->queue_head = qp = (queue_head_t *)data;
	for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
		qp->qh_type = (i < QHASHSIZE)? MX : CV;
		qp->qh_lock.mutex_flag = LOCK_INITED;
		qp->qh_lock.mutex_magic = MUTEX_MAGIC;
		qp->qh_hlist = &qp->qh_def_root;
#if defined(THREAD_DEBUG)
		qp->qh_hlen = 1;
		qp->qh_hmax = 1;
#endif
	}
}

#if defined(THREAD_DEBUG)

/*
 * Debugging: verify correctness of a sleep queue.
479 */ 480 void 481 QVERIFY(queue_head_t *qp) 482 { 483 ulwp_t *self = curthread; 484 uberdata_t *udp = self->ul_uberdata; 485 queue_root_t *qrp; 486 ulwp_t *ulwp; 487 ulwp_t *prev; 488 uint_t index; 489 uint32_t cnt; 490 char qtype; 491 void *wchan; 492 493 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 494 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 495 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 496 cnt++; 497 ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) || 498 (qrp->qr_head == NULL && qrp->qr_tail == NULL)); 499 } 500 ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt); 501 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 502 ASSERT(qp->qh_type == qtype); 503 if (!thread_queue_verify) 504 return; 505 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 506 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 507 for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; 508 prev = ulwp, ulwp = ulwp->ul_link) { 509 cnt++; 510 if (ulwp->ul_writer) 511 ASSERT(prev == NULL || prev->ul_writer); 512 ASSERT(ulwp->ul_qtype == qtype); 513 ASSERT(ulwp->ul_wchan != NULL); 514 ASSERT(ulwp->ul_sleepq == qp); 515 wchan = ulwp->ul_wchan; 516 ASSERT(qrp->qr_wchan == wchan); 517 index = QUEUE_HASH(wchan, qtype); 518 ASSERT(&udp->queue_head[index] == qp); 519 } 520 ASSERT(qrp->qr_tail == prev); 521 } 522 ASSERT(qp->qh_qlen == cnt); 523 } 524 525 #else /* THREAD_DEBUG */ 526 527 #define QVERIFY(qp) 528 529 #endif /* THREAD_DEBUG */ 530 531 /* 532 * Acquire a queue head. 533 */ 534 queue_head_t * 535 queue_lock(void *wchan, int qtype) 536 { 537 uberdata_t *udp = curthread->ul_uberdata; 538 queue_head_t *qp; 539 queue_root_t *qrp; 540 541 ASSERT(qtype == MX || qtype == CV); 542 543 /* 544 * It is possible that we could be called while still single-threaded. 545 * If so, we call queue_alloc() to allocate the queue_head[] array. 546 */ 547 if ((qp = udp->queue_head) == NULL) { 548 queue_alloc(); 549 qp = udp->queue_head; 550 } 551 qp += QUEUE_HASH(wchan, qtype); 552 spin_lock_set(&qp->qh_lock); 553 for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) 554 if (qrp->qr_wchan == wchan) 555 break; 556 if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { 557 /* the default queue root is available; use it */ 558 qrp = &qp->qh_def_root; 559 qrp->qr_wchan = wchan; 560 ASSERT(qrp->qr_next == NULL); 561 ASSERT(qrp->qr_tail == NULL && 562 qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); 563 } 564 qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ 565 qp->qh_root = qrp; /* valid until queue_unlock() is called */ 566 INCR32(qp->qh_lockcount); 567 QVERIFY(qp); 568 return (qp); 569 } 570 571 /* 572 * Release a queue head. 573 */ 574 void 575 queue_unlock(queue_head_t *qp) 576 { 577 QVERIFY(qp); 578 spin_lock_clear(&qp->qh_lock); 579 } 580 581 /* 582 * For rwlock queueing, we must queue writers ahead of readers of the 583 * same priority. We do this by making writers appear to have a half 584 * point higher priority for purposes of priority comparisons below. 
585 */ 586 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 587 588 void 589 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) 590 { 591 queue_root_t *qrp; 592 ulwp_t **ulwpp; 593 ulwp_t *next; 594 int pri = CMP_PRIO(ulwp); 595 596 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 597 ASSERT(ulwp->ul_sleepq != qp); 598 599 if ((qrp = qp->qh_root) == NULL) { 600 /* use the thread's queue root for the linkage */ 601 qrp = &ulwp->ul_queue_root; 602 qrp->qr_next = qp->qh_hlist; 603 qrp->qr_prev = NULL; 604 qrp->qr_head = NULL; 605 qrp->qr_tail = NULL; 606 qrp->qr_wchan = qp->qh_wchan; 607 qrp->qr_rtcount = 0; 608 qrp->qr_qlen = 0; 609 qrp->qr_qmax = 0; 610 qp->qh_hlist->qr_prev = qrp; 611 qp->qh_hlist = qrp; 612 qp->qh_root = qrp; 613 MAXINCR(qp->qh_hmax, qp->qh_hlen); 614 } 615 616 /* 617 * LIFO queue ordering is unfair and can lead to starvation, 618 * but it gives better performance for heavily contended locks. 619 * We use thread_queue_fifo (range is 0..8) to determine 620 * the frequency of FIFO vs LIFO queuing: 621 * 0 : every 256th time (almost always LIFO) 622 * 1 : every 128th time 623 * 2 : every 64th time 624 * 3 : every 32nd time 625 * 4 : every 16th time (the default value, mostly LIFO) 626 * 5 : every 8th time 627 * 6 : every 4th time 628 * 7 : every 2nd time 629 * 8 : every time (never LIFO, always FIFO) 630 * Note that there is always some degree of FIFO ordering. 631 * This breaks live lock conditions that occur in applications 632 * that are written assuming (incorrectly) that threads acquire 633 * locks fairly, that is, in roughly round-robin order. 634 * In any event, the queue is maintained in kernel priority order. 635 * 636 * If force_fifo is non-zero, fifo queueing is forced. 637 * SUSV3 requires this for semaphores. 638 */ 639 if (qrp->qr_head == NULL) { 640 /* 641 * The queue is empty. LIFO/FIFO doesn't matter. 642 */ 643 ASSERT(qrp->qr_tail == NULL); 644 ulwpp = &qrp->qr_head; 645 } else if (force_fifo | 646 (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { 647 /* 648 * Enqueue after the last thread whose priority is greater 649 * than or equal to the priority of the thread being queued. 650 * Attempt first to go directly onto the tail of the queue. 651 */ 652 if (pri <= CMP_PRIO(qrp->qr_tail)) 653 ulwpp = &qrp->qr_tail->ul_link; 654 else { 655 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 656 ulwpp = &next->ul_link) 657 if (pri > CMP_PRIO(next)) 658 break; 659 } 660 } else { 661 /* 662 * Enqueue before the first thread whose priority is less 663 * than or equal to the priority of the thread being queued. 664 * Hopefully we can go directly onto the head of the queue. 665 */ 666 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 667 ulwpp = &next->ul_link) 668 if (pri >= CMP_PRIO(next)) 669 break; 670 } 671 if ((ulwp->ul_link = *ulwpp) == NULL) 672 qrp->qr_tail = ulwp; 673 *ulwpp = ulwp; 674 675 ulwp->ul_sleepq = qp; 676 ulwp->ul_wchan = qp->qh_wchan; 677 ulwp->ul_qtype = qp->qh_type; 678 if ((ulwp->ul_schedctl != NULL && 679 ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | 680 ulwp->ul_pilocks) { 681 ulwp->ul_rtqueued = 1; 682 qrp->qr_rtcount++; 683 } 684 MAXINCR(qrp->qr_qmax, qrp->qr_qlen); 685 MAXINCR(qp->qh_qmax, qp->qh_qlen); 686 } 687 688 /* 689 * Helper function for queue_slot() and queue_slot_rt(). 690 * Try to find a non-suspended thread on the queue. 
691 */ 692 static ulwp_t ** 693 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) 694 { 695 ulwp_t *ulwp; 696 ulwp_t **foundpp = NULL; 697 int priority = -1; 698 ulwp_t *prev; 699 int tpri; 700 701 for (prev = NULL; 702 (ulwp = *ulwpp) != NULL; 703 prev = ulwp, ulwpp = &ulwp->ul_link) { 704 if (ulwp->ul_stop) /* skip suspended threads */ 705 continue; 706 tpri = rt? CMP_PRIO(ulwp) : 0; 707 if (tpri > priority) { 708 foundpp = ulwpp; 709 *prevp = prev; 710 priority = tpri; 711 if (!rt) 712 break; 713 } 714 } 715 return (foundpp); 716 } 717 718 /* 719 * For real-time, we search the entire queue because the dispatch 720 * (kernel) priorities may have changed since enqueueing. 721 */ 722 static ulwp_t ** 723 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) 724 { 725 ulwp_t **ulwpp = ulwpp_org; 726 ulwp_t *ulwp = *ulwpp; 727 ulwp_t **foundpp = ulwpp; 728 int priority = CMP_PRIO(ulwp); 729 ulwp_t *prev; 730 int tpri; 731 732 for (prev = ulwp, ulwpp = &ulwp->ul_link; 733 (ulwp = *ulwpp) != NULL; 734 prev = ulwp, ulwpp = &ulwp->ul_link) { 735 tpri = CMP_PRIO(ulwp); 736 if (tpri > priority) { 737 foundpp = ulwpp; 738 *prevp = prev; 739 priority = tpri; 740 } 741 } 742 ulwp = *foundpp; 743 744 /* 745 * Try not to return a suspended thread. 746 * This mimics the old libthread's behavior. 747 */ 748 if (ulwp->ul_stop && 749 (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { 750 foundpp = ulwpp; 751 ulwp = *foundpp; 752 } 753 ulwp->ul_rt = 1; 754 return (foundpp); 755 } 756 757 ulwp_t ** 758 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) 759 { 760 queue_root_t *qrp; 761 ulwp_t **ulwpp; 762 ulwp_t *ulwp; 763 int rt; 764 765 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 766 767 if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { 768 *more = 0; 769 return (NULL); /* no lwps on the queue */ 770 } 771 rt = (qrp->qr_rtcount != 0); 772 *prevp = NULL; 773 if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ 774 *more = 0; 775 ulwp->ul_rt = rt; 776 return (&qrp->qr_head); 777 } 778 *more = 1; 779 780 if (rt) /* real-time queue */ 781 return (queue_slot_rt(&qrp->qr_head, prevp)); 782 /* 783 * Try not to return a suspended thread. 784 * This mimics the old libthread's behavior. 785 */ 786 if (ulwp->ul_stop && 787 (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { 788 ulwp = *ulwpp; 789 ulwp->ul_rt = 0; 790 return (ulwpp); 791 } 792 /* 793 * The common case; just pick the first thread on the queue. 794 */ 795 ulwp->ul_rt = 0; 796 return (&qrp->qr_head); 797 } 798 799 /* 800 * Common code for unlinking an lwp from a user-level sleep queue. 801 */ 802 void 803 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 804 { 805 queue_root_t *qrp = qp->qh_root; 806 queue_root_t *nqrp; 807 ulwp_t *ulwp = *ulwpp; 808 ulwp_t *next; 809 810 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 811 ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); 812 813 DECR(qp->qh_qlen); 814 DECR(qrp->qr_qlen); 815 if (ulwp->ul_rtqueued) { 816 ulwp->ul_rtqueued = 0; 817 qrp->qr_rtcount--; 818 } 819 next = ulwp->ul_link; 820 *ulwpp = next; 821 ulwp->ul_link = NULL; 822 if (qrp->qr_tail == ulwp) 823 qrp->qr_tail = prev; 824 if (qrp == &ulwp->ul_queue_root) { 825 /* 826 * We can't continue to use the unlinked thread's 827 * queue root for the linkage. 
		 */
		queue_root_t *qr_next = qrp->qr_next;
		queue_root_t *qr_prev = qrp->qr_prev;

		if (qrp->qr_tail) {
			/* switch to using the last thread's queue root */
			ASSERT(qrp->qr_qlen != 0);
			nqrp = &qrp->qr_tail->ul_queue_root;
			*nqrp = *qrp;
			if (qr_next)
				qr_next->qr_prev = nqrp;
			if (qr_prev)
				qr_prev->qr_next = nqrp;
			else
				qp->qh_hlist = nqrp;
			qp->qh_root = nqrp;
		} else {
			/* empty queue root; just delete from the hash list */
			ASSERT(qrp->qr_qlen == 0);
			if (qr_next)
				qr_next->qr_prev = qr_prev;
			if (qr_prev)
				qr_prev->qr_next = qr_next;
			else
				qp->qh_hlist = qr_next;
			qp->qh_root = NULL;
			DECR(qp->qh_hlen);
		}
	}
}

ulwp_t *
dequeue(queue_head_t *qp, int *more)
{
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;

	if ((ulwpp = queue_slot(qp, &prev, more)) == NULL)
		return (NULL);
	ulwp = *ulwpp;
	queue_unlink(qp, ulwpp, prev);
	ulwp->ul_sleepq = NULL;
	ulwp->ul_wchan = NULL;
	return (ulwp);
}

/*
 * Return a pointer to the highest priority thread sleeping on wchan.
 */
ulwp_t *
queue_waiter(queue_head_t *qp)
{
	ulwp_t **ulwpp;
	ulwp_t *prev;
	int more;

	if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL)
		return (NULL);
	return (*ulwpp);
}

int
dequeue_self(queue_head_t *qp)
{
	ulwp_t *self = curthread;
	queue_root_t *qrp;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int found = 0;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));

	/* find self on the sleep queue */
	if ((qrp = qp->qh_root) != NULL) {
		for (prev = NULL, ulwpp = &qrp->qr_head;
		    (ulwp = *ulwpp) != NULL;
		    prev = ulwp, ulwpp = &ulwp->ul_link) {
			if (ulwp == self) {
				queue_unlink(qp, ulwpp, prev);
				self->ul_cvmutex = NULL;
				self->ul_sleepq = NULL;
				self->ul_wchan = NULL;
				found = 1;
				break;
			}
		}
	}

	if (!found)
		thr_panic("dequeue_self(): curthread not found on queue");

	return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL);
}

/*
 * Called from call_user_handler() and _thrp_suspend() to take
 * ourself off of our sleep queue so we can grab locks.
 */
void
unsleep_self(void)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;

	/*
	 * Calling enter_critical()/exit_critical() here would lead
	 * to recursion.  Just manipulate self->ul_critical directly.
	 */
	self->ul_critical++;
	while (self->ul_sleepq != NULL) {
		qp = queue_lock(self->ul_wchan, self->ul_qtype);
		/*
		 * We may have been moved from a CV queue to a
		 * mutex queue while we were attempting queue_lock().
		 * If so, just loop around and try again.
		 * dequeue_self() clears self->ul_sleepq.
		 */
		if (qp == self->ul_sleepq)
			(void) dequeue_self(qp);
		queue_unlock(qp);
	}
	self->ul_writer = 0;
	self->ul_critical--;
}

/*
 * Common code for calling the ___lwp_mutex_timedlock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
958 */ 959 static int 960 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 961 { 962 ulwp_t *self = curthread; 963 uberdata_t *udp = self->ul_uberdata; 964 int mtype = mp->mutex_type; 965 hrtime_t begin_sleep; 966 int acquired; 967 int error; 968 969 self->ul_sp = stkptr(); 970 self->ul_wchan = mp; 971 if (__td_event_report(self, TD_SLEEP, udp)) { 972 self->ul_td_evbuf.eventnum = TD_SLEEP; 973 self->ul_td_evbuf.eventdata = mp; 974 tdb_event(TD_SLEEP, udp); 975 } 976 if (msp) { 977 tdb_incr(msp->mutex_sleep); 978 begin_sleep = gethrtime(); 979 } 980 981 DTRACE_PROBE1(plockstat, mutex__block, mp); 982 983 for (;;) { 984 /* 985 * A return value of EOWNERDEAD or ELOCKUNMAPPED 986 * means we successfully acquired the lock. 987 */ 988 if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 && 989 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 990 acquired = 0; 991 break; 992 } 993 994 if (mtype & USYNC_PROCESS) { 995 /* 996 * Defend against forkall(). We may be the child, 997 * in which case we don't actually own the mutex. 998 */ 999 enter_critical(self); 1000 if (mp->mutex_ownerpid == udp->pid) { 1001 mp->mutex_owner = (uintptr_t)self; 1002 exit_critical(self); 1003 acquired = 1; 1004 break; 1005 } 1006 exit_critical(self); 1007 } else { 1008 mp->mutex_owner = (uintptr_t)self; 1009 acquired = 1; 1010 break; 1011 } 1012 } 1013 if (msp) 1014 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1015 self->ul_wchan = NULL; 1016 self->ul_sp = 0; 1017 1018 if (acquired) { 1019 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1020 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1021 } else { 1022 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1023 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1024 } 1025 1026 return (error); 1027 } 1028 1029 /* 1030 * Common code for calling the ___lwp_mutex_trylock() system call. 1031 * Returns with mutex_owner and mutex_ownerpid set correctly. 1032 */ 1033 int 1034 mutex_trylock_kernel(mutex_t *mp) 1035 { 1036 ulwp_t *self = curthread; 1037 uberdata_t *udp = self->ul_uberdata; 1038 int mtype = mp->mutex_type; 1039 int error; 1040 int acquired; 1041 1042 for (;;) { 1043 /* 1044 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1045 * means we successfully acquired the lock. 1046 */ 1047 if ((error = ___lwp_mutex_trylock(mp)) != 0 && 1048 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1049 acquired = 0; 1050 break; 1051 } 1052 1053 if (mtype & USYNC_PROCESS) { 1054 /* 1055 * Defend against forkall(). We may be the child, 1056 * in which case we don't actually own the mutex. 
1057 */ 1058 enter_critical(self); 1059 if (mp->mutex_ownerpid == udp->pid) { 1060 mp->mutex_owner = (uintptr_t)self; 1061 exit_critical(self); 1062 acquired = 1; 1063 break; 1064 } 1065 exit_critical(self); 1066 } else { 1067 mp->mutex_owner = (uintptr_t)self; 1068 acquired = 1; 1069 break; 1070 } 1071 } 1072 1073 if (acquired) { 1074 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1075 } else if (error != EBUSY) { 1076 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1077 } 1078 1079 return (error); 1080 } 1081 1082 volatile sc_shared_t * 1083 setup_schedctl(void) 1084 { 1085 ulwp_t *self = curthread; 1086 volatile sc_shared_t *scp; 1087 sc_shared_t *tmp; 1088 1089 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 1090 !self->ul_vfork && /* not a child of vfork() */ 1091 !self->ul_schedctl_called) { /* haven't been called before */ 1092 enter_critical(self); 1093 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 1094 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 1095 self->ul_schedctl = scp = tmp; 1096 exit_critical(self); 1097 } 1098 /* 1099 * Unless the call to setup_schedctl() is surrounded 1100 * by enter_critical()/exit_critical(), the address 1101 * we are returning could be invalid due to a forkall() 1102 * having occurred in another thread. 1103 */ 1104 return (scp); 1105 } 1106 1107 /* 1108 * Interfaces from libsched, incorporated into libc. 1109 * libsched.so.1 is now a filter library onto libc. 1110 */ 1111 #pragma weak schedctl_lookup = _schedctl_init 1112 #pragma weak _schedctl_lookup = _schedctl_init 1113 #pragma weak schedctl_init = _schedctl_init 1114 schedctl_t * 1115 _schedctl_init(void) 1116 { 1117 volatile sc_shared_t *scp = setup_schedctl(); 1118 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 1119 } 1120 1121 #pragma weak schedctl_exit = _schedctl_exit 1122 void 1123 _schedctl_exit(void) 1124 { 1125 } 1126 1127 /* 1128 * Contract private interface for java. 1129 * Set up the schedctl data if it doesn't exist yet. 1130 * Return a pointer to the pointer to the schedctl data. 1131 */ 1132 volatile sc_shared_t *volatile * 1133 _thr_schedctl(void) 1134 { 1135 ulwp_t *self = curthread; 1136 volatile sc_shared_t *volatile *ptr; 1137 1138 if (self->ul_vfork) 1139 return (NULL); 1140 if (*(ptr = &self->ul_schedctl) == NULL) 1141 (void) setup_schedctl(); 1142 return (ptr); 1143 } 1144 1145 /* 1146 * Block signals and attempt to block preemption. 1147 * no_preempt()/preempt() must be used in pairs but can be nested. 1148 */ 1149 void 1150 no_preempt(ulwp_t *self) 1151 { 1152 volatile sc_shared_t *scp; 1153 1154 if (self->ul_preempt++ == 0) { 1155 enter_critical(self); 1156 if ((scp = self->ul_schedctl) != NULL || 1157 (scp = setup_schedctl()) != NULL) { 1158 /* 1159 * Save the pre-existing preempt value. 1160 */ 1161 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 1162 scp->sc_preemptctl.sc_nopreempt = 1; 1163 } 1164 } 1165 } 1166 1167 /* 1168 * Undo the effects of no_preempt(). 1169 */ 1170 void 1171 preempt(ulwp_t *self) 1172 { 1173 volatile sc_shared_t *scp; 1174 1175 ASSERT(self->ul_preempt > 0); 1176 if (--self->ul_preempt == 0) { 1177 if ((scp = self->ul_schedctl) != NULL) { 1178 /* 1179 * Restore the pre-existing preempt value. 1180 */ 1181 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 1182 if (scp->sc_preemptctl.sc_yield && 1183 scp->sc_preemptctl.sc_nopreempt == 0) { 1184 lwp_yield(); 1185 if (scp->sc_preemptctl.sc_yield) { 1186 /* 1187 * Shouldn't happen. 
This is either 1188 * a race condition or the thread 1189 * just entered the real-time class. 1190 */ 1191 lwp_yield(); 1192 scp->sc_preemptctl.sc_yield = 0; 1193 } 1194 } 1195 } 1196 exit_critical(self); 1197 } 1198 } 1199 1200 /* 1201 * If a call to preempt() would cause the current thread to yield or to 1202 * take deferred actions in exit_critical(), then unpark the specified 1203 * lwp so it can run while we delay. Return the original lwpid if the 1204 * unpark was not performed, else return zero. The tests are a repeat 1205 * of some of the tests in preempt(), above. This is a statistical 1206 * optimization solely for cond_sleep_queue(), below. 1207 */ 1208 static lwpid_t 1209 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 1210 { 1211 volatile sc_shared_t *scp = self->ul_schedctl; 1212 1213 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 1214 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 1215 (self->ul_curplease && self->ul_critical == 1)) { 1216 (void) __lwp_unpark(lwpid); 1217 lwpid = 0; 1218 } 1219 return (lwpid); 1220 } 1221 1222 /* 1223 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1224 * If this fails, return EBUSY and let the caller deal with it. 1225 * If this succeeds, return 0 with mutex_owner set to curthread. 1226 */ 1227 static int 1228 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1229 { 1230 ulwp_t *self = curthread; 1231 int error = EBUSY; 1232 ulwp_t *ulwp; 1233 volatile sc_shared_t *scp; 1234 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1235 volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1236 uint32_t new_lockword; 1237 int count = 0; 1238 int max_count; 1239 uint8_t max_spinners; 1240 1241 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1242 1243 if (MUTEX_OWNER(mp) == self) 1244 return (EBUSY); 1245 1246 /* short-cut, not definitive (see below) */ 1247 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1248 ASSERT(mp->mutex_type & LOCK_ROBUST); 1249 error = ENOTRECOVERABLE; 1250 goto done; 1251 } 1252 1253 /* 1254 * Make one attempt to acquire the lock before 1255 * incurring the overhead of the spin loop. 1256 */ 1257 if (set_lock_byte(lockp) == 0) { 1258 *ownerp = (uintptr_t)self; 1259 error = 0; 1260 goto done; 1261 } 1262 if (!tryhard) 1263 goto done; 1264 if (ncpus == 0) 1265 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1266 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1267 max_spinners = ncpus - 1; 1268 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1269 if (max_count == 0) 1270 goto done; 1271 1272 /* 1273 * This spin loop is unfair to lwps that have already dropped into 1274 * the kernel to sleep. They will starve on a highly-contended mutex. 1275 * This is just too bad. The adaptive spin algorithm is intended 1276 * to allow programs with highly-contended locks (that is, broken 1277 * programs) to execute with reasonable speed despite their contention. 1278 * Being fair would reduce the speed of such programs and well-written 1279 * programs will not suffer in any case. 
1280 */ 1281 enter_critical(self); 1282 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1283 exit_critical(self); 1284 goto done; 1285 } 1286 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1287 for (count = 1; ; count++) { 1288 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1289 *ownerp = (uintptr_t)self; 1290 error = 0; 1291 break; 1292 } 1293 if (count == max_count) 1294 break; 1295 SMT_PAUSE(); 1296 /* 1297 * Stop spinning if the mutex owner is not running on 1298 * a processor; it will not drop the lock any time soon 1299 * and we would just be wasting time to keep spinning. 1300 * 1301 * Note that we are looking at another thread (ulwp_t) 1302 * without ensuring that the other thread does not exit. 1303 * The scheme relies on ulwp_t structures never being 1304 * deallocated by the library (the library employs a free 1305 * list of ulwp_t structs that are reused when new threads 1306 * are created) and on schedctl shared memory never being 1307 * deallocated once created via __schedctl(). 1308 * 1309 * Thus, the worst that can happen when the spinning thread 1310 * looks at the owner's schedctl data is that it is looking 1311 * at some other thread's schedctl data. This almost never 1312 * happens and is benign when it does. 1313 */ 1314 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1315 ((scp = ulwp->ul_schedctl) == NULL || 1316 scp->sc_state != SC_ONPROC)) 1317 break; 1318 } 1319 new_lockword = spinners_decr(&mp->mutex_lockword); 1320 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1321 /* 1322 * We haven't yet acquired the lock, the lock 1323 * is free, and there are no other spinners. 1324 * Make one final attempt to acquire the lock. 1325 * 1326 * This isn't strictly necessary since mutex_lock_queue() 1327 * (the next action this thread will take if it doesn't 1328 * acquire the lock here) makes one attempt to acquire 1329 * the lock before putting the thread to sleep. 1330 * 1331 * If the next action for this thread (on failure here) 1332 * were not to call mutex_lock_queue(), this would be 1333 * necessary for correctness, to avoid ending up with an 1334 * unheld mutex with waiters but no one to wake them up. 1335 */ 1336 if (set_lock_byte(lockp) == 0) { 1337 *ownerp = (uintptr_t)self; 1338 error = 0; 1339 } 1340 count++; 1341 } 1342 exit_critical(self); 1343 1344 done: 1345 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1346 ASSERT(mp->mutex_type & LOCK_ROBUST); 1347 /* 1348 * We shouldn't own the mutex. 1349 * Just clear the lock; everyone has already been waked up. 1350 */ 1351 mp->mutex_owner = 0; 1352 (void) clear_lockbyte(&mp->mutex_lockword); 1353 error = ENOTRECOVERABLE; 1354 } 1355 1356 if (error) { 1357 if (count) { 1358 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1359 } 1360 if (error != EBUSY) { 1361 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1362 } 1363 } else { 1364 if (count) { 1365 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1366 } 1367 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1368 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1369 ASSERT(mp->mutex_type & LOCK_ROBUST); 1370 error = EOWNERDEAD; 1371 } 1372 } 1373 1374 return (error); 1375 } 1376 1377 /* 1378 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1379 * The owner field is not set here; the caller (spin_lock_set()) sets it. 
1380 */ 1381 static int 1382 mutex_queuelock_adaptive(mutex_t *mp) 1383 { 1384 ulwp_t *ulwp; 1385 volatile sc_shared_t *scp; 1386 volatile uint8_t *lockp; 1387 volatile uint64_t *ownerp; 1388 int count = curthread->ul_queue_spin; 1389 1390 ASSERT(mp->mutex_type == USYNC_THREAD); 1391 1392 if (count == 0) 1393 return (EBUSY); 1394 1395 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1396 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1397 while (--count >= 0) { 1398 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1399 return (0); 1400 SMT_PAUSE(); 1401 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1402 ((scp = ulwp->ul_schedctl) == NULL || 1403 scp->sc_state != SC_ONPROC)) 1404 break; 1405 } 1406 1407 return (EBUSY); 1408 } 1409 1410 /* 1411 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1412 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1413 * If this fails, return EBUSY and let the caller deal with it. 1414 * If this succeeds, return 0 with mutex_owner set to curthread 1415 * and mutex_ownerpid set to the current pid. 1416 */ 1417 static int 1418 mutex_trylock_process(mutex_t *mp, int tryhard) 1419 { 1420 ulwp_t *self = curthread; 1421 uberdata_t *udp = self->ul_uberdata; 1422 int error = EBUSY; 1423 volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64; 1424 uint32_t new_lockword; 1425 int count = 0; 1426 int max_count; 1427 uint8_t max_spinners; 1428 1429 ASSERT(mp->mutex_type & USYNC_PROCESS); 1430 1431 if (shared_mutex_held(mp)) 1432 return (EBUSY); 1433 1434 /* short-cut, not definitive (see below) */ 1435 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1436 ASSERT(mp->mutex_type & LOCK_ROBUST); 1437 error = ENOTRECOVERABLE; 1438 goto done; 1439 } 1440 1441 /* 1442 * Make one attempt to acquire the lock before 1443 * incurring the overhead of the spin loop. 1444 */ 1445 enter_critical(self); 1446 if (set_lock_byte64(lockp, udp->pid) == 0) { 1447 mp->mutex_owner = (uintptr_t)self; 1448 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1449 exit_critical(self); 1450 error = 0; 1451 goto done; 1452 } 1453 exit_critical(self); 1454 if (!tryhard) 1455 goto done; 1456 if (ncpus == 0) 1457 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1458 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1459 max_spinners = ncpus - 1; 1460 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1461 if (max_count == 0) 1462 goto done; 1463 1464 /* 1465 * This is a process-shared mutex. 1466 * We cannot know if the owner is running on a processor. 1467 * We just spin and hope that it is on a processor. 1468 */ 1469 enter_critical(self); 1470 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) { 1471 exit_critical(self); 1472 goto done; 1473 } 1474 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1475 for (count = 1; ; count++) { 1476 if ((*lockp & LOCKMASK64) == 0 && 1477 set_lock_byte64(lockp, udp->pid) == 0) { 1478 mp->mutex_owner = (uintptr_t)self; 1479 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1480 error = 0; 1481 break; 1482 } 1483 if (count == max_count) 1484 break; 1485 SMT_PAUSE(); 1486 } 1487 new_lockword = spinners_decr(&mp->mutex_lockword); 1488 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1489 /* 1490 * We haven't yet acquired the lock, the lock 1491 * is free, and there are no other spinners. 1492 * Make one final attempt to acquire the lock. 
1493 * 1494 * This isn't strictly necessary since mutex_lock_kernel() 1495 * (the next action this thread will take if it doesn't 1496 * acquire the lock here) makes one attempt to acquire 1497 * the lock before putting the thread to sleep. 1498 * 1499 * If the next action for this thread (on failure here) 1500 * were not to call mutex_lock_kernel(), this would be 1501 * necessary for correctness, to avoid ending up with an 1502 * unheld mutex with waiters but no one to wake them up. 1503 */ 1504 if (set_lock_byte64(lockp, udp->pid) == 0) { 1505 mp->mutex_owner = (uintptr_t)self; 1506 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1507 error = 0; 1508 } 1509 count++; 1510 } 1511 exit_critical(self); 1512 1513 done: 1514 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1515 ASSERT(mp->mutex_type & LOCK_ROBUST); 1516 /* 1517 * We shouldn't own the mutex. 1518 * Just clear the lock; everyone has already been waked up. 1519 */ 1520 mp->mutex_owner = 0; 1521 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1522 (void) clear_lockbyte64(&mp->mutex_lockword64); 1523 error = ENOTRECOVERABLE; 1524 } 1525 1526 if (error) { 1527 if (count) { 1528 DTRACE_PROBE2(plockstat, mutex__spun, 0, count); 1529 } 1530 if (error != EBUSY) { 1531 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1532 } 1533 } else { 1534 if (count) { 1535 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 1536 } 1537 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1538 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1539 ASSERT(mp->mutex_type & LOCK_ROBUST); 1540 if (mp->mutex_flag & LOCK_OWNERDEAD) 1541 error = EOWNERDEAD; 1542 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1543 error = ELOCKUNMAPPED; 1544 else 1545 error = EOWNERDEAD; 1546 } 1547 } 1548 1549 return (error); 1550 } 1551 1552 /* 1553 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1554 * Returns the lwpid of the thread that was dequeued, if any. 1555 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1556 * to wake up the specified lwp. 1557 */ 1558 static lwpid_t 1559 mutex_wakeup(mutex_t *mp) 1560 { 1561 lwpid_t lwpid = 0; 1562 int more; 1563 queue_head_t *qp; 1564 ulwp_t *ulwp; 1565 1566 /* 1567 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1568 * waiters bit if no one was found on the queue because the mutex 1569 * might have been deallocated or reallocated for another purpose. 1570 */ 1571 qp = queue_lock(mp, MX); 1572 if ((ulwp = dequeue(qp, &more)) != NULL) { 1573 lwpid = ulwp->ul_lwpid; 1574 mp->mutex_waiters = more; 1575 } 1576 queue_unlock(qp); 1577 return (lwpid); 1578 } 1579 1580 /* 1581 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 1582 */ 1583 static void 1584 mutex_wakeup_all(mutex_t *mp) 1585 { 1586 queue_head_t *qp; 1587 queue_root_t *qrp; 1588 int nlwpid = 0; 1589 int maxlwps = MAXLWPS; 1590 ulwp_t *ulwp; 1591 lwpid_t buffer[MAXLWPS]; 1592 lwpid_t *lwpid = buffer; 1593 1594 /* 1595 * Walk the list of waiters and prepare to wake up all of them. 1596 * The waiters flag has already been cleared from the mutex. 1597 * 1598 * We keep track of lwpids that are to be unparked in lwpid[]. 1599 * __lwp_unpark_all() is called to unpark all of them after 1600 * they have been removed from the sleep queue and the sleep 1601 * queue lock has been dropped. 
If we run out of space in our 1602 * on-stack buffer, we need to allocate more but we can't call 1603 * lmalloc() because we are holding a queue lock when the overflow 1604 * occurs and lmalloc() acquires a lock. We can't use alloca() 1605 * either because the application may have allocated a small 1606 * stack and we don't want to overrun the stack. So we call 1607 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1608 * system call directly since that path acquires no locks. 1609 */ 1610 qp = queue_lock(mp, MX); 1611 for (;;) { 1612 if ((qrp = qp->qh_root) == NULL || 1613 (ulwp = qrp->qr_head) == NULL) 1614 break; 1615 ASSERT(ulwp->ul_wchan == mp); 1616 queue_unlink(qp, &qrp->qr_head, NULL); 1617 ulwp->ul_sleepq = NULL; 1618 ulwp->ul_wchan = NULL; 1619 if (nlwpid == maxlwps) 1620 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1621 lwpid[nlwpid++] = ulwp->ul_lwpid; 1622 } 1623 1624 if (nlwpid == 0) { 1625 queue_unlock(qp); 1626 } else { 1627 mp->mutex_waiters = 0; 1628 no_preempt(curthread); 1629 queue_unlock(qp); 1630 if (nlwpid == 1) 1631 (void) __lwp_unpark(lwpid[0]); 1632 else 1633 (void) __lwp_unpark_all(lwpid, nlwpid); 1634 preempt(curthread); 1635 } 1636 1637 if (lwpid != buffer) 1638 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 1639 } 1640 1641 /* 1642 * Release a process-private mutex. 1643 * As an optimization, if there are waiters but there are also spinners 1644 * attempting to acquire the mutex, then don't bother waking up a waiter; 1645 * one of the spinners will acquire the mutex soon and it would be a waste 1646 * of resources to wake up some thread just to have it spin for a while 1647 * and then possibly go back to sleep. See mutex_trylock_adaptive(). 1648 */ 1649 static lwpid_t 1650 mutex_unlock_queue(mutex_t *mp, int release_all) 1651 { 1652 lwpid_t lwpid = 0; 1653 uint32_t old_lockword; 1654 1655 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1656 mp->mutex_owner = 0; 1657 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1658 if ((old_lockword & WAITERMASK) && 1659 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1660 ulwp_t *self = curthread; 1661 no_preempt(self); /* ensure a prompt wakeup */ 1662 if (release_all) 1663 mutex_wakeup_all(mp); 1664 else 1665 lwpid = mutex_wakeup(mp); 1666 if (lwpid == 0) 1667 preempt(self); 1668 } 1669 return (lwpid); 1670 } 1671 1672 /* 1673 * Like mutex_unlock_queue(), but for process-shared mutexes. 1674 */ 1675 static void 1676 mutex_unlock_process(mutex_t *mp, int release_all) 1677 { 1678 uint64_t old_lockword64; 1679 1680 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1681 mp->mutex_owner = 0; 1682 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1683 old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64); 1684 if ((old_lockword64 & WAITERMASK64) && 1685 (release_all || (old_lockword64 & SPINNERMASK64) == 0)) { 1686 ulwp_t *self = curthread; 1687 no_preempt(self); /* ensure a prompt wakeup */ 1688 (void) ___lwp_mutex_wakeup(mp, release_all); 1689 preempt(self); 1690 } 1691 } 1692 1693 void 1694 stall(void) 1695 { 1696 for (;;) 1697 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1698 } 1699 1700 /* 1701 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1702 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1703 * If successful, returns with mutex_owner set correctly. 
1704 */ 1705 int 1706 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1707 timespec_t *tsp) 1708 { 1709 uberdata_t *udp = curthread->ul_uberdata; 1710 queue_head_t *qp; 1711 hrtime_t begin_sleep; 1712 int error = 0; 1713 1714 self->ul_sp = stkptr(); 1715 if (__td_event_report(self, TD_SLEEP, udp)) { 1716 self->ul_wchan = mp; 1717 self->ul_td_evbuf.eventnum = TD_SLEEP; 1718 self->ul_td_evbuf.eventdata = mp; 1719 tdb_event(TD_SLEEP, udp); 1720 } 1721 if (msp) { 1722 tdb_incr(msp->mutex_sleep); 1723 begin_sleep = gethrtime(); 1724 } 1725 1726 DTRACE_PROBE1(plockstat, mutex__block, mp); 1727 1728 /* 1729 * Put ourself on the sleep queue, and while we are 1730 * unable to grab the lock, go park in the kernel. 1731 * Take ourself off the sleep queue after we acquire the lock. 1732 * The waiter bit can be set/cleared only while holding the queue lock. 1733 */ 1734 qp = queue_lock(mp, MX); 1735 enqueue(qp, self, 0); 1736 mp->mutex_waiters = 1; 1737 for (;;) { 1738 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1739 mp->mutex_owner = (uintptr_t)self; 1740 mp->mutex_waiters = dequeue_self(qp); 1741 break; 1742 } 1743 set_parking_flag(self, 1); 1744 queue_unlock(qp); 1745 /* 1746 * __lwp_park() will return the residual time in tsp 1747 * if we are unparked before the timeout expires. 1748 */ 1749 error = __lwp_park(tsp, 0); 1750 set_parking_flag(self, 0); 1751 /* 1752 * We could have taken a signal or suspended ourself. 1753 * If we did, then we removed ourself from the queue. 1754 * Someone else may have removed us from the queue 1755 * as a consequence of mutex_unlock(). We may have 1756 * gotten a timeout from __lwp_park(). Or we may still 1757 * be on the queue and this is just a spurious wakeup. 1758 */ 1759 qp = queue_lock(mp, MX); 1760 if (self->ul_sleepq == NULL) { 1761 if (error) { 1762 mp->mutex_waiters = queue_waiter(qp)? 1 : 0; 1763 if (error != EINTR) 1764 break; 1765 error = 0; 1766 } 1767 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1768 mp->mutex_owner = (uintptr_t)self; 1769 break; 1770 } 1771 enqueue(qp, self, 0); 1772 mp->mutex_waiters = 1; 1773 } 1774 ASSERT(self->ul_sleepq == qp && 1775 self->ul_qtype == MX && 1776 self->ul_wchan == mp); 1777 if (error) { 1778 if (error != EINTR) { 1779 mp->mutex_waiters = dequeue_self(qp); 1780 break; 1781 } 1782 error = 0; 1783 } 1784 } 1785 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1786 self->ul_wchan == NULL); 1787 self->ul_sp = 0; 1788 queue_unlock(qp); 1789 1790 if (msp) 1791 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1792 1793 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1794 1795 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1796 ASSERT(mp->mutex_type & LOCK_ROBUST); 1797 /* 1798 * We shouldn't own the mutex. 1799 * Just clear the lock; everyone has already been waked up. 
1800 */ 1801 mp->mutex_owner = 0; 1802 (void) clear_lockbyte(&mp->mutex_lockword); 1803 error = ENOTRECOVERABLE; 1804 } 1805 1806 if (error) { 1807 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1808 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1809 } else { 1810 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1811 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1812 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1813 ASSERT(mp->mutex_type & LOCK_ROBUST); 1814 error = EOWNERDEAD; 1815 } 1816 } 1817 1818 return (error); 1819 } 1820 1821 static int 1822 mutex_recursion(mutex_t *mp, int mtype, int try) 1823 { 1824 ASSERT(mutex_is_held(mp)); 1825 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1826 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1827 1828 if (mtype & LOCK_RECURSIVE) { 1829 if (mp->mutex_rcount == RECURSION_MAX) { 1830 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1831 return (EAGAIN); 1832 } 1833 mp->mutex_rcount++; 1834 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1835 return (0); 1836 } 1837 if (try == MUTEX_LOCK) { 1838 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1839 return (EDEADLK); 1840 } 1841 return (EBUSY); 1842 } 1843 1844 /* 1845 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1846 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1847 * We use tdb_hash_lock here and in the synch object tracking code in 1848 * the tdb_agent.c file. There is no conflict between these two usages. 1849 */ 1850 void 1851 register_lock(mutex_t *mp) 1852 { 1853 uberdata_t *udp = curthread->ul_uberdata; 1854 uint_t hash = LOCK_HASH(mp); 1855 robust_t *rlp; 1856 robust_t **rlpp; 1857 robust_t **table; 1858 1859 if ((table = udp->robustlocks) == NULL) { 1860 lmutex_lock(&udp->tdb_hash_lock); 1861 if ((table = udp->robustlocks) == NULL) { 1862 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1863 _membar_producer(); 1864 udp->robustlocks = table; 1865 } 1866 lmutex_unlock(&udp->tdb_hash_lock); 1867 } 1868 _membar_consumer(); 1869 1870 /* 1871 * First search the registered table with no locks held. 1872 * This is safe because the table never shrinks 1873 * and we can only get a false negative. 1874 */ 1875 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1876 if (rlp->robust_lock == mp) /* already registered */ 1877 return; 1878 } 1879 1880 /* 1881 * The lock was not found. 1882 * Repeat the operation with tdb_hash_lock held. 1883 */ 1884 lmutex_lock(&udp->tdb_hash_lock); 1885 1886 for (rlpp = &table[hash]; 1887 (rlp = *rlpp) != NULL; 1888 rlpp = &rlp->robust_next) { 1889 if (rlp->robust_lock == mp) { /* already registered */ 1890 lmutex_unlock(&udp->tdb_hash_lock); 1891 return; 1892 } 1893 } 1894 1895 /* 1896 * The lock has never been registered. 1897 * Register it now and add it to the table. 1898 */ 1899 (void) ___lwp_mutex_register(mp); 1900 rlp = lmalloc(sizeof (*rlp)); 1901 rlp->robust_lock = mp; 1902 _membar_producer(); 1903 *rlpp = rlp; 1904 1905 lmutex_unlock(&udp->tdb_hash_lock); 1906 } 1907 1908 /* 1909 * This is called in the child of fork()/forkall() to start over 1910 * with a clean slate. (Each process must register its own locks.) 1911 * No locks are needed because all other threads are suspended or gone. 
1912 */ 1913 void 1914 unregister_locks(void) 1915 { 1916 uberdata_t *udp = curthread->ul_uberdata; 1917 uint_t hash; 1918 robust_t **table; 1919 robust_t *rlp; 1920 robust_t *next; 1921 1922 if ((table = udp->robustlocks) != NULL) { 1923 for (hash = 0; hash < LOCKHASHSZ; hash++) { 1924 rlp = table[hash]; 1925 while (rlp != NULL) { 1926 next = rlp->robust_next; 1927 lfree(rlp, sizeof (*rlp)); 1928 rlp = next; 1929 } 1930 } 1931 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 1932 udp->robustlocks = NULL; 1933 } 1934 } 1935 1936 /* 1937 * Returns with mutex_owner set correctly. 1938 */ 1939 int 1940 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 1941 { 1942 ulwp_t *self = curthread; 1943 uberdata_t *udp = self->ul_uberdata; 1944 int mtype = mp->mutex_type; 1945 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 1946 int error = 0; 1947 int noceil = try & MUTEX_NOCEIL; 1948 uint8_t ceil; 1949 int myprio; 1950 1951 try &= ~MUTEX_NOCEIL; 1952 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1953 1954 if (!self->ul_schedctl_called) 1955 (void) setup_schedctl(); 1956 1957 if (msp && try == MUTEX_TRY) 1958 tdb_incr(msp->mutex_try); 1959 1960 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) 1961 return (mutex_recursion(mp, mtype, try)); 1962 1963 if (self->ul_error_detection && try == MUTEX_LOCK && 1964 tsp == NULL && mutex_is_held(mp)) 1965 lock_error(mp, "mutex_lock", NULL, NULL); 1966 1967 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 1968 update_sched(self); 1969 if (self->ul_cid != self->ul_rtclassid) { 1970 DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM); 1971 return (EPERM); 1972 } 1973 ceil = mp->mutex_ceiling; 1974 myprio = self->ul_epri? self->ul_epri : self->ul_pri; 1975 if (myprio > ceil) { 1976 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 1977 return (EINVAL); 1978 } 1979 if ((error = _ceil_mylist_add(mp)) != 0) { 1980 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1981 return (error); 1982 } 1983 if (myprio < ceil) 1984 _ceil_prio_inherit(ceil); 1985 } 1986 1987 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 1988 == (USYNC_PROCESS | LOCK_ROBUST)) 1989 register_lock(mp); 1990 1991 if (mtype & LOCK_PRIO_INHERIT) { 1992 /* go straight to the kernel */ 1993 if (try == MUTEX_TRY) 1994 error = mutex_trylock_kernel(mp); 1995 else /* MUTEX_LOCK */ 1996 error = mutex_lock_kernel(mp, tsp, msp); 1997 /* 1998 * The kernel never sets or clears the lock byte 1999 * for LOCK_PRIO_INHERIT mutexes. 2000 * Set it here for consistency. 
2001 */ 2002 switch (error) { 2003 case 0: 2004 self->ul_pilocks++; 2005 mp->mutex_lockw = LOCKSET; 2006 break; 2007 case EOWNERDEAD: 2008 case ELOCKUNMAPPED: 2009 self->ul_pilocks++; 2010 mp->mutex_lockw = LOCKSET; 2011 /* FALLTHROUGH */ 2012 case ENOTRECOVERABLE: 2013 ASSERT(mtype & LOCK_ROBUST); 2014 break; 2015 case EDEADLK: 2016 if (try == MUTEX_LOCK) 2017 stall(); 2018 error = EBUSY; 2019 break; 2020 } 2021 } else if (mtype & USYNC_PROCESS) { 2022 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 2023 if (error == EBUSY && try == MUTEX_LOCK) 2024 error = mutex_lock_kernel(mp, tsp, msp); 2025 } else { /* USYNC_THREAD */ 2026 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 2027 if (error == EBUSY && try == MUTEX_LOCK) 2028 error = mutex_lock_queue(self, msp, mp, tsp); 2029 } 2030 2031 switch (error) { 2032 case 0: 2033 case EOWNERDEAD: 2034 case ELOCKUNMAPPED: 2035 if (mtype & LOCK_ROBUST) 2036 remember_lock(mp); 2037 if (msp) 2038 record_begin_hold(msp); 2039 break; 2040 default: 2041 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2042 (void) _ceil_mylist_del(mp); 2043 if (myprio < ceil) 2044 _ceil_prio_waive(); 2045 } 2046 if (try == MUTEX_TRY) { 2047 if (msp) 2048 tdb_incr(msp->mutex_try_fail); 2049 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2050 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2051 tdb_event(TD_LOCK_TRY, udp); 2052 } 2053 } 2054 break; 2055 } 2056 2057 return (error); 2058 } 2059 2060 int 2061 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 2062 { 2063 ulwp_t *self = curthread; 2064 uberdata_t *udp = self->ul_uberdata; 2065 2066 /* 2067 * We know that USYNC_PROCESS is set in mtype and that 2068 * zero, one, or both of the flags LOCK_RECURSIVE and 2069 * LOCK_ERRORCHECK are set, and that no other flags are set. 2070 */ 2071 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 2072 enter_critical(self); 2073 if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) { 2074 mp->mutex_owner = (uintptr_t)self; 2075 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 2076 exit_critical(self); 2077 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2078 return (0); 2079 } 2080 exit_critical(self); 2081 2082 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 2083 return (mutex_recursion(mp, mtype, try)); 2084 2085 if (try == MUTEX_LOCK) { 2086 if (mutex_trylock_process(mp, 1) == 0) 2087 return (0); 2088 return (mutex_lock_kernel(mp, tsp, NULL)); 2089 } 2090 2091 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2092 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2093 tdb_event(TD_LOCK_TRY, udp); 2094 } 2095 return (EBUSY); 2096 } 2097 2098 static int 2099 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 2100 { 2101 ulwp_t *self = curthread; 2102 int mtype = mp->mutex_type; 2103 uberflags_t *gflags; 2104 2105 /* 2106 * Optimize the case of USYNC_THREAD, including 2107 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2108 * no error detection, no lock statistics, 2109 * and the process has only a single thread. 2110 * (Most likely a traditional single-threaded application.) 2111 */ 2112 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2113 self->ul_uberdata->uberflags.uf_all) == 0) { 2114 /* 2115 * Only one thread exists so we don't need an atomic operation. 
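 *
 * Editorial note: the EOWNERDEAD and ENOTRECOVERABLE codes produced
 * by mutex_lock_internal() above are what a robust-mutex application
 * ultimately sees.  A hedged sketch of the expected caller-side
 * handling ('robust_lock' and repair_shared_state() are hypothetical;
 * pthread_mutex_consistent_np() is the interface defined later in
 * this file):
 *
 *	switch (pthread_mutex_lock(&robust_lock)) {
 *	case 0:
 *		break;			// normal acquisition
 *	case EOWNERDEAD:
 *		repair_shared_state();	// previous owner died holding it
 *		(void) pthread_mutex_consistent_np(&robust_lock);
 *		break;
 *	case ENOTRECOVERABLE:
 *		// no one repaired the state; the lock is now useless
 *		// and must be destroyed and reinitialized
 *		break;
 *	}
 *
 * On success or EOWNERDEAD the lock is held and must eventually be
 * unlocked as usual; on ENOTRECOVERABLE it was not acquired.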
2116 */ 2117 if (mp->mutex_lockw == 0) { 2118 mp->mutex_lockw = LOCKSET; 2119 mp->mutex_owner = (uintptr_t)self; 2120 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2121 return (0); 2122 } 2123 if (mtype && MUTEX_OWNER(mp) == self) 2124 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2125 /* 2126 * We have reached a deadlock, probably because the 2127 * process is executing non-async-signal-safe code in 2128 * a signal handler and is attempting to acquire a lock 2129 * that it already owns. This is not surprising, given 2130 * bad programming practices over the years that have 2131 * resulted in applications calling printf() and such 2132 * in their signal handlers. Unless the user has told 2133 * us that the signal handlers are safe by setting: 2134 * export _THREAD_ASYNC_SAFE=1 2135 * we return EDEADLK rather than actually deadlocking. 2136 */ 2137 if (tsp == NULL && 2138 MUTEX_OWNER(mp) == self && !self->ul_async_safe) { 2139 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 2140 return (EDEADLK); 2141 } 2142 } 2143 2144 /* 2145 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2146 * no error detection, and no lock statistics. 2147 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2148 */ 2149 if ((gflags = self->ul_schedctl_called) != NULL && 2150 (gflags->uf_trs_ted | 2151 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2152 if (mtype & USYNC_PROCESS) 2153 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 2154 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2155 mp->mutex_owner = (uintptr_t)self; 2156 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2157 return (0); 2158 } 2159 if (mtype && MUTEX_OWNER(mp) == self) 2160 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2161 if (mutex_trylock_adaptive(mp, 1) != 0) 2162 return (mutex_lock_queue(self, NULL, mp, tsp)); 2163 return (0); 2164 } 2165 2166 /* else do it the long way */ 2167 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 2168 } 2169 2170 /* 2171 * Of the following function names (all the same function, of course), 2172 * only _private_mutex_lock() is not exported from libc. This means 2173 * that calling _private_mutex_lock() within libc will not invoke the 2174 * dynamic linker. This is critical for any code called in the child 2175 * of vfork() (via posix_spawn()) because invoking the dynamic linker 2176 * in such a case would corrupt the parent's address space. There are 2177 * other places in libc where avoiding the dynamic linker is necessary. 2178 * Of course, _private_mutex_lock() can be called in cases not requiring 2179 * the avoidance of the dynamic linker too, and often is.
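 *
 * Editorial note: the EDEADLK conversion in the single-threaded fast
 * path above exists to catch self-deadlock from signal handlers.  A
 * hedged sketch of the hazard it detects ('log_lock' and the handler
 * are hypothetical; pthread_mutex_lock() is, of course, not
 * async-signal-safe, which is exactly the point):
 *
 *	static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
 *
 *	static void
 *	handler(int sig)
 *	{
 *		// If the interrupted code already holds log_lock, this
 *		// call can never succeed.  In a single-threaded process
 *		// the fast path above returns EDEADLK here instead of
 *		// hanging, unless _THREAD_ASYNC_SAFE=1 is set.
 *		if (pthread_mutex_lock(&log_lock) == 0) {
 *			// write the log entry, then release
 *			(void) pthread_mutex_unlock(&log_lock);
 *		}
 *	}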
2180 */ 2181 #pragma weak _private_mutex_lock = __mutex_lock 2182 #pragma weak mutex_lock = __mutex_lock 2183 #pragma weak _mutex_lock = __mutex_lock 2184 #pragma weak pthread_mutex_lock = __mutex_lock 2185 #pragma weak _pthread_mutex_lock = __mutex_lock 2186 int 2187 __mutex_lock(mutex_t *mp) 2188 { 2189 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2190 return (mutex_lock_impl(mp, NULL)); 2191 } 2192 2193 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock 2194 int 2195 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime) 2196 { 2197 timespec_t tslocal; 2198 int error; 2199 2200 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2201 abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal); 2202 error = mutex_lock_impl(mp, &tslocal); 2203 if (error == ETIME) 2204 error = ETIMEDOUT; 2205 return (error); 2206 } 2207 2208 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np 2209 int 2210 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime) 2211 { 2212 timespec_t tslocal; 2213 int error; 2214 2215 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2216 tslocal = *reltime; 2217 error = mutex_lock_impl(mp, &tslocal); 2218 if (error == ETIME) 2219 error = ETIMEDOUT; 2220 return (error); 2221 } 2222 2223 #pragma weak _private_mutex_trylock = __mutex_trylock 2224 #pragma weak mutex_trylock = __mutex_trylock 2225 #pragma weak _mutex_trylock = __mutex_trylock 2226 #pragma weak pthread_mutex_trylock = __mutex_trylock 2227 #pragma weak _pthread_mutex_trylock = __mutex_trylock 2228 int 2229 __mutex_trylock(mutex_t *mp) 2230 { 2231 ulwp_t *self = curthread; 2232 uberdata_t *udp = self->ul_uberdata; 2233 int mtype = mp->mutex_type; 2234 uberflags_t *gflags; 2235 2236 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2237 2238 /* 2239 * Optimize the case of USYNC_THREAD, including 2240 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2241 * no error detection, no lock statistics, 2242 * and the process has only a single thread. 2243 * (Most likely a traditional single-threaded application.) 2244 */ 2245 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2246 udp->uberflags.uf_all) == 0) { 2247 /* 2248 * Only one thread exists so we don't need an atomic operation. 2249 */ 2250 if (mp->mutex_lockw == 0) { 2251 mp->mutex_lockw = LOCKSET; 2252 mp->mutex_owner = (uintptr_t)self; 2253 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2254 return (0); 2255 } 2256 if (mtype && MUTEX_OWNER(mp) == self) 2257 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2258 return (EBUSY); 2259 } 2260 2261 /* 2262 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2263 * no error detection, and no lock statistics. 2264 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
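 *
 * Editorial note: pthread_mutex_timedlock() above expects an absolute
 * CLOCK_REALTIME deadline (it is converted to a relative wait by
 * abstime_to_reltime()).  A minimal caller sketch, with 'mtx' and the
 * five-second budget purely illustrative:
 *
 *	timespec_t deadline;
 *	int err;
 *
 *	(void) clock_gettime(CLOCK_REALTIME, &deadline);
 *	deadline.tv_sec += 5;		// give up five seconds from now
 *	err = pthread_mutex_timedlock(&mtx, &deadline);
 *	if (err == ETIMEDOUT) {
 *		// the lock never became available in time
 *	}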
2265 */ 2266 if ((gflags = self->ul_schedctl_called) != NULL && 2267 (gflags->uf_trs_ted | 2268 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2269 if (mtype & USYNC_PROCESS) 2270 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2271 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2272 mp->mutex_owner = (uintptr_t)self; 2273 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2274 return (0); 2275 } 2276 if (mtype && MUTEX_OWNER(mp) == self) 2277 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2278 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2279 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2280 tdb_event(TD_LOCK_TRY, udp); 2281 } 2282 return (EBUSY); 2283 } 2284 2285 /* else do it the long way */ 2286 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2287 } 2288 2289 int 2290 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2291 { 2292 ulwp_t *self = curthread; 2293 uberdata_t *udp = self->ul_uberdata; 2294 int mtype = mp->mutex_type; 2295 tdb_mutex_stats_t *msp; 2296 int error = 0; 2297 int release_all; 2298 lwpid_t lwpid; 2299 2300 if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp)) 2301 return (EPERM); 2302 2303 if (self->ul_error_detection && !mutex_is_held(mp)) 2304 lock_error(mp, "mutex_unlock", NULL, NULL); 2305 2306 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2307 mp->mutex_rcount--; 2308 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2309 return (0); 2310 } 2311 2312 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2313 (void) record_hold_time(msp); 2314 2315 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2316 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2317 ASSERT(mp->mutex_type & LOCK_ROBUST); 2318 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2319 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2320 } 2321 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2322 2323 if (mtype & LOCK_PRIO_INHERIT) { 2324 no_preempt(self); 2325 mp->mutex_owner = 0; 2326 /* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */ 2327 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2328 mp->mutex_lockw = LOCKCLEAR; 2329 self->ul_pilocks--; 2330 error = ___lwp_mutex_unlock(mp); 2331 preempt(self); 2332 } else if (mtype & USYNC_PROCESS) { 2333 mutex_unlock_process(mp, release_all); 2334 } else { /* USYNC_THREAD */ 2335 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2336 (void) __lwp_unpark(lwpid); 2337 preempt(self); 2338 } 2339 } 2340 2341 if (mtype & LOCK_ROBUST) 2342 forget_lock(mp); 2343 2344 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2345 _ceil_prio_waive(); 2346 2347 return (error); 2348 } 2349 2350 #pragma weak _private_mutex_unlock = __mutex_unlock 2351 #pragma weak mutex_unlock = __mutex_unlock 2352 #pragma weak _mutex_unlock = __mutex_unlock 2353 #pragma weak pthread_mutex_unlock = __mutex_unlock 2354 #pragma weak _pthread_mutex_unlock = __mutex_unlock 2355 int 2356 __mutex_unlock(mutex_t *mp) 2357 { 2358 ulwp_t *self = curthread; 2359 int mtype = mp->mutex_type; 2360 uberflags_t *gflags; 2361 lwpid_t lwpid; 2362 short el; 2363 2364 /* 2365 * Optimize the case of USYNC_THREAD, including 2366 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2367 * no error detection, no lock statistics, 2368 * and the process has only a single thread. 2369 * (Most likely a traditional single-threaded application.) 
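 *
 * Editorial note: the recursion count manipulated above (mutex_rcount
 * in mutex_recursion() and mutex_unlock_internal()) gives recursive
 * mutexes their usual pairing rule: the lock is only really released
 * by the unlock that balances the first lock.  A hedged sketch with
 * hypothetical names:
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t rlock;
 *
 *	(void) pthread_mutexattr_init(&attr);
 *	(void) pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
 *	(void) pthread_mutex_init(&rlock, &attr);
 *
 *	(void) pthread_mutex_lock(&rlock);	// acquired, rcount == 0
 *	(void) pthread_mutex_lock(&rlock);	// rcount == 1
 *	(void) pthread_mutex_unlock(&rlock);	// rcount back to 0, still held
 *	(void) pthread_mutex_unlock(&rlock);	// actually released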
2370 */ 2371 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2372 self->ul_uberdata->uberflags.uf_all) == 0) { 2373 if (mtype) { 2374 /* 2375 * At this point we know that one or both of the 2376 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2377 */ 2378 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2379 return (EPERM); 2380 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2381 mp->mutex_rcount--; 2382 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2383 return (0); 2384 } 2385 } 2386 /* 2387 * Only one thread exists so we don't need an atomic operation. 2388 * Also, there can be no waiters. 2389 */ 2390 mp->mutex_owner = 0; 2391 mp->mutex_lockword = 0; 2392 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2393 return (0); 2394 } 2395 2396 /* 2397 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2398 * no error detection, and no lock statistics. 2399 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2400 */ 2401 if ((gflags = self->ul_schedctl_called) != NULL) { 2402 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2403 fast_unlock: 2404 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2405 (void) __lwp_unpark(lwpid); 2406 preempt(self); 2407 } 2408 return (0); 2409 } 2410 if (el) /* error detection or lock statistics */ 2411 goto slow_unlock; 2412 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2413 /* 2414 * At this point we know that one or both of the 2415 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2416 */ 2417 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2418 return (EPERM); 2419 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2420 mp->mutex_rcount--; 2421 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2422 return (0); 2423 } 2424 goto fast_unlock; 2425 } 2426 if ((mtype & 2427 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2428 /* 2429 * At this point we know that zero, one, or both of the 2430 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2431 * that the USYNC_PROCESS flag is set. 2432 */ 2433 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2434 return (EPERM); 2435 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2436 mp->mutex_rcount--; 2437 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2438 return (0); 2439 } 2440 mutex_unlock_process(mp, 0); 2441 return (0); 2442 } 2443 } 2444 2445 /* else do it the long way */ 2446 slow_unlock: 2447 return (mutex_unlock_internal(mp, 0)); 2448 } 2449 2450 /* 2451 * Internally to the library, almost all mutex lock/unlock actions 2452 * go through these lmutex_ functions, to protect critical regions. 2453 * We replicate a bit of code from __mutex_lock() and __mutex_unlock() 2454 * to make these functions faster since we know that the mutex type 2455 * of all internal locks is USYNC_THREAD. We also know that internal 2456 * locking can never fail, so we panic if it does. 2457 */ 2458 void 2459 lmutex_lock(mutex_t *mp) 2460 { 2461 ulwp_t *self = curthread; 2462 uberdata_t *udp = self->ul_uberdata; 2463 2464 ASSERT(mp->mutex_type == USYNC_THREAD); 2465 2466 enter_critical(self); 2467 /* 2468 * Optimize the case of no lock statistics and only a single thread. 2469 * (Most likely a traditional single-threaded application.) 2470 */ 2471 if (udp->uberflags.uf_all == 0) { 2472 /* 2473 * Only one thread exists; the mutex must be free. 
2474 */ 2475 ASSERT(mp->mutex_lockw == 0); 2476 mp->mutex_lockw = LOCKSET; 2477 mp->mutex_owner = (uintptr_t)self; 2478 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2479 } else { 2480 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2481 2482 if (!self->ul_schedctl_called) 2483 (void) setup_schedctl(); 2484 2485 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2486 mp->mutex_owner = (uintptr_t)self; 2487 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2488 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2489 (void) mutex_lock_queue(self, msp, mp, NULL); 2490 } 2491 2492 if (msp) 2493 record_begin_hold(msp); 2494 } 2495 } 2496 2497 void 2498 lmutex_unlock(mutex_t *mp) 2499 { 2500 ulwp_t *self = curthread; 2501 uberdata_t *udp = self->ul_uberdata; 2502 2503 ASSERT(mp->mutex_type == USYNC_THREAD); 2504 2505 /* 2506 * Optimize the case of no lock statistics and only a single thread. 2507 * (Most likely a traditional single-threaded application.) 2508 */ 2509 if (udp->uberflags.uf_all == 0) { 2510 /* 2511 * Only one thread exists so there can be no waiters. 2512 */ 2513 mp->mutex_owner = 0; 2514 mp->mutex_lockword = 0; 2515 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2516 } else { 2517 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2518 lwpid_t lwpid; 2519 2520 if (msp) 2521 (void) record_hold_time(msp); 2522 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2523 (void) __lwp_unpark(lwpid); 2524 preempt(self); 2525 } 2526 } 2527 exit_critical(self); 2528 } 2529 2530 /* 2531 * For specialized code in libc, like the asynchronous i/o code, 2532 * the following sig_*() locking primitives are used in order 2533 * to make the code asynchronous signal safe. Signals are 2534 * deferred while locks acquired by these functions are held. 2535 */ 2536 void 2537 sig_mutex_lock(mutex_t *mp) 2538 { 2539 sigoff(curthread); 2540 (void) _private_mutex_lock(mp); 2541 } 2542 2543 void 2544 sig_mutex_unlock(mutex_t *mp) 2545 { 2546 (void) _private_mutex_unlock(mp); 2547 sigon(curthread); 2548 } 2549 2550 int 2551 sig_mutex_trylock(mutex_t *mp) 2552 { 2553 int error; 2554 2555 sigoff(curthread); 2556 if ((error = _private_mutex_trylock(mp)) != 0) 2557 sigon(curthread); 2558 return (error); 2559 } 2560 2561 /* 2562 * sig_cond_wait() is a cancellation point. 2563 */ 2564 int 2565 sig_cond_wait(cond_t *cv, mutex_t *mp) 2566 { 2567 int error; 2568 2569 ASSERT(curthread->ul_sigdefer != 0); 2570 _private_testcancel(); 2571 error = __cond_wait(cv, mp); 2572 if (error == EINTR && curthread->ul_cursig) { 2573 sig_mutex_unlock(mp); 2574 /* take the deferred signal here */ 2575 sig_mutex_lock(mp); 2576 } 2577 _private_testcancel(); 2578 return (error); 2579 } 2580 2581 /* 2582 * sig_cond_reltimedwait() is a cancellation point. 2583 */ 2584 int 2585 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2586 { 2587 int error; 2588 2589 ASSERT(curthread->ul_sigdefer != 0); 2590 _private_testcancel(); 2591 error = __cond_reltimedwait(cv, mp, ts); 2592 if (error == EINTR && curthread->ul_cursig) { 2593 sig_mutex_unlock(mp); 2594 /* take the deferred signal here */ 2595 sig_mutex_lock(mp); 2596 } 2597 _private_testcancel(); 2598 return (error); 2599 } 2600 2601 /* 2602 * For specialized code in libc, like the stdio code. 2603 * the following cancel_safe_*() locking primitives are used in 2604 * order to make the code cancellation-safe. Cancellation is 2605 * deferred while locks acquired by these functions are held. 
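 *
 * Editorial note: the sig_*() primitives above are used in the shape
 * sketched below (all names hypothetical).  Signals are deferred
 * while the lock is held (sig_cond_wait() may briefly drop the lock
 * to take a deferred signal), and sig_cond_wait() is a cancellation
 * point:
 *
 *	sig_mutex_lock(&work_lock);
 *	while (!work_ready)
 *		(void) sig_cond_wait(&work_cv, &work_lock);
 *	work_ready = 0;
 *	sig_mutex_unlock(&work_lock);
 *
 * The cancel_safe_*() functions below are used the same way, with
 * cancellation rather than signals held off while the lock is held.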
2606 */ 2607 void 2608 cancel_safe_mutex_lock(mutex_t *mp) 2609 { 2610 (void) _private_mutex_lock(mp); 2611 curthread->ul_libc_locks++; 2612 } 2613 2614 int 2615 cancel_safe_mutex_trylock(mutex_t *mp) 2616 { 2617 int error; 2618 2619 if ((error = _private_mutex_trylock(mp)) == 0) 2620 curthread->ul_libc_locks++; 2621 return (error); 2622 } 2623 2624 void 2625 cancel_safe_mutex_unlock(mutex_t *mp) 2626 { 2627 ulwp_t *self = curthread; 2628 2629 ASSERT(self->ul_libc_locks != 0); 2630 2631 (void) _private_mutex_unlock(mp); 2632 2633 /* 2634 * Decrement the count of locks held by cancel_safe_mutex_lock(). 2635 * If we are then in a position to terminate cleanly and 2636 * if there is a pending cancellation and cancellation 2637 * is not disabled and we received EINTR from a recent 2638 * system call then perform the cancellation action now. 2639 */ 2640 if (--self->ul_libc_locks == 0 && 2641 !(self->ul_vfork | self->ul_nocancel | 2642 self->ul_critical | self->ul_sigdefer) && 2643 cancel_active()) 2644 _pthread_exit(PTHREAD_CANCELED); 2645 } 2646 2647 static int 2648 shared_mutex_held(mutex_t *mparg) 2649 { 2650 /* 2651 * The 'volatile' is necessary to make sure the compiler doesn't 2652 * reorder the tests of the various components of the mutex. 2653 * They must be tested in this order: 2654 * mutex_lockw 2655 * mutex_owner 2656 * mutex_ownerpid 2657 * This relies on the fact that everywhere mutex_lockw is cleared, 2658 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2659 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2660 * and mutex_ownerpid are set after mutex_lockw is set, and that 2661 * mutex_lockw is set or cleared with a memory barrier. 2662 */ 2663 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2664 ulwp_t *self = curthread; 2665 uberdata_t *udp = self->ul_uberdata; 2666 2667 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2668 } 2669 2670 /* 2671 * Some crufty old programs define their own version of _mutex_held() 2672 * to be simply return(1). This breaks internal libc logic, so we 2673 * define a private version for exclusive use by libc, mutex_is_held(), 2674 * and also a new public function, __mutex_held(), to be used in new 2675 * code to circumvent these crufty old programs. 
2676 */ 2677 #pragma weak mutex_held = mutex_is_held 2678 #pragma weak _mutex_held = mutex_is_held 2679 #pragma weak __mutex_held = mutex_is_held 2680 int 2681 mutex_is_held(mutex_t *mparg) 2682 { 2683 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2684 2685 if (mparg->mutex_type & USYNC_PROCESS) 2686 return (shared_mutex_held(mparg)); 2687 return (MUTEX_OWNED(mp, curthread)); 2688 } 2689 2690 #pragma weak _private_mutex_destroy = __mutex_destroy 2691 #pragma weak mutex_destroy = __mutex_destroy 2692 #pragma weak _mutex_destroy = __mutex_destroy 2693 #pragma weak pthread_mutex_destroy = __mutex_destroy 2694 #pragma weak _pthread_mutex_destroy = __mutex_destroy 2695 int 2696 __mutex_destroy(mutex_t *mp) 2697 { 2698 if (mp->mutex_type & USYNC_PROCESS) 2699 forget_lock(mp); 2700 (void) _memset(mp, 0, sizeof (*mp)); 2701 tdb_sync_obj_deregister(mp); 2702 return (0); 2703 } 2704 2705 #pragma weak mutex_consistent = __mutex_consistent 2706 #pragma weak _mutex_consistent = __mutex_consistent 2707 #pragma weak pthread_mutex_consistent_np = __mutex_consistent 2708 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 2709 int 2710 __mutex_consistent(mutex_t *mp) 2711 { 2712 /* 2713 * Do this only for an inconsistent, initialized robust lock 2714 * that we hold. For all other cases, return EINVAL. 2715 */ 2716 if (mutex_is_held(mp) && 2717 (mp->mutex_type & LOCK_ROBUST) && 2718 (mp->mutex_flag & LOCK_INITED) && 2719 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2720 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2721 mp->mutex_rcount = 0; 2722 return (0); 2723 } 2724 return (EINVAL); 2725 } 2726 2727 /* 2728 * Spin locks are separate from ordinary mutexes, 2729 * but we use the same data structure for them. 2730 */ 2731 2732 #pragma weak pthread_spin_init = _pthread_spin_init 2733 int 2734 _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2735 { 2736 mutex_t *mp = (mutex_t *)lock; 2737 2738 (void) _memset(mp, 0, sizeof (*mp)); 2739 if (pshared == PTHREAD_PROCESS_SHARED) 2740 mp->mutex_type = USYNC_PROCESS; 2741 else 2742 mp->mutex_type = USYNC_THREAD; 2743 mp->mutex_flag = LOCK_INITED; 2744 mp->mutex_magic = MUTEX_MAGIC; 2745 return (0); 2746 } 2747 2748 #pragma weak pthread_spin_destroy = _pthread_spin_destroy 2749 int 2750 _pthread_spin_destroy(pthread_spinlock_t *lock) 2751 { 2752 (void) _memset(lock, 0, sizeof (*lock)); 2753 return (0); 2754 } 2755 2756 #pragma weak pthread_spin_trylock = _pthread_spin_trylock 2757 int 2758 _pthread_spin_trylock(pthread_spinlock_t *lock) 2759 { 2760 mutex_t *mp = (mutex_t *)lock; 2761 ulwp_t *self = curthread; 2762 int error = 0; 2763 2764 no_preempt(self); 2765 if (set_lock_byte(&mp->mutex_lockw) != 0) 2766 error = EBUSY; 2767 else { 2768 mp->mutex_owner = (uintptr_t)self; 2769 if (mp->mutex_type == USYNC_PROCESS) 2770 mp->mutex_ownerpid = self->ul_uberdata->pid; 2771 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2772 } 2773 preempt(self); 2774 return (error); 2775 } 2776 2777 #pragma weak pthread_spin_lock = _pthread_spin_lock 2778 int 2779 _pthread_spin_lock(pthread_spinlock_t *lock) 2780 { 2781 mutex_t *mp = (mutex_t *)lock; 2782 ulwp_t *self = curthread; 2783 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2784 int count = 0; 2785 2786 ASSERT(!self->ul_critical || self->ul_bindflags); 2787 2788 DTRACE_PROBE1(plockstat, mutex__spin, mp); 2789 2790 /* 2791 * We don't care whether the owner is running on a processor. 2792 * We just spin because that's what this interface requires. 
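 *
 * Editorial note: a minimal sketch of the intended use of these spin
 * lock interfaces ('slock' and 'counter' are hypothetical); the
 * critical section should be kept very short, since waiters burn CPU
 * in the loop below rather than sleeping:
 *
 *	pthread_spinlock_t slock;
 *
 *	(void) pthread_spin_init(&slock, PTHREAD_PROCESS_PRIVATE);
 *
 *	(void) pthread_spin_lock(&slock);
 *	counter++;			// short, non-blocking work only
 *	(void) pthread_spin_unlock(&slock);
 *
 *	(void) pthread_spin_destroy(&slock);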
2793 */ 2794 for (;;) { 2795 if (*lockp == 0) { /* lock byte appears to be clear */ 2796 no_preempt(self); 2797 if (set_lock_byte(lockp) == 0) 2798 break; 2799 preempt(self); 2800 } 2801 if (count < INT_MAX) 2802 count++; 2803 SMT_PAUSE(); 2804 } 2805 mp->mutex_owner = (uintptr_t)self; 2806 if (mp->mutex_type == USYNC_PROCESS) 2807 mp->mutex_ownerpid = self->ul_uberdata->pid; 2808 preempt(self); 2809 if (count) { 2810 DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 2811 } 2812 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2813 return (0); 2814 } 2815 2816 #pragma weak pthread_spin_unlock = _pthread_spin_unlock 2817 int 2818 _pthread_spin_unlock(pthread_spinlock_t *lock) 2819 { 2820 mutex_t *mp = (mutex_t *)lock; 2821 ulwp_t *self = curthread; 2822 2823 no_preempt(self); 2824 mp->mutex_owner = 0; 2825 mp->mutex_ownerpid = 0; 2826 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2827 (void) atomic_swap_32(&mp->mutex_lockword, 0); 2828 preempt(self); 2829 return (0); 2830 } 2831 2832 #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 2833 2834 /* 2835 * Find/allocate an entry for 'lock' in our array of held locks. 2836 */ 2837 static mutex_t ** 2838 find_lock_entry(mutex_t *lock) 2839 { 2840 ulwp_t *self = curthread; 2841 mutex_t **remembered = NULL; 2842 mutex_t **lockptr; 2843 uint_t nlocks; 2844 2845 if ((nlocks = self->ul_heldlockcnt) != 0) 2846 lockptr = self->ul_heldlocks.array; 2847 else { 2848 nlocks = 1; 2849 lockptr = &self->ul_heldlocks.single; 2850 } 2851 2852 for (; nlocks; nlocks--, lockptr++) { 2853 if (*lockptr == lock) 2854 return (lockptr); 2855 if (*lockptr == NULL && remembered == NULL) 2856 remembered = lockptr; 2857 } 2858 if (remembered != NULL) { 2859 *remembered = lock; 2860 return (remembered); 2861 } 2862 2863 /* 2864 * No entry available. Allocate more space, converting 2865 * the single entry into an array of entries if necessary. 2866 */ 2867 if ((nlocks = self->ul_heldlockcnt) == 0) { 2868 /* 2869 * Initial allocation of the array. 2870 * Convert the single entry into an array. 2871 */ 2872 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 2873 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 2874 /* 2875 * The single entry becomes the first entry in the array. 2876 */ 2877 *lockptr = self->ul_heldlocks.single; 2878 self->ul_heldlocks.array = lockptr; 2879 /* 2880 * Return the next available entry in the array. 2881 */ 2882 *++lockptr = lock; 2883 return (lockptr); 2884 } 2885 /* 2886 * Reallocate the array, double the size each time. 2887 */ 2888 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 2889 (void) _memcpy(lockptr, self->ul_heldlocks.array, 2890 nlocks * sizeof (mutex_t *)); 2891 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2892 self->ul_heldlocks.array = lockptr; 2893 self->ul_heldlockcnt *= 2; 2894 /* 2895 * Return the next available entry in the newly allocated array. 2896 */ 2897 *(lockptr += nlocks) = lock; 2898 return (lockptr); 2899 } 2900 2901 /* 2902 * Insert 'lock' into our list of held locks. 2903 * Currently only used for LOCK_ROBUST mutexes. 2904 */ 2905 void 2906 remember_lock(mutex_t *lock) 2907 { 2908 (void) find_lock_entry(lock); 2909 } 2910 2911 /* 2912 * Remove 'lock' from our list of held locks. 2913 * Currently only used for LOCK_ROBUST mutexes. 2914 */ 2915 void 2916 forget_lock(mutex_t *lock) 2917 { 2918 *find_lock_entry(lock) = NULL; 2919 } 2920 2921 /* 2922 * Free the array of held locks. 
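 *
 * Editorial note: the array freed here is the one grown by
 * find_lock_entry() above.  Capacity starts at the single inline
 * slot, jumps to INITIAL_LOCKS on the first overflow and doubles on
 * each overflow after that, so holding N robust locks costs only
 * O(log N) reallocations.  Purely as an illustration of that
 * progression ('nheld' is hypothetical and 1 stands for the inline
 * slot):
 *
 *	uint_t cap = 1;
 *	while (cap < nheld)
 *		cap = (cap == 1)? INITIAL_LOCKS : 2 * cap;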
2923 */ 2924 void 2925 heldlock_free(ulwp_t *ulwp) 2926 { 2927 uint_t nlocks; 2928 2929 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 2930 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2931 ulwp->ul_heldlockcnt = 0; 2932 ulwp->ul_heldlocks.array = NULL; 2933 } 2934 2935 /* 2936 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 2937 * Called from _thrp_exit() to deal with abandoned locks. 2938 */ 2939 void 2940 heldlock_exit(void) 2941 { 2942 ulwp_t *self = curthread; 2943 mutex_t **lockptr; 2944 uint_t nlocks; 2945 mutex_t *mp; 2946 2947 if ((nlocks = self->ul_heldlockcnt) != 0) 2948 lockptr = self->ul_heldlocks.array; 2949 else { 2950 nlocks = 1; 2951 lockptr = &self->ul_heldlocks.single; 2952 } 2953 2954 for (; nlocks; nlocks--, lockptr++) { 2955 /* 2956 * The kernel takes care of transitioning held 2957 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 2958 * We avoid that case here. 2959 */ 2960 if ((mp = *lockptr) != NULL && 2961 mutex_is_held(mp) && 2962 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 2963 LOCK_ROBUST) { 2964 mp->mutex_rcount = 0; 2965 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 2966 mp->mutex_flag |= LOCK_OWNERDEAD; 2967 (void) mutex_unlock_internal(mp, 1); 2968 } 2969 } 2970 2971 heldlock_free(self); 2972 } 2973 2974 #pragma weak cond_init = _cond_init 2975 /* ARGSUSED2 */ 2976 int 2977 _cond_init(cond_t *cvp, int type, void *arg) 2978 { 2979 if (type != USYNC_THREAD && type != USYNC_PROCESS) 2980 return (EINVAL); 2981 (void) _memset(cvp, 0, sizeof (*cvp)); 2982 cvp->cond_type = (uint16_t)type; 2983 cvp->cond_magic = COND_MAGIC; 2984 return (0); 2985 } 2986 2987 /* 2988 * cond_sleep_queue(): utility function for cond_wait_queue(). 2989 * 2990 * Go to sleep on a condvar sleep queue, expect to be waked up 2991 * by someone calling cond_signal() or cond_broadcast() or due 2992 * to receiving a UNIX signal or being cancelled, or just simply 2993 * due to a spurious wakeup (like someome calling forkall()). 2994 * 2995 * The associated mutex is *not* reacquired before returning. 2996 * That must be done by the caller of cond_sleep_queue(). 2997 */ 2998 static int 2999 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3000 { 3001 ulwp_t *self = curthread; 3002 queue_head_t *qp; 3003 queue_head_t *mqp; 3004 lwpid_t lwpid; 3005 int signalled; 3006 int error; 3007 int cv_wake; 3008 int release_all; 3009 3010 /* 3011 * Put ourself on the CV sleep queue, unlock the mutex, then 3012 * park ourself and unpark a candidate lwp to grab the mutex. 3013 * We must go onto the CV sleep queue before dropping the 3014 * mutex in order to guarantee atomicity of the operation. 3015 */ 3016 self->ul_sp = stkptr(); 3017 qp = queue_lock(cvp, CV); 3018 enqueue(qp, self, 0); 3019 cvp->cond_waiters_user = 1; 3020 self->ul_cvmutex = mp; 3021 self->ul_cv_wake = cv_wake = (tsp != NULL); 3022 self->ul_signalled = 0; 3023 if (mp->mutex_flag & LOCK_OWNERDEAD) { 3024 mp->mutex_flag &= ~LOCK_OWNERDEAD; 3025 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 3026 } 3027 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 3028 lwpid = mutex_unlock_queue(mp, release_all); 3029 for (;;) { 3030 set_parking_flag(self, 1); 3031 queue_unlock(qp); 3032 if (lwpid != 0) { 3033 lwpid = preempt_unpark(self, lwpid); 3034 preempt(self); 3035 } 3036 /* 3037 * We may have a deferred signal present, 3038 * in which case we should return EINTR. 3039 * Also, we may have received a SIGCANCEL; if so 3040 * and we are cancelable we should return EINTR. 
3041 * We force an immediate EINTR return from 3042 * __lwp_park() by turning our parking flag off. 3043 */ 3044 if (self->ul_cursig != 0 || 3045 (self->ul_cancelable && self->ul_cancel_pending)) 3046 set_parking_flag(self, 0); 3047 /* 3048 * __lwp_park() will return the residual time in tsp 3049 * if we are unparked before the timeout expires. 3050 */ 3051 error = __lwp_park(tsp, lwpid); 3052 set_parking_flag(self, 0); 3053 lwpid = 0; /* unpark the other lwp only once */ 3054 /* 3055 * We were waked up by cond_signal(), cond_broadcast(), 3056 * by an interrupt or timeout (EINTR or ETIME), 3057 * or we may just have gotten a spurious wakeup. 3058 */ 3059 qp = queue_lock(cvp, CV); 3060 if (!cv_wake) 3061 mqp = queue_lock(mp, MX); 3062 if (self->ul_sleepq == NULL) 3063 break; 3064 /* 3065 * We are on either the condvar sleep queue or the 3066 * mutex sleep queue. Break out of the sleep if we 3067 * were interrupted or we timed out (EINTR or ETIME). 3068 * Else this is a spurious wakeup; continue the loop. 3069 */ 3070 if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ 3071 if (error) { 3072 mp->mutex_waiters = dequeue_self(mqp); 3073 break; 3074 } 3075 tsp = NULL; /* no more timeout */ 3076 } else if (self->ul_sleepq == qp) { /* condvar queue */ 3077 if (error) { 3078 cvp->cond_waiters_user = dequeue_self(qp); 3079 break; 3080 } 3081 /* 3082 * Else a spurious wakeup on the condvar queue. 3083 * __lwp_park() has already adjusted the timeout. 3084 */ 3085 } else { 3086 thr_panic("cond_sleep_queue(): thread not on queue"); 3087 } 3088 if (!cv_wake) 3089 queue_unlock(mqp); 3090 } 3091 3092 self->ul_sp = 0; 3093 self->ul_cv_wake = 0; 3094 ASSERT(self->ul_cvmutex == NULL); 3095 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 3096 self->ul_wchan == NULL); 3097 3098 signalled = self->ul_signalled; 3099 self->ul_signalled = 0; 3100 queue_unlock(qp); 3101 if (!cv_wake) 3102 queue_unlock(mqp); 3103 3104 /* 3105 * If we were concurrently cond_signal()d and any of: 3106 * received a UNIX signal, were cancelled, or got a timeout, 3107 * then perform another cond_signal() to avoid consuming it. 3108 */ 3109 if (error && signalled) 3110 (void) cond_signal_internal(cvp); 3111 3112 return (error); 3113 } 3114 3115 int 3116 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3117 { 3118 ulwp_t *self = curthread; 3119 int error; 3120 int merror; 3121 3122 /* 3123 * The old thread library was programmed to defer signals 3124 * while in cond_wait() so that the associated mutex would 3125 * be guaranteed to be held when the application signal 3126 * handler was invoked. 3127 * 3128 * We do not behave this way by default; the state of the 3129 * associated mutex in the signal handler is undefined. 3130 * 3131 * To accommodate applications that depend on the old 3132 * behavior, the _THREAD_COND_WAIT_DEFER environment 3133 * variable can be set to 1 and we will behave in the 3134 * old way with respect to cond_wait(). 3135 */ 3136 if (self->ul_cond_wait_defer) 3137 sigoff(self); 3138 3139 error = cond_sleep_queue(cvp, mp, tsp); 3140 3141 /* 3142 * Reacquire the mutex. 3143 */ 3144 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3145 error = merror; 3146 3147 /* 3148 * Take any deferred signal now, after we have reacquired the mutex. 3149 */ 3150 if (self->ul_cond_wait_defer) 3151 sigon(self); 3152 3153 return (error); 3154 } 3155 3156 /* 3157 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 3158 * See the comment ahead of cond_sleep_queue(), above. 
3159 */ 3160 static int 3161 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3162 { 3163 int mtype = mp->mutex_type; 3164 ulwp_t *self = curthread; 3165 int error; 3166 3167 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 3168 _ceil_prio_waive(); 3169 3170 self->ul_sp = stkptr(); 3171 self->ul_wchan = cvp; 3172 mp->mutex_owner = 0; 3173 /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ 3174 if (mtype & LOCK_PRIO_INHERIT) { 3175 mp->mutex_lockw = LOCKCLEAR; 3176 self->ul_pilocks--; 3177 } 3178 /* 3179 * ___lwp_cond_wait() returns immediately with EINTR if 3180 * set_parking_flag(self,0) is called on this lwp before it 3181 * goes to sleep in the kernel. sigacthandler() calls this 3182 * when a deferred signal is noted. This assures that we don't 3183 * get stuck in ___lwp_cond_wait() with all signals blocked 3184 * due to taking a deferred signal before going to sleep. 3185 */ 3186 set_parking_flag(self, 1); 3187 if (self->ul_cursig != 0 || 3188 (self->ul_cancelable && self->ul_cancel_pending)) 3189 set_parking_flag(self, 0); 3190 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 3191 set_parking_flag(self, 0); 3192 self->ul_sp = 0; 3193 self->ul_wchan = NULL; 3194 return (error); 3195 } 3196 3197 int 3198 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3199 { 3200 ulwp_t *self = curthread; 3201 int error; 3202 int merror; 3203 3204 /* 3205 * See the large comment in cond_wait_queue(), above. 3206 */ 3207 if (self->ul_cond_wait_defer) 3208 sigoff(self); 3209 3210 error = cond_sleep_kernel(cvp, mp, tsp); 3211 3212 /* 3213 * Override the return code from ___lwp_cond_wait() 3214 * with any non-zero return code from mutex_lock(). 3215 * This addresses robust lock failures in particular; 3216 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 3217 * errors in order to take corrective action. 3218 */ 3219 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3220 error = merror; 3221 3222 /* 3223 * Take any deferred signal now, after we have reacquired the mutex. 3224 */ 3225 if (self->ul_cond_wait_defer) 3226 sigon(self); 3227 3228 return (error); 3229 } 3230 3231 /* 3232 * Common code for _cond_wait() and _cond_timedwait() 3233 */ 3234 int 3235 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3236 { 3237 int mtype = mp->mutex_type; 3238 hrtime_t begin_sleep = 0; 3239 ulwp_t *self = curthread; 3240 uberdata_t *udp = self->ul_uberdata; 3241 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3242 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 3243 uint8_t rcount; 3244 int error = 0; 3245 3246 /* 3247 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 3248 * Except in the case of [ETIMEDOUT], all these error checks 3249 * shall act as if they were performed immediately at the 3250 * beginning of processing for the function and shall cause 3251 * an error return, in effect, prior to modifying the state 3252 * of the mutex specified by mutex or the condition variable 3253 * specified by cond. 3254 * Therefore, we must return EINVAL now if the timeout is invalid.
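 *
 * Editorial note: the validity test applied immediately below, shown
 * in isolation as a hedged helper sketch (the name is hypothetical;
 * the real code simply open-codes the comparison):
 *
 *	static int
 *	timespec_is_valid(const timespec_t *tsp)
 *	{
 *		return (tsp->tv_sec >= 0 &&
 *		    (ulong_t)tsp->tv_nsec < NANOSEC);
 *	}
 *
 * The cast to ulong_t rejects negative tv_nsec values with a single
 * unsigned comparison.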
3255 */ 3256 if (tsp != NULL && 3257 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 3258 return (EINVAL); 3259 3260 if (__td_event_report(self, TD_SLEEP, udp)) { 3261 self->ul_sp = stkptr(); 3262 self->ul_wchan = cvp; 3263 self->ul_td_evbuf.eventnum = TD_SLEEP; 3264 self->ul_td_evbuf.eventdata = cvp; 3265 tdb_event(TD_SLEEP, udp); 3266 self->ul_sp = 0; 3267 } 3268 if (csp) { 3269 if (tsp) 3270 tdb_incr(csp->cond_timedwait); 3271 else 3272 tdb_incr(csp->cond_wait); 3273 } 3274 if (msp) 3275 begin_sleep = record_hold_time(msp); 3276 else if (csp) 3277 begin_sleep = gethrtime(); 3278 3279 if (self->ul_error_detection) { 3280 if (!mutex_is_held(mp)) 3281 lock_error(mp, "cond_wait", cvp, NULL); 3282 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3283 lock_error(mp, "recursive mutex in cond_wait", 3284 cvp, NULL); 3285 if (cvp->cond_type & USYNC_PROCESS) { 3286 if (!(mtype & USYNC_PROCESS)) 3287 lock_error(mp, "cond_wait", cvp, 3288 "condvar process-shared, " 3289 "mutex process-private"); 3290 } else { 3291 if (mtype & USYNC_PROCESS) 3292 lock_error(mp, "cond_wait", cvp, 3293 "condvar process-private, " 3294 "mutex process-shared"); 3295 } 3296 } 3297 3298 /* 3299 * We deal with recursive mutexes by completely 3300 * dropping the lock and restoring the recursion 3301 * count after waking up. This is arguably wrong, 3302 * but it obeys the principle of least astonishment. 3303 */ 3304 rcount = mp->mutex_rcount; 3305 mp->mutex_rcount = 0; 3306 if ((mtype & 3307 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3308 (cvp->cond_type & USYNC_PROCESS)) 3309 error = cond_wait_kernel(cvp, mp, tsp); 3310 else 3311 error = cond_wait_queue(cvp, mp, tsp); 3312 mp->mutex_rcount = rcount; 3313 3314 if (csp) { 3315 hrtime_t lapse = gethrtime() - begin_sleep; 3316 if (tsp == NULL) 3317 csp->cond_wait_sleep_time += lapse; 3318 else { 3319 csp->cond_timedwait_sleep_time += lapse; 3320 if (error == ETIME) 3321 tdb_incr(csp->cond_timedwait_timeout); 3322 } 3323 } 3324 return (error); 3325 } 3326 3327 /* 3328 * cond_wait() and _cond_wait() are cancellation points but __cond_wait() 3329 * is not. Internally, libc calls the non-cancellation version. 3330 * Other libraries need to use pthread_setcancelstate(), as appropriate, 3331 * since __cond_wait() is not exported from libc. 3332 */ 3333 int 3334 __cond_wait(cond_t *cvp, mutex_t *mp) 3335 { 3336 ulwp_t *self = curthread; 3337 uberdata_t *udp = self->ul_uberdata; 3338 uberflags_t *gflags; 3339 3340 /* 3341 * Optimize the common case of USYNC_THREAD plus 3342 * no error detection, no lock statistics, and no event tracing. 3343 */ 3344 if ((gflags = self->ul_schedctl_called) != NULL && 3345 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3346 self->ul_td_events_enable | 3347 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3348 return (cond_wait_queue(cvp, mp, NULL)); 3349 3350 /* 3351 * Else do it the long way. 3352 */ 3353 return (cond_wait_common(cvp, mp, NULL)); 3354 } 3355 3356 #pragma weak cond_wait = _cond_wait 3357 int 3358 _cond_wait(cond_t *cvp, mutex_t *mp) 3359 { 3360 int error; 3361 3362 _cancelon(); 3363 error = __cond_wait(cvp, mp); 3364 if (error == EINTR) 3365 _canceloff(); 3366 else 3367 _canceloff_nocancel(); 3368 return (error); 3369 } 3370 3371 /* 3372 * pthread_cond_wait() is a cancellation point. 3373 */ 3374 #pragma weak pthread_cond_wait = _pthread_cond_wait 3375 int 3376 _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 3377 { 3378 int error; 3379 3380 error = _cond_wait(cvp, mp); 3381 return ((error == EINTR)? 
0 : error); 3382 } 3383 3384 /* 3385 * cond_timedwait() and _cond_timedwait() are cancellation points 3386 * but __cond_timedwait() is not. 3387 */ 3388 int 3389 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3390 { 3391 clockid_t clock_id = cvp->cond_clockid; 3392 timespec_t reltime; 3393 int error; 3394 3395 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3396 clock_id = CLOCK_REALTIME; 3397 abstime_to_reltime(clock_id, abstime, &reltime); 3398 error = cond_wait_common(cvp, mp, &reltime); 3399 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3400 /* 3401 * Don't return ETIME if we didn't really get a timeout. 3402 * This can happen if we return because someone resets 3403 * the system clock. Just return zero in this case, 3404 * giving a spurious wakeup but not a timeout. 3405 */ 3406 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3407 abstime->tv_nsec > gethrtime()) 3408 error = 0; 3409 } 3410 return (error); 3411 } 3412 3413 #pragma weak cond_timedwait = _cond_timedwait 3414 int 3415 _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3416 { 3417 int error; 3418 3419 _cancelon(); 3420 error = __cond_timedwait(cvp, mp, abstime); 3421 if (error == EINTR) 3422 _canceloff(); 3423 else 3424 _canceloff_nocancel(); 3425 return (error); 3426 } 3427 3428 /* 3429 * pthread_cond_timedwait() is a cancellation point. 3430 */ 3431 #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 3432 int 3433 _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3434 { 3435 int error; 3436 3437 error = _cond_timedwait(cvp, mp, abstime); 3438 if (error == ETIME) 3439 error = ETIMEDOUT; 3440 else if (error == EINTR) 3441 error = 0; 3442 return (error); 3443 } 3444 3445 /* 3446 * cond_reltimedwait() and _cond_reltimedwait() are cancellation points 3447 * but __cond_reltimedwait() is not. 3448 */ 3449 int 3450 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3451 { 3452 timespec_t tslocal = *reltime; 3453 3454 return (cond_wait_common(cvp, mp, &tslocal)); 3455 } 3456 3457 #pragma weak cond_reltimedwait = _cond_reltimedwait 3458 int 3459 _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3460 { 3461 int error; 3462 3463 _cancelon(); 3464 error = __cond_reltimedwait(cvp, mp, reltime); 3465 if (error == EINTR) 3466 _canceloff(); 3467 else 3468 _canceloff_nocancel(); 3469 return (error); 3470 } 3471 3472 #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 3473 int 3474 _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 3475 const timespec_t *reltime) 3476 { 3477 int error; 3478 3479 error = _cond_reltimedwait(cvp, mp, reltime); 3480 if (error == ETIME) 3481 error = ETIMEDOUT; 3482 else if (error == EINTR) 3483 error = 0; 3484 return (error); 3485 } 3486 3487 #pragma weak pthread_cond_signal = cond_signal_internal 3488 #pragma weak _pthread_cond_signal = cond_signal_internal 3489 #pragma weak cond_signal = cond_signal_internal 3490 #pragma weak _cond_signal = cond_signal_internal 3491 int 3492 cond_signal_internal(cond_t *cvp) 3493 { 3494 ulwp_t *self = curthread; 3495 uberdata_t *udp = self->ul_uberdata; 3496 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3497 int error = 0; 3498 int more; 3499 lwpid_t lwpid; 3500 queue_head_t *qp; 3501 mutex_t *mp; 3502 queue_head_t *mqp; 3503 ulwp_t **ulwpp; 3504 ulwp_t *ulwp; 3505 ulwp_t *prev; 3506 3507 if (csp) 3508 tdb_incr(csp->cond_signal); 3509 3510 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? 
*/ 3511 error = __lwp_cond_signal(cvp); 3512 3513 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3514 return (error); 3515 3516 /* 3517 * Move someone from the condvar sleep queue to the mutex sleep 3518 * queue for the mutex that he will acquire on being waked up. 3519 * We can do this only if we own the mutex he will acquire. 3520 * If we do not own the mutex, or if his ul_cv_wake flag 3521 * is set, just dequeue and unpark him. 3522 */ 3523 qp = queue_lock(cvp, CV); 3524 ulwpp = queue_slot(qp, &prev, &more); 3525 cvp->cond_waiters_user = more; 3526 if (ulwpp == NULL) { /* no one on the sleep queue */ 3527 queue_unlock(qp); 3528 return (error); 3529 } 3530 ulwp = *ulwpp; 3531 3532 /* 3533 * Inform the thread that he was the recipient of a cond_signal(). 3534 * This lets him deal with cond_signal() and, concurrently, 3535 * one or more of a cancellation, a UNIX signal, or a timeout. 3536 * These latter conditions must not consume a cond_signal(). 3537 */ 3538 ulwp->ul_signalled = 1; 3539 3540 /* 3541 * Dequeue the waiter but leave his ul_sleepq non-NULL 3542 * while we move him to the mutex queue so that he can 3543 * deal properly with spurious wakeups. 3544 */ 3545 queue_unlink(qp, ulwpp, prev); 3546 3547 mp = ulwp->ul_cvmutex; /* the mutex he will acquire */ 3548 ulwp->ul_cvmutex = NULL; 3549 ASSERT(mp != NULL); 3550 3551 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3552 /* just wake him up */ 3553 lwpid = ulwp->ul_lwpid; 3554 no_preempt(self); 3555 ulwp->ul_sleepq = NULL; 3556 ulwp->ul_wchan = NULL; 3557 queue_unlock(qp); 3558 (void) __lwp_unpark(lwpid); 3559 preempt(self); 3560 } else { 3561 /* move him to the mutex queue */ 3562 mqp = queue_lock(mp, MX); 3563 enqueue(mqp, ulwp, 0); 3564 mp->mutex_waiters = 1; 3565 queue_unlock(mqp); 3566 queue_unlock(qp); 3567 } 3568 3569 return (error); 3570 } 3571 3572 /* 3573 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3574 * and rw_queue_release() to (re)allocate a big buffer to hold the 3575 * lwpids of all the threads to be set running after they are removed 3576 * from their sleep queues. Since we are holding a queue lock, we 3577 * cannot call any function that might acquire a lock. mmap(), munmap(), 3578 * lwp_unpark_all() are simple system calls and are safe in this regard. 3579 */ 3580 lwpid_t * 3581 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3582 { 3583 /* 3584 * Allocate NEWLWPS ids on the first overflow. 3585 * Double the allocation each time after that. 3586 */ 3587 int nlwpid = *nlwpid_ptr; 3588 int maxlwps = *maxlwps_ptr; 3589 int first_allocation; 3590 int newlwps; 3591 void *vaddr; 3592 3593 ASSERT(nlwpid == maxlwps); 3594 3595 first_allocation = (maxlwps == MAXLWPS); 3596 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3597 vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t), 3598 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3599 3600 if (vaddr == MAP_FAILED) { 3601 /* 3602 * Let's hope this never happens. 3603 * If it does, then we have a terrible 3604 * thundering herd on our hands. 
3605 */ 3606 (void) __lwp_unpark_all(lwpid, nlwpid); 3607 *nlwpid_ptr = 0; 3608 } else { 3609 (void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3610 if (!first_allocation) 3611 (void) _private_munmap(lwpid, 3612 maxlwps * sizeof (lwpid_t)); 3613 lwpid = vaddr; 3614 *maxlwps_ptr = newlwps; 3615 } 3616 3617 return (lwpid); 3618 } 3619 3620 #pragma weak pthread_cond_broadcast = cond_broadcast_internal 3621 #pragma weak _pthread_cond_broadcast = cond_broadcast_internal 3622 #pragma weak cond_broadcast = cond_broadcast_internal 3623 #pragma weak _cond_broadcast = cond_broadcast_internal 3624 int 3625 cond_broadcast_internal(cond_t *cvp) 3626 { 3627 ulwp_t *self = curthread; 3628 uberdata_t *udp = self->ul_uberdata; 3629 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3630 int error = 0; 3631 queue_head_t *qp; 3632 queue_root_t *qrp; 3633 mutex_t *mp; 3634 mutex_t *mp_cache = NULL; 3635 queue_head_t *mqp = NULL; 3636 ulwp_t *ulwp; 3637 int nlwpid = 0; 3638 int maxlwps = MAXLWPS; 3639 lwpid_t buffer[MAXLWPS]; 3640 lwpid_t *lwpid = buffer; 3641 3642 if (csp) 3643 tdb_incr(csp->cond_broadcast); 3644 3645 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3646 error = __lwp_cond_broadcast(cvp); 3647 3648 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3649 return (error); 3650 3651 /* 3652 * Move everyone from the condvar sleep queue to the mutex sleep 3653 * queue for the mutex that they will acquire on being waked up. 3654 * We can do this only if we own the mutex they will acquire. 3655 * If we do not own the mutex, or if their ul_cv_wake flag 3656 * is set, just dequeue and unpark them. 3657 * 3658 * We keep track of lwpids that are to be unparked in lwpid[]. 3659 * __lwp_unpark_all() is called to unpark all of them after 3660 * they have been removed from the sleep queue and the sleep 3661 * queue lock has been dropped. If we run out of space in our 3662 * on-stack buffer, we need to allocate more but we can't call 3663 * lmalloc() because we are holding a queue lock when the overflow 3664 * occurs and lmalloc() acquires a lock. We can't use alloca() 3665 * either because the application may have allocated a small 3666 * stack and we don't want to overrun the stack. So we call 3667 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3668 * system call directly since that path acquires no locks. 
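 *
 * Editorial note: the calling convention for alloc_lwpids() used by
 * the loop below (and by mutex_wakeup_all() and rw_queue_release())
 * is, in outline:
 *
 *	if (nlwpid == maxlwps)
 *		lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
 *	lwpid[nlwpid++] = ulwp->ul_lwpid;
 *
 * and, once the queue lock has been dropped and the collected lwps
 * have been unparked:
 *
 *	if (lwpid != buffer)
 *		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
 *
 * On an mmap() failure alloc_lwpids() unparks everything collected so
 * far and resets nlwpid to 0, so the caller can simply keep going.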
3669 */ 3670 qp = queue_lock(cvp, CV); 3671 cvp->cond_waiters_user = 0; 3672 for (;;) { 3673 if ((qrp = qp->qh_root) == NULL || 3674 (ulwp = qrp->qr_head) == NULL) 3675 break; 3676 ASSERT(ulwp->ul_wchan == cvp); 3677 queue_unlink(qp, &qrp->qr_head, NULL); 3678 mp = ulwp->ul_cvmutex; /* his mutex */ 3679 ulwp->ul_cvmutex = NULL; 3680 ASSERT(mp != NULL); 3681 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3682 /* just wake him up */ 3683 ulwp->ul_sleepq = NULL; 3684 ulwp->ul_wchan = NULL; 3685 if (nlwpid == maxlwps) 3686 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 3687 lwpid[nlwpid++] = ulwp->ul_lwpid; 3688 } else { 3689 /* move him to the mutex queue */ 3690 if (mp != mp_cache) { 3691 mp_cache = mp; 3692 if (mqp != NULL) 3693 queue_unlock(mqp); 3694 mqp = queue_lock(mp, MX); 3695 } 3696 enqueue(mqp, ulwp, 0); 3697 mp->mutex_waiters = 1; 3698 } 3699 } 3700 if (mqp != NULL) 3701 queue_unlock(mqp); 3702 if (nlwpid == 0) { 3703 queue_unlock(qp); 3704 } else { 3705 no_preempt(self); 3706 queue_unlock(qp); 3707 if (nlwpid == 1) 3708 (void) __lwp_unpark(lwpid[0]); 3709 else 3710 (void) __lwp_unpark_all(lwpid, nlwpid); 3711 preempt(self); 3712 } 3713 if (lwpid != buffer) 3714 (void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t)); 3715 return (error); 3716 } 3717 3718 #pragma weak pthread_cond_destroy = _cond_destroy 3719 #pragma weak _pthread_cond_destroy = _cond_destroy 3720 #pragma weak cond_destroy = _cond_destroy 3721 int 3722 _cond_destroy(cond_t *cvp) 3723 { 3724 cvp->cond_magic = 0; 3725 tdb_sync_obj_deregister(cvp); 3726 return (0); 3727 } 3728 3729 #if defined(THREAD_DEBUG) 3730 void 3731 assert_no_libc_locks_held(void) 3732 { 3733 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 3734 } 3735 3736 /* protected by link_lock */ 3737 uint64_t spin_lock_spin; 3738 uint64_t spin_lock_spin2; 3739 uint64_t spin_lock_sleep; 3740 uint64_t spin_lock_wakeup; 3741 3742 /* 3743 * Record spin lock statistics. 3744 * Called by a thread exiting itself in thrp_exit(). 3745 * Also called via atexit() from the thread calling 3746 * exit() to do all the other threads as well. 3747 */ 3748 void 3749 record_spin_locks(ulwp_t *ulwp) 3750 { 3751 spin_lock_spin += ulwp->ul_spin_lock_spin; 3752 spin_lock_spin2 += ulwp->ul_spin_lock_spin2; 3753 spin_lock_sleep += ulwp->ul_spin_lock_sleep; 3754 spin_lock_wakeup += ulwp->ul_spin_lock_wakeup; 3755 ulwp->ul_spin_lock_spin = 0; 3756 ulwp->ul_spin_lock_spin2 = 0; 3757 ulwp->ul_spin_lock_sleep = 0; 3758 ulwp->ul_spin_lock_wakeup = 0; 3759 } 3760 3761 /* 3762 * atexit function: dump the queue statistics to stderr. 
3763 */ 3764 #if !defined(__lint) 3765 #define fprintf _fprintf 3766 #endif 3767 #include <stdio.h> 3768 void 3769 dump_queue_statistics(void) 3770 { 3771 uberdata_t *udp = curthread->ul_uberdata; 3772 queue_head_t *qp; 3773 int qn; 3774 uint64_t spin_lock_total = 0; 3775 3776 if (udp->queue_head == NULL || thread_queue_dump == 0) 3777 return; 3778 3779 if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 || 3780 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3781 return; 3782 for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) { 3783 if (qp->qh_lockcount == 0) 3784 continue; 3785 spin_lock_total += qp->qh_lockcount; 3786 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3787 (u_longlong_t)qp->qh_lockcount, 3788 qp->qh_qmax, qp->qh_hmax) < 0) 3789 return; 3790 } 3791 3792 if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 || 3793 fprintf(stderr, "queue# lockcount max qlen max hlen\n") < 0) 3794 return; 3795 for (qn = 0; qn < QHASHSIZE; qn++, qp++) { 3796 if (qp->qh_lockcount == 0) 3797 continue; 3798 spin_lock_total += qp->qh_lockcount; 3799 if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn, 3800 (u_longlong_t)qp->qh_lockcount, 3801 qp->qh_qmax, qp->qh_hmax) < 0) 3802 return; 3803 } 3804 3805 (void) fprintf(stderr, "\n spin_lock_total = %10llu\n", 3806 (u_longlong_t)spin_lock_total); 3807 (void) fprintf(stderr, " spin_lock_spin = %10llu\n", 3808 (u_longlong_t)spin_lock_spin); 3809 (void) fprintf(stderr, " spin_lock_spin2 = %10llu\n", 3810 (u_longlong_t)spin_lock_spin2); 3811 (void) fprintf(stderr, " spin_lock_sleep = %10llu\n", 3812 (u_longlong_t)spin_lock_sleep); 3813 (void) fprintf(stderr, " spin_lock_wakeup = %10llu\n", 3814 (u_longlong_t)spin_lock_wakeup); 3815 } 3816 #endif 3817