1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2015, Joyent, Inc. 26 * Copyright (c) 2016 by Delphix. All rights reserved. 27 * Copyright 2024 Oxide Computer Company 28 */ 29 30 #include "lint.h" 31 #include "thr_uberdata.h" 32 #include <sys/rtpriocntl.h> 33 #include <sys/sdt.h> 34 #include <atomic.h> 35 36 #if defined(DEBUG) 37 #define INCR32(x) (((x) != UINT32_MAX)? (x)++ : 0) 38 #define INCR(x) ((x)++) 39 #define DECR(x) ((x)--) 40 #define MAXINCR(m, x) ((m < ++x)? (m = x) : 0) 41 #else 42 #define INCR32(x) 43 #define INCR(x) 44 #define DECR(x) 45 #define MAXINCR(m, x) 46 #endif 47 48 /* 49 * This mutex is initialized to be held by lwp#1. 50 * It is used to block a thread that has returned from a mutex_lock() 51 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error. 52 */ 53 mutex_t stall_mutex = DEFAULTMUTEX; 54 55 static int shared_mutex_held(mutex_t *); 56 static int mutex_queuelock_adaptive(mutex_t *); 57 static void mutex_wakeup_all(mutex_t *); 58 59 /* 60 * Lock statistics support functions. 61 */ 62 void 63 record_begin_hold(tdb_mutex_stats_t *msp) 64 { 65 tdb_incr(msp->mutex_lock); 66 msp->mutex_begin_hold = gethrtime(); 67 } 68 69 hrtime_t 70 record_hold_time(tdb_mutex_stats_t *msp) 71 { 72 hrtime_t now = gethrtime(); 73 74 if (msp->mutex_begin_hold) 75 msp->mutex_hold_time += now - msp->mutex_begin_hold; 76 msp->mutex_begin_hold = 0; 77 return (now); 78 } 79 80 /* 81 * Called once at library initialization. 82 */ 83 void 84 mutex_setup(void) 85 { 86 if (set_lock_byte(&stall_mutex.mutex_lockw)) 87 thr_panic("mutex_setup() cannot acquire stall_mutex"); 88 stall_mutex.mutex_owner = (uintptr_t)curthread; 89 } 90 91 /* 92 * The default spin count of 1000 is experimentally determined. 93 * On sun4u machines with any number of processors it could be raised 94 * to 10,000 but that (experimentally) makes almost no difference. 95 * The environment variable: 96 * _THREAD_ADAPTIVE_SPIN=count 97 * can be used to override and set the count in the range [0 .. 1,000,000]. 98 */ 99 int thread_adaptive_spin = 1000; 100 uint_t thread_max_spinners = 100; 101 int thread_queue_verify = 0; 102 static int ncpus; 103 104 /* 105 * Distinguish spinning for queue locks from spinning for regular locks. 106 * We try harder to acquire queue locks by spinning. 107 * The environment variable: 108 * _THREAD_QUEUE_SPIN=count 109 * can be used to override and set the count in the range [0 .. 1,000,000]. 
110 */ 111 int thread_queue_spin = 10000; 112 113 #define ALL_ATTRIBUTES \ 114 (LOCK_RECURSIVE | LOCK_ERRORCHECK | \ 115 LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT | \ 116 LOCK_ROBUST) 117 118 /* 119 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST, 120 * augmented by zero or more the flags: 121 * LOCK_RECURSIVE 122 * LOCK_ERRORCHECK 123 * LOCK_PRIO_INHERIT 124 * LOCK_PRIO_PROTECT 125 * LOCK_ROBUST 126 */ 127 #pragma weak _mutex_init = mutex_init 128 /* ARGSUSED2 */ 129 int 130 mutex_init(mutex_t *mp, int type, void *arg) 131 { 132 int basetype = (type & ~ALL_ATTRIBUTES); 133 const pcclass_t *pccp; 134 int error = 0; 135 int ceil; 136 137 if (basetype == USYNC_PROCESS_ROBUST) { 138 /* 139 * USYNC_PROCESS_ROBUST is a deprecated historical type. 140 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but 141 * retain the USYNC_PROCESS_ROBUST flag so we can return 142 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST 143 * mutexes will ever draw ELOCKUNMAPPED). 144 */ 145 type |= (USYNC_PROCESS | LOCK_ROBUST); 146 basetype = USYNC_PROCESS; 147 } 148 149 if (type & LOCK_PRIO_PROTECT) 150 pccp = get_info_by_policy(SCHED_FIFO); 151 if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) || 152 (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) 153 == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) || 154 ((type & LOCK_PRIO_PROTECT) && 155 ((ceil = *(int *)arg) < pccp->pcc_primin || 156 ceil > pccp->pcc_primax))) { 157 error = EINVAL; 158 } else if (type & LOCK_ROBUST) { 159 /* 160 * Callers of mutex_init() with the LOCK_ROBUST attribute 161 * are required to pass an initially all-zero mutex. 162 * Multiple calls to mutex_init() are allowed; all but 163 * the first return EBUSY. A call to mutex_init() is 164 * allowed to make an inconsistent robust lock consistent 165 * (for historical usage, even though the proper interface 166 * for this is mutex_consistent()). Note that we use 167 * atomic_or_16() to set the LOCK_INITED flag so as 168 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc). 169 */ 170 if (!(mp->mutex_flag & LOCK_INITED)) { 171 mp->mutex_type = (uint8_t)type; 172 atomic_or_16(&mp->mutex_flag, LOCK_INITED); 173 mp->mutex_magic = MUTEX_MAGIC; 174 } else if (type != mp->mutex_type || 175 ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) { 176 error = EINVAL; 177 } else if (mutex_consistent(mp) != 0) { 178 error = EBUSY; 179 } 180 /* register a process robust mutex with the kernel */ 181 if (basetype == USYNC_PROCESS) 182 register_lock(mp); 183 } else { 184 (void) memset(mp, 0, sizeof (*mp)); 185 mp->mutex_type = (uint8_t)type; 186 mp->mutex_flag = LOCK_INITED; 187 mp->mutex_magic = MUTEX_MAGIC; 188 } 189 190 if (error == 0 && (type & LOCK_PRIO_PROTECT)) { 191 mp->mutex_ceiling = ceil; 192 } 193 194 /* 195 * This should be at the beginning of the function, 196 * but for the sake of old broken applications that 197 * do not have proper alignment for their mutexes 198 * (and don't check the return code from mutex_init), 199 * we put it here, after initializing the mutex regardless. 200 */ 201 if (error == 0 && 202 ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 203 curthread->ul_misaligned == 0) 204 error = EINVAL; 205 206 return (error); 207 } 208 209 /* 210 * Delete mp from list of ceiling mutexes owned by curthread. 211 * Return 1 if the head of the chain was updated. 
212 */ 213 int 214 _ceil_mylist_del(mutex_t *mp) 215 { 216 ulwp_t *self = curthread; 217 mxchain_t **mcpp; 218 mxchain_t *mcp; 219 220 for (mcpp = &self->ul_mxchain; 221 (mcp = *mcpp) != NULL; 222 mcpp = &mcp->mxchain_next) { 223 if (mcp->mxchain_mx == mp) { 224 *mcpp = mcp->mxchain_next; 225 lfree(mcp, sizeof (*mcp)); 226 return (mcpp == &self->ul_mxchain); 227 } 228 } 229 return (0); 230 } 231 232 /* 233 * Add mp to the list of ceiling mutexes owned by curthread. 234 * Return ENOMEM if no memory could be allocated. 235 */ 236 int 237 _ceil_mylist_add(mutex_t *mp) 238 { 239 ulwp_t *self = curthread; 240 mxchain_t *mcp; 241 242 if ((mcp = lmalloc(sizeof (*mcp))) == NULL) 243 return (ENOMEM); 244 mcp->mxchain_mx = mp; 245 mcp->mxchain_next = self->ul_mxchain; 246 self->ul_mxchain = mcp; 247 return (0); 248 } 249 250 /* 251 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below. 252 */ 253 static void 254 set_rt_priority(ulwp_t *self, int prio) 255 { 256 pcparms_t pcparm; 257 258 pcparm.pc_cid = self->ul_rtclassid; 259 ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE; 260 ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; 261 (void) priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm); 262 } 263 264 /* 265 * Inherit priority from ceiling. 266 * This changes the effective priority, not the assigned priority. 267 */ 268 void 269 _ceil_prio_inherit(int prio) 270 { 271 ulwp_t *self = curthread; 272 273 self->ul_epri = prio; 274 set_rt_priority(self, prio); 275 } 276 277 /* 278 * Waive inherited ceiling priority. Inherit from head of owned ceiling locks 279 * if holding at least one ceiling lock. If no ceiling locks are held at this 280 * point, disinherit completely, reverting back to assigned priority. 281 */ 282 void 283 _ceil_prio_waive(void) 284 { 285 ulwp_t *self = curthread; 286 mxchain_t *mcp = self->ul_mxchain; 287 int prio; 288 289 if (mcp == NULL) { 290 prio = self->ul_pri; 291 self->ul_epri = 0; 292 } else { 293 prio = mcp->mxchain_mx->mutex_ceiling; 294 self->ul_epri = prio; 295 } 296 set_rt_priority(self, prio); 297 } 298 299 /* 300 * Clear the lock byte. Retain the waiters byte and the spinners byte. 301 * Return the old value of the lock word. 302 */ 303 static uint32_t 304 clear_lockbyte(volatile uint32_t *lockword) 305 { 306 uint32_t old; 307 uint32_t new; 308 309 do { 310 old = *lockword; 311 new = old & ~LOCKMASK; 312 } while (atomic_cas_32(lockword, old, new) != old); 313 314 return (old); 315 } 316 317 /* 318 * Same as clear_lockbyte(), but operates on mutex_lockword64. 319 * The mutex_ownerpid field is cleared along with the lock byte. 320 */ 321 static uint64_t 322 clear_lockbyte64(volatile uint64_t *lockword64) 323 { 324 uint64_t old; 325 uint64_t new; 326 327 do { 328 old = *lockword64; 329 new = old & ~LOCKMASK64; 330 } while (atomic_cas_64(lockword64, old, new) != old); 331 332 return (old); 333 } 334 335 /* 336 * Similar to set_lock_byte(), which only tries to set the lock byte. 337 * Here, we attempt to set the lock byte AND the mutex_ownerpid, keeping 338 * the remaining bytes constant. This atomic operation is required for the 339 * correctness of process-shared robust locks, otherwise there would be 340 * a window or vulnerability in which the lock byte had been set but the 341 * mutex_ownerpid had not yet been set. 
If the process were to die in 342 * this window of vulnerability (due to some other thread calling exit() 343 * or the process receiving a fatal signal), the mutex would be left locked 344 * but without a process-ID to determine which process was holding the lock. 345 * The kernel would then be unable to mark the robust mutex as LOCK_OWNERDEAD 346 * when the process died. For all other cases of process-shared locks, this 347 * operation is just a convenience, for the sake of common code. 348 * 349 * This operation requires process-shared robust locks to be properly 350 * aligned on an 8-byte boundary, at least on sparc machines, lest the 351 * operation incur an alignment fault. This is automatic when locks 352 * are declared properly using the mutex_t or pthread_mutex_t data types 353 * and the application does not allocate dynamic memory on less than an 354 * 8-byte boundary. See the 'horrible hack' comments below for cases 355 * dealing with such broken applications. 356 */ 357 static int 358 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid) 359 { 360 uint64_t old; 361 uint64_t new; 362 363 old = *lockword64 & ~LOCKMASK64; 364 new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64; 365 if (atomic_cas_64(lockword64, old, new) == old) 366 return (LOCKCLEAR); 367 368 return (LOCKSET); 369 } 370 371 /* 372 * Increment the spinners count in the mutex lock word. 373 * Return 0 on success. Return -1 if the count would overflow. 374 */ 375 static int 376 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners) 377 { 378 uint32_t old; 379 uint32_t new; 380 381 do { 382 old = *lockword; 383 if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners) 384 return (-1); 385 new = old + (1 << SPINNERSHIFT); 386 } while (atomic_cas_32(lockword, old, new) != old); 387 388 return (0); 389 } 390 391 /* 392 * Decrement the spinners count in the mutex lock word. 393 * Return the new value of the lock word. 394 */ 395 static uint32_t 396 spinners_decr(volatile uint32_t *lockword) 397 { 398 uint32_t old; 399 uint32_t new; 400 401 do { 402 new = old = *lockword; 403 if (new & SPINNERMASK) 404 new -= (1 << SPINNERSHIFT); 405 } while (atomic_cas_32(lockword, old, new) != old); 406 407 return (new); 408 } 409 410 /* 411 * Non-preemptive spin locks. Used by queue_lock(). 412 * No lock statistics are gathered for these locks. 413 * No DTrace probes are provided for these locks. 414 */ 415 void 416 spin_lock_set(mutex_t *mp) 417 { 418 ulwp_t *self = curthread; 419 420 no_preempt(self); 421 if (set_lock_byte(&mp->mutex_lockw) == 0) { 422 mp->mutex_owner = (uintptr_t)self; 423 return; 424 } 425 /* 426 * Spin for a while, attempting to acquire the lock. 427 */ 428 INCR32(self->ul_spin_lock_spin); 429 if (mutex_queuelock_adaptive(mp) == 0 || 430 set_lock_byte(&mp->mutex_lockw) == 0) { 431 mp->mutex_owner = (uintptr_t)self; 432 return; 433 } 434 /* 435 * Try harder if we were previously at a no premption level. 436 */ 437 if (self->ul_preempt > 1) { 438 INCR32(self->ul_spin_lock_spin2); 439 if (mutex_queuelock_adaptive(mp) == 0 || 440 set_lock_byte(&mp->mutex_lockw) == 0) { 441 mp->mutex_owner = (uintptr_t)self; 442 return; 443 } 444 } 445 /* 446 * Give up and block in the kernel for the mutex. 
447 */ 448 INCR32(self->ul_spin_lock_sleep); 449 (void) ___lwp_mutex_timedlock(mp, NULL, self); 450 } 451 452 void 453 spin_lock_clear(mutex_t *mp) 454 { 455 ulwp_t *self = curthread; 456 457 mp->mutex_owner = 0; 458 if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) { 459 (void) ___lwp_mutex_wakeup(mp, 0); 460 INCR32(self->ul_spin_lock_wakeup); 461 } 462 preempt(self); 463 } 464 465 /* 466 * Allocate the sleep queue hash table. 467 */ 468 void 469 queue_alloc(void) 470 { 471 ulwp_t *self = curthread; 472 uberdata_t *udp = self->ul_uberdata; 473 queue_head_t *qp; 474 void *data; 475 int i; 476 477 /* 478 * No locks are needed; we call here only when single-threaded. 479 */ 480 ASSERT(self == udp->ulwp_one); 481 ASSERT(!udp->uberflags.uf_mt); 482 if ((data = mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t), 483 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0)) 484 == MAP_FAILED) 485 thr_panic("cannot allocate thread queue_head table"); 486 udp->queue_head = qp = (queue_head_t *)data; 487 for (i = 0; i < 2 * QHASHSIZE; qp++, i++) { 488 qp->qh_type = (i < QHASHSIZE)? MX : CV; 489 qp->qh_lock.mutex_flag = LOCK_INITED; 490 qp->qh_lock.mutex_magic = MUTEX_MAGIC; 491 qp->qh_hlist = &qp->qh_def_root; 492 #if defined(DEBUG) 493 qp->qh_hlen = 1; 494 qp->qh_hmax = 1; 495 #endif 496 } 497 } 498 499 #if defined(DEBUG) 500 501 /* 502 * Debugging: verify correctness of a sleep queue. 503 */ 504 void 505 QVERIFY(queue_head_t *qp) 506 { 507 ulwp_t *self = curthread; 508 uberdata_t *udp = self->ul_uberdata; 509 queue_root_t *qrp; 510 ulwp_t *ulwp; 511 ulwp_t *prev; 512 uint_t index; 513 uint32_t cnt; 514 char qtype; 515 void *wchan; 516 517 ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE); 518 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 519 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 520 cnt++; 521 ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) || 522 (qrp->qr_head == NULL && qrp->qr_tail == NULL)); 523 } 524 ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt); 525 qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV; 526 ASSERT(qp->qh_type == qtype); 527 if (!thread_queue_verify) 528 return; 529 /* real expensive stuff, only for _THREAD_QUEUE_VERIFY */ 530 for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) { 531 for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL; 532 prev = ulwp, ulwp = ulwp->ul_link) { 533 cnt++; 534 if (ulwp->ul_writer) 535 ASSERT(prev == NULL || prev->ul_writer); 536 ASSERT(ulwp->ul_qtype == qtype); 537 ASSERT(ulwp->ul_wchan != NULL); 538 ASSERT(ulwp->ul_sleepq == qp); 539 wchan = ulwp->ul_wchan; 540 ASSERT(qrp->qr_wchan == wchan); 541 index = QUEUE_HASH(wchan, qtype); 542 ASSERT(&udp->queue_head[index] == qp); 543 } 544 ASSERT(qrp->qr_tail == prev); 545 } 546 ASSERT(qp->qh_qlen == cnt); 547 } 548 549 #else /* DEBUG */ 550 551 #define QVERIFY(qp) 552 553 #endif /* DEBUG */ 554 555 /* 556 * Acquire a queue head. 557 */ 558 queue_head_t * 559 queue_lock(void *wchan, int qtype) 560 { 561 uberdata_t *udp = curthread->ul_uberdata; 562 queue_head_t *qp; 563 queue_root_t *qrp; 564 565 ASSERT(qtype == MX || qtype == CV); 566 567 /* 568 * It is possible that we could be called while still single-threaded. 569 * If so, we call queue_alloc() to allocate the queue_head[] array. 
570 */ 571 if ((qp = udp->queue_head) == NULL) { 572 queue_alloc(); 573 qp = udp->queue_head; 574 } 575 qp += QUEUE_HASH(wchan, qtype); 576 spin_lock_set(&qp->qh_lock); 577 for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) 578 if (qrp->qr_wchan == wchan) 579 break; 580 if (qrp == NULL && qp->qh_def_root.qr_head == NULL) { 581 /* the default queue root is available; use it */ 582 qrp = &qp->qh_def_root; 583 qrp->qr_wchan = wchan; 584 ASSERT(qrp->qr_next == NULL); 585 ASSERT(qrp->qr_tail == NULL && 586 qrp->qr_rtcount == 0 && qrp->qr_qlen == 0); 587 } 588 qp->qh_wchan = wchan; /* valid until queue_unlock() is called */ 589 qp->qh_root = qrp; /* valid until queue_unlock() is called */ 590 INCR32(qp->qh_lockcount); 591 QVERIFY(qp); 592 return (qp); 593 } 594 595 /* 596 * Release a queue head. 597 */ 598 void 599 queue_unlock(queue_head_t *qp) 600 { 601 QVERIFY(qp); 602 spin_lock_clear(&qp->qh_lock); 603 } 604 605 /* 606 * For rwlock queueing, we must queue writers ahead of readers of the 607 * same priority. We do this by making writers appear to have a half 608 * point higher priority for purposes of priority comparisons below. 609 */ 610 #define CMP_PRIO(ulwp) ((real_priority(ulwp) << 1) + (ulwp)->ul_writer) 611 612 void 613 enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo) 614 { 615 queue_root_t *qrp; 616 ulwp_t **ulwpp; 617 ulwp_t *next; 618 int pri = CMP_PRIO(ulwp); 619 620 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 621 ASSERT(ulwp->ul_sleepq != qp); 622 623 if ((qrp = qp->qh_root) == NULL) { 624 /* use the thread's queue root for the linkage */ 625 qrp = &ulwp->ul_queue_root; 626 qrp->qr_next = qp->qh_hlist; 627 qrp->qr_prev = NULL; 628 qrp->qr_head = NULL; 629 qrp->qr_tail = NULL; 630 qrp->qr_wchan = qp->qh_wchan; 631 qrp->qr_rtcount = 0; 632 qrp->qr_qlen = 0; 633 qrp->qr_qmax = 0; 634 qp->qh_hlist->qr_prev = qrp; 635 qp->qh_hlist = qrp; 636 qp->qh_root = qrp; 637 MAXINCR(qp->qh_hmax, qp->qh_hlen); 638 } 639 640 /* 641 * LIFO queue ordering is unfair and can lead to starvation, 642 * but it gives better performance for heavily contended locks. 643 * We use thread_queue_fifo (range is 0..8) to determine 644 * the frequency of FIFO vs LIFO queuing: 645 * 0 : every 256th time (almost always LIFO) 646 * 1 : every 128th time 647 * 2 : every 64th time 648 * 3 : every 32nd time 649 * 4 : every 16th time (the default value, mostly LIFO) 650 * 5 : every 8th time 651 * 6 : every 4th time 652 * 7 : every 2nd time 653 * 8 : every time (never LIFO, always FIFO) 654 * Note that there is always some degree of FIFO ordering. 655 * This breaks live lock conditions that occur in applications 656 * that are written assuming (incorrectly) that threads acquire 657 * locks fairly, that is, in roughly round-robin order. 658 * In any event, the queue is maintained in kernel priority order. 659 * 660 * If force_fifo is non-zero, fifo queueing is forced. 661 * SUSV3 requires this for semaphores. 662 */ 663 if (qrp->qr_head == NULL) { 664 /* 665 * The queue is empty. LIFO/FIFO doesn't matter. 666 */ 667 ASSERT(qrp->qr_tail == NULL); 668 ulwpp = &qrp->qr_head; 669 } else if (force_fifo | 670 (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) { 671 /* 672 * Enqueue after the last thread whose priority is greater 673 * than or equal to the priority of the thread being queued. 674 * Attempt first to go directly onto the tail of the queue. 
675 */ 676 if (pri <= CMP_PRIO(qrp->qr_tail)) 677 ulwpp = &qrp->qr_tail->ul_link; 678 else { 679 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 680 ulwpp = &next->ul_link) 681 if (pri > CMP_PRIO(next)) 682 break; 683 } 684 } else { 685 /* 686 * Enqueue before the first thread whose priority is less 687 * than or equal to the priority of the thread being queued. 688 * Hopefully we can go directly onto the head of the queue. 689 */ 690 for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL; 691 ulwpp = &next->ul_link) 692 if (pri >= CMP_PRIO(next)) 693 break; 694 } 695 if ((ulwp->ul_link = *ulwpp) == NULL) 696 qrp->qr_tail = ulwp; 697 *ulwpp = ulwp; 698 699 ulwp->ul_sleepq = qp; 700 ulwp->ul_wchan = qp->qh_wchan; 701 ulwp->ul_qtype = qp->qh_type; 702 if ((ulwp->ul_schedctl != NULL && 703 ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) | 704 ulwp->ul_pilocks) { 705 ulwp->ul_rtqueued = 1; 706 qrp->qr_rtcount++; 707 } 708 MAXINCR(qrp->qr_qmax, qrp->qr_qlen); 709 MAXINCR(qp->qh_qmax, qp->qh_qlen); 710 } 711 712 /* 713 * Helper function for queue_slot() and queue_slot_rt(). 714 * Try to find a non-suspended thread on the queue. 715 */ 716 static ulwp_t ** 717 queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt) 718 { 719 ulwp_t *ulwp; 720 ulwp_t **foundpp = NULL; 721 int priority = -1; 722 ulwp_t *prev; 723 int tpri; 724 725 for (prev = NULL; 726 (ulwp = *ulwpp) != NULL; 727 prev = ulwp, ulwpp = &ulwp->ul_link) { 728 if (ulwp->ul_stop) /* skip suspended threads */ 729 continue; 730 tpri = rt? CMP_PRIO(ulwp) : 0; 731 if (tpri > priority) { 732 foundpp = ulwpp; 733 *prevp = prev; 734 priority = tpri; 735 if (!rt) 736 break; 737 } 738 } 739 return (foundpp); 740 } 741 742 /* 743 * For real-time, we search the entire queue because the dispatch 744 * (kernel) priorities may have changed since enqueueing. 745 */ 746 static ulwp_t ** 747 queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp) 748 { 749 ulwp_t **ulwpp = ulwpp_org; 750 ulwp_t *ulwp = *ulwpp; 751 ulwp_t **foundpp = ulwpp; 752 int priority = CMP_PRIO(ulwp); 753 ulwp_t *prev; 754 int tpri; 755 756 for (prev = ulwp, ulwpp = &ulwp->ul_link; 757 (ulwp = *ulwpp) != NULL; 758 prev = ulwp, ulwpp = &ulwp->ul_link) { 759 tpri = CMP_PRIO(ulwp); 760 if (tpri > priority) { 761 foundpp = ulwpp; 762 *prevp = prev; 763 priority = tpri; 764 } 765 } 766 ulwp = *foundpp; 767 768 /* 769 * Try not to return a suspended thread. 770 * This mimics the old libthread's behavior. 771 */ 772 if (ulwp->ul_stop && 773 (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) { 774 foundpp = ulwpp; 775 ulwp = *foundpp; 776 } 777 ulwp->ul_rt = 1; 778 return (foundpp); 779 } 780 781 ulwp_t ** 782 queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more) 783 { 784 queue_root_t *qrp; 785 ulwp_t **ulwpp; 786 ulwp_t *ulwp; 787 int rt; 788 789 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 790 791 if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) { 792 *more = 0; 793 return (NULL); /* no lwps on the queue */ 794 } 795 rt = (qrp->qr_rtcount != 0); 796 *prevp = NULL; 797 if (ulwp->ul_link == NULL) { /* only one lwp on the queue */ 798 *more = 0; 799 ulwp->ul_rt = rt; 800 return (&qrp->qr_head); 801 } 802 *more = 1; 803 804 if (rt) /* real-time queue */ 805 return (queue_slot_rt(&qrp->qr_head, prevp)); 806 /* 807 * Try not to return a suspended thread. 808 * This mimics the old libthread's behavior. 
809 */ 810 if (ulwp->ul_stop && 811 (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) { 812 ulwp = *ulwpp; 813 ulwp->ul_rt = 0; 814 return (ulwpp); 815 } 816 /* 817 * The common case; just pick the first thread on the queue. 818 */ 819 ulwp->ul_rt = 0; 820 return (&qrp->qr_head); 821 } 822 823 /* 824 * Common code for unlinking an lwp from a user-level sleep queue. 825 */ 826 void 827 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev) 828 { 829 queue_root_t *qrp = qp->qh_root; 830 queue_root_t *nqrp; 831 ulwp_t *ulwp = *ulwpp; 832 ulwp_t *next; 833 834 ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread)); 835 ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan); 836 837 DECR(qp->qh_qlen); 838 DECR(qrp->qr_qlen); 839 if (ulwp->ul_rtqueued) { 840 ulwp->ul_rtqueued = 0; 841 qrp->qr_rtcount--; 842 } 843 next = ulwp->ul_link; 844 *ulwpp = next; 845 ulwp->ul_link = NULL; 846 if (qrp->qr_tail == ulwp) 847 qrp->qr_tail = prev; 848 if (qrp == &ulwp->ul_queue_root) { 849 /* 850 * We can't continue to use the unlinked thread's 851 * queue root for the linkage. 852 */ 853 queue_root_t *qr_next = qrp->qr_next; 854 queue_root_t *qr_prev = qrp->qr_prev; 855 856 if (qrp->qr_tail) { 857 /* switch to using the last thread's queue root */ 858 ASSERT(qrp->qr_qlen != 0); 859 nqrp = &qrp->qr_tail->ul_queue_root; 860 *nqrp = *qrp; 861 if (qr_next) 862 qr_next->qr_prev = nqrp; 863 if (qr_prev) 864 qr_prev->qr_next = nqrp; 865 else 866 qp->qh_hlist = nqrp; 867 qp->qh_root = nqrp; 868 } else { 869 /* empty queue root; just delete from the hash list */ 870 ASSERT(qrp->qr_qlen == 0); 871 if (qr_next) 872 qr_next->qr_prev = qr_prev; 873 if (qr_prev) 874 qr_prev->qr_next = qr_next; 875 else 876 qp->qh_hlist = qr_next; 877 qp->qh_root = NULL; 878 DECR(qp->qh_hlen); 879 } 880 } 881 } 882 883 ulwp_t * 884 dequeue(queue_head_t *qp, int *more) 885 { 886 ulwp_t **ulwpp; 887 ulwp_t *ulwp; 888 ulwp_t *prev; 889 890 if ((ulwpp = queue_slot(qp, &prev, more)) == NULL) 891 return (NULL); 892 ulwp = *ulwpp; 893 queue_unlink(qp, ulwpp, prev); 894 ulwp->ul_sleepq = NULL; 895 ulwp->ul_wchan = NULL; 896 return (ulwp); 897 } 898 899 /* 900 * Return a pointer to the highest priority thread sleeping on wchan. 901 */ 902 ulwp_t * 903 queue_waiter(queue_head_t *qp) 904 { 905 ulwp_t **ulwpp; 906 ulwp_t *prev; 907 int more; 908 909 if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL) 910 return (NULL); 911 return (*ulwpp); 912 } 913 914 int 915 dequeue_self(queue_head_t *qp) 916 { 917 ulwp_t *self = curthread; 918 queue_root_t *qrp; 919 ulwp_t **ulwpp; 920 ulwp_t *ulwp; 921 ulwp_t *prev; 922 int found = 0; 923 924 ASSERT(MUTEX_OWNED(&qp->qh_lock, self)); 925 926 /* find self on the sleep queue */ 927 if ((qrp = qp->qh_root) != NULL) { 928 for (prev = NULL, ulwpp = &qrp->qr_head; 929 (ulwp = *ulwpp) != NULL; 930 prev = ulwp, ulwpp = &ulwp->ul_link) { 931 if (ulwp == self) { 932 queue_unlink(qp, ulwpp, prev); 933 self->ul_cvmutex = NULL; 934 self->ul_sleepq = NULL; 935 self->ul_wchan = NULL; 936 found = 1; 937 break; 938 } 939 } 940 } 941 942 if (!found) 943 thr_panic("dequeue_self(): curthread not found on queue"); 944 945 return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL); 946 } 947 948 /* 949 * Called from call_user_handler() and _thrp_suspend() to take 950 * ourself off of our sleep queue so we can grab locks. 951 */ 952 void 953 unsleep_self(void) 954 { 955 ulwp_t *self = curthread; 956 queue_head_t *qp; 957 958 /* 959 * Calling enter_critical()/exit_critical() here would lead 960 * to recursion. 
Just manipulate self->ul_critical directly. 961 */ 962 self->ul_critical++; 963 while (self->ul_sleepq != NULL) { 964 qp = queue_lock(self->ul_wchan, self->ul_qtype); 965 /* 966 * We may have been moved from a CV queue to a 967 * mutex queue while we were attempting queue_lock(). 968 * If so, just loop around and try again. 969 * dequeue_self() clears self->ul_sleepq. 970 */ 971 if (qp == self->ul_sleepq) 972 (void) dequeue_self(qp); 973 queue_unlock(qp); 974 } 975 self->ul_writer = 0; 976 self->ul_critical--; 977 } 978 979 /* 980 * Common code for calling the the ___lwp_mutex_timedlock() system call. 981 * Returns with mutex_owner and mutex_ownerpid set correctly. 982 */ 983 static int 984 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp) 985 { 986 ulwp_t *self = curthread; 987 uberdata_t *udp = self->ul_uberdata; 988 int mtype = mp->mutex_type; 989 hrtime_t begin_sleep; 990 int acquired; 991 int error; 992 993 self->ul_sp = stkptr(); 994 self->ul_wchan = mp; 995 if (__td_event_report(self, TD_SLEEP, udp)) { 996 self->ul_td_evbuf.eventnum = TD_SLEEP; 997 self->ul_td_evbuf.eventdata = mp; 998 tdb_event(TD_SLEEP, udp); 999 } 1000 if (msp) { 1001 tdb_incr(msp->mutex_sleep); 1002 begin_sleep = gethrtime(); 1003 } 1004 1005 DTRACE_PROBE1(plockstat, mutex__block, mp); 1006 1007 for (;;) { 1008 /* 1009 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1010 * means we successfully acquired the lock. 1011 */ 1012 if ((error = ___lwp_mutex_timedlock(mp, tsp, self)) != 0 && 1013 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1014 acquired = 0; 1015 break; 1016 } 1017 1018 if (mtype & USYNC_PROCESS) { 1019 /* 1020 * Defend against forkall(). We may be the child, 1021 * in which case we don't actually own the mutex. 1022 */ 1023 enter_critical(self); 1024 if (mp->mutex_ownerpid == udp->pid) { 1025 exit_critical(self); 1026 acquired = 1; 1027 break; 1028 } 1029 exit_critical(self); 1030 } else { 1031 acquired = 1; 1032 break; 1033 } 1034 } 1035 1036 if (msp) 1037 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1038 self->ul_wchan = NULL; 1039 self->ul_sp = 0; 1040 1041 if (acquired) { 1042 ASSERT(mp->mutex_owner == (uintptr_t)self); 1043 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1044 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1045 } else { 1046 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1047 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1048 } 1049 1050 return (error); 1051 } 1052 1053 /* 1054 * Common code for calling the ___lwp_mutex_trylock() system call. 1055 * Returns with mutex_owner and mutex_ownerpid set correctly. 1056 */ 1057 int 1058 mutex_trylock_kernel(mutex_t *mp) 1059 { 1060 ulwp_t *self = curthread; 1061 uberdata_t *udp = self->ul_uberdata; 1062 int mtype = mp->mutex_type; 1063 int error; 1064 int acquired; 1065 1066 for (;;) { 1067 /* 1068 * A return value of EOWNERDEAD or ELOCKUNMAPPED 1069 * means we successfully acquired the lock. 1070 */ 1071 if ((error = ___lwp_mutex_trylock(mp, self)) != 0 && 1072 error != EOWNERDEAD && error != ELOCKUNMAPPED) { 1073 acquired = 0; 1074 break; 1075 } 1076 1077 if (mtype & USYNC_PROCESS) { 1078 /* 1079 * Defend against forkall(). We may be the child, 1080 * in which case we don't actually own the mutex. 
1081 */ 1082 enter_critical(self); 1083 if (mp->mutex_ownerpid == udp->pid) { 1084 exit_critical(self); 1085 acquired = 1; 1086 break; 1087 } 1088 exit_critical(self); 1089 } else { 1090 acquired = 1; 1091 break; 1092 } 1093 } 1094 1095 if (acquired) { 1096 ASSERT(mp->mutex_owner == (uintptr_t)self); 1097 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1098 } else if (error != EBUSY) { 1099 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1100 } 1101 1102 return (error); 1103 } 1104 1105 volatile sc_shared_t * 1106 setup_schedctl(void) 1107 { 1108 ulwp_t *self = curthread; 1109 volatile sc_shared_t *scp; 1110 sc_shared_t *tmp; 1111 1112 if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */ 1113 !self->ul_vfork && /* not a child of vfork() */ 1114 !self->ul_schedctl_called) { /* haven't been called before */ 1115 enter_critical(self); 1116 self->ul_schedctl_called = &self->ul_uberdata->uberflags; 1117 if ((tmp = __schedctl()) != (sc_shared_t *)(-1)) 1118 self->ul_schedctl = scp = tmp; 1119 exit_critical(self); 1120 } 1121 /* 1122 * Unless the call to setup_schedctl() is surrounded 1123 * by enter_critical()/exit_critical(), the address 1124 * we are returning could be invalid due to a forkall() 1125 * having occurred in another thread. 1126 */ 1127 return (scp); 1128 } 1129 1130 /* 1131 * Interfaces from libsched, incorporated into libc. 1132 * libsched.so.1 is now a filter library onto libc. 1133 */ 1134 #pragma weak schedctl_lookup = schedctl_init 1135 schedctl_t * 1136 schedctl_init(void) 1137 { 1138 volatile sc_shared_t *scp = setup_schedctl(); 1139 return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl); 1140 } 1141 1142 void 1143 schedctl_exit(void) 1144 { 1145 } 1146 1147 /* 1148 * Contract private interface for java. 1149 * Set up the schedctl data if it doesn't exist yet. 1150 * Return a pointer to the pointer to the schedctl data. 1151 */ 1152 volatile sc_shared_t *volatile * 1153 _thr_schedctl(void) 1154 { 1155 ulwp_t *self = curthread; 1156 volatile sc_shared_t *volatile *ptr; 1157 1158 if (self->ul_vfork) 1159 return (NULL); 1160 if (*(ptr = &self->ul_schedctl) == NULL) 1161 (void) setup_schedctl(); 1162 return (ptr); 1163 } 1164 1165 /* 1166 * Block signals and attempt to block preemption. 1167 * no_preempt()/preempt() must be used in pairs but can be nested. 1168 */ 1169 void 1170 no_preempt(ulwp_t *self) 1171 { 1172 volatile sc_shared_t *scp; 1173 1174 if (self->ul_preempt++ == 0) { 1175 enter_critical(self); 1176 if ((scp = self->ul_schedctl) != NULL || 1177 (scp = setup_schedctl()) != NULL) { 1178 /* 1179 * Save the pre-existing preempt value. 1180 */ 1181 self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt; 1182 scp->sc_preemptctl.sc_nopreempt = 1; 1183 } 1184 } 1185 } 1186 1187 /* 1188 * Undo the effects of no_preempt(). 1189 */ 1190 void 1191 preempt(ulwp_t *self) 1192 { 1193 volatile sc_shared_t *scp; 1194 1195 ASSERT(self->ul_preempt > 0); 1196 if (--self->ul_preempt == 0) { 1197 if ((scp = self->ul_schedctl) != NULL) { 1198 /* 1199 * Restore the pre-existing preempt value. 1200 */ 1201 scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt; 1202 if (scp->sc_preemptctl.sc_yield && 1203 scp->sc_preemptctl.sc_nopreempt == 0) { 1204 yield(); 1205 if (scp->sc_preemptctl.sc_yield) { 1206 /* 1207 * Shouldn't happen. This is either 1208 * a race condition or the thread 1209 * just entered the real-time class. 
1210 */ 1211 yield(); 1212 scp->sc_preemptctl.sc_yield = 0; 1213 } 1214 } 1215 } 1216 exit_critical(self); 1217 } 1218 } 1219 1220 /* 1221 * If a call to preempt() would cause the current thread to yield or to 1222 * take deferred actions in exit_critical(), then unpark the specified 1223 * lwp so it can run while we delay. Return the original lwpid if the 1224 * unpark was not performed, else return zero. The tests are a repeat 1225 * of some of the tests in preempt(), above. This is a statistical 1226 * optimization solely for cond_sleep_queue(), below. 1227 */ 1228 static lwpid_t 1229 preempt_unpark(ulwp_t *self, lwpid_t lwpid) 1230 { 1231 volatile sc_shared_t *scp = self->ul_schedctl; 1232 1233 ASSERT(self->ul_preempt == 1 && self->ul_critical > 0); 1234 if ((scp != NULL && scp->sc_preemptctl.sc_yield) || 1235 (self->ul_curplease && self->ul_critical == 1)) { 1236 (void) __lwp_unpark(lwpid); 1237 lwpid = 0; 1238 } 1239 return (lwpid); 1240 } 1241 1242 /* 1243 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1244 * If this fails, return EBUSY and let the caller deal with it. 1245 * If this succeeds, return 0 with mutex_owner set to curthread. 1246 */ 1247 static int 1248 mutex_trylock_adaptive(mutex_t *mp, int tryhard) 1249 { 1250 ulwp_t *self = curthread; 1251 int error = EBUSY; 1252 ulwp_t *ulwp; 1253 volatile sc_shared_t *scp; 1254 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 1255 volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner; 1256 uint32_t new_lockword; 1257 int count = 0; 1258 int max_count; 1259 uint8_t max_spinners; 1260 1261 ASSERT(!(mp->mutex_type & USYNC_PROCESS)); 1262 1263 if (MUTEX_OWNED(mp, self)) 1264 return (EBUSY); 1265 1266 enter_critical(self); 1267 1268 /* short-cut, not definitive (see below) */ 1269 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1270 ASSERT(mp->mutex_type & LOCK_ROBUST); 1271 error = ENOTRECOVERABLE; 1272 goto done; 1273 } 1274 1275 /* 1276 * Make one attempt to acquire the lock before 1277 * incurring the overhead of the spin loop. 1278 */ 1279 if (set_lock_byte(lockp) == 0) { 1280 *ownerp = (uintptr_t)self; 1281 error = 0; 1282 goto done; 1283 } 1284 if (!tryhard) 1285 goto done; 1286 if (ncpus == 0) 1287 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1288 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1289 max_spinners = ncpus - 1; 1290 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1291 if (max_count == 0) 1292 goto done; 1293 1294 /* 1295 * This spin loop is unfair to lwps that have already dropped into 1296 * the kernel to sleep. They will starve on a highly-contended mutex. 1297 * This is just too bad. The adaptive spin algorithm is intended 1298 * to allow programs with highly-contended locks (that is, broken 1299 * programs) to execute with reasonable speed despite their contention. 1300 * Being fair would reduce the speed of such programs and well-written 1301 * programs will not suffer in any case. 1302 */ 1303 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) 1304 goto done; 1305 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1306 for (count = 1; ; count++) { 1307 if (*lockp == 0 && set_lock_byte(lockp) == 0) { 1308 *ownerp = (uintptr_t)self; 1309 error = 0; 1310 break; 1311 } 1312 if (count == max_count) 1313 break; 1314 SMT_PAUSE(); 1315 /* 1316 * Stop spinning if the mutex owner is not running on 1317 * a processor; it will not drop the lock any time soon 1318 * and we would just be wasting time to keep spinning. 
1319 * 1320 * Note that we are looking at another thread (ulwp_t) 1321 * without ensuring that the other thread does not exit. 1322 * The scheme relies on ulwp_t structures never being 1323 * deallocated by the library (the library employs a free 1324 * list of ulwp_t structs that are reused when new threads 1325 * are created) and on schedctl shared memory never being 1326 * deallocated once created via __schedctl(). 1327 * 1328 * Thus, the worst that can happen when the spinning thread 1329 * looks at the owner's schedctl data is that it is looking 1330 * at some other thread's schedctl data. This almost never 1331 * happens and is benign when it does. 1332 */ 1333 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1334 ((scp = ulwp->ul_schedctl) == NULL || 1335 scp->sc_state != SC_ONPROC)) 1336 break; 1337 } 1338 new_lockword = spinners_decr(&mp->mutex_lockword); 1339 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1340 /* 1341 * We haven't yet acquired the lock, the lock 1342 * is free, and there are no other spinners. 1343 * Make one final attempt to acquire the lock. 1344 * 1345 * This isn't strictly necessary since mutex_lock_queue() 1346 * (the next action this thread will take if it doesn't 1347 * acquire the lock here) makes one attempt to acquire 1348 * the lock before putting the thread to sleep. 1349 * 1350 * If the next action for this thread (on failure here) 1351 * were not to call mutex_lock_queue(), this would be 1352 * necessary for correctness, to avoid ending up with an 1353 * unheld mutex with waiters but no one to wake them up. 1354 */ 1355 if (set_lock_byte(lockp) == 0) { 1356 *ownerp = (uintptr_t)self; 1357 error = 0; 1358 } 1359 count++; 1360 } 1361 1362 done: 1363 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1364 ASSERT(mp->mutex_type & LOCK_ROBUST); 1365 /* 1366 * We shouldn't own the mutex. 1367 * Just clear the lock; everyone has already been waked up. 1368 */ 1369 *ownerp = 0; 1370 (void) clear_lockbyte(&mp->mutex_lockword); 1371 error = ENOTRECOVERABLE; 1372 } 1373 1374 exit_critical(self); 1375 1376 if (error) { 1377 if (count) { 1378 DTRACE_PROBE3(plockstat, mutex__spun, mp, 0, count); 1379 } 1380 if (error != EBUSY) { 1381 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1382 } 1383 } else { 1384 if (count) { 1385 DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count); 1386 } 1387 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1388 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1389 ASSERT(mp->mutex_type & LOCK_ROBUST); 1390 error = EOWNERDEAD; 1391 } 1392 } 1393 1394 return (error); 1395 } 1396 1397 /* 1398 * Same as mutex_trylock_adaptive(), except specifically for queue locks. 1399 * The owner field is not set here; the caller (spin_lock_set()) sets it. 
1400 */ 1401 static int 1402 mutex_queuelock_adaptive(mutex_t *mp) 1403 { 1404 ulwp_t *ulwp; 1405 volatile sc_shared_t *scp; 1406 volatile uint8_t *lockp; 1407 volatile uint64_t *ownerp; 1408 int count = curthread->ul_queue_spin; 1409 1410 ASSERT(mp->mutex_type == USYNC_THREAD); 1411 1412 if (count == 0) 1413 return (EBUSY); 1414 1415 lockp = (volatile uint8_t *)&mp->mutex_lockw; 1416 ownerp = (volatile uint64_t *)&mp->mutex_owner; 1417 while (--count >= 0) { 1418 if (*lockp == 0 && set_lock_byte(lockp) == 0) 1419 return (0); 1420 SMT_PAUSE(); 1421 if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL && 1422 ((scp = ulwp->ul_schedctl) == NULL || 1423 scp->sc_state != SC_ONPROC)) 1424 break; 1425 } 1426 1427 return (EBUSY); 1428 } 1429 1430 /* 1431 * Like mutex_trylock_adaptive(), but for process-shared mutexes. 1432 * Spin for a while (if 'tryhard' is true), trying to grab the lock. 1433 * If this fails, return EBUSY and let the caller deal with it. 1434 * If this succeeds, return 0 with mutex_owner set to curthread 1435 * and mutex_ownerpid set to the current pid. 1436 */ 1437 static int 1438 mutex_trylock_process(mutex_t *mp, int tryhard) 1439 { 1440 ulwp_t *self = curthread; 1441 uberdata_t *udp = self->ul_uberdata; 1442 int error = EBUSY; 1443 volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64; 1444 uint32_t new_lockword; 1445 int count = 0; 1446 int max_count; 1447 uint8_t max_spinners; 1448 1449 #if defined(__sparc) && !defined(_LP64) 1450 /* horrible hack, necessary only on 32-bit sparc */ 1451 int fix_alignment_problem = 1452 (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 1453 self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)); 1454 #endif 1455 1456 ASSERT(mp->mutex_type & USYNC_PROCESS); 1457 1458 if (shared_mutex_held(mp)) 1459 return (EBUSY); 1460 1461 enter_critical(self); 1462 1463 /* short-cut, not definitive (see below) */ 1464 if (mp->mutex_flag & LOCK_NOTRECOVERABLE) { 1465 ASSERT(mp->mutex_type & LOCK_ROBUST); 1466 error = ENOTRECOVERABLE; 1467 goto done; 1468 } 1469 1470 /* 1471 * Make one attempt to acquire the lock before 1472 * incurring the overhead of the spin loop. 1473 */ 1474 #if defined(__sparc) && !defined(_LP64) 1475 /* horrible hack, necessary only on 32-bit sparc */ 1476 if (fix_alignment_problem) { 1477 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1478 mp->mutex_ownerpid = udp->pid; 1479 mp->mutex_owner = (uintptr_t)self; 1480 error = 0; 1481 goto done; 1482 } 1483 } else 1484 #endif 1485 if (set_lock_byte64(lockp, udp->pid) == 0) { 1486 mp->mutex_owner = (uintptr_t)self; 1487 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1488 error = 0; 1489 goto done; 1490 } 1491 if (!tryhard) 1492 goto done; 1493 if (ncpus == 0) 1494 ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN); 1495 if ((max_spinners = self->ul_max_spinners) >= ncpus) 1496 max_spinners = ncpus - 1; 1497 max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0; 1498 if (max_count == 0) 1499 goto done; 1500 1501 /* 1502 * This is a process-shared mutex. 1503 * We cannot know if the owner is running on a processor. 1504 * We just spin and hope that it is on a processor. 
1505 */ 1506 if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) 1507 goto done; 1508 DTRACE_PROBE1(plockstat, mutex__spin, mp); 1509 for (count = 1; ; count++) { 1510 #if defined(__sparc) && !defined(_LP64) 1511 /* horrible hack, necessary only on 32-bit sparc */ 1512 if (fix_alignment_problem) { 1513 if ((*lockp & LOCKMASK64) == 0 && 1514 set_lock_byte(&mp->mutex_lockw) == 0) { 1515 mp->mutex_ownerpid = udp->pid; 1516 mp->mutex_owner = (uintptr_t)self; 1517 error = 0; 1518 break; 1519 } 1520 } else 1521 #endif 1522 if ((*lockp & LOCKMASK64) == 0 && 1523 set_lock_byte64(lockp, udp->pid) == 0) { 1524 mp->mutex_owner = (uintptr_t)self; 1525 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1526 error = 0; 1527 break; 1528 } 1529 if (count == max_count) 1530 break; 1531 SMT_PAUSE(); 1532 } 1533 new_lockword = spinners_decr(&mp->mutex_lockword); 1534 if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) { 1535 /* 1536 * We haven't yet acquired the lock, the lock 1537 * is free, and there are no other spinners. 1538 * Make one final attempt to acquire the lock. 1539 * 1540 * This isn't strictly necessary since mutex_lock_kernel() 1541 * (the next action this thread will take if it doesn't 1542 * acquire the lock here) makes one attempt to acquire 1543 * the lock before putting the thread to sleep. 1544 * 1545 * If the next action for this thread (on failure here) 1546 * were not to call mutex_lock_kernel(), this would be 1547 * necessary for correctness, to avoid ending up with an 1548 * unheld mutex with waiters but no one to wake them up. 1549 */ 1550 #if defined(__sparc) && !defined(_LP64) 1551 /* horrible hack, necessary only on 32-bit sparc */ 1552 if (fix_alignment_problem) { 1553 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1554 mp->mutex_ownerpid = udp->pid; 1555 mp->mutex_owner = (uintptr_t)self; 1556 error = 0; 1557 } 1558 } else 1559 #endif 1560 if (set_lock_byte64(lockp, udp->pid) == 0) { 1561 mp->mutex_owner = (uintptr_t)self; 1562 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 1563 error = 0; 1564 } 1565 count++; 1566 } 1567 1568 done: 1569 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1570 ASSERT(mp->mutex_type & LOCK_ROBUST); 1571 /* 1572 * We shouldn't own the mutex. 1573 * Just clear the lock; everyone has already been waked up. 1574 */ 1575 mp->mutex_owner = 0; 1576 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1577 (void) clear_lockbyte64(&mp->mutex_lockword64); 1578 error = ENOTRECOVERABLE; 1579 } 1580 1581 exit_critical(self); 1582 1583 if (error) { 1584 if (count) { 1585 DTRACE_PROBE3(plockstat, mutex__spun, mp, 0, count); 1586 } 1587 if (error != EBUSY) { 1588 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1589 } 1590 } else { 1591 if (count) { 1592 DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count); 1593 } 1594 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 1595 if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) { 1596 ASSERT(mp->mutex_type & LOCK_ROBUST); 1597 if (mp->mutex_flag & LOCK_OWNERDEAD) 1598 error = EOWNERDEAD; 1599 else if (mp->mutex_type & USYNC_PROCESS_ROBUST) 1600 error = ELOCKUNMAPPED; 1601 else 1602 error = EOWNERDEAD; 1603 } 1604 } 1605 1606 return (error); 1607 } 1608 1609 /* 1610 * Mutex wakeup code for releasing a USYNC_THREAD mutex. 1611 * Returns the lwpid of the thread that was dequeued, if any. 1612 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid) 1613 * to wake up the specified lwp. 
1614 */ 1615 static lwpid_t 1616 mutex_wakeup(mutex_t *mp) 1617 { 1618 lwpid_t lwpid = 0; 1619 int more; 1620 queue_head_t *qp; 1621 ulwp_t *ulwp; 1622 1623 /* 1624 * Dequeue a waiter from the sleep queue. Don't touch the mutex 1625 * waiters bit if no one was found on the queue because the mutex 1626 * might have been deallocated or reallocated for another purpose. 1627 */ 1628 qp = queue_lock(mp, MX); 1629 if ((ulwp = dequeue(qp, &more)) != NULL) { 1630 lwpid = ulwp->ul_lwpid; 1631 mp->mutex_waiters = more; 1632 } 1633 queue_unlock(qp); 1634 return (lwpid); 1635 } 1636 1637 /* 1638 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex. 1639 */ 1640 static void 1641 mutex_wakeup_all(mutex_t *mp) 1642 { 1643 queue_head_t *qp; 1644 queue_root_t *qrp; 1645 int nlwpid = 0; 1646 int maxlwps = MAXLWPS; 1647 ulwp_t *ulwp; 1648 lwpid_t buffer[MAXLWPS]; 1649 lwpid_t *lwpid = buffer; 1650 1651 /* 1652 * Walk the list of waiters and prepare to wake up all of them. 1653 * The waiters flag has already been cleared from the mutex. 1654 * 1655 * We keep track of lwpids that are to be unparked in lwpid[]. 1656 * __lwp_unpark_all() is called to unpark all of them after 1657 * they have been removed from the sleep queue and the sleep 1658 * queue lock has been dropped. If we run out of space in our 1659 * on-stack buffer, we need to allocate more but we can't call 1660 * lmalloc() because we are holding a queue lock when the overflow 1661 * occurs and lmalloc() acquires a lock. We can't use alloca() 1662 * either because the application may have allocated a small 1663 * stack and we don't want to overrun the stack. So we call 1664 * alloc_lwpids() to allocate a bigger buffer using the mmap() 1665 * system call directly since that path acquires no locks. 1666 */ 1667 qp = queue_lock(mp, MX); 1668 for (;;) { 1669 if ((qrp = qp->qh_root) == NULL || 1670 (ulwp = qrp->qr_head) == NULL) 1671 break; 1672 ASSERT(ulwp->ul_wchan == mp); 1673 queue_unlink(qp, &qrp->qr_head, NULL); 1674 ulwp->ul_sleepq = NULL; 1675 ulwp->ul_wchan = NULL; 1676 if (nlwpid == maxlwps) 1677 lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps); 1678 lwpid[nlwpid++] = ulwp->ul_lwpid; 1679 } 1680 1681 if (nlwpid == 0) { 1682 queue_unlock(qp); 1683 } else { 1684 mp->mutex_waiters = 0; 1685 no_preempt(curthread); 1686 queue_unlock(qp); 1687 if (nlwpid == 1) 1688 (void) __lwp_unpark(lwpid[0]); 1689 else 1690 (void) __lwp_unpark_all(lwpid, nlwpid); 1691 preempt(curthread); 1692 } 1693 1694 if (lwpid != buffer) 1695 (void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t)); 1696 } 1697 1698 /* 1699 * Release a process-private mutex. 1700 * As an optimization, if there are waiters but there are also spinners 1701 * attempting to acquire the mutex, then don't bother waking up a waiter; 1702 * one of the spinners will acquire the mutex soon and it would be a waste 1703 * of resources to wake up some thread just to have it spin for a while 1704 * and then possibly go back to sleep. See mutex_trylock_adaptive(). 
1705 */ 1706 static lwpid_t 1707 mutex_unlock_queue(mutex_t *mp, int release_all) 1708 { 1709 ulwp_t *self = curthread; 1710 lwpid_t lwpid = 0; 1711 uint32_t old_lockword; 1712 1713 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1714 sigoff(self); 1715 mp->mutex_owner = 0; 1716 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1717 if ((old_lockword & WAITERMASK) && 1718 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1719 no_preempt(self); /* ensure a prompt wakeup */ 1720 if (release_all) 1721 mutex_wakeup_all(mp); 1722 else 1723 lwpid = mutex_wakeup(mp); 1724 if (lwpid == 0) 1725 preempt(self); 1726 } 1727 sigon(self); 1728 return (lwpid); 1729 } 1730 1731 /* 1732 * Like mutex_unlock_queue(), but for process-shared mutexes. 1733 */ 1734 static void 1735 mutex_unlock_process(mutex_t *mp, int release_all) 1736 { 1737 ulwp_t *self = curthread; 1738 uint64_t old_lockword64; 1739 1740 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 1741 sigoff(self); 1742 mp->mutex_owner = 0; 1743 #if defined(__sparc) && !defined(_LP64) 1744 /* horrible hack, necessary only on 32-bit sparc */ 1745 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 1746 self->ul_misaligned && !(mp->mutex_type & LOCK_ROBUST)) { 1747 uint32_t old_lockword; 1748 mp->mutex_ownerpid = 0; 1749 old_lockword = clear_lockbyte(&mp->mutex_lockword); 1750 if ((old_lockword & WAITERMASK) && 1751 (release_all || (old_lockword & SPINNERMASK) == 0)) { 1752 no_preempt(self); /* ensure a prompt wakeup */ 1753 (void) ___lwp_mutex_wakeup(mp, release_all); 1754 preempt(self); 1755 } 1756 sigon(self); 1757 return; 1758 } 1759 #endif 1760 /* mp->mutex_ownerpid is cleared by clear_lockbyte64() */ 1761 old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64); 1762 if ((old_lockword64 & WAITERMASK64) && 1763 (release_all || (old_lockword64 & SPINNERMASK64) == 0)) { 1764 no_preempt(self); /* ensure a prompt wakeup */ 1765 (void) ___lwp_mutex_wakeup(mp, release_all); 1766 preempt(self); 1767 } 1768 sigon(self); 1769 } 1770 1771 void 1772 stall(void) 1773 { 1774 for (;;) 1775 (void) mutex_lock_kernel(&stall_mutex, NULL, NULL); 1776 } 1777 1778 /* 1779 * Acquire a USYNC_THREAD mutex via user-level sleep queues. 1780 * We failed set_lock_byte(&mp->mutex_lockw) before coming here. 1781 * If successful, returns with mutex_owner set correctly. 1782 */ 1783 int 1784 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp, 1785 timespec_t *tsp) 1786 { 1787 uberdata_t *udp = curthread->ul_uberdata; 1788 queue_head_t *qp; 1789 hrtime_t begin_sleep; 1790 int error = 0; 1791 1792 self->ul_sp = stkptr(); 1793 if (__td_event_report(self, TD_SLEEP, udp)) { 1794 self->ul_wchan = mp; 1795 self->ul_td_evbuf.eventnum = TD_SLEEP; 1796 self->ul_td_evbuf.eventdata = mp; 1797 tdb_event(TD_SLEEP, udp); 1798 } 1799 if (msp) { 1800 tdb_incr(msp->mutex_sleep); 1801 begin_sleep = gethrtime(); 1802 } 1803 1804 DTRACE_PROBE1(plockstat, mutex__block, mp); 1805 1806 /* 1807 * Put ourself on the sleep queue, and while we are 1808 * unable to grab the lock, go park in the kernel. 1809 * Take ourself off the sleep queue after we acquire the lock. 1810 * The waiter bit can be set/cleared only while holding the queue lock. 
1811 */ 1812 qp = queue_lock(mp, MX); 1813 enqueue(qp, self, 0); 1814 mp->mutex_waiters = 1; 1815 for (;;) { 1816 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1817 mp->mutex_owner = (uintptr_t)self; 1818 mp->mutex_waiters = dequeue_self(qp); 1819 break; 1820 } 1821 set_parking_flag(self, 1); 1822 queue_unlock(qp); 1823 /* 1824 * __lwp_park() will return the residual time in tsp 1825 * if we are unparked before the timeout expires. 1826 */ 1827 error = __lwp_park(tsp, 0); 1828 set_parking_flag(self, 0); 1829 /* 1830 * We could have taken a signal or suspended ourself. 1831 * If we did, then we removed ourself from the queue. 1832 * Someone else may have removed us from the queue 1833 * as a consequence of mutex_unlock(). We may have 1834 * gotten a timeout from __lwp_park(). Or we may still 1835 * be on the queue and this is just a spurious wakeup. 1836 */ 1837 qp = queue_lock(mp, MX); 1838 if (self->ul_sleepq == NULL) { 1839 if (error) { 1840 mp->mutex_waiters = queue_waiter(qp)? 1 : 0; 1841 if (error != EINTR) 1842 break; 1843 error = 0; 1844 } 1845 if (set_lock_byte(&mp->mutex_lockw) == 0) { 1846 mp->mutex_owner = (uintptr_t)self; 1847 break; 1848 } 1849 enqueue(qp, self, 0); 1850 mp->mutex_waiters = 1; 1851 } 1852 ASSERT(self->ul_sleepq == qp && 1853 self->ul_qtype == MX && 1854 self->ul_wchan == mp); 1855 if (error) { 1856 if (error != EINTR) { 1857 mp->mutex_waiters = dequeue_self(qp); 1858 break; 1859 } 1860 error = 0; 1861 } 1862 } 1863 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 1864 self->ul_wchan == NULL); 1865 self->ul_sp = 0; 1866 1867 ASSERT(error == 0 || error == EINVAL || error == ETIME); 1868 1869 if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) { 1870 ASSERT(mp->mutex_type & LOCK_ROBUST); 1871 /* 1872 * We shouldn't own the mutex. 1873 * Just clear the lock; everyone has already been waked up. 1874 */ 1875 mp->mutex_owner = 0; 1876 (void) clear_lockbyte(&mp->mutex_lockword); 1877 error = ENOTRECOVERABLE; 1878 } 1879 1880 queue_unlock(qp); 1881 1882 if (msp) 1883 msp->mutex_sleep_time += gethrtime() - begin_sleep; 1884 1885 if (error) { 1886 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0); 1887 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 1888 } else { 1889 DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1); 1890 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 1891 if (mp->mutex_flag & LOCK_OWNERDEAD) { 1892 ASSERT(mp->mutex_type & LOCK_ROBUST); 1893 error = EOWNERDEAD; 1894 } 1895 } 1896 1897 return (error); 1898 } 1899 1900 static int 1901 mutex_recursion(mutex_t *mp, int mtype, int try) 1902 { 1903 ASSERT(mutex_held(mp)); 1904 ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)); 1905 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 1906 1907 if (mtype & LOCK_RECURSIVE) { 1908 if (mp->mutex_rcount == RECURSION_MAX) { 1909 DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN); 1910 return (EAGAIN); 1911 } 1912 mp->mutex_rcount++; 1913 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0); 1914 return (0); 1915 } 1916 if (try == MUTEX_LOCK) { 1917 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 1918 return (EDEADLK); 1919 } 1920 return (EBUSY); 1921 } 1922 1923 /* 1924 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so 1925 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary. 1926 * We use tdb_hash_lock here and in the synch object tracking code in 1927 * the tdb_agent.c file. There is no conflict between these two usages. 
1928 */ 1929 void 1930 register_lock(mutex_t *mp) 1931 { 1932 uberdata_t *udp = curthread->ul_uberdata; 1933 uint_t hash = LOCK_HASH(mp); 1934 robust_t *rlp; 1935 robust_t *invalid; 1936 robust_t **rlpp; 1937 robust_t **table; 1938 1939 if ((table = udp->robustlocks) == NULL) { 1940 lmutex_lock(&udp->tdb_hash_lock); 1941 if ((table = udp->robustlocks) == NULL) { 1942 table = lmalloc(LOCKHASHSZ * sizeof (robust_t *)); 1943 membar_producer(); 1944 udp->robustlocks = table; 1945 } 1946 lmutex_unlock(&udp->tdb_hash_lock); 1947 } 1948 membar_consumer(); 1949 1950 /* 1951 * First search the registered table with no locks held. 1952 * This is safe because the table never shrinks 1953 * and we can only get a false negative. 1954 */ 1955 for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) { 1956 if (rlp->robust_lock == mp) /* already registered */ 1957 return; 1958 } 1959 1960 /* 1961 * The lock was not found. 1962 * Repeat the operation with tdb_hash_lock held. 1963 */ 1964 lmutex_lock(&udp->tdb_hash_lock); 1965 1966 invalid = NULL; 1967 for (rlpp = &table[hash]; 1968 (rlp = *rlpp) != NULL; 1969 rlpp = &rlp->robust_next) { 1970 if (rlp->robust_lock == mp) { /* already registered */ 1971 lmutex_unlock(&udp->tdb_hash_lock); 1972 return; 1973 } 1974 /* remember the first invalid entry, if any */ 1975 if (rlp->robust_lock == INVALID_ADDR && invalid == NULL) 1976 invalid = rlp; 1977 } 1978 1979 /* 1980 * The lock has never been registered. 1981 * Add it to the table and register it now. 1982 */ 1983 if ((rlp = invalid) != NULL) { 1984 /* 1985 * Reuse the invalid entry we found above. 1986 * The linkages are still correct. 1987 */ 1988 rlp->robust_lock = mp; 1989 membar_producer(); 1990 } else { 1991 /* 1992 * Allocate a new entry and add it to 1993 * the hash table and to the global list. 1994 */ 1995 rlp = lmalloc(sizeof (*rlp)); 1996 rlp->robust_lock = mp; 1997 rlp->robust_next = NULL; 1998 rlp->robust_list = udp->robustlist; 1999 udp->robustlist = rlp; 2000 membar_producer(); 2001 *rlpp = rlp; 2002 } 2003 2004 lmutex_unlock(&udp->tdb_hash_lock); 2005 2006 (void) ___lwp_mutex_register(mp, &rlp->robust_lock); 2007 } 2008 2009 /* 2010 * This is called in the child of fork()/forkall() to start over 2011 * with a clean slate. (Each process must register its own locks.) 2012 * No locks are needed because all other threads are suspended or gone. 2013 */ 2014 void 2015 unregister_locks(void) 2016 { 2017 uberdata_t *udp = curthread->ul_uberdata; 2018 robust_t **table; 2019 robust_t *rlp; 2020 robust_t *next; 2021 2022 /* 2023 * Do this first, before calling lfree(). 2024 */ 2025 table = udp->robustlocks; 2026 udp->robustlocks = NULL; 2027 rlp = udp->robustlist; 2028 udp->robustlist = NULL; 2029 2030 /* 2031 * Do this by traversing the global list, not the hash table. 2032 */ 2033 while (rlp != NULL) { 2034 next = rlp->robust_list; 2035 lfree(rlp, sizeof (*rlp)); 2036 rlp = next; 2037 } 2038 if (table != NULL) 2039 lfree(table, LOCKHASHSZ * sizeof (robust_t *)); 2040 } 2041 2042 /* 2043 * Returns with mutex_owner set correctly. 
2044 */ 2045 int 2046 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try) 2047 { 2048 ulwp_t *self = curthread; 2049 uberdata_t *udp = self->ul_uberdata; 2050 int mtype = mp->mutex_type; 2051 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2052 int error = 0; 2053 int noceil = try & MUTEX_NOCEIL; 2054 uint8_t ceil; 2055 int myprio; 2056 2057 try &= ~MUTEX_NOCEIL; 2058 ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK); 2059 2060 if (!self->ul_schedctl_called) 2061 (void) setup_schedctl(); 2062 2063 if (msp && try == MUTEX_TRY) 2064 tdb_incr(msp->mutex_try); 2065 2066 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_held(mp)) 2067 return (mutex_recursion(mp, mtype, try)); 2068 2069 if (self->ul_error_detection && try == MUTEX_LOCK && 2070 tsp == NULL && mutex_held(mp)) 2071 lock_error(mp, "mutex_lock", NULL, NULL); 2072 2073 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2074 update_sched(self); 2075 if (self->ul_cid != self->ul_rtclassid) { 2076 DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM); 2077 return (EPERM); 2078 } 2079 ceil = mp->mutex_ceiling; 2080 myprio = self->ul_epri? self->ul_epri : self->ul_pri; 2081 if (myprio > ceil) { 2082 DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL); 2083 return (EINVAL); 2084 } 2085 if ((error = _ceil_mylist_add(mp)) != 0) { 2086 DTRACE_PROBE2(plockstat, mutex__error, mp, error); 2087 return (error); 2088 } 2089 if (myprio < ceil) 2090 _ceil_prio_inherit(ceil); 2091 } 2092 2093 if ((mtype & (USYNC_PROCESS | LOCK_ROBUST)) 2094 == (USYNC_PROCESS | LOCK_ROBUST)) 2095 register_lock(mp); 2096 2097 if (mtype & LOCK_PRIO_INHERIT) { 2098 /* go straight to the kernel */ 2099 if (try == MUTEX_TRY) 2100 error = mutex_trylock_kernel(mp); 2101 else /* MUTEX_LOCK */ 2102 error = mutex_lock_kernel(mp, tsp, msp); 2103 /* 2104 * The kernel never sets or clears the lock byte 2105 * for LOCK_PRIO_INHERIT mutexes. 2106 * Set it here for consistency. 2107 */ 2108 switch (error) { 2109 case 0: 2110 self->ul_pilocks++; 2111 mp->mutex_lockw = LOCKSET; 2112 break; 2113 case EOWNERDEAD: 2114 case ELOCKUNMAPPED: 2115 self->ul_pilocks++; 2116 mp->mutex_lockw = LOCKSET; 2117 /* FALLTHROUGH */ 2118 case ENOTRECOVERABLE: 2119 ASSERT(mtype & LOCK_ROBUST); 2120 break; 2121 case EDEADLK: 2122 if (try == MUTEX_TRY) { 2123 error = EBUSY; 2124 } else if (tsp != NULL) { /* simulate a timeout */ 2125 /* 2126 * Note: mutex_timedlock() never returns EINTR. 
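 * We therefore consume the caller's timeout here ourselves, restarting
 * __nanosleep() with the residual time whenever it is interrupted, and
 * then report ETIME.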
2127 */ 2128 timespec_t ts = *tsp; 2129 timespec_t rts; 2130 2131 while (__nanosleep(&ts, &rts) == EINTR) 2132 ts = rts; 2133 error = ETIME; 2134 } else { /* simulate a deadlock */ 2135 stall(); 2136 } 2137 break; 2138 } 2139 } else if (mtype & USYNC_PROCESS) { 2140 error = mutex_trylock_process(mp, try == MUTEX_LOCK); 2141 if (error == EBUSY && try == MUTEX_LOCK) 2142 error = mutex_lock_kernel(mp, tsp, msp); 2143 } else { /* USYNC_THREAD */ 2144 error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK); 2145 if (error == EBUSY && try == MUTEX_LOCK) 2146 error = mutex_lock_queue(self, msp, mp, tsp); 2147 } 2148 2149 switch (error) { 2150 case 0: 2151 case EOWNERDEAD: 2152 case ELOCKUNMAPPED: 2153 if (mtype & LOCK_ROBUST) 2154 remember_lock(mp); 2155 if (msp) 2156 record_begin_hold(msp); 2157 break; 2158 default: 2159 if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) { 2160 (void) _ceil_mylist_del(mp); 2161 if (myprio < ceil) 2162 _ceil_prio_waive(); 2163 } 2164 if (try == MUTEX_TRY) { 2165 if (msp) 2166 tdb_incr(msp->mutex_try_fail); 2167 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2168 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2169 tdb_event(TD_LOCK_TRY, udp); 2170 } 2171 } 2172 break; 2173 } 2174 2175 return (error); 2176 } 2177 2178 int 2179 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try) 2180 { 2181 ulwp_t *self = curthread; 2182 uberdata_t *udp = self->ul_uberdata; 2183 2184 /* 2185 * We know that USYNC_PROCESS is set in mtype and that 2186 * zero, one, or both of the flags LOCK_RECURSIVE and 2187 * LOCK_ERRORCHECK are set, and that no other flags are set. 2188 */ 2189 ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0); 2190 enter_critical(self); 2191 #if defined(__sparc) && !defined(_LP64) 2192 /* horrible hack, necessary only on 32-bit sparc */ 2193 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2194 self->ul_misaligned) { 2195 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2196 mp->mutex_ownerpid = udp->pid; 2197 mp->mutex_owner = (uintptr_t)self; 2198 exit_critical(self); 2199 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2200 return (0); 2201 } 2202 } else 2203 #endif 2204 if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) { 2205 mp->mutex_owner = (uintptr_t)self; 2206 /* mp->mutex_ownerpid was set by set_lock_byte64() */ 2207 exit_critical(self); 2208 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2209 return (0); 2210 } 2211 exit_critical(self); 2212 2213 if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp)) 2214 return (mutex_recursion(mp, mtype, try)); 2215 2216 if (try == MUTEX_LOCK) { 2217 if (mutex_trylock_process(mp, 1) == 0) 2218 return (0); 2219 return (mutex_lock_kernel(mp, tsp, NULL)); 2220 } 2221 2222 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2223 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2224 tdb_event(TD_LOCK_TRY, udp); 2225 } 2226 return (EBUSY); 2227 } 2228 2229 static int 2230 mutex_lock_impl(mutex_t *mp, timespec_t *tsp) 2231 { 2232 ulwp_t *self = curthread; 2233 int mtype = mp->mutex_type; 2234 uberflags_t *gflags; 2235 2236 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2237 self->ul_error_detection && self->ul_misaligned == 0) 2238 lock_error(mp, "mutex_lock", NULL, "mutex is misaligned"); 2239 2240 /* 2241 * Optimize the case of USYNC_THREAD, including 2242 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2243 * no error detection, no lock statistics, 2244 * and the process has only a single thread. 2245 * (Most likely a traditional single-threaded application.) 
2246 */ 2247 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2248 self->ul_uberdata->uberflags.uf_all) == 0) { 2249 /* 2250 * Only one thread exists so we don't need an atomic operation. 2251 * We do, however, need to protect against signals. 2252 */ 2253 if (mp->mutex_lockw == 0) { 2254 sigoff(self); 2255 mp->mutex_lockw = LOCKSET; 2256 mp->mutex_owner = (uintptr_t)self; 2257 sigon(self); 2258 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2259 return (0); 2260 } 2261 if (mtype && MUTEX_OWNER(mp) == self) 2262 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2263 /* 2264 * We have reached a deadlock, probably because the 2265 * process is executing non-async-signal-safe code in 2266 * a signal handler and is attempting to acquire a lock 2267 * that it already owns. This is not surprising, given 2268 * bad programming practices over the years that has 2269 * resulted in applications calling printf() and such 2270 * in their signal handlers. Unless the user has told 2271 * us that the signal handlers are safe by setting: 2272 * export _THREAD_ASYNC_SAFE=1 2273 * we return EDEADLK rather than actually deadlocking. 2274 * 2275 * A lock may explicitly override this with the 2276 * LOCK_DEADLOCK flag which is currently set for POSIX 2277 * NORMAL mutexes as the specification requires deadlock 2278 * behavior and applications _do_ rely on that for their 2279 * correctness guarantees. 2280 */ 2281 if (tsp == NULL && 2282 MUTEX_OWNER(mp) == self && !self->ul_async_safe && 2283 (mp->mutex_flag & LOCK_DEADLOCK) == 0) { 2284 DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK); 2285 return (EDEADLK); 2286 } 2287 } 2288 2289 /* 2290 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2291 * no error detection, and no lock statistics. 2292 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2293 */ 2294 if ((gflags = self->ul_schedctl_called) != NULL && 2295 (gflags->uf_trs_ted | 2296 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2297 if (mtype & USYNC_PROCESS) 2298 return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK)); 2299 sigoff(self); 2300 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2301 mp->mutex_owner = (uintptr_t)self; 2302 sigon(self); 2303 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2304 return (0); 2305 } 2306 sigon(self); 2307 if (mtype && MUTEX_OWNER(mp) == self) 2308 return (mutex_recursion(mp, mtype, MUTEX_LOCK)); 2309 if (mutex_trylock_adaptive(mp, 1) != 0) 2310 return (mutex_lock_queue(self, NULL, mp, tsp)); 2311 return (0); 2312 } 2313 2314 /* else do it the long way */ 2315 return (mutex_lock_internal(mp, tsp, MUTEX_LOCK)); 2316 } 2317 2318 #pragma weak pthread_mutex_lock = mutex_lock 2319 #pragma weak _mutex_lock = mutex_lock 2320 int 2321 mutex_lock(mutex_t *mp) 2322 { 2323 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2324 return (mutex_lock_impl(mp, NULL)); 2325 } 2326 2327 #pragma weak pthread_mutex_enter_np = mutex_enter 2328 void 2329 mutex_enter(mutex_t *mp) 2330 { 2331 int ret; 2332 int attr = mp->mutex_type & ALL_ATTRIBUTES; 2333 2334 /* 2335 * Require LOCK_ERRORCHECK, accept LOCK_RECURSIVE. 
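 *
 * An illustrative sketch of the intended usage (not code taken from
 * this file): the caller creates an error-checking mutex and relies on
 * mutex_enter()/mutex_exit() to panic on misuse rather than checking
 * a return value on every call.
 *
 *	mutex_t m;
 *
 *	(void) mutex_init(&m, USYNC_THREAD | LOCK_ERRORCHECK, NULL);
 *	mutex_enter(&m);
 *	... critical section ...
 *	mutex_exit(&m);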
2336 */ 2337 if (attr != LOCK_ERRORCHECK && 2338 attr != (LOCK_ERRORCHECK | LOCK_RECURSIVE)) { 2339 mutex_panic(mp, "mutex_enter: bad mutex type"); 2340 } 2341 ret = mutex_lock(mp); 2342 if (ret == EDEADLK) { 2343 mutex_panic(mp, "recursive mutex_enter"); 2344 } else if (ret == EAGAIN) { 2345 mutex_panic(mp, "excessive recursive mutex_enter"); 2346 } else if (ret != 0) { 2347 mutex_panic(mp, "unknown mutex_enter failure"); 2348 } 2349 } 2350 2351 int 2352 pthread_mutex_clocklock(pthread_mutex_t *restrict mp, clockid_t clock, 2353 const struct timespec *restrict abstime) 2354 { 2355 timespec_t tslocal; 2356 int error; 2357 2358 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2359 2360 switch (clock) { 2361 case CLOCK_REALTIME: 2362 case CLOCK_HIGHRES: 2363 break; 2364 default: 2365 return (EINVAL); 2366 } 2367 2368 abstime_to_reltime(clock, abstime, &tslocal); 2369 error = mutex_lock_impl((mutex_t *)mp, &tslocal); 2370 if (error == ETIME) 2371 error = ETIMEDOUT; 2372 return (error); 2373 } 2374 2375 int 2376 pthread_mutex_timedlock(pthread_mutex_t *restrict mp, 2377 const struct timespec *restrict abstime) 2378 { 2379 return (pthread_mutex_clocklock(mp, CLOCK_REALTIME, abstime)); 2380 } 2381 2382 int 2383 pthread_mutex_relclocklock_np(pthread_mutex_t *restrict mp, clockid_t clock, 2384 const struct timespec *restrict reltime) 2385 { 2386 timespec_t tslocal; 2387 int error; 2388 2389 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2390 2391 switch (clock) { 2392 case CLOCK_REALTIME: 2393 case CLOCK_HIGHRES: 2394 break; 2395 default: 2396 return (EINVAL); 2397 } 2398 2399 tslocal = *reltime; 2400 error = mutex_lock_impl((mutex_t *)mp, &tslocal); 2401 if (error == ETIME) 2402 error = ETIMEDOUT; 2403 return (error); 2404 } 2405 2406 int 2407 pthread_mutex_reltimedlock_np(pthread_mutex_t *restrict mp, 2408 const struct timespec *restrict reltime) 2409 { 2410 return (pthread_mutex_relclocklock_np(mp, CLOCK_REALTIME, reltime)); 2411 } 2412 2413 #pragma weak pthread_mutex_trylock = mutex_trylock 2414 int 2415 mutex_trylock(mutex_t *mp) 2416 { 2417 ulwp_t *self = curthread; 2418 uberdata_t *udp = self->ul_uberdata; 2419 int mtype = mp->mutex_type; 2420 uberflags_t *gflags; 2421 2422 ASSERT(!curthread->ul_critical || curthread->ul_bindflags); 2423 2424 /* 2425 * Optimize the case of USYNC_THREAD, including 2426 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2427 * no error detection, no lock statistics, 2428 * and the process has only a single thread. 2429 * (Most likely a traditional single-threaded application.) 2430 */ 2431 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2432 udp->uberflags.uf_all) == 0) { 2433 /* 2434 * Only one thread exists so we don't need an atomic operation. 2435 * We do, however, need to protect against signals. 2436 */ 2437 if (mp->mutex_lockw == 0) { 2438 sigoff(self); 2439 mp->mutex_lockw = LOCKSET; 2440 mp->mutex_owner = (uintptr_t)self; 2441 sigon(self); 2442 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2443 return (0); 2444 } 2445 if (mtype && MUTEX_OWNER(mp) == self) 2446 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2447 return (EBUSY); 2448 } 2449 2450 /* 2451 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2452 * no error detection, and no lock statistics. 2453 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 
2454 */ 2455 if ((gflags = self->ul_schedctl_called) != NULL && 2456 (gflags->uf_trs_ted | 2457 (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) { 2458 if (mtype & USYNC_PROCESS) 2459 return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY)); 2460 sigoff(self); 2461 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2462 mp->mutex_owner = (uintptr_t)self; 2463 sigon(self); 2464 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2465 return (0); 2466 } 2467 sigon(self); 2468 if (mtype && MUTEX_OWNER(mp) == self) 2469 return (mutex_recursion(mp, mtype, MUTEX_TRY)); 2470 if (__td_event_report(self, TD_LOCK_TRY, udp)) { 2471 self->ul_td_evbuf.eventnum = TD_LOCK_TRY; 2472 tdb_event(TD_LOCK_TRY, udp); 2473 } 2474 return (EBUSY); 2475 } 2476 2477 /* else do it the long way */ 2478 return (mutex_lock_internal(mp, NULL, MUTEX_TRY)); 2479 } 2480 2481 int 2482 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags) 2483 { 2484 ulwp_t *self = curthread; 2485 uberdata_t *udp = self->ul_uberdata; 2486 int mtype = mp->mutex_type; 2487 tdb_mutex_stats_t *msp; 2488 int error = 0; 2489 int release_all; 2490 lwpid_t lwpid; 2491 2492 if ((mtype & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 2493 !mutex_held(mp)) 2494 return (EPERM); 2495 2496 if (self->ul_error_detection && !mutex_held(mp)) 2497 lock_error(mp, "mutex_unlock", NULL, NULL); 2498 2499 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2500 mp->mutex_rcount--; 2501 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2502 return (0); 2503 } 2504 2505 if ((msp = MUTEX_STATS(mp, udp)) != NULL) 2506 (void) record_hold_time(msp); 2507 2508 if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) && 2509 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2510 ASSERT(mtype & LOCK_ROBUST); 2511 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2512 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 2513 } 2514 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 2515 2516 if (mtype & LOCK_PRIO_INHERIT) { 2517 no_preempt(self); 2518 mp->mutex_owner = 0; 2519 /* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */ 2520 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2521 mp->mutex_lockw = LOCKCLEAR; 2522 self->ul_pilocks--; 2523 error = ___lwp_mutex_unlock(mp); 2524 preempt(self); 2525 } else if (mtype & USYNC_PROCESS) { 2526 mutex_unlock_process(mp, release_all); 2527 } else { /* USYNC_THREAD */ 2528 if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) { 2529 (void) __lwp_unpark(lwpid); 2530 preempt(self); 2531 } 2532 } 2533 2534 if (mtype & LOCK_ROBUST) 2535 forget_lock(mp); 2536 2537 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 2538 _ceil_prio_waive(); 2539 2540 return (error); 2541 } 2542 2543 #pragma weak pthread_mutex_unlock = mutex_unlock 2544 #pragma weak _mutex_unlock = mutex_unlock 2545 int 2546 mutex_unlock(mutex_t *mp) 2547 { 2548 ulwp_t *self = curthread; 2549 int mtype = mp->mutex_type; 2550 uberflags_t *gflags; 2551 lwpid_t lwpid; 2552 short el; 2553 2554 /* 2555 * Optimize the case of USYNC_THREAD, including 2556 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases, 2557 * no error detection, no lock statistics, 2558 * and the process has only a single thread. 2559 * (Most likely a traditional single-threaded application.) 2560 */ 2561 if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) | 2562 self->ul_uberdata->uberflags.uf_all) == 0) { 2563 if (mtype) { 2564 /* 2565 * At this point we know that one or both of the 2566 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 
2567 */ 2568 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2569 return (EPERM); 2570 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2571 mp->mutex_rcount--; 2572 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2573 return (0); 2574 } 2575 } 2576 /* 2577 * Only one thread exists so we don't need an atomic operation. 2578 * Also, there can be no waiters. 2579 */ 2580 sigoff(self); 2581 mp->mutex_owner = 0; 2582 mp->mutex_lockword = 0; 2583 sigon(self); 2584 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2585 return (0); 2586 } 2587 2588 /* 2589 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS, 2590 * no error detection, and no lock statistics. 2591 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases. 2592 */ 2593 if ((gflags = self->ul_schedctl_called) != NULL) { 2594 if (((el = gflags->uf_trs_ted) | mtype) == 0) { 2595 fast_unlock: 2596 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2597 (void) __lwp_unpark(lwpid); 2598 preempt(self); 2599 } 2600 return (0); 2601 } 2602 if (el) /* error detection or lock statistics */ 2603 goto slow_unlock; 2604 if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2605 /* 2606 * At this point we know that one or both of the 2607 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set. 2608 */ 2609 if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self)) 2610 return (EPERM); 2611 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2612 mp->mutex_rcount--; 2613 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2614 return (0); 2615 } 2616 goto fast_unlock; 2617 } 2618 if ((mtype & 2619 ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) { 2620 /* 2621 * At this point we know that zero, one, or both of the 2622 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and 2623 * that the USYNC_PROCESS flag is set. 2624 */ 2625 if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp)) 2626 return (EPERM); 2627 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) { 2628 mp->mutex_rcount--; 2629 DTRACE_PROBE2(plockstat, mutex__release, mp, 1); 2630 return (0); 2631 } 2632 mutex_unlock_process(mp, 0); 2633 return (0); 2634 } 2635 } 2636 2637 /* else do it the long way */ 2638 slow_unlock: 2639 return (mutex_unlock_internal(mp, 0)); 2640 } 2641 2642 #pragma weak pthread_mutex_exit_np = mutex_exit 2643 void 2644 mutex_exit(mutex_t *mp) 2645 { 2646 int ret; 2647 int attr = mp->mutex_type & ALL_ATTRIBUTES; 2648 2649 if (attr != LOCK_ERRORCHECK && 2650 attr != (LOCK_ERRORCHECK | LOCK_RECURSIVE)) { 2651 mutex_panic(mp, "mutex_exit: bad mutex type"); 2652 } 2653 ret = mutex_unlock(mp); 2654 if (ret == EPERM) { 2655 mutex_panic(mp, "mutex_exit: not owner"); 2656 } else if (ret != 0) { 2657 mutex_panic(mp, "unknown mutex_exit failure"); 2658 } 2659 2660 } 2661 2662 /* 2663 * Internally to the library, almost all mutex lock/unlock actions 2664 * go through these lmutex_ functions, to protect critical regions. 2665 * We replicate a bit of code from mutex_lock() and mutex_unlock() 2666 * to make these functions faster since we know that the mutex type 2667 * of all internal locks is USYNC_THREAD. We also know that internal 2668 * locking can never fail, so we panic if it does. 2669 */ 2670 void 2671 lmutex_lock(mutex_t *mp) 2672 { 2673 ulwp_t *self = curthread; 2674 uberdata_t *udp = self->ul_uberdata; 2675 2676 ASSERT(mp->mutex_type == USYNC_THREAD); 2677 2678 enter_critical(self); 2679 /* 2680 * Optimize the case of no lock statistics and only a single thread. 2681 * (Most likely a traditional single-threaded application.) 
2682 */ 2683 if (udp->uberflags.uf_all == 0) { 2684 /* 2685 * Only one thread exists; the mutex must be free. 2686 */ 2687 ASSERT(mp->mutex_lockw == 0); 2688 mp->mutex_lockw = LOCKSET; 2689 mp->mutex_owner = (uintptr_t)self; 2690 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2691 } else { 2692 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2693 2694 if (!self->ul_schedctl_called) 2695 (void) setup_schedctl(); 2696 2697 if (set_lock_byte(&mp->mutex_lockw) == 0) { 2698 mp->mutex_owner = (uintptr_t)self; 2699 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2700 } else if (mutex_trylock_adaptive(mp, 1) != 0) { 2701 (void) mutex_lock_queue(self, msp, mp, NULL); 2702 } 2703 2704 if (msp) 2705 record_begin_hold(msp); 2706 } 2707 } 2708 2709 void 2710 lmutex_unlock(mutex_t *mp) 2711 { 2712 ulwp_t *self = curthread; 2713 uberdata_t *udp = self->ul_uberdata; 2714 2715 ASSERT(mp->mutex_type == USYNC_THREAD); 2716 2717 /* 2718 * Optimize the case of no lock statistics and only a single thread. 2719 * (Most likely a traditional single-threaded application.) 2720 */ 2721 if (udp->uberflags.uf_all == 0) { 2722 /* 2723 * Only one thread exists so there can be no waiters. 2724 */ 2725 mp->mutex_owner = 0; 2726 mp->mutex_lockword = 0; 2727 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 2728 } else { 2729 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 2730 lwpid_t lwpid; 2731 2732 if (msp) 2733 (void) record_hold_time(msp); 2734 if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 2735 (void) __lwp_unpark(lwpid); 2736 preempt(self); 2737 } 2738 } 2739 exit_critical(self); 2740 } 2741 2742 /* 2743 * For specialized code in libc, like the asynchronous i/o code, 2744 * the following sig_*() locking primitives are used in order 2745 * to make the code asynchronous signal safe. Signals are 2746 * deferred while locks acquired by these functions are held. 2747 */ 2748 void 2749 sig_mutex_lock(mutex_t *mp) 2750 { 2751 ulwp_t *self = curthread; 2752 2753 sigoff(self); 2754 (void) mutex_lock(mp); 2755 } 2756 2757 void 2758 sig_mutex_unlock(mutex_t *mp) 2759 { 2760 ulwp_t *self = curthread; 2761 2762 (void) mutex_unlock(mp); 2763 sigon(self); 2764 } 2765 2766 int 2767 sig_mutex_trylock(mutex_t *mp) 2768 { 2769 ulwp_t *self = curthread; 2770 int error; 2771 2772 sigoff(self); 2773 if ((error = mutex_trylock(mp)) != 0) 2774 sigon(self); 2775 return (error); 2776 } 2777 2778 /* 2779 * sig_cond_wait() is a cancellation point. 2780 */ 2781 int 2782 sig_cond_wait(cond_t *cv, mutex_t *mp) 2783 { 2784 int error; 2785 2786 ASSERT(curthread->ul_sigdefer != 0); 2787 pthread_testcancel(); 2788 error = __cond_wait(cv, mp); 2789 if (error == EINTR && curthread->ul_cursig) { 2790 sig_mutex_unlock(mp); 2791 /* take the deferred signal here */ 2792 sig_mutex_lock(mp); 2793 } 2794 pthread_testcancel(); 2795 return (error); 2796 } 2797 2798 /* 2799 * sig_cond_reltimedwait() is a cancellation point. 2800 */ 2801 int 2802 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts) 2803 { 2804 int error; 2805 2806 ASSERT(curthread->ul_sigdefer != 0); 2807 pthread_testcancel(); 2808 error = __cond_reltimedwait(cv, mp, ts); 2809 if (error == EINTR && curthread->ul_cursig) { 2810 sig_mutex_unlock(mp); 2811 /* take the deferred signal here */ 2812 sig_mutex_lock(mp); 2813 } 2814 pthread_testcancel(); 2815 return (error); 2816 } 2817 2818 /* 2819 * For specialized code in libc, like the stdio code. 2820 * the following cancel_safe_*() locking primitives are used in 2821 * order to make the code cancellation-safe. 
Cancellation is 2822 * deferred while locks acquired by these functions are held. 2823 */ 2824 void 2825 cancel_safe_mutex_lock(mutex_t *mp) 2826 { 2827 (void) mutex_lock(mp); 2828 curthread->ul_libc_locks++; 2829 } 2830 2831 int 2832 cancel_safe_mutex_trylock(mutex_t *mp) 2833 { 2834 int error; 2835 2836 if ((error = mutex_trylock(mp)) == 0) 2837 curthread->ul_libc_locks++; 2838 return (error); 2839 } 2840 2841 void 2842 cancel_safe_mutex_unlock(mutex_t *mp) 2843 { 2844 ulwp_t *self = curthread; 2845 2846 ASSERT(self->ul_libc_locks != 0); 2847 2848 (void) mutex_unlock(mp); 2849 2850 /* 2851 * Decrement the count of locks held by cancel_safe_mutex_lock(). 2852 * If we are then in a position to terminate cleanly and 2853 * if there is a pending cancellation and cancellation 2854 * is not disabled and we received EINTR from a recent 2855 * system call then perform the cancellation action now. 2856 */ 2857 if (--self->ul_libc_locks == 0 && 2858 !(self->ul_vfork | self->ul_nocancel | 2859 self->ul_critical | self->ul_sigdefer) && 2860 cancel_active()) 2861 pthread_exit(PTHREAD_CANCELED); 2862 } 2863 2864 static int 2865 shared_mutex_held(mutex_t *mparg) 2866 { 2867 /* 2868 * The 'volatile' is necessary to make sure the compiler doesn't 2869 * reorder the tests of the various components of the mutex. 2870 * They must be tested in this order: 2871 * mutex_lockw 2872 * mutex_owner 2873 * mutex_ownerpid 2874 * This relies on the fact that everywhere mutex_lockw is cleared, 2875 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2876 * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2877 * and mutex_ownerpid are set after mutex_lockw is set, and that 2878 * mutex_lockw is set or cleared with a memory barrier. 2879 */ 2880 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2881 ulwp_t *self = curthread; 2882 uberdata_t *udp = self->ul_uberdata; 2883 2884 return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 2885 } 2886 2887 #pragma weak _mutex_held = mutex_held 2888 int 2889 mutex_held(mutex_t *mparg) 2890 { 2891 volatile mutex_t *mp = (volatile mutex_t *)mparg; 2892 2893 if (mparg->mutex_type & USYNC_PROCESS) 2894 return (shared_mutex_held(mparg)); 2895 return (MUTEX_OWNED(mp, curthread)); 2896 } 2897 2898 #pragma weak pthread_mutex_destroy = mutex_destroy 2899 #pragma weak _mutex_destroy = mutex_destroy 2900 int 2901 mutex_destroy(mutex_t *mp) 2902 { 2903 if (mp->mutex_type & USYNC_PROCESS) 2904 forget_lock(mp); 2905 (void) memset(mp, 0, sizeof (*mp)); 2906 tdb_sync_obj_deregister(mp); 2907 return (0); 2908 } 2909 2910 #pragma weak pthread_mutex_consistent_np = mutex_consistent 2911 #pragma weak pthread_mutex_consistent = mutex_consistent 2912 int 2913 mutex_consistent(mutex_t *mp) 2914 { 2915 /* 2916 * Do this only for an inconsistent, initialized robust lock 2917 * that we hold. For all other cases, return EINVAL. 2918 */ 2919 if (mutex_held(mp) && 2920 (mp->mutex_type & LOCK_ROBUST) && 2921 (mp->mutex_flag & LOCK_INITED) && 2922 (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2923 mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2924 mp->mutex_rcount = 0; 2925 return (0); 2926 } 2927 return (EINVAL); 2928 } 2929 2930 /* 2931 * Spin locks are separate from ordinary mutexes, 2932 * but we use the same data structure for them. 
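 *
 * A minimal usage sketch (illustrative only, not code from this file):
 *
 *	pthread_spinlock_t lock;
 *
 *	(void) pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
 *	(void) pthread_spin_lock(&lock);
 *	... a short critical section; waiters spin, burning CPU ...
 *	(void) pthread_spin_unlock(&lock);
 *	(void) pthread_spin_destroy(&lock);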
2933 */ 2934 2935 int 2936 pthread_spin_init(pthread_spinlock_t *lock, int pshared) 2937 { 2938 mutex_t *mp = (mutex_t *)lock; 2939 2940 (void) memset(mp, 0, sizeof (*mp)); 2941 if (pshared == PTHREAD_PROCESS_SHARED) 2942 mp->mutex_type = USYNC_PROCESS; 2943 else 2944 mp->mutex_type = USYNC_THREAD; 2945 mp->mutex_flag = LOCK_INITED; 2946 mp->mutex_magic = MUTEX_MAGIC; 2947 2948 /* 2949 * This should be at the beginning of the function, 2950 * but for the sake of old broken applications that 2951 * do not have proper alignment for their mutexes 2952 * (and don't check the return code from pthread_spin_init), 2953 * we put it here, after initializing the mutex regardless. 2954 */ 2955 if (((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) && 2956 curthread->ul_misaligned == 0) 2957 return (EINVAL); 2958 2959 return (0); 2960 } 2961 2962 int 2963 pthread_spin_destroy(pthread_spinlock_t *lock) 2964 { 2965 (void) memset(lock, 0, sizeof (*lock)); 2966 return (0); 2967 } 2968 2969 int 2970 pthread_spin_trylock(pthread_spinlock_t *lock) 2971 { 2972 mutex_t *mp = (mutex_t *)lock; 2973 ulwp_t *self = curthread; 2974 int error = 0; 2975 2976 no_preempt(self); 2977 if (set_lock_byte(&mp->mutex_lockw) != 0) 2978 error = EBUSY; 2979 else { 2980 mp->mutex_owner = (uintptr_t)self; 2981 if (mp->mutex_type == USYNC_PROCESS) 2982 mp->mutex_ownerpid = self->ul_uberdata->pid; 2983 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 2984 } 2985 preempt(self); 2986 return (error); 2987 } 2988 2989 int 2990 pthread_spin_lock(pthread_spinlock_t *lock) 2991 { 2992 mutex_t *mp = (mutex_t *)lock; 2993 ulwp_t *self = curthread; 2994 volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2995 int count = 0; 2996 2997 ASSERT(!self->ul_critical || self->ul_bindflags); 2998 2999 DTRACE_PROBE1(plockstat, mutex__spin, mp); 3000 3001 /* 3002 * We don't care whether the owner is running on a processor. 3003 * We just spin because that's what this interface requires. 3004 */ 3005 for (;;) { 3006 if (*lockp == 0) { /* lock byte appears to be clear */ 3007 no_preempt(self); 3008 if (set_lock_byte(lockp) == 0) 3009 break; 3010 preempt(self); 3011 } 3012 if (count < INT_MAX) 3013 count++; 3014 SMT_PAUSE(); 3015 } 3016 mp->mutex_owner = (uintptr_t)self; 3017 if (mp->mutex_type == USYNC_PROCESS) 3018 mp->mutex_ownerpid = self->ul_uberdata->pid; 3019 preempt(self); 3020 if (count) { 3021 DTRACE_PROBE3(plockstat, mutex__spun, mp, 1, count); 3022 } 3023 DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 3024 return (0); 3025 } 3026 3027 int 3028 pthread_spin_unlock(pthread_spinlock_t *lock) 3029 { 3030 mutex_t *mp = (mutex_t *)lock; 3031 ulwp_t *self = curthread; 3032 3033 no_preempt(self); 3034 mp->mutex_owner = 0; 3035 mp->mutex_ownerpid = 0; 3036 DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 3037 (void) atomic_swap_32(&mp->mutex_lockword, 0); 3038 preempt(self); 3039 return (0); 3040 } 3041 3042 #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 3043 3044 /* 3045 * Find/allocate an entry for 'lock' in our array of held locks. 
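 * The storage begins as the single inline slot ul_heldlocks.single;
 * on the first overflow it is converted to an lmalloc()'d array of
 * INITIAL_LOCKS entries (ul_heldlocks.array), and the array doubles
 * in size on each subsequent overflow.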
3046 */ 3047 static mutex_t ** 3048 find_lock_entry(mutex_t *lock) 3049 { 3050 ulwp_t *self = curthread; 3051 mutex_t **remembered = NULL; 3052 mutex_t **lockptr; 3053 uint_t nlocks; 3054 3055 if ((nlocks = self->ul_heldlockcnt) != 0) 3056 lockptr = self->ul_heldlocks.array; 3057 else { 3058 nlocks = 1; 3059 lockptr = &self->ul_heldlocks.single; 3060 } 3061 3062 for (; nlocks; nlocks--, lockptr++) { 3063 if (*lockptr == lock) 3064 return (lockptr); 3065 if (*lockptr == NULL && remembered == NULL) 3066 remembered = lockptr; 3067 } 3068 if (remembered != NULL) { 3069 *remembered = lock; 3070 return (remembered); 3071 } 3072 3073 /* 3074 * No entry available. Allocate more space, converting 3075 * the single entry into an array of entries if necessary. 3076 */ 3077 if ((nlocks = self->ul_heldlockcnt) == 0) { 3078 /* 3079 * Initial allocation of the array. 3080 * Convert the single entry into an array. 3081 */ 3082 self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 3083 lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 3084 /* 3085 * The single entry becomes the first entry in the array. 3086 */ 3087 *lockptr = self->ul_heldlocks.single; 3088 self->ul_heldlocks.array = lockptr; 3089 /* 3090 * Return the next available entry in the array. 3091 */ 3092 *++lockptr = lock; 3093 return (lockptr); 3094 } 3095 /* 3096 * Reallocate the array, double the size each time. 3097 */ 3098 lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 3099 (void) memcpy(lockptr, self->ul_heldlocks.array, 3100 nlocks * sizeof (mutex_t *)); 3101 lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 3102 self->ul_heldlocks.array = lockptr; 3103 self->ul_heldlockcnt *= 2; 3104 /* 3105 * Return the next available entry in the newly allocated array. 3106 */ 3107 *(lockptr += nlocks) = lock; 3108 return (lockptr); 3109 } 3110 3111 /* 3112 * Insert 'lock' into our list of held locks. 3113 * Currently only used for LOCK_ROBUST mutexes. 3114 */ 3115 void 3116 remember_lock(mutex_t *lock) 3117 { 3118 (void) find_lock_entry(lock); 3119 } 3120 3121 /* 3122 * Remove 'lock' from our list of held locks. 3123 * Currently only used for LOCK_ROBUST mutexes. 3124 */ 3125 void 3126 forget_lock(mutex_t *lock) 3127 { 3128 *find_lock_entry(lock) = NULL; 3129 } 3130 3131 /* 3132 * Free the array of held locks. 3133 */ 3134 void 3135 heldlock_free(ulwp_t *ulwp) 3136 { 3137 uint_t nlocks; 3138 3139 if ((nlocks = ulwp->ul_heldlockcnt) != 0) 3140 lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 3141 ulwp->ul_heldlockcnt = 0; 3142 ulwp->ul_heldlocks.array = NULL; 3143 } 3144 3145 /* 3146 * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 3147 * Called from _thrp_exit() to deal with abandoned locks. 3148 */ 3149 void 3150 heldlock_exit(void) 3151 { 3152 ulwp_t *self = curthread; 3153 mutex_t **lockptr; 3154 uint_t nlocks; 3155 mutex_t *mp; 3156 3157 if ((nlocks = self->ul_heldlockcnt) != 0) 3158 lockptr = self->ul_heldlocks.array; 3159 else { 3160 nlocks = 1; 3161 lockptr = &self->ul_heldlocks.single; 3162 } 3163 3164 for (; nlocks; nlocks--, lockptr++) { 3165 /* 3166 * The kernel takes care of transitioning held 3167 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 3168 * We avoid that case here. 
3169 */ 3170 if ((mp = *lockptr) != NULL && 3171 mutex_held(mp) && 3172 (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) == 3173 LOCK_ROBUST) { 3174 mp->mutex_rcount = 0; 3175 if (!(mp->mutex_flag & LOCK_UNMAPPED)) 3176 mp->mutex_flag |= LOCK_OWNERDEAD; 3177 (void) mutex_unlock_internal(mp, 1); 3178 } 3179 } 3180 3181 heldlock_free(self); 3182 } 3183 3184 #pragma weak _cond_init = cond_init 3185 int 3186 cond_init(cond_t *cvp, int type, void *arg __unused) 3187 { 3188 if (type != USYNC_THREAD && type != USYNC_PROCESS) 3189 return (EINVAL); 3190 3191 /* 3192 * This memset initializes cond_clock to CLOCK_REALTIME. 3193 */ 3194 (void) memset(cvp, 0, sizeof (*cvp)); 3195 cvp->cond_type = (uint16_t)type; 3196 cvp->cond_magic = COND_MAGIC; 3197 3198 /* 3199 * This should be at the beginning of the function, 3200 * but for the sake of old broken applications that 3201 * do not have proper alignment for their condvars 3202 * (and don't check the return code from cond_init), 3203 * we put it here, after initializing the condvar regardless. 3204 */ 3205 if (((uintptr_t)cvp & (_LONG_LONG_ALIGNMENT - 1)) && 3206 curthread->ul_misaligned == 0) 3207 return (EINVAL); 3208 3209 return (0); 3210 } 3211 3212 /* 3213 * cond_sleep_queue(): utility function for cond_wait_queue(). 3214 * 3215 * Go to sleep on a condvar sleep queue, expect to be waked up 3216 * by someone calling cond_signal() or cond_broadcast() or due 3217 * to receiving a UNIX signal or being cancelled, or just simply 3218 * due to a spurious wakeup (like someome calling forkall()). 3219 * 3220 * The associated mutex is *not* reacquired before returning. 3221 * That must be done by the caller of cond_sleep_queue(). 3222 */ 3223 static int 3224 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3225 { 3226 ulwp_t *self = curthread; 3227 queue_head_t *qp; 3228 queue_head_t *mqp; 3229 lwpid_t lwpid; 3230 int signalled; 3231 int error; 3232 int cv_wake; 3233 int release_all; 3234 3235 /* 3236 * Put ourself on the CV sleep queue, unlock the mutex, then 3237 * park ourself and unpark a candidate lwp to grab the mutex. 3238 * We must go onto the CV sleep queue before dropping the 3239 * mutex in order to guarantee atomicity of the operation. 3240 */ 3241 self->ul_sp = stkptr(); 3242 qp = queue_lock(cvp, CV); 3243 enqueue(qp, self, 0); 3244 cvp->cond_waiters_user = 1; 3245 self->ul_cvmutex = mp; 3246 self->ul_cv_wake = cv_wake = (tsp != NULL); 3247 self->ul_signalled = 0; 3248 if (mp->mutex_flag & LOCK_OWNERDEAD) { 3249 mp->mutex_flag &= ~LOCK_OWNERDEAD; 3250 mp->mutex_flag |= LOCK_NOTRECOVERABLE; 3251 } 3252 release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 3253 lwpid = mutex_unlock_queue(mp, release_all); 3254 for (;;) { 3255 set_parking_flag(self, 1); 3256 queue_unlock(qp); 3257 if (lwpid != 0) { 3258 lwpid = preempt_unpark(self, lwpid); 3259 preempt(self); 3260 } 3261 /* 3262 * We may have a deferred signal present, 3263 * in which case we should return EINTR. 3264 * Also, we may have received a SIGCANCEL; if so 3265 * and we are cancelable we should return EINTR. 3266 * We force an immediate EINTR return from 3267 * __lwp_park() by turning our parking flag off. 3268 */ 3269 if (self->ul_cursig != 0 || 3270 (self->ul_cancelable && self->ul_cancel_pending)) 3271 set_parking_flag(self, 0); 3272 /* 3273 * __lwp_park() will return the residual time in tsp 3274 * if we are unparked before the timeout expires. 
3275 */ 3276 error = __lwp_park(tsp, lwpid); 3277 set_parking_flag(self, 0); 3278 lwpid = 0; /* unpark the other lwp only once */ 3279 /* 3280 * We were waked up by cond_signal(), cond_broadcast(), 3281 * by an interrupt or timeout (EINTR or ETIME), 3282 * or we may just have gotten a spurious wakeup. 3283 */ 3284 qp = queue_lock(cvp, CV); 3285 if (!cv_wake) 3286 mqp = queue_lock(mp, MX); 3287 if (self->ul_sleepq == NULL) 3288 break; 3289 /* 3290 * We are on either the condvar sleep queue or the 3291 * mutex sleep queue. Break out of the sleep if we 3292 * were interrupted or we timed out (EINTR or ETIME). 3293 * Else this is a spurious wakeup; continue the loop. 3294 */ 3295 if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ 3296 if (error) { 3297 mp->mutex_waiters = dequeue_self(mqp); 3298 break; 3299 } 3300 tsp = NULL; /* no more timeout */ 3301 } else if (self->ul_sleepq == qp) { /* condvar queue */ 3302 if (error) { 3303 cvp->cond_waiters_user = dequeue_self(qp); 3304 break; 3305 } 3306 /* 3307 * Else a spurious wakeup on the condvar queue. 3308 * __lwp_park() has already adjusted the timeout. 3309 */ 3310 } else { 3311 thr_panic("cond_sleep_queue(): thread not on queue"); 3312 } 3313 if (!cv_wake) 3314 queue_unlock(mqp); 3315 } 3316 3317 self->ul_sp = 0; 3318 self->ul_cv_wake = 0; 3319 ASSERT(self->ul_cvmutex == NULL); 3320 ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 3321 self->ul_wchan == NULL); 3322 3323 signalled = self->ul_signalled; 3324 self->ul_signalled = 0; 3325 queue_unlock(qp); 3326 if (!cv_wake) 3327 queue_unlock(mqp); 3328 3329 /* 3330 * If we were concurrently cond_signal()d and any of: 3331 * received a UNIX signal, were cancelled, or got a timeout, 3332 * then perform another cond_signal() to avoid consuming it. 3333 */ 3334 if (error && signalled) 3335 (void) cond_signal(cvp); 3336 3337 return (error); 3338 } 3339 3340 static void 3341 cond_wait_check_alignment(cond_t *cvp, mutex_t *mp) 3342 { 3343 if ((uintptr_t)mp & (_LONG_LONG_ALIGNMENT - 1)) 3344 lock_error(mp, "cond_wait", cvp, "mutex is misaligned"); 3345 if ((uintptr_t)cvp & (_LONG_LONG_ALIGNMENT - 1)) 3346 lock_error(mp, "cond_wait", cvp, "condvar is misaligned"); 3347 } 3348 3349 int 3350 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3351 { 3352 ulwp_t *self = curthread; 3353 int error; 3354 int merror; 3355 3356 if (self->ul_error_detection && self->ul_misaligned == 0) 3357 cond_wait_check_alignment(cvp, mp); 3358 3359 /* 3360 * The old thread library was programmed to defer signals 3361 * while in cond_wait() so that the associated mutex would 3362 * be guaranteed to be held when the application signal 3363 * handler was invoked. 3364 * 3365 * We do not behave this way by default; the state of the 3366 * associated mutex in the signal handler is undefined. 3367 * 3368 * To accommodate applications that depend on the old 3369 * behavior, the _THREAD_COND_WAIT_DEFER environment 3370 * variable can be set to 1 and we will behave in the 3371 * old way with respect to cond_wait(). 3372 */ 3373 if (self->ul_cond_wait_defer) 3374 sigoff(self); 3375 3376 error = cond_sleep_queue(cvp, mp, tsp); 3377 3378 /* 3379 * Reacquire the mutex. 3380 */ 3381 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3382 error = merror; 3383 3384 /* 3385 * Take any deferred signal now, after we have reacquired the mutex. 3386 */ 3387 if (self->ul_cond_wait_defer) 3388 sigon(self); 3389 3390 return (error); 3391 } 3392 3393 /* 3394 * cond_sleep_kernel(): utility function for cond_wait_kernel(). 
3395 * See the comment ahead of cond_sleep_queue(), above. 3396 */ 3397 static int 3398 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3399 { 3400 int mtype = mp->mutex_type; 3401 ulwp_t *self = curthread; 3402 int error; 3403 3404 if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 3405 _ceil_prio_waive(); 3406 3407 self->ul_sp = stkptr(); 3408 self->ul_wchan = cvp; 3409 sigoff(self); 3410 mp->mutex_owner = 0; 3411 /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ 3412 if (mtype & LOCK_PRIO_INHERIT) { 3413 mp->mutex_lockw = LOCKCLEAR; 3414 self->ul_pilocks--; 3415 } 3416 /* 3417 * ___lwp_cond_wait() returns immediately with EINTR if 3418 * set_parking_flag(self,0) is called on this lwp before it 3419 * goes to sleep in the kernel. sigacthandler() calls this 3420 * when a deferred signal is noted. This assures that we don't 3421 * get stuck in ___lwp_cond_wait() with all signals blocked 3422 * due to taking a deferred signal before going to sleep. 3423 */ 3424 set_parking_flag(self, 1); 3425 if (self->ul_cursig != 0 || 3426 (self->ul_cancelable && self->ul_cancel_pending)) 3427 set_parking_flag(self, 0); 3428 error = ___lwp_cond_wait(cvp, mp, tsp, 1); 3429 set_parking_flag(self, 0); 3430 sigon(self); 3431 self->ul_sp = 0; 3432 self->ul_wchan = NULL; 3433 return (error); 3434 } 3435 3436 int 3437 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3438 { 3439 ulwp_t *self = curthread; 3440 int error; 3441 int merror; 3442 3443 if (self->ul_error_detection && self->ul_misaligned == 0) 3444 cond_wait_check_alignment(cvp, mp); 3445 3446 /* 3447 * See the large comment in cond_wait_queue(), above. 3448 */ 3449 if (self->ul_cond_wait_defer) 3450 sigoff(self); 3451 3452 error = cond_sleep_kernel(cvp, mp, tsp); 3453 3454 /* 3455 * Override the return code from ___lwp_cond_wait() 3456 * with any non-zero return code from mutex_lock(). 3457 * This addresses robust lock failures in particular; 3458 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 3459 * errors in order to take corrective action. 3460 */ 3461 if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3462 error = merror; 3463 3464 /* 3465 * Take any deferred signal now, after we have reacquired the mutex. 3466 */ 3467 if (self->ul_cond_wait_defer) 3468 sigon(self); 3469 3470 return (error); 3471 } 3472 3473 /* 3474 * Common code for cond_wait() and cond_timedwait() 3475 */ 3476 int 3477 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 3478 { 3479 int mtype = mp->mutex_type; 3480 hrtime_t begin_sleep = 0; 3481 ulwp_t *self = curthread; 3482 uberdata_t *udp = self->ul_uberdata; 3483 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3484 tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 3485 uint8_t rcount; 3486 int error = 0; 3487 3488 /* 3489 * The SUSV3 Posix spec for pthread_cond_timedwait() states: 3490 * Except in the case of [ETIMEDOUT], all these error checks 3491 * shall act as if they were performed immediately at the 3492 * beginning of processing for the function and shall cause 3493 * an error return, in effect, prior to modifying the state 3494 * of the mutex specified by mutex or the condition variable 3495 * specified by cond. 3496 * Therefore, we must return EINVAL now if the timout is invalid. 
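 * For example, a caller passing a negative tv_sec or tv_nsec >= NANOSEC
 * (one billion) draws EINVAL here even though the condvar and mutex are
 * otherwise perfectly valid.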
3497 */ 3498 if (tsp != NULL && 3499 (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 3500 return (EINVAL); 3501 3502 if (__td_event_report(self, TD_SLEEP, udp)) { 3503 self->ul_sp = stkptr(); 3504 self->ul_wchan = cvp; 3505 self->ul_td_evbuf.eventnum = TD_SLEEP; 3506 self->ul_td_evbuf.eventdata = cvp; 3507 tdb_event(TD_SLEEP, udp); 3508 self->ul_sp = 0; 3509 } 3510 if (csp) { 3511 if (tsp) 3512 tdb_incr(csp->cond_timedwait); 3513 else 3514 tdb_incr(csp->cond_wait); 3515 } 3516 if (msp) 3517 begin_sleep = record_hold_time(msp); 3518 else if (csp) 3519 begin_sleep = gethrtime(); 3520 3521 if (self->ul_error_detection) { 3522 if (!mutex_held(mp)) 3523 lock_error(mp, "cond_wait", cvp, NULL); 3524 if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 3525 lock_error(mp, "recursive mutex in cond_wait", 3526 cvp, NULL); 3527 if (cvp->cond_type & USYNC_PROCESS) { 3528 if (!(mtype & USYNC_PROCESS)) 3529 lock_error(mp, "cond_wait", cvp, 3530 "condvar process-shared, " 3531 "mutex process-private"); 3532 } else { 3533 if (mtype & USYNC_PROCESS) 3534 lock_error(mp, "cond_wait", cvp, 3535 "condvar process-private, " 3536 "mutex process-shared"); 3537 } 3538 } 3539 3540 /* 3541 * We deal with recursive mutexes by completely 3542 * dropping the lock and restoring the recursion 3543 * count after waking up. This is arguably wrong, 3544 * but it obeys the principle of least astonishment. 3545 */ 3546 rcount = mp->mutex_rcount; 3547 mp->mutex_rcount = 0; 3548 if ((mtype & 3549 (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 3550 (cvp->cond_type & USYNC_PROCESS)) 3551 error = cond_wait_kernel(cvp, mp, tsp); 3552 else 3553 error = cond_wait_queue(cvp, mp, tsp); 3554 mp->mutex_rcount = rcount; 3555 3556 if (csp) { 3557 hrtime_t lapse = gethrtime() - begin_sleep; 3558 if (tsp == NULL) 3559 csp->cond_wait_sleep_time += lapse; 3560 else { 3561 csp->cond_timedwait_sleep_time += lapse; 3562 if (error == ETIME) 3563 tdb_incr(csp->cond_timedwait_timeout); 3564 } 3565 } 3566 return (error); 3567 } 3568 3569 /* 3570 * cond_wait() is a cancellation point but __cond_wait() is not. 3571 * Internally, libc calls the non-cancellation version. 3572 * Other libraries need to use pthread_setcancelstate(), as appropriate, 3573 * since __cond_wait() is not exported from libc. 3574 */ 3575 int 3576 __cond_wait(cond_t *cvp, mutex_t *mp) 3577 { 3578 ulwp_t *self = curthread; 3579 uberdata_t *udp = self->ul_uberdata; 3580 uberflags_t *gflags; 3581 3582 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3583 !mutex_held(mp)) 3584 return (EPERM); 3585 3586 /* 3587 * Optimize the common case of USYNC_THREAD plus 3588 * no error detection, no lock statistics, and no event tracing. 3589 */ 3590 if ((gflags = self->ul_schedctl_called) != NULL && 3591 (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 3592 self->ul_td_events_enable | 3593 udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 3594 return (cond_wait_queue(cvp, mp, NULL)); 3595 3596 /* 3597 * Else do it the long way. 3598 */ 3599 return (cond_wait_common(cvp, mp, NULL)); 3600 } 3601 3602 #pragma weak _cond_wait = cond_wait 3603 int 3604 cond_wait(cond_t *cvp, mutex_t *mp) 3605 { 3606 int error; 3607 3608 _cancelon(); 3609 error = __cond_wait(cvp, mp); 3610 if (error == EINTR) 3611 _canceloff(); 3612 else 3613 _canceloff_nocancel(); 3614 return (error); 3615 } 3616 3617 /* 3618 * pthread_cond_wait() is a cancellation point. 
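 *
 * Because a wait can end early (the EINTR case is folded to zero below)
 * and spurious wakeups are possible, callers are expected to re-check
 * their predicate in a loop.  An illustrative sketch, assuming a mutex
 * 'm', a condvar 'cv', and a predicate of the caller's own (not code
 * from this file):
 *
 *	(void) pthread_mutex_lock(&m);
 *	while (!condition_is_true)
 *		(void) pthread_cond_wait(&cv, &m);
 *	... use the state protected by 'm' ...
 *	(void) pthread_mutex_unlock(&m);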
3619 */ 3620 int 3621 pthread_cond_wait(pthread_cond_t *restrict cvp, pthread_mutex_t *restrict mp) 3622 { 3623 int error; 3624 3625 error = cond_wait((cond_t *)cvp, (mutex_t *)mp); 3626 return ((error == EINTR)? 0 : error); 3627 } 3628 3629 /* 3630 * cond_timedwait() is a cancellation point but __cond_timedwait() is not. 3631 */ 3632 int 3633 __cond_timedwait(cond_t *cvp, mutex_t *mp, clockid_t clock_id, 3634 const timespec_t *abstime) 3635 { 3636 timespec_t reltime; 3637 int error; 3638 3639 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3640 !mutex_held(mp)) 3641 return (EPERM); 3642 3643 if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 3644 clock_id = CLOCK_REALTIME; 3645 abstime_to_reltime(clock_id, abstime, &reltime); 3646 error = cond_wait_common(cvp, mp, &reltime); 3647 if (error == ETIME && clock_id == CLOCK_HIGHRES) { 3648 /* 3649 * Don't return ETIME if we didn't really get a timeout. 3650 * This can happen if we return because someone resets 3651 * the system clock. Just return zero in this case, 3652 * giving a spurious wakeup but not a timeout. 3653 */ 3654 if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 3655 abstime->tv_nsec > gethrtime()) 3656 error = 0; 3657 } 3658 return (error); 3659 } 3660 3661 static int 3662 cond_clockwait(cond_t *cvp, mutex_t *mp, clockid_t clock, 3663 const timespec_t *abstime) 3664 { 3665 int error; 3666 3667 _cancelon(); 3668 error = __cond_timedwait(cvp, mp, clock, abstime); 3669 if (error == EINTR) 3670 _canceloff(); 3671 else 3672 _canceloff_nocancel(); 3673 return (error); 3674 } 3675 3676 /* 3677 * This is a function internal to libc that determines the clockid to return for 3678 * a cond_t. The cond_t (and the pthreads / C equivalent) encode a clock id that 3679 * should be used as a timing source. When using the static initializers, which 3680 * set this to zero, cond_clockid will end up set to __CLOCK_REALTIME0 which 3681 * isn't really used in the system any more. Consumers of the clockid call this 3682 * to translate this. Note, we fail open such that if someone has corrupted the 3683 * clockid it will end up in a well known clock to continue the traditional 3684 * system behavior. 3685 */ 3686 static clockid_t 3687 cond_clock(cond_t *cvp) 3688 { 3689 if (cvp->cond_clockid != CLOCK_REALTIME && 3690 cvp->cond_clockid != CLOCK_MONOTONIC) { 3691 return (CLOCK_REALTIME); 3692 } 3693 3694 return (cvp->cond_clockid); 3695 } 3696 3697 int 3698 cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 3699 { 3700 return (cond_clockwait(cvp, mp, cond_clock(cvp), abstime)); 3701 } 3702 3703 /* 3704 * pthread_cond_timedwait() and pthread_cond_clockwait() are cancellation 3705 * points. We need to check for cancellation before we evaluate whether the 3706 * clock is valid. 
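 *
 * An illustrative sketch of a timed wait against the monotonic clock,
 * assuming a mutex 'm' and condvar 'cv' initialized elsewhere and
 * assuming CLOCK_MONOTONIC names the same clock as CLOCK_HIGHRES here,
 * as the surrounding clock checks suggest (not code from this file):
 *
 *	struct timespec deadline;
 *	int err = 0;
 *
 *	(void) clock_gettime(CLOCK_MONOTONIC, &deadline);
 *	deadline.tv_sec += 5;		(wait at most five seconds)
 *	(void) pthread_mutex_lock(&m);
 *	while (!condition_is_true && err != ETIMEDOUT)
 *		err = pthread_cond_clockwait(&cv, &m, CLOCK_MONOTONIC,
 *		    &deadline);
 *	(void) pthread_mutex_unlock(&m);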
3707 */ 3708 int 3709 pthread_cond_clockwait(pthread_cond_t *restrict cvp, 3710 pthread_mutex_t *restrict mp, clockid_t clock, 3711 const struct timespec *restrict abstime) 3712 { 3713 int error; 3714 3715 switch (clock) { 3716 case CLOCK_REALTIME: 3717 case CLOCK_HIGHRES: 3718 break; 3719 default: 3720 return (EINVAL); 3721 } 3722 3723 /* We need to translate between the native threads errors and POSIX */ 3724 error = cond_clockwait((cond_t *)cvp, (mutex_t *)mp, clock, abstime); 3725 if (error == ETIME) 3726 error = ETIMEDOUT; 3727 else if (error == EINTR) 3728 error = 0; 3729 return (error); 3730 } 3731 3732 int 3733 pthread_cond_timedwait(pthread_cond_t *restrict cvp, 3734 pthread_mutex_t *restrict mp, const struct timespec *restrict abstime) 3735 { 3736 cond_t *cond = (cond_t *)cvp; 3737 return (pthread_cond_clockwait(cvp, mp, cond_clock(cond), abstime)); 3738 } 3739 3740 /* 3741 * cond_reltimedwait() is a cancellation point but __cond_reltimedwait() is not. 3742 * 3743 * Note, this function does not actually consume the clock id. Internally all 3744 * waits are based upon the highres clock in the system and therefore the actual 3745 * clock used is ignored at this point. 3746 */ 3747 int 3748 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3749 { 3750 timespec_t tslocal = *reltime; 3751 3752 if ((mp->mutex_type & (LOCK_ERRORCHECK | LOCK_ROBUST)) && 3753 !mutex_held(mp)) 3754 return (EPERM); 3755 3756 return (cond_wait_common(cvp, mp, &tslocal)); 3757 } 3758 3759 int 3760 cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 3761 { 3762 int error; 3763 3764 _cancelon(); 3765 error = __cond_reltimedwait(cvp, mp, reltime); 3766 if (error == EINTR) 3767 _canceloff(); 3768 else 3769 _canceloff_nocancel(); 3770 return (error); 3771 } 3772 3773 int 3774 pthread_cond_relclockwait_np(pthread_cond_t *restrict cvp, 3775 pthread_mutex_t *restrict mp, clockid_t clock, 3776 const struct timespec *restrict reltime) 3777 { 3778 int error; 3779 3780 switch (clock) { 3781 case CLOCK_REALTIME: 3782 case CLOCK_HIGHRES: 3783 break; 3784 default: 3785 return (EINVAL); 3786 } 3787 3788 error = cond_reltimedwait((cond_t *)cvp, (mutex_t *)mp, reltime); 3789 if (error == ETIME) 3790 error = ETIMEDOUT; 3791 else if (error == EINTR) 3792 error = 0; 3793 return (error); 3794 } 3795 3796 int 3797 pthread_cond_reltimedwait_np(pthread_cond_t *restrict cvp, 3798 pthread_mutex_t *restrict mp, const struct timespec *restrict reltime) 3799 { 3800 cond_t *cond = (cond_t *)cvp; 3801 return (pthread_cond_relclockwait_np(cvp, mp, cond_clock(cond), 3802 reltime)); 3803 } 3804 3805 #pragma weak pthread_cond_signal = cond_signal 3806 #pragma weak _cond_signal = cond_signal 3807 int 3808 cond_signal(cond_t *cvp) 3809 { 3810 ulwp_t *self = curthread; 3811 uberdata_t *udp = self->ul_uberdata; 3812 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3813 int error = 0; 3814 int more; 3815 lwpid_t lwpid; 3816 queue_head_t *qp; 3817 mutex_t *mp; 3818 queue_head_t *mqp; 3819 ulwp_t **ulwpp; 3820 ulwp_t *ulwp; 3821 ulwp_t *prev; 3822 3823 if (csp) 3824 tdb_incr(csp->cond_signal); 3825 3826 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3827 error = _lwp_cond_signal(cvp); 3828 3829 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3830 return (error); 3831 3832 /* 3833 * Move some thread from the condvar sleep queue to the mutex sleep 3834 * queue for the mutex that it will acquire on being waked up. 3835 * We can do this only if we own the mutex it will acquire. 
3836 * If we do not own the mutex, or if its ul_cv_wake flag 3837 * is set, just dequeue and unpark it. 3838 */ 3839 qp = queue_lock(cvp, CV); 3840 ulwpp = queue_slot(qp, &prev, &more); 3841 cvp->cond_waiters_user = more; 3842 if (ulwpp == NULL) { /* no one on the sleep queue */ 3843 queue_unlock(qp); 3844 return (error); 3845 } 3846 ulwp = *ulwpp; 3847 3848 /* 3849 * Inform the thread that it was the recipient of a cond_signal(). 3850 * This lets it deal with cond_signal() and, concurrently, 3851 * one or more of a cancellation, a UNIX signal, or a timeout. 3852 * These latter conditions must not consume a cond_signal(). 3853 */ 3854 ulwp->ul_signalled = 1; 3855 3856 /* 3857 * Dequeue the waiter but leave its ul_sleepq non-NULL 3858 * while we move it to the mutex queue so that it can 3859 * deal properly with spurious wakeups. 3860 */ 3861 queue_unlink(qp, ulwpp, prev); 3862 3863 mp = ulwp->ul_cvmutex; /* the mutex it will acquire */ 3864 ulwp->ul_cvmutex = NULL; 3865 ASSERT(mp != NULL); 3866 3867 if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) { 3868 /* just wake it up */ 3869 lwpid = ulwp->ul_lwpid; 3870 no_preempt(self); 3871 ulwp->ul_sleepq = NULL; 3872 ulwp->ul_wchan = NULL; 3873 queue_unlock(qp); 3874 (void) __lwp_unpark(lwpid); 3875 preempt(self); 3876 } else { 3877 /* move it to the mutex queue */ 3878 mqp = queue_lock(mp, MX); 3879 enqueue(mqp, ulwp, 0); 3880 mp->mutex_waiters = 1; 3881 queue_unlock(mqp); 3882 queue_unlock(qp); 3883 } 3884 3885 return (error); 3886 } 3887 3888 /* 3889 * Utility function called by mutex_wakeup_all(), cond_broadcast(), 3890 * and rw_queue_release() to (re)allocate a big buffer to hold the 3891 * lwpids of all the threads to be set running after they are removed 3892 * from their sleep queues. Since we are holding a queue lock, we 3893 * cannot call any function that might acquire a lock. mmap(), munmap(), 3894 * lwp_unpark_all() are simple system calls and are safe in this regard. 3895 */ 3896 lwpid_t * 3897 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr) 3898 { 3899 /* 3900 * Allocate NEWLWPS ids on the first overflow. 3901 * Double the allocation each time after that. 3902 */ 3903 int nlwpid = *nlwpid_ptr; 3904 int maxlwps = *maxlwps_ptr; 3905 int first_allocation; 3906 int newlwps; 3907 void *vaddr; 3908 3909 ASSERT(nlwpid == maxlwps); 3910 3911 first_allocation = (maxlwps == MAXLWPS); 3912 newlwps = first_allocation? NEWLWPS : 2 * maxlwps; 3913 vaddr = mmap(NULL, newlwps * sizeof (lwpid_t), 3914 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0); 3915 3916 if (vaddr == MAP_FAILED) { 3917 /* 3918 * Let's hope this never happens. 3919 * If it does, then we have a terrible 3920 * thundering herd on our hands. 
3921 */ 3922 (void) __lwp_unpark_all(lwpid, nlwpid); 3923 *nlwpid_ptr = 0; 3924 } else { 3925 (void) memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t)); 3926 if (!first_allocation) 3927 (void) munmap((caddr_t)lwpid, 3928 maxlwps * sizeof (lwpid_t)); 3929 lwpid = vaddr; 3930 *maxlwps_ptr = newlwps; 3931 } 3932 3933 return (lwpid); 3934 } 3935 3936 #pragma weak pthread_cond_broadcast = cond_broadcast 3937 #pragma weak _cond_broadcast = cond_broadcast 3938 int 3939 cond_broadcast(cond_t *cvp) 3940 { 3941 ulwp_t *self = curthread; 3942 uberdata_t *udp = self->ul_uberdata; 3943 tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 3944 int error = 0; 3945 queue_head_t *qp; 3946 queue_root_t *qrp; 3947 mutex_t *mp; 3948 mutex_t *mp_cache = NULL; 3949 queue_head_t *mqp = NULL; 3950 ulwp_t *ulwp; 3951 int nlwpid = 0; 3952 int maxlwps = MAXLWPS; 3953 lwpid_t buffer[MAXLWPS]; 3954 lwpid_t *lwpid = buffer; 3955 3956 if (csp) 3957 tdb_incr(csp->cond_broadcast); 3958 3959 if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 3960 error = _lwp_cond_broadcast(cvp); 3961 3962 if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 3963 return (error); 3964 3965 /* 3966 * Move everyone from the condvar sleep queue to the mutex sleep 3967 * queue for the mutex that they will acquire on being waked up. 3968 * We can do this only if we own the mutex they will acquire. 3969 * If we do not own the mutex, or if their ul_cv_wake flag 3970 * is set, just dequeue and unpark them. 3971 * 3972 * We keep track of lwpids that are to be unparked in lwpid[]. 3973 * __lwp_unpark_all() is called to unpark all of them after 3974 * they have been removed from the sleep queue and the sleep 3975 * queue lock has been dropped. If we run out of space in our 3976 * on-stack buffer, we need to allocate more but we can't call 3977 * lmalloc() because we are holding a queue lock when the overflow 3978 * occurs and lmalloc() acquires a lock. We can't use alloca() 3979 * either because the application may have allocated a small 3980 * stack and we don't want to overrun the stack. So we call 3981 * alloc_lwpids() to allocate a bigger buffer using the mmap() 3982 * system call directly since that path acquires no locks. 

#pragma weak pthread_cond_broadcast = cond_broadcast
#pragma weak _cond_broadcast = cond_broadcast
int
cond_broadcast(cond_t *cvp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
	int error = 0;
	queue_head_t *qp;
	queue_root_t *qrp;
	mutex_t *mp;
	mutex_t *mp_cache = NULL;
	queue_head_t *mqp = NULL;
	ulwp_t *ulwp;
	int nlwpid = 0;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	if (csp)
		tdb_incr(csp->cond_broadcast);

	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
		error = _lwp_cond_broadcast(cvp);

	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
		return (error);

	/*
	 * Move everyone from the condvar sleep queue to the mutex sleep
	 * queue for the mutex that they will acquire on being woken up.
	 * We can do this only if we own the mutex they will acquire.
	 * If we do not own the mutex, or if their ul_cv_wake flag
	 * is set, just dequeue and unpark them.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more, but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly, since that path acquires no locks.
	 */
	qp = queue_lock(cvp, CV);
	cvp->cond_waiters_user = 0;
	for (;;) {
		if ((qrp = qp->qh_root) == NULL ||
		    (ulwp = qrp->qr_head) == NULL)
			break;
		ASSERT(ulwp->ul_wchan == cvp);
		queue_unlink(qp, &qrp->qr_head, NULL);
		mp = ulwp->ul_cvmutex;		/* its mutex */
		ulwp->ul_cvmutex = NULL;
		ASSERT(mp != NULL);
		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
			/* just wake it up */
			ulwp->ul_sleepq = NULL;
			ulwp->ul_wchan = NULL;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
			lwpid[nlwpid++] = ulwp->ul_lwpid;
		} else {
			/* move it to the mutex queue */
			if (mp != mp_cache) {
				mp_cache = mp;
				if (mqp != NULL)
					queue_unlock(mqp);
				mqp = queue_lock(mp, MX);
			}
			enqueue(mqp, ulwp, 0);
			mp->mutex_waiters = 1;
		}
	}
	if (mqp != NULL)
		queue_unlock(mqp);
	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
	return (error);
}
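
/*
 * Illustrative sketch (editorial addition, not part of libc): as the comment
 * in cond_broadcast() explains, waiters can be moved directly to the mutex
 * sleep queue only when the broadcasting thread owns the mutex they will
 * reacquire.  Broadcasting while holding that mutex therefore lets the
 * library requeue the waiters instead of unparking them all at once, so they
 * do not stampede for the mutex.  The names (state_lock, state_cv,
 * generation, publish_state, wait_for_next_generation) are hypothetical.
 */
#if 0	/* illustrative example; never compiled into libc */
#include <pthread.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t state_cv = PTHREAD_COND_INITIALIZER;
static unsigned long generation;

static void
publish_state(void)
{
	(void) pthread_mutex_lock(&state_lock);
	generation++;
	/*
	 * Broadcast while still holding state_lock: waiters that would
	 * otherwise all be unparked here can simply be queued on the mutex.
	 */
	(void) pthread_cond_broadcast(&state_cv);
	(void) pthread_mutex_unlock(&state_lock);
}

static void
wait_for_next_generation(unsigned long seen)
{
	(void) pthread_mutex_lock(&state_lock);
	while (generation == seen)
		(void) pthread_cond_wait(&state_cv, &state_lock);
	(void) pthread_mutex_unlock(&state_lock);
}
#endif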

#pragma weak pthread_cond_destroy = cond_destroy
int
cond_destroy(cond_t *cvp)
{
	cvp->cond_magic = 0;
	tdb_sync_obj_deregister(cvp);
	return (0);
}

#if defined(DEBUG)
void
assert_no_libc_locks_held(void)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
}

/* protected by link_lock */
uint64_t spin_lock_spin;
uint64_t spin_lock_spin2;
uint64_t spin_lock_sleep;
uint64_t spin_lock_wakeup;

/*
 * Record spin lock statistics.
 * Called by a thread exiting itself in thrp_exit().
 * Also called via atexit() from the thread calling exit(),
 * to account for all the other threads as well.
 */
void
record_spin_locks(ulwp_t *ulwp)
{
	spin_lock_spin += ulwp->ul_spin_lock_spin;
	spin_lock_spin2 += ulwp->ul_spin_lock_spin2;
	spin_lock_sleep += ulwp->ul_spin_lock_sleep;
	spin_lock_wakeup += ulwp->ul_spin_lock_wakeup;
	ulwp->ul_spin_lock_spin = 0;
	ulwp->ul_spin_lock_spin2 = 0;
	ulwp->ul_spin_lock_sleep = 0;
	ulwp->ul_spin_lock_wakeup = 0;
}

/*
 * atexit function: dump the queue statistics to stderr.
 */
#include <stdio.h>
void
dump_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	queue_head_t *qp;
	int qn;
	uint64_t spin_lock_total = 0;

	if (udp->queue_head == NULL || thread_queue_dump == 0)
		return;

	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
		return;
	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
		if (qp->qh_lockcount == 0)
			continue;
		spin_lock_total += qp->qh_lockcount;
		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
		    (u_longlong_t)qp->qh_lockcount,
		    qp->qh_qmax, qp->qh_hmax) < 0)
			return;
	}

	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
		return;
	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
		if (qp->qh_lockcount == 0)
			continue;
		spin_lock_total += qp->qh_lockcount;
		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
		    (u_longlong_t)qp->qh_lockcount,
		    qp->qh_qmax, qp->qh_hmax) < 0)
			return;
	}

	(void) fprintf(stderr, "\n  spin_lock_total  = %10llu\n",
	    (u_longlong_t)spin_lock_total);
	(void) fprintf(stderr, "  spin_lock_spin   = %10llu\n",
	    (u_longlong_t)spin_lock_spin);
	(void) fprintf(stderr, "  spin_lock_spin2  = %10llu\n",
	    (u_longlong_t)spin_lock_spin2);
	(void) fprintf(stderr, "  spin_lock_sleep  = %10llu\n",
	    (u_longlong_t)spin_lock_sleep);
	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
	    (u_longlong_t)spin_lock_wakeup);
}
#endif
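
/*
 * Illustrative sketch (editorial addition, not part of libc): the DEBUG-only
 * statistics above follow a common pattern: each thread keeps private
 * counters, folds them into global totals when it exits, and an atexit()
 * handler prints the totals at process exit.  A minimal standalone analogue
 * is sketched below; the names (tls_ops, total_ops, fold_counters,
 * dump_counters, counted_worker) are hypothetical.
 */
#if 0	/* illustrative example; never compiled into libc */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t total_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long long total_ops;
static __thread unsigned long long tls_ops;	/* per-thread counter */

static void
fold_counters(void)
{
	/* fold this thread's counter into the global total */
	(void) pthread_mutex_lock(&total_lock);
	total_ops += tls_ops;
	tls_ops = 0;
	(void) pthread_mutex_unlock(&total_lock);
}

static void
dump_counters(void)
{
	/* registered with atexit(); prints the totals at process exit */
	(void) fprintf(stderr, "total_ops = %llu\n", total_ops);
}

static void *
counted_worker(void *arg)
{
	int i;

	for (i = 0; i < 1000; i++)
		tls_ops++;		/* no locking on the hot path */
	fold_counters();		/* like record_spin_locks() */
	return (arg);
}

int
main(void)
{
	pthread_t t;

	(void) atexit(dump_counters);	/* like dump_queue_statistics() */
	(void) pthread_create(&t, NULL, counted_worker, NULL);
	(void) pthread_join(t, NULL);
	return (0);
}
#endif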