/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 * Copyright 2024 Oxide Computer Company
 */

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/sdt.h>

#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

#define	NLOCKS	4	/* initial number of readlock_t structs allocated */

#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
	    ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
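
/*
 * Note: rwlock_readers is treated throughout this file as a single 32-bit
 * state word ('rwstate'): the count of active readers lives in the
 * URW_READERS_MASK bits, alongside the URW_WRITE_LOCKED and URW_HAS_WAITERS
 * flags.  ASSERT_CONSISTENT_STATE() above captures the invariant that a
 * write-locked state carries no reader count: apart from URW_HAS_WAITERS,
 * a writer-held state word is exactly URW_WRITE_LOCKED.
 */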

/*
 * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
 * We must be deferring signals for this to be safe.
 * Else if we are returning an entry with ul_rdlockcnt == 0,
 * it could be reassigned behind our back in a signal handler.
 */
static readlock_t *
rwl_entry(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	readlock_t *remembered = NULL;
	readlock_t *readlockp;
	uint_t nlocks;

	/* we must be deferring signals */
	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);

	if ((nlocks = self->ul_rdlockcnt) != 0)
		readlockp = self->ul_readlock.array;
	else {
		nlocks = 1;
		readlockp = &self->ul_readlock.single;
	}

	for (; nlocks; nlocks--, readlockp++) {
		if (readlockp->rd_rwlock == rwlp)
			return (readlockp);
		if (readlockp->rd_count == 0 && remembered == NULL)
			remembered = readlockp;
	}
	if (remembered != NULL) {
		remembered->rd_rwlock = rwlp;
		return (remembered);
	}

	/*
	 * No entry available.  Allocate more space, converting the single
	 * readlock_t entry into an array of readlock_t entries if necessary.
	 */
	if ((nlocks = self->ul_rdlockcnt) == 0) {
		/*
		 * Initial allocation of the readlock_t array.
		 * Convert the single entry into an array.
		 */
		self->ul_rdlockcnt = nlocks = NLOCKS;
		readlockp = lmalloc(nlocks * sizeof (readlock_t));
		/*
		 * The single readlock_t becomes the first entry in the array.
		 */
		*readlockp = self->ul_readlock.single;
		self->ul_readlock.single.rd_count = 0;
		self->ul_readlock.array = readlockp;
		/*
		 * Return the next available entry in the array.
		 */
		(++readlockp)->rd_rwlock = rwlp;
		return (readlockp);
	}
	/*
	 * Reallocate the array, double the size each time.
	 */
	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
	(void) memcpy(readlockp, self->ul_readlock.array,
	    nlocks * sizeof (readlock_t));
	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
	self->ul_readlock.array = readlockp;
	self->ul_rdlockcnt *= 2;
	/*
	 * Return the next available entry in the newly allocated array.
	 */
	(readlockp += nlocks)->rd_rwlock = rwlp;
	return (readlockp);
}

/*
 * Free the array of rwlocks held for reading.
 */
void
rwl_free(ulwp_t *ulwp)
{
	uint_t nlocks;

	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
	ulwp->ul_rdlockcnt = 0;
	ulwp->ul_readlock.single.rd_rwlock = NULL;
	ulwp->ul_readlock.single.rd_count = 0;
}

/*
 * Check if a reader version of the lock is held by the current thread.
 */
#pragma weak _rw_read_held = rw_read_held
int
rw_read_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	readlock_t *readlockp;
	uint_t nlocks;
	int rval = 0;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (!(readers & URW_WRITE_LOCKED) &&
	    (readers & URW_READERS_MASK) != 0) {
		/*
		 * The lock is held for reading by some thread.
		 * Search our array of rwlocks held for reading for a match.
		 */
		if ((nlocks = self->ul_rdlockcnt) != 0)
			readlockp = self->ul_readlock.array;
		else {
			nlocks = 1;
			readlockp = &self->ul_readlock.single;
		}
		for (; nlocks; nlocks--, readlockp++) {
			if (readlockp->rd_rwlock == rwlp) {
				if (readlockp->rd_count)
					rval = 1;
				break;
			}
		}
	}

	preempt(self);
	return (rval);
}

/*
 * Check if a writer version of the lock is held by the current thread.
 */
#pragma weak _rw_write_held = rw_write_held
int
rw_write_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	int rval;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	rval = ((readers & URW_WRITE_LOCKED) &&
	    rwlp->rwlock_owner == (uintptr_t)self &&
	    (rwlp->rwlock_type == USYNC_THREAD ||
	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));

	preempt(self);
	return (rval);
}

#pragma weak _rwlock_init = rwlock_init
int
rwlock_init(rwlock_t *rwlp, int type, void *arg __unused)
{
	ulwp_t *self = curthread;

	if (type != USYNC_THREAD && type != USYNC_PROCESS)
		return (EINVAL);
	/*
	 * Once reinitialized, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	(void) memset(rwlp, 0, sizeof (*rwlp));
	rwlp->rwlock_type = (uint16_t)type;
	rwlp->rwlock_magic = RWL_MAGIC;
	rwlp->mutex.mutex_type = (uint8_t)type;
	rwlp->mutex.mutex_flag = LOCK_INITED;
	rwlp->mutex.mutex_magic = MUTEX_MAGIC;

	/*
	 * This should be at the beginning of the function,
	 * but for the sake of old broken applications that
	 * do not have proper alignment for their rwlocks
	 * (and don't check the return code from rwlock_init),
	 * we put it here, after initializing the rwlock regardless.
	 */
	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
	    self->ul_misaligned == 0)
		return (EINVAL);

	return (0);
}
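
/*
 * Illustrative sketch (not part of the implementation): how a caller is
 * expected to drive the interfaces defined in this file.  The 'db_lock'
 * variable and the surrounding error handling are hypothetical; most
 * applications reach this code through the pthread_rwlock_*() weak aliases
 * declared throughout this file rather than calling these names directly.
 *
 *	rwlock_t db_lock;
 *
 *	(void) rwlock_init(&db_lock, USYNC_THREAD, NULL);
 *	if (rw_rdlock(&db_lock) == 0) {
 *		... read-side critical section ...
 *		(void) rw_unlock(&db_lock);
 *	}
 *	(void) rwlock_destroy(&db_lock);
 */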

#pragma weak pthread_rwlock_destroy = rwlock_destroy
#pragma weak _rwlock_destroy = rwlock_destroy
int
rwlock_destroy(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;

	/*
	 * Once destroyed, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	rwlp->rwlock_magic = 0;
	tdb_sync_obj_deregister(rwlp);
	return (0);
}

/*
 * The following four functions:
 *	read_lock_try()
 *	read_unlock_try()
 *	write_lock_try()
 *	write_unlock_try()
 * lie at the heart of the fast-path code for rwlocks,
 * both process-private and process-shared.
 *
 * They are called once without recourse to any other locking primitives.
 * If they succeed, we are done and the fast-path code was successful.
 * If they fail, we have to deal with lock queues, either to enqueue
 * ourself and sleep or to dequeue and wake up someone else (slow paths).
 *
 * Unless 'ignore_waiters_flag' is true (a condition that applies only
 * when read_lock_try() or write_lock_try() is called from code that
 * is already in the slow path and has already acquired the queue lock),
 * these functions will always fail if the waiters flag, URW_HAS_WAITERS,
 * is set in the 'rwstate' word.  Thus, setting the waiters flag on the
 * rwlock and acquiring the queue lock guarantees exclusive access to
 * the rwlock (and is the only way to guarantee exclusive access).
 */

/*
 * Attempt to acquire a readers lock.  Return true on success.
 */
static int
read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag?
	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a reader lock.  Return true on success.
 */
static int
read_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to acquire a writer lock.  Return true on success.
 */
static int
write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag?
	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
	ulwp_t *self = curthread;
	uint32_t readers;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
		    == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a writer lock.  Return true on success.
 */
static int
write_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, 0) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Release a process-private rwlock and wake up any thread(s) sleeping on it.
 * This is called when a thread releases a lock that appears to have waiters.
 */
static void
rw_queue_release(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	queue_head_t *qp;
	uint32_t readers;
	uint32_t writer;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	qp = queue_lock(rwlp, MX);

	/*
	 * Here is where we actually drop the lock,
	 * but we retain the URW_HAS_WAITERS flag, if it is already set.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED)	/* drop the writer lock */
		atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
	else				/* drop the readers lock */
		atomic_dec_32(rwstate);
	if (!(readers & URW_HAS_WAITERS)) {	/* no waiters */
		queue_unlock(qp);
		return;
	}

	/*
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Therefore, the rest of this code is safe because we are
	 * holding the queue lock and the URW_HAS_WAITERS flag is set.
	 */

	readers = *rwstate;		/* must fetch the value again */
	ASSERT_CONSISTENT_STATE(readers);
	ASSERT(readers & URW_HAS_WAITERS);
	readers &= URW_READERS_MASK;	/* count of current readers */
	writer = 0;			/* no current writer */

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			if (writer != 0 || readers != 0)
				break;
			/* one writer to wake */
			writer++;
		} else {
			if (writer != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (writer) {
			/*
			 * Hand off the lock to the writer we will be waking.
			 */
			ASSERT((*rwstate & ~URW_HAS_WAITERS) == 0);
			atomic_or_32(rwstate, URW_WRITE_LOCKED);
			rwlp->rwlock_owner = (uintptr_t)ulwp;
		}
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	/*
	 * This modification of rwstate must be done last.
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Otherwise the read_lock_try() and write_lock_try() fast paths
	 * are effective.
	 */
	if (ulwpp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		ulwp_t *self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex.  These return with the mutex
 * released, and if they need to sleep will release the mutex first.  In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	int try_flag;
	int error;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
		atomic_or_32(rwstate, URW_HAS_WAITERS);

#ifdef DEBUG
		uint32_t readers;
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
#endif
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.  The owner field of the
		 * mutex is cleared in the kernel when the mutex is released,
		 * so we should not clear it here.
		 */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
			continue;	/* no queued waiters, start over */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to it or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer)) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		if (self->ul_sleepq && dequeue_self(qp) == 0) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
		}
		self->ul_writer = 0;
		if (rd_wr == WRITE_LOCK &&
		    (*rwstate & URW_WRITE_LOCKED) &&
		    rwlp->rwlock_owner == (uintptr_t)self) {
			/*
			 * We acquired the lock by hand-off
			 * from the previous owner.
			 */
			error = 0;	/* timedlock did not fail */
			break;
		}
	}

	/*
	 * Make one final check to see if there are any threads left
	 * on the rwlock queue.  Clear the URW_HAS_WAITERS flag if not.
	 */
	if (qp->qh_root == NULL || qp->qh_root->qr_head == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}

#pragma weak pthread_rwlock_rdlock = rw_rdlock
#pragma weak _rw_rdlock = rw_rdlock
int
rw_rdlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_rdlock_impl(rwlp, NULL));
}

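/*
 * lrw_rdlock() and lrw_wrlock() (below) are libc-internal variants of
 * rw_rdlock() and rw_wrlock(): they enter a critical section via
 * enter_critical() before taking the lock and remain in it until the
 * matching lrw_unlock(), which drops the lock and then calls
 * exit_critical().
 */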
void
lrw_rdlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_rdlock_impl(rwlp, NULL);
}

int
pthread_rwlock_relclockrdlock_np(pthread_rwlock_t *restrict rwlp,
    clockid_t clock, const struct timespec *restrict reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *restrict rwlp,
    const struct timespec *restrict reltime)
{
	return (pthread_rwlock_relclockrdlock_np(rwlp, CLOCK_REALTIME,
	    reltime));
}

int
pthread_rwlock_clockrdlock(pthread_rwlock_t *restrict rwlp, clockid_t clock,
    const struct timespec *restrict abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	abstime_to_reltime(clock, abstime, &tslocal);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rwlp,
    const struct timespec *restrict abstime)
{
	return (pthread_rwlock_clockrdlock(rwlp, CLOCK_REALTIME, abstime));
}

int
rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we hold a readers lock on this rwlock, bail out.
	 */
	if (rw_read_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the readers lock");
		error = EDEADLK;
		goto out;
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);

out:
	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp) {
			tdb_incr(rwsp->rw_wrlock);
			rwsp->rw_wrlock_begin_hold = gethrtime();
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
	}
	return (error);
}

#pragma weak pthread_rwlock_wrlock = rw_wrlock
#pragma weak _rw_wrlock = rw_wrlock
int
rw_wrlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_wrlock_impl(rwlp, NULL));
}

void
lrw_wrlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_wrlock_impl(rwlp, NULL);
}

int
pthread_rwlock_relclockwrlock_np(pthread_rwlock_t *restrict rwlp,
    clockid_t clock, const struct timespec *restrict reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *restrict rwlp,
    const struct timespec *restrict reltime)
{
	return (pthread_rwlock_relclockwrlock_np(rwlp, CLOCK_REALTIME,
	    reltime));
}

int
pthread_rwlock_clockwrlock(pthread_rwlock_t *rwlp, clockid_t clock,
    const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	switch (clock) {
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}

	abstime_to_reltime(clock, abstime, &tslocal);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
{
	return (pthread_rwlock_clockwrlock(rwlp, CLOCK_REALTIME, abstime));
}

#pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
int
rw_tryrdlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	readlock_t *readlockp;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_rdlock_try);

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
			    error);
		}
	}

	return (error);
}

#pragma weak pthread_rwlock_trywrlock = rw_trywrlock
int
rw_trywrlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	ASSERT(!self->ul_critical || self->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_wrlock_try);

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);

	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp)
			rwsp->rw_wrlock_begin_hold = gethrtime();
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_wrlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
			    error);
		}
	}
	return (error);
}

#pragma weak pthread_rwlock_unlock = rw_unlock
#pragma weak _rw_unlock = rw_unlock
int
rw_unlock(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp;
	int rd_wr;

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED) {
		rd_wr = WRITE_LOCK;
		readers = 0;
	} else {
		rd_wr = READ_LOCK;
		readers &= URW_READERS_MASK;
	}

	if (rd_wr == WRITE_LOCK) {
		/*
		 * Since the writer lock is held, we'd better be
		 * holding it, else we cannot legitimately be here.
		 */
		if (!rw_write_held(rwlp)) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "writer lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
			if (rwsp->rw_wrlock_begin_hold)
				rwsp->rw_wrlock_hold_time +=
				    gethrtime() - rwsp->rw_wrlock_begin_hold;
			rwsp->rw_wrlock_begin_hold = 0;
		}
		rwlp->rwlock_owner = 0;
		rwlp->rwlock_ownerpid = 0;
	} else if (readers > 0) {
		/*
		 * A readers lock is held; if we don't hold one, bail out.
		 */
		readlock_t *readlockp;

		sigoff(self);
		readlockp = rwl_entry(rwlp);
		if (readlockp->rd_count == 0) {
			sigon(self);
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "readers lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/*
		 * If we hold more than one readers lock on this rwlock,
		 * just decrement our reference count and return.
		 */
		if (--readlockp->rd_count != 0) {
			sigon(self);
			goto out;
		}
		sigon(self);
	} else {
		/*
		 * This is a usage error.
		 * No thread should release an unowned lock.
		 */
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
		return (EPERM);
	}

	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
		(void) mutex_lock(&rwlp->mutex);
		(void) __lwp_rwlock_unlock(rwlp);
		(void) mutex_unlock(&rwlp->mutex);
	} else {
		rw_queue_release(rwlp);
	}

out:
	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
	return (0);
}

void
lrw_unlock(rwlock_t *rwlp)
{
	(void) rw_unlock(rwlp);
	exit_critical(curthread);
}