1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2015 Joyent, Inc.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/user.h>
37 #include <sys/errno.h>
38 #include <sys/file.h>
39 #include <sys/proc.h>
40 #include <sys/prsystm.h>
41 #include <sys/kmem.h>
42 #include <sys/sobject.h>
43 #include <sys/fault.h>
44 #include <sys/procfs.h>
45 #include <sys/watchpoint.h>
46 #include <sys/time.h>
47 #include <sys/cmn_err.h>
48 #include <sys/machlock.h>
49 #include <sys/debug.h>
50 #include <sys/synch.h>
51 #include <sys/synch32.h>
52 #include <sys/mman.h>
53 #include <sys/class.h>
54 #include <sys/schedctl.h>
55 #include <sys/sleepq.h>
56 #include <sys/policy.h>
57 #include <sys/lwpchan_impl.h>
58 #include <sys/turnstile.h>
59 #include <sys/atomic.h>
60 #include <sys/lwp_timer_impl.h>
61 #include <sys/lwp_upimutex_impl.h>
62 #include <vm/as.h>
63 #include <sys/sdt.h>
64
65 static kthread_t *lwpsobj_owner(caddr_t);
66 static void lwp_unsleep(kthread_t *t);
67 static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
68 static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
69 static void lwp_mutex_unregister(void *uaddr);
70 static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
71 static int iswanted(kthread_t *, lwpchan_t *);
72
73 extern int lwp_cond_signal(lwp_cond_t *cv);
74
75 /*
76 * Maximum number of user prio inheritance locks that can be held by a thread.
77 * Used to limit kmem for each thread. This is a per-thread limit that
78 * can be administered on a system wide basis (using /etc/system).
79 *
80 * Also, when a limit, say maxlwps is added for numbers of lwps within a
81 * process, the per-thread limit automatically becomes a process-wide limit
82 * of maximum number of held upi locks within a process:
83 * maxheldupimx = maxnestupimx * maxlwps;
84 */
85 static uint32_t maxnestupimx = 2000;
86
87 /*
88 * The sobj_ops vector exports a set of functions needed when a thread
89 * is asleep on a synchronization object of this type.
90 */
91 static sobj_ops_t lwp_sobj_ops = {
92 SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
93 };
94
95 static kthread_t *lwpsobj_pi_owner(upimutex_t *up);
96
97 static sobj_ops_t lwp_sobj_pi_ops = {
98 SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
99 turnstile_change_pri
100 };
101
102 static sleepq_head_t lwpsleepq[NSLEEPQ];
103 upib_t upimutextab[UPIMUTEX_TABSIZE];
104
#define	LWPCHAN_LOCK_SHIFT	10	/* log2(locks per pool): 1024 locks */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * Hash a folded lwpchan value X to a lock index.  Both lc_wchan and
 * lc_wchan0 are addresses that are most likely 8-byte aligned, so the
 * low-order 3 bits are shifted off before the next two groups of
 * LWPCHAN_LOCK_SHIFT bits are folded together.  'pool' is either 0 or 1
 * and selects the lower or the upper LWPCHAN_LOCK_SIZE indices.
 */
#define	LWPCHAN_LOCK_HASH(X, pool)					\
	(((pool) ? LWPCHAN_LOCK_SIZE : 0) +				\
	((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) &		\
	(LWPCHAN_LOCK_SIZE - 1)))
116
117 static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];
118
119 /*
120 * Is this a POSIX threads user-level lock requiring priority inheritance?
121 */
122 #define UPIMUTEX(type) ((type) & LOCK_PRIO_INHERIT)
123
124 static sleepq_head_t *
lwpsqhash(lwpchan_t * lwpchan)125 lwpsqhash(lwpchan_t *lwpchan)
126 {
127 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
128 return (&lwpsleepq[SQHASHINDEX(x)]);
129 }
130
131 /*
132 * Lock an lwpchan.
133 * Keep this in sync with lwpchan_unlock(), below.
134 */
135 static void
lwpchan_lock(lwpchan_t * lwpchan,int pool)136 lwpchan_lock(lwpchan_t *lwpchan, int pool)
137 {
138 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
139 mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
140 }
141
142 /*
143 * Unlock an lwpchan.
144 * Keep this in sync with lwpchan_lock(), above.
145 */
146 static void
lwpchan_unlock(lwpchan_t * lwpchan,int pool)147 lwpchan_unlock(lwpchan_t *lwpchan, int pool)
148 {
149 uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
150 mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
151 }
152
153 /*
154 * Delete mappings from the lwpchan cache for pages that are being
155 * unmapped by as_unmap(). Given a range of addresses, "start" to "end",
156 * all mappings within the range are deleted from the lwpchan cache.
157 */
158 void
lwpchan_delete_mapping(proc_t * p,caddr_t start,caddr_t end)159 lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
160 {
161 lwpchan_data_t *lcp;
162 lwpchan_hashbucket_t *hashbucket;
163 lwpchan_hashbucket_t *endbucket;
164 lwpchan_entry_t *ent;
165 lwpchan_entry_t **prev;
166 caddr_t addr;
167
168 mutex_enter(&p->p_lcp_lock);
169 lcp = p->p_lcp;
170 hashbucket = lcp->lwpchan_cache;
171 endbucket = hashbucket + lcp->lwpchan_size;
172 for (; hashbucket < endbucket; hashbucket++) {
173 if (hashbucket->lwpchan_chain == NULL)
174 continue;
175 mutex_enter(&hashbucket->lwpchan_lock);
176 prev = &hashbucket->lwpchan_chain;
177 /* check entire chain */
178 while ((ent = *prev) != NULL) {
179 addr = ent->lwpchan_addr;
180 if (start <= addr && addr < end) {
181 *prev = ent->lwpchan_next;
182 /*
183 * We do this only for the obsolete type
184 * USYNC_PROCESS_ROBUST. Otherwise robust
185 * locks do not draw ELOCKUNMAPPED or
186 * EOWNERDEAD due to being unmapped.
187 */
188 if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
189 (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
190 lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
191 /*
192 * If there is a user-level robust lock
193 * registration, mark it as invalid.
194 */
195 if ((addr = ent->lwpchan_uaddr) != NULL)
196 lwp_mutex_unregister(addr);
197 kmem_free(ent, sizeof (*ent));
198 atomic_dec_32(&lcp->lwpchan_entries);
199 } else {
200 prev = &ent->lwpchan_next;
201 }
202 }
203 mutex_exit(&hashbucket->lwpchan_lock);
204 }
205 mutex_exit(&p->p_lcp_lock);
206 }
207
208 /*
209 * Given an lwpchan cache pointer and a process virtual address,
210 * return a pointer to the corresponding lwpchan hash bucket.
211 */
212 static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t * lcp,uintptr_t addr)213 lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
214 {
215 uint_t i;
216
217 /*
218 * All user-level sync object addresses are 8-byte aligned.
219 * Ignore the lowest 3 bits of the address and use the
220 * higher-order 2*lwpchan_bits bits for the hash index.
221 */
222 addr >>= 3;
223 i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
224 return (lcp->lwpchan_cache + i);
225 }
226
227 /*
228 * (Re)allocate the per-process lwpchan cache.
229 */
230 static void
lwpchan_alloc_cache(proc_t * p,uint_t bits)231 lwpchan_alloc_cache(proc_t *p, uint_t bits)
232 {
233 lwpchan_data_t *lcp;
234 lwpchan_data_t *old_lcp;
235 lwpchan_hashbucket_t *hashbucket;
236 lwpchan_hashbucket_t *endbucket;
237 lwpchan_hashbucket_t *newbucket;
238 lwpchan_entry_t *ent;
239 lwpchan_entry_t *next;
240 uint_t count;
241
242 ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);
243
244 lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
245 lcp->lwpchan_bits = bits;
246 lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
247 lcp->lwpchan_mask = lcp->lwpchan_size - 1;
248 lcp->lwpchan_entries = 0;
249 lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
250 sizeof (lwpchan_hashbucket_t), KM_SLEEP);
251 lcp->lwpchan_next_data = NULL;
252
253 mutex_enter(&p->p_lcp_lock);
254 if ((old_lcp = p->p_lcp) != NULL) {
255 if (old_lcp->lwpchan_bits >= bits) {
256 /* someone beat us to it */
257 mutex_exit(&p->p_lcp_lock);
258 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
259 sizeof (lwpchan_hashbucket_t));
260 kmem_free(lcp, sizeof (lwpchan_data_t));
261 return;
262 }
263 /*
264 * Acquire all of the old hash table locks.
265 */
266 hashbucket = old_lcp->lwpchan_cache;
267 endbucket = hashbucket + old_lcp->lwpchan_size;
268 for (; hashbucket < endbucket; hashbucket++)
269 mutex_enter(&hashbucket->lwpchan_lock);
270 /*
271 * Move all of the old hash table entries to the
272 * new hash table. The new hash table has not yet
273 * been installed so we don't need any of its locks.
274 */
275 count = 0;
276 hashbucket = old_lcp->lwpchan_cache;
277 for (; hashbucket < endbucket; hashbucket++) {
278 ent = hashbucket->lwpchan_chain;
279 while (ent != NULL) {
280 next = ent->lwpchan_next;
281 newbucket = lwpchan_bucket(lcp,
282 (uintptr_t)ent->lwpchan_addr);
283 ent->lwpchan_next = newbucket->lwpchan_chain;
284 newbucket->lwpchan_chain = ent;
285 ent = next;
286 count++;
287 }
288 hashbucket->lwpchan_chain = NULL;
289 }
290 lcp->lwpchan_entries = count;
291 }
292
293 /*
294 * Retire the old hash table. We can't actually kmem_free() it
295 * now because someone may still have a pointer to it. Instead,
296 * we link it onto the new hash table's list of retired hash tables.
297 * The new hash table is double the size of the previous one, so
298 * the total size of all retired hash tables is less than the size
299 * of the new one. exit() and exec() free the retired hash tables
300 * (see lwpchan_destroy_cache(), below).
301 */
302 lcp->lwpchan_next_data = old_lcp;
303
304 /*
305 * As soon as we store the new lcp, future locking operations will
306 * use it. Therefore, we must ensure that all the state we've just
307 * established reaches global visibility before the new lcp does.
308 */
309 membar_producer();
310 p->p_lcp = lcp;
311
312 if (old_lcp != NULL) {
313 /*
314 * Release all of the old hash table locks.
315 */
316 hashbucket = old_lcp->lwpchan_cache;
317 for (; hashbucket < endbucket; hashbucket++)
318 mutex_exit(&hashbucket->lwpchan_lock);
319 }
320 mutex_exit(&p->p_lcp_lock);
321 }
322
323 /*
324 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
325 * Called when the process exits or execs. All lwps except one have
326 * exited so we need no locks here.
327 */
328 void
lwpchan_destroy_cache(int exec)329 lwpchan_destroy_cache(int exec)
330 {
331 proc_t *p = curproc;
332 lwpchan_hashbucket_t *hashbucket;
333 lwpchan_hashbucket_t *endbucket;
334 lwpchan_data_t *lcp;
335 lwpchan_entry_t *ent;
336 lwpchan_entry_t *next;
337 uint16_t lockflg;
338
339 lcp = p->p_lcp;
340 p->p_lcp = NULL;
341
342 lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
343 hashbucket = lcp->lwpchan_cache;
344 endbucket = hashbucket + lcp->lwpchan_size;
345 for (; hashbucket < endbucket; hashbucket++) {
346 ent = hashbucket->lwpchan_chain;
347 hashbucket->lwpchan_chain = NULL;
348 while (ent != NULL) {
349 next = ent->lwpchan_next;
350 if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
351 (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
352 == (USYNC_PROCESS | LOCK_ROBUST))
353 lwp_mutex_cleanup(ent, lockflg);
354 kmem_free(ent, sizeof (*ent));
355 ent = next;
356 }
357 }
358
359 while (lcp != NULL) {
360 lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
361 kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
362 sizeof (lwpchan_hashbucket_t));
363 kmem_free(lcp, sizeof (lwpchan_data_t));
364 lcp = next_lcp;
365 }
366 }
367
368 /*
369 * Return zero when there is an entry in the lwpchan cache for the
370 * given process virtual address and non-zero when there is not.
371 * The returned non-zero value is the current length of the
372 * hash chain plus one. The caller holds the hash bucket lock.
373 */
374 static uint_t
lwpchan_cache_mapping(caddr_t addr,int type,int pool,lwpchan_t * lwpchan,lwpchan_hashbucket_t * hashbucket)375 lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
376 lwpchan_hashbucket_t *hashbucket)
377 {
378 lwpchan_entry_t *ent;
379 uint_t count = 1;
380
381 for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
382 if (ent->lwpchan_addr == addr) {
383 if (ent->lwpchan_type != type ||
384 ent->lwpchan_pool != pool) {
385 /*
386 * This shouldn't happen, but might if the
387 * process reuses its memory for different
388 * types of sync objects. We test first
389 * to avoid grabbing the memory cache line.
390 */
391 ent->lwpchan_type = (uint16_t)type;
392 ent->lwpchan_pool = (uint16_t)pool;
393 }
394 *lwpchan = ent->lwpchan_lwpchan;
395 return (0);
396 }
397 count++;
398 }
399 return (count);
400 }
401
402 /*
403 * Return the cached lwpchan mapping if cached, otherwise insert
404 * a virtual address to lwpchan mapping into the cache.
405 */
406 static int
lwpchan_get_mapping(struct as * as,caddr_t addr,caddr_t uaddr,int type,lwpchan_t * lwpchan,int pool)407 lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
408 int type, lwpchan_t *lwpchan, int pool)
409 {
410 proc_t *p = curproc;
411 lwpchan_data_t *lcp;
412 lwpchan_hashbucket_t *hashbucket;
413 lwpchan_entry_t *ent;
414 memid_t memid;
415 uint_t count;
416 uint_t bits;
417
418 top:
419 /* initialize the lwpchan cache, if necesary */
420 if ((lcp = p->p_lcp) == NULL) {
421 lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
422 goto top;
423 }
424 hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
425 mutex_enter(&hashbucket->lwpchan_lock);
426 if (lcp != p->p_lcp) {
427 /* someone resized the lwpchan cache; start over */
428 mutex_exit(&hashbucket->lwpchan_lock);
429 goto top;
430 }
431 if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
432 /* it's in the cache */
433 mutex_exit(&hashbucket->lwpchan_lock);
434 return (1);
435 }
436 mutex_exit(&hashbucket->lwpchan_lock);
437 if (as_getmemid(as, addr, &memid) != 0)
438 return (0);
439 lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
440 lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
441 ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
442 mutex_enter(&hashbucket->lwpchan_lock);
443 if (lcp != p->p_lcp) {
444 /* someone resized the lwpchan cache; start over */
445 mutex_exit(&hashbucket->lwpchan_lock);
446 kmem_free(ent, sizeof (*ent));
447 goto top;
448 }
449 count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
450 if (count == 0) {
451 /* someone else added this entry to the cache */
452 mutex_exit(&hashbucket->lwpchan_lock);
453 kmem_free(ent, sizeof (*ent));
454 return (1);
455 }
456 if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
457 (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
458 /* hash chain too long; reallocate the hash table */
459 mutex_exit(&hashbucket->lwpchan_lock);
460 kmem_free(ent, sizeof (*ent));
461 lwpchan_alloc_cache(p, bits + 1);
462 goto top;
463 }
464 ent->lwpchan_addr = addr;
465 ent->lwpchan_uaddr = uaddr;
466 ent->lwpchan_type = (uint16_t)type;
467 ent->lwpchan_pool = (uint16_t)pool;
468 ent->lwpchan_lwpchan = *lwpchan;
469 ent->lwpchan_next = hashbucket->lwpchan_chain;
470 hashbucket->lwpchan_chain = ent;
471 atomic_inc_32(&lcp->lwpchan_entries);
472 mutex_exit(&hashbucket->lwpchan_lock);
473 return (1);
474 }
475
476 /*
477 * Return a unique pair of identifiers that corresponds to a
478 * synchronization object's virtual address. Process-shared
479 * sync objects usually get vnode/offset from as_getmemid().
480 */
481 static int
get_lwpchan(struct as * as,caddr_t addr,int type,lwpchan_t * lwpchan,int pool)482 get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
483 {
484 /*
485 * If the lwp synch object is defined to be process-private,
486 * we just make the first field of the lwpchan be 'as' and
487 * the second field be the synch object's virtual address.
488 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
489 * The lwpchan cache is used only for process-shared objects.
490 */
491 if (!(type & USYNC_PROCESS)) {
492 lwpchan->lc_wchan0 = (caddr_t)as;
493 lwpchan->lc_wchan = addr;
494 return (1);
495 }
496
497 return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
498 }
499
500 static void
lwp_block(lwpchan_t * lwpchan)501 lwp_block(lwpchan_t *lwpchan)
502 {
503 kthread_t *t = curthread;
504 klwp_t *lwp = ttolwp(t);
505 sleepq_head_t *sqh;
506
507 thread_lock(t);
508 t->t_flag |= T_WAKEABLE;
509 t->t_lwpchan = *lwpchan;
510 t->t_sobj_ops = &lwp_sobj_ops;
511 t->t_release = 0;
512 sqh = lwpsqhash(lwpchan);
513 disp_lock_enter_high(&sqh->sq_lock);
514 CL_SLEEP(t);
515 DTRACE_SCHED(sleep);
516 THREAD_SLEEP(t, &sqh->sq_lock);
517 sleepq_insert(&sqh->sq_queue, t);
518 thread_unlock(t);
519 lwp->lwp_asleep = 1;
520 lwp->lwp_sysabort = 0;
521 lwp->lwp_ru.nvcsw++;
522 (void) new_mstate(curthread, LMS_SLEEP);
523 }
524
525 static kthread_t *
lwpsobj_pi_owner(upimutex_t * up)526 lwpsobj_pi_owner(upimutex_t *up)
527 {
528 return (up->upi_owner);
529 }
530
531 static struct upimutex *
upi_get(upib_t * upibp,lwpchan_t * lcp)532 upi_get(upib_t *upibp, lwpchan_t *lcp)
533 {
534 struct upimutex *upip;
535
536 for (upip = upibp->upib_first; upip != NULL;
537 upip = upip->upi_nextchain) {
538 if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
539 upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
540 break;
541 }
542 return (upip);
543 }
544
545 static void
upi_chain_add(upib_t * upibp,struct upimutex * upimutex)546 upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
547 {
548 ASSERT(MUTEX_HELD(&upibp->upib_lock));
549
550 /*
551 * Insert upimutex at front of list. Maybe a bit unfair
552 * but assume that not many lwpchans hash to the same
553 * upimutextab bucket, i.e. the list of upimutexes from
554 * upib_first is not too long.
555 */
556 upimutex->upi_nextchain = upibp->upib_first;
557 upibp->upib_first = upimutex;
558 }
559
560 static void
upi_chain_del(upib_t * upibp,struct upimutex * upimutex)561 upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
562 {
563 struct upimutex **prev;
564
565 ASSERT(MUTEX_HELD(&upibp->upib_lock));
566
567 prev = &upibp->upib_first;
568 while (*prev != upimutex) {
569 prev = &(*prev)->upi_nextchain;
570 }
571 *prev = upimutex->upi_nextchain;
572 upimutex->upi_nextchain = NULL;
573 }
574
575 /*
576 * Add upimutex to chain of upimutexes held by curthread.
577 * Returns number of upimutexes held by curthread.
578 */
579 static uint32_t
upi_mylist_add(struct upimutex * upimutex)580 upi_mylist_add(struct upimutex *upimutex)
581 {
582 kthread_t *t = curthread;
583
584 /*
585 * Insert upimutex at front of list of upimutexes owned by t. This
586 * would match typical LIFO order in which nested locks are acquired
587 * and released.
588 */
589 upimutex->upi_nextowned = t->t_upimutex;
590 t->t_upimutex = upimutex;
591 t->t_nupinest++;
592 ASSERT(t->t_nupinest > 0);
593 return (t->t_nupinest);
594 }
595
596 /*
597 * Delete upimutex from list of upimutexes owned by curthread.
598 */
599 static void
upi_mylist_del(struct upimutex * upimutex)600 upi_mylist_del(struct upimutex *upimutex)
601 {
602 kthread_t *t = curthread;
603 struct upimutex **prev;
604
605 /*
606 * Since the order in which nested locks are acquired and released,
607 * is typically LIFO, and typical nesting levels are not too deep, the
608 * following should not be expensive in the general case.
609 */
610 prev = &t->t_upimutex;
611 while (*prev != upimutex) {
612 prev = &(*prev)->upi_nextowned;
613 }
614 *prev = upimutex->upi_nextowned;
615 upimutex->upi_nextowned = NULL;
616 ASSERT(t->t_nupinest > 0);
617 t->t_nupinest--;
618 }
619
620 /*
621 * Returns true if upimutex is owned. Should be called only when upim points
622 * to kmem which cannot disappear from underneath.
623 */
624 static int
upi_owned(upimutex_t * upim)625 upi_owned(upimutex_t *upim)
626 {
627 return (upim->upi_owner == curthread);
628 }
629
630 /*
631 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
632 */
633 static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t * lp,uint8_t type)634 lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
635 {
636 lwpchan_t lwpchan;
637 upib_t *upibp;
638 struct upimutex *upimutex;
639
640 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
641 &lwpchan, LWPCHAN_MPPOOL))
642 return (NULL);
643
644 upibp = &UPI_CHAIN(lwpchan);
645 mutex_enter(&upibp->upib_lock);
646 upimutex = upi_get(upibp, &lwpchan);
647 if (upimutex == NULL || upimutex->upi_owner != curthread) {
648 mutex_exit(&upibp->upib_lock);
649 return (NULL);
650 }
651 mutex_exit(&upibp->upib_lock);
652 return (upimutex);
653 }
654
655 /*
656 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
657 * no lock hand-off occurrs.
658 */
659 static void
upimutex_unlock(struct upimutex * upimutex,uint16_t flag)660 upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
661 {
662 turnstile_t *ts;
663 upib_t *upibp;
664 kthread_t *newowner;
665
666 upi_mylist_del(upimutex);
667 upibp = upimutex->upi_upibp;
668 mutex_enter(&upibp->upib_lock);
669 if (upimutex->upi_waiter != 0) { /* if waiters */
670 ts = turnstile_lookup(upimutex);
671 if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
672 /* hand-off lock to highest prio waiter */
673 newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
674 upimutex->upi_owner = newowner;
675 if (ts->ts_waiters == 1)
676 upimutex->upi_waiter = 0;
677 turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
678 mutex_exit(&upibp->upib_lock);
679 return;
680 } else if (ts != NULL) {
681 /* LOCK_NOTRECOVERABLE: wakeup all */
682 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
683 } else {
684 /*
685 * Misleading w bit. Waiters might have been
686 * interrupted. No need to clear the w bit (upimutex
687 * will soon be freed). Re-calculate PI from existing
688 * waiters.
689 */
690 turnstile_exit(upimutex);
691 turnstile_pi_recalc();
692 }
693 }
694 /*
695 * no waiters, or LOCK_NOTRECOVERABLE.
696 * remove from the bucket chain of upi mutexes.
697 * de-allocate kernel memory (upimutex).
698 */
699 upi_chain_del(upimutex->upi_upibp, upimutex);
700 mutex_exit(&upibp->upib_lock);
701 kmem_free(upimutex, sizeof (upimutex_t));
702 }
703
704 static int
lwp_upimutex_lock(lwp_mutex_t * lp,uint8_t type,int try,lwp_timer_t * lwptp)705 lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
706 {
707 label_t ljb;
708 int error = 0;
709 lwpchan_t lwpchan;
710 uint16_t flag;
711 upib_t *upibp;
712 volatile struct upimutex *upimutex = NULL;
713 turnstile_t *ts;
714 uint32_t nupinest;
715 volatile int upilocked = 0;
716
717 if (on_fault(&ljb)) {
718 if (upilocked)
719 upimutex_unlock((upimutex_t *)upimutex, 0);
720 error = EFAULT;
721 goto out;
722 }
723 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
724 &lwpchan, LWPCHAN_MPPOOL)) {
725 error = EFAULT;
726 goto out;
727 }
728 upibp = &UPI_CHAIN(lwpchan);
729 retry:
730 mutex_enter(&upibp->upib_lock);
731 upimutex = upi_get(upibp, &lwpchan);
732 if (upimutex == NULL) {
733 /* lock available since lwpchan has no upimutex */
734 upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
735 upi_chain_add(upibp, (upimutex_t *)upimutex);
736 upimutex->upi_owner = curthread; /* grab lock */
737 upimutex->upi_upibp = upibp;
738 upimutex->upi_vaddr = lp;
739 upimutex->upi_lwpchan = lwpchan;
740 mutex_exit(&upibp->upib_lock);
741 nupinest = upi_mylist_add((upimutex_t *)upimutex);
742 upilocked = 1;
743 fuword16_noerr(&lp->mutex_flag, &flag);
744 if (nupinest > maxnestupimx &&
745 secpolicy_resource(CRED()) != 0) {
746 upimutex_unlock((upimutex_t *)upimutex, flag);
747 error = ENOMEM;
748 goto out;
749 }
750 if (flag & LOCK_NOTRECOVERABLE) {
751 /*
752 * Since the setting of LOCK_NOTRECOVERABLE
753 * was done under the high-level upi mutex,
754 * in lwp_upimutex_unlock(), this flag needs to
755 * be checked while holding the upi mutex.
756 * If set, this thread should return without
757 * the lock held, and with the right error code.
758 */
759 upimutex_unlock((upimutex_t *)upimutex, flag);
760 upilocked = 0;
761 error = ENOTRECOVERABLE;
762 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
763 if (flag & LOCK_OWNERDEAD)
764 error = EOWNERDEAD;
765 else if (type & USYNC_PROCESS_ROBUST)
766 error = ELOCKUNMAPPED;
767 else
768 error = EOWNERDEAD;
769 }
770 goto out;
771 }
772 /*
773 * If a upimutex object exists, it must have an owner.
774 * This is due to lock hand-off, and release of upimutex when no
775 * waiters are present at unlock time,
776 */
777 ASSERT(upimutex->upi_owner != NULL);
778 if (upimutex->upi_owner == curthread) {
779 /*
780 * The user wrapper can check if the mutex type is
781 * ERRORCHECK: if not, it should stall at user-level.
782 * If so, it should return the error code.
783 */
784 mutex_exit(&upibp->upib_lock);
785 error = EDEADLK;
786 goto out;
787 }
788 if (try == UPIMUTEX_TRY) {
789 mutex_exit(&upibp->upib_lock);
790 error = EBUSY;
791 goto out;
792 }
793 /*
794 * Block for the lock.
795 */
796 if ((error = lwptp->lwpt_time_error) != 0) {
797 /*
798 * The SUSV3 Posix spec is very clear that we
799 * should get no error from validating the
800 * timer until we would actually sleep.
801 */
802 mutex_exit(&upibp->upib_lock);
803 goto out;
804 }
805 if (lwptp->lwpt_tsp != NULL) {
806 /*
807 * Unlike the protocol for other lwp timedwait operations,
808 * we must drop t_delay_lock before going to sleep in
809 * turnstile_block() for a upi mutex.
810 * See the comments below and in turnstile.c
811 */
812 mutex_enter(&curthread->t_delay_lock);
813 (void) lwp_timer_enqueue(lwptp);
814 mutex_exit(&curthread->t_delay_lock);
815 }
816 /*
817 * Now, set the waiter bit and block for the lock in turnstile_block().
818 * No need to preserve the previous wbit since a lock try is not
819 * attempted after setting the wait bit. Wait bit is set under
820 * the upib_lock, which is not released until the turnstile lock
821 * is acquired. Say, the upimutex is L:
822 *
823 * 1. upib_lock is held so the waiter does not have to retry L after
824 * setting the wait bit: since the owner has to grab the upib_lock
825 * to unlock L, it will certainly see the wait bit set.
826 * 2. upib_lock is not released until the turnstile lock is acquired.
827 * This is the key to preventing a missed wake-up. Otherwise, the
828 * owner could acquire the upib_lock, and the tc_lock, to call
829 * turnstile_wakeup(). All this, before the waiter gets tc_lock
830 * to sleep in turnstile_block(). turnstile_wakeup() will then not
831 * find this waiter, resulting in the missed wakeup.
832 * 3. The upib_lock, being a kernel mutex, cannot be released while
833 * holding the tc_lock (since mutex_exit() could need to acquire
834 * the same tc_lock)...and so is held when calling turnstile_block().
835 * The address of upib_lock is passed to turnstile_block() which
836 * releases it after releasing all turnstile locks, and before going
837 * to sleep in swtch().
838 * 4. The waiter value cannot be a count of waiters, because a waiter
839 * can be interrupted. The interrupt occurs under the tc_lock, at
840 * which point, the upib_lock cannot be locked, to decrement waiter
841 * count. So, just treat the waiter state as a bit, not a count.
842 */
843 ts = turnstile_lookup((upimutex_t *)upimutex);
844 upimutex->upi_waiter = 1;
845 error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
846 &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
847 /*
848 * Hand-off implies that we wakeup holding the lock, except when:
849 * - deadlock is detected
850 * - lock is not recoverable
851 * - we got an interrupt or timeout
852 * If we wake up due to an interrupt or timeout, we may
853 * or may not be holding the lock due to mutex hand-off.
854 * Use lwp_upimutex_owned() to check if we do hold the lock.
855 */
856 if (error != 0) {
857 if ((error == EINTR || error == ETIME) &&
858 (upimutex = lwp_upimutex_owned(lp, type))) {
859 /*
860 * Unlock and return - the re-startable syscall will
861 * try the lock again if we got EINTR.
862 */
863 (void) upi_mylist_add((upimutex_t *)upimutex);
864 upimutex_unlock((upimutex_t *)upimutex, 0);
865 }
866 /*
867 * The only other possible error is EDEADLK. If so, upimutex
868 * is valid, since its owner is deadlocked with curthread.
869 */
870 ASSERT(error == EINTR || error == ETIME ||
871 (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
872 ASSERT(!lwp_upimutex_owned(lp, type));
873 goto out;
874 }
875 if (lwp_upimutex_owned(lp, type)) {
876 ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
877 nupinest = upi_mylist_add((upimutex_t *)upimutex);
878 upilocked = 1;
879 }
880 /*
881 * Now, need to read the user-level lp->mutex_flag to do the following:
882 *
883 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
884 * should be returned.
885 * - if lock isn't held, check if ENOTRECOVERABLE should
886 * be returned.
887 *
888 * Now, either lp->mutex_flag is readable or it's not. If not
889 * readable, the on_fault path will cause a return with EFAULT
890 * as it should. If it is readable, the state of the flag
891 * encodes the robustness state of the lock:
892 *
893 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
894 * or LOCK_UNMAPPED setting will influence the return code
895 * appropriately. If the upimutex is not locked here, this
896 * could be due to a spurious wake-up or a NOTRECOVERABLE
897 * event. The flag's setting can be used to distinguish
898 * between these two events.
899 */
900 fuword16_noerr(&lp->mutex_flag, &flag);
901 if (upilocked) {
902 /*
903 * If the thread wakes up from turnstile_block with the lock
904 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
905 * since it would not have been handed-off the lock.
906 * So, no need to check for this case.
907 */
908 if (nupinest > maxnestupimx &&
909 secpolicy_resource(CRED()) != 0) {
910 upimutex_unlock((upimutex_t *)upimutex, flag);
911 upilocked = 0;
912 error = ENOMEM;
913 } else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
914 if (flag & LOCK_OWNERDEAD)
915 error = EOWNERDEAD;
916 else if (type & USYNC_PROCESS_ROBUST)
917 error = ELOCKUNMAPPED;
918 else
919 error = EOWNERDEAD;
920 }
921 } else {
922 /*
923 * Wake-up without the upimutex held. Either this is a
924 * spurious wake-up (due to signals, forkall(), whatever), or
925 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
926 * of the mutex flag can be used to distinguish between the
927 * two events.
928 */
929 if (flag & LOCK_NOTRECOVERABLE) {
930 error = ENOTRECOVERABLE;
931 } else {
932 /*
933 * Here, the flag could be set to LOCK_OWNERDEAD or
934 * not. In both cases, this is a spurious wakeup,
935 * since the upi lock is not held, but the thread
936 * has returned from turnstile_block().
937 *
938 * The user flag could be LOCK_OWNERDEAD if, at the
939 * same time as curthread having been woken up
940 * spuriously, the owner (say Tdead) has died, marked
941 * the mutex flag accordingly, and handed off the lock
942 * to some other waiter (say Tnew). curthread just
943 * happened to read the flag while Tnew has yet to deal
944 * with the owner-dead event.
945 *
946 * In this event, curthread should retry the lock.
947 * If Tnew is able to cleanup the lock, curthread
948 * will eventually get the lock with a zero error code,
949 * If Tnew is unable to cleanup, its eventual call to
950 * unlock the lock will result in the mutex flag being
951 * set to LOCK_NOTRECOVERABLE, and the wake-up of
952 * all waiters, including curthread, which will then
953 * eventually return ENOTRECOVERABLE due to the above
954 * check.
955 *
956 * Of course, if the user-flag is not set with
957 * LOCK_OWNERDEAD, retrying is the thing to do, since
958 * this is definitely a spurious wakeup.
959 */
960 goto retry;
961 }
962 }
963
964 out:
965 no_fault();
966 return (error);
967 }
968
969
970 static int
lwp_upimutex_unlock(lwp_mutex_t * lp,uint8_t type)971 lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
972 {
973 label_t ljb;
974 int error = 0;
975 lwpchan_t lwpchan;
976 uint16_t flag;
977 upib_t *upibp;
978 volatile struct upimutex *upimutex = NULL;
979 volatile int upilocked = 0;
980
981 if (on_fault(&ljb)) {
982 if (upilocked)
983 upimutex_unlock((upimutex_t *)upimutex, 0);
984 error = EFAULT;
985 goto out;
986 }
987 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
988 &lwpchan, LWPCHAN_MPPOOL)) {
989 error = EFAULT;
990 goto out;
991 }
992 upibp = &UPI_CHAIN(lwpchan);
993 mutex_enter(&upibp->upib_lock);
994 upimutex = upi_get(upibp, &lwpchan);
995 /*
996 * If the lock is not held, or the owner is not curthread, return
997 * error. The user-level wrapper can return this error or stall,
998 * depending on whether mutex is of ERRORCHECK type or not.
999 */
1000 if (upimutex == NULL || upimutex->upi_owner != curthread) {
1001 mutex_exit(&upibp->upib_lock);
1002 error = EPERM;
1003 goto out;
1004 }
1005 mutex_exit(&upibp->upib_lock); /* release for user memory access */
1006 upilocked = 1;
1007 fuword16_noerr(&lp->mutex_flag, &flag);
1008 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
1009 /*
1010 * transition mutex to the LOCK_NOTRECOVERABLE state.
1011 */
1012 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
1013 flag |= LOCK_NOTRECOVERABLE;
1014 suword16_noerr(&lp->mutex_flag, flag);
1015 }
1016 set_owner_pid(lp, 0, 0);
1017 upimutex_unlock((upimutex_t *)upimutex, flag);
1018 upilocked = 0;
1019 out:
1020 no_fault();
1021 return (error);
1022 }
1023
1024 /*
1025 * Set the owner and ownerpid fields of a user-level mutex. Note, this function
1026 * uses the suword*_noerr routines which must be called between
1027 * on_fault/no_fault. However, this routine itself does not do the
1028 * on_fault/no_fault and it is assumed all the callers will do so instead!
1029 */
1030 static void
set_owner_pid(lwp_mutex_t * lp,uintptr_t owner,pid_t pid)1031 set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
1032 {
1033 union {
1034 uint64_t word64;
1035 uint32_t word32[2];
1036 } un;
1037
1038 un.word64 = (uint64_t)owner;
1039
1040 suword32_noerr(&lp->mutex_ownerpid, pid);
1041 #if defined(_LP64)
1042 if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
1043 suword64_noerr(&lp->mutex_owner, un.word64);
1044 return;
1045 }
1046 #endif
1047 /* mutex is unaligned or we are running on a 32-bit kernel */
1048 suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
1049 suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
1050 }
1051
1052 /*
1053 * Clear the contents of a user-level mutex; return the flags.
1054 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
1055 */
1056 static uint16_t
lwp_clear_mutex(lwp_mutex_t * lp,uint16_t lockflg)1057 lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
1058 {
1059 uint16_t flag;
1060
1061 fuword16_noerr(&lp->mutex_flag, &flag);
1062 if ((flag &
1063 (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
1064 flag |= lockflg;
1065 suword16_noerr(&lp->mutex_flag, flag);
1066 }
1067 set_owner_pid(lp, 0, 0);
1068 suword8_noerr(&lp->mutex_rcount, 0);
1069
1070 return (flag);
1071 }
1072
1073 /*
1074 * Mark user mutex state, corresponding to kernel upimutex,
1075 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
1076 */
1077 static int
upi_dead(upimutex_t * upip,uint16_t lockflg)1078 upi_dead(upimutex_t *upip, uint16_t lockflg)
1079 {
1080 label_t ljb;
1081 int error = 0;
1082 lwp_mutex_t *lp;
1083
1084 if (on_fault(&ljb)) {
1085 error = EFAULT;
1086 goto out;
1087 }
1088
1089 lp = upip->upi_vaddr;
1090 (void) lwp_clear_mutex(lp, lockflg);
1091 suword8_noerr(&lp->mutex_lockw, 0);
1092 out:
1093 no_fault();
1094 return (error);
1095 }
1096
1097 /*
1098 * Unlock all upimutexes held by curthread, since curthread is dying.
1099 * For each upimutex, attempt to mark its corresponding user mutex object as
1100 * dead.
1101 */
1102 void
upimutex_cleanup()1103 upimutex_cleanup()
1104 {
1105 kthread_t *t = curthread;
1106 uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
1107 LOCK_UNMAPPED : LOCK_OWNERDEAD;
1108 struct upimutex *upip;
1109
1110 while ((upip = t->t_upimutex) != NULL) {
1111 if (upi_dead(upip, lockflg) != 0) {
1112 /*
1113 * If the user object associated with this upimutex is
1114 * unmapped, unlock upimutex with the
1115 * LOCK_NOTRECOVERABLE flag, so that all waiters are
1116 * woken up. Since user object is unmapped, it could
1117 * not be marked as dead or notrecoverable.
1118 * The waiters will now all wake up and return
1119 * ENOTRECOVERABLE, since they would find that the lock
1120 * has not been handed-off to them.
1121 * See lwp_upimutex_lock().
1122 */
1123 upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
1124 } else {
1125 /*
1126 * The user object has been updated as dead.
1127 * Unlock the upimutex: if no waiters, upip kmem will
1128 * be freed. If there is a waiter, the lock will be
1129 * handed off. If exit() is in progress, each existing
1130 * waiter will successively get the lock, as owners
1131 * die, and each new owner will call this routine as
1132 * it dies. The last owner will free kmem, since
1133 * it will find the upimutex has no waiters. So,
1134 * eventually, the kmem is guaranteed to be freed.
1135 */
1136 upimutex_unlock(upip, 0);
1137 }
1138 /*
1139 * Note that the call to upimutex_unlock() above will delete
1140 * upimutex from the t_upimutexes chain. And so the
1141 * while loop will eventually terminate.
1142 */
1143 }
1144 }
1145
1146 int
lwp_mutex_timedlock(lwp_mutex_t * lp,timespec_t * tsp,uintptr_t owner)1147 lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
1148 {
1149 kthread_t *t = curthread;
1150 klwp_t *lwp = ttolwp(t);
1151 proc_t *p = ttoproc(t);
1152 lwp_timer_t lwpt;
1153 caddr_t timedwait;
1154 int error = 0;
1155 int time_error;
1156 clock_t tim = -1;
1157 uchar_t waiters;
1158 volatile int locked = 0;
1159 volatile int watched = 0;
1160 label_t ljb;
1161 volatile uint8_t type = 0;
1162 lwpchan_t lwpchan;
1163 sleepq_head_t *sqh;
1164 uint16_t flag;
1165 int imm_timeout = 0;
1166
1167 if ((caddr_t)lp >= p->p_as->a_userlimit)
1168 return (set_errno(EFAULT));
1169
1170 /*
1171 * Put the lwp in an orderly state for debugging,
1172 * in case we are stopped while sleeping, below.
1173 */
1174 prstop(PR_REQUESTED, 0);
1175
1176 timedwait = (caddr_t)tsp;
1177 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
1178 lwpt.lwpt_imm_timeout) {
1179 imm_timeout = 1;
1180 timedwait = NULL;
1181 }
1182
1183 /*
1184 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
1185 * this micro state is really a run state. If the thread indeed blocks,
1186 * this state becomes valid. If not, the state is converted back to
1187 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
1188 * when blocking.
1189 */
1190 (void) new_mstate(t, LMS_USER_LOCK);
1191 if (on_fault(&ljb)) {
1192 if (locked)
1193 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1194 error = EFAULT;
1195 goto out;
1196 }
1197 /*
1198 * Force Copy-on-write if necessary and ensure that the
1199 * synchronization object resides in read/write memory.
1200 * Cause an EFAULT return now if this is not so.
1201 */
1202 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
1203 suword8_noerr(&lp->mutex_type, type);
1204 if (UPIMUTEX(type)) {
1205 no_fault();
1206 error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
1207 if (error == 0 || error == EOWNERDEAD ||
1208 error == ELOCKUNMAPPED) {
1209 volatile int locked = error != 0;
1210 if (on_fault(&ljb)) {
1211 if (locked != 0)
1212 error = lwp_upimutex_unlock(lp, type);
1213 else
1214 error = EFAULT;
1215 goto upierr;
1216 }
1217 set_owner_pid(lp, owner,
1218 (type & USYNC_PROCESS)? p->p_pid : 0);
1219 no_fault();
1220 }
1221 upierr:
1222 if (tsp && !time_error) /* copyout the residual time left */
1223 error = lwp_timer_copyout(&lwpt, error);
1224 if (error)
1225 return (set_errno(error));
1226 return (0);
1227 }
1228 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
1229 &lwpchan, LWPCHAN_MPPOOL)) {
1230 error = EFAULT;
1231 goto out;
1232 }
1233 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1234 locked = 1;
1235 if (type & LOCK_ROBUST) {
1236 fuword16_noerr(&lp->mutex_flag, &flag);
1237 if (flag & LOCK_NOTRECOVERABLE) {
1238 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1239 error = ENOTRECOVERABLE;
1240 goto out;
1241 }
1242 }
1243 fuword8_noerr(&lp->mutex_waiters, &waiters);
1244 suword8_noerr(&lp->mutex_waiters, 1);
1245
1246 /*
1247 * If watchpoints are set, they need to be restored, since
1248 * atomic accesses of memory such as the call to ulock_try()
1249 * below cannot be watched.
1250 */
1251
1252 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1253
1254 while (!ulock_try(&lp->mutex_lockw)) {
1255 if (time_error) {
1256 /*
1257 * The SUSV3 Posix spec is very clear that we
1258 * should get no error from validating the
1259 * timer until we would actually sleep.
1260 */
1261 error = time_error;
1262 break;
1263 }
1264
1265 if (watched) {
1266 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1267 watched = 0;
1268 }
1269
1270 if (timedwait) {
1271 /*
1272 * If we successfully queue the timeout,
1273 * then don't drop t_delay_lock until
1274 * we are on the sleep queue (below).
1275 */
1276 mutex_enter(&t->t_delay_lock);
1277 if (lwp_timer_enqueue(&lwpt) != 0) {
1278 mutex_exit(&t->t_delay_lock);
1279 imm_timeout = 1;
1280 timedwait = NULL;
1281 }
1282 }
1283 lwp_block(&lwpchan);
1284 /*
1285 * Nothing should happen to cause the lwp to go to
1286 * sleep again until after it returns from swtch().
1287 */
1288 if (timedwait)
1289 mutex_exit(&t->t_delay_lock);
1290 locked = 0;
1291 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1292 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
1293 setrun(t);
1294 swtch();
1295 t->t_flag &= ~T_WAKEABLE;
1296 if (timedwait)
1297 tim = lwp_timer_dequeue(&lwpt);
1298 setallwatch();
1299 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
1300 error = EINTR;
1301 else if (imm_timeout || (timedwait && tim == -1))
1302 error = ETIME;
1303 if (error) {
1304 lwp->lwp_asleep = 0;
1305 lwp->lwp_sysabort = 0;
1306 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
1307 S_WRITE);
1308
1309 /*
1310 * Need to re-compute waiters bit. The waiters field in
1311 * the lock is not reliable. Either of two things could
1312 * have occurred: no lwp may have called lwp_release()
1313 * for me but I have woken up due to a signal or
1314 * timeout. In this case, the waiter bit is incorrect
1315 * since it is still set to 1, set above.
1316 * OR an lwp_release() did occur for some other lwp on
1317 * the same lwpchan. In this case, the waiter bit is
1318 * correct. But which event occurred, one can't tell.
1319 * So, recompute.
1320 */
1321 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1322 locked = 1;
1323 sqh = lwpsqhash(&lwpchan);
1324 disp_lock_enter(&sqh->sq_lock);
1325 waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
1326 disp_lock_exit(&sqh->sq_lock);
1327 break;
1328 }
1329 lwp->lwp_asleep = 0;
1330 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
1331 S_WRITE);
1332 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1333 locked = 1;
1334 fuword8_noerr(&lp->mutex_waiters, &waiters);
1335 suword8_noerr(&lp->mutex_waiters, 1);
1336 if (type & LOCK_ROBUST) {
1337 fuword16_noerr(&lp->mutex_flag, &flag);
1338 if (flag & LOCK_NOTRECOVERABLE) {
1339 error = ENOTRECOVERABLE;
1340 break;
1341 }
1342 }
1343 }
1344
1345 if (t->t_mstate == LMS_USER_LOCK)
1346 (void) new_mstate(t, LMS_SYSTEM);
1347
1348 if (error == 0) {
1349 set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
1350 if (type & LOCK_ROBUST) {
1351 fuword16_noerr(&lp->mutex_flag, &flag);
1352 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
1353 if (flag & LOCK_OWNERDEAD)
1354 error = EOWNERDEAD;
1355 else if (type & USYNC_PROCESS_ROBUST)
1356 error = ELOCKUNMAPPED;
1357 else
1358 error = EOWNERDEAD;
1359 }
1360 }
1361 }
1362 suword8_noerr(&lp->mutex_waiters, waiters);
1363 locked = 0;
1364 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1365 out:
1366 no_fault();
1367 if (watched)
1368 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1369 if (tsp && !time_error) /* copyout the residual time left */
1370 error = lwp_timer_copyout(&lwpt, error);
1371 if (error)
1372 return (set_errno(error));
1373 return (0);
1374 }
1375
1376 static int
iswanted(kthread_t * t,lwpchan_t * lwpchan)1377 iswanted(kthread_t *t, lwpchan_t *lwpchan)
1378 {
1379 /*
1380 * The caller holds the dispatcher lock on the sleep queue.
1381 */
1382 while (t != NULL) {
1383 if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1384 t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
1385 return (1);
1386 t = t->t_link;
1387 }
1388 return (0);
1389 }
1390
1391 /*
1392 * Return the highest priority thread sleeping on this lwpchan.
1393 */
1394 static kthread_t *
lwp_queue_waiter(lwpchan_t * lwpchan)1395 lwp_queue_waiter(lwpchan_t *lwpchan)
1396 {
1397 sleepq_head_t *sqh;
1398 kthread_t *tp;
1399
1400 sqh = lwpsqhash(lwpchan);
1401 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */
1402 for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
1403 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1404 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
1405 break;
1406 }
1407 disp_lock_exit(&sqh->sq_lock);
1408 return (tp);
1409 }
1410
1411 static int
lwp_release(lwpchan_t * lwpchan,uchar_t * waiters,int sync_type)1412 lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
1413 {
1414 sleepq_head_t *sqh;
1415 kthread_t *tp;
1416 kthread_t **tpp;
1417
1418 sqh = lwpsqhash(lwpchan);
1419 disp_lock_enter(&sqh->sq_lock); /* lock the sleep queue */
1420 tpp = &sqh->sq_queue.sq_first;
1421 while ((tp = *tpp) != NULL) {
1422 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1423 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
1424 /*
1425 * The following is typically false. It could be true
1426 * only if lwp_release() is called from
1427 * lwp_mutex_wakeup() after reading the waiters field
1428 * from memory in which the lwp lock used to be, but has
1429 * since been re-used to hold a lwp cv or lwp semaphore.
1430 * The thread "tp" found to match the lwp lock's wchan
1431 * is actually sleeping for the cv or semaphore which
1432 * now has the same wchan. In this case, lwp_release()
1433 * should return failure.
1434 */
1435 if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
1436 ASSERT(sync_type == 0);
1437 /*
1438 * assert that this can happen only for mutexes
1439 * i.e. sync_type == 0, for correctly written
1440 * user programs.
1441 */
1442 disp_lock_exit(&sqh->sq_lock);
1443 return (0);
1444 }
1445 *waiters = iswanted(tp->t_link, lwpchan);
1446 sleepq_unlink(tpp, tp);
1447 DTRACE_SCHED1(wakeup, kthread_t *, tp);
1448 tp->t_wchan0 = NULL;
1449 tp->t_wchan = NULL;
1450 tp->t_sobj_ops = NULL;
1451 tp->t_release = 1;
1452 THREAD_TRANSITION(tp); /* drops sleepq lock */
1453 CL_WAKEUP(tp);
1454 thread_unlock(tp); /* drop run queue lock */
1455 return (1);
1456 }
1457 tpp = &tp->t_link;
1458 }
1459 *waiters = 0;
1460 disp_lock_exit(&sqh->sq_lock);
1461 return (0);
1462 }
1463
1464 static void
lwp_release_all(lwpchan_t * lwpchan)1465 lwp_release_all(lwpchan_t *lwpchan)
1466 {
1467 sleepq_head_t *sqh;
1468 kthread_t *tp;
1469 kthread_t **tpp;
1470
1471 sqh = lwpsqhash(lwpchan);
1472 disp_lock_enter(&sqh->sq_lock); /* lock sleep q queue */
1473 tpp = &sqh->sq_queue.sq_first;
1474 while ((tp = *tpp) != NULL) {
1475 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1476 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
1477 sleepq_unlink(tpp, tp);
1478 DTRACE_SCHED1(wakeup, kthread_t *, tp);
1479 tp->t_wchan0 = NULL;
1480 tp->t_wchan = NULL;
1481 tp->t_sobj_ops = NULL;
1482 CL_WAKEUP(tp);
1483 thread_unlock_high(tp); /* release run queue lock */
1484 } else {
1485 tpp = &tp->t_link;
1486 }
1487 }
1488 disp_lock_exit(&sqh->sq_lock); /* drop sleep q lock */
1489 }
1490
1491 /*
1492 * unblock a lwp that is trying to acquire this mutex. the blocked
1493 * lwp resumes and retries to acquire the lock.
1494 */
1495 int
lwp_mutex_wakeup(lwp_mutex_t * lp,int release_all)1496 lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
1497 {
1498 proc_t *p = ttoproc(curthread);
1499 lwpchan_t lwpchan;
1500 uchar_t waiters;
1501 volatile int locked = 0;
1502 volatile int watched = 0;
1503 volatile uint8_t type = 0;
1504 label_t ljb;
1505 int error = 0;
1506
1507 if ((caddr_t)lp >= p->p_as->a_userlimit)
1508 return (set_errno(EFAULT));
1509
1510 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1511
1512 if (on_fault(&ljb)) {
1513 if (locked)
1514 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1515 error = EFAULT;
1516 goto out;
1517 }
1518 /*
1519 * Force Copy-on-write if necessary and ensure that the
1520 * synchronization object resides in read/write memory.
1521 * Cause an EFAULT return now if this is not so.
1522 */
1523 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
1524 suword8_noerr(&lp->mutex_type, type);
1525 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
1526 &lwpchan, LWPCHAN_MPPOOL)) {
1527 error = EFAULT;
1528 goto out;
1529 }
1530 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1531 locked = 1;
1532 /*
1533 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
1534 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
1535 * may fail. If it fails, do not write into the waiter bit.
1536 * The call to lwp_release() might fail due to one of three reasons:
1537 *
1538 * 1. due to the thread which set the waiter bit not actually
1539 * sleeping since it got the lock on the re-try. The waiter
1540 * bit will then be correctly updated by that thread. This
1541 * window may be closed by reading the wait bit again here
1542 * and not calling lwp_release() at all if it is zero.
1543 * 2. the thread which set the waiter bit and went to sleep
1544 * was woken up by a signal. This time, the waiter recomputes
1545 * the wait bit in the return with EINTR code.
1546 * 3. the waiter bit read by lwp_mutex_wakeup() was in
1547 * memory that has been re-used after the lock was dropped.
1548 * In this case, writing into the waiter bit would cause data
1549 * corruption.
1550 */
1551 if (release_all)
1552 lwp_release_all(&lwpchan);
1553 else if (lwp_release(&lwpchan, &waiters, 0))
1554 suword8_noerr(&lp->mutex_waiters, waiters);
1555 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1556 out:
1557 no_fault();
1558 if (watched)
1559 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1560 if (error)
1561 return (set_errno(error));
1562 return (0);
1563 }
1564
1565 /*
1566 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
1567 * a pointer to a mutex, a pointer to a timespec for a timed wait and
1568 * a flag telling the kernel whether or not to honor the kernel/user
1569 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
1570 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
1571 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL,
1572 * it is used an an in/out parameter. On entry, it contains the relative
1573 * time until timeout. On exit, we copyout the residual time left to it.
1574 */
1575 int
lwp_cond_wait(lwp_cond_t * cv,lwp_mutex_t * mp,timespec_t * tsp,int check_park)1576 lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
1577 {
1578 kthread_t *t = curthread;
1579 klwp_t *lwp = ttolwp(t);
1580 proc_t *p = ttoproc(t);
1581 lwp_timer_t lwpt;
1582 lwpchan_t cv_lwpchan;
1583 lwpchan_t m_lwpchan;
1584 caddr_t timedwait;
1585 volatile uint16_t type = 0;
1586 volatile uint8_t mtype = 0;
1587 uchar_t waiters;
1588 volatile int error;
1589 clock_t tim = -1;
1590 volatile int locked = 0;
1591 volatile int m_locked = 0;
1592 volatile int cvwatched = 0;
1593 volatile int mpwatched = 0;
1594 label_t ljb;
1595 volatile int no_lwpchan = 1;
1596 int imm_timeout = 0;
1597 int imm_unpark = 0;
1598
1599 if ((caddr_t)cv >= p->p_as->a_userlimit ||
1600 (caddr_t)mp >= p->p_as->a_userlimit)
1601 return (set_errno(EFAULT));
1602
1603 /*
1604 * Put the lwp in an orderly state for debugging,
1605 * in case we are stopped while sleeping, below.
1606 */
1607 prstop(PR_REQUESTED, 0);
1608
1609 timedwait = (caddr_t)tsp;
1610 if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
1611 return (set_errno(error));
1612 if (lwpt.lwpt_imm_timeout) {
1613 imm_timeout = 1;
1614 timedwait = NULL;
1615 }
1616
1617 (void) new_mstate(t, LMS_USER_LOCK);
1618
1619 if (on_fault(&ljb)) {
1620 if (no_lwpchan) {
1621 error = EFAULT;
1622 goto out;
1623 }
1624 if (m_locked) {
1625 m_locked = 0;
1626 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1627 }
1628 if (locked) {
1629 locked = 0;
1630 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
1631 }
1632 /*
1633 * set up another on_fault() for a possible fault
1634 * on the user lock accessed at "efault"
1635 */
1636 if (on_fault(&ljb)) {
1637 if (m_locked) {
1638 m_locked = 0;
1639 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1640 }
1641 goto out;
1642 }
1643 error = EFAULT;
1644 goto efault;
1645 }
1646
1647 /*
1648 * Force Copy-on-write if necessary and ensure that the
1649 * synchronization object resides in read/write memory.
1650 * Cause an EFAULT return now if this is not so.
1651 */
1652 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
1653 suword8_noerr(&mp->mutex_type, mtype);
1654 if (UPIMUTEX(mtype) == 0) {
1655 /* convert user level mutex, "mp", to a unique lwpchan */
1656 /* check if mtype is ok to use below, instead of type from cv */
1657 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
1658 &m_lwpchan, LWPCHAN_MPPOOL)) {
1659 error = EFAULT;
1660 goto out;
1661 }
1662 }
1663 fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
1664 suword16_noerr(&cv->cond_type, type);
1665 /* convert user level condition variable, "cv", to a unique lwpchan */
1666 if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
1667 &cv_lwpchan, LWPCHAN_CVPOOL)) {
1668 error = EFAULT;
1669 goto out;
1670 }
1671 no_lwpchan = 0;
1672 cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1673 if (UPIMUTEX(mtype) == 0)
1674 mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
1675 S_WRITE);
1676
1677 /*
1678 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
1679 * with respect to a possible wakeup which is a result of either
1680 * an lwp_cond_signal() or an lwp_cond_broadcast().
1681 *
1682 * What's misleading, is that the lwp is put to sleep after the
1683 * condition variable's mutex is released. This is OK as long as
1684 * the release operation is also done while holding lwpchan_lock.
1685 * The lwp is then put to sleep when the possibility of pagefaulting
1686 * or sleeping is completely eliminated.
1687 */
1688 lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
1689 locked = 1;
1690 if (UPIMUTEX(mtype) == 0) {
1691 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
1692 m_locked = 1;
1693 suword8_noerr(&cv->cond_waiters_kernel, 1);
1694 /*
1695 * unlock the condition variable's mutex. (pagefaults are
1696 * possible here.)
1697 */
1698 set_owner_pid(mp, 0, 0);
1699 ulock_clear(&mp->mutex_lockw);
1700 fuword8_noerr(&mp->mutex_waiters, &waiters);
1701 if (waiters != 0) {
1702 /*
1703 * Given the locking of lwpchan_lock around the release
1704 * of the mutex and checking for waiters, the following
1705 * call to lwp_release() can fail ONLY if the lock
1706 * acquirer is interrupted after setting the waiter bit,
1707 * calling lwp_block() and releasing lwpchan_lock.
1708 * In this case, it could get pulled off the lwp sleep
1709 * q (via setrun()) before the following call to
1710 * lwp_release() occurs. In this case, the lock
1711 * requestor will update the waiter bit correctly by
1712 * re-evaluating it.
1713 */
1714 if (lwp_release(&m_lwpchan, &waiters, 0))
1715 suword8_noerr(&mp->mutex_waiters, waiters);
1716 }
1717 m_locked = 0;
1718 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1719 } else {
1720 suword8_noerr(&cv->cond_waiters_kernel, 1);
1721 error = lwp_upimutex_unlock(mp, mtype);
1722 if (error) { /* if the upimutex unlock failed */
1723 locked = 0;
1724 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
1725 goto out;
1726 }
1727 }
1728 no_fault();
1729
1730 if (mpwatched) {
1731 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
1732 mpwatched = 0;
1733 }
1734 if (cvwatched) {
1735 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1736 cvwatched = 0;
1737 }
1738
1739 if (check_park && (!schedctl_is_park() || t->t_unpark)) {
1740 /*
1741 * We received a signal at user-level before calling here
1742 * or another thread wants us to return immediately
1743 * with EINTR. See lwp_unpark().
1744 */
1745 imm_unpark = 1;
1746 t->t_unpark = 0;
1747 timedwait = NULL;
1748 } else if (timedwait) {
1749 /*
1750 * If we successfully queue the timeout,
1751 * then don't drop t_delay_lock until
1752 * we are on the sleep queue (below).
1753 */
1754 mutex_enter(&t->t_delay_lock);
1755 if (lwp_timer_enqueue(&lwpt) != 0) {
1756 mutex_exit(&t->t_delay_lock);
1757 imm_timeout = 1;
1758 timedwait = NULL;
1759 }
1760 }
1761 t->t_flag |= T_WAITCVSEM;
1762 lwp_block(&cv_lwpchan);
1763 /*
1764 * Nothing should happen to cause the lwp to go to sleep
1765 * until after it returns from swtch().
1766 */
1767 if (timedwait)
1768 mutex_exit(&t->t_delay_lock);
1769 locked = 0;
1770 lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
1771 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
1772 (imm_timeout | imm_unpark))
1773 setrun(t);
1774 swtch();
1775 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
1776 if (timedwait)
1777 tim = lwp_timer_dequeue(&lwpt);
1778 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
1779 MUSTRETURN(p, t) || imm_unpark)
1780 error = EINTR;
1781 else if (imm_timeout || (timedwait && tim == -1))
1782 error = ETIME;
1783 lwp->lwp_asleep = 0;
1784 lwp->lwp_sysabort = 0;
1785 setallwatch();
1786
1787 if (t->t_mstate == LMS_USER_LOCK)
1788 (void) new_mstate(t, LMS_SYSTEM);
1789
1790 if (tsp && check_park) /* copyout the residual time left */
1791 error = lwp_timer_copyout(&lwpt, error);
1792
1793 /* the mutex is reacquired by the caller on return to user level */
1794 if (error) {
1795 /*
1796 * If we were concurrently lwp_cond_signal()d and we
1797 * received a UNIX signal or got a timeout, then perform
1798 * another lwp_cond_signal() to avoid consuming the wakeup.
1799 */
1800 if (t->t_release)
1801 (void) lwp_cond_signal(cv);
1802 return (set_errno(error));
1803 }
1804 return (0);
1805
1806 efault:
1807 /*
1808 * make sure that the user level lock is dropped before
1809 * returning to caller, since the caller always re-acquires it.
1810 */
1811 if (UPIMUTEX(mtype) == 0) {
1812 lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
1813 m_locked = 1;
1814 set_owner_pid(mp, 0, 0);
1815 ulock_clear(&mp->mutex_lockw);
1816 fuword8_noerr(&mp->mutex_waiters, &waiters);
1817 if (waiters != 0) {
1818 /*
1819 * See comment above on lock clearing and lwp_release()
1820 * success/failure.
1821 */
1822 if (lwp_release(&m_lwpchan, &waiters, 0))
1823 suword8_noerr(&mp->mutex_waiters, waiters);
1824 }
1825 m_locked = 0;
1826 lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1827 } else {
1828 (void) lwp_upimutex_unlock(mp, mtype);
1829 }
1830 out:
1831 no_fault();
1832 if (mpwatched)
1833 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
1834 if (cvwatched)
1835 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1836 if (t->t_mstate == LMS_USER_LOCK)
1837 (void) new_mstate(t, LMS_SYSTEM);
1838 return (set_errno(error));
1839 }
1840
1841 /*
1842 * wakeup one lwp that's blocked on this condition variable.
1843 */
1844 int
lwp_cond_signal(lwp_cond_t * cv)1845 lwp_cond_signal(lwp_cond_t *cv)
1846 {
1847 proc_t *p = ttoproc(curthread);
1848 lwpchan_t lwpchan;
1849 uchar_t waiters;
1850 volatile uint16_t type = 0;
1851 volatile int locked = 0;
1852 volatile int watched = 0;
1853 label_t ljb;
1854 int error = 0;
1855
1856 if ((caddr_t)cv >= p->p_as->a_userlimit)
1857 return (set_errno(EFAULT));
1858
1859 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1860
1861 if (on_fault(&ljb)) {
1862 if (locked)
1863 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1864 error = EFAULT;
1865 goto out;
1866 }
1867 /*
1868 * Force Copy-on-write if necessary and ensure that the
1869 * synchronization object resides in read/write memory.
1870 * Cause an EFAULT return now if this is not so.
1871 */
1872 fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
1873 suword16_noerr(&cv->cond_type, type);
1874 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
1875 &lwpchan, LWPCHAN_CVPOOL)) {
1876 error = EFAULT;
1877 goto out;
1878 }
1879 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
1880 locked = 1;
1881 fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
1882 if (waiters != 0) {
1883 /*
1884 * The following call to lwp_release() might fail but it is
1885 * OK to write into the waiters bit below, since the memory
1886 * could not have been re-used or unmapped (for correctly
1887 * written user programs) as in the case of lwp_mutex_wakeup().
1888 * For an incorrect program, we should not care about data
1889 * corruption since this is just one instance of other places
1890 * where corruption can occur for such a program. Of course
1891 * if the memory is unmapped, normal fault recovery occurs.
1892 */
1893 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
1894 suword8_noerr(&cv->cond_waiters_kernel, waiters);
1895 }
1896 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1897 out:
1898 no_fault();
1899 if (watched)
1900 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1901 if (error)
1902 return (set_errno(error));
1903 return (0);
1904 }
1905
1906 /*
1907 * wakeup every lwp that's blocked on this condition variable.
1908 */
1909 int
lwp_cond_broadcast(lwp_cond_t * cv)1910 lwp_cond_broadcast(lwp_cond_t *cv)
1911 {
1912 proc_t *p = ttoproc(curthread);
1913 lwpchan_t lwpchan;
1914 volatile uint16_t type = 0;
1915 volatile int locked = 0;
1916 volatile int watched = 0;
1917 label_t ljb;
1918 uchar_t waiters;
1919 int error = 0;
1920
1921 if ((caddr_t)cv >= p->p_as->a_userlimit)
1922 return (set_errno(EFAULT));
1923
1924 watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1925
1926 if (on_fault(&ljb)) {
1927 if (locked)
1928 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1929 error = EFAULT;
1930 goto out;
1931 }
1932 /*
1933 * Force Copy-on-write if necessary and ensure that the
1934 * synchronization object resides in read/write memory.
1935 * Cause an EFAULT return now if this is not so.
1936 */
1937 fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
1938 suword16_noerr(&cv->cond_type, type);
1939 if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
1940 &lwpchan, LWPCHAN_CVPOOL)) {
1941 error = EFAULT;
1942 goto out;
1943 }
1944 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
1945 locked = 1;
1946 fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
1947 if (waiters != 0) {
1948 lwp_release_all(&lwpchan);
1949 suword8_noerr(&cv->cond_waiters_kernel, 0);
1950 }
1951 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1952 out:
1953 no_fault();
1954 if (watched)
1955 watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1956 if (error)
1957 return (set_errno(error));
1958 return (0);
1959 }
1960
1961 int
lwp_sema_trywait(lwp_sema_t * sp)1962 lwp_sema_trywait(lwp_sema_t *sp)
1963 {
1964 kthread_t *t = curthread;
1965 proc_t *p = ttoproc(t);
1966 label_t ljb;
1967 volatile int locked = 0;
1968 volatile int watched = 0;
1969 volatile uint16_t type = 0;
1970 int count;
1971 lwpchan_t lwpchan;
1972 uchar_t waiters;
1973 int error = 0;
1974
1975 if ((caddr_t)sp >= p->p_as->a_userlimit)
1976 return (set_errno(EFAULT));
1977
1978 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
1979
1980 if (on_fault(&ljb)) {
1981 if (locked)
1982 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1983 error = EFAULT;
1984 goto out;
1985 }
1986 /*
1987 * Force Copy-on-write if necessary and ensure that the
1988 * synchronization object resides in read/write memory.
1989 * Cause an EFAULT return now if this is not so.
1990 */
1991 fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
1992 suword16_noerr((void *)&sp->sema_type, type);
1993 if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
1994 &lwpchan, LWPCHAN_CVPOOL)) {
1995 error = EFAULT;
1996 goto out;
1997 }
1998 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
1999 locked = 1;
2000 fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
2001 if (count == 0)
2002 error = EBUSY;
2003 else
2004 suword32_noerr((void *)&sp->sema_count, --count);
2005 if (count != 0) {
2006 fuword8_noerr(&sp->sema_waiters, &waiters);
2007 if (waiters != 0) {
2008 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
2009 suword8_noerr(&sp->sema_waiters, waiters);
2010 }
2011 }
2012 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2013 out:
2014 no_fault();
2015 if (watched)
2016 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2017 if (error)
2018 return (set_errno(error));
2019 return (0);
2020 }
2021
/*
 * Decrement the user-level semaphore 'sp', sleeping while its count is zero.
 *
 *	sp		user address of the semaphore
 *	tsp		timeout specification handed to lwp_timer_copyin()
 *	check_park	see lwp_cond_wait(), above, for an explanation
 *
 * Returns 0 on success, otherwise the result of set_errno() for:
 *	EFAULT	'sp' is outside the user address space or an access faulted
 *	EINTR	a signal, lwp_sysabort, MUSTRETURN() or an unpark ended the wait
 *	ETIME	the timeout expired (or was already expired on entry)
 * or for the error returned by lwp_timer_copyin(), which is reported only
 * if we would otherwise have to sleep.
 */
int
lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	clock_t tim = -1;
	label_t ljb;
	/* These are read in the on_fault() handler, hence volatile. */
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;
	int time_error;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	/*
	 * 'timedwait' is non-NULL only while a timeout still has to be
	 * queued or dequeued.  A timer error is remembered in 'time_error'
	 * and reported later, only if we actually need to sleep.
	 */
	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		/* A user access below faulted; drop the lwpchan lock if held. */
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	/* Sleep, one wakeup at a time, for as long as the count is zero. */
	while (error == 0 && count == 0) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}
		/* Record in the semaphore that a waiter exists. */
		suword8_noerr(&sp->sema_waiters, 1);
		/* Watchpoints are re-armed for the duration of the sleep. */
		if (watched)
			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
			/*
			 * We received a signal at user-level before calling
			 * here or another thread wants us to return
			 * immediately with EINTR.  See lwp_unpark().
			 */
			imm_unpark = 1;
			t->t_unpark = 0;
			timedwait = NULL;
		} else if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		t->t_flag |= T_WAITCVSEM;
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to sleep
		 * again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		/*
		 * If there is already a reason not to sleep, make ourselves
		 * runnable again before calling swtch().
		 */
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
		    (imm_timeout | imm_unpark))
			setrun(t);
		swtch();
		/* We are running again; work out why. */
		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, t) || imm_unpark)
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_asleep = 0;
		lwp->lwp_sysabort = 0;
		/* Re-establish the state held at the top of the loop. */
		watched = watch_disable_addr((caddr_t)sp,
		    sizeof (*sp), S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 1;
		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	}
	if (error == 0)
		suword32_noerr((void *)&sp->sema_count, --count);
	/*
	 * Whether or not we took the semaphore, if the count is still
	 * non-zero call lwp_release() and store the waiters byte it
	 * computes back into the semaphore.
	 */
	if (count != 0) {
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&sp->sema_waiters, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	/*
	 * lwp_timer_copyout() has the final say on the error we return.
	 * NOTE(review): presumably it also copies the unslept time back
	 * to *tsp -- confirm against lwp_timer_copyout().
	 */
	if (tsp && check_park && !time_error)
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}
2168
2169 int
lwp_sema_post(lwp_sema_t * sp)2170 lwp_sema_post(lwp_sema_t *sp)
2171 {
2172 proc_t *p = ttoproc(curthread);
2173 label_t ljb;
2174 volatile int locked = 0;
2175 volatile int watched = 0;
2176 volatile uint16_t type = 0;
2177 int count;
2178 lwpchan_t lwpchan;
2179 uchar_t waiters;
2180 int error = 0;
2181
2182 if ((caddr_t)sp >= p->p_as->a_userlimit)
2183 return (set_errno(EFAULT));
2184
2185 watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2186
2187 if (on_fault(&ljb)) {
2188 if (locked)
2189 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2190 error = EFAULT;
2191 goto out;
2192 }
2193 /*
2194 * Force Copy-on-write if necessary and ensure that the
2195 * synchronization object resides in read/write memory.
2196 * Cause an EFAULT return now if this is not so.
2197 */
2198 fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
2199 suword16_noerr(&sp->sema_type, type);
2200 if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
2201 &lwpchan, LWPCHAN_CVPOOL)) {
2202 error = EFAULT;
2203 goto out;
2204 }
2205 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2206 locked = 1;
2207 fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
2208 if (count == _SEM_VALUE_MAX)
2209 error = EOVERFLOW;
2210 else
2211 suword32_noerr(&sp->sema_count, ++count);
2212 if (count == 1) {
2213 fuword8_noerr(&sp->sema_waiters, &waiters);
2214 if (waiters) {
2215 (void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
2216 suword8_noerr(&sp->sema_waiters, waiters);
2217 }
2218 }
2219 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2220 out:
2221 no_fault();
2222 if (watched)
2223 watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2224 if (error)
2225 return (set_errno(error));
2226 return (0);
2227 }
2228
/*
 * Bits kept in a thread's t_writer field while it waits for an rwlock:
 * TRW_WANT_WRITE is set by lwp_rwlock_lock() before a writer blocks, and
 * TRW_LOCK_GRANTED is set by lwp_rwlock_release() on each thread it wakes.
 */
#define	TRW_WANT_WRITE		0x1
#define	TRW_LOCK_GRANTED	0x2

/*
 * Values of the rd_wr argument to lwp_rwlock_lock().  TRY_FLAG is or'ed
 * into READ_LOCK or WRITE_LOCK to request EBUSY instead of blocking.
 */
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	TRY_FLAG		0x10
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
2237
2238 /*
2239 * Release one writer or one or more readers. Compute the rwstate word to
2240 * reflect the new state of the queue. For a safe hand-off we copy the new
2241 * rwstate value back to userland before we wake any of the new lock holders.
2242 *
2243 * Note that sleepq_insert() implements a prioritized FIFO (with writers
2244 * being given precedence over readers of the same priority).
2245 *
2246 * If the first thread is a reader we scan the queue releasing all readers
2247 * until we hit a writer or the end of the queue. If the first thread is a
2248 * writer we still need to check for another writer.
2249 */
2250 void
lwp_rwlock_release(lwpchan_t * lwpchan,lwp_rwlock_t * rw)2251 lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
2252 {
2253 sleepq_head_t *sqh;
2254 kthread_t *tp;
2255 kthread_t **tpp;
2256 kthread_t *tpnext;
2257 kthread_t *wakelist = NULL;
2258 uint32_t rwstate = 0;
2259 int wcount = 0;
2260 int rcount = 0;
2261
2262 sqh = lwpsqhash(lwpchan);
2263 disp_lock_enter(&sqh->sq_lock);
2264 tpp = &sqh->sq_queue.sq_first;
2265 while ((tp = *tpp) != NULL) {
2266 if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
2267 tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
2268 if (tp->t_writer & TRW_WANT_WRITE) {
2269 if ((wcount++ == 0) && (rcount == 0)) {
2270 rwstate |= URW_WRITE_LOCKED;
2271
2272 /* Just one writer to wake. */
2273 sleepq_unlink(tpp, tp);
2274 wakelist = tp;
2275
2276 /* tpp already set for next thread. */
2277 continue;
2278 } else {
2279 rwstate |= URW_HAS_WAITERS;
2280 /* We need look no further. */
2281 break;
2282 }
2283 } else {
2284 rcount++;
2285 if (wcount == 0) {
2286 rwstate++;
2287
2288 /* Add reader to wake list. */
2289 sleepq_unlink(tpp, tp);
2290 tp->t_link = wakelist;
2291 wakelist = tp;
2292
2293 /* tpp already set for next thread. */
2294 continue;
2295 } else {
2296 rwstate |= URW_HAS_WAITERS;
2297 /* We need look no further. */
2298 break;
2299 }
2300 }
2301 }
2302 tpp = &tp->t_link;
2303 }
2304
2305 /* Copy the new rwstate back to userland. */
2306 suword32_noerr(&rw->rwlock_readers, rwstate);
2307
2308 /* Wake the new lock holder(s) up. */
2309 tp = wakelist;
2310 while (tp != NULL) {
2311 DTRACE_SCHED1(wakeup, kthread_t *, tp);
2312 tp->t_wchan0 = NULL;
2313 tp->t_wchan = NULL;
2314 tp->t_sobj_ops = NULL;
2315 tp->t_writer |= TRW_LOCK_GRANTED;
2316 tpnext = tp->t_link;
2317 tp->t_link = NULL;
2318 CL_WAKEUP(tp);
2319 thread_unlock_high(tp);
2320 tp = tpnext;
2321 }
2322
2323 disp_lock_exit(&sqh->sq_lock);
2324 }
2325
2326 /*
2327 * We enter here holding the user-level mutex, which we must release before
2328 * returning or blocking. Based on lwp_cond_wait().
2329 */
2330 static int
lwp_rwlock_lock(lwp_rwlock_t * rw,timespec_t * tsp,int rd_wr)2331 lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
2332 {
2333 lwp_mutex_t *mp = NULL;
2334 kthread_t *t = curthread;
2335 kthread_t *tp;
2336 klwp_t *lwp = ttolwp(t);
2337 proc_t *p = ttoproc(t);
2338 lwp_timer_t lwpt;
2339 lwpchan_t lwpchan;
2340 lwpchan_t mlwpchan;
2341 caddr_t timedwait;
2342 volatile uint16_t type = 0;
2343 volatile uint8_t mtype = 0;
2344 uchar_t mwaiters;
2345 volatile int error = 0;
2346 int time_error;
2347 clock_t tim = -1;
2348 volatile int locked = 0;
2349 volatile int mlocked = 0;
2350 volatile int watched = 0;
2351 volatile int mwatched = 0;
2352 label_t ljb;
2353 volatile int no_lwpchan = 1;
2354 int imm_timeout = 0;
2355 int try_flag;
2356 uint32_t rwstate;
2357 int acquired = 0;
2358
2359 /* We only check rw because the mutex is included in it. */
2360 if ((caddr_t)rw >= p->p_as->a_userlimit)
2361 return (set_errno(EFAULT));
2362
2363 /*
2364 * Put the lwp in an orderly state for debugging,
2365 * in case we are stopped while sleeping, below.
2366 */
2367 prstop(PR_REQUESTED, 0);
2368
2369 /* We must only report this error if we are about to sleep (later). */
2370 timedwait = (caddr_t)tsp;
2371 if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
2372 lwpt.lwpt_imm_timeout) {
2373 imm_timeout = 1;
2374 timedwait = NULL;
2375 }
2376
2377 (void) new_mstate(t, LMS_USER_LOCK);
2378
2379 if (on_fault(&ljb)) {
2380 if (no_lwpchan) {
2381 error = EFAULT;
2382 goto out_nodrop;
2383 }
2384 if (mlocked) {
2385 mlocked = 0;
2386 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2387 }
2388 if (locked) {
2389 locked = 0;
2390 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2391 }
2392 /*
2393 * Set up another on_fault() for a possible fault
2394 * on the user lock accessed at "out_drop".
2395 */
2396 if (on_fault(&ljb)) {
2397 if (mlocked) {
2398 mlocked = 0;
2399 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2400 }
2401 error = EFAULT;
2402 goto out_nodrop;
2403 }
2404 error = EFAULT;
2405 goto out_nodrop;
2406 }
2407
2408 /* Process rd_wr (including sanity check). */
2409 try_flag = (rd_wr & TRY_FLAG);
2410 rd_wr &= ~TRY_FLAG;
2411 if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
2412 error = EINVAL;
2413 goto out_nodrop;
2414 }
2415
2416 /*
2417 * Force Copy-on-write if necessary and ensure that the
2418 * synchronization object resides in read/write memory.
2419 * Cause an EFAULT return now if this is not so.
2420 */
2421 mp = &rw->mutex;
2422 fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
2423 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
2424 suword8_noerr(&mp->mutex_type, mtype);
2425 suword16_noerr(&rw->rwlock_type, type);
2426
2427 /* We can only continue for simple USYNC_PROCESS locks. */
2428 if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
2429 error = EINVAL;
2430 goto out_nodrop;
2431 }
2432
2433 /* Convert user level mutex, "mp", to a unique lwpchan. */
2434 if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
2435 &mlwpchan, LWPCHAN_MPPOOL)) {
2436 error = EFAULT;
2437 goto out_nodrop;
2438 }
2439
2440 /* Convert user level rwlock, "rw", to a unique lwpchan. */
2441 if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2442 &lwpchan, LWPCHAN_CVPOOL)) {
2443 error = EFAULT;
2444 goto out_nodrop;
2445 }
2446
2447 no_lwpchan = 0;
2448 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2449 mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2450
2451 /*
2452 * lwpchan_lock() ensures that the calling LWP is put to sleep
2453 * atomically with respect to a possible wakeup which is a result
2454 * of lwp_rwlock_unlock().
2455 *
2456 * What's misleading is that the LWP is put to sleep after the
2457 * rwlock's mutex is released. This is OK as long as the release
2458 * operation is also done while holding mlwpchan. The LWP is then
2459 * put to sleep when the possibility of pagefaulting or sleeping
2460 * has been completely eliminated.
2461 */
2462 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2463 locked = 1;
2464 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2465 mlocked = 1;
2466
2467 /*
2468 * Fetch the current rwlock state.
2469 *
2470 * The possibility of spurious wake-ups or killed waiters means
2471 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
2472 * We only fix these if they are important to us.
2473 *
2474 * Although various error states can be observed here (e.g. the lock
2475 * is not held, but there are waiters) we assume these are applicaton
2476 * errors and so we take no corrective action.
2477 */
2478 fuword32_noerr(&rw->rwlock_readers, &rwstate);
2479 /*
2480 * We cannot legitimately get here from user-level
2481 * without URW_HAS_WAITERS being set.
2482 * Set it now to guard against user-level error.
2483 */
2484 rwstate |= URW_HAS_WAITERS;
2485
2486 /*
2487 * We can try only if the lock isn't held by a writer.
2488 */
2489 if (!(rwstate & URW_WRITE_LOCKED)) {
2490 tp = lwp_queue_waiter(&lwpchan);
2491 if (tp == NULL) {
2492 /*
2493 * Hmmm, rwstate indicates waiters but there are
2494 * none queued. This could just be the result of a
2495 * spurious wakeup, so let's ignore it.
2496 *
2497 * We now have a chance to acquire the lock
2498 * uncontended, but this is the last chance for
2499 * a writer to acquire the lock without blocking.
2500 */
2501 if (rd_wr == READ_LOCK) {
2502 rwstate++;
2503 acquired = 1;
2504 } else if ((rwstate & URW_READERS_MASK) == 0) {
2505 rwstate |= URW_WRITE_LOCKED;
2506 acquired = 1;
2507 }
2508 } else if (rd_wr == READ_LOCK) {
2509 /*
2510 * This is the last chance for a reader to acquire
2511 * the lock now, but it can only do so if there is
2512 * no writer of equal or greater priority at the
2513 * head of the queue .
2514 *
2515 * It is also just possible that there is a reader
2516 * at the head of the queue. This may be the result
2517 * of a spurious wakeup or an application failure.
2518 * In this case we only acquire the lock if we have
2519 * equal or greater priority. It is not our job to
2520 * release spurious waiters.
2521 */
2522 pri_t our_pri = DISP_PRIO(t);
2523 pri_t his_pri = DISP_PRIO(tp);
2524
2525 if ((our_pri > his_pri) || ((our_pri == his_pri) &&
2526 !(tp->t_writer & TRW_WANT_WRITE))) {
2527 rwstate++;
2528 acquired = 1;
2529 }
2530 }
2531 }
2532
2533 if (acquired || try_flag || time_error) {
2534 /*
2535 * We're not going to block this time.
2536 */
2537 suword32_noerr(&rw->rwlock_readers, rwstate);
2538 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2539 locked = 0;
2540
2541 if (acquired) {
2542 /*
2543 * Got the lock!
2544 */
2545 error = 0;
2546
2547 } else if (try_flag) {
2548 /*
2549 * We didn't get the lock and we're about to block.
2550 * If we're doing a trylock, return EBUSY instead.
2551 */
2552 error = EBUSY;
2553
2554 } else if (time_error) {
2555 /*
2556 * The SUSV3 POSIX spec is very clear that we should
2557 * get no error from validating the timer (above)
2558 * until we would actually sleep.
2559 */
2560 error = time_error;
2561 }
2562
2563 goto out_drop;
2564 }
2565
2566 /*
2567 * We're about to block, so indicate what kind of waiter we are.
2568 */
2569 t->t_writer = 0;
2570 if (rd_wr == WRITE_LOCK)
2571 t->t_writer = TRW_WANT_WRITE;
2572 suword32_noerr(&rw->rwlock_readers, rwstate);
2573
2574 /*
2575 * Unlock the rwlock's mutex (pagefaults are possible here).
2576 */
2577 set_owner_pid(mp, 0, 0);
2578 ulock_clear(&mp->mutex_lockw);
2579 fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2580 if (mwaiters != 0) {
2581 /*
2582 * Given the locking of mlwpchan around the release of
2583 * the mutex and checking for waiters, the following
2584 * call to lwp_release() can fail ONLY if the lock
2585 * acquirer is interrupted after setting the waiter bit,
2586 * calling lwp_block() and releasing mlwpchan.
2587 * In this case, it could get pulled off the LWP sleep
2588 * queue (via setrun()) before the following call to
2589 * lwp_release() occurs, and the lock requestor will
2590 * update the waiter bit correctly by re-evaluating it.
2591 */
2592 if (lwp_release(&mlwpchan, &mwaiters, 0))
2593 suword8_noerr(&mp->mutex_waiters, mwaiters);
2594 }
2595 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2596 mlocked = 0;
2597 no_fault();
2598
2599 if (mwatched) {
2600 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2601 mwatched = 0;
2602 }
2603 if (watched) {
2604 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2605 watched = 0;
2606 }
2607
2608 if (timedwait) {
2609 /*
2610 * If we successfully queue the timeout,
2611 * then don't drop t_delay_lock until
2612 * we are on the sleep queue (below).
2613 */
2614 mutex_enter(&t->t_delay_lock);
2615 if (lwp_timer_enqueue(&lwpt) != 0) {
2616 mutex_exit(&t->t_delay_lock);
2617 imm_timeout = 1;
2618 timedwait = NULL;
2619 }
2620 }
2621 t->t_flag |= T_WAITCVSEM;
2622 lwp_block(&lwpchan);
2623
2624 /*
2625 * Nothing should happen to cause the LWp to go to sleep until after
2626 * it returns from swtch().
2627 */
2628 if (timedwait)
2629 mutex_exit(&t->t_delay_lock);
2630 locked = 0;
2631 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2632 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
2633 setrun(t);
2634 swtch();
2635
2636 /*
2637 * We're back, but we need to work out why. Were we interrupted? Did
2638 * we timeout? Were we granted the lock?
2639 */
2640 error = EAGAIN;
2641 acquired = (t->t_writer & TRW_LOCK_GRANTED);
2642 t->t_writer = 0;
2643 t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2644 if (timedwait)
2645 tim = lwp_timer_dequeue(&lwpt);
2646 if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
2647 error = EINTR;
2648 else if (imm_timeout || (timedwait && tim == -1))
2649 error = ETIME;
2650 lwp->lwp_asleep = 0;
2651 lwp->lwp_sysabort = 0;
2652 setallwatch();
2653
2654 /*
2655 * If we were granted the lock we don't care about EINTR or ETIME.
2656 */
2657 if (acquired)
2658 error = 0;
2659
2660 if (t->t_mstate == LMS_USER_LOCK)
2661 (void) new_mstate(t, LMS_SYSTEM);
2662
2663 if (error)
2664 return (set_errno(error));
2665 return (0);
2666
2667 out_drop:
2668 /*
2669 * Make sure that the user level lock is dropped before returning
2670 * to the caller.
2671 */
2672 if (!mlocked) {
2673 lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2674 mlocked = 1;
2675 }
2676 set_owner_pid(mp, 0, 0);
2677 ulock_clear(&mp->mutex_lockw);
2678 fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2679 if (mwaiters != 0) {
2680 /*
2681 * See comment above on lock clearing and lwp_release()
2682 * success/failure.
2683 */
2684 if (lwp_release(&mlwpchan, &mwaiters, 0))
2685 suword8_noerr(&mp->mutex_waiters, mwaiters);
2686 }
2687 lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2688 mlocked = 0;
2689
2690 out_nodrop:
2691 no_fault();
2692 if (mwatched)
2693 watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2694 if (watched)
2695 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2696 if (t->t_mstate == LMS_USER_LOCK)
2697 (void) new_mstate(t, LMS_SYSTEM);
2698 if (error)
2699 return (set_errno(error));
2700 return (0);
2701 }
2702
2703 /*
2704 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
2705 * we never drop the lock.
2706 */
2707 static int
lwp_rwlock_unlock(lwp_rwlock_t * rw)2708 lwp_rwlock_unlock(lwp_rwlock_t *rw)
2709 {
2710 kthread_t *t = curthread;
2711 proc_t *p = ttoproc(t);
2712 lwpchan_t lwpchan;
2713 volatile uint16_t type = 0;
2714 volatile int error = 0;
2715 volatile int locked = 0;
2716 volatile int watched = 0;
2717 label_t ljb;
2718 volatile int no_lwpchan = 1;
2719 uint32_t rwstate;
2720
2721 /* We only check rw because the mutex is included in it. */
2722 if ((caddr_t)rw >= p->p_as->a_userlimit)
2723 return (set_errno(EFAULT));
2724
2725 if (on_fault(&ljb)) {
2726 if (no_lwpchan) {
2727 error = EFAULT;
2728 goto out_nodrop;
2729 }
2730 if (locked) {
2731 locked = 0;
2732 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2733 }
2734 error = EFAULT;
2735 goto out_nodrop;
2736 }
2737
2738 /*
2739 * Force Copy-on-write if necessary and ensure that the
2740 * synchronization object resides in read/write memory.
2741 * Cause an EFAULT return now if this is not so.
2742 */
2743 fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
2744 suword16_noerr(&rw->rwlock_type, type);
2745
2746 /* We can only continue for simple USYNC_PROCESS locks. */
2747 if (type != USYNC_PROCESS) {
2748 error = EINVAL;
2749 goto out_nodrop;
2750 }
2751
2752 /* Convert user level rwlock, "rw", to a unique lwpchan. */
2753 if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2754 &lwpchan, LWPCHAN_CVPOOL)) {
2755 error = EFAULT;
2756 goto out_nodrop;
2757 }
2758
2759 no_lwpchan = 0;
2760 watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2761
2762 lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2763 locked = 1;
2764
2765 /*
2766 * We can resolve multiple readers (except the last reader) here.
2767 * For the last reader or a writer we need lwp_rwlock_release(),
2768 * to which we also delegate the task of copying the new rwstate
2769 * back to userland (see the comment there).
2770 */
2771 fuword32_noerr(&rw->rwlock_readers, &rwstate);
2772 if (rwstate & URW_WRITE_LOCKED)
2773 lwp_rwlock_release(&lwpchan, rw);
2774 else if ((rwstate & URW_READERS_MASK) > 0) {
2775 rwstate--;
2776 if ((rwstate & URW_READERS_MASK) == 0)
2777 lwp_rwlock_release(&lwpchan, rw);
2778 else
2779 suword32_noerr(&rw->rwlock_readers, rwstate);
2780 }
2781
2782 lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2783 locked = 0;
2784 error = 0;
2785
2786 out_nodrop:
2787 no_fault();
2788 if (watched)
2789 watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2790 if (error)
2791 return (set_errno(error));
2792 return (0);
2793 }
2794
2795 int
lwp_rwlock_sys(int subcode,lwp_rwlock_t * rwlp,timespec_t * tsp)2796 lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
2797 {
2798 switch (subcode) {
2799 case 0:
2800 return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
2801 case 1:
2802 return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
2803 case 2:
2804 return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
2805 case 3:
2806 return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
2807 case 4:
2808 return (lwp_rwlock_unlock(rwlp));
2809 }
2810 return (set_errno(EINVAL));
2811 }
2812
2813 /*
2814 * Return the owner of the user-level s-object.
2815 * Since we can't really do this, return NULL.
2816 */
2817 /* ARGSUSED */
2818 static kthread_t *
lwpsobj_owner(caddr_t sobj)2819 lwpsobj_owner(caddr_t sobj)
2820 {
2821 return ((kthread_t *)NULL);
2822 }
2823
2824 /*
2825 * Wake up a thread asleep on a user-level synchronization
2826 * object.
2827 */
2828 static void
lwp_unsleep(kthread_t * t)2829 lwp_unsleep(kthread_t *t)
2830 {
2831 ASSERT(THREAD_LOCK_HELD(t));
2832 if (t->t_wchan0 != NULL) {
2833 sleepq_head_t *sqh;
2834 sleepq_t *sqp = t->t_sleepq;
2835
2836 if (sqp != NULL) {
2837 sqh = lwpsqhash(&t->t_lwpchan);
2838 ASSERT(&sqh->sq_queue == sqp);
2839 sleepq_unsleep(t);
2840 disp_lock_exit_high(&sqh->sq_lock);
2841 CL_SETRUN(t);
2842 return;
2843 }
2844 }
2845 panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
2846 }
2847
2848 /*
2849 * Change the priority of a thread asleep on a user-level
2850 * synchronization object. To maintain proper priority order,
2851 * we:
2852 * o dequeue the thread.
2853 * o change its priority.
2854 * o re-enqueue the thread.
2855 * Assumption: the thread is locked on entry.
2856 */
2857 static void
lwp_change_pri(kthread_t * t,pri_t pri,pri_t * t_prip)2858 lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
2859 {
2860 ASSERT(THREAD_LOCK_HELD(t));
2861 if (t->t_wchan0 != NULL) {
2862 sleepq_t *sqp = t->t_sleepq;
2863
2864 sleepq_dequeue(t);
2865 *t_prip = pri;
2866 sleepq_insert(sqp, t);
2867 } else
2868 panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
2869 }
2870
2871 /*
2872 * Clean up a left-over process-shared robust mutex
2873 */
2874 static void
lwp_mutex_cleanup(lwpchan_entry_t * ent,uint16_t lockflg)2875 lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
2876 {
2877 uint16_t flag;
2878 uchar_t waiters;
2879 label_t ljb;
2880 pid_t owner_pid;
2881 lwp_mutex_t *lp;
2882 volatile int locked = 0;
2883 volatile int watched = 0;
2884 volatile struct upimutex *upimutex = NULL;
2885 volatile int upilocked = 0;
2886
2887 if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
2888 != (USYNC_PROCESS | LOCK_ROBUST))
2889 return;
2890
2891 lp = (lwp_mutex_t *)ent->lwpchan_addr;
2892 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2893 if (on_fault(&ljb)) {
2894 if (locked)
2895 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2896 if (upilocked)
2897 upimutex_unlock((upimutex_t *)upimutex, 0);
2898 goto out;
2899 }
2900
2901 fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);
2902
2903 if (UPIMUTEX(ent->lwpchan_type)) {
2904 lwpchan_t lwpchan = ent->lwpchan_lwpchan;
2905 upib_t *upibp = &UPI_CHAIN(lwpchan);
2906
2907 if (owner_pid != curproc->p_pid)
2908 goto out;
2909 mutex_enter(&upibp->upib_lock);
2910 upimutex = upi_get(upibp, &lwpchan);
2911 if (upimutex == NULL || upimutex->upi_owner != curthread) {
2912 mutex_exit(&upibp->upib_lock);
2913 goto out;
2914 }
2915 mutex_exit(&upibp->upib_lock);
2916 upilocked = 1;
2917 flag = lwp_clear_mutex(lp, lockflg);
2918 suword8_noerr(&lp->mutex_lockw, 0);
2919 upimutex_unlock((upimutex_t *)upimutex, flag);
2920 } else {
2921 lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2922 locked = 1;
2923 /*
2924 * Clear the spinners count because one of our
2925 * threads could have been spinning for this lock
2926 * at user level when the process was suddenly killed.
2927 * There is no harm in this since user-level libc code
2928 * will adapt to the sudden change in the spinner count.
2929 */
2930 suword8_noerr(&lp->mutex_spinners, 0);
2931 if (owner_pid != curproc->p_pid) {
2932 /*
2933 * We are not the owner. There may or may not be one.
2934 * If there are waiters, we wake up one or all of them.
2935 * It doesn't hurt to wake them up in error since
2936 * they will just retry the lock and go to sleep
2937 * again if necessary.
2938 */
2939 fuword8_noerr(&lp->mutex_waiters, &waiters);
2940 if (waiters != 0) { /* there are waiters */
2941 fuword16_noerr(&lp->mutex_flag, &flag);
2942 if (flag & LOCK_NOTRECOVERABLE) {
2943 lwp_release_all(&ent->lwpchan_lwpchan);
2944 suword8_noerr(&lp->mutex_waiters, 0);
2945 } else if (lwp_release(&ent->lwpchan_lwpchan,
2946 &waiters, 0)) {
2947 suword8_noerr(&lp->mutex_waiters,
2948 waiters);
2949 }
2950 }
2951 } else {
2952 /*
2953 * We are the owner. Release it.
2954 */
2955 (void) lwp_clear_mutex(lp, lockflg);
2956 ulock_clear(&lp->mutex_lockw);
2957 fuword8_noerr(&lp->mutex_waiters, &waiters);
2958 if (waiters &&
2959 lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
2960 suword8_noerr(&lp->mutex_waiters, waiters);
2961 }
2962 lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2963 }
2964 out:
2965 no_fault();
2966 if (watched)
2967 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2968 }
2969
2970 /*
2971 * Register a process-shared robust mutex in the lwpchan cache.
2972 */
2973 int
lwp_mutex_register(lwp_mutex_t * lp,caddr_t uaddr)2974 lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr)
2975 {
2976 int error = 0;
2977 volatile int watched;
2978 label_t ljb;
2979 uint8_t type;
2980 lwpchan_t lwpchan;
2981
2982 if ((caddr_t)lp >= (caddr_t)USERLIMIT)
2983 return (set_errno(EFAULT));
2984
2985 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2986
2987 if (on_fault(&ljb)) {
2988 error = EFAULT;
2989 } else {
2990 /*
2991 * Force Copy-on-write if necessary and ensure that the
2992 * synchronization object resides in read/write memory.
2993 * Cause an EFAULT return now if this is not so.
2994 */
2995 fuword8_noerr(&lp->mutex_type, &type);
2996 suword8_noerr(&lp->mutex_type, type);
2997 if ((type & (USYNC_PROCESS|LOCK_ROBUST))
2998 != (USYNC_PROCESS|LOCK_ROBUST)) {
2999 error = EINVAL;
3000 } else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp,
3001 uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) {
3002 error = EFAULT;
3003 }
3004 }
3005 no_fault();
3006 if (watched)
3007 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3008 if (error)
3009 return (set_errno(error));
3010 return (0);
3011 }
3012
3013 /*
3014 * There is a user-level robust lock registration in libc.
3015 * Mark it as invalid by storing -1 into the location of the pointer.
3016 */
3017 static void
lwp_mutex_unregister(void * uaddr)3018 lwp_mutex_unregister(void *uaddr)
3019 {
3020 if (get_udatamodel() == DATAMODEL_NATIVE) {
3021 (void) sulword(uaddr, (ulong_t)-1);
3022 #ifdef _SYSCALL32_IMPL
3023 } else {
3024 (void) suword32(uaddr, (uint32_t)-1);
3025 #endif
3026 }
3027 }
3028
3029 int
lwp_mutex_trylock(lwp_mutex_t * lp,uintptr_t owner)3030 lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner)
3031 {
3032 kthread_t *t = curthread;
3033 proc_t *p = ttoproc(t);
3034 int error = 0;
3035 volatile int locked = 0;
3036 volatile int watched = 0;
3037 label_t ljb;
3038 volatile uint8_t type = 0;
3039 uint16_t flag;
3040 lwpchan_t lwpchan;
3041
3042 if ((caddr_t)lp >= p->p_as->a_userlimit)
3043 return (set_errno(EFAULT));
3044
3045 (void) new_mstate(t, LMS_USER_LOCK);
3046
3047 if (on_fault(&ljb)) {
3048 if (locked)
3049 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3050 error = EFAULT;
3051 goto out;
3052 }
3053 /*
3054 * Force Copy-on-write if necessary and ensure that the
3055 * synchronization object resides in read/write memory.
3056 * Cause an EFAULT return now if this is not so.
3057 */
3058 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
3059 suword8_noerr(&lp->mutex_type, type);
3060 if (UPIMUTEX(type)) {
3061 no_fault();
3062 error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
3063 if (error == 0 || error == EOWNERDEAD ||
3064 error == ELOCKUNMAPPED) {
3065 volatile int locked = error != 0;
3066 if (on_fault(&ljb)) {
3067 if (locked != 0)
3068 error = lwp_upimutex_unlock(lp, type);
3069 else
3070 error = EFAULT;
3071 goto upierr;
3072 }
3073 set_owner_pid(lp, owner,
3074 (type & USYNC_PROCESS)? p->p_pid : 0);
3075 no_fault();
3076 }
3077
3078 upierr:
3079 if (error)
3080 return (set_errno(error));
3081 return (0);
3082 }
3083 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3084 &lwpchan, LWPCHAN_MPPOOL)) {
3085 error = EFAULT;
3086 goto out;
3087 }
3088 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3089 locked = 1;
3090 if (type & LOCK_ROBUST) {
3091 fuword16_noerr(&lp->mutex_flag, &flag);
3092 if (flag & LOCK_NOTRECOVERABLE) {
3093 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3094 error = ENOTRECOVERABLE;
3095 goto out;
3096 }
3097 }
3098
3099 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3100
3101 if (!ulock_try(&lp->mutex_lockw))
3102 error = EBUSY;
3103 else {
3104 set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
3105 if (type & LOCK_ROBUST) {
3106 fuword16_noerr(&lp->mutex_flag, &flag);
3107 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3108 if (flag & LOCK_OWNERDEAD)
3109 error = EOWNERDEAD;
3110 else if (type & USYNC_PROCESS_ROBUST)
3111 error = ELOCKUNMAPPED;
3112 else
3113 error = EOWNERDEAD;
3114 }
3115 }
3116 }
3117 locked = 0;
3118 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3119 out:
3120
3121 if (t->t_mstate == LMS_USER_LOCK)
3122 (void) new_mstate(t, LMS_SYSTEM);
3123
3124 no_fault();
3125 if (watched)
3126 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3127 if (error)
3128 return (set_errno(error));
3129 return (0);
3130 }
3131
3132 /*
3133 * unlock the mutex and unblock lwps that is trying to acquire this mutex.
3134 * the blocked lwp resumes and retries to acquire the lock.
3135 */
3136 int
lwp_mutex_unlock(lwp_mutex_t * lp)3137 lwp_mutex_unlock(lwp_mutex_t *lp)
3138 {
3139 proc_t *p = ttoproc(curthread);
3140 lwpchan_t lwpchan;
3141 uchar_t waiters;
3142 volatile int locked = 0;
3143 volatile int watched = 0;
3144 volatile uint8_t type = 0;
3145 label_t ljb;
3146 uint16_t flag;
3147 int error = 0;
3148
3149 if ((caddr_t)lp >= p->p_as->a_userlimit)
3150 return (set_errno(EFAULT));
3151
3152 if (on_fault(&ljb)) {
3153 if (locked)
3154 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3155 error = EFAULT;
3156 goto out;
3157 }
3158
3159 /*
3160 * Force Copy-on-write if necessary and ensure that the
3161 * synchronization object resides in read/write memory.
3162 * Cause an EFAULT return now if this is not so.
3163 */
3164 fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
3165 suword8_noerr(&lp->mutex_type, type);
3166
3167 if (UPIMUTEX(type)) {
3168 no_fault();
3169 error = lwp_upimutex_unlock(lp, type);
3170 if (error)
3171 return (set_errno(error));
3172 return (0);
3173 }
3174
3175 watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3176
3177 if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3178 &lwpchan, LWPCHAN_MPPOOL)) {
3179 error = EFAULT;
3180 goto out;
3181 }
3182 lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3183 locked = 1;
3184 if (type & LOCK_ROBUST) {
3185 fuword16_noerr(&lp->mutex_flag, &flag);
3186 if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3187 flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
3188 flag |= LOCK_NOTRECOVERABLE;
3189 suword16_noerr(&lp->mutex_flag, flag);
3190 }
3191 }
3192 set_owner_pid(lp, 0, 0);
3193 ulock_clear(&lp->mutex_lockw);
3194 /*
3195 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
3196 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
3197 * may fail. If it fails, do not write into the waiter bit.
3198 * The call to lwp_release() might fail due to one of three reasons:
3199 *
3200 * 1. due to the thread which set the waiter bit not actually
3201 * sleeping since it got the lock on the re-try. The waiter
3202 * bit will then be correctly updated by that thread. This
3203 * window may be closed by reading the wait bit again here
3204 * and not calling lwp_release() at all if it is zero.
3205 * 2. the thread which set the waiter bit and went to sleep
3206 * was woken up by a signal. This time, the waiter recomputes
3207 * the wait bit in the return with EINTR code.
3208 * 3. the waiter bit read by lwp_mutex_wakeup() was in
3209 * memory that has been re-used after the lock was dropped.
3210 * In this case, writing into the waiter bit would cause data
3211 * corruption.
3212 */
3213 fuword8_noerr(&lp->mutex_waiters, &waiters);
3214 if (waiters) {
3215 if ((type & LOCK_ROBUST) &&
3216 (flag & LOCK_NOTRECOVERABLE)) {
3217 lwp_release_all(&lwpchan);
3218 suword8_noerr(&lp->mutex_waiters, 0);
3219 } else if (lwp_release(&lwpchan, &waiters, 0)) {
3220 suword8_noerr(&lp->mutex_waiters, waiters);
3221 }
3222 }
3223
3224 lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3225 out:
3226 no_fault();
3227 if (watched)
3228 watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3229 if (error)
3230 return (set_errno(error));
3231 return (0);
3232 }
3233