xref: /illumos-gate/usr/src/uts/common/os/timer.c (revision c21bd51d7acbaf77116c4cc3a23dfc6d16c637c2)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2020 Joyent, Inc.
 */
#include <sys/timer.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/policy.h>
#include <sys/port_impl.h>
#include <sys/port_kernel.h>
#include <sys/contract/process_impl.h>

static kmem_cache_t *clock_timer_cache;
static clock_backend_t *clock_backend[CLOCK_MAX];
static int timer_port_callback(void *, int *, pid_t, int, void *);
static void timer_close_port(void *, int, pid_t, int);

#define	CLOCK_BACKEND(clk) \
	((clk) < CLOCK_MAX && (clk) >= 0 ? clock_backend[(clk)] : NULL)
/*
 * Tunable to increase the maximum number of POSIX timers per process.  This
 * may _only_ be tuned in /etc/system or by patching the kernel binary; it
 * _cannot_ be tuned on a running system.
 */
int timer_max = _TIMER_MAX;
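
/*
 * For example, a sketch of an /etc/system line (the value 512 is
 * hypothetical; any value above _TIMER_MAX is applied the same way):
 *
 *	set timer_max = 512
 */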

/*
 * timer_lock() locks the specified interval timer.  It doesn't look at the
 * ITLK_REMOVE bit; it's up to callers to look at this if they need to
 * care.  p_lock must be held on entry; it may be dropped and reacquired,
 * but timer_lock() will always return with p_lock held.
 *
 * Note that timer_create() doesn't call timer_lock(); it creates timers
 * with the ITLK_LOCKED bit explicitly set.
 */
static void
timer_lock(proc_t *p, itimer_t *it)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	while (it->it_lock & ITLK_LOCKED) {
		it->it_blockers++;
		cv_wait(&it->it_cv, &p->p_lock);
		it->it_blockers--;
	}

	it->it_lock |= ITLK_LOCKED;
}

/*
 * timer_unlock() unlocks the specified interval timer, waking up any
 * waiters.  p_lock must be held on entry; it will not be dropped by
 * timer_unlock().
 */
static void
timer_unlock(proc_t *p, itimer_t *it)
{
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(it->it_lock & ITLK_LOCKED);
	it->it_lock &= ~ITLK_LOCKED;
	cv_signal(&it->it_cv);
}

/*
 * timer_delete_locked() takes a proc pointer, timer ID and locked interval
 * timer, and deletes the specified timer.  It must be called with p_lock
 * held, and cannot be called on a timer which already has ITLK_REMOVE set;
 * the caller must check this.  timer_delete_locked() will set the ITLK_REMOVE
 * bit and will iteratively unlock and lock the interval timer until all
 * blockers have seen the ITLK_REMOVE and cleared out.  It will then zero
 * out the specified entry in the p_itimer array, and call into the clock
 * backend to complete the deletion.
 *
 * This function will always return with p_lock held.
 */
static void
timer_delete_locked(proc_t *p, timer_t tid, itimer_t *it)
{
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(!(it->it_lock & ITLK_REMOVE));
	ASSERT(it->it_lock & ITLK_LOCKED);

	it->it_lock |= ITLK_REMOVE;

	/*
	 * If there are threads waiting to lock this timer, we'll unlock
	 * the timer, and block on the cv.  Threads blocking our removal will
	 * have the opportunity to run; when they see the ITLK_REMOVE flag
	 * set, they will immediately unlock the timer.
	 */
	while (it->it_blockers) {
		timer_unlock(p, it);
		cv_wait(&it->it_cv, &p->p_lock);
		timer_lock(p, it);
	}

	ASSERT(p->p_itimer_sz > tid);
	ASSERT(p->p_itimer[tid] == it);
	p->p_itimer[tid] = NULL;

	/*
	 * No one is blocked on this timer, and no one will be (we've set
	 * p_itimer[tid] to be NULL; no one can find it).  Now we call into
	 * the clock backend to delete the timer; it is up to the backend to
	 * guarantee that timer_fire() has completed (and will never again
	 * be called) for this timer.
	 */
	mutex_exit(&p->p_lock);

	it->it_backend->clk_timer_delete(it);

	if (it->it_portev) {
		mutex_enter(&it->it_mutex);
		if (it->it_portev) {
			port_kevent_t	*pev;

			/* dissociate timer from the event port */
			(void) port_dissociate_ksource(it->it_portfd,
			    PORT_SOURCE_TIMER, (port_source_t *)it->it_portsrc);
			pev = (port_kevent_t *)it->it_portev;
			it->it_portev = NULL;
			it->it_flags &= ~IT_PORT;
			it->it_pending = 0;
			mutex_exit(&it->it_mutex);
			(void) port_remove_done_event(pev);
			port_free_event(pev);
		} else {
			mutex_exit(&it->it_mutex);
		}
	}

	mutex_enter(&p->p_lock);

	/*
	 * We need to be careful freeing the sigqueue for this timer;
	 * if a signal is pending, the sigqueue needs to be freed
	 * synchronously in siginfofree().  The need to free the sigqueue
	 * in siginfofree() is indicated by setting sq_func to NULL.
	 */
	if (it->it_pending > 0) {
		it->it_sigq->sq_func = NULL;
	} else {
		kmem_free(it->it_sigq, sizeof (sigqueue_t));
	}

	ASSERT(it->it_blockers == 0);
	kmem_cache_free(clock_timer_cache, it);
}
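
/*
 * To illustrate the removal handshake above (a sketch of one interleaving,
 * not additional code): if thread A is blocked in timer_lock() when thread
 * B enters timer_delete_locked(), B sets ITLK_REMOVE, finds it_blockers
 * nonzero, and calls timer_unlock().  A wakes, acquires the timer, observes
 * ITLK_REMOVE (as timer_grab() does), and immediately unlocks; the
 * cv_signal() in timer_unlock() then lets B's cv_wait() resume and re-test
 * it_blockers.
 */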

/*
 * timer_grab() and its companion routine, timer_release(), are wrappers
 * around timer_lock()/_unlock() which allow the timer_*(3R) routines to
 * (a) share error handling code and (b) not grab p_lock themselves.  Routines
 * which are called with p_lock held (e.g. timer_lwpbind(), timer_lwpexit())
 * must call timer_lock()/_unlock() explicitly.
 *
 * timer_grab() takes a proc and a timer ID, and returns a pointer to a
 * locked interval timer.  p_lock must _not_ be held on entry; timer_grab()
 * may acquire p_lock, but will always return with p_lock dropped.
 *
 * If timer_grab() fails, it will return NULL.  timer_grab() will fail if
 * one or more of the following is true:
 *
 *  (a)	The specified timer ID is out of range.
 *
 *  (b)	The specified timer ID does not correspond to a timer ID returned
 *	from timer_create(3R).
 *
 *  (c)	The specified timer ID is currently being removed.
 */
static itimer_t *
timer_grab(proc_t *p, timer_t tid)
{
	itimer_t **itp, *it;

	if (tid < 0) {
		return (NULL);
	}

	mutex_enter(&p->p_lock);

	if ((itp = p->p_itimer) == NULL || tid >= p->p_itimer_sz ||
	    (it = itp[tid]) == NULL) {
		mutex_exit(&p->p_lock);
		return (NULL);
	}

	timer_lock(p, it);

	if (it->it_lock & ITLK_REMOVE) {
		/*
		 * Someone is removing this timer; it will soon be invalid.
		 */
		timer_unlock(p, it);
		mutex_exit(&p->p_lock);
		return (NULL);
	}

	mutex_exit(&p->p_lock);

	return (it);
}

/*
 * timer_release() releases a timer acquired with timer_grab().  p_lock
 * should not be held on entry; timer_release() will acquire p_lock but
 * will drop it before returning.
 */
static void
timer_release(proc_t *p, itimer_t *it)
{
	mutex_enter(&p->p_lock);
	timer_unlock(p, it);
	mutex_exit(&p->p_lock);
}
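
/*
 * The canonical caller pattern for these wrappers (a sketch;
 * timer_gettime() below is a concrete instance):
 *
 *	if ((it = timer_grab(p, tid)) == NULL)
 *		return (set_errno(EINVAL));
 *	error = it->it_backend->clk_timer_gettime(it, &when);
 *	timer_release(p, it);
 */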

/*
 * timer_delete_grabbed() deletes a timer acquired with timer_grab().
 * p_lock should not be held on entry; timer_delete_grabbed() will acquire
 * p_lock, but will drop it before returning.
 */
static void
timer_delete_grabbed(proc_t *p, timer_t tid, itimer_t *it)
{
	mutex_enter(&p->p_lock);
	timer_delete_locked(p, tid, it);
	mutex_exit(&p->p_lock);
}

void
clock_timer_init(void)
{
	clock_timer_cache = kmem_cache_create("timer_cache",
	    sizeof (itimer_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	/*
	 * Push the timer_max limit up to at least 4 * NCPU.  Due to the way
	 * NCPU is defined, proper initialization of the timer limit is
	 * performed at runtime.
	 */
	timer_max = MAX(NCPU * 4, timer_max);
}

void
clock_add_backend(clockid_t clock, clock_backend_t *backend)
{
	ASSERT(clock >= 0 && clock < CLOCK_MAX);
	ASSERT(clock_backend[clock] == NULL);

	clock_backend[clock] = backend;
}

clock_backend_t *
clock_get_backend(clockid_t clock)
{
	if (clock < 0 || clock >= CLOCK_MAX)
		return (NULL);

	return (clock_backend[clock]);
}
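
/*
 * A registration sketch (the backend name and clock ID are hypothetical;
 * the clk_* members are the ones this file invokes on a clock_backend_t):
 *
 *	static clock_backend_t clock_example = {
 *		... clk_default, clk_clock_settime, clk_clock_gettime,
 *		... clk_clock_getres, clk_timer_create, etc.
 *	};
 *
 *	clock_add_backend(CLOCK_EXAMPLE, &clock_example);
 *
 * Until a backend is registered for a clock ID, CLOCK_BACKEND() returns
 * NULL and the clock_*() and timer_create() entry points fail with EINVAL.
 */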

int
clock_settime(clockid_t clock, timespec_t *tp)
{
	timespec_t t;
	clock_backend_t *backend;
	int error;

	if ((backend = CLOCK_BACKEND(clock)) == NULL)
		return (set_errno(EINVAL));

	if (secpolicy_settime(CRED()) != 0)
		return (set_errno(EPERM));

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(tp, &t, sizeof (timespec_t)) != 0)
			return (set_errno(EFAULT));
	} else {
		timespec32_t t32;

		if (copyin(tp, &t32, sizeof (timespec32_t)) != 0)
			return (set_errno(EFAULT));

		TIMESPEC32_TO_TIMESPEC(&t, &t32);
	}

	if (itimerspecfix(&t))
		return (set_errno(EINVAL));

	error = backend->clk_clock_settime(&t);

	if (error)
		return (set_errno(error));

	return (0);
}

int
clock_gettime(clockid_t clock, timespec_t *tp)
{
	timespec_t t;
	clock_backend_t *backend;
	int error;

	if ((backend = CLOCK_BACKEND(clock)) == NULL)
		return (set_errno(EINVAL));

	error = backend->clk_clock_gettime(&t);

	if (error)
		return (set_errno(error));

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&t, tp, sizeof (timespec_t)) != 0)
			return (set_errno(EFAULT));
	} else {
		timespec32_t t32;

		if (TIMESPEC_OVERFLOW(&t))
			return (set_errno(EOVERFLOW));
		TIMESPEC_TO_TIMESPEC32(&t32, &t);

		if (copyout(&t32, tp, sizeof (timespec32_t)) != 0)
			return (set_errno(EFAULT));
	}

	return (0);
}

int
clock_getres(clockid_t clock, timespec_t *tp)
{
	timespec_t t;
	clock_backend_t *backend;
	int error;

	/*
	 * Strangely, the standard defines clock_getres() with a NULL tp
	 * to do nothing (regardless of the validity of the specified
	 * clock_id).  Go figure.
	 */
	if (tp == NULL)
		return (0);

	if ((backend = CLOCK_BACKEND(clock)) == NULL)
		return (set_errno(EINVAL));

	error = backend->clk_clock_getres(&t);

	if (error)
		return (set_errno(error));

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&t, tp, sizeof (timespec_t)) != 0)
			return (set_errno(EFAULT));
	} else {
		timespec32_t t32;

		if (TIMESPEC_OVERFLOW(&t))
			return (set_errno(EOVERFLOW));
		TIMESPEC_TO_TIMESPEC32(&t32, &t);

		if (copyout(&t32, tp, sizeof (timespec32_t)) != 0)
			return (set_errno(EFAULT));
	}

	return (0);
}

void
timer_signal(sigqueue_t *sigq)
{
	itimer_t *it = (itimer_t *)sigq->sq_backptr;

	/*
	 * There are some conditions during a fork or an exit when we can
	 * call siginfofree() without p_lock held.  To prevent a race
	 * between timer_signal() and timer_fire() with regard to it_pending,
	 * we therefore acquire it_mutex in both paths.
	 */
	mutex_enter(&it->it_mutex);
	ASSERT(it->it_pending > 0);
	it->it_overrun = it->it_pending - 1;
	it->it_pending = 0;
	mutex_exit(&it->it_mutex);
}

/*
 * This routine is called from the clock backend.
 */
static void
timer_fire(itimer_t *it)
{
	proc_t *p = NULL;
	int proc_lock_held;

	if (it->it_flags & IT_SIGNAL) {
		/*
		 * See the comment in timer_signal() for why it is not
		 * sufficient to only grab p_lock here.  Because p_lock can be
		 * held on entry to timer_signal(), the lock ordering is
		 * necessarily p_lock before it_mutex.
		 */

		p = it->it_proc;
		proc_lock_held = 1;
		mutex_enter(&p->p_lock);
	} else {
		/*
		 * IT_PORT:
		 * If a timer was ever programmed to send events to a port,
		 * the IT_PORT flag will remain set until:
		 * a) the timer is deleted (see timer_delete_locked()) or
		 * b) the port is being closed (see timer_close_port()).
		 * Both cases are synchronized with the it_mutex.
		 * We don't need to use the p_lock because it is only
		 * required in the IT_SIGNAL case.
		 * If IT_PORT was set and the port is being closed, the timer
		 * notification is set to NONE.  In that case the timer itself
		 * and the it_pending counter remain active until the
		 * application deletes the timer or the process exits.
		 */
		proc_lock_held = 0;
	}
	mutex_enter(&it->it_mutex);

	if (it->it_pending > 0) {
		if (it->it_pending < INT_MAX)
			it->it_pending++;
		mutex_exit(&it->it_mutex);
	} else {
		if (it->it_flags & IT_PORT) {
			it->it_pending = 1;
			port_send_event((port_kevent_t *)it->it_portev);
			mutex_exit(&it->it_mutex);
		} else if (it->it_flags & IT_SIGNAL) {
			it->it_pending = 1;
			mutex_exit(&it->it_mutex);
			sigaddqa(p, NULL, it->it_sigq);
		} else {
			mutex_exit(&it->it_mutex);
		}
	}

	if (proc_lock_held)
		mutex_exit(&p->p_lock);
}
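
/*
 * A worked example of the it_pending accounting above (a sketch of one
 * interleaving): suppose a SIGEV_SIGNAL timer fires four times before the
 * signal is delivered.  The first fire sets it_pending to 1 and queues
 * it_sigq; the next three each increment it_pending, leaving 4.  On
 * delivery, timer_signal() records it_overrun = it_pending - 1 = 3 and
 * resets it_pending to 0, so timer_getoverrun() reports three overruns.
 */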

/*
 * Find an unused (i.e. NULL) entry in p->p_itimer and set *id to the
 * index of the unused entry, growing p->p_itimer as necessary (up to
 * timer_max entries).  Returns B_TRUE (with *id set) on success, B_FALSE
 * on failure (e.g. the process already has the maximum number of allowed
 * timers allocated).
 */
static boolean_t
timer_get_id(proc_t *p, timer_t *id)
{
	itimer_t **itp = NULL, **itp_new;
	uint_t target_sz;
	uint_t i;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_itimer == NULL) {
		/*
		 * No timers have been allocated for this process; allocate
		 * the initial array.
		 */
		ASSERT0(p->p_itimer_sz);
		target_sz = _TIMER_ALLOC_INIT;

		mutex_exit(&p->p_lock);
		itp_new = kmem_zalloc(target_sz * sizeof (itimer_t *),
		    KM_SLEEP);
		mutex_enter(&p->p_lock);

		if (p->p_itimer == NULL) {
			/*
			 * As long as no other thread beat us to allocating
			 * the initial p_itimer array, use what we allocated.
			 * Since we just allocated it, we know slot 0 is
			 * free.
			 */
			p->p_itimer = itp_new;
			p->p_itimer_sz = target_sz;
			i = 0;
			goto done;
		}

		/*
		 * Another thread beat us to allocating the initial array.
		 * Proceed to searching for an empty slot and growing the
		 * array if needed.
		 */
		kmem_free(itp_new, target_sz * sizeof (itimer_t *));
	}

retry:
	/* Use the first empty slot (if any exist) */
	for (i = 0; i < p->p_itimer_sz; i++) {
		if (p->p_itimer[i] == NULL) {
			goto done;
		}
	}

	/* No empty slots; try to grow p->p_itimer and retry */
	target_sz = p->p_itimer_sz * 2;
	if (target_sz > timer_max || target_sz > INT_MAX ||
	    target_sz < p->p_itimer_sz) {
		/* Protect against exceeding the max or overflow */
		return (B_FALSE);
	}

	mutex_exit(&p->p_lock);
	itp_new = kmem_zalloc(target_sz * sizeof (itimer_t *), KM_SLEEP);
	mutex_enter(&p->p_lock);

	if (target_sz <= p->p_itimer_sz) {
		/*
		 * A racing thread performed the resize while we were
		 * waiting outside p_lock.  Discard our now-useless
		 * allocation and retry.
		 */
		kmem_free(itp_new, target_sz * sizeof (itimer_t *));
		goto retry;
	}

	ASSERT3P(p->p_itimer, !=, NULL);
	bcopy(p->p_itimer, itp_new, p->p_itimer_sz * sizeof (itimer_t *));
	kmem_free(p->p_itimer, p->p_itimer_sz * sizeof (itimer_t *));

	/*
	 * Short circuit to use the first free entry in the new allocation.
	 * It's possible that other lower-indexed timers were freed while
	 * p_lock was dropped, but skipping over them is not harmful at all.
	 * In the common case, we skip the need to walk over an array filled
	 * with timers before arriving at the slot we know is fresh from the
	 * allocation.
	 */
	i = p->p_itimer_sz;

	p->p_itimer = itp_new;
	p->p_itimer_sz = target_sz;

done:
	ASSERT3U(i, <=, INT_MAX);
	*id = (timer_t)i;
	return (B_TRUE);
}
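
/*
 * For example (a sketch of the common growth case): a process whose
 * p_itimer array has N slots, all in use, drops p_lock, allocates a
 * 2N-entry array, copies the N old pointers across, and returns slot N,
 * the first entry known to be fresh from the new allocation.
 */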

int
timer_create(clockid_t clock, struct sigevent *evp, timer_t *tid)
{
	struct sigevent ev;
	proc_t *p = curproc;
	clock_backend_t *backend;
	itimer_t *it;
	sigqueue_t *sigq;
	cred_t *cr = CRED();
	int error = 0;
	timer_t i;
	port_notify_t tim_pnevp;
	port_kevent_t *pkevp = NULL;

	if ((backend = CLOCK_BACKEND(clock)) == NULL)
		return (set_errno(EINVAL));

	if (evp != NULL) {
		/*
		 * Short copyin() for binary compatibility: fetch an
		 * oldsigevent to determine how much to copy in.
		 */
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyin(evp, &ev, sizeof (struct oldsigevent)))
				return (set_errno(EFAULT));

			if (ev.sigev_notify == SIGEV_PORT ||
			    ev.sigev_notify == SIGEV_THREAD) {
				if (copyin(ev.sigev_value.sival_ptr, &tim_pnevp,
				    sizeof (port_notify_t)))
					return (set_errno(EFAULT));
			}
#ifdef	_SYSCALL32_IMPL
		} else {
			struct sigevent32 ev32;
			port_notify32_t tim_pnevp32;

			if (copyin(evp, &ev32, sizeof (struct oldsigevent32)))
				return (set_errno(EFAULT));
			ev.sigev_notify = ev32.sigev_notify;
			ev.sigev_signo = ev32.sigev_signo;
			/*
			 * See comment in sigqueue32() on handling of 32-bit
			 * sigvals in a 64-bit kernel.
			 */
			ev.sigev_value.sival_int = ev32.sigev_value.sival_int;
			if (ev.sigev_notify == SIGEV_PORT ||
			    ev.sigev_notify == SIGEV_THREAD) {
				if (copyin((void *)(uintptr_t)
				    ev32.sigev_value.sival_ptr,
				    (void *)&tim_pnevp32,
				    sizeof (port_notify32_t)))
					return (set_errno(EFAULT));
				tim_pnevp.portnfy_port =
				    tim_pnevp32.portnfy_port;
				tim_pnevp.portnfy_user =
				    (void *)(uintptr_t)tim_pnevp32.portnfy_user;
			}
#endif
		}
		switch (ev.sigev_notify) {
		case SIGEV_NONE:
			break;
		case SIGEV_SIGNAL:
			if (ev.sigev_signo < 1 || ev.sigev_signo >= NSIG)
				return (set_errno(EINVAL));
			break;
		case SIGEV_THREAD:
		case SIGEV_PORT:
			break;
		default:
			return (set_errno(EINVAL));
		}
	} else {
		/*
		 * Use the clock's default sigevent (this is a structure copy).
		 */
		ev = backend->clk_default;
	}

	/*
	 * We'll allocate our sigqueue now, before we grab p_lock.
	 * If we can't find an empty slot, we'll free it before returning.
	 */
	sigq = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	/*
	 * Allocate a timer and choose a slot for it.
	 */
	it = kmem_cache_alloc(clock_timer_cache, KM_SLEEP);
	bzero(it, sizeof (*it));
	mutex_init(&it->it_mutex, NULL, MUTEX_DEFAULT, NULL);

	mutex_enter(&p->p_lock);
	if (!timer_get_id(p, &i)) {
		mutex_exit(&p->p_lock);
		kmem_cache_free(clock_timer_cache, it);
		kmem_free(sigq, sizeof (sigqueue_t));
		return (set_errno(EAGAIN));
	}

	ASSERT(i < p->p_itimer_sz && p->p_itimer[i] == NULL);

	/*
	 * If we develop other notification mechanisms, this will need
	 * to call into (yet another) backend.
	 */
	sigq->sq_info.si_signo = ev.sigev_signo;
	if (evp == NULL)
		sigq->sq_info.si_value.sival_int = i;
	else
		sigq->sq_info.si_value = ev.sigev_value;
	sigq->sq_info.si_code = SI_TIMER;
	sigq->sq_info.si_pid = p->p_pid;
	sigq->sq_info.si_ctid = PRCTID(p);
	sigq->sq_info.si_zoneid = getzoneid();
	sigq->sq_info.si_uid = crgetruid(cr);
	sigq->sq_func = timer_signal;
	sigq->sq_next = NULL;
	sigq->sq_backptr = it;
	it->it_sigq = sigq;
	it->it_backend = backend;
	it->it_lock = ITLK_LOCKED;

	if (ev.sigev_notify == SIGEV_THREAD ||
	    ev.sigev_notify == SIGEV_PORT) {
		int port;

		/*
		 * This timer is programmed to use event port notification
		 * when the timer fires:
		 * - allocate a port event structure and prepare it to be
		 *   sent to the port as soon as the timer fires.
		 * - when the timer fires:
		 *   - if the event structure was already sent to the port,
		 *	this is a timer fire overflow => increment the
		 *	overflow counter.
		 *   - otherwise send the pre-allocated event structure to
		 *	the port.
		 * - the events field of the port_event_t structure counts
		 *   the number of timer fired events.
		 * - the event structure is allocated using the
		 *   PORT_ALLOC_CACHED flag.
		 *   This flag indicates that the timer itself will manage
		 *   and free the event structure when required.
		 */

		it->it_flags |= IT_PORT;
		port = tim_pnevp.portnfy_port;

		/* associate timer as event source with the port */
		error = port_associate_ksource(port, PORT_SOURCE_TIMER,
		    (port_source_t **)&it->it_portsrc, timer_close_port,
		    (void *)it, NULL);
		if (error) {
			mutex_exit(&p->p_lock);
			kmem_cache_free(clock_timer_cache, it);
			kmem_free(sigq, sizeof (sigqueue_t));
			return (set_errno(error));
		}

		/* allocate an event structure/slot */
		error = port_alloc_event(port, PORT_ALLOC_SCACHED,
		    PORT_SOURCE_TIMER, &pkevp);
		if (error) {
			(void) port_dissociate_ksource(port, PORT_SOURCE_TIMER,
			    (port_source_t *)it->it_portsrc);
			mutex_exit(&p->p_lock);
			kmem_cache_free(clock_timer_cache, it);
			kmem_free(sigq, sizeof (sigqueue_t));
			return (set_errno(error));
		}

		/* initialize event data */
		port_init_event(pkevp, i, tim_pnevp.portnfy_user,
		    timer_port_callback, it);
		it->it_portev = pkevp;
		it->it_portfd = port;
	} else {
		if (ev.sigev_notify == SIGEV_SIGNAL)
			it->it_flags |= IT_SIGNAL;
	}

	/* Populate the slot now that the timer is prepped. */
	p->p_itimer[i] = it;
	mutex_exit(&p->p_lock);

	/*
	 * Call on the backend to verify the event argument (or return
	 * EINVAL if this clock type does not support timers).
	 */
	if ((error = backend->clk_timer_create(it, timer_fire)) != 0)
		goto err;

	it->it_lwp = ttolwp(curthread);
	it->it_proc = p;

	if (copyout(&i, tid, sizeof (timer_t)) != 0) {
		error = EFAULT;
		goto err;
	}

	/*
	 * If we're here, then we have successfully created the timer; we
	 * just need to release the timer and return.
	 */
	timer_release(p, it);

	return (0);

err:
	/*
	 * If we're here, an error has occurred late in the timer creation
	 * process.  We need to regrab p_lock, and delete the incipient timer.
	 * Since we never unlocked the timer (it was born locked), it's
	 * impossible for a removal to be pending.
	 */
	ASSERT(!(it->it_lock & ITLK_REMOVE));
	timer_delete_grabbed(p, i, it);

	return (set_errno(error));
}
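
/*
 * A userland sketch of the SIGEV_SIGNAL path through this function (the
 * signal choice is hypothetical; see timer_create(3R)):
 *
 *	struct sigevent ev = { 0 };
 *	timer_t tid;
 *
 *	ev.sigev_notify = SIGEV_SIGNAL;
 *	ev.sigev_signo = SIGUSR1;
 *	if (timer_create(CLOCK_REALTIME, &ev, &tid) != 0)
 *		perror("timer_create");
 */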

int
timer_gettime(timer_t tid, itimerspec_t *val)
{
	proc_t *p = curproc;
	itimer_t *it;
	itimerspec_t when;
	int error;

	if ((it = timer_grab(p, tid)) == NULL)
		return (set_errno(EINVAL));

	error = it->it_backend->clk_timer_gettime(it, &when);

	timer_release(p, it);

	if (error == 0) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyout(&when, val, sizeof (itimerspec_t)))
				error = EFAULT;
		} else {
			if (ITIMERSPEC_OVERFLOW(&when))
				error = EOVERFLOW;
			else {
				itimerspec32_t w32;

				ITIMERSPEC_TO_ITIMERSPEC32(&w32, &when)
				if (copyout(&w32, val, sizeof (itimerspec32_t)))
					error = EFAULT;
			}
		}
	}

	return (error ? set_errno(error) : 0);
}

int
timer_settime(timer_t tid, int flags, itimerspec_t *val, itimerspec_t *oval)
{
	itimerspec_t when;
	itimer_t *it;
	proc_t *p = curproc;
	int error;

	if (oval != NULL) {
		if ((error = timer_gettime(tid, oval)) != 0)
			return (error);
	}

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(val, &when, sizeof (itimerspec_t)))
			return (set_errno(EFAULT));
	} else {
		itimerspec32_t w32;

		if (copyin(val, &w32, sizeof (itimerspec32_t)))
			return (set_errno(EFAULT));

		ITIMERSPEC32_TO_ITIMERSPEC(&when, &w32);
	}

	if (itimerspecfix(&when.it_value) ||
	    (itimerspecfix(&when.it_interval) &&
	    timerspecisset(&when.it_value))) {
		return (set_errno(EINVAL));
	}

	if ((it = timer_grab(p, tid)) == NULL)
		return (set_errno(EINVAL));

	error = it->it_backend->clk_timer_settime(it, flags, &when);

	timer_release(p, it);

	return (error ? set_errno(error) : 0);
}
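
/*
 * A userland sketch (the values are hypothetical; passing TIMER_ABSTIME in
 * flags requests absolute rather than relative expiration):
 *
 *	itimerspec_t its = { 0 };
 *
 *	its.it_value.tv_sec = 1;		(initial expiration in 1s)
 *	its.it_interval.tv_nsec = 250000000;	(then every 250ms)
 *	if (timer_settime(tid, 0, &its, NULL) != 0)
 *		perror("timer_settime");
 */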

int
timer_delete(timer_t tid)
{
	proc_t *p = curproc;
	itimer_t *it;

	if ((it = timer_grab(p, tid)) == NULL)
		return (set_errno(EINVAL));

	timer_delete_grabbed(p, tid, it);

	return (0);
}

int
timer_getoverrun(timer_t tid)
{
	int overrun;
	proc_t *p = curproc;
	itimer_t *it;

	if ((it = timer_grab(p, tid)) == NULL)
		return (set_errno(EINVAL));

	/*
	 * The it_overrun field is protected by p_lock; we need to acquire
	 * it before looking at the value.
	 */
	mutex_enter(&p->p_lock);
	overrun = it->it_overrun;
	mutex_exit(&p->p_lock);

	timer_release(p, it);

	return (overrun);
}

/*
 * Entered/exited with p_lock held, but will repeatedly drop and regrab
 * p_lock.
 */
void
timer_lwpexit(void)
{
	uint_t i;
	proc_t *p = curproc;
	klwp_t *lwp = ttolwp(curthread);
	itimer_t *it, **itp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((itp = p->p_itimer) == NULL)
		return;

	for (i = 0; i < p->p_itimer_sz; i++) {
		if ((it = itp[i]) == NULL)
			continue;

		timer_lock(p, it);

		if ((it->it_lock & ITLK_REMOVE) || it->it_lwp != lwp) {
			/*
			 * This timer is either being removed or it isn't
			 * associated with this lwp.
			 */
			timer_unlock(p, it);
			continue;
		}

		/*
		 * The LWP that created this timer is going away.  To the
		 * user, our behavior here is explicitly undefined.  We will
		 * simply null out the it_lwp field; if the LWP was bound to
		 * a CPU, the cyclic will stay bound to that CPU until the
		 * process exits.
		 */
		it->it_lwp = NULL;
		timer_unlock(p, it);
	}
}

/*
 * Called to notify of an LWP binding change.  Entered/exited with p_lock
 * held, but will repeatedly drop and regrab p_lock.
 */
void
timer_lwpbind(void)
{
	uint_t i;
	proc_t *p = curproc;
	klwp_t *lwp = ttolwp(curthread);
	itimer_t *it, **itp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((itp = p->p_itimer) == NULL)
		return;

	for (i = 0; i < p->p_itimer_sz; i++) {
		if ((it = itp[i]) == NULL)
			continue;

		timer_lock(p, it);

		if (!(it->it_lock & ITLK_REMOVE) && it->it_lwp == lwp) {
			/*
			 * Drop p_lock and jump into the backend.
			 */
			mutex_exit(&p->p_lock);
			it->it_backend->clk_timer_lwpbind(it);
			mutex_enter(&p->p_lock);
		}

		timer_unlock(p, it);
	}
}

/*
 * This function should only be called if p_itimer is non-NULL.
 */
void
timer_exit(void)
{
	uint_t i;
	proc_t *p = curproc;

	ASSERT(p->p_itimer != NULL);
	ASSERT(p->p_itimer_sz != 0);

	for (i = 0; i < p->p_itimer_sz; i++) {
		(void) timer_delete((timer_t)i);
	}

	kmem_free(p->p_itimer, p->p_itimer_sz * sizeof (itimer_t *));
	p->p_itimer = NULL;
	p->p_itimer_sz = 0;
}

/*
 * timer_port_callback() is a callback function which is associated with
 * the timer event and is activated just before the event is delivered to
 * the user.  The timer uses this function to update/set the overflow
 * counter and to reenable the use of the event structure.
 */

/* ARGSUSED */
static int
timer_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	itimer_t	*it = arg;

	mutex_enter(&it->it_mutex);
	if (curproc != it->it_proc) {
		/* cannot deliver timer events to another proc */
		mutex_exit(&it->it_mutex);
		return (EACCES);
	}
	*events = it->it_pending;	/* 1 = 1 event, >1 # of overflows */
	it->it_pending = 0;		/* reinit overflow counter	*/
	/*
	 * This function can also be activated when the port is being closed
	 * and a timer event is already submitted to the port.
	 * In such a case the event port framework will use the
	 * close-callback function to notify the event sources.
	 * The timer close-callback function is timer_close_port(), which
	 * will free all allocated resources (including the allocated
	 * port event structure).
	 * For that reason we don't need to check the value of flag here.
	 */
	mutex_exit(&it->it_mutex);
	return (0);
}
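
/*
 * A userland sketch of the event port path these callbacks serve (the
 * values are hypothetical; see port_create(3C)):
 *
 *	port_notify_t pn;
 *	struct sigevent ev;
 *	port_event_t pe;
 *	timer_t tid;
 *	int port = port_create();
 *
 *	pn.portnfy_port = port;
 *	pn.portnfy_user = NULL;
 *	ev.sigev_notify = SIGEV_PORT;
 *	ev.sigev_value.sival_ptr = &pn;
 *	(void) timer_create(CLOCK_REALTIME, &ev, &tid);
 *	...
 *	(void) port_get(port, &pe, NULL);
 *
 * On return, pe.portev_events holds the fire count that
 * timer_port_callback() copied out of it_pending.
 */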

/*
 * The port is being closed; free all allocated port event structures.
 * The delivered arg currently corresponds to the first timer associated
 * with the port and is not usable in this case.  We have to scan the list
 * of activated timers in the current proc and compare them with the
 * delivered port id.
 */

/* ARGSUSED */
static void
timer_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	proc_t		*p = curproc;
	timer_t		tid;
	itimer_t	*it;

	for (tid = 0; tid < timer_max; tid++) {
		if ((it = timer_grab(p, tid)) == NULL)
			continue;
		if (it->it_portev) {
			mutex_enter(&it->it_mutex);
			if (it->it_portfd == port) {
				port_kevent_t *pev;

				pev = (port_kevent_t *)it->it_portev;
				it->it_portev = NULL;
				it->it_flags &= ~IT_PORT;
				mutex_exit(&it->it_mutex);
				(void) port_remove_done_event(pev);
				port_free_event(pev);
			} else {
				mutex_exit(&it->it_mutex);
			}
		}
		timer_release(p, it);
	}
}
1088