xref: /titanic_44/usr/src/uts/common/os/timer.c (revision e429788e241121c1f81089f762558027000ea25f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/timer.h>
30 #include <sys/systm.h>
31 #include <sys/param.h>
32 #include <sys/kmem.h>
33 #include <sys/debug.h>
34 #include <sys/policy.h>
35 #include <sys/port.h>
36 #include <sys/port_kernel.h>
37 #include <sys/contract/process_impl.h>
38 
39 static kmem_cache_t *clock_timer_cache;
40 static clock_backend_t *clock_backend[CLOCK_MAX];
41 static int timer_port_callback(void *, int *, pid_t, int, void *);
42 static void timer_close_port(void *, int, pid_t, int);
43 
44 #define	CLOCK_BACKEND(clk) \
45 	((clk) < CLOCK_MAX && (clk) >= 0 ? clock_backend[(clk)] : NULL)
46 
47 /*
48  * Tunable to increase the maximum number of POSIX timers per-process.  This
49  * may _only_ be tuned in /etc/system or by patching the kernel binary; it
50  * _cannot_ be tuned on a running system.
51  */
52 int timer_max = _TIMER_MAX;
53 
54 /*
55  * timer_lock() locks the specified interval timer.  It doesn't look at the
56  * ITLK_REMOVE bit; it's up to callers to look at this if they need to
57  * care.  p_lock must be held on entry; it may be dropped and reaquired,
58  * but timer_lock() will always return with p_lock held.
59  *
60  * Note that timer_create() doesn't call timer_lock(); it creates timers
61  * with the ITLK_LOCKED bit explictly set.
62  */
63 static void
64 timer_lock(proc_t *p, itimer_t *it)
65 {
66 	ASSERT(MUTEX_HELD(&p->p_lock));
67 
68 	while (it->it_lock & ITLK_LOCKED) {
69 		it->it_blockers++;
70 		cv_wait(&it->it_cv, &p->p_lock);
71 		it->it_blockers--;
72 	}
73 
74 	it->it_lock |= ITLK_LOCKED;
75 }
76 
77 /*
78  * timer_unlock() unlocks the specified interval timer, waking up any
79  * waiters.  p_lock must be held on entry; it will not be dropped by
80  * timer_unlock().
81  */
82 static void
83 timer_unlock(proc_t *p, itimer_t *it)
84 {
85 	ASSERT(MUTEX_HELD(&p->p_lock));
86 	ASSERT(it->it_lock & ITLK_LOCKED);
87 	it->it_lock &= ~ITLK_LOCKED;
88 	cv_signal(&it->it_cv);
89 }
90 
91 /*
92  * timer_delete_locked() takes a proc pointer, timer ID and locked interval
93  * timer, and deletes the specified timer.  It must be called with p_lock
94  * held, and cannot be called on a timer which already has ITLK_REMOVE set;
95  * the caller must check this.  timer_delete_locked() will set the ITLK_REMOVE
96  * bit and will iteratively unlock and lock the interval timer until all
97  * blockers have seen the ITLK_REMOVE and cleared out.  It will then zero
98  * out the specified entry in the p_itimer array, and call into the clock
99  * backend to complete the deletion.
100  *
101  * This function will always return with p_lock held.
102  */
103 static void
104 timer_delete_locked(proc_t *p, timer_t tid, itimer_t *it)
105 {
106 	ASSERT(MUTEX_HELD(&p->p_lock));
107 	ASSERT(!(it->it_lock & ITLK_REMOVE));
108 	ASSERT(it->it_lock & ITLK_LOCKED);
109 
110 	it->it_lock |= ITLK_REMOVE;
111 
112 	/*
113 	 * If there are threads waiting to lock this timer, we'll unlock
114 	 * the timer, and block on the cv.  Threads blocking our removal will
115 	 * have the opportunity to run; when they see the ITLK_REMOVE flag
116 	 * set, they will immediately unlock the timer.
117 	 */
118 	while (it->it_blockers) {
119 		timer_unlock(p, it);
120 		cv_wait(&it->it_cv, &p->p_lock);
121 		timer_lock(p, it);
122 	}
123 
124 	ASSERT(p->p_itimer[tid] == it);
125 	p->p_itimer[tid] = NULL;
126 
127 	/*
128 	 * No one is blocked on this timer, and no one will be (we've set
129 	 * p_itimer[tid] to be NULL; no one can find it).  Now we call into
130 	 * the clock backend to delete the timer; it is up to the backend to
131 	 * guarantee that timer_fire() has completed (and will never again
132 	 * be called) for this timer.
133 	 */
134 	mutex_exit(&p->p_lock);
135 
136 	it->it_backend->clk_timer_delete(it);
137 
138 	if (it->it_portev) {
139 		mutex_enter(&it->it_mutex);
140 		if (it->it_portev) {
141 			/* dissociate timer from the event port */
142 			(void) port_dissociate_ksource(it->it_portfd,
143 			    PORT_SOURCE_TIMER, (port_source_t *)it->it_portsrc);
144 			port_free_event((port_kevent_t *)it->it_portev);
145 			it->it_portev = NULL;
146 			it->it_flags &= ~IT_PORT;
147 			it->it_pending = 0;
148 		}
149 		mutex_exit(&it->it_mutex);
150 	}
151 
152 	mutex_enter(&p->p_lock);
153 
154 	/*
155 	 * We need to be careful freeing the sigqueue for this timer;
156 	 * if a signal is pending, the sigqueue needs to be freed
157 	 * synchronously in siginfofree().  The need to free the sigqueue
158 	 * in siginfofree() is indicated by setting sq_func to NULL.
159 	 */
160 	if (it->it_pending > 0) {
161 		it->it_sigq->sq_func = NULL;
162 	} else {
163 		kmem_free(it->it_sigq, sizeof (sigqueue_t));
164 	}
165 
166 	ASSERT(it->it_blockers == 0);
167 	kmem_cache_free(clock_timer_cache, it);
168 }
169 
170 /*
171  * timer_grab() and its companion routine, timer_release(), are wrappers
172  * around timer_lock()/_unlock() which allow the timer_*(3R) routines to
173  * (a) share error handling code and (b) not grab p_lock themselves.  Routines
174  * which are called with p_lock held (e.g. timer_lwpbind(), timer_lwpexit())
175  * must call timer_lock()/_unlock() explictly.
176  *
177  * timer_grab() takes a proc and a timer ID, and returns a pointer to a
178  * locked interval timer.  p_lock must _not_ be held on entry; timer_grab()
179  * may acquire p_lock, but will always return with p_lock dropped.
180  *
181  * If timer_grab() fails, it will return NULL.  timer_grab() will fail if
182  * one or more of the following is true:
183  *
184  *  (a)	The specified timer ID is out of range.
185  *
186  *  (b)	The specified timer ID does not correspond to a timer ID returned
187  *	from timer_create(3R).
188  *
189  *  (c)	The specified timer ID is currently being removed.
190  *
191  */
192 static itimer_t *
193 timer_grab(proc_t *p, timer_t tid)
194 {
195 	itimer_t **itp, *it;
196 
197 	if (tid >= timer_max || tid < 0)
198 		return (NULL);
199 
200 	mutex_enter(&p->p_lock);
201 
202 	if ((itp = p->p_itimer) == NULL || (it = itp[tid]) == NULL) {
203 		mutex_exit(&p->p_lock);
204 		return (NULL);
205 	}
206 
207 	timer_lock(p, it);
208 
209 	if (it->it_lock & ITLK_REMOVE) {
210 		/*
211 		 * Someone is removing this timer; it will soon be invalid.
212 		 */
213 		timer_unlock(p, it);
214 		mutex_exit(&p->p_lock);
215 		return (NULL);
216 	}
217 
218 	mutex_exit(&p->p_lock);
219 
220 	return (it);
221 }
222 
223 /*
224  * timer_release() releases a timer acquired with timer_grab().  p_lock
225  * should not be held on entry; timer_release() will acquire p_lock but
226  * will drop it before returning.
227  */
228 static void
229 timer_release(proc_t *p, itimer_t *it)
230 {
231 	mutex_enter(&p->p_lock);
232 	timer_unlock(p, it);
233 	mutex_exit(&p->p_lock);
234 }
235 
236 /*
237  * timer_delete_grabbed() deletes a timer acquired with timer_grab().
238  * p_lock should not be held on entry; timer_delete_grabbed() will acquire
239  * p_lock, but will drop it before returning.
240  */
241 static void
242 timer_delete_grabbed(proc_t *p, timer_t tid, itimer_t *it)
243 {
244 	mutex_enter(&p->p_lock);
245 	timer_delete_locked(p, tid, it);
246 	mutex_exit(&p->p_lock);
247 }
248 
249 void
250 clock_timer_init()
251 {
252 	clock_timer_cache = kmem_cache_create("timer_cache",
253 	    sizeof (itimer_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
254 }
255 
256 void
257 clock_add_backend(clockid_t clock, clock_backend_t *backend)
258 {
259 	ASSERT(clock >= 0 && clock < CLOCK_MAX);
260 	ASSERT(clock_backend[clock] == NULL);
261 
262 	clock_backend[clock] = backend;
263 }
264 
265 int
266 clock_settime(clockid_t clock, timespec_t *tp)
267 {
268 	timespec_t t;
269 	clock_backend_t *backend;
270 	int error;
271 
272 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
273 		return (set_errno(EINVAL));
274 
275 	if (secpolicy_settime(CRED()) != 0)
276 		return (set_errno(EPERM));
277 
278 	if (get_udatamodel() == DATAMODEL_NATIVE) {
279 		if (copyin(tp, &t, sizeof (timespec_t)) != 0)
280 			return (set_errno(EFAULT));
281 	} else {
282 		timespec32_t t32;
283 
284 		if (copyin(tp, &t32, sizeof (timespec32_t)) != 0)
285 			return (set_errno(EFAULT));
286 
287 		TIMESPEC32_TO_TIMESPEC(&t, &t32);
288 	}
289 
290 	if (itimerspecfix(&t))
291 		return (set_errno(EINVAL));
292 
293 	error = backend->clk_clock_settime(&t);
294 
295 	if (error)
296 		return (set_errno(error));
297 
298 	return (0);
299 }
300 
301 int
302 clock_gettime(clockid_t clock, timespec_t *tp)
303 {
304 	timespec_t t;
305 	clock_backend_t *backend;
306 	int error;
307 
308 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
309 		return (set_errno(EINVAL));
310 
311 	error = backend->clk_clock_gettime(&t);
312 
313 	if (error)
314 		return (set_errno(error));
315 
316 	if (get_udatamodel() == DATAMODEL_NATIVE) {
317 		if (copyout(&t, tp, sizeof (timespec_t)) != 0)
318 			return (set_errno(EFAULT));
319 	} else {
320 		timespec32_t t32;
321 
322 		if (TIMESPEC_OVERFLOW(&t))
323 			return (set_errno(EOVERFLOW));
324 		TIMESPEC_TO_TIMESPEC32(&t32, &t);
325 
326 		if (copyout(&t32, tp, sizeof (timespec32_t)) != 0)
327 			return (set_errno(EFAULT));
328 	}
329 
330 	return (0);
331 }
332 
333 int
334 clock_getres(clockid_t clock, timespec_t *tp)
335 {
336 	timespec_t t;
337 	clock_backend_t *backend;
338 	int error;
339 
340 	/*
341 	 * Strangely, the standard defines clock_getres() with a NULL tp
342 	 * to do nothing (regardless of the validity of the specified
343 	 * clock_id).  Go figure.
344 	 */
345 	if (tp == NULL)
346 		return (0);
347 
348 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
349 		return (set_errno(EINVAL));
350 
351 	error = backend->clk_clock_getres(&t);
352 
353 	if (error)
354 		return (set_errno(error));
355 
356 	if (get_udatamodel() == DATAMODEL_NATIVE) {
357 		if (copyout(&t, tp, sizeof (timespec_t)) != 0)
358 			return (set_errno(EFAULT));
359 	} else {
360 		timespec32_t t32;
361 
362 		if (TIMESPEC_OVERFLOW(&t))
363 			return (set_errno(EOVERFLOW));
364 		TIMESPEC_TO_TIMESPEC32(&t32, &t);
365 
366 		if (copyout(&t32, tp, sizeof (timespec32_t)) != 0)
367 			return (set_errno(EFAULT));
368 	}
369 
370 	return (0);
371 }
372 
373 void
374 timer_signal(sigqueue_t *sigq)
375 {
376 	itimer_t *it = (itimer_t *)sigq->sq_backptr;
377 
378 	/*
379 	 * There are some conditions during a fork or an exit when we can
380 	 * call siginfofree() without p_lock held.  To prevent a race
381 	 * between timer_signal() and timer_fire() with regard to it_pending,
382 	 * we therefore acquire it_mutex in both paths.
383 	 */
384 	mutex_enter(&it->it_mutex);
385 	ASSERT(it->it_pending > 0);
386 	it->it_overrun = it->it_pending - 1;
387 	it->it_pending = 0;
388 	mutex_exit(&it->it_mutex);
389 }
390 
391 /*
392  * This routine is called from the clock backend.
393  */
394 void
395 timer_fire(itimer_t *it)
396 {
397 	proc_t *p;
398 	int proc_lock_held;
399 
400 	if (it->it_flags & IT_SIGNAL) {
401 		/*
402 		 * See the comment in timer_signal() for why it is not
403 		 * sufficient to only grab p_lock here. Because p_lock can be
404 		 * held on entry to timer_signal(), the lock ordering is
405 		 * necessarily p_lock before it_mutex.
406 		 */
407 
408 		p = it->it_proc;
409 		proc_lock_held = 1;
410 		mutex_enter(&p->p_lock);
411 	} else {
412 		/*
413 		 * IT_PORT:
414 		 * If a timer was ever programmed to send events to a port,
415 		 * the IT_PORT flag will remain set until:
416 		 * a) the timer is deleted (see timer_delete_locked()) or
417 		 * b) the port is being closed (see timer_close_port()).
418 		 * Both cases are synchronized with the it_mutex.
419 		 * We don't need to use the p_lock because it is only
420 		 * required in the IT_SIGNAL case.
421 		 * If IT_PORT was set and the port is being closed then
422 		 * the timer notification is set to NONE. In such a case
423 		 * the timer itself and the it_pending counter remain active
424 		 * until the application deletes the counter or the process
425 		 * exits.
426 		 */
427 		proc_lock_held = 0;
428 	}
429 	mutex_enter(&it->it_mutex);
430 
431 	if (it->it_pending > 0) {
432 		if (it->it_pending < INT_MAX)
433 			it->it_pending++;
434 		mutex_exit(&it->it_mutex);
435 	} else {
436 		if (it->it_flags & IT_PORT) {
437 			it->it_pending = 1;
438 			port_send_event((port_kevent_t *)it->it_portev);
439 			mutex_exit(&it->it_mutex);
440 		} else if (it->it_flags & IT_SIGNAL) {
441 			it->it_pending = 1;
442 			mutex_exit(&it->it_mutex);
443 			sigaddqa(p, NULL, it->it_sigq);
444 		} else {
445 			mutex_exit(&it->it_mutex);
446 		}
447 	}
448 
449 	if (proc_lock_held)
450 		mutex_exit(&p->p_lock);
451 }
452 
453 int
454 timer_create(clockid_t clock, struct sigevent *evp, timer_t *tid)
455 {
456 	struct sigevent ev;
457 	proc_t *p = curproc;
458 	clock_backend_t *backend;
459 	itimer_t *it, **itp;
460 	sigqueue_t *sigq;
461 	cred_t *cr = CRED();
462 	int error = 0;
463 	timer_t i;
464 	port_notify_t tim_pnevp;
465 	port_kevent_t *pkevp = NULL;
466 
467 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
468 		return (set_errno(EINVAL));
469 
470 	if (evp != NULL) {
471 		/*
472 		 * short copyin() for binary compatibility
473 		 * fetch oldsigevent to determine how much to copy in.
474 		 */
475 		if (get_udatamodel() == DATAMODEL_NATIVE) {
476 			if (copyin(evp, &ev, sizeof (struct oldsigevent)))
477 				return (set_errno(EFAULT));
478 
479 			if (ev.sigev_notify == SIGEV_PORT ||
480 			    ev.sigev_notify == SIGEV_THREAD) {
481 				if (copyin(ev.sigev_value.sival_ptr, &tim_pnevp,
482 				    sizeof (port_notify_t)))
483 					return (set_errno(EFAULT));
484 			}
485 #ifdef	_SYSCALL32_IMPL
486 		} else {
487 			struct sigevent32 ev32;
488 			port_notify32_t tim_pnevp32;
489 
490 			if (copyin(evp, &ev32, sizeof (struct oldsigevent32)))
491 				return (set_errno(EFAULT));
492 			ev.sigev_notify = ev32.sigev_notify;
493 			ev.sigev_signo = ev32.sigev_signo;
494 			/*
495 			 * See comment in sigqueue32() on handling of 32-bit
496 			 * sigvals in a 64-bit kernel.
497 			 */
498 			ev.sigev_value.sival_int = ev32.sigev_value.sival_int;
499 			if (ev.sigev_notify == SIGEV_PORT ||
500 			    ev.sigev_notify == SIGEV_THREAD) {
501 				if (copyin((void *)(uintptr_t)
502 				    ev32.sigev_value.sival_ptr,
503 				    (void *)&tim_pnevp32,
504 				    sizeof (port_notify32_t)))
505 					return (set_errno(EFAULT));
506 				tim_pnevp.portnfy_port =
507 				    tim_pnevp32.portnfy_port;
508 				tim_pnevp.portnfy_user =
509 				    (void *)(uintptr_t)tim_pnevp32.portnfy_user;
510 			}
511 #endif
512 		}
513 		switch (ev.sigev_notify) {
514 		case SIGEV_NONE:
515 			break;
516 		case SIGEV_SIGNAL:
517 			if (ev.sigev_signo < 1 || ev.sigev_signo >= NSIG)
518 				return (set_errno(EINVAL));
519 			break;
520 		case SIGEV_THREAD:
521 		case SIGEV_PORT:
522 			break;
523 		default:
524 			return (set_errno(EINVAL));
525 		}
526 	} else {
527 		/*
528 		 * Use the clock's default sigevent (this is a structure copy).
529 		 */
530 		ev = backend->clk_default;
531 	}
532 
533 	/*
534 	 * We'll allocate our timer and sigqueue now, before we grab p_lock.
535 	 * If we can't find an empty slot, we'll free them before returning.
536 	 */
537 	it = kmem_cache_alloc(clock_timer_cache, KM_SLEEP);
538 	bzero(it, sizeof (itimer_t));
539 	mutex_init(&it->it_mutex, NULL, MUTEX_DEFAULT, NULL);
540 	sigq = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
541 
542 	mutex_enter(&p->p_lock);
543 
544 	/*
545 	 * If this is this process' first timer, we need to attempt to allocate
546 	 * an array of timerstr_t pointers.  We drop p_lock to perform the
547 	 * allocation; if we return to discover that p_itimer is non-NULL,
548 	 * we will free our allocation and drive on.
549 	 */
550 	if ((itp = p->p_itimer) == NULL) {
551 		mutex_exit(&p->p_lock);
552 		itp = kmem_zalloc(timer_max * sizeof (itimer_t *), KM_SLEEP);
553 		mutex_enter(&p->p_lock);
554 
555 		if (p->p_itimer == NULL)
556 			p->p_itimer = itp;
557 		else {
558 			kmem_free(itp, timer_max * sizeof (itimer_t *));
559 			itp = p->p_itimer;
560 		}
561 	}
562 
563 	for (i = 0; i < timer_max && itp[i] != NULL; i++)
564 		continue;
565 
566 	if (i == timer_max) {
567 		/*
568 		 * We couldn't find a slot.  Drop p_lock, free the preallocated
569 		 * timer and sigqueue, and return an error.
570 		 */
571 		mutex_exit(&p->p_lock);
572 		kmem_cache_free(clock_timer_cache, it);
573 		kmem_free(sigq, sizeof (sigqueue_t));
574 
575 		return (set_errno(EAGAIN));
576 	}
577 
578 	ASSERT(i < timer_max && itp[i] == NULL);
579 
580 	/*
581 	 * If we develop other notification mechanisms, this will need
582 	 * to call into (yet another) backend.
583 	 */
584 	sigq->sq_info.si_signo = ev.sigev_signo;
585 	if (evp == NULL)
586 		sigq->sq_info.si_value.sival_int = i;
587 	else
588 		sigq->sq_info.si_value = ev.sigev_value;
589 	sigq->sq_info.si_code = SI_TIMER;
590 	sigq->sq_info.si_pid = p->p_pid;
591 	sigq->sq_info.si_ctid = PRCTID(p);
592 	sigq->sq_info.si_zoneid = getzoneid();
593 	sigq->sq_info.si_uid = crgetruid(cr);
594 	sigq->sq_func = timer_signal;
595 	sigq->sq_next = NULL;
596 	sigq->sq_backptr = it;
597 	it->it_sigq = sigq;
598 	it->it_backend = backend;
599 	it->it_lock = ITLK_LOCKED;
600 	itp[i] = it;
601 
602 
603 	if (ev.sigev_notify == SIGEV_THREAD ||
604 	    ev.sigev_notify == SIGEV_PORT) {
605 		int port;
606 
607 		/*
608 		 * This timer is programmed to use event port notification when
609 		 * the timer fires:
610 		 * - allocate a port event structure and prepare it to be sent
611 		 *   to the port as soon as the timer fires.
612 		 * - when the timer fires :
613 		 *   - if event structure was already sent to the port then this
614 		 *	is a timer fire overflow => increment overflow counter.
615 		 *   - otherwise send pre-allocated event structure to the port.
616 		 * - the events field of the port_event_t structure counts the
617 		 *   number of timer fired events.
618 		 * - The event structured is allocated using the
619 		 *   PORT_ALLOC_CACHED flag.
620 		 *   This flag indicates that the timer itself will manage and
621 		 *   free the event structure when required.
622 		 */
623 
624 		it->it_flags |= IT_PORT;
625 		port = tim_pnevp.portnfy_port;
626 
627 		/* associate timer as event source with the port */
628 		error = port_associate_ksource(port, PORT_SOURCE_TIMER,
629 		    (port_source_t **)&it->it_portsrc, timer_close_port,
630 		    (void *)it, NULL);
631 		if (error) {
632 			itp[i] = NULL;		/* clear slot */
633 			mutex_exit(&p->p_lock);
634 			kmem_cache_free(clock_timer_cache, it);
635 			kmem_free(sigq, sizeof (sigqueue_t));
636 			return (set_errno(error));
637 		}
638 
639 		/* allocate an event structure/slot */
640 		error = port_alloc_event(port, PORT_ALLOC_SCACHED,
641 		    PORT_SOURCE_TIMER, &pkevp);
642 		if (error) {
643 			(void) port_dissociate_ksource(port, PORT_SOURCE_TIMER,
644 			    (port_source_t *)it->it_portsrc);
645 			itp[i] = NULL;		/* clear slot */
646 			mutex_exit(&p->p_lock);
647 			kmem_cache_free(clock_timer_cache, it);
648 			kmem_free(sigq, sizeof (sigqueue_t));
649 			return (set_errno(error));
650 		}
651 
652 		/* initialize event data */
653 		port_init_event(pkevp, i, tim_pnevp.portnfy_user,
654 		    timer_port_callback, it);
655 		it->it_portev = pkevp;
656 		it->it_portfd = port;
657 	} else {
658 		if (ev.sigev_notify == SIGEV_SIGNAL)
659 			it->it_flags |= IT_SIGNAL;
660 	}
661 
662 	mutex_exit(&p->p_lock);
663 
664 	/*
665 	 * Call on the backend to verify the event argument (or return
666 	 * EINVAL if this clock type does not support timers).
667 	 */
668 	if ((error = backend->clk_timer_create(it, &ev)) != 0)
669 		goto err;
670 
671 	it->it_lwp = ttolwp(curthread);
672 	it->it_proc = p;
673 
674 	if (copyout(&i, tid, sizeof (timer_t)) != 0) {
675 		error = EFAULT;
676 		goto err;
677 	}
678 
679 	/*
680 	 * If we're here, then we have successfully created the timer; we
681 	 * just need to release the timer and return.
682 	 */
683 	timer_release(p, it);
684 
685 	return (0);
686 
687 err:
688 	/*
689 	 * If we're here, an error has occurred late in the timer creation
690 	 * process.  We need to regrab p_lock, and delete the incipient timer.
691 	 * Since we never unlocked the timer (it was born locked), it's
692 	 * impossible for a removal to be pending.
693 	 */
694 	ASSERT(!(it->it_lock & ITLK_REMOVE));
695 	timer_delete_grabbed(p, i, it);
696 
697 	return (set_errno(error));
698 }
699 
700 int
701 timer_gettime(timer_t tid, itimerspec_t *val)
702 {
703 	proc_t *p = curproc;
704 	itimer_t *it;
705 	itimerspec_t when;
706 	int error;
707 
708 	if ((it = timer_grab(p, tid)) == NULL)
709 		return (set_errno(EINVAL));
710 
711 	error = it->it_backend->clk_timer_gettime(it, &when);
712 
713 	timer_release(p, it);
714 
715 	if (error == 0) {
716 		if (get_udatamodel() == DATAMODEL_NATIVE) {
717 			if (copyout(&when, val, sizeof (itimerspec_t)))
718 				error = EFAULT;
719 		} else {
720 			if (ITIMERSPEC_OVERFLOW(&when))
721 				error = EOVERFLOW;
722 			else {
723 				itimerspec32_t w32;
724 
725 				ITIMERSPEC_TO_ITIMERSPEC32(&w32, &when)
726 				if (copyout(&w32, val, sizeof (itimerspec32_t)))
727 					error = EFAULT;
728 			}
729 		}
730 	}
731 
732 	return (error ? set_errno(error) : 0);
733 }
734 
735 int
736 timer_settime(timer_t tid, int flags, itimerspec_t *val, itimerspec_t *oval)
737 {
738 	itimerspec_t when;
739 	timespec_t res;
740 	itimer_t *it;
741 	proc_t *p = curproc;
742 	int error;
743 
744 	if (oval != NULL) {
745 		if ((error = timer_gettime(tid, oval)) != 0)
746 			return (error);
747 	}
748 
749 	if (get_udatamodel() == DATAMODEL_NATIVE) {
750 		if (copyin(val, &when, sizeof (itimerspec_t)))
751 			return (set_errno(EFAULT));
752 	} else {
753 		itimerspec32_t w32;
754 
755 		if (copyin(val, &w32, sizeof (itimerspec32_t)))
756 			return (set_errno(EFAULT));
757 
758 		ITIMERSPEC32_TO_ITIMERSPEC(&when, &w32);
759 	}
760 
761 	if (itimerspecfix(&when.it_value) ||
762 	    (itimerspecfix(&when.it_interval) &&
763 	    timerspecisset(&when.it_value))) {
764 		return (set_errno(EINVAL));
765 	}
766 
767 	if ((it = timer_grab(p, tid)) == NULL)
768 		return (set_errno(EINVAL));
769 
770 	/*
771 	 * From the man page:
772 	 *	Time values that are between two consecutive non-negative
773 	 *	integer multiples of the resolution of the specified timer
774 	 *	shall be rounded up to the larger multiple of the resolution.
775 	 * We assume that the resolution of any clock is less than one second.
776 	 */
777 	if (it->it_backend->clk_clock_getres(&res) == 0 && res.tv_nsec > 1) {
778 		long rem;
779 
780 		if ((rem = when.it_interval.tv_nsec % res.tv_nsec) != 0) {
781 			when.it_interval.tv_nsec += res.tv_nsec - rem;
782 			timespecfix(&when.it_interval);
783 		}
784 		if ((rem = when.it_value.tv_nsec % res.tv_nsec) != 0) {
785 			when.it_value.tv_nsec += res.tv_nsec - rem;
786 			timespecfix(&when.it_value);
787 		}
788 	}
789 	error = it->it_backend->clk_timer_settime(it, flags, &when);
790 
791 	timer_release(p, it);
792 
793 	return (error ? set_errno(error) : 0);
794 }
795 
796 int
797 timer_delete(timer_t tid)
798 {
799 	proc_t *p = curproc;
800 	itimer_t *it;
801 
802 	if ((it = timer_grab(p, tid)) == NULL)
803 		return (set_errno(EINVAL));
804 
805 	timer_delete_grabbed(p, tid, it);
806 
807 	return (0);
808 }
809 
810 int
811 timer_getoverrun(timer_t tid)
812 {
813 	int overrun;
814 	proc_t *p = curproc;
815 	itimer_t *it;
816 
817 	if ((it = timer_grab(p, tid)) == NULL)
818 		return (set_errno(EINVAL));
819 
820 	/*
821 	 * The it_overrun field is protected by p_lock; we need to acquire
822 	 * it before looking at the value.
823 	 */
824 	mutex_enter(&p->p_lock);
825 	overrun = it->it_overrun;
826 	mutex_exit(&p->p_lock);
827 
828 	timer_release(p, it);
829 
830 	return (overrun);
831 }
832 
833 /*
834  * Entered/exited with p_lock held, but will repeatedly drop and regrab p_lock.
835  */
836 void
837 timer_lwpexit(void)
838 {
839 	timer_t i;
840 	proc_t *p = curproc;
841 	klwp_t *lwp = ttolwp(curthread);
842 	itimer_t *it, **itp;
843 
844 	ASSERT(MUTEX_HELD(&p->p_lock));
845 
846 	if ((itp = p->p_itimer) == NULL)
847 		return;
848 
849 	for (i = 0; i < timer_max; i++) {
850 		if ((it = itp[i]) == NULL)
851 			continue;
852 
853 		timer_lock(p, it);
854 
855 		if ((it->it_lock & ITLK_REMOVE) || it->it_lwp != lwp) {
856 			/*
857 			 * This timer is either being removed or it isn't
858 			 * associated with this lwp.
859 			 */
860 			timer_unlock(p, it);
861 			continue;
862 		}
863 
864 		/*
865 		 * The LWP that created this timer is going away.  To the user,
866 		 * our behavior here is explicitly undefined.  We will simply
867 		 * null out the it_lwp field; if the LWP was bound to a CPU,
868 		 * the cyclic will stay bound to that CPU until the process
869 		 * exits.
870 		 */
871 		it->it_lwp = NULL;
872 		timer_unlock(p, it);
873 	}
874 }
875 
876 /*
877  * Called to notify of an LWP binding change.  Entered/exited with p_lock
878  * held, but will repeatedly drop and regrab p_lock.
879  */
880 void
881 timer_lwpbind()
882 {
883 	timer_t i;
884 	proc_t *p = curproc;
885 	klwp_t *lwp = ttolwp(curthread);
886 	itimer_t *it, **itp;
887 
888 	ASSERT(MUTEX_HELD(&p->p_lock));
889 
890 	if ((itp = p->p_itimer) == NULL)
891 		return;
892 
893 	for (i = 0; i < timer_max; i++) {
894 		if ((it = itp[i]) == NULL)
895 			continue;
896 
897 		timer_lock(p, it);
898 
899 		if (!(it->it_lock & ITLK_REMOVE) && it->it_lwp == lwp) {
900 			/*
901 			 * Drop p_lock and jump into the backend.
902 			 */
903 			mutex_exit(&p->p_lock);
904 			it->it_backend->clk_timer_lwpbind(it);
905 			mutex_enter(&p->p_lock);
906 		}
907 
908 		timer_unlock(p, it);
909 	}
910 }
911 
912 /*
913  * This function should only be called if p_itimer is non-NULL.
914  */
915 void
916 timer_exit(void)
917 {
918 	timer_t i;
919 	proc_t *p = curproc;
920 
921 	ASSERT(p->p_itimer != NULL);
922 
923 	for (i = 0; i < timer_max; i++)
924 		(void) timer_delete(i);
925 
926 	kmem_free(p->p_itimer, timer_max * sizeof (itimer_t *));
927 	p->p_itimer = NULL;
928 }
929 
930 /*
931  * timer_port_callback() is a callback function which is associated with the
932  * timer event and is activated just before the event is delivered to the user.
933  * The timer uses this function to update/set the overflow counter and
934  * to reenable the use of the event structure.
935  */
936 
937 /* ARGSUSED */
938 static int
939 timer_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
940 {
941 	itimer_t	*it = arg;
942 
943 	mutex_enter(&it->it_mutex);
944 	if (curproc != it->it_proc) {
945 		/* can not deliver timer events to another proc */
946 		mutex_exit(&it->it_mutex);
947 		return (EACCES);
948 	}
949 	*events = it->it_pending;	/* 1 = 1 event, >1 # of overflows */
950 	it->it_pending = 0;		/* reinit overflow counter	*/
951 	/*
952 	 * This function can also be activated when the port is being closed
953 	 * and a timer event is already submitted to the port.
954 	 * In such a case the event port framework will use the
955 	 * close-callback function to notify the events sources.
956 	 * The timer close-callback function is timer_close_port() which
957 	 * will free all allocated resources (including the allocated
958 	 * port event structure).
959 	 * For that reason we don't need to check the value of flag here.
960 	 */
961 	mutex_exit(&it->it_mutex);
962 	return (0);
963 }
964 
965 /*
966  * port is being closed ... free all allocated port event structures
967  * The delivered arg currently correspond to the first timer associated with
968  * the port and it is not useable in this case.
969  * We have to scan the list of activated timers in the current proc and
970  * compare them with the delivered port id.
971  */
972 
973 /* ARGSUSED */
974 static void
975 timer_close_port(void *arg, int port, pid_t pid, int lastclose)
976 {
977 	proc_t		*p = curproc;
978 	timer_t		tid;
979 	itimer_t	*it;
980 
981 	for (tid = 0; tid < timer_max; tid++) {
982 		if ((it = timer_grab(p, tid)) == NULL)
983 			continue;
984 		if (it->it_portev) {
985 			mutex_enter(&it->it_mutex);
986 			if (it->it_portfd == port) {
987 				port_free_event((port_kevent_t *)it->it_portev);
988 				it->it_portev = NULL;
989 				it->it_flags &= ~IT_PORT;
990 			}
991 			mutex_exit(&it->it_mutex);
992 		}
993 		timer_release(p, it);
994 	}
995 }
996