xref: /titanic_50/usr/src/uts/common/os/timer.c (revision 75ce41a57ff334bd8fe2cb9ed51eea835892f944)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/timer.h>
30 #include <sys/systm.h>
31 #include <sys/param.h>
32 #include <sys/kmem.h>
33 #include <sys/debug.h>
34 #include <sys/policy.h>
35 #include <sys/port_impl.h>
36 #include <sys/port_kernel.h>
37 #include <sys/contract/process_impl.h>
38 
39 static kmem_cache_t *clock_timer_cache;
40 static clock_backend_t *clock_backend[CLOCK_MAX];
41 static int timer_port_callback(void *, int *, pid_t, int, void *);
42 static void timer_close_port(void *, int, pid_t, int);
43 
44 #define	CLOCK_BACKEND(clk) \
45 	((clk) < CLOCK_MAX && (clk) >= 0 ? clock_backend[(clk)] : NULL)
46 
47 /*
48  * Tunable to increase the maximum number of POSIX timers per-process.  This
49  * may _only_ be tuned in /etc/system or by patching the kernel binary; it
50  * _cannot_ be tuned on a running system.
51  */
52 int timer_max = _TIMER_MAX;
53 
54 /*
55  * timer_lock() locks the specified interval timer.  It doesn't look at the
56  * ITLK_REMOVE bit; it's up to callers to look at this if they need to
 57  * care.  p_lock must be held on entry; it may be dropped and reacquired,
58  * but timer_lock() will always return with p_lock held.
59  *
60  * Note that timer_create() doesn't call timer_lock(); it creates timers
 61  * with the ITLK_LOCKED bit explicitly set.
62  */
static void
timer_lock(proc_t *p, itimer_t *it)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Wait until no one else holds this timer.  cv_wait() drops and
	 * reacquires p_lock, so ITLK_LOCKED must be retested on every
	 * wakeup.  it_blockers counts the threads sleeping here;
	 * timer_delete_locked() uses it to drain waiters before removal.
	 */
	while (it->it_lock & ITLK_LOCKED) {
		it->it_blockers++;
		cv_wait(&it->it_cv, &p->p_lock);
		it->it_blockers--;
	}

	/* The timer is now ours. */
	it->it_lock |= ITLK_LOCKED;
}
76 
77 /*
78  * timer_unlock() unlocks the specified interval timer, waking up any
79  * waiters.  p_lock must be held on entry; it will not be dropped by
80  * timer_unlock().
81  */
static void
timer_unlock(proc_t *p, itimer_t *it)
{
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(it->it_lock & ITLK_LOCKED);
	it->it_lock &= ~ITLK_LOCKED;
	/* Wake one thread sleeping in timer_lock()/timer_delete_locked(). */
	cv_signal(&it->it_cv);
}
90 
91 /*
92  * timer_delete_locked() takes a proc pointer, timer ID and locked interval
93  * timer, and deletes the specified timer.  It must be called with p_lock
94  * held, and cannot be called on a timer which already has ITLK_REMOVE set;
95  * the caller must check this.  timer_delete_locked() will set the ITLK_REMOVE
96  * bit and will iteratively unlock and lock the interval timer until all
97  * blockers have seen the ITLK_REMOVE and cleared out.  It will then zero
98  * out the specified entry in the p_itimer array, and call into the clock
99  * backend to complete the deletion.
100  *
101  * This function will always return with p_lock held.
102  */
static void
timer_delete_locked(proc_t *p, timer_t tid, itimer_t *it)
{
	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(!(it->it_lock & ITLK_REMOVE));
	ASSERT(it->it_lock & ITLK_LOCKED);

	it->it_lock |= ITLK_REMOVE;

	/*
	 * If there are threads waiting to lock this timer, we'll unlock
	 * the timer, and block on the cv.  Threads blocking our removal will
	 * have the opportunity to run; when they see the ITLK_REMOVE flag
	 * set, they will immediately unlock the timer.
	 */
	while (it->it_blockers) {
		timer_unlock(p, it);
		cv_wait(&it->it_cv, &p->p_lock);
		timer_lock(p, it);
	}

	ASSERT(p->p_itimer[tid] == it);
	p->p_itimer[tid] = NULL;

	/*
	 * No one is blocked on this timer, and no one will be (we've set
	 * p_itimer[tid] to be NULL; no one can find it).  Now we call into
	 * the clock backend to delete the timer; it is up to the backend to
	 * guarantee that timer_fire() has completed (and will never again
	 * be called) for this timer.
	 */
	mutex_exit(&p->p_lock);

	it->it_backend->clk_timer_delete(it);

	/* Unlocked peek first; retest under it_mutex before tearing down. */
	if (it->it_portev) {
		mutex_enter(&it->it_mutex);
		if (it->it_portev) {
			port_kevent_t	*pev;
			/* dissociate timer from the event port */
			(void) port_dissociate_ksource(it->it_portfd,
			    PORT_SOURCE_TIMER, (port_source_t *)it->it_portsrc);
			pev = (port_kevent_t *)it->it_portev;
			it->it_portev = NULL;
			it->it_flags &= ~IT_PORT;
			it->it_pending = 0;
			/* Drop it_mutex before the port calls; they may block. */
			mutex_exit(&it->it_mutex);
			(void) port_remove_done_event(pev);
			port_free_event(pev);
		} else {
			/* timer_close_port() raced us and already cleaned up. */
			mutex_exit(&it->it_mutex);
		}
	}

	mutex_enter(&p->p_lock);

	/*
	 * We need to be careful freeing the sigqueue for this timer;
	 * if a signal is pending, the sigqueue needs to be freed
	 * synchronously in siginfofree().  The need to free the sigqueue
	 * in siginfofree() is indicated by setting sq_func to NULL.
	 */
	if (it->it_pending > 0) {
		it->it_sigq->sq_func = NULL;
	} else {
		kmem_free(it->it_sigq, sizeof (sigqueue_t));
	}

	ASSERT(it->it_blockers == 0);
	kmem_cache_free(clock_timer_cache, it);
}
174 
175 /*
176  * timer_grab() and its companion routine, timer_release(), are wrappers
177  * around timer_lock()/_unlock() which allow the timer_*(3R) routines to
178  * (a) share error handling code and (b) not grab p_lock themselves.  Routines
179  * which are called with p_lock held (e.g. timer_lwpbind(), timer_lwpexit())
 180  * must call timer_lock()/_unlock() explicitly.
181  *
182  * timer_grab() takes a proc and a timer ID, and returns a pointer to a
183  * locked interval timer.  p_lock must _not_ be held on entry; timer_grab()
184  * may acquire p_lock, but will always return with p_lock dropped.
185  *
186  * If timer_grab() fails, it will return NULL.  timer_grab() will fail if
187  * one or more of the following is true:
188  *
189  *  (a)	The specified timer ID is out of range.
190  *
191  *  (b)	The specified timer ID does not correspond to a timer ID returned
192  *	from timer_create(3R).
193  *
194  *  (c)	The specified timer ID is currently being removed.
195  *
196  */
static itimer_t *
timer_grab(proc_t *p, timer_t tid)
{
	itimer_t **itp, *it;

	/* Reject timer IDs outside [0, timer_max) before touching p_lock. */
	if (tid >= timer_max || tid < 0)
		return (NULL);

	mutex_enter(&p->p_lock);

	/* No timer array allocated yet, or nothing in this slot. */
	if ((itp = p->p_itimer) == NULL || (it = itp[tid]) == NULL) {
		mutex_exit(&p->p_lock);
		return (NULL);
	}

	timer_lock(p, it);

	if (it->it_lock & ITLK_REMOVE) {
		/*
		 * Someone is removing this timer; it will soon be invalid.
		 */
		timer_unlock(p, it);
		mutex_exit(&p->p_lock);
		return (NULL);
	}

	/* Return with the timer locked but p_lock dropped (see block comment). */
	mutex_exit(&p->p_lock);

	return (it);
}
227 
228 /*
229  * timer_release() releases a timer acquired with timer_grab().  p_lock
230  * should not be held on entry; timer_release() will acquire p_lock but
231  * will drop it before returning.
232  */
static void
timer_release(proc_t *p, itimer_t *it)
{
	/* p_lock is needed only for the duration of the unlock itself. */
	mutex_enter(&p->p_lock);
	timer_unlock(p, it);
	mutex_exit(&p->p_lock);
}
240 
241 /*
242  * timer_delete_grabbed() deletes a timer acquired with timer_grab().
243  * p_lock should not be held on entry; timer_delete_grabbed() will acquire
244  * p_lock, but will drop it before returning.
245  */
static void
timer_delete_grabbed(proc_t *p, timer_t tid, itimer_t *it)
{
	/* timer_delete_locked() may drop and regrab p_lock internally. */
	mutex_enter(&p->p_lock);
	timer_delete_locked(p, tid, it);
	mutex_exit(&p->p_lock);
}
253 
/*
 * One-time initialization of the itimer_t allocation cache.
 */
void
clock_timer_init()
{
	/*
	 * No constructor/destructor is supplied; timer_create() bzero()s
	 * each object after allocation.
	 */
	clock_timer_cache = kmem_cache_create("timer_cache",
	    sizeof (itimer_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
260 
/*
 * Register the backend implementing the specified clock ID.  Each clock
 * ID may be registered exactly once (enforced by the ASSERTs).
 */
void
clock_add_backend(clockid_t clock, clock_backend_t *backend)
{
	ASSERT(clock >= 0 && clock < CLOCK_MAX);
	ASSERT(clock_backend[clock] == NULL);

	clock_backend[clock] = backend;
}
269 
270 int
271 clock_settime(clockid_t clock, timespec_t *tp)
272 {
273 	timespec_t t;
274 	clock_backend_t *backend;
275 	int error;
276 
277 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
278 		return (set_errno(EINVAL));
279 
280 	if (secpolicy_settime(CRED()) != 0)
281 		return (set_errno(EPERM));
282 
283 	if (get_udatamodel() == DATAMODEL_NATIVE) {
284 		if (copyin(tp, &t, sizeof (timespec_t)) != 0)
285 			return (set_errno(EFAULT));
286 	} else {
287 		timespec32_t t32;
288 
289 		if (copyin(tp, &t32, sizeof (timespec32_t)) != 0)
290 			return (set_errno(EFAULT));
291 
292 		TIMESPEC32_TO_TIMESPEC(&t, &t32);
293 	}
294 
295 	if (itimerspecfix(&t))
296 		return (set_errno(EINVAL));
297 
298 	error = backend->clk_clock_settime(&t);
299 
300 	if (error)
301 		return (set_errno(error));
302 
303 	return (0);
304 }
305 
306 int
307 clock_gettime(clockid_t clock, timespec_t *tp)
308 {
309 	timespec_t t;
310 	clock_backend_t *backend;
311 	int error;
312 
313 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
314 		return (set_errno(EINVAL));
315 
316 	error = backend->clk_clock_gettime(&t);
317 
318 	if (error)
319 		return (set_errno(error));
320 
321 	if (get_udatamodel() == DATAMODEL_NATIVE) {
322 		if (copyout(&t, tp, sizeof (timespec_t)) != 0)
323 			return (set_errno(EFAULT));
324 	} else {
325 		timespec32_t t32;
326 
327 		if (TIMESPEC_OVERFLOW(&t))
328 			return (set_errno(EOVERFLOW));
329 		TIMESPEC_TO_TIMESPEC32(&t32, &t);
330 
331 		if (copyout(&t32, tp, sizeof (timespec32_t)) != 0)
332 			return (set_errno(EFAULT));
333 	}
334 
335 	return (0);
336 }
337 
338 int
339 clock_getres(clockid_t clock, timespec_t *tp)
340 {
341 	timespec_t t;
342 	clock_backend_t *backend;
343 	int error;
344 
345 	/*
346 	 * Strangely, the standard defines clock_getres() with a NULL tp
347 	 * to do nothing (regardless of the validity of the specified
348 	 * clock_id).  Go figure.
349 	 */
350 	if (tp == NULL)
351 		return (0);
352 
353 	if ((backend = CLOCK_BACKEND(clock)) == NULL)
354 		return (set_errno(EINVAL));
355 
356 	error = backend->clk_clock_getres(&t);
357 
358 	if (error)
359 		return (set_errno(error));
360 
361 	if (get_udatamodel() == DATAMODEL_NATIVE) {
362 		if (copyout(&t, tp, sizeof (timespec_t)) != 0)
363 			return (set_errno(EFAULT));
364 	} else {
365 		timespec32_t t32;
366 
367 		if (TIMESPEC_OVERFLOW(&t))
368 			return (set_errno(EOVERFLOW));
369 		TIMESPEC_TO_TIMESPEC32(&t32, &t);
370 
371 		if (copyout(&t32, tp, sizeof (timespec32_t)) != 0)
372 			return (set_errno(EFAULT));
373 	}
374 
375 	return (0);
376 }
377 
/*
 * sq_func callback, invoked when this timer's queued signal is delivered
 * (or freed): fold the accumulated pending count into it_overrun and
 * clear it_pending so timer_fire() may queue a new signal.
 */
void
timer_signal(sigqueue_t *sigq)
{
	itimer_t *it = (itimer_t *)sigq->sq_backptr;

	/*
	 * There are some conditions during a fork or an exit when we can
	 * call siginfofree() without p_lock held.  To prevent a race
	 * between timer_signal() and timer_fire() with regard to it_pending,
	 * we therefore acquire it_mutex in both paths.
	 */
	mutex_enter(&it->it_mutex);
	ASSERT(it->it_pending > 0);
	/* Everything beyond the one delivered signal is overrun. */
	it->it_overrun = it->it_pending - 1;
	it->it_pending = 0;
	mutex_exit(&it->it_mutex);
}
395 
396 /*
397  * This routine is called from the clock backend.
398  */
void
timer_fire(itimer_t *it)
{
	proc_t *p;
	int proc_lock_held;

	if (it->it_flags & IT_SIGNAL) {
		/*
		 * See the comment in timer_signal() for why it is not
		 * sufficient to only grab p_lock here. Because p_lock can be
		 * held on entry to timer_signal(), the lock ordering is
		 * necessarily p_lock before it_mutex.
		 */

		p = it->it_proc;
		proc_lock_held = 1;
		mutex_enter(&p->p_lock);
	} else {
		/*
		 * IT_PORT:
		 * If a timer was ever programmed to send events to a port,
		 * the IT_PORT flag will remain set until:
		 * a) the timer is deleted (see timer_delete_locked()) or
		 * b) the port is being closed (see timer_close_port()).
		 * Both cases are synchronized with the it_mutex.
		 * We don't need to use the p_lock because it is only
		 * required in the IT_SIGNAL case.
		 * If IT_PORT was set and the port is being closed then
		 * the timer notification is set to NONE. In such a case
		 * the timer itself and the it_pending counter remain active
		 * until the application deletes the counter or the process
		 * exits.
		 */
		proc_lock_held = 0;
	}
	/* it_mutex protects it_pending against timer_signal()/callbacks. */
	mutex_enter(&it->it_mutex);

	if (it->it_pending > 0) {
		/* A notification is already outstanding; just count it. */
		if (it->it_pending < INT_MAX)
			it->it_pending++;
		mutex_exit(&it->it_mutex);
	} else {
		if (it->it_flags & IT_PORT) {
			it->it_pending = 1;
			port_send_event((port_kevent_t *)it->it_portev);
			mutex_exit(&it->it_mutex);
		} else if (it->it_flags & IT_SIGNAL) {
			it->it_pending = 1;
			/* sigaddqa() is called under p_lock, not it_mutex. */
			mutex_exit(&it->it_mutex);
			sigaddqa(p, NULL, it->it_sigq);
		} else {
			/* SIGEV_NONE (or port closed): nothing to deliver. */
			mutex_exit(&it->it_mutex);
		}
	}

	if (proc_lock_held)
		mutex_exit(&p->p_lock);
}
457 
/*
 * timer_create(3C) system call handler.  Copies in (or defaults) the
 * notification sigevent, allocates the timer and its sigqueue, claims a
 * free slot in p_itimer, wires up port notification if requested, and
 * calls the clock backend to create the underlying timer.  The timer is
 * born with ITLK_LOCKED set and is released (or deleted, on error) at
 * the end.  Returns 0 with the timer ID copied out to *tid, or sets
 * errno (EINVAL, EFAULT, EAGAIN, or a port/backend error).
 */
int
timer_create(clockid_t clock, struct sigevent *evp, timer_t *tid)
{
	struct sigevent ev;
	proc_t *p = curproc;
	clock_backend_t *backend;
	itimer_t *it, **itp;
	sigqueue_t *sigq;
	cred_t *cr = CRED();
	int error = 0;
	timer_t i;
	port_notify_t tim_pnevp;
	port_kevent_t *pkevp = NULL;

	if ((backend = CLOCK_BACKEND(clock)) == NULL)
		return (set_errno(EINVAL));

	if (evp != NULL) {
		/*
		 * short copyin() for binary compatibility
		 * fetch oldsigevent to determine how much to copy in.
		 */
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyin(evp, &ev, sizeof (struct oldsigevent)))
				return (set_errno(EFAULT));

			if (ev.sigev_notify == SIGEV_PORT ||
			    ev.sigev_notify == SIGEV_THREAD) {
				/* sival_ptr points at a port_notify_t. */
				if (copyin(ev.sigev_value.sival_ptr, &tim_pnevp,
				    sizeof (port_notify_t)))
					return (set_errno(EFAULT));
			}
#ifdef	_SYSCALL32_IMPL
		} else {
			struct sigevent32 ev32;
			port_notify32_t tim_pnevp32;

			if (copyin(evp, &ev32, sizeof (struct oldsigevent32)))
				return (set_errno(EFAULT));
			ev.sigev_notify = ev32.sigev_notify;
			ev.sigev_signo = ev32.sigev_signo;
			/*
			 * See comment in sigqueue32() on handling of 32-bit
			 * sigvals in a 64-bit kernel.
			 */
			ev.sigev_value.sival_int = ev32.sigev_value.sival_int;
			if (ev.sigev_notify == SIGEV_PORT ||
			    ev.sigev_notify == SIGEV_THREAD) {
				if (copyin((void *)(uintptr_t)
				    ev32.sigev_value.sival_ptr,
				    (void *)&tim_pnevp32,
				    sizeof (port_notify32_t)))
					return (set_errno(EFAULT));
				tim_pnevp.portnfy_port =
				    tim_pnevp32.portnfy_port;
				tim_pnevp.portnfy_user =
				    (void *)(uintptr_t)tim_pnevp32.portnfy_user;
			}
#endif
		}
		/* Validate the requested notification mechanism. */
		switch (ev.sigev_notify) {
		case SIGEV_NONE:
			break;
		case SIGEV_SIGNAL:
			if (ev.sigev_signo < 1 || ev.sigev_signo >= NSIG)
				return (set_errno(EINVAL));
			break;
		case SIGEV_THREAD:
		case SIGEV_PORT:
			break;
		default:
			return (set_errno(EINVAL));
		}
	} else {
		/*
		 * Use the clock's default sigevent (this is a structure copy).
		 */
		ev = backend->clk_default;
	}

	/*
	 * We'll allocate our timer and sigqueue now, before we grab p_lock.
	 * If we can't find an empty slot, we'll free them before returning.
	 */
	it = kmem_cache_alloc(clock_timer_cache, KM_SLEEP);
	bzero(it, sizeof (itimer_t));
	mutex_init(&it->it_mutex, NULL, MUTEX_DEFAULT, NULL);
	sigq = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	mutex_enter(&p->p_lock);

	/*
	 * If this is this process' first timer, we need to attempt to allocate
	 * an array of itimer_t pointers.  We drop p_lock to perform the
	 * allocation; if we return to discover that p_itimer is non-NULL
	 * (another thread raced us here), we will free our allocation and
	 * drive on.
	 */
	if ((itp = p->p_itimer) == NULL) {
		mutex_exit(&p->p_lock);
		itp = kmem_zalloc(timer_max * sizeof (itimer_t *), KM_SLEEP);
		mutex_enter(&p->p_lock);

		if (p->p_itimer == NULL)
			p->p_itimer = itp;
		else {
			kmem_free(itp, timer_max * sizeof (itimer_t *));
			itp = p->p_itimer;
		}
	}

	/* Find the lowest free slot; its index becomes the timer ID. */
	for (i = 0; i < timer_max && itp[i] != NULL; i++)
		continue;

	if (i == timer_max) {
		/*
		 * We couldn't find a slot.  Drop p_lock, free the preallocated
		 * timer and sigqueue, and return an error.
		 */
		mutex_exit(&p->p_lock);
		kmem_cache_free(clock_timer_cache, it);
		kmem_free(sigq, sizeof (sigqueue_t));

		return (set_errno(EAGAIN));
	}

	ASSERT(i < timer_max && itp[i] == NULL);

	/*
	 * If we develop other notification mechanisms, this will need
	 * to call into (yet another) backend.
	 */
	sigq->sq_info.si_signo = ev.sigev_signo;
	if (evp == NULL)
		sigq->sq_info.si_value.sival_int = i;
	else
		sigq->sq_info.si_value = ev.sigev_value;
	sigq->sq_info.si_code = SI_TIMER;
	sigq->sq_info.si_pid = p->p_pid;
	sigq->sq_info.si_ctid = PRCTID(p);
	sigq->sq_info.si_zoneid = getzoneid();
	sigq->sq_info.si_uid = crgetruid(cr);
	sigq->sq_func = timer_signal;
	sigq->sq_next = NULL;
	sigq->sq_backptr = it;
	it->it_sigq = sigq;
	it->it_backend = backend;
	/* Born locked; released by timer_release() on the success path. */
	it->it_lock = ITLK_LOCKED;
	itp[i] = it;


	if (ev.sigev_notify == SIGEV_THREAD ||
	    ev.sigev_notify == SIGEV_PORT) {
		int port;

		/*
		 * This timer is programmed to use event port notification when
		 * the timer fires:
		 * - allocate a port event structure and prepare it to be sent
		 *   to the port as soon as the timer fires.
		 * - when the timer fires :
		 *   - if event structure was already sent to the port then this
		 *	is a timer fire overflow => increment overflow counter.
		 *   - otherwise send pre-allocated event structure to the port.
		 * - the events field of the port_event_t structure counts the
		 *   number of timer fired events.
		 * - The event structured is allocated using the
		 *   PORT_ALLOC_CACHED flag.
		 *   This flag indicates that the timer itself will manage and
		 *   free the event structure when required.
		 */

		it->it_flags |= IT_PORT;
		port = tim_pnevp.portnfy_port;

		/* associate timer as event source with the port */
		error = port_associate_ksource(port, PORT_SOURCE_TIMER,
		    (port_source_t **)&it->it_portsrc, timer_close_port,
		    (void *)it, NULL);
		if (error) {
			itp[i] = NULL;		/* clear slot */
			mutex_exit(&p->p_lock);
			kmem_cache_free(clock_timer_cache, it);
			kmem_free(sigq, sizeof (sigqueue_t));
			return (set_errno(error));
		}

		/* allocate an event structure/slot */
		error = port_alloc_event(port, PORT_ALLOC_SCACHED,
		    PORT_SOURCE_TIMER, &pkevp);
		if (error) {
			(void) port_dissociate_ksource(port, PORT_SOURCE_TIMER,
			    (port_source_t *)it->it_portsrc);
			itp[i] = NULL;		/* clear slot */
			mutex_exit(&p->p_lock);
			kmem_cache_free(clock_timer_cache, it);
			kmem_free(sigq, sizeof (sigqueue_t));
			return (set_errno(error));
		}

		/* initialize event data */
		port_init_event(pkevp, i, tim_pnevp.portnfy_user,
		    timer_port_callback, it);
		it->it_portev = pkevp;
		it->it_portfd = port;
	} else {
		if (ev.sigev_notify == SIGEV_SIGNAL)
			it->it_flags |= IT_SIGNAL;
	}

	mutex_exit(&p->p_lock);

	/*
	 * Call on the backend to verify the event argument (or return
	 * EINVAL if this clock type does not support timers).
	 */
	if ((error = backend->clk_timer_create(it, &ev)) != 0)
		goto err;

	it->it_lwp = ttolwp(curthread);
	it->it_proc = p;

	if (copyout(&i, tid, sizeof (timer_t)) != 0) {
		error = EFAULT;
		goto err;
	}

	/*
	 * If we're here, then we have successfully created the timer; we
	 * just need to release the timer and return.
	 */
	timer_release(p, it);

	return (0);

err:
	/*
	 * If we're here, an error has occurred late in the timer creation
	 * process.  We need to regrab p_lock, and delete the incipient timer.
	 * Since we never unlocked the timer (it was born locked), it's
	 * impossible for a removal to be pending.
	 */
	ASSERT(!(it->it_lock & ITLK_REMOVE));
	timer_delete_grabbed(p, i, it);

	return (set_errno(error));
}
704 
705 int
706 timer_gettime(timer_t tid, itimerspec_t *val)
707 {
708 	proc_t *p = curproc;
709 	itimer_t *it;
710 	itimerspec_t when;
711 	int error;
712 
713 	if ((it = timer_grab(p, tid)) == NULL)
714 		return (set_errno(EINVAL));
715 
716 	error = it->it_backend->clk_timer_gettime(it, &when);
717 
718 	timer_release(p, it);
719 
720 	if (error == 0) {
721 		if (get_udatamodel() == DATAMODEL_NATIVE) {
722 			if (copyout(&when, val, sizeof (itimerspec_t)))
723 				error = EFAULT;
724 		} else {
725 			if (ITIMERSPEC_OVERFLOW(&when))
726 				error = EOVERFLOW;
727 			else {
728 				itimerspec32_t w32;
729 
730 				ITIMERSPEC_TO_ITIMERSPEC32(&w32, &when)
731 				if (copyout(&w32, val, sizeof (itimerspec32_t)))
732 					error = EFAULT;
733 			}
734 		}
735 	}
736 
737 	return (error ? set_errno(error) : 0);
738 }
739 
/*
 * timer_settime(3C) system call handler: optionally report the previous
 * setting via timer_gettime(), copy in and validate the new setting, round
 * it up to the clock's resolution, and program the backend.
 */
int
timer_settime(timer_t tid, int flags, itimerspec_t *val, itimerspec_t *oval)
{
	itimerspec_t when;
	timespec_t res;
	itimer_t *it;
	proc_t *p = curproc;
	int error;

	if (oval != NULL) {
		/* Old value is fetched before the timer is re-armed. */
		if ((error = timer_gettime(tid, oval)) != 0)
			return (error);
	}

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(val, &when, sizeof (itimerspec_t)))
			return (set_errno(EFAULT));
	} else {
		itimerspec32_t w32;

		if (copyin(val, &w32, sizeof (itimerspec32_t)))
			return (set_errno(EFAULT));

		ITIMERSPEC32_TO_ITIMERSPEC(&when, &w32);
	}

	/*
	 * NOTE(review): itimerspecfix() is applied to it_interval even when
	 * it_value is unset, but its result is only honored when it_value is
	 * set -- presumably because a zero it_value disarms the timer and the
	 * interval is then irrelevant; confirm against itimerspecfix().
	 */
	if (itimerspecfix(&when.it_value) ||
	    (itimerspecfix(&when.it_interval) &&
	    timerspecisset(&when.it_value))) {
		return (set_errno(EINVAL));
	}

	if ((it = timer_grab(p, tid)) == NULL)
		return (set_errno(EINVAL));

	/*
	 * From the man page:
	 *	Time values that are between two consecutive non-negative
	 *	integer multiples of the resolution of the specified timer
	 *	shall be rounded up to the larger multiple of the resolution.
	 * We assume that the resolution of any clock is less than one second.
	 */
	if (it->it_backend->clk_clock_getres(&res) == 0 && res.tv_nsec > 1) {
		long rem;

		/* Round both interval and value up to the clock resolution. */
		if ((rem = when.it_interval.tv_nsec % res.tv_nsec) != 0) {
			when.it_interval.tv_nsec += res.tv_nsec - rem;
			timespecfix(&when.it_interval);
		}
		if ((rem = when.it_value.tv_nsec % res.tv_nsec) != 0) {
			when.it_value.tv_nsec += res.tv_nsec - rem;
			timespecfix(&when.it_value);
		}
	}
	error = it->it_backend->clk_timer_settime(it, flags, &when);

	timer_release(p, it);

	return (error ? set_errno(error) : 0);
}
800 
801 int
802 timer_delete(timer_t tid)
803 {
804 	proc_t *p = curproc;
805 	itimer_t *it;
806 
807 	if ((it = timer_grab(p, tid)) == NULL)
808 		return (set_errno(EINVAL));
809 
810 	timer_delete_grabbed(p, tid, it);
811 
812 	return (0);
813 }
814 
815 int
816 timer_getoverrun(timer_t tid)
817 {
818 	int overrun;
819 	proc_t *p = curproc;
820 	itimer_t *it;
821 
822 	if ((it = timer_grab(p, tid)) == NULL)
823 		return (set_errno(EINVAL));
824 
825 	/*
826 	 * The it_overrun field is protected by p_lock; we need to acquire
827 	 * it before looking at the value.
828 	 */
829 	mutex_enter(&p->p_lock);
830 	overrun = it->it_overrun;
831 	mutex_exit(&p->p_lock);
832 
833 	timer_release(p, it);
834 
835 	return (overrun);
836 }
837 
838 /*
839  * Entered/exited with p_lock held, but will repeatedly drop and regrab p_lock.
840  */
void
timer_lwpexit(void)
{
	timer_t i;
	proc_t *p = curproc;
	klwp_t *lwp = ttolwp(curthread);
	itimer_t *it, **itp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Nothing to do if this process never created a timer. */
	if ((itp = p->p_itimer) == NULL)
		return;

	for (i = 0; i < timer_max; i++) {
		if ((it = itp[i]) == NULL)
			continue;

		/* May drop and regrab p_lock while waiting for the timer. */
		timer_lock(p, it);

		if ((it->it_lock & ITLK_REMOVE) || it->it_lwp != lwp) {
			/*
			 * This timer is either being removed or it isn't
			 * associated with this lwp.
			 */
			timer_unlock(p, it);
			continue;
		}

		/*
		 * The LWP that created this timer is going away.  To the user,
		 * our behavior here is explicitly undefined.  We will simply
		 * null out the it_lwp field; if the LWP was bound to a CPU,
		 * the cyclic will stay bound to that CPU until the process
		 * exits.
		 */
		it->it_lwp = NULL;
		timer_unlock(p, it);
	}
}
880 
881 /*
882  * Called to notify of an LWP binding change.  Entered/exited with p_lock
883  * held, but will repeatedly drop and regrab p_lock.
884  */
void
timer_lwpbind()
{
	timer_t i;
	proc_t *p = curproc;
	klwp_t *lwp = ttolwp(curthread);
	itimer_t *it, **itp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Nothing to do if this process never created a timer. */
	if ((itp = p->p_itimer) == NULL)
		return;

	for (i = 0; i < timer_max; i++) {
		if ((it = itp[i]) == NULL)
			continue;

		/* May drop and regrab p_lock while waiting for the timer. */
		timer_lock(p, it);

		if (!(it->it_lock & ITLK_REMOVE) && it->it_lwp == lwp) {
			/*
			 * Drop p_lock and jump into the backend.
			 */
			mutex_exit(&p->p_lock);
			it->it_backend->clk_timer_lwpbind(it);
			mutex_enter(&p->p_lock);
		}

		timer_unlock(p, it);
	}
}
916 
917 /*
918  * This function should only be called if p_itimer is non-NULL.
919  */
920 void
921 timer_exit(void)
922 {
923 	timer_t i;
924 	proc_t *p = curproc;
925 
926 	ASSERT(p->p_itimer != NULL);
927 
928 	for (i = 0; i < timer_max; i++)
929 		(void) timer_delete(i);
930 
931 	kmem_free(p->p_itimer, timer_max * sizeof (itimer_t *));
932 	p->p_itimer = NULL;
933 }
934 
935 /*
936  * timer_port_callback() is a callback function which is associated with the
937  * timer event and is activated just before the event is delivered to the user.
938  * The timer uses this function to update/set the overflow counter and
939  * to reenable the use of the event structure.
940  */
941 
942 /* ARGSUSED */
static int
timer_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	itimer_t	*it = arg;

	/* it_mutex serializes us against timer_fire() updating it_pending. */
	mutex_enter(&it->it_mutex);
	if (curproc != it->it_proc) {
		/* can not deliver timer events to another proc */
		mutex_exit(&it->it_mutex);
		return (EACCES);
	}
	*events = it->it_pending;	/* 1 = 1 event, >1 # of overflows */
	it->it_pending = 0;		/* reinit overflow counter	*/
	/*
	 * This function can also be activated when the port is being closed
	 * and a timer event is already submitted to the port.
	 * In such a case the event port framework will use the
	 * close-callback function to notify the events sources.
	 * The timer close-callback function is timer_close_port() which
	 * will free all allocated resources (including the allocated
	 * port event structure).
	 * For that reason we don't need to check the value of flag here.
	 */
	mutex_exit(&it->it_mutex);
	return (0);
}
969 
970 /*
971  * port is being closed ... free all allocated port event structures
972  * The delivered arg currently correspond to the first timer associated with
973  * the port and it is not useable in this case.
974  * We have to scan the list of activated timers in the current proc and
975  * compare them with the delivered port id.
976  */
977 
978 /* ARGSUSED */
979 static void
980 timer_close_port(void *arg, int port, pid_t pid, int lastclose)
981 {
982 	proc_t		*p = curproc;
983 	timer_t		tid;
984 	itimer_t	*it;
985 
986 	for (tid = 0; tid < timer_max; tid++) {
987 		if ((it = timer_grab(p, tid)) == NULL)
988 			continue;
989 		if (it->it_portev) {
990 			mutex_enter(&it->it_mutex);
991 			if (it->it_portfd == port) {
992 				port_kevent_t *pev;
993 				pev = (port_kevent_t *)it->it_portev;
994 				it->it_portev = NULL;
995 				it->it_flags &= ~IT_PORT;
996 				mutex_exit(&it->it_mutex);
997 				(void) port_remove_done_event(pev);
998 				port_free_event(pev);
999 			} else {
1000 				mutex_exit(&it->it_mutex);
1001 			}
1002 		}
1003 		timer_release(p, it);
1004 	}
1005 }
1006