xref: /illumos-gate/usr/src/uts/common/os/clock_highres.c (revision fd75ca8de430ee0ba5ce650efee0ac0b85ed43e9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2012, Joyent Inc. All rights reserved.
29  */
30 
31 #include <sys/timer.h>
32 #include <sys/systm.h>
33 #include <sys/param.h>
34 #include <sys/kmem.h>
35 #include <sys/debug.h>
36 #include <sys/cyclic.h>
37 #include <sys/cmn_err.h>
38 #include <sys/pset.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
41 
42 static clock_backend_t clock_highres;
43 
44 /*ARGSUSED*/
45 static int
46 clock_highres_settime(timespec_t *ts)
47 {
48 	return (EINVAL);
49 }
50 
51 static int
52 clock_highres_gettime(timespec_t *ts)
53 {
54 	hrt2ts(gethrtime(), (timestruc_t *)ts);
55 
56 	return (0);
57 }
58 
59 static int
60 clock_highres_getres(timespec_t *ts)
61 {
62 	hrt2ts(cyclic_getres(), (timestruc_t *)ts);
63 
64 	return (0);
65 }
66 
67 /*ARGSUSED*/
68 static int
69 clock_highres_timer_create(itimer_t *it, struct sigevent *ev)
70 {
71 	/*
72 	 * CLOCK_HIGHRES timers of sufficiently high resolution can deny
73 	 * service; only allow privileged users to create such timers.
74 	 * Sites that do not wish to have this restriction should
75 	 * give users the "proc_clock_highres" privilege.
76 	 */
77 	if (secpolicy_clock_highres(CRED()) != 0) {
78 		it->it_arg = NULL;
79 		return (EPERM);
80 	}
81 
82 	it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
83 
84 	return (0);
85 }
86 
87 static void
88 clock_highres_fire(void *arg)
89 {
90 	itimer_t *it = (itimer_t *)arg;
91 	hrtime_t *addr = &it->it_hrtime;
92 	hrtime_t old = *addr, new = gethrtime();
93 
94 	do {
95 		old = *addr;
96 	} while (atomic_cas_64((uint64_t *)addr, old, new) != old);
97 
98 	timer_fire(it);
99 }
100 
101 static int
102 clock_highres_timer_settime(itimer_t *it, int flags,
103 	const struct itimerspec *when)
104 {
105 	cyclic_id_t cyc, *cycp = it->it_arg;
106 	proc_t *p = curproc;
107 	kthread_t *t = curthread;
108 	cyc_time_t cyctime;
109 	cyc_handler_t hdlr;
110 	cpu_t *cpu;
111 	cpupart_t *cpupart;
112 	int pset;
113 
114 	cyctime.cyt_when = ts2hrt(&when->it_value);
115 	cyctime.cyt_interval = ts2hrt(&when->it_interval);
116 
117 	if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 &&
118 	    it->it_itime.it_interval.tv_sec == 0 &&
119 	    it->it_itime.it_interval.tv_nsec == 0 &&
120 	    (cyc = *cycp) != CYCLIC_NONE) {
121 		/*
122 		 * If our existing timer is a one-shot and our new timer is a
123 		 * one-shot, we'll save ourselves a world of grief and just
124 		 * reprogram the cyclic.
125 		 */
126 		it->it_itime = *when;
127 
128 		if (!(flags & TIMER_ABSTIME))
129 			cyctime.cyt_when += gethrtime();
130 
131 		hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
132 		(void) cyclic_reprogram(cyc, cyctime.cyt_when);
133 		return (0);
134 	}
135 
136 	mutex_enter(&cpu_lock);
137 	if ((cyc = *cycp) != CYCLIC_NONE) {
138 		cyclic_remove(cyc);
139 		*cycp = CYCLIC_NONE;
140 	}
141 
142 	if (cyctime.cyt_when == 0) {
143 		mutex_exit(&cpu_lock);
144 		return (0);
145 	}
146 
147 	if (!(flags & TIMER_ABSTIME))
148 		cyctime.cyt_when += gethrtime();
149 
150 	/*
151 	 * Now we will check for overflow (that is, we will check to see
152 	 * that the start time plus the interval time doesn't exceed
153 	 * INT64_MAX).  The astute code reviewer will observe that this
154 	 * one-time check doesn't guarantee that a future expiration
155 	 * will not wrap.  We wish to prove, then, that if a future
156 	 * expiration does wrap, the earliest the problem can be encountered
157 	 * is (INT64_MAX / 2) nanoseconds (191 years) after boot.  Formally:
158 	 *
159 	 *  Given:	s + i < m	s > 0	i > 0
160 	 *		s + ni > m	n > 1
161 	 *
162 	 *    (where "s" is the start time, "i" is the interval, "n" is the
163 	 *    number of times the cyclic has fired and "m" is INT64_MAX)
164 	 *
165 	 *  Prove:
166 	 *		(a)  s + (n - 1)i > (m / 2)
167 	 *		(b)  s + (n - 1)i < m
168 	 *
169 	 * That is, prove that we must have fired at least once 191 years
170 	 * after boot.  The proof is very straightforward; since the left
171 	 * side of (a) is minimized when i is small, it is sufficient to show
172 	 * that the statement is true for i's smallest possible value
173 	 * (((m - s) / n) + epsilon).  The same goes for (b); showing that the
174 	 * statement is true for i's largest possible value (m - s + epsilon)
175 	 * is sufficient to prove the statement.
176 	 *
177 	 * The actual arithmetic manipulation is left up to reader.
178 	 */
179 	if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) {
180 		mutex_exit(&cpu_lock);
181 		return (EOVERFLOW);
182 	}
183 
184 	if (cyctime.cyt_interval == 0) {
185 		/*
186 		 * If this is a one-shot, then we set the interval to be
187 		 * inifinite.  If this timer is never touched, this cyclic will
188 		 * simply consume space in the cyclic subsystem.  As soon as
189 		 * timer_settime() or timer_delete() is called, the cyclic is
190 		 * removed (so it's not possible to run the machine out
191 		 * of resources by creating one-shots).
192 		 */
193 		cyctime.cyt_interval = CY_INFINITY;
194 	}
195 
196 	it->it_itime = *when;
197 
198 	hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
199 
200 	hdlr.cyh_func = (cyc_func_t)clock_highres_fire;
201 	hdlr.cyh_arg = it;
202 	hdlr.cyh_level = CY_LOW_LEVEL;
203 
204 	if (cyctime.cyt_when != 0)
205 		*cycp = cyc = cyclic_add(&hdlr, &cyctime);
206 
207 	/*
208 	 * Now that we have the cyclic created, we need to bind it to our
209 	 * bound CPU and processor set (if any).
210 	 */
211 	mutex_enter(&p->p_lock);
212 	cpu = t->t_bound_cpu;
213 	cpupart = t->t_cpupart;
214 	pset = t->t_bind_pset;
215 
216 	mutex_exit(&p->p_lock);
217 
218 	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
219 
220 	mutex_exit(&cpu_lock);
221 
222 	return (0);
223 }
224 
225 static int
226 clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when)
227 {
228 	/*
229 	 * CLOCK_HIGHRES doesn't update it_itime.
230 	 */
231 	hrtime_t start = ts2hrt(&it->it_itime.it_value);
232 	hrtime_t interval = ts2hrt(&it->it_itime.it_interval);
233 	hrtime_t diff, now = gethrtime();
234 	hrtime_t *addr = &it->it_hrtime;
235 	hrtime_t last;
236 
237 	/*
238 	 * We're using atomic_cas_64() here only to assure that we slurp the
239 	 * entire timestamp atomically.
240 	 */
241 	last = atomic_cas_64((uint64_t *)addr, 0, 0);
242 
243 	*when = it->it_itime;
244 
245 	if (!timerspecisset(&when->it_value))
246 		return (0);
247 
248 	if (start > now) {
249 		/*
250 		 * We haven't gone off yet...
251 		 */
252 		diff = start - now;
253 	} else {
254 		if (interval == 0) {
255 			/*
256 			 * This is a one-shot which should have already
257 			 * fired; set it_value to 0.
258 			 */
259 			timerspecclear(&when->it_value);
260 			return (0);
261 		}
262 
263 		/*
264 		 * Calculate how far we are into this interval.
265 		 */
266 		diff = (now - start) % interval;
267 
268 		/*
269 		 * Now check to see if we've dealt with the last interval
270 		 * yet.
271 		 */
272 		if (now - diff > last) {
273 			/*
274 			 * The last interval hasn't fired; set it_value to 0.
275 			 */
276 			timerspecclear(&when->it_value);
277 			return (0);
278 		}
279 
280 		/*
281 		 * The last interval _has_ fired; we can return the amount
282 		 * of time left in this interval.
283 		 */
284 		diff = interval - diff;
285 	}
286 
287 	hrt2ts(diff, &when->it_value);
288 
289 	return (0);
290 }
291 
292 static int
293 clock_highres_timer_delete(itimer_t *it)
294 {
295 	cyclic_id_t cyc;
296 
297 	if (it->it_arg == NULL) {
298 		/*
299 		 * This timer was never fully created; we must have failed
300 		 * in the clock_highres_timer_create() routine.
301 		 */
302 		return (0);
303 	}
304 
305 	mutex_enter(&cpu_lock);
306 
307 	if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE)
308 		cyclic_remove(cyc);
309 
310 	mutex_exit(&cpu_lock);
311 
312 	kmem_free(it->it_arg, sizeof (cyclic_id_t));
313 
314 	return (0);
315 }
316 
317 static void
318 clock_highres_timer_lwpbind(itimer_t *it)
319 {
320 	proc_t *p = curproc;
321 	kthread_t *t = curthread;
322 	cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg);
323 	cpu_t *cpu;
324 	cpupart_t *cpupart;
325 	int pset;
326 
327 	if (cyc == CYCLIC_NONE)
328 		return;
329 
330 	mutex_enter(&cpu_lock);
331 	mutex_enter(&p->p_lock);
332 
333 	/*
334 	 * Okay, now we can safely look at the bindings.
335 	 */
336 	cpu = t->t_bound_cpu;
337 	cpupart = t->t_cpupart;
338 	pset = t->t_bind_pset;
339 
340 	/*
341 	 * Now we drop p_lock.  We haven't dropped cpu_lock; we're guaranteed
342 	 * that even if the bindings change, the CPU and/or processor set
343 	 * that this timer was bound to remain valid (and the combination
344 	 * remains self-consistent).
345 	 */
346 	mutex_exit(&p->p_lock);
347 
348 	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
349 
350 	mutex_exit(&cpu_lock);
351 }
352 
353 void
354 clock_highres_init()
355 {
356 	clock_backend_t *be = &clock_highres;
357 	struct sigevent *ev = &be->clk_default;
358 
359 	ev->sigev_signo = SIGALRM;
360 	ev->sigev_notify = SIGEV_SIGNAL;
361 	ev->sigev_value.sival_ptr = NULL;
362 
363 	be->clk_clock_settime = clock_highres_settime;
364 	be->clk_clock_gettime = clock_highres_gettime;
365 	be->clk_clock_getres = clock_highres_getres;
366 	be->clk_timer_create = clock_highres_timer_create;
367 	be->clk_timer_gettime = clock_highres_timer_gettime;
368 	be->clk_timer_settime = clock_highres_timer_settime;
369 	be->clk_timer_delete = clock_highres_timer_delete;
370 	be->clk_timer_lwpbind = clock_highres_timer_lwpbind;
371 
372 	clock_add_backend(CLOCK_HIGHRES, &clock_highres);
373 }
374