xref: /illumos-gate/usr/src/uts/common/os/clock_highres.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/timer.h>
30 #include <sys/systm.h>
31 #include <sys/param.h>
32 #include <sys/kmem.h>
33 #include <sys/debug.h>
34 #include <sys/cyclic.h>
35 #include <sys/cmn_err.h>
36 #include <sys/pset.h>
37 #include <sys/atomic.h>
38 #include <sys/policy.h>
39 
/*
 * Backend descriptor for CLOCK_HIGHRES.  clock_highres_init() fills in its
 * default sigevent and operation vector and hands it to clock_add_backend().
 */
40 static clock_backend_t clock_highres;
41 
42 /*ARGSUSED*/
43 static int
44 clock_highres_settime(timespec_t *ts)
45 {
46 	return (EINVAL);
47 }
48 
49 static int
50 clock_highres_gettime(timespec_t *ts)
51 {
52 	hrt2ts(gethrtime(), (timestruc_t *)ts);
53 
54 	return (0);
55 }
56 
57 static int
58 clock_highres_getres(timespec_t *ts)
59 {
60 	hrt2ts(cyclic_getres(), (timestruc_t *)ts);
61 
62 	return (0);
63 }
64 
65 /*ARGSUSED*/
66 static int
67 clock_highres_timer_create(itimer_t *it, struct sigevent *ev)
68 {
69 	/*
70 	 * CLOCK_HIGHRES timers of sufficiently high resolution can deny
71 	 * service; only allow privileged users to create such timers.
72 	 * Sites that do not wish to have this restriction should
73 	 * give users the "proc_clock_highres" privilege.
74 	 */
75 	if (secpolicy_clock_highres(CRED()) != 0) {
76 		it->it_arg = NULL;
77 		return (EPERM);
78 	}
79 
80 	it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
81 
82 	return (0);
83 }
84 
85 static void
86 clock_highres_fire(void *arg)
87 {
88 	itimer_t *it = (itimer_t *)arg;
89 	hrtime_t *addr = &it->it_hrtime;
90 	hrtime_t old = *addr, new = gethrtime();
91 
92 	do {
93 		old = *addr;
94 	} while (cas64((uint64_t *)addr, old, new) != old);
95 
96 	timer_fire(it);
97 }
98 
99 static int
100 clock_highres_timer_settime(itimer_t *it, int flags,
101 	const struct itimerspec *when)
102 {
103 	cyclic_id_t cyc, *cycp = it->it_arg;
104 	proc_t *p = curproc;
105 	kthread_t *t = curthread;
106 	cyc_time_t cyctime;
107 	cyc_handler_t hdlr;
108 	cpu_t *cpu;
109 	cpupart_t *cpupart;
110 	int pset;
111 
112 	cyctime.cyt_when = ts2hrt(&when->it_value);
113 	cyctime.cyt_interval = ts2hrt(&when->it_interval);
114 
115 	mutex_enter(&cpu_lock);
116 	if ((cyc = *cycp) != CYCLIC_NONE) {
117 		cyclic_remove(cyc);
118 		*cycp = CYCLIC_NONE;
119 	}
120 
121 	if (cyctime.cyt_when == 0) {
122 		mutex_exit(&cpu_lock);
123 		return (0);
124 	}
125 
126 	if (!(flags & TIMER_ABSTIME))
127 		cyctime.cyt_when += gethrtime();
128 
129 	/*
130 	 * Now we will check for overflow (that is, we will check to see
131 	 * that the start time plus the interval time doesn't exceed
132 	 * INT64_MAX).  The astute code reviewer will observe that this
133 	 * one-time check doesn't guarantee that a future expiration
134 	 * will not wrap.  We wish to prove, then, that if a future
135 	 * expiration does wrap, the earliest the problem can be encountered
136 	 * is (INT64_MAX / 2) nanoseconds (191 years) after boot.  Formally:
137 	 *
138 	 *  Given:	s + i < m	s > 0	i > 0
139 	 *		s + ni > m	n > 1
140 	 *
141 	 *    (where "s" is the start time, "i" is the interval, "n" is the
142 	 *    number of times the cyclic has fired and "m" is INT64_MAX)
143 	 *
144 	 *  Prove:
145 	 *		(a)  s + (n - 1)i > (m / 2)
146 	 *		(b)  s + (n - 1)i < m
147 	 *
148 	 * That is, prove that we must have fired at least once 191 years
149 	 * after boot.  The proof is very straightforward; since the left
150 	 * side of (a) is minimized when i is small, it is sufficient to show
151 	 * that the statement is true for i's smallest possible value
152 	 * (((m - s) / n) + epsilon).  The same goes for (b); showing that the
153 	 * statement is true for i's largest possible value (m - s + epsilon)
154 	 * is sufficient to prove the statement.
155 	 *
156 	 * The actual arithmetic manipulation is left up to reader.
157 	 */
158 	if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) {
159 		mutex_exit(&cpu_lock);
160 		return (EOVERFLOW);
161 	}
162 
163 	if (cyctime.cyt_interval == 0) {
164 		/*
165 		 * If this is a one-shot, then we set the interval to assure
166 		 * that the cyclic will next fire INT64_MAX nanoseconds after
167 		 * boot (which corresponds to over 292 years -- yes, Buck Rogers
168 		 * may have his 292-year-uptime-Solaris box malfunction).  If
169 		 * this timer is never touched, this cyclic will simply
170 		 * consume space in the cyclic subsystem.  As soon as
171 		 * timer_settime() or timer_delete() is called, the cyclic is
172 		 * removed (so it's not possible to run the machine out
173 		 * of resources by creating one-shots).
174 		 */
175 		cyctime.cyt_interval = INT64_MAX - cyctime.cyt_when;
176 	}
177 
178 	it->it_itime = *when;
179 
180 	hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
181 
182 	hdlr.cyh_func = (cyc_func_t)clock_highres_fire;
183 	hdlr.cyh_arg = it;
184 	hdlr.cyh_level = CY_LOW_LEVEL;
185 
186 	if (cyctime.cyt_when != 0)
187 		*cycp = cyc = cyclic_add(&hdlr, &cyctime);
188 	else
189 		*cycp = cyc = CYCLIC_NONE;
190 
191 	/*
192 	 * Now that we have the cyclic created, we need to bind it to our
193 	 * bound CPU and processor set (if any).
194 	 */
195 	mutex_enter(&p->p_lock);
196 	cpu = t->t_bound_cpu;
197 	cpupart = t->t_cpupart;
198 	pset = t->t_bind_pset;
199 
200 	mutex_exit(&p->p_lock);
201 
202 	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
203 
204 	mutex_exit(&cpu_lock);
205 
206 	return (0);
207 }
208 
209 static int
210 clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when)
211 {
212 	/*
213 	 * CLOCK_HIGHRES doesn't update it_itime.
214 	 */
215 	hrtime_t start = ts2hrt(&it->it_itime.it_value);
216 	hrtime_t interval = ts2hrt(&it->it_itime.it_interval);
217 	hrtime_t diff, now = gethrtime();
218 	hrtime_t *addr = &it->it_hrtime;
219 	hrtime_t last;
220 
221 	/*
222 	 * We're using cas64() here only to assure that we slurp the entire
223 	 * timestamp atomically.
224 	 */
225 	last = cas64((uint64_t *)addr, 0, 0);
226 
227 	*when = it->it_itime;
228 
229 	if (!timerspecisset(&when->it_value))
230 		return (0);
231 
232 	if (start > now) {
233 		/*
234 		 * We haven't gone off yet...
235 		 */
236 		diff = start - now;
237 	} else {
238 		if (interval == 0) {
239 			/*
240 			 * This is a one-shot which should have already
241 			 * fired; set it_value to 0.
242 			 */
243 			timerspecclear(&when->it_value);
244 			return (0);
245 		}
246 
247 		/*
248 		 * Calculate how far we are into this interval.
249 		 */
250 		diff = (now - start) % interval;
251 
252 		/*
253 		 * Now check to see if we've dealt with the last interval
254 		 * yet.
255 		 */
256 		if (now - diff > last) {
257 			/*
258 			 * The last interval hasn't fired; set it_value to 0.
259 			 */
260 			timerspecclear(&when->it_value);
261 			return (0);
262 		}
263 
264 		/*
265 		 * The last interval _has_ fired; we can return the amount
266 		 * of time left in this interval.
267 		 */
268 		diff = interval - diff;
269 	}
270 
271 	hrt2ts(diff, &when->it_value);
272 
273 	return (0);
274 }
275 
276 static int
277 clock_highres_timer_delete(itimer_t *it)
278 {
279 	cyclic_id_t cyc;
280 
281 	if (it->it_arg == NULL) {
282 		/*
283 		 * This timer was never fully created; we must have failed
284 		 * in the clock_highres_timer_create() routine.
285 		 */
286 		return (0);
287 	}
288 
289 	mutex_enter(&cpu_lock);
290 
291 	if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE)
292 		cyclic_remove(cyc);
293 
294 	mutex_exit(&cpu_lock);
295 
296 	kmem_free(it->it_arg, sizeof (cyclic_id_t));
297 
298 	return (0);
299 }
300 
301 static void
302 clock_highres_timer_lwpbind(itimer_t *it)
303 {
304 	proc_t *p = curproc;
305 	kthread_t *t = curthread;
306 	cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg);
307 	cpu_t *cpu;
308 	cpupart_t *cpupart;
309 	int pset;
310 
311 	if (cyc == CYCLIC_NONE)
312 		return;
313 
314 	mutex_enter(&cpu_lock);
315 	mutex_enter(&p->p_lock);
316 
317 	/*
318 	 * Okay, now we can safely look at the bindings.
319 	 */
320 	cpu = t->t_bound_cpu;
321 	cpupart = t->t_cpupart;
322 	pset = t->t_bind_pset;
323 
324 	/*
325 	 * Now we drop p_lock.  We haven't dropped cpu_lock; we're guaranteed
326 	 * that even if the bindings change, the CPU and/or processor set
327 	 * that this timer was bound to remain valid (and the combination
328 	 * remains self-consistent).
329 	 */
330 	mutex_exit(&p->p_lock);
331 
332 	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
333 
334 	mutex_exit(&cpu_lock);
335 }
336 
337 void
338 clock_highres_init()
339 {
340 	clock_backend_t *be = &clock_highres;
341 	struct sigevent *ev = &be->clk_default;
342 
343 	ev->sigev_signo = SIGALRM;
344 	ev->sigev_notify = SIGEV_SIGNAL;
345 	ev->sigev_value.sival_ptr = NULL;
346 
347 	be->clk_clock_settime = clock_highres_settime;
348 	be->clk_clock_gettime = clock_highres_gettime;
349 	be->clk_clock_getres = clock_highres_getres;
350 	be->clk_timer_create = clock_highres_timer_create;
351 	be->clk_timer_gettime = clock_highres_timer_gettime;
352 	be->clk_timer_settime = clock_highres_timer_settime;
353 	be->clk_timer_delete = clock_highres_timer_delete;
354 	be->clk_timer_lwpbind = clock_highres_timer_lwpbind;
355 
356 	clock_add_backend(CLOCK_HIGHRES, &clock_highres);
357 }
358