1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright (c) 2012, Joyent Inc. All rights reserved.
29 */
30
31 #include <sys/timer.h>
32 #include <sys/systm.h>
33 #include <sys/param.h>
34 #include <sys/kmem.h>
35 #include <sys/debug.h>
36 #include <sys/cyclic.h>
37 #include <sys/cmn_err.h>
38 #include <sys/pset.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
41
/*
 * Backend descriptor for CLOCK_HIGHRES.  It is filled in and handed to
 * clock_add_backend() by clock_highres_init().
 */
static clock_backend_t clock_highres;
43
44 /*ARGSUSED*/
45 static int
clock_highres_settime(timespec_t * ts)46 clock_highres_settime(timespec_t *ts)
47 {
48 return (EINVAL);
49 }
50
51 static int
clock_highres_gettime(timespec_t * ts)52 clock_highres_gettime(timespec_t *ts)
53 {
54 hrt2ts(gethrtime(), (timestruc_t *)ts);
55
56 return (0);
57 }
58
59 static int
clock_highres_getres(timespec_t * ts)60 clock_highres_getres(timespec_t *ts)
61 {
62 hrt2ts(cyclic_getres(), (timestruc_t *)ts);
63
64 return (0);
65 }
66
67 /*ARGSUSED*/
68 static int
clock_highres_timer_create(itimer_t * it,struct sigevent * ev)69 clock_highres_timer_create(itimer_t *it, struct sigevent *ev)
70 {
71 /*
72 * CLOCK_HIGHRES timers of sufficiently high resolution can deny
73 * service; only allow privileged users to create such timers.
74 * Sites that do not wish to have this restriction should
75 * give users the "proc_clock_highres" privilege.
76 */
77 if (secpolicy_clock_highres(CRED()) != 0) {
78 it->it_arg = NULL;
79 return (EPERM);
80 }
81
82 it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
83
84 return (0);
85 }
86
87 static void
clock_highres_fire(void * arg)88 clock_highres_fire(void *arg)
89 {
90 itimer_t *it = (itimer_t *)arg;
91 hrtime_t *addr = &it->it_hrtime;
92 hrtime_t old = *addr, new = gethrtime();
93
94 do {
95 old = *addr;
96 } while (atomic_cas_64((uint64_t *)addr, old, new) != old);
97
98 timer_fire(it);
99 }
100
101 static int
clock_highres_timer_settime(itimer_t * it,int flags,const struct itimerspec * when)102 clock_highres_timer_settime(itimer_t *it, int flags,
103 const struct itimerspec *when)
104 {
105 cyclic_id_t cyc, *cycp = it->it_arg;
106 proc_t *p = curproc;
107 kthread_t *t = curthread;
108 cyc_time_t cyctime;
109 cyc_handler_t hdlr;
110 cpu_t *cpu;
111 cpupart_t *cpupart;
112 int pset;
113
114 cyctime.cyt_when = ts2hrt(&when->it_value);
115 cyctime.cyt_interval = ts2hrt(&when->it_interval);
116
117 if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 &&
118 it->it_itime.it_interval.tv_sec == 0 &&
119 it->it_itime.it_interval.tv_nsec == 0 &&
120 (cyc = *cycp) != CYCLIC_NONE) {
121 /*
122 * If our existing timer is a one-shot and our new timer is a
123 * one-shot, we'll save ourselves a world of grief and just
124 * reprogram the cyclic.
125 */
126 it->it_itime = *when;
127
128 if (!(flags & TIMER_ABSTIME))
129 cyctime.cyt_when += gethrtime();
130
131 hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
132 (void) cyclic_reprogram(cyc, cyctime.cyt_when);
133 return (0);
134 }
135
136 mutex_enter(&cpu_lock);
137 if ((cyc = *cycp) != CYCLIC_NONE) {
138 cyclic_remove(cyc);
139 *cycp = CYCLIC_NONE;
140 }
141
142 if (cyctime.cyt_when == 0) {
143 mutex_exit(&cpu_lock);
144 return (0);
145 }
146
147 if (!(flags & TIMER_ABSTIME))
148 cyctime.cyt_when += gethrtime();
149
150 /*
151 * Now we will check for overflow (that is, we will check to see
152 * that the start time plus the interval time doesn't exceed
153 * INT64_MAX). The astute code reviewer will observe that this
154 * one-time check doesn't guarantee that a future expiration
155 * will not wrap. We wish to prove, then, that if a future
156 * expiration does wrap, the earliest the problem can be encountered
157 * is (INT64_MAX / 2) nanoseconds (191 years) after boot. Formally:
158 *
159 * Given: s + i < m s > 0 i > 0
160 * s + ni > m n > 1
161 *
162 * (where "s" is the start time, "i" is the interval, "n" is the
163 * number of times the cyclic has fired and "m" is INT64_MAX)
164 *
165 * Prove:
166 * (a) s + (n - 1)i > (m / 2)
167 * (b) s + (n - 1)i < m
168 *
169 * That is, prove that we must have fired at least once 191 years
170 * after boot. The proof is very straightforward; since the left
171 * side of (a) is minimized when i is small, it is sufficient to show
172 * that the statement is true for i's smallest possible value
173 * (((m - s) / n) + epsilon). The same goes for (b); showing that the
174 * statement is true for i's largest possible value (m - s + epsilon)
175 * is sufficient to prove the statement.
176 *
177 * The actual arithmetic manipulation is left up to reader.
178 */
179 if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) {
180 mutex_exit(&cpu_lock);
181 return (EOVERFLOW);
182 }
183
184 if (cyctime.cyt_interval == 0) {
185 /*
186 * If this is a one-shot, then we set the interval to be
187 * inifinite. If this timer is never touched, this cyclic will
188 * simply consume space in the cyclic subsystem. As soon as
189 * timer_settime() or timer_delete() is called, the cyclic is
190 * removed (so it's not possible to run the machine out
191 * of resources by creating one-shots).
192 */
193 cyctime.cyt_interval = CY_INFINITY;
194 }
195
196 it->it_itime = *when;
197
198 hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
199
200 hdlr.cyh_func = (cyc_func_t)clock_highres_fire;
201 hdlr.cyh_arg = it;
202 hdlr.cyh_level = CY_LOW_LEVEL;
203
204 if (cyctime.cyt_when != 0)
205 *cycp = cyc = cyclic_add(&hdlr, &cyctime);
206
207 /*
208 * Now that we have the cyclic created, we need to bind it to our
209 * bound CPU and processor set (if any).
210 */
211 mutex_enter(&p->p_lock);
212 cpu = t->t_bound_cpu;
213 cpupart = t->t_cpupart;
214 pset = t->t_bind_pset;
215
216 mutex_exit(&p->p_lock);
217
218 cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
219
220 mutex_exit(&cpu_lock);
221
222 return (0);
223 }
224
225 static int
clock_highres_timer_gettime(itimer_t * it,struct itimerspec * when)226 clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when)
227 {
228 /*
229 * CLOCK_HIGHRES doesn't update it_itime.
230 */
231 hrtime_t start = ts2hrt(&it->it_itime.it_value);
232 hrtime_t interval = ts2hrt(&it->it_itime.it_interval);
233 hrtime_t diff, now = gethrtime();
234 hrtime_t *addr = &it->it_hrtime;
235 hrtime_t last;
236
237 /*
238 * We're using atomic_cas_64() here only to assure that we slurp the
239 * entire timestamp atomically.
240 */
241 last = atomic_cas_64((uint64_t *)addr, 0, 0);
242
243 *when = it->it_itime;
244
245 if (!timerspecisset(&when->it_value))
246 return (0);
247
248 if (start > now) {
249 /*
250 * We haven't gone off yet...
251 */
252 diff = start - now;
253 } else {
254 if (interval == 0) {
255 /*
256 * This is a one-shot which should have already
257 * fired; set it_value to 0.
258 */
259 timerspecclear(&when->it_value);
260 return (0);
261 }
262
263 /*
264 * Calculate how far we are into this interval.
265 */
266 diff = (now - start) % interval;
267
268 /*
269 * Now check to see if we've dealt with the last interval
270 * yet.
271 */
272 if (now - diff > last) {
273 /*
274 * The last interval hasn't fired; set it_value to 0.
275 */
276 timerspecclear(&when->it_value);
277 return (0);
278 }
279
280 /*
281 * The last interval _has_ fired; we can return the amount
282 * of time left in this interval.
283 */
284 diff = interval - diff;
285 }
286
287 hrt2ts(diff, &when->it_value);
288
289 return (0);
290 }
291
292 static int
clock_highres_timer_delete(itimer_t * it)293 clock_highres_timer_delete(itimer_t *it)
294 {
295 cyclic_id_t cyc;
296
297 if (it->it_arg == NULL) {
298 /*
299 * This timer was never fully created; we must have failed
300 * in the clock_highres_timer_create() routine.
301 */
302 return (0);
303 }
304
305 mutex_enter(&cpu_lock);
306
307 if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE)
308 cyclic_remove(cyc);
309
310 mutex_exit(&cpu_lock);
311
312 kmem_free(it->it_arg, sizeof (cyclic_id_t));
313
314 return (0);
315 }
316
317 static void
clock_highres_timer_lwpbind(itimer_t * it)318 clock_highres_timer_lwpbind(itimer_t *it)
319 {
320 proc_t *p = curproc;
321 kthread_t *t = curthread;
322 cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg);
323 cpu_t *cpu;
324 cpupart_t *cpupart;
325 int pset;
326
327 if (cyc == CYCLIC_NONE)
328 return;
329
330 mutex_enter(&cpu_lock);
331 mutex_enter(&p->p_lock);
332
333 /*
334 * Okay, now we can safely look at the bindings.
335 */
336 cpu = t->t_bound_cpu;
337 cpupart = t->t_cpupart;
338 pset = t->t_bind_pset;
339
340 /*
341 * Now we drop p_lock. We haven't dropped cpu_lock; we're guaranteed
342 * that even if the bindings change, the CPU and/or processor set
343 * that this timer was bound to remain valid (and the combination
344 * remains self-consistent).
345 */
346 mutex_exit(&p->p_lock);
347
348 cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
349
350 mutex_exit(&cpu_lock);
351 }
352
353 void
clock_highres_init()354 clock_highres_init()
355 {
356 clock_backend_t *be = &clock_highres;
357 struct sigevent *ev = &be->clk_default;
358
359 ev->sigev_signo = SIGALRM;
360 ev->sigev_notify = SIGEV_SIGNAL;
361 ev->sigev_value.sival_ptr = NULL;
362
363 be->clk_clock_settime = clock_highres_settime;
364 be->clk_clock_gettime = clock_highres_gettime;
365 be->clk_clock_getres = clock_highres_getres;
366 be->clk_timer_create = clock_highres_timer_create;
367 be->clk_timer_gettime = clock_highres_timer_gettime;
368 be->clk_timer_settime = clock_highres_timer_settime;
369 be->clk_timer_delete = clock_highres_timer_delete;
370 be->clk_timer_lwpbind = clock_highres_timer_lwpbind;
371
372 clock_add_backend(CLOCK_HIGHRES, &clock_highres);
373 }
374