// SPDX-License-Identifier: GPL-2.0
/*
 * Generic userspace implementations of gettimeofday() and similar.
 */
#include <linux/compiler.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/hrtimer_defs.h>
#include <linux/clocksource.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>

/*
 * The generic vDSO implementation requires that gettimeofday.h
 * provides:
 * - __arch_get_vdso_data(): to get the vdso datapage.
 * - __arch_get_hw_counter(): to get the hw counter based on the
 *   clock_mode.
 * - gettimeofday_fallback(): fallback for gettimeofday.
 * - clock_gettime_fallback(): fallback for clock_gettime.
 * - clock_getres_fallback(): fallback for clock_getres.
 * An illustrative sketch of these hooks follows the includes below.
 */
#ifdef ENABLE_COMPAT_VDSO
#include <asm/vdso/compat_gettimeofday.h>
#else
#include <asm/vdso/gettimeofday.h>
#endif /* ENABLE_COMPAT_VDSO */
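
/*
 * Illustrative sketch only, not part of this file: roughly what an
 * architecture's asm/vdso/gettimeofday.h supplies. The names _vdso_data,
 * read_cycle_counter() and vdso_syscall2() are hypothetical placeholders;
 * real implementations live in arch/<arch>/include/asm/vdso/gettimeofday.h.
 */
#if 0	/* example, never compiled */
static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
	/* The datapage symbol is placed by the arch vDSO linker script. */
	return _vdso_data;
}

static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
{
	/* Read the hardware counter selected by clock_mode. */
	return read_cycle_counter(clock_mode);
}

static __always_inline
long clock_gettime_fallback(clockid_t clock, struct __kernel_timespec *ts)
{
	/* Punt to the real syscall when the vDSO cannot serve the clock. */
	return vdso_syscall2(__NR_clock_gettime, clock, ts);
}
#endif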

#ifndef vdso_calc_delta
/*
 * Default implementation which works for all sane clocksources. That
 * obviously excludes x86/TSC, which therefore overrides this with a
 * variant that guards against the counter being behind @last.
 */
static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
	return ((cycles - last) & mask) * mult;
}
#endif

#ifndef __arch_vdso_hres_capable
/*
 * Architectures can override this to return false and thereby allow the
 * high resolution parts to be compiled out (see do_hres() below).
 */
static inline bool __arch_vdso_hres_capable(void)
{
	return true;
}
#endif

#ifdef CONFIG_TIME_NS
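/*
 * do_hres_timens() - high resolution clock read for tasks inside a time
 * namespace. @vdns is the task's namespace-specific VVAR page, which
 * carries the per-clock offsets; the real timekeeper data comes from
 * __arch_get_timens_vdso_data() and the offsets are applied after the
 * seqcount-protected read.
 */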
static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
			  struct __kernel_timespec *ts)
{
	const struct vdso_data *vd = __arch_get_timens_vdso_data();
	const struct timens_offset *offs = &vdns->offset[clk];
	const struct vdso_timestamp *vdso_ts;
	u64 cycles, last, ns;
	u32 seq;
	s64 sec;

	if (clk != CLOCK_MONOTONIC_RAW)
		vd = &vd[CS_HRES_COARSE];
	else
		vd = &vd[CS_RAW];
	vdso_ts = &vd->basetime[clk];

	do {
		seq = vdso_read_begin(vd);
		if (IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) &&
		    vd->clock_mode == VDSO_CLOCKMODE_NONE)
			return -1;
		cycles = __arch_get_hw_counter(vd->clock_mode);
		ns = vdso_ts->nsec;
		last = vd->cycle_last;
		if (!IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) &&
		    unlikely((s64)cycles < 0))
			return -1;

		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
		ns >>= vd->shift;
		sec = vdso_ts->sec;
	} while (unlikely(vdso_read_retry(vd, seq)));

	/* Add the namespace offset */
	sec += offs->sec;
	ns += offs->nsec;

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}
#else
static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
{
	return NULL;
}

static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
			  struct __kernel_timespec *ts)
{
	return -EINVAL;
}
#endif

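/*
 * do_hres() - seqcount-protected high resolution clock read. The
 * conversion is the usual clocksource arithmetic:
 *
 *	ns = base_ns + ((((cycles - cycle_last) & mask) * mult) >> shift)
 *
 * where base_ns and sec are the timekeeper values snapshotted into the
 * vDSO datapage at the last timekeeping update.
 */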
static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
				   struct __kernel_timespec *ts)
{
	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
	u64 cycles, last, sec, ns;
	u32 seq;

	/* Allows the high resolution parts to be compiled out */
	if (!__arch_vdso_hres_capable())
		return -1;

	do {
		/*
		 * Open coded to handle VCLOCK_TIMENS. Tasks with a time
		 * namespace enabled have a special VVAR page installed
		 * which has vd->seq set to 1 and vd->clock_mode set to
		 * VCLOCK_TIMENS. For tasks not affected by a time
		 * namespace this does not hurt performance: when vd->seq
		 * is odd, i.e. a concurrent update is in progress, the
		 * extra check for vd->clock_mode costs just a few
		 * instructions while spin waiting for vd->seq to become
		 * even again.
		 */
		while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vd->clock_mode == VCLOCK_TIMENS)
				return do_hres_timens(vd, clk, ts);
			cpu_relax();
		}
		smp_rmb();

		if (IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) &&
		    vd->clock_mode == VDSO_CLOCKMODE_NONE)
			return -1;
		cycles = __arch_get_hw_counter(vd->clock_mode);
		ns = vdso_ts->nsec;
		last = vd->cycle_last;
		if (!IS_ENABLED(CONFIG_GENERIC_VDSO_CLOCK_MODE) &&
		    unlikely((s64)cycles < 0))
			return -1;

		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
		ns >>= vd->shift;
		sec = vdso_ts->sec;
	} while (unlikely(vdso_read_retry(vd, seq)));

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}

#ifdef CONFIG_TIME_NS
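/*
 * do_coarse_timens() - coarse clock read plus time namespace offsets.
 * Coarse clocks return the timestamp of the last tick, so no hardware
 * counter is read; only the namespace offset needs to be applied.
 */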
static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
			    struct __kernel_timespec *ts)
{
	const struct vdso_data *vd = __arch_get_timens_vdso_data();
	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
	const struct timens_offset *offs = &vdns->offset[clk];
	u64 nsec;
	s64 sec;
	s32 seq;

	do {
		seq = vdso_read_begin(vd);
		sec = vdso_ts->sec;
		nsec = vdso_ts->nsec;
	} while (unlikely(vdso_read_retry(vd, seq)));

	/* Add the namespace offset */
	sec += offs->sec;
	nsec += offs->nsec;

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
	ts->tv_nsec = nsec;
	return 0;
}
#else
static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
			    struct __kernel_timespec *ts)
{
	return -1;
}
#endif

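/*
 * do_coarse() - lockless read of the coarse clocks, which return the
 * timestamp of the last tick. Only the seqcount loop around two loads
 * is needed; no hardware counter is involved.
 */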
static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
				     struct __kernel_timespec *ts)
{
	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
	u32 seq;

	do {
		/*
		 * Open coded to handle VCLOCK_TIMENS. See comment in
		 * do_hres().
		 */
		while ((seq = READ_ONCE(vd->seq)) & 1) {
			if (IS_ENABLED(CONFIG_TIME_NS) &&
			    vd->clock_mode == VCLOCK_TIMENS)
				return do_coarse_timens(vd, clk, ts);
			cpu_relax();
		}
		smp_rmb();

		ts->tv_sec = vdso_ts->sec;
		ts->tv_nsec = vdso_ts->nsec;
	} while (unlikely(vdso_read_retry(vd, seq)));

	return 0;
}

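/*
 * __cvdso_clock_gettime_common() - dispatch a clockid to the matching
 * read function. VDSO_HRES, VDSO_COARSE and VDSO_RAW (see
 * vdso/datapage.h) are bitmasks of the clock ids served by each path;
 * any other clock is rejected with -1 so the caller can take the
 * syscall fallback.
 */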
static __maybe_unused int
__cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
{
	const struct vdso_data *vd = __arch_get_vdso_data();
	u32 msk;

	/* Check for negative values or invalid clocks */
	if (unlikely((u32) clock >= MAX_CLOCKS))
		return -1;

	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
	msk = 1U << clock;
	if (likely(msk & VDSO_HRES))
		vd = &vd[CS_HRES_COARSE];
	else if (msk & VDSO_COARSE)
		return do_coarse(&vd[CS_HRES_COARSE], clock, ts);
	else if (msk & VDSO_RAW)
		vd = &vd[CS_RAW];
	else
		return -1;

	return do_hres(vd, clock, ts);
}

static __maybe_unused int
__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
	int ret = __cvdso_clock_gettime_common(clock, ts);

	if (unlikely(ret))
		return clock_gettime_fallback(clock, ts);
	return 0;
}

#ifdef BUILD_VDSO32
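/*
 * 32-bit variant: read into a 64-bit __kernel_timespec, then narrow the
 * result into the legacy old_timespec32 layout used by 32-bit vDSOs.
 */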
static __maybe_unused int
__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
{
	struct __kernel_timespec ts;
	int ret;

	ret = __cvdso_clock_gettime_common(clock, &ts);

	if (unlikely(ret))
		return clock_gettime32_fallback(clock, res);

	/* Only reached for ret == 0: store the narrowed result */
	res->tv_sec = ts.tv_sec;
	res->tv_nsec = ts.tv_nsec;

	return ret;
}
#endif /* BUILD_VDSO32 */

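/*
 * __cvdso_gettimeofday() - gettimeofday() via the CLOCK_REALTIME highres
 * path, with the nanoseconds truncated to microseconds. The legacy
 * timezone fields are served straight from the datapage.
 */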
static __maybe_unused int
__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
	const struct vdso_data *vd = __arch_get_vdso_data();

	if (likely(tv != NULL)) {
		struct __kernel_timespec ts;

		if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
			return gettimeofday_fallback(tv, tz);

		tv->tv_sec = ts.tv_sec;
		tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
	}

	if (unlikely(tz != NULL)) {
		if (IS_ENABLED(CONFIG_TIME_NS) &&
		    vd->clock_mode == VCLOCK_TIMENS)
			vd = __arch_get_timens_vdso_data();

		tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
		tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
	}

	return 0;
}

#ifdef VDSO_HAS_TIME
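/*
 * __cvdso_time() - time() needs only the coarse CLOCK_REALTIME seconds,
 * so a single READ_ONCE() suffices and no seqcount loop is required.
 */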
static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
{
	const struct vdso_data *vd = __arch_get_vdso_data();
	__kernel_old_time_t t;

	if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS)
		vd = __arch_get_timens_vdso_data();

	t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);

	if (time)
		*time = t;

	return t;
}
#endif /* VDSO_HAS_TIME */

#ifdef VDSO_HAS_CLOCK_GETRES
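/*
 * __cvdso_clock_getres_common() - clock_getres(): highres and raw clocks
 * report the current hrtimer resolution, coarse clocks report one tick
 * (LOW_RES_NSEC). The clockid dispatch mirrors
 * __cvdso_clock_gettime_common().
 */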
static __maybe_unused
int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
{
	const struct vdso_data *vd = __arch_get_vdso_data();
	u32 msk;
	u64 ns;

	/* Check for negative values or invalid clocks */
	if (unlikely((u32) clock >= MAX_CLOCKS))
		return -1;

	if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS)
		vd = __arch_get_timens_vdso_data();

	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
	msk = 1U << clock;
	if (msk & (VDSO_HRES | VDSO_RAW)) {
		/* Preserves the behaviour of posix_get_hrtimer_res(). */
		ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
	} else if (msk & VDSO_COARSE) {
		/* Preserves the behaviour of posix_get_coarse_res(). */
		ns = LOW_RES_NSEC;
	} else {
		return -1;
	}

	if (likely(res)) {
		res->tv_sec = 0;
		res->tv_nsec = ns;
	}
	return 0;
}

static __maybe_unused
int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
{
	int ret = __cvdso_clock_getres_common(clock, res);

	if (unlikely(ret))
		return clock_getres_fallback(clock, res);
	return 0;
}

#ifdef BUILD_VDSO32
static __maybe_unused int
__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
{
	struct __kernel_timespec ts;
	int ret;

	ret = __cvdso_clock_getres_common(clock, &ts);

	if (unlikely(ret))
		return clock_getres32_fallback(clock, res);

	if (likely(res)) {
		res->tv_sec = ts.tv_sec;
		res->tv_nsec = ts.tv_nsec;
	}
	return ret;
}
#endif /* BUILD_VDSO32 */
#endif /* VDSO_HAS_CLOCK_GETRES */
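
/*
 * Illustrative sketch only: architectures expose the __cvdso_* helpers
 * above through thin wrappers in their vDSO objects, roughly as below.
 * The __vdso_* names follow x86; other architectures use their own
 * naming and symbol versioning.
 */
#if 0	/* example, never compiled */
int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
	return __cvdso_clock_gettime(clock, ts);
}

int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
	return __cvdso_gettimeofday(tv, tz);
}
#endif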