xref: /freebsd/sys/kern/sys_timerfd.c (revision 2e3f49888ec8851bafb22011533217487764fdb0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5  * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/callout.h>
32 #include <sys/fcntl.h>
33 #include <sys/file.h>
34 #include <sys/filedesc.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mount.h>
40 #include <sys/mutex.h>
41 #include <sys/poll.h>
42 #include <sys/proc.h>
43 #include <sys/queue.h>
44 #include <sys/selinfo.h>
45 #include <sys/stat.h>
46 #include <sys/sx.h>
47 #include <sys/sysctl.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/timerfd.h>
51 #include <sys/timespec.h>
52 #include <sys/uio.h>
53 #include <sys/user.h>
54 
55 #include <security/audit/audit.h>
56 
/* Malloc type used for every struct timerfd allocated in this file. */
static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");

/*
 * Global list of live timerfds, linked through their 'entry' field.
 * timerfd_list_lock is held around insertion, removal and the traversal
 * performed by timerfd_jumped().
 */
static struct mtx timerfd_list_lock;
static LIST_HEAD(, timerfd) timerfd_list;
MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);

/* Number allocator from which each timerfd's inode number is drawn. */
static struct unrhdr64 tfdino_unr;

/* Values stored in a timerfd's tfd_jumped field. */
#define	TFD_NOJUMP	0	/* Realtime clock has not jumped. */
#define	TFD_READ	1	/* Jumped, tfd has been read since. */
#define	TFD_ZREAD	2	/* Jumped backwards, CANCEL_ON_SET=false. */
#define	TFD_CANCELED	4	/* Jumped, CANCEL_ON_SET=true. */
#define	TFD_JUMPED	(TFD_ZREAD | TFD_CANCELED)
70 
/*
 * One structure allocated per timerfd descriptor.
 *
 * Locking semantics:
 * (t)	locked by tfd_lock mtx
 * (l)	locked by timerfd_list_lock mtx
 * (c)	const until freeing
 */
struct timerfd {
	/* User specified. */
	struct itimerspec tfd_time;	/* (t) tfd timer */
	clockid_t	tfd_clockid;	/* (c) timing base */
	int		tfd_flags;	/* (c) creation flags */
	int		tfd_timflags;	/* (t) timer flags */

	/* Used internally. */
	timerfd_t	tfd_count;	/* (t) expiration count since read */
	bool		tfd_expired;	/* (t) true upon initial expiration */
	struct mtx	tfd_lock;	/* mutex protecting the (t) fields */
	struct callout	tfd_callout;	/* (t) expiration notification */
	struct selinfo	tfd_sel;	/* (t) I/O alerts */
	struct timespec	tfd_boottim;	/* (t) cached boottime */
	int		tfd_jumped;	/* (t) timer jump status (TFD_*) */
	LIST_ENTRY(timerfd) entry;	/* (l) entry in list */

	/* For stat(2). */
	ino_t		tfd_ino;	/* (c) inode number */
	struct timespec	tfd_atim;	/* (t) time of last read */
	struct timespec	tfd_mtim;	/* (t) time of last settime */
	struct timespec tfd_birthtim;	/* (c) creation time */
};
102 
103 static void
104 timerfd_init(void *data)
105 {
106 	new_unrhdr64(&tfdino_unr, 1);
107 }
108 
109 SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
110 
111 static inline void
112 timerfd_getboottime(struct timespec *ts)
113 {
114 	struct timeval tv;
115 
116 	getboottime(&tv);
117 	TIMEVAL_TO_TIMESPEC(&tv, ts);
118 }
119 
120 /*
121  * Call when a discontinuous jump has occured in CLOCK_REALTIME and
122  * update timerfd's cached boottime. A jump can be triggered using
123  * functions like clock_settime(2) or settimeofday(2).
124  *
125  * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
126  * and the realtime clock jumps.
127  * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
128  * but the realtime clock jumps backwards.
129  */
130 void
131 timerfd_jumped(void)
132 {
133 	struct timerfd *tfd;
134 	struct timespec boottime, diff;
135 
136 	if (LIST_EMPTY(&timerfd_list))
137 		return;
138 
139 	timerfd_getboottime(&boottime);
140 	mtx_lock(&timerfd_list_lock);
141 	LIST_FOREACH(tfd, &timerfd_list, entry) {
142 		mtx_lock(&tfd->tfd_lock);
143 		if (tfd->tfd_clockid != CLOCK_REALTIME ||
144 		    (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
145 		    timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
146 			mtx_unlock(&tfd->tfd_lock);
147 			continue;
148 		}
149 
150 		if (callout_active(&tfd->tfd_callout)) {
151 			if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
152 				tfd->tfd_jumped = TFD_CANCELED;
153 			else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
154 				tfd->tfd_jumped = TFD_ZREAD;
155 
156 			/*
157 			 * Do not reschedule callout when
158 			 * inside interval time loop.
159 			 */
160 			if (!tfd->tfd_expired) {
161 				timespecsub(&boottime,
162 				    &tfd->tfd_boottim, &diff);
163 				timespecsub(&tfd->tfd_time.it_value,
164 				    &diff, &tfd->tfd_time.it_value);
165 				if (callout_stop(&tfd->tfd_callout) == 1) {
166 					callout_schedule_sbt(&tfd->tfd_callout,
167 					    tstosbt(tfd->tfd_time.it_value),
168 					    0, C_ABSOLUTE);
169 				}
170 			}
171 		}
172 
173 		tfd->tfd_boottim = boottime;
174 		mtx_unlock(&tfd->tfd_lock);
175 	}
176 	mtx_unlock(&timerfd_list_lock);
177 }
178 
179 static int
180 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
181     int flags, struct thread *td)
182 {
183 	struct timerfd *tfd = fp->f_data;
184 	timerfd_t count;
185 	int error = 0;
186 
187 	if (uio->uio_resid < sizeof(timerfd_t))
188 		return (EINVAL);
189 
190 	mtx_lock(&tfd->tfd_lock);
191 retry:
192 	getnanotime(&tfd->tfd_atim);
193 	if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
194 		if (tfd->tfd_jumped == TFD_CANCELED)
195 			error = ECANCELED;
196 		tfd->tfd_jumped = TFD_READ;
197 		tfd->tfd_count = 0;
198 		mtx_unlock(&tfd->tfd_lock);
199 		return (error);
200 	} else {
201 		tfd->tfd_jumped = TFD_NOJUMP;
202 	}
203 	if (tfd->tfd_count == 0) {
204 		if ((fp->f_flag & FNONBLOCK) != 0) {
205 			mtx_unlock(&tfd->tfd_lock);
206 			return (EAGAIN);
207 		}
208 		td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
209 		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
210 		    PCATCH, "tfdrd", 0);
211 		if (error == 0) {
212 			goto retry;
213 		} else {
214 			mtx_unlock(&tfd->tfd_lock);
215 			return (error);
216 		}
217 	}
218 
219 	count = tfd->tfd_count;
220 	tfd->tfd_count = 0;
221 	mtx_unlock(&tfd->tfd_lock);
222 	error = uiomove(&count, sizeof(timerfd_t), uio);
223 
224 	return (error);
225 }
226 
227 static int
228 timerfd_ioctl(struct file *fp, u_long cmd, void *data,
229     struct ucred *active_cred, struct thread *td)
230 {
231 	switch (cmd) {
232 	case FIOASYNC:
233 		if (*(int *)data != 0)
234 			atomic_set_int(&fp->f_flag, FASYNC);
235 		else
236 			atomic_clear_int(&fp->f_flag, FASYNC);
237 		return (0);
238 	case FIONBIO:
239 		if (*(int *)data != 0)
240 			atomic_set_int(&fp->f_flag, FNONBLOCK);
241 		else
242 			atomic_clear_int(&fp->f_flag, FNONBLOCK);
243 		return (0);
244 	}
245 	return (ENOTTY);
246 }
247 
248 static int
249 timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
250     struct thread *td)
251 {
252 	struct timerfd *tfd = fp->f_data;
253 	int revents = 0;
254 
255 	mtx_lock(&tfd->tfd_lock);
256 	if ((events & (POLLIN | POLLRDNORM)) != 0 &&
257 	    tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ)
258 		revents |= events & (POLLIN | POLLRDNORM);
259 	if (revents == 0)
260 		selrecord(td, &tfd->tfd_sel);
261 	mtx_unlock(&tfd->tfd_lock);
262 
263 	return (revents);
264 }
265 
266 static void
267 filt_timerfddetach(struct knote *kn)
268 {
269 	struct timerfd *tfd = kn->kn_hook;
270 
271 	mtx_lock(&tfd->tfd_lock);
272 	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
273 	mtx_unlock(&tfd->tfd_lock);
274 }
275 
276 static int
277 filt_timerfdread(struct knote *kn, long hint)
278 {
279 	struct timerfd *tfd = kn->kn_hook;
280 
281 	mtx_assert(&tfd->tfd_lock, MA_OWNED);
282 	kn->kn_data = (int64_t)tfd->tfd_count;
283 	return (tfd->tfd_count > 0);
284 }
285 
286 static struct filterops timerfd_rfiltops = {
287 	.f_isfd = 1,
288 	.f_detach = filt_timerfddetach,
289 	.f_event = filt_timerfdread,
290 };
291 
292 static int
293 timerfd_kqfilter(struct file *fp, struct knote *kn)
294 {
295 	struct timerfd *tfd = fp->f_data;
296 
297 	if (kn->kn_filter != EVFILT_READ)
298 		return (EINVAL);
299 
300 	kn->kn_fop = &timerfd_rfiltops;
301 	kn->kn_hook = tfd;
302 	knlist_add(&tfd->tfd_sel.si_note, kn, 0);
303 
304 	return (0);
305 }
306 
307 static int
308 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
309 {
310 	struct timerfd *tfd = fp->f_data;
311 
312 	bzero(sb, sizeof(*sb));
313 	sb->st_nlink = fp->f_count - 1;
314 	sb->st_uid = fp->f_cred->cr_uid;
315 	sb->st_gid = fp->f_cred->cr_gid;
316 	sb->st_blksize = PAGE_SIZE;
317 	mtx_lock(&tfd->tfd_lock);
318 	sb->st_atim = tfd->tfd_atim;
319 	sb->st_mtim = tfd->tfd_mtim;
320 	mtx_unlock(&tfd->tfd_lock);
321 	sb->st_ctim = sb->st_mtim;
322 	sb->st_ino = tfd->tfd_ino;
323 	sb->st_birthtim = tfd->tfd_birthtim;
324 
325 	return (0);
326 }
327 
328 static int
329 timerfd_close(struct file *fp, struct thread *td)
330 {
331 	struct timerfd *tfd = fp->f_data;
332 
333 	mtx_lock(&timerfd_list_lock);
334 	LIST_REMOVE(tfd, entry);
335 	mtx_unlock(&timerfd_list_lock);
336 
337 	callout_drain(&tfd->tfd_callout);
338 	seldrain(&tfd->tfd_sel);
339 	knlist_destroy(&tfd->tfd_sel.si_note);
340 	mtx_destroy(&tfd->tfd_lock);
341 	free(tfd, M_TIMERFD);
342 	fp->f_ops = &badfileops;
343 
344 	return (0);
345 }
346 
347 static int
348 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
349     struct filedesc *fdp)
350 {
351 	struct timerfd *tfd = fp->f_data;
352 
353 	kif->kf_type = KF_TYPE_TIMERFD;
354 	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
355 	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
356 	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
357 
358 	return (0);
359 }
360 
361 static struct fileops timerfdops = {
362 	.fo_read = timerfd_read,
363 	.fo_write = invfo_rdwr,
364 	.fo_truncate = invfo_truncate,
365 	.fo_ioctl = timerfd_ioctl,
366 	.fo_poll = timerfd_poll,
367 	.fo_kqfilter = timerfd_kqfilter,
368 	.fo_stat = timerfd_stat,
369 	.fo_close = timerfd_close,
370 	.fo_chmod = invfo_chmod,
371 	.fo_chown = invfo_chown,
372 	.fo_sendfile = invfo_sendfile,
373 	.fo_fill_kinfo = timerfd_fill_kinfo,
374 	.fo_cmp = file_kcmp_generic,
375 	.fo_flags = DFLAG_PASSABLE,
376 };
377 
378 static void
379 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
380 {
381 	struct timespec curr_value;
382 
383 	mtx_assert(&tfd->tfd_lock, MA_OWNED);
384 	*old_value = tfd->tfd_time;
385 	if (timespecisset(&tfd->tfd_time.it_value)) {
386 		nanouptime(&curr_value);
387 		timespecsub(&tfd->tfd_time.it_value, &curr_value,
388 		    &old_value->it_value);
389 	}
390 }
391 
392 static void
393 timerfd_expire(void *arg)
394 {
395 	struct timerfd *tfd = (struct timerfd *)arg;
396 	struct timespec uptime;
397 
398 	++tfd->tfd_count;
399 	tfd->tfd_expired = true;
400 	if (timespecisset(&tfd->tfd_time.it_interval)) {
401 		/* Count missed events. */
402 		nanouptime(&uptime);
403 		if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
404 			timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
405 			tfd->tfd_count += tstosbt(uptime) /
406 			    tstosbt(tfd->tfd_time.it_interval);
407 		}
408 		timespecadd(&tfd->tfd_time.it_value,
409 		    &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
410 		callout_schedule_sbt(&tfd->tfd_callout,
411 		    tstosbt(tfd->tfd_time.it_value),
412 		    0, C_ABSOLUTE);
413 	} else {
414 		/* Single shot timer. */
415 		callout_deactivate(&tfd->tfd_callout);
416 		timespecclear(&tfd->tfd_time.it_value);
417 	}
418 
419 	wakeup(&tfd->tfd_count);
420 	selwakeup(&tfd->tfd_sel);
421 	KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
422 }
423 
424 int
425 kern_timerfd_create(struct thread *td, int clockid, int flags)
426 {
427 	struct file *fp;
428 	struct timerfd *tfd;
429 	int error, fd, fflags;
430 
431 	AUDIT_ARG_VALUE(clockid);
432 	AUDIT_ARG_FFLAGS(flags);
433 
434 	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
435 		return (EINVAL);
436 	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
437 		return (EINVAL);
438 
439 	fflags = FREAD;
440 	if ((flags & TFD_CLOEXEC) != 0)
441 		fflags |= O_CLOEXEC;
442 	if ((flags & TFD_NONBLOCK) != 0)
443 		fflags |= FNONBLOCK;
444 
445 	error = falloc(td, &fp, &fd, fflags);
446 	if (error != 0)
447 		return (error);
448 
449 	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
450 	tfd->tfd_clockid = (clockid_t)clockid;
451 	tfd->tfd_flags = flags;
452 	tfd->tfd_ino = alloc_unr64(&tfdino_unr);
453 	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
454 	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
455 	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
456 	timerfd_getboottime(&tfd->tfd_boottim);
457 	getnanotime(&tfd->tfd_birthtim);
458 	mtx_lock(&timerfd_list_lock);
459 	LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
460 	mtx_unlock(&timerfd_list_lock);
461 
462 	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
463 
464 	fdrop(fp, td);
465 
466 	td->td_retval[0] = fd;
467 	return (0);
468 }
469 
470 int
471 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
472 {
473 	struct file *fp;
474 	struct timerfd *tfd;
475 	int error;
476 
477 	error = fget(td, fd, &cap_write_rights, &fp);
478 	if (error != 0)
479 		return (error);
480 	if (fp->f_type != DTYPE_TIMERFD) {
481 		fdrop(fp, td);
482 		return (EINVAL);
483 	}
484 	tfd = fp->f_data;
485 
486 	mtx_lock(&tfd->tfd_lock);
487 	timerfd_curval(tfd, curr_value);
488 	mtx_unlock(&tfd->tfd_lock);
489 
490 	fdrop(fp, td);
491 	return (0);
492 }
493 
494 int
495 kern_timerfd_settime(struct thread *td, int fd, int flags,
496     const struct itimerspec *new_value, struct itimerspec *old_value)
497 {
498 	struct file *fp;
499 	struct timerfd *tfd;
500 	struct timespec ts;
501 	int error = 0;
502 
503 	if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
504 		return (EINVAL);
505 	if (!timespecvalid_interval(&new_value->it_value) ||
506 	    !timespecvalid_interval(&new_value->it_interval))
507 		return (EINVAL);
508 
509 	error = fget(td, fd, &cap_write_rights, &fp);
510 	if (error != 0)
511 		return (error);
512 	if (fp->f_type != DTYPE_TIMERFD) {
513 		fdrop(fp, td);
514 		return (EINVAL);
515 	}
516 	tfd = fp->f_data;
517 
518 	mtx_lock(&tfd->tfd_lock);
519 	getnanotime(&tfd->tfd_mtim);
520 	tfd->tfd_timflags = flags;
521 
522 	/* Store old itimerspec, if applicable. */
523 	if (old_value != NULL)
524 		timerfd_curval(tfd, old_value);
525 
526 	/* Set new expiration. */
527 	tfd->tfd_time = *new_value;
528 	if (timespecisset(&tfd->tfd_time.it_value)) {
529 		if ((flags & TFD_TIMER_ABSTIME) == 0) {
530 			nanouptime(&ts);
531 			timespecadd(&tfd->tfd_time.it_value, &ts,
532 			    &tfd->tfd_time.it_value);
533 		} else if (tfd->tfd_clockid == CLOCK_REALTIME) {
534 			/* ECANCELED if unread jump is pending. */
535 			if (tfd->tfd_jumped == TFD_CANCELED)
536 				error = ECANCELED;
537 			/* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */
538 			timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
539 			    &tfd->tfd_time.it_value);
540 		}
541 		callout_reset_sbt(&tfd->tfd_callout,
542 		    tstosbt(tfd->tfd_time.it_value),
543 		    0, timerfd_expire, tfd, C_ABSOLUTE);
544 	} else {
545 		callout_stop(&tfd->tfd_callout);
546 	}
547 	tfd->tfd_count = 0;
548 	tfd->tfd_expired = false;
549 	tfd->tfd_jumped = TFD_NOJUMP;
550 	mtx_unlock(&tfd->tfd_lock);
551 
552 	fdrop(fp, td);
553 	return (error);
554 }
555 
556 int
557 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
558 {
559 	return (kern_timerfd_create(td, uap->clockid, uap->flags));
560 }
561 
562 int
563 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
564 {
565 	struct itimerspec curr_value;
566 	int error;
567 
568 	error = kern_timerfd_gettime(td, uap->fd, &curr_value);
569 	if (error == 0)
570 		error = copyout(&curr_value, uap->curr_value,
571 		    sizeof(curr_value));
572 
573 	return (error);
574 }
575 
576 int
577 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
578 {
579 	struct itimerspec new_value, old_value;
580 	int error;
581 
582 	error = copyin(uap->new_value, &new_value, sizeof(new_value));
583 	if (error != 0)
584 		return (error);
585 	if (uap->old_value == NULL) {
586 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
587 		    &new_value, NULL);
588 	} else {
589 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
590 		    &new_value, &old_value);
591 		if (error == 0)
592 			error = copyout(&old_value, uap->old_value,
593 			    sizeof(old_value));
594 	}
595 	return (error);
596 }
597