xref: /freebsd/sys/kern/sys_timerfd.c (revision f4296cfb409a48de00bfa60e76f686c2b031876f)
1af93fea7SJake Freeland /*-
2af93fea7SJake Freeland  * SPDX-License-Identifier: BSD-2-Clause
3af93fea7SJake Freeland  *
4af93fea7SJake Freeland  * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5af93fea7SJake Freeland  * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
6af93fea7SJake Freeland  *
7af93fea7SJake Freeland  * Redistribution and use in source and binary forms, with or without
8af93fea7SJake Freeland  * modification, are permitted provided that the following conditions
9af93fea7SJake Freeland  * are met:
10af93fea7SJake Freeland  * 1. Redistributions of source code must retain the above copyright
11af93fea7SJake Freeland  *    notice, this list of conditions and the following disclaimer.
12af93fea7SJake Freeland  * 2. Redistributions in binary form must reproduce the above copyright
13af93fea7SJake Freeland  *    notice, this list of conditions and the following disclaimer in the
14af93fea7SJake Freeland  *    documentation and/or other materials provided with the distribution.
15af93fea7SJake Freeland  *
16af93fea7SJake Freeland  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17af93fea7SJake Freeland  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18af93fea7SJake Freeland  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19af93fea7SJake Freeland  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20af93fea7SJake Freeland  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21af93fea7SJake Freeland  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22af93fea7SJake Freeland  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23af93fea7SJake Freeland  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24af93fea7SJake Freeland  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25af93fea7SJake Freeland  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26af93fea7SJake Freeland  * SUCH DAMAGE.
27af93fea7SJake Freeland  */
28af93fea7SJake Freeland 
29af93fea7SJake Freeland #include <sys/param.h>
30af93fea7SJake Freeland #include <sys/systm.h>
31af93fea7SJake Freeland #include <sys/callout.h>
32af93fea7SJake Freeland #include <sys/fcntl.h>
33af93fea7SJake Freeland #include <sys/file.h>
34af93fea7SJake Freeland #include <sys/filedesc.h>
35af93fea7SJake Freeland #include <sys/filio.h>
36af93fea7SJake Freeland #include <sys/kernel.h>
37af93fea7SJake Freeland #include <sys/lock.h>
38af93fea7SJake Freeland #include <sys/malloc.h>
39af93fea7SJake Freeland #include <sys/mount.h>
40af93fea7SJake Freeland #include <sys/mutex.h>
41af93fea7SJake Freeland #include <sys/poll.h>
42af93fea7SJake Freeland #include <sys/proc.h>
43af93fea7SJake Freeland #include <sys/queue.h>
44af93fea7SJake Freeland #include <sys/selinfo.h>
45af93fea7SJake Freeland #include <sys/stat.h>
4602f534b5SMateusz Guzik #include <sys/sx.h>
47af93fea7SJake Freeland #include <sys/sysctl.h>
48af93fea7SJake Freeland #include <sys/sysent.h>
49af93fea7SJake Freeland #include <sys/sysproto.h>
50af93fea7SJake Freeland #include <sys/timerfd.h>
51af93fea7SJake Freeland #include <sys/timespec.h>
52af93fea7SJake Freeland #include <sys/uio.h>
53af93fea7SJake Freeland #include <sys/user.h>
54af93fea7SJake Freeland 
55af93fea7SJake Freeland #include <security/audit/audit.h>
56af93fea7SJake Freeland 
57af93fea7SJake Freeland #ifdef COMPAT_FREEBSD32
58af93fea7SJake Freeland #include <compat/freebsd32/freebsd32.h>
59af93fea7SJake Freeland #include <compat/freebsd32/freebsd32_proto.h>
60af93fea7SJake Freeland #endif
61af93fea7SJake Freeland 
62af93fea7SJake Freeland static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
6302f534b5SMateusz Guzik 
64*f4296cfbSMateusz Guzik static struct mtx timerfd_list_lock;
6502f534b5SMateusz Guzik static LIST_HEAD(, timerfd) timerfd_list;
66*f4296cfbSMateusz Guzik MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);
6702f534b5SMateusz Guzik 
68af93fea7SJake Freeland static struct unrhdr64 tfdino_unr;
69af93fea7SJake Freeland 
/*
 * Values stored in timerfd.tfd_jumped to track CLOCK_REALTIME jumps.
 * TFD_JUMPED is the mask of states that the next read must report.
 */
#define	TFD_NOJUMP	0	/* No realtime jump is outstanding. */
#define	TFD_READ	1	/* A jump occurred and a read consumed it. */
#define	TFD_ZREAD	2	/* Backward jump, CANCEL_ON_SET not set. */
#define	TFD_CANCELED	4	/* Jump while CANCEL_ON_SET was set. */
#define	TFD_JUMPED	(TFD_ZREAD | TFD_CANCELED)
75af93fea7SJake Freeland 
76af93fea7SJake Freeland struct timerfd {
77af93fea7SJake Freeland 	/* User specified. */
78af93fea7SJake Freeland 	struct itimerspec tfd_time;	/* tfd timer */
79af93fea7SJake Freeland 	clockid_t	tfd_clockid;	/* timing base */
80af93fea7SJake Freeland 	int		tfd_flags;	/* creation flags */
81af93fea7SJake Freeland 	int		tfd_timflags;	/* timer flags */
82af93fea7SJake Freeland 
83af93fea7SJake Freeland 	/* Used internally. */
84af93fea7SJake Freeland 	timerfd_t	tfd_count;	/* expiration count since last read */
85af93fea7SJake Freeland 	bool		tfd_expired;	/* true upon initial expiration */
86af93fea7SJake Freeland 	struct mtx	tfd_lock;	/* mtx lock */
87af93fea7SJake Freeland 	struct callout	tfd_callout;	/* expiration notification */
88af93fea7SJake Freeland 	struct selinfo	tfd_sel;	/* I/O alerts */
89af93fea7SJake Freeland 	struct timespec	tfd_boottim;	/* cached boottime */
90af93fea7SJake Freeland 	int		tfd_jumped;	/* timer jump status */
91af93fea7SJake Freeland 	LIST_ENTRY(timerfd) entry;	/* entry in list */
92af93fea7SJake Freeland 
93af93fea7SJake Freeland 	/* For stat(2). */
94af93fea7SJake Freeland 	ino_t		tfd_ino;	/* inode number */
95af93fea7SJake Freeland 	struct timespec	tfd_atim;	/* time of last read */
96af93fea7SJake Freeland 	struct timespec	tfd_mtim;	/* time of last settime */
97af93fea7SJake Freeland 	struct timespec tfd_birthtim;	/* creation time */
98af93fea7SJake Freeland };
99af93fea7SJake Freeland 
100af93fea7SJake Freeland static void
101af93fea7SJake Freeland timerfd_init(void *data)
102af93fea7SJake Freeland {
103af93fea7SJake Freeland 	new_unrhdr64(&tfdino_unr, 1);
104af93fea7SJake Freeland }
105af93fea7SJake Freeland 
106af93fea7SJake Freeland SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);
107af93fea7SJake Freeland 
108af93fea7SJake Freeland static inline void
109af93fea7SJake Freeland timerfd_getboottime(struct timespec *ts)
110af93fea7SJake Freeland {
111af93fea7SJake Freeland 	struct timeval tv;
112af93fea7SJake Freeland 	getboottime(&tv);
113af93fea7SJake Freeland 	TIMEVAL_TO_TIMESPEC(&tv, ts);
114af93fea7SJake Freeland }
115af93fea7SJake Freeland 
116af93fea7SJake Freeland /*
117af93fea7SJake Freeland  * Call when a discontinuous jump has occured in CLOCK_REALTIME and
118af93fea7SJake Freeland  * update timerfd's cached boottime. A jump can be triggered using
119af93fea7SJake Freeland  * functions like clock_settime(2) or settimeofday(2).
120af93fea7SJake Freeland  *
121af93fea7SJake Freeland  * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
122af93fea7SJake Freeland  * and the realtime clock jumps.
123af93fea7SJake Freeland  * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
124af93fea7SJake Freeland  * but the realtime clock jumps backwards.
125af93fea7SJake Freeland  */
126af93fea7SJake Freeland void
127af93fea7SJake Freeland timerfd_jumped(void)
128af93fea7SJake Freeland {
129af93fea7SJake Freeland 	struct timerfd *tfd;
130af93fea7SJake Freeland 	struct timespec boottime, diff;
131af93fea7SJake Freeland 
1325eab5230SMateusz Guzik 	if (LIST_EMPTY(&timerfd_list))
1335eab5230SMateusz Guzik 		return;
1345eab5230SMateusz Guzik 
135af93fea7SJake Freeland 	timerfd_getboottime(&boottime);
136*f4296cfbSMateusz Guzik 	mtx_lock(&timerfd_list_lock);
13702f534b5SMateusz Guzik 	LIST_FOREACH(tfd, &timerfd_list, entry) {
138af93fea7SJake Freeland 		mtx_lock(&tfd->tfd_lock);
139af93fea7SJake Freeland 		if (tfd->tfd_clockid != CLOCK_REALTIME ||
140af93fea7SJake Freeland 		    (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
141af93fea7SJake Freeland 		    timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
142af93fea7SJake Freeland 			mtx_unlock(&tfd->tfd_lock);
143af93fea7SJake Freeland 			continue;
144af93fea7SJake Freeland 		}
145af93fea7SJake Freeland 
146af93fea7SJake Freeland 		if (callout_active(&tfd->tfd_callout)) {
147af93fea7SJake Freeland 			if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
148af93fea7SJake Freeland 				tfd->tfd_jumped = TFD_CANCELED;
149af93fea7SJake Freeland 			else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
150af93fea7SJake Freeland 				tfd->tfd_jumped = TFD_ZREAD;
151af93fea7SJake Freeland 
152af93fea7SJake Freeland 			/*
153af93fea7SJake Freeland 			 * Do not reschedule callout when
154af93fea7SJake Freeland 			 * inside interval time loop.
155af93fea7SJake Freeland 			 */
156af93fea7SJake Freeland 			if (!tfd->tfd_expired) {
157af93fea7SJake Freeland 				timespecsub(&boottime,
158af93fea7SJake Freeland 				    &tfd->tfd_boottim, &diff);
159af93fea7SJake Freeland 				timespecsub(&tfd->tfd_time.it_value,
160af93fea7SJake Freeland 				    &diff, &tfd->tfd_time.it_value);
161af93fea7SJake Freeland 				if (callout_stop(&tfd->tfd_callout) == 1) {
162af93fea7SJake Freeland 					callout_schedule_sbt(&tfd->tfd_callout,
163af93fea7SJake Freeland 					    tstosbt(tfd->tfd_time.it_value),
164af93fea7SJake Freeland 					    0, C_ABSOLUTE);
165af93fea7SJake Freeland 				}
166af93fea7SJake Freeland 			}
167af93fea7SJake Freeland 		}
168af93fea7SJake Freeland 
169af93fea7SJake Freeland 		tfd->tfd_boottim = boottime;
170af93fea7SJake Freeland 		mtx_unlock(&tfd->tfd_lock);
171af93fea7SJake Freeland 	}
172*f4296cfbSMateusz Guzik 	mtx_unlock(&timerfd_list_lock);
173af93fea7SJake Freeland }
174af93fea7SJake Freeland 
175af93fea7SJake Freeland static int
176af93fea7SJake Freeland timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
177af93fea7SJake Freeland     int flags, struct thread *td)
178af93fea7SJake Freeland {
179af93fea7SJake Freeland 	struct timerfd *tfd = fp->f_data;
180af93fea7SJake Freeland 	timerfd_t count;
181af93fea7SJake Freeland 	int error = 0;
182af93fea7SJake Freeland 
183af93fea7SJake Freeland 	if (uio->uio_resid < sizeof(timerfd_t))
184af93fea7SJake Freeland 		return (EINVAL);
185af93fea7SJake Freeland 
186af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
187af93fea7SJake Freeland retry:
188af93fea7SJake Freeland 	getnanotime(&tfd->tfd_atim);
189af93fea7SJake Freeland 	if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
190af93fea7SJake Freeland 		if (tfd->tfd_jumped == TFD_CANCELED)
191af93fea7SJake Freeland 			error = ECANCELED;
192af93fea7SJake Freeland 		tfd->tfd_jumped = TFD_READ;
193af93fea7SJake Freeland 		tfd->tfd_count = 0;
194af93fea7SJake Freeland 		mtx_unlock(&tfd->tfd_lock);
195af93fea7SJake Freeland 		return (error);
196af93fea7SJake Freeland 	} else {
197af93fea7SJake Freeland 		tfd->tfd_jumped = TFD_NOJUMP;
198af93fea7SJake Freeland 	}
199af93fea7SJake Freeland 	if (tfd->tfd_count == 0) {
200af93fea7SJake Freeland 		if ((fp->f_flag & FNONBLOCK) != 0) {
201af93fea7SJake Freeland 			mtx_unlock(&tfd->tfd_lock);
202af93fea7SJake Freeland 			return (EAGAIN);
203af93fea7SJake Freeland 		}
204af93fea7SJake Freeland 		td->td_rtcgen = atomic_load_acq_int(&rtc_generation);
205af93fea7SJake Freeland 		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
206af93fea7SJake Freeland 		    PCATCH, "tfdrd", 0);
207af93fea7SJake Freeland 		if (error == 0) {
208af93fea7SJake Freeland 			goto retry;
209af93fea7SJake Freeland 		} else {
210af93fea7SJake Freeland 			mtx_unlock(&tfd->tfd_lock);
211af93fea7SJake Freeland 			return (error);
212af93fea7SJake Freeland 		}
213af93fea7SJake Freeland 	}
214af93fea7SJake Freeland 
215af93fea7SJake Freeland 	count = tfd->tfd_count;
216af93fea7SJake Freeland 	tfd->tfd_count = 0;
217af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
218af93fea7SJake Freeland 	error = uiomove(&count, sizeof(timerfd_t), uio);
219af93fea7SJake Freeland 
220af93fea7SJake Freeland 	return (error);
221af93fea7SJake Freeland }
222af93fea7SJake Freeland 
223af93fea7SJake Freeland static int
224af93fea7SJake Freeland timerfd_ioctl(struct file *fp, u_long cmd, void *data,
225af93fea7SJake Freeland     struct ucred *active_cred, struct thread *td)
226af93fea7SJake Freeland {
227af93fea7SJake Freeland 	switch (cmd) {
228af93fea7SJake Freeland 	case FIOASYNC:
229af93fea7SJake Freeland 		if (*(int *)data != 0)
230af93fea7SJake Freeland 			atomic_set_int(&fp->f_flag, FASYNC);
231af93fea7SJake Freeland 		else
232af93fea7SJake Freeland 			atomic_clear_int(&fp->f_flag, FASYNC);
233af93fea7SJake Freeland 		return (0);
234af93fea7SJake Freeland 	case FIONBIO:
235af93fea7SJake Freeland 		if (*(int *)data != 0)
236af93fea7SJake Freeland 			atomic_set_int(&fp->f_flag, FNONBLOCK);
237af93fea7SJake Freeland 		else
238af93fea7SJake Freeland 			atomic_clear_int(&fp->f_flag, FNONBLOCK);
239af93fea7SJake Freeland 		return (0);
240af93fea7SJake Freeland 	}
241af93fea7SJake Freeland 	return (ENOTTY);
242af93fea7SJake Freeland }
243af93fea7SJake Freeland 
244af93fea7SJake Freeland static int
245af93fea7SJake Freeland timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
246af93fea7SJake Freeland     struct thread *td)
247af93fea7SJake Freeland {
248af93fea7SJake Freeland 	struct timerfd *tfd = fp->f_data;
249af93fea7SJake Freeland 	int revents = 0;
250af93fea7SJake Freeland 
251af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
252af93fea7SJake Freeland 	if ((events & (POLLIN | POLLRDNORM)) != 0 &&
253af93fea7SJake Freeland 	    tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ)
254af93fea7SJake Freeland 		revents |= events & (POLLIN | POLLRDNORM);
255af93fea7SJake Freeland 	if (revents == 0)
256af93fea7SJake Freeland 		selrecord(td, &tfd->tfd_sel);
257af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
258af93fea7SJake Freeland 
259af93fea7SJake Freeland 	return (revents);
260af93fea7SJake Freeland }
261af93fea7SJake Freeland 
262af93fea7SJake Freeland static void
263af93fea7SJake Freeland filt_timerfddetach(struct knote *kn)
264af93fea7SJake Freeland {
265af93fea7SJake Freeland 	struct timerfd *tfd = kn->kn_hook;
266af93fea7SJake Freeland 
267af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
268af93fea7SJake Freeland 	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
269af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
270af93fea7SJake Freeland }
271af93fea7SJake Freeland 
272af93fea7SJake Freeland static int
273af93fea7SJake Freeland filt_timerfdread(struct knote *kn, long hint)
274af93fea7SJake Freeland {
275af93fea7SJake Freeland 	struct timerfd *tfd = kn->kn_hook;
276af93fea7SJake Freeland 
277af93fea7SJake Freeland 	return (tfd->tfd_count > 0);
278af93fea7SJake Freeland }
279af93fea7SJake Freeland 
280af93fea7SJake Freeland static struct filterops timerfd_rfiltops = {
281af93fea7SJake Freeland 	.f_isfd = 1,
282af93fea7SJake Freeland 	.f_detach = filt_timerfddetach,
283af93fea7SJake Freeland 	.f_event = filt_timerfdread,
284af93fea7SJake Freeland };
285af93fea7SJake Freeland 
286af93fea7SJake Freeland static int
287af93fea7SJake Freeland timerfd_kqfilter(struct file *fp, struct knote *kn)
288af93fea7SJake Freeland {
289af93fea7SJake Freeland 	struct timerfd *tfd = fp->f_data;
290af93fea7SJake Freeland 
291af93fea7SJake Freeland 	if (kn->kn_filter != EVFILT_READ)
292af93fea7SJake Freeland 		return (EINVAL);
293af93fea7SJake Freeland 
294af93fea7SJake Freeland 	kn->kn_fop = &timerfd_rfiltops;
295af93fea7SJake Freeland 	kn->kn_hook = tfd;
296af93fea7SJake Freeland 	knlist_add(&tfd->tfd_sel.si_note, kn, 0);
297af93fea7SJake Freeland 
298af93fea7SJake Freeland 	return (0);
299af93fea7SJake Freeland }
300af93fea7SJake Freeland 
301af93fea7SJake Freeland static int
302af93fea7SJake Freeland timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
303af93fea7SJake Freeland {
304af93fea7SJake Freeland 	struct timerfd *tfd = fp->f_data;
305af93fea7SJake Freeland 
306af93fea7SJake Freeland 	bzero(sb, sizeof(*sb));
307af93fea7SJake Freeland 	sb->st_nlink = fp->f_count - 1;
308af93fea7SJake Freeland 	sb->st_uid = fp->f_cred->cr_uid;
309af93fea7SJake Freeland 	sb->st_gid = fp->f_cred->cr_gid;
310af93fea7SJake Freeland 	sb->st_blksize = PAGE_SIZE;
311af93fea7SJake Freeland 
312af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
313af93fea7SJake Freeland 	sb->st_ino = tfd->tfd_ino;
314af93fea7SJake Freeland 	sb->st_atim = tfd->tfd_atim;
315af93fea7SJake Freeland 	sb->st_mtim = tfd->tfd_mtim;
316af93fea7SJake Freeland 	sb->st_birthtim = tfd->tfd_birthtim;
317af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
318af93fea7SJake Freeland 
319af93fea7SJake Freeland 	return (0);
320af93fea7SJake Freeland }
321af93fea7SJake Freeland 
322af93fea7SJake Freeland static int
323af93fea7SJake Freeland timerfd_close(struct file *fp, struct thread *td)
324af93fea7SJake Freeland {
325af93fea7SJake Freeland 	struct timerfd *tfd = fp->f_data;
326af93fea7SJake Freeland 
327*f4296cfbSMateusz Guzik 	mtx_lock(&timerfd_list_lock);
32802f534b5SMateusz Guzik 	LIST_REMOVE(tfd, entry);
329*f4296cfbSMateusz Guzik 	mtx_unlock(&timerfd_list_lock);
33002f534b5SMateusz Guzik 
331af93fea7SJake Freeland 	callout_drain(&tfd->tfd_callout);
332af93fea7SJake Freeland 	seldrain(&tfd->tfd_sel);
333af93fea7SJake Freeland 	knlist_destroy(&tfd->tfd_sel.si_note);
334af93fea7SJake Freeland 	mtx_destroy(&tfd->tfd_lock);
335af93fea7SJake Freeland 	free(tfd, M_TIMERFD);
336af93fea7SJake Freeland 	fp->f_ops = &badfileops;
337af93fea7SJake Freeland 
338af93fea7SJake Freeland 	return (0);
339af93fea7SJake Freeland }
340af93fea7SJake Freeland 
341af93fea7SJake Freeland static int
342af93fea7SJake Freeland timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
343af93fea7SJake Freeland     struct filedesc *fdp)
344af93fea7SJake Freeland {
345af93fea7SJake Freeland 
346af93fea7SJake Freeland 	struct timerfd *tfd = fp->f_data;
347af93fea7SJake Freeland 
348af93fea7SJake Freeland 	kif->kf_type = KF_TYPE_TIMERFD;
349af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
350af93fea7SJake Freeland 	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
351af93fea7SJake Freeland 	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
352af93fea7SJake Freeland 	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
353af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
354af93fea7SJake Freeland 
355af93fea7SJake Freeland 	return (0);
356af93fea7SJake Freeland }
357af93fea7SJake Freeland 
358af93fea7SJake Freeland static struct fileops timerfdops = {
359af93fea7SJake Freeland 	.fo_read = timerfd_read,
360af93fea7SJake Freeland 	.fo_write = invfo_rdwr,
361af93fea7SJake Freeland 	.fo_truncate = invfo_truncate,
362af93fea7SJake Freeland 	.fo_ioctl = timerfd_ioctl,
363af93fea7SJake Freeland 	.fo_poll = timerfd_poll,
364af93fea7SJake Freeland 	.fo_kqfilter = timerfd_kqfilter,
365af93fea7SJake Freeland 	.fo_stat = timerfd_stat,
366af93fea7SJake Freeland 	.fo_close = timerfd_close,
367af93fea7SJake Freeland 	.fo_chmod = invfo_chmod,
368af93fea7SJake Freeland 	.fo_chown = invfo_chown,
369af93fea7SJake Freeland 	.fo_sendfile = invfo_sendfile,
370af93fea7SJake Freeland 	.fo_fill_kinfo = timerfd_fill_kinfo,
371af93fea7SJake Freeland 	.fo_flags = DFLAG_PASSABLE,
372af93fea7SJake Freeland };
373af93fea7SJake Freeland 
374af93fea7SJake Freeland static void
375af93fea7SJake Freeland timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
376af93fea7SJake Freeland {
377af93fea7SJake Freeland 	struct timespec curr_value;
378af93fea7SJake Freeland 
379af93fea7SJake Freeland 	*old_value = tfd->tfd_time;
380af93fea7SJake Freeland 	if (timespecisset(&tfd->tfd_time.it_value)) {
381af93fea7SJake Freeland 		nanouptime(&curr_value);
382af93fea7SJake Freeland 		timespecsub(&tfd->tfd_time.it_value, &curr_value,
383af93fea7SJake Freeland 		    &old_value->it_value);
384af93fea7SJake Freeland 	}
385af93fea7SJake Freeland }
386af93fea7SJake Freeland 
387af93fea7SJake Freeland static void
388af93fea7SJake Freeland timerfd_expire(void *arg)
389af93fea7SJake Freeland {
390af93fea7SJake Freeland 	struct timerfd *tfd = (struct timerfd *)arg;
391af93fea7SJake Freeland 	struct timespec uptime;
392af93fea7SJake Freeland 
393af93fea7SJake Freeland 	++tfd->tfd_count;
394af93fea7SJake Freeland 	tfd->tfd_expired = true;
395af93fea7SJake Freeland 	if (timespecisset(&tfd->tfd_time.it_interval)) {
396af93fea7SJake Freeland 		/* Count missed events. */
397af93fea7SJake Freeland 		nanouptime(&uptime);
398af93fea7SJake Freeland 		if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
399af93fea7SJake Freeland 			timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
400af93fea7SJake Freeland 			tfd->tfd_count += tstosbt(uptime) /
401af93fea7SJake Freeland 			    tstosbt(tfd->tfd_time.it_interval);
402af93fea7SJake Freeland 		}
403af93fea7SJake Freeland 		timespecadd(&tfd->tfd_time.it_value,
404af93fea7SJake Freeland 		    &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
405af93fea7SJake Freeland 		callout_schedule_sbt(&tfd->tfd_callout,
406af93fea7SJake Freeland 		    tstosbt(tfd->tfd_time.it_value),
407af93fea7SJake Freeland 		    0, C_ABSOLUTE);
408af93fea7SJake Freeland 	} else {
409af93fea7SJake Freeland 		/* Single shot timer. */
410af93fea7SJake Freeland 		callout_deactivate(&tfd->tfd_callout);
411af93fea7SJake Freeland 		timespecclear(&tfd->tfd_time.it_value);
412af93fea7SJake Freeland 	}
413af93fea7SJake Freeland 
414af93fea7SJake Freeland 	wakeup(&tfd->tfd_count);
415af93fea7SJake Freeland 	selwakeup(&tfd->tfd_sel);
416af93fea7SJake Freeland 	KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
417af93fea7SJake Freeland }
418af93fea7SJake Freeland 
419af93fea7SJake Freeland int
420af93fea7SJake Freeland kern_timerfd_create(struct thread *td, int clockid, int flags)
421af93fea7SJake Freeland {
422af93fea7SJake Freeland 	struct file *fp;
423af93fea7SJake Freeland 	struct timerfd *tfd;
4245eab5230SMateusz Guzik 	int error, fd, fflags;
425af93fea7SJake Freeland 
426af93fea7SJake Freeland 	AUDIT_ARG_VALUE(clockid);
427af93fea7SJake Freeland 	AUDIT_ARG_FFLAGS(flags);
428af93fea7SJake Freeland 
429af93fea7SJake Freeland 	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
430af93fea7SJake Freeland 		return (EINVAL);
431af93fea7SJake Freeland 	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
432af93fea7SJake Freeland 		return (EINVAL);
4335eab5230SMateusz Guzik 
4345eab5230SMateusz Guzik 	fflags = FREAD;
435af93fea7SJake Freeland 	if ((flags & TFD_CLOEXEC) != 0)
436af93fea7SJake Freeland 		fflags |= O_CLOEXEC;
4375eab5230SMateusz Guzik 	if ((flags & TFD_NONBLOCK) != 0)
4385eab5230SMateusz Guzik 		fflags |= FNONBLOCK;
439af93fea7SJake Freeland 
44002f534b5SMateusz Guzik 	error = falloc(td, &fp, &fd, fflags);
44102f534b5SMateusz Guzik 	if (error != 0)
44202f534b5SMateusz Guzik 		return (error);
44302f534b5SMateusz Guzik 
444af93fea7SJake Freeland 	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
445af93fea7SJake Freeland 	tfd->tfd_clockid = (clockid_t)clockid;
446af93fea7SJake Freeland 	tfd->tfd_flags = flags;
447af93fea7SJake Freeland 	tfd->tfd_ino = alloc_unr64(&tfdino_unr);
448af93fea7SJake Freeland 	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
449af93fea7SJake Freeland 	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
450af93fea7SJake Freeland 	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
451af93fea7SJake Freeland 	timerfd_getboottime(&tfd->tfd_boottim);
452af93fea7SJake Freeland 	getnanotime(&tfd->tfd_birthtim);
453*f4296cfbSMateusz Guzik 	mtx_lock(&timerfd_list_lock);
45402f534b5SMateusz Guzik 	LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
455*f4296cfbSMateusz Guzik 	mtx_unlock(&timerfd_list_lock);
456af93fea7SJake Freeland 
457af93fea7SJake Freeland 	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
45802f534b5SMateusz Guzik 
459af93fea7SJake Freeland 	fdrop(fp, td);
460af93fea7SJake Freeland 
461af93fea7SJake Freeland 	td->td_retval[0] = fd;
462af93fea7SJake Freeland 	return (0);
463af93fea7SJake Freeland }
464af93fea7SJake Freeland 
465af93fea7SJake Freeland int
466af93fea7SJake Freeland kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
467af93fea7SJake Freeland {
468af93fea7SJake Freeland 	struct file *fp;
469af93fea7SJake Freeland 	struct timerfd *tfd;
470af93fea7SJake Freeland 	int error;
471af93fea7SJake Freeland 
472af93fea7SJake Freeland 	error = fget(td, fd, &cap_write_rights, &fp);
473af93fea7SJake Freeland 	if (error != 0)
474af93fea7SJake Freeland 		return (error);
475af93fea7SJake Freeland 	tfd = fp->f_data;
476af93fea7SJake Freeland 	if (tfd == NULL || fp->f_type != DTYPE_TIMERFD) {
477af93fea7SJake Freeland 		fdrop(fp, td);
478af93fea7SJake Freeland 		return (EINVAL);
479af93fea7SJake Freeland 	}
480af93fea7SJake Freeland 
481af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
482af93fea7SJake Freeland 	timerfd_curval(tfd, curr_value);
483af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
484af93fea7SJake Freeland 
485af93fea7SJake Freeland 	fdrop(fp, td);
486af93fea7SJake Freeland 	return (0);
487af93fea7SJake Freeland }
488af93fea7SJake Freeland 
489af93fea7SJake Freeland int
490af93fea7SJake Freeland kern_timerfd_settime(struct thread *td, int fd, int flags,
491af93fea7SJake Freeland     const struct itimerspec *new_value, struct itimerspec *old_value)
492af93fea7SJake Freeland {
493af93fea7SJake Freeland 	struct file *fp;
494af93fea7SJake Freeland 	struct timerfd *tfd;
495af93fea7SJake Freeland 	struct timespec ts;
496af93fea7SJake Freeland 	int error = 0;
497af93fea7SJake Freeland 
498af93fea7SJake Freeland 	if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
499af93fea7SJake Freeland 		return (EINVAL);
500af93fea7SJake Freeland 	if (!timespecvalid_interval(&new_value->it_value) ||
501af93fea7SJake Freeland 	    !timespecvalid_interval(&new_value->it_interval))
502af93fea7SJake Freeland 		return (EINVAL);
503af93fea7SJake Freeland 
504af93fea7SJake Freeland 	error = fget(td, fd, &cap_write_rights, &fp);
505af93fea7SJake Freeland 	if (error != 0)
506af93fea7SJake Freeland 		return (error);
507af93fea7SJake Freeland 	tfd = fp->f_data;
508af93fea7SJake Freeland 	if (tfd == NULL || fp->f_type != DTYPE_TIMERFD) {
509af93fea7SJake Freeland 		fdrop(fp, td);
510af93fea7SJake Freeland 		return (EINVAL);
511af93fea7SJake Freeland 	}
512af93fea7SJake Freeland 
513af93fea7SJake Freeland 	mtx_lock(&tfd->tfd_lock);
514af93fea7SJake Freeland 	getnanotime(&tfd->tfd_mtim);
515af93fea7SJake Freeland 	tfd->tfd_timflags = flags;
516af93fea7SJake Freeland 
517af93fea7SJake Freeland 	/* Store old itimerspec, if applicable. */
518af93fea7SJake Freeland 	if (old_value != NULL)
519af93fea7SJake Freeland 		timerfd_curval(tfd, old_value);
520af93fea7SJake Freeland 
521af93fea7SJake Freeland 	/* Set new expiration. */
522af93fea7SJake Freeland 	tfd->tfd_time = *new_value;
523af93fea7SJake Freeland 	if (timespecisset(&tfd->tfd_time.it_value)) {
524af93fea7SJake Freeland 		if ((flags & TFD_TIMER_ABSTIME) == 0) {
525af93fea7SJake Freeland 			nanouptime(&ts);
526af93fea7SJake Freeland 			timespecadd(&tfd->tfd_time.it_value, &ts,
527af93fea7SJake Freeland 			    &tfd->tfd_time.it_value);
528af93fea7SJake Freeland 		} else if (tfd->tfd_clockid == CLOCK_REALTIME) {
529af93fea7SJake Freeland 			/* ECANCELED if unread jump is pending. */
530af93fea7SJake Freeland 			if (tfd->tfd_jumped == TFD_CANCELED)
531af93fea7SJake Freeland 				error = ECANCELED;
532af93fea7SJake Freeland 			/* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */
533af93fea7SJake Freeland 			timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
534af93fea7SJake Freeland 			    &tfd->tfd_time.it_value);
535af93fea7SJake Freeland 		}
536af93fea7SJake Freeland 		callout_reset_sbt(&tfd->tfd_callout,
537af93fea7SJake Freeland 		    tstosbt(tfd->tfd_time.it_value),
538af93fea7SJake Freeland 		    0, timerfd_expire, tfd, C_ABSOLUTE);
539af93fea7SJake Freeland 	} else {
540af93fea7SJake Freeland 		callout_stop(&tfd->tfd_callout);
541af93fea7SJake Freeland 	}
542af93fea7SJake Freeland 	tfd->tfd_count = 0;
543af93fea7SJake Freeland 	tfd->tfd_expired = false;
544af93fea7SJake Freeland 	tfd->tfd_jumped = TFD_NOJUMP;
545af93fea7SJake Freeland 	mtx_unlock(&tfd->tfd_lock);
546af93fea7SJake Freeland 
547af93fea7SJake Freeland 	fdrop(fp, td);
548af93fea7SJake Freeland 	return (error);
549af93fea7SJake Freeland }
550af93fea7SJake Freeland 
551af93fea7SJake Freeland int
552af93fea7SJake Freeland sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
553af93fea7SJake Freeland {
554af93fea7SJake Freeland 	return (kern_timerfd_create(td, uap->clockid, uap->flags));
555af93fea7SJake Freeland }
556af93fea7SJake Freeland 
557af93fea7SJake Freeland int
558af93fea7SJake Freeland sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
559af93fea7SJake Freeland {
560af93fea7SJake Freeland 	struct itimerspec curr_value;
561af93fea7SJake Freeland 	int error;
562af93fea7SJake Freeland 
563af93fea7SJake Freeland 	error = kern_timerfd_gettime(td, uap->fd, &curr_value);
564af93fea7SJake Freeland 	if (error == 0)
565af93fea7SJake Freeland 		error = copyout(&curr_value, uap->curr_value,
566af93fea7SJake Freeland 		    sizeof(curr_value));
567af93fea7SJake Freeland 
568af93fea7SJake Freeland 	return (error);
569af93fea7SJake Freeland }
570af93fea7SJake Freeland 
571af93fea7SJake Freeland int
572af93fea7SJake Freeland sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
573af93fea7SJake Freeland {
574af93fea7SJake Freeland 	struct itimerspec new_value, old_value;
575af93fea7SJake Freeland 	int error;
576af93fea7SJake Freeland 
577af93fea7SJake Freeland 	error = copyin(uap->new_value, &new_value, sizeof(new_value));
578af93fea7SJake Freeland 	if (error != 0)
579af93fea7SJake Freeland 		return (error);
580af93fea7SJake Freeland 	if (uap->old_value == NULL) {
581af93fea7SJake Freeland 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
582af93fea7SJake Freeland 		    &new_value, NULL);
583af93fea7SJake Freeland 	} else {
584af93fea7SJake Freeland 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
585af93fea7SJake Freeland 		    &new_value, &old_value);
586af93fea7SJake Freeland 		if (error == 0)
587af93fea7SJake Freeland 			error = copyout(&old_value, uap->old_value,
588af93fea7SJake Freeland 			    sizeof(old_value));
589af93fea7SJake Freeland 	}
590af93fea7SJake Freeland 	return (error);
591af93fea7SJake Freeland }
592af93fea7SJake Freeland 
593af93fea7SJake Freeland #ifdef COMPAT_FREEBSD32
594af93fea7SJake Freeland int
595af93fea7SJake Freeland freebsd32_timerfd_gettime(struct thread *td,
596af93fea7SJake Freeland     struct freebsd32_timerfd_gettime_args *uap)
597af93fea7SJake Freeland {
598af93fea7SJake Freeland 	struct itimerspec curr_value;
599af93fea7SJake Freeland 	struct itimerspec32 curr_value32;
600af93fea7SJake Freeland 	int error;
601af93fea7SJake Freeland 
602af93fea7SJake Freeland 	error = kern_timerfd_gettime(td, uap->fd, &curr_value);
603af93fea7SJake Freeland 	if (error == 0) {
604af93fea7SJake Freeland 		CP(curr_value, curr_value32, it_value.tv_sec);
605af93fea7SJake Freeland 		CP(curr_value, curr_value32, it_value.tv_nsec);
606af93fea7SJake Freeland 		CP(curr_value, curr_value32, it_interval.tv_sec);
607af93fea7SJake Freeland 		CP(curr_value, curr_value32, it_interval.tv_nsec);
608af93fea7SJake Freeland 		error = copyout(&curr_value32, uap->curr_value,
609af93fea7SJake Freeland 		    sizeof(curr_value32));
610af93fea7SJake Freeland 	}
611af93fea7SJake Freeland 
612af93fea7SJake Freeland 	return (error);
613af93fea7SJake Freeland }
614af93fea7SJake Freeland 
615af93fea7SJake Freeland int
616af93fea7SJake Freeland freebsd32_timerfd_settime(struct thread *td,
617af93fea7SJake Freeland     struct freebsd32_timerfd_settime_args *uap)
618af93fea7SJake Freeland {
619af93fea7SJake Freeland 	struct itimerspec new_value, old_value;
620af93fea7SJake Freeland 	struct itimerspec32 new_value32, old_value32;
621af93fea7SJake Freeland 	int error;
622af93fea7SJake Freeland 
623af93fea7SJake Freeland 	error = copyin(uap->new_value, &new_value32, sizeof(new_value32));
624af93fea7SJake Freeland 	if (error != 0)
625af93fea7SJake Freeland 		return (error);
626af93fea7SJake Freeland 	CP(new_value32, new_value, it_value.tv_sec);
627af93fea7SJake Freeland 	CP(new_value32, new_value, it_value.tv_nsec);
628af93fea7SJake Freeland 	CP(new_value32, new_value, it_interval.tv_sec);
629af93fea7SJake Freeland 	CP(new_value32, new_value, it_interval.tv_nsec);
630af93fea7SJake Freeland 	if (uap->old_value == NULL) {
631af93fea7SJake Freeland 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
632af93fea7SJake Freeland 		    &new_value, NULL);
633af93fea7SJake Freeland 	} else {
634af93fea7SJake Freeland 		error = kern_timerfd_settime(td, uap->fd, uap->flags,
635af93fea7SJake Freeland 		    &new_value, &old_value);
636af93fea7SJake Freeland 		if (error == 0) {
637af93fea7SJake Freeland 			CP(old_value, old_value32, it_value.tv_sec);
638af93fea7SJake Freeland 			CP(old_value, old_value32, it_value.tv_nsec);
639af93fea7SJake Freeland 			CP(old_value, old_value32, it_interval.tv_sec);
640af93fea7SJake Freeland 			CP(old_value, old_value32, it_interval.tv_nsec);
641af93fea7SJake Freeland 			error = copyout(&old_value32, uap->old_value,
642af93fea7SJake Freeland 			    sizeof(old_value32));
643af93fea7SJake Freeland 		}
644af93fea7SJake Freeland 	}
645af93fea7SJake Freeland 	return (error);
646af93fea7SJake Freeland }
647af93fea7SJake Freeland #endif
648