/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2017 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 */
16a5eb7107SBryan Cantrill
17a5eb7107SBryan Cantrill #include <sys/types.h>
18a5eb7107SBryan Cantrill #include <sys/epoll.h>
19a5eb7107SBryan Cantrill #include <sys/devpoll.h>
20a5eb7107SBryan Cantrill #include <unistd.h>
21a5eb7107SBryan Cantrill #include <errno.h>
22a5eb7107SBryan Cantrill #include <fcntl.h>
23a5eb7107SBryan Cantrill #include <poll.h>
24a5eb7107SBryan Cantrill
25a5eb7107SBryan Cantrill /*
26a5eb7107SBryan Cantrill * Events that match their epoll(7) equivalents.
27a5eb7107SBryan Cantrill */
28a5eb7107SBryan Cantrill #if EPOLLIN != POLLIN
29a5eb7107SBryan Cantrill #error value of EPOLLIN does not match value of POLLIN
30a5eb7107SBryan Cantrill #endif
31a5eb7107SBryan Cantrill
32a5eb7107SBryan Cantrill #if EPOLLPRI != POLLPRI
33a5eb7107SBryan Cantrill #error value of EPOLLPRI does not match value of POLLPRI
34a5eb7107SBryan Cantrill #endif
35a5eb7107SBryan Cantrill
36a5eb7107SBryan Cantrill #if EPOLLOUT != POLLOUT
37a5eb7107SBryan Cantrill #error value of EPOLLOUT does not match value of POLLOUT
38a5eb7107SBryan Cantrill #endif
39a5eb7107SBryan Cantrill
40a5eb7107SBryan Cantrill #if EPOLLRDNORM != POLLRDNORM
41a5eb7107SBryan Cantrill #error value of EPOLLRDNORM does not match value of POLLRDNORM
42a5eb7107SBryan Cantrill #endif
43a5eb7107SBryan Cantrill
44a5eb7107SBryan Cantrill #if EPOLLRDBAND != POLLRDBAND
45a5eb7107SBryan Cantrill #error value of EPOLLRDBAND does not match value of POLLRDBAND
46a5eb7107SBryan Cantrill #endif
47a5eb7107SBryan Cantrill
48a5eb7107SBryan Cantrill #if EPOLLERR != POLLERR
49a5eb7107SBryan Cantrill #error value of EPOLLERR does not match value of POLLERR
50a5eb7107SBryan Cantrill #endif
51a5eb7107SBryan Cantrill
52a5eb7107SBryan Cantrill #if EPOLLHUP != POLLHUP
53a5eb7107SBryan Cantrill #error value of EPOLLHUP does not match value of POLLHUP
54a5eb7107SBryan Cantrill #endif
55a5eb7107SBryan Cantrill
/*
 * Event bits that are accepted from the caller but dropped before the
 * request is issued; they are never reported back.
 */
#define	EPOLLIGNORED	(EPOLLMSG | EPOLLWAKEUP | EPOLLEXCLUSIVE)

/*
 * Event bits whose epoll(7) positions differ from ours.  epoll_ctl() strips
 * these from the mask and sets the corresponding POLL* bit instead.
 */
#define	EPOLLSWIZZLED	\
	(EPOLLRDHUP | EPOLLONESHOT | EPOLLET | EPOLLWRBAND | EPOLLWRNORM)

/*
 * epoll_wait/epoll_pwait define any timeout below 0 as "block until an event
 * arrives (or a signal interrupts the wait)".  poll(4D) only behaves that way
 * for exactly -1; other negative values time out immediately, as though 0 had
 * been passed.  Fold every value below -1 into -1 so that the epoll semantics
 * hold.  Note that the argument is evaluated more than once.
 */
#define	EPOLL_TIMEOUT_CLAMP(t)	(((t) >= -1) ? (t) : -1)
76f4f9009fSPatrick Mooney
77a5eb7107SBryan Cantrill int
epoll_create(int size)78a5eb7107SBryan Cantrill epoll_create(int size)
79a5eb7107SBryan Cantrill {
80a5eb7107SBryan Cantrill int fd;
81a5eb7107SBryan Cantrill
82a5eb7107SBryan Cantrill /*
83a5eb7107SBryan Cantrill * From the epoll_create() man page: "Since Linux 2.6.8, the size
84a5eb7107SBryan Cantrill * argument is ignored, but must be greater than zero." You keep using
85a5eb7107SBryan Cantrill * that word "ignored"...
86a5eb7107SBryan Cantrill */
87a5eb7107SBryan Cantrill if (size <= 0) {
88a5eb7107SBryan Cantrill errno = EINVAL;
89a5eb7107SBryan Cantrill return (-1);
90a5eb7107SBryan Cantrill }
91a5eb7107SBryan Cantrill
92a5eb7107SBryan Cantrill if ((fd = open("/dev/poll", O_RDWR)) == -1)
93a5eb7107SBryan Cantrill return (-1);
94a5eb7107SBryan Cantrill
95a5eb7107SBryan Cantrill if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
96a5eb7107SBryan Cantrill (void) close(fd);
97a5eb7107SBryan Cantrill return (-1);
98a5eb7107SBryan Cantrill }
99a5eb7107SBryan Cantrill
100a5eb7107SBryan Cantrill return (fd);
101a5eb7107SBryan Cantrill }
102a5eb7107SBryan Cantrill
103a5eb7107SBryan Cantrill int
epoll_create1(int flags)104a5eb7107SBryan Cantrill epoll_create1(int flags)
105a5eb7107SBryan Cantrill {
106a5eb7107SBryan Cantrill int fd, oflags = O_RDWR;
107a5eb7107SBryan Cantrill
108a192d1c0SPatrick Mooney if (flags & EPOLL_CLOEXEC) {
109a5eb7107SBryan Cantrill oflags |= O_CLOEXEC;
110a192d1c0SPatrick Mooney flags ^= EPOLL_CLOEXEC;
111a192d1c0SPatrick Mooney }
112a192d1c0SPatrick Mooney /* Reject unrecognized flags */
113a192d1c0SPatrick Mooney if (flags != 0) {
114a192d1c0SPatrick Mooney errno = EINVAL;
115a192d1c0SPatrick Mooney return (-1);
116a192d1c0SPatrick Mooney }
117a5eb7107SBryan Cantrill
118a5eb7107SBryan Cantrill if ((fd = open("/dev/poll", oflags)) == -1)
119a5eb7107SBryan Cantrill return (-1);
120a5eb7107SBryan Cantrill
121a5eb7107SBryan Cantrill if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
122a5eb7107SBryan Cantrill (void) close(fd);
123a5eb7107SBryan Cantrill return (-1);
124a5eb7107SBryan Cantrill }
125a5eb7107SBryan Cantrill
126a5eb7107SBryan Cantrill return (fd);
127a5eb7107SBryan Cantrill }
128a5eb7107SBryan Cantrill
129a5eb7107SBryan Cantrill int
epoll_ctl(int epfd,int op,int fd,struct epoll_event * event)130a5eb7107SBryan Cantrill epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
131a5eb7107SBryan Cantrill {
132a5eb7107SBryan Cantrill dvpoll_epollfd_t epoll[2];
133a5eb7107SBryan Cantrill uint32_t events, ev = 0;
13457a0264bSPatrick Mooney int i = 0, res;
135a5eb7107SBryan Cantrill
136a5eb7107SBryan Cantrill epoll[i].dpep_pollfd.fd = fd;
137a5eb7107SBryan Cantrill
138a5eb7107SBryan Cantrill switch (op) {
139a5eb7107SBryan Cantrill case EPOLL_CTL_DEL:
140a5eb7107SBryan Cantrill ev = POLLREMOVE;
141a5eb7107SBryan Cantrill break;
142a5eb7107SBryan Cantrill
143a5eb7107SBryan Cantrill case EPOLL_CTL_MOD:
14466373fa7SPatrick Mooney /* EPOLLEXCLUSIVE is prohibited for modify operations */
14566373fa7SPatrick Mooney if ((event->events & EPOLLEXCLUSIVE) != 0) {
14666373fa7SPatrick Mooney errno = EINVAL;
14766373fa7SPatrick Mooney return (-1);
14866373fa7SPatrick Mooney }
149a5eb7107SBryan Cantrill /*
150a5eb7107SBryan Cantrill * In the modify case, we pass down two events: one to
151a5eb7107SBryan Cantrill * remove the event and another to add it back.
152a5eb7107SBryan Cantrill */
153a5eb7107SBryan Cantrill epoll[i++].dpep_pollfd.events = POLLREMOVE;
154a5eb7107SBryan Cantrill epoll[i].dpep_pollfd.fd = fd;
155a5eb7107SBryan Cantrill /* FALLTHROUGH */
156a5eb7107SBryan Cantrill
157a5eb7107SBryan Cantrill case EPOLL_CTL_ADD:
158a5eb7107SBryan Cantrill /*
159a5eb7107SBryan Cantrill * Mask off the events that we ignore, and then swizzle the
160a5eb7107SBryan Cantrill * events for which our values differ from their epoll(7)
161a5eb7107SBryan Cantrill * equivalents.
162a5eb7107SBryan Cantrill */
163a5eb7107SBryan Cantrill events = event->events;
164a5eb7107SBryan Cantrill ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED);
165a5eb7107SBryan Cantrill
166a5eb7107SBryan Cantrill if (events & EPOLLRDHUP)
167a5eb7107SBryan Cantrill ev |= POLLRDHUP;
168a5eb7107SBryan Cantrill
169a5eb7107SBryan Cantrill if (events & EPOLLET)
170a5eb7107SBryan Cantrill ev |= POLLET;
171a5eb7107SBryan Cantrill
172a5eb7107SBryan Cantrill if (events & EPOLLONESHOT)
173a5eb7107SBryan Cantrill ev |= POLLONESHOT;
174a5eb7107SBryan Cantrill
175a5eb7107SBryan Cantrill if (events & EPOLLWRNORM)
176a5eb7107SBryan Cantrill ev |= POLLWRNORM;
177a5eb7107SBryan Cantrill
178a5eb7107SBryan Cantrill if (events & EPOLLWRBAND)
179a5eb7107SBryan Cantrill ev |= POLLWRBAND;
180a5eb7107SBryan Cantrill
181a5eb7107SBryan Cantrill epoll[i].dpep_data = event->data.u64;
182a5eb7107SBryan Cantrill break;
183a5eb7107SBryan Cantrill
184a5eb7107SBryan Cantrill default:
185a5eb7107SBryan Cantrill errno = EOPNOTSUPP;
186a5eb7107SBryan Cantrill return (-1);
187a5eb7107SBryan Cantrill }
188a5eb7107SBryan Cantrill
189a5eb7107SBryan Cantrill epoll[i].dpep_pollfd.events = ev;
19057a0264bSPatrick Mooney retry:
19157a0264bSPatrick Mooney res = write(epfd, epoll, sizeof (epoll[0]) * (i + 1));
192a5eb7107SBryan Cantrill
19357a0264bSPatrick Mooney if (res == -1) {
19457a0264bSPatrick Mooney if (errno == EINTR) {
19557a0264bSPatrick Mooney /*
19657a0264bSPatrick Mooney * Linux does not document EINTR as an allowed error
19757a0264bSPatrick Mooney * for epoll_ctl. The write must be retried if it is
19857a0264bSPatrick Mooney * not done automatically via SA_RESTART.
19957a0264bSPatrick Mooney */
20057a0264bSPatrick Mooney goto retry;
20157a0264bSPatrick Mooney }
20257a0264bSPatrick Mooney if (errno == ELOOP) {
20357a0264bSPatrick Mooney /*
20457a0264bSPatrick Mooney * Convert the specific /dev/poll error about an fd
20557a0264bSPatrick Mooney * loop into what is expected from the Linux epoll
20657a0264bSPatrick Mooney * interface.
20757a0264bSPatrick Mooney */
20857a0264bSPatrick Mooney errno = EINVAL;
20957a0264bSPatrick Mooney }
21057a0264bSPatrick Mooney return (-1);
21157a0264bSPatrick Mooney }
21257a0264bSPatrick Mooney return (0);
213a5eb7107SBryan Cantrill }
214a5eb7107SBryan Cantrill
215a5eb7107SBryan Cantrill int
epoll_wait(int epfd,struct epoll_event * events,int maxevents,int timeout)216a5eb7107SBryan Cantrill epoll_wait(int epfd, struct epoll_event *events,
217a5eb7107SBryan Cantrill int maxevents, int timeout)
218a5eb7107SBryan Cantrill {
219a5eb7107SBryan Cantrill struct dvpoll arg;
220a5eb7107SBryan Cantrill
221a5eb7107SBryan Cantrill if (maxevents <= 0) {
222a5eb7107SBryan Cantrill errno = EINVAL;
223a5eb7107SBryan Cantrill return (-1);
224a5eb7107SBryan Cantrill }
225a5eb7107SBryan Cantrill
226a5eb7107SBryan Cantrill arg.dp_nfds = maxevents;
227f4f9009fSPatrick Mooney arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout);
228a5eb7107SBryan Cantrill arg.dp_fds = (pollfd_t *)events;
229a5eb7107SBryan Cantrill
230a5eb7107SBryan Cantrill return (ioctl(epfd, DP_POLL, &arg));
231a5eb7107SBryan Cantrill }
232a5eb7107SBryan Cantrill
233a5eb7107SBryan Cantrill int
epoll_pwait(int epfd,struct epoll_event * events,int maxevents,int timeout,const sigset_t * sigmask)234a5eb7107SBryan Cantrill epoll_pwait(int epfd, struct epoll_event *events,
235a5eb7107SBryan Cantrill int maxevents, int timeout, const sigset_t *sigmask)
236a5eb7107SBryan Cantrill {
237a5eb7107SBryan Cantrill struct dvpoll arg;
238a5eb7107SBryan Cantrill
239a5eb7107SBryan Cantrill if (maxevents <= 0) {
240a5eb7107SBryan Cantrill errno = EINVAL;
241a5eb7107SBryan Cantrill return (-1);
242a5eb7107SBryan Cantrill }
243a5eb7107SBryan Cantrill
244a5eb7107SBryan Cantrill arg.dp_nfds = maxevents;
245f4f9009fSPatrick Mooney arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout);
246a5eb7107SBryan Cantrill arg.dp_fds = (pollfd_t *)events;
247a5eb7107SBryan Cantrill arg.dp_setp = (sigset_t *)sigmask;
248a5eb7107SBryan Cantrill
249a5eb7107SBryan Cantrill return (ioctl(epfd, DP_PPOLL, &arg));
250a5eb7107SBryan Cantrill }
251