1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2017 Joyent, Inc.
14 * Copyright 2020 Oxide Computer Company
15 */
16
17 #include <sys/types.h>
18 #include <sys/epoll.h>
19 #include <sys/devpoll.h>
20 #include <unistd.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <poll.h>
24
25 /*
26 * Events that match their epoll(7) equivalents.
27 */
28 #if EPOLLIN != POLLIN
29 #error value of EPOLLIN does not match value of POLLIN
30 #endif
31
32 #if EPOLLPRI != POLLPRI
33 #error value of EPOLLPRI does not match value of POLLPRI
34 #endif
35
36 #if EPOLLOUT != POLLOUT
37 #error value of EPOLLOUT does not match value of POLLOUT
38 #endif
39
40 #if EPOLLRDNORM != POLLRDNORM
41 #error value of EPOLLRDNORM does not match value of POLLRDNORM
42 #endif
43
44 #if EPOLLRDBAND != POLLRDBAND
45 #error value of EPOLLRDBAND does not match value of POLLRDBAND
46 #endif
47
48 #if EPOLLERR != POLLERR
49 #error value of EPOLLERR does not match value of POLLERR
50 #endif
51
52 #if EPOLLHUP != POLLHUP
53 #error value of EPOLLHUP does not match value of POLLHUP
54 #endif
55
/*
 * Event bits that callers are allowed to request but that are dropped
 * before the request is issued; they are therefore never reported back.
 */
#define	EPOLLIGNORED	\
	(EPOLLEXCLUSIVE | EPOLLWAKEUP | EPOLLMSG)
61
/*
 * Event bits whose epoll(7) positions differ from the poll values used
 * here.  epoll_ctl() strips them from the caller's mask and sets the
 * corresponding POLL* flag in their place.
 */
#define	EPOLLSWIZZLED	\
	(EPOLLET | EPOLLONESHOT | EPOLLRDHUP | EPOLLWRNORM | EPOLLWRBAND)
67
/*
 * The defined behavior for epoll_wait/epoll_pwait when using a timeout less
 * than 0 is to wait for events until they arrive (or until interrupted by a
 * signal).  While poll(4D) operates in this manner for a timeout of -1, using
 * other negative values results in an immediate timeout, as if it had been
 * set to 0.  For that reason, negative values are clamped to -1.
 *
 * The clamp is implemented as an inline function so that the argument is
 * type-checked and evaluated exactly once; EPOLL_TIMEOUT_CLAMP() remains
 * available as the spelling used by callers.
 *
 * Returns -1 for any timeout below -1, otherwise the timeout unchanged.
 */
static inline int
epoll_timeout_clamp(int timeout)
{
	return ((timeout < -1) ? -1 : timeout);
}

#define	EPOLL_TIMEOUT_CLAMP(t)	(epoll_timeout_clamp(t))
76
77 int
epoll_create(int size)78 epoll_create(int size)
79 {
80 int fd;
81
82 /*
83 * From the epoll_create() man page: "Since Linux 2.6.8, the size
84 * argument is ignored, but must be greater than zero." You keep using
85 * that word "ignored"...
86 */
87 if (size <= 0) {
88 errno = EINVAL;
89 return (-1);
90 }
91
92 if ((fd = open("/dev/poll", O_RDWR)) == -1)
93 return (-1);
94
95 if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
96 (void) close(fd);
97 return (-1);
98 }
99
100 return (fd);
101 }
102
103 int
epoll_create1(int flags)104 epoll_create1(int flags)
105 {
106 int fd, oflags = O_RDWR;
107
108 if (flags & EPOLL_CLOEXEC) {
109 oflags |= O_CLOEXEC;
110 flags ^= EPOLL_CLOEXEC;
111 }
112 /* Reject unrecognized flags */
113 if (flags != 0) {
114 errno = EINVAL;
115 return (-1);
116 }
117
118 if ((fd = open("/dev/poll", oflags)) == -1)
119 return (-1);
120
121 if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) {
122 (void) close(fd);
123 return (-1);
124 }
125
126 return (fd);
127 }
128
129 int
epoll_ctl(int epfd,int op,int fd,struct epoll_event * event)130 epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
131 {
132 dvpoll_epollfd_t epoll[2];
133 uint32_t events, ev = 0;
134 int i = 0, res;
135
136 epoll[i].dpep_pollfd.fd = fd;
137
138 switch (op) {
139 case EPOLL_CTL_DEL:
140 ev = POLLREMOVE;
141 break;
142
143 case EPOLL_CTL_MOD:
144 /* EPOLLEXCLUSIVE is prohibited for modify operations */
145 if ((event->events & EPOLLEXCLUSIVE) != 0) {
146 errno = EINVAL;
147 return (-1);
148 }
149 /*
150 * In the modify case, we pass down two events: one to
151 * remove the event and another to add it back.
152 */
153 epoll[i++].dpep_pollfd.events = POLLREMOVE;
154 epoll[i].dpep_pollfd.fd = fd;
155 /* FALLTHROUGH */
156
157 case EPOLL_CTL_ADD:
158 /*
159 * Mask off the events that we ignore, and then swizzle the
160 * events for which our values differ from their epoll(7)
161 * equivalents.
162 */
163 events = event->events;
164 ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED);
165
166 if (events & EPOLLRDHUP)
167 ev |= POLLRDHUP;
168
169 if (events & EPOLLET)
170 ev |= POLLET;
171
172 if (events & EPOLLONESHOT)
173 ev |= POLLONESHOT;
174
175 if (events & EPOLLWRNORM)
176 ev |= POLLWRNORM;
177
178 if (events & EPOLLWRBAND)
179 ev |= POLLWRBAND;
180
181 epoll[i].dpep_data = event->data.u64;
182 break;
183
184 default:
185 errno = EOPNOTSUPP;
186 return (-1);
187 }
188
189 epoll[i].dpep_pollfd.events = ev;
190 retry:
191 res = write(epfd, epoll, sizeof (epoll[0]) * (i + 1));
192
193 if (res == -1) {
194 if (errno == EINTR) {
195 /*
196 * Linux does not document EINTR as an allowed error
197 * for epoll_ctl. The write must be retried if it is
198 * not done automatically via SA_RESTART.
199 */
200 goto retry;
201 }
202 if (errno == ELOOP) {
203 /*
204 * Convert the specific /dev/poll error about an fd
205 * loop into what is expected from the Linux epoll
206 * interface.
207 */
208 errno = EINVAL;
209 }
210 return (-1);
211 }
212 return (0);
213 }
214
215 int
epoll_wait(int epfd,struct epoll_event * events,int maxevents,int timeout)216 epoll_wait(int epfd, struct epoll_event *events,
217 int maxevents, int timeout)
218 {
219 struct dvpoll arg;
220
221 if (maxevents <= 0) {
222 errno = EINVAL;
223 return (-1);
224 }
225
226 arg.dp_nfds = maxevents;
227 arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout);
228 arg.dp_fds = (pollfd_t *)events;
229
230 return (ioctl(epfd, DP_POLL, &arg));
231 }
232
233 int
epoll_pwait(int epfd,struct epoll_event * events,int maxevents,int timeout,const sigset_t * sigmask)234 epoll_pwait(int epfd, struct epoll_event *events,
235 int maxevents, int timeout, const sigset_t *sigmask)
236 {
237 struct dvpoll arg;
238
239 if (maxevents <= 0) {
240 errno = EINVAL;
241 return (-1);
242 }
243
244 arg.dp_nfds = maxevents;
245 arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout);
246 arg.dp_fds = (pollfd_t *)events;
247 arg.dp_setp = (sigset_t *)sigmask;
248
249 return (ioctl(epfd, DP_PPOLL, &arg));
250 }
251