1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2017 Joyent, Inc. 14 * Copyright 2020 Oxide Computer Company 15 */ 16 17 #include <sys/types.h> 18 #include <sys/epoll.h> 19 #include <sys/devpoll.h> 20 #include <unistd.h> 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <poll.h> 24 25 /* 26 * Events that match their epoll(7) equivalents. 27 */ 28 #if EPOLLIN != POLLIN 29 #error value of EPOLLIN does not match value of POLLIN 30 #endif 31 32 #if EPOLLPRI != POLLPRI 33 #error value of EPOLLPRI does not match value of POLLPRI 34 #endif 35 36 #if EPOLLOUT != POLLOUT 37 #error value of EPOLLOUT does not match value of POLLOUT 38 #endif 39 40 #if EPOLLRDNORM != POLLRDNORM 41 #error value of EPOLLRDNORM does not match value of POLLRDNORM 42 #endif 43 44 #if EPOLLRDBAND != POLLRDBAND 45 #error value of EPOLLRDBAND does not match value of POLLRDBAND 46 #endif 47 48 #if EPOLLERR != POLLERR 49 #error value of EPOLLERR does not match value of POLLERR 50 #endif 51 52 #if EPOLLHUP != POLLHUP 53 #error value of EPOLLHUP does not match value of POLLHUP 54 #endif 55 56 /* 57 * Events that we ignore entirely. They can be set in events, but they will 58 * never be returned. 59 */ 60 #define EPOLLIGNORED (EPOLLMSG | EPOLLWAKEUP | EPOLLEXCLUSIVE) 61 62 /* 63 * Events that we swizzle into other bit positions. 64 */ 65 #define EPOLLSWIZZLED \ 66 (EPOLLRDHUP | EPOLLONESHOT | EPOLLET | EPOLLWRBAND | EPOLLWRNORM) 67 68 /* 69 * The defined behavior for epoll_wait/epoll_pwait when using a timeout less 70 * than 0 is to wait for events until they arrive (or interrupted by a signal). 71 * While poll(7d) operates in this manner for a timeout of -1, using other 72 * negative values results in an immediate timeout, as if it had been set to 0. 73 * For that reason, negative values are clamped to -1. 74 */ 75 #define EPOLL_TIMEOUT_CLAMP(t) (((t) < -1) ? -1 : (t)) 76 77 int 78 epoll_create(int size) 79 { 80 int fd; 81 82 /* 83 * From the epoll_create() man page: "Since Linux 2.6.8, the size 84 * argument is ignored, but must be greater than zero." You keep using 85 * that word "ignored"... 86 */ 87 if (size <= 0) { 88 errno = EINVAL; 89 return (-1); 90 } 91 92 if ((fd = open("/dev/poll", O_RDWR)) == -1) 93 return (-1); 94 95 if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) { 96 (void) close(fd); 97 return (-1); 98 } 99 100 return (fd); 101 } 102 103 int 104 epoll_create1(int flags) 105 { 106 int fd, oflags = O_RDWR; 107 108 if (flags & EPOLL_CLOEXEC) { 109 oflags |= O_CLOEXEC; 110 flags ^= EPOLL_CLOEXEC; 111 } 112 /* Reject unrecognized flags */ 113 if (flags != 0) { 114 errno = EINVAL; 115 return (-1); 116 } 117 118 if ((fd = open("/dev/poll", oflags)) == -1) 119 return (-1); 120 121 if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) { 122 (void) close(fd); 123 return (-1); 124 } 125 126 return (fd); 127 } 128 129 int 130 epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) 131 { 132 dvpoll_epollfd_t epoll[2]; 133 uint32_t events, ev = 0; 134 int i = 0, res; 135 136 epoll[i].dpep_pollfd.fd = fd; 137 138 switch (op) { 139 case EPOLL_CTL_DEL: 140 ev = POLLREMOVE; 141 break; 142 143 case EPOLL_CTL_MOD: 144 /* EPOLLEXCLUSIVE is prohibited for modify operations */ 145 if ((event->events & EPOLLEXCLUSIVE) != 0) { 146 errno = EINVAL; 147 return (-1); 148 } 149 /* 150 * In the modify case, we pass down two events: one to 151 * remove the event and another to add it back. 152 */ 153 epoll[i++].dpep_pollfd.events = POLLREMOVE; 154 epoll[i].dpep_pollfd.fd = fd; 155 /* FALLTHROUGH */ 156 157 case EPOLL_CTL_ADD: 158 /* 159 * Mask off the events that we ignore, and then swizzle the 160 * events for which our values differ from their epoll(7) 161 * equivalents. 162 */ 163 events = event->events; 164 ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED); 165 166 if (events & EPOLLRDHUP) 167 ev |= POLLRDHUP; 168 169 if (events & EPOLLET) 170 ev |= POLLET; 171 172 if (events & EPOLLONESHOT) 173 ev |= POLLONESHOT; 174 175 if (events & EPOLLWRNORM) 176 ev |= POLLWRNORM; 177 178 if (events & EPOLLWRBAND) 179 ev |= POLLWRBAND; 180 181 epoll[i].dpep_data = event->data.u64; 182 break; 183 184 default: 185 errno = EOPNOTSUPP; 186 return (-1); 187 } 188 189 epoll[i].dpep_pollfd.events = ev; 190 retry: 191 res = write(epfd, epoll, sizeof (epoll[0]) * (i + 1)); 192 193 if (res == -1) { 194 if (errno == EINTR) { 195 /* 196 * Linux does not document EINTR as an allowed error 197 * for epoll_ctl. The write must be retried if it is 198 * not done automatically via SA_RESTART. 199 */ 200 goto retry; 201 } 202 if (errno == ELOOP) { 203 /* 204 * Convert the specific /dev/poll error about an fd 205 * loop into what is expected from the Linux epoll 206 * interface. 207 */ 208 errno = EINVAL; 209 } 210 return (-1); 211 } 212 return (0); 213 } 214 215 int 216 epoll_wait(int epfd, struct epoll_event *events, 217 int maxevents, int timeout) 218 { 219 struct dvpoll arg; 220 221 if (maxevents <= 0) { 222 errno = EINVAL; 223 return (-1); 224 } 225 226 arg.dp_nfds = maxevents; 227 arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout); 228 arg.dp_fds = (pollfd_t *)events; 229 230 return (ioctl(epfd, DP_POLL, &arg)); 231 } 232 233 int 234 epoll_pwait(int epfd, struct epoll_event *events, 235 int maxevents, int timeout, const sigset_t *sigmask) 236 { 237 struct dvpoll arg; 238 239 if (maxevents <= 0) { 240 errno = EINVAL; 241 return (-1); 242 } 243 244 arg.dp_nfds = maxevents; 245 arg.dp_timeout = EPOLL_TIMEOUT_CLAMP(timeout); 246 arg.dp_fds = (pollfd_t *)events; 247 arg.dp_setp = (sigset_t *)sigmask; 248 249 return (ioctl(epfd, DP_PPOLL, &arg)); 250 } 251