1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 */
15
16 /*
17 * Support for the eventfd facility, a Linux-borne facility for user-generated
18 * file descriptor-based events.
19 */
20
21 #include <sys/ddi.h>
22 #include <sys/sunddi.h>
23 #include <sys/eventfd.h>
24 #include <sys/conf.h>
25 #include <sys/vmem.h>
26 #include <sys/sysmacros.h>
27 #include <sys/filio.h>
28 #include <sys/stat.h>
29 #include <sys/file.h>
30
31 struct eventfd_state;
32 typedef struct eventfd_state eventfd_state_t;
33
34 struct eventfd_state {
35 kmutex_t efd_lock; /* lock protecting state */
36 boolean_t efd_semaphore; /* boolean: sema. semantics */
37 kcondvar_t efd_cv; /* condvar */
38 pollhead_t efd_pollhd; /* poll head */
39 uint64_t efd_value; /* value */
40 eventfd_state_t *efd_next; /* next state on global list */
41 };
42
43 /*
44 * Internal global variables.
45 */
46 static kmutex_t eventfd_lock; /* lock protecting state */
47 static dev_info_t *eventfd_devi; /* device info */
48 static vmem_t *eventfd_minor; /* minor number arena */
49 static void *eventfd_softstate; /* softstate pointer */
50 static eventfd_state_t *eventfd_state; /* global list of state */
51
52 /*ARGSUSED*/
53 static int
eventfd_open(dev_t * devp,int flag,int otyp,cred_t * cred_p)54 eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
55 {
56 eventfd_state_t *state;
57 major_t major = getemajor(*devp);
58 minor_t minor = getminor(*devp);
59
60 if (minor != EVENTFDMNRN_EVENTFD)
61 return (ENXIO);
62
63 mutex_enter(&eventfd_lock);
64
65 minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1,
66 VM_BESTFIT | VM_SLEEP);
67
68 if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) {
69 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
70 mutex_exit(&eventfd_lock);
71 return (NULL);
72 }
73
74 state = ddi_get_soft_state(eventfd_softstate, minor);
75 *devp = makedevice(major, minor);
76
77 state->efd_next = eventfd_state;
78 eventfd_state = state;
79
80 mutex_exit(&eventfd_lock);
81
82 return (0);
83 }
84
85 /*ARGSUSED*/
86 static int
eventfd_read(dev_t dev,uio_t * uio,cred_t * cr)87 eventfd_read(dev_t dev, uio_t *uio, cred_t *cr)
88 {
89 eventfd_state_t *state;
90 minor_t minor = getminor(dev);
91 uint64_t val, oval;
92 int err;
93
94 if (uio->uio_resid < sizeof (val))
95 return (EINVAL);
96
97 state = ddi_get_soft_state(eventfd_softstate, minor);
98
99 mutex_enter(&state->efd_lock);
100
101 while (state->efd_value == 0) {
102 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
103 mutex_exit(&state->efd_lock);
104 return (EAGAIN);
105 }
106
107 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
108 mutex_exit(&state->efd_lock);
109 return (EINTR);
110 }
111 }
112
113 /*
114 * We have a non-zero value and we own the lock; our behavior now
115 * depends on whether or not EFD_SEMAPHORE was set when the eventfd
116 * was created.
117 */
118 val = oval = state->efd_value;
119
120 if (state->efd_semaphore) {
121 state->efd_value--;
122 val = 1;
123 } else {
124 state->efd_value = 0;
125 }
126
127 err = uiomove(&val, sizeof (val), UIO_READ, uio);
128
129 mutex_exit(&state->efd_lock);
130
131 if (oval == EVENTFD_VALMAX) {
132 cv_broadcast(&state->efd_cv);
133 pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
134 }
135
136 return (err);
137 }
138
139 /*ARGSUSED*/
140 static int
eventfd_write(dev_t dev,struct uio * uio,cred_t * credp)141 eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
142 {
143 eventfd_state_t *state;
144 minor_t minor = getminor(dev);
145 uint64_t val, oval;
146 int err;
147
148 if (uio->uio_resid < sizeof (val))
149 return (EINVAL);
150
151 if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0)
152 return (err);
153
154 if (val > EVENTFD_VALMAX)
155 return (EINVAL);
156
157 state = ddi_get_soft_state(eventfd_softstate, minor);
158
159 mutex_enter(&state->efd_lock);
160
161 while (val > EVENTFD_VALMAX - state->efd_value) {
162 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
163 mutex_exit(&state->efd_lock);
164 return (EAGAIN);
165 }
166
167 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
168 mutex_exit(&state->efd_lock);
169 return (EINTR);
170 }
171 }
172
173 /*
174 * We now know that we can add the value without overflowing.
175 */
176 state->efd_value = (oval = state->efd_value) + val;
177
178 mutex_exit(&state->efd_lock);
179
180 if (oval == 0) {
181 cv_broadcast(&state->efd_cv);
182 pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
183 }
184
185 return (0);
186 }
187
188 /*ARGSUSED*/
189 static int
eventfd_poll(dev_t dev,short events,int anyyet,short * reventsp,struct pollhead ** phpp)190 eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
191 struct pollhead **phpp)
192 {
193 eventfd_state_t *state;
194 minor_t minor = getminor(dev);
195 short revents = 0;
196
197 state = ddi_get_soft_state(eventfd_softstate, minor);
198
199 mutex_enter(&state->efd_lock);
200
201 if (state->efd_value > 0)
202 revents |= POLLRDNORM | POLLIN;
203
204 if (state->efd_value < EVENTFD_VALMAX)
205 revents |= POLLWRNORM | POLLOUT;
206
207 if (!(*reventsp = revents & events) && !anyyet)
208 *phpp = &state->efd_pollhd;
209
210 mutex_exit(&state->efd_lock);
211
212 return (0);
213 }
214
215 /*ARGSUSED*/
216 static int
eventfd_ioctl(dev_t dev,int cmd,intptr_t arg,int md,cred_t * cr,int * rv)217 eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
218 {
219 eventfd_state_t *state;
220 minor_t minor = getminor(dev);
221
222 state = ddi_get_soft_state(eventfd_softstate, minor);
223
224 switch (cmd) {
225 case EVENTFDIOC_SEMAPHORE: {
226 mutex_enter(&state->efd_lock);
227 state->efd_semaphore ^= 1;
228 mutex_exit(&state->efd_lock);
229
230 return (0);
231 }
232
233 default:
234 break;
235 }
236
237 return (ENOTTY);
238 }
239
240 /*ARGSUSED*/
241 static int
eventfd_close(dev_t dev,int flag,int otyp,cred_t * cred_p)242 eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
243 {
244 eventfd_state_t *state, **sp;
245 minor_t minor = getminor(dev);
246
247 state = ddi_get_soft_state(eventfd_softstate, minor);
248
249 if (state->efd_pollhd.ph_list != NULL) {
250 pollwakeup(&state->efd_pollhd, POLLERR);
251 pollhead_clean(&state->efd_pollhd);
252 }
253
254 mutex_enter(&eventfd_lock);
255
256 /*
257 * Remove our state from our global list.
258 */
259 for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next))
260 VERIFY(*sp != NULL);
261
262 *sp = (*sp)->efd_next;
263
264 ddi_soft_state_free(eventfd_softstate, minor);
265 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
266
267 mutex_exit(&eventfd_lock);
268
269 return (0);
270 }
271
272 static int
eventfd_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)273 eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
274 {
275 switch (cmd) {
276 case DDI_ATTACH:
277 break;
278
279 case DDI_RESUME:
280 return (DDI_SUCCESS);
281
282 default:
283 return (DDI_FAILURE);
284 }
285
286 mutex_enter(&eventfd_lock);
287
288 if (ddi_soft_state_init(&eventfd_softstate,
289 sizeof (eventfd_state_t), 0) != 0) {
290 cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state");
291 mutex_exit(&eventfd_lock);
292 return (DDI_FAILURE);
293 }
294
295 if (ddi_create_minor_node(devi, "eventfd", S_IFCHR,
296 EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
297 cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node");
298 ddi_soft_state_fini(&eventfd_softstate);
299 mutex_exit(&eventfd_lock);
300 return (DDI_FAILURE);
301 }
302
303 ddi_report_dev(devi);
304 eventfd_devi = devi;
305
306 eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE,
307 UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
308 VM_SLEEP | VMC_IDENTIFIER);
309
310 mutex_exit(&eventfd_lock);
311
312 return (DDI_SUCCESS);
313 }
314
315 /*ARGSUSED*/
316 static int
eventfd_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)317 eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
318 {
319 switch (cmd) {
320 case DDI_DETACH:
321 break;
322
323 case DDI_SUSPEND:
324 return (DDI_SUCCESS);
325
326 default:
327 return (DDI_FAILURE);
328 }
329
330 mutex_enter(&eventfd_lock);
331 vmem_destroy(eventfd_minor);
332
333 ddi_remove_minor_node(eventfd_devi, NULL);
334 eventfd_devi = NULL;
335
336 ddi_soft_state_fini(&eventfd_softstate);
337 mutex_exit(&eventfd_lock);
338
339 return (DDI_SUCCESS);
340 }
341
342 /*ARGSUSED*/
343 static int
eventfd_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)344 eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
345 {
346 int error;
347
348 switch (infocmd) {
349 case DDI_INFO_DEVT2DEVINFO:
350 *result = (void *)eventfd_devi;
351 error = DDI_SUCCESS;
352 break;
353 case DDI_INFO_DEVT2INSTANCE:
354 *result = (void *)0;
355 error = DDI_SUCCESS;
356 break;
357 default:
358 error = DDI_FAILURE;
359 }
360 return (error);
361 }
362
363 static struct cb_ops eventfd_cb_ops = {
364 eventfd_open, /* open */
365 eventfd_close, /* close */
366 nulldev, /* strategy */
367 nulldev, /* print */
368 nodev, /* dump */
369 eventfd_read, /* read */
370 eventfd_write, /* write */
371 eventfd_ioctl, /* ioctl */
372 nodev, /* devmap */
373 nodev, /* mmap */
374 nodev, /* segmap */
375 eventfd_poll, /* poll */
376 ddi_prop_op, /* cb_prop_op */
377 0, /* streamtab */
378 D_NEW | D_MP /* Driver compatibility flag */
379 };
380
381 static struct dev_ops eventfd_ops = {
382 DEVO_REV, /* devo_rev */
383 0, /* refcnt */
384 eventfd_info, /* get_dev_info */
385 nulldev, /* identify */
386 nulldev, /* probe */
387 eventfd_attach, /* attach */
388 eventfd_detach, /* detach */
389 nodev, /* reset */
390 &eventfd_cb_ops, /* driver operations */
391 NULL, /* bus operations */
392 nodev, /* dev power */
393 ddi_quiesce_not_needed, /* quiesce */
394 };
395
396 static struct modldrv modldrv = {
397 &mod_driverops, /* module type (this is a pseudo driver) */
398 "eventfd support", /* name of module */
399 &eventfd_ops, /* driver ops */
400 };
401
402 static struct modlinkage modlinkage = {
403 MODREV_1,
404 (void *)&modldrv,
405 NULL
406 };
407
408 int
_init(void)409 _init(void)
410 {
411 return (mod_install(&modlinkage));
412 }
413
414 int
_info(struct modinfo * modinfop)415 _info(struct modinfo *modinfop)
416 {
417 return (mod_info(&modlinkage, modinfop));
418 }
419
420 int
_fini(void)421 _fini(void)
422 {
423 return (mod_remove(&modlinkage));
424 }
425