xref: /titanic_51/usr/src/uts/common/io/eventfd.c (revision ce0d9371c4da921f289a8bc7fd846d3bf1b8cca7)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
14  */
15 
16 /*
17  * Support for the eventfd facility, a Linux-borne facility for user-generated
18  * file descriptor-based events.
19  */
20 
21 #include <sys/ddi.h>
22 #include <sys/sunddi.h>
23 #include <sys/eventfd.h>
24 #include <sys/conf.h>
25 #include <sys/vmem.h>
26 #include <sys/sysmacros.h>
27 #include <sys/filio.h>
28 #include <sys/stat.h>
29 #include <sys/file.h>
30 
31 struct eventfd_state;
32 typedef struct eventfd_state eventfd_state_t;
33 
34 struct eventfd_state {
35 	kmutex_t efd_lock;			/* lock protecting state */
36 	boolean_t efd_semaphore;		/* boolean: sema. semantics */
37 	kcondvar_t efd_cv;			/* condvar */
38 	pollhead_t efd_pollhd;			/* poll head */
39 	uint64_t efd_value;			/* value */
40 	eventfd_state_t *efd_next;		/* next state on global list */
41 };
42 
43 /*
44  * Internal global variables.
45  */
46 static kmutex_t		eventfd_lock;		/* lock protecting state */
47 static dev_info_t	*eventfd_devi;		/* device info */
48 static vmem_t		*eventfd_minor;		/* minor number arena */
49 static void		*eventfd_softstate;	/* softstate pointer */
50 static eventfd_state_t	*eventfd_state;		/* global list of state */
51 
52 /*ARGSUSED*/
53 static int
54 eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
55 {
56 	eventfd_state_t *state;
57 	major_t major = getemajor(*devp);
58 	minor_t minor = getminor(*devp);
59 
60 	if (minor != EVENTFDMNRN_EVENTFD)
61 		return (ENXIO);
62 
63 	mutex_enter(&eventfd_lock);
64 
65 	minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1,
66 	    VM_BESTFIT | VM_SLEEP);
67 
68 	if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) {
69 		vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
70 		mutex_exit(&eventfd_lock);
71 		return (NULL);
72 	}
73 
74 	state = ddi_get_soft_state(eventfd_softstate, minor);
75 	*devp = makedevice(major, minor);
76 
77 	state->efd_next = eventfd_state;
78 	eventfd_state = state;
79 
80 	mutex_exit(&eventfd_lock);
81 
82 	return (0);
83 }
84 
85 /*ARGSUSED*/
86 static int
87 eventfd_read(dev_t dev, uio_t *uio, cred_t *cr)
88 {
89 	eventfd_state_t *state;
90 	minor_t minor = getminor(dev);
91 	uint64_t val, oval;
92 	int err;
93 
94 	if (uio->uio_resid < sizeof (val))
95 		return (EINVAL);
96 
97 	state = ddi_get_soft_state(eventfd_softstate, minor);
98 
99 	mutex_enter(&state->efd_lock);
100 
101 	while (state->efd_value == 0) {
102 		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
103 			mutex_exit(&state->efd_lock);
104 			return (EAGAIN);
105 		}
106 
107 		if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
108 			mutex_exit(&state->efd_lock);
109 			return (EINTR);
110 		}
111 	}
112 
113 	/*
114 	 * We have a non-zero value and we own the lock; our behavior now
115 	 * depends on whether or not EFD_SEMAPHORE was set when the eventfd
116 	 * was created.
117 	 */
118 	val = oval = state->efd_value;
119 
120 	if (state->efd_semaphore) {
121 		state->efd_value--;
122 		val = 1;
123 	} else {
124 		state->efd_value = 0;
125 	}
126 
127 	err = uiomove(&val, sizeof (val), UIO_READ, uio);
128 
129 	mutex_exit(&state->efd_lock);
130 
131 	if (oval == EVENTFD_VALMAX) {
132 		cv_broadcast(&state->efd_cv);
133 		pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
134 	}
135 
136 	return (err);
137 }
138 
139 /*ARGSUSED*/
140 static int
141 eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
142 {
143 	eventfd_state_t *state;
144 	minor_t minor = getminor(dev);
145 	uint64_t val, oval;
146 	int err;
147 
148 	if (uio->uio_resid < sizeof (val))
149 		return (EINVAL);
150 
151 	if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0)
152 		return (err);
153 
154 	if (val > EVENTFD_VALMAX)
155 		return (EINVAL);
156 
157 	state = ddi_get_soft_state(eventfd_softstate, minor);
158 
159 	mutex_enter(&state->efd_lock);
160 
161 	while (val > EVENTFD_VALMAX - state->efd_value) {
162 		if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
163 			mutex_exit(&state->efd_lock);
164 			return (EAGAIN);
165 		}
166 
167 		if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
168 			mutex_exit(&state->efd_lock);
169 			return (EINTR);
170 		}
171 	}
172 
173 	/*
174 	 * We now know that we can add the value without overflowing.
175 	 */
176 	state->efd_value = (oval = state->efd_value) + val;
177 
178 	mutex_exit(&state->efd_lock);
179 
180 	if (oval == 0) {
181 		cv_broadcast(&state->efd_cv);
182 		pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
183 	}
184 
185 	return (0);
186 }
187 
188 /*ARGSUSED*/
189 static int
190 eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
191     struct pollhead **phpp)
192 {
193 	eventfd_state_t *state;
194 	minor_t minor = getminor(dev);
195 	short revents = 0;
196 
197 	state = ddi_get_soft_state(eventfd_softstate, minor);
198 
199 	mutex_enter(&state->efd_lock);
200 
201 	if (state->efd_value > 0)
202 		revents |= POLLRDNORM | POLLIN;
203 
204 	if (state->efd_value < EVENTFD_VALMAX)
205 		revents |= POLLWRNORM | POLLOUT;
206 
207 	if (!(*reventsp = revents & events) && !anyyet)
208 		*phpp = &state->efd_pollhd;
209 
210 	mutex_exit(&state->efd_lock);
211 
212 	return (0);
213 }
214 
215 /*ARGSUSED*/
216 static int
217 eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
218 {
219 	eventfd_state_t *state;
220 	minor_t minor = getminor(dev);
221 
222 	state = ddi_get_soft_state(eventfd_softstate, minor);
223 
224 	switch (cmd) {
225 	case EVENTFDIOC_SEMAPHORE: {
226 		mutex_enter(&state->efd_lock);
227 		state->efd_semaphore ^= 1;
228 		mutex_exit(&state->efd_lock);
229 
230 		return (0);
231 	}
232 
233 	default:
234 		break;
235 	}
236 
237 	return (ENOTTY);
238 }
239 
240 /*ARGSUSED*/
241 static int
242 eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
243 {
244 	eventfd_state_t *state, **sp;
245 	minor_t minor = getminor(dev);
246 
247 	state = ddi_get_soft_state(eventfd_softstate, minor);
248 
249 	if (state->efd_pollhd.ph_list != NULL) {
250 		pollwakeup(&state->efd_pollhd, POLLERR);
251 		pollhead_clean(&state->efd_pollhd);
252 	}
253 
254 	mutex_enter(&eventfd_lock);
255 
256 	/*
257 	 * Remove our state from our global list.
258 	 */
259 	for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next))
260 		VERIFY(*sp != NULL);
261 
262 	*sp = (*sp)->efd_next;
263 
264 	ddi_soft_state_free(eventfd_softstate, minor);
265 	vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
266 
267 	mutex_exit(&eventfd_lock);
268 
269 	return (0);
270 }
271 
272 static int
273 eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
274 {
275 	switch (cmd) {
276 	case DDI_ATTACH:
277 		break;
278 
279 	case DDI_RESUME:
280 		return (DDI_SUCCESS);
281 
282 	default:
283 		return (DDI_FAILURE);
284 	}
285 
286 	mutex_enter(&eventfd_lock);
287 
288 	if (ddi_soft_state_init(&eventfd_softstate,
289 	    sizeof (eventfd_state_t), 0) != 0) {
290 		cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state");
291 		mutex_exit(&eventfd_lock);
292 		return (DDI_FAILURE);
293 	}
294 
295 	if (ddi_create_minor_node(devi, "eventfd", S_IFCHR,
296 	    EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
297 		cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node");
298 		ddi_soft_state_fini(&eventfd_softstate);
299 		mutex_exit(&eventfd_lock);
300 		return (DDI_FAILURE);
301 	}
302 
303 	ddi_report_dev(devi);
304 	eventfd_devi = devi;
305 
306 	eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE,
307 	    UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
308 	    VM_SLEEP | VMC_IDENTIFIER);
309 
310 	mutex_exit(&eventfd_lock);
311 
312 	return (DDI_SUCCESS);
313 }
314 
315 /*ARGSUSED*/
316 static int
317 eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
318 {
319 	switch (cmd) {
320 	case DDI_DETACH:
321 		break;
322 
323 	case DDI_SUSPEND:
324 		return (DDI_SUCCESS);
325 
326 	default:
327 		return (DDI_FAILURE);
328 	}
329 
330 	mutex_enter(&eventfd_lock);
331 	vmem_destroy(eventfd_minor);
332 
333 	ddi_remove_minor_node(eventfd_devi, NULL);
334 	eventfd_devi = NULL;
335 
336 	ddi_soft_state_fini(&eventfd_softstate);
337 	mutex_exit(&eventfd_lock);
338 
339 	return (DDI_SUCCESS);
340 }
341 
342 /*ARGSUSED*/
343 static int
344 eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
345 {
346 	int error;
347 
348 	switch (infocmd) {
349 	case DDI_INFO_DEVT2DEVINFO:
350 		*result = (void *)eventfd_devi;
351 		error = DDI_SUCCESS;
352 		break;
353 	case DDI_INFO_DEVT2INSTANCE:
354 		*result = (void *)0;
355 		error = DDI_SUCCESS;
356 		break;
357 	default:
358 		error = DDI_FAILURE;
359 	}
360 	return (error);
361 }
362 
363 static struct cb_ops eventfd_cb_ops = {
364 	eventfd_open,		/* open */
365 	eventfd_close,		/* close */
366 	nulldev,		/* strategy */
367 	nulldev,		/* print */
368 	nodev,			/* dump */
369 	eventfd_read,		/* read */
370 	eventfd_write,		/* write */
371 	eventfd_ioctl,		/* ioctl */
372 	nodev,			/* devmap */
373 	nodev,			/* mmap */
374 	nodev,			/* segmap */
375 	eventfd_poll,		/* poll */
376 	ddi_prop_op,		/* cb_prop_op */
377 	0,			/* streamtab  */
378 	D_NEW | D_MP		/* Driver compatibility flag */
379 };
380 
381 static struct dev_ops eventfd_ops = {
382 	DEVO_REV,		/* devo_rev */
383 	0,			/* refcnt */
384 	eventfd_info,		/* get_dev_info */
385 	nulldev,		/* identify */
386 	nulldev,		/* probe */
387 	eventfd_attach,		/* attach */
388 	eventfd_detach,		/* detach */
389 	nodev,			/* reset */
390 	&eventfd_cb_ops,	/* driver operations */
391 	NULL,			/* bus operations */
392 	nodev,			/* dev power */
393 	ddi_quiesce_not_needed,	/* quiesce */
394 };
395 
396 static struct modldrv modldrv = {
397 	&mod_driverops,		/* module type (this is a pseudo driver) */
398 	"eventfd support",	/* name of module */
399 	&eventfd_ops,		/* driver ops */
400 };
401 
402 static struct modlinkage modlinkage = {
403 	MODREV_1,
404 	(void *)&modldrv,
405 	NULL
406 };
407 
408 int
409 _init(void)
410 {
411 	return (mod_install(&modlinkage));
412 }
413 
414 int
415 _info(struct modinfo *modinfop)
416 {
417 	return (mod_info(&modlinkage, modinfop));
418 }
419 
420 int
421 _fini(void)
422 {
423 	return (mod_remove(&modlinkage));
424 }
425