xref: /freebsd/sys/kern/kern_devctl.c (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002-2020 M. Warner Losh <imp@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_bus.h"
32 #include "opt_ddb.h"
33 
34 #include <sys/param.h>
35 #include <sys/conf.h>
36 #include <sys/eventhandler.h>
37 #include <sys/filio.h>
38 #include <sys/lock.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/poll.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/condvar.h>
46 #include <sys/queue.h>
47 #include <machine/bus.h>
48 #include <sys/sbuf.h>
49 #include <sys/selinfo.h>
50 #include <sys/smp.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/uio.h>
54 #include <sys/bus.h>
55 
56 #include <machine/cpu.h>
57 #include <machine/stdarg.h>
58 
59 #include <vm/uma.h>
60 #include <vm/vm.h>
61 
62 #include <ddb/ddb.h>
63 
64 STAILQ_HEAD(devq, dev_event_info);
65 
66 static struct dev_softc {
67 	int		inuse;
68 	int		nonblock;
69 	int		queued;
70 	int		async;
71 	struct mtx	mtx;
72 	struct cv	cv;
73 	struct selinfo	sel;
74 	struct devq	devq;
75 	struct sigio	*sigio;
76 	uma_zone_t	zone;
77 } devsoftc;
78 
79 /*
80  * This design allows only one reader for /dev/devctl.  This is not desirable
81  * in the long run, but will get a lot of hair out of this implementation.
82  * Maybe we should make this device a clonable device.
83  *
84  * Also note: we specifically do not attach a device to the device_t tree
85  * to avoid potential chicken and egg problems.  One could argue that all
86  * of this belongs to the root node.
87  */
88 
89 #define DEVCTL_DEFAULT_QUEUE_LEN 1000
90 static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
91 static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
92 SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RWTUN |
93     CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
94 
95 static void devctl_attach_handler(void *arg __unused, device_t dev);
96 static void devctl_detach_handler(void *arg __unused, device_t dev,
97     enum evhdev_detach state);
98 static void devctl_nomatch_handler(void *arg __unused, device_t dev);
99 
100 static d_open_t		devopen;
101 static d_close_t	devclose;
102 static d_read_t		devread;
103 static d_ioctl_t	devioctl;
104 static d_poll_t		devpoll;
105 static d_kqfilter_t	devkqfilter;
106 
107 #define DEVCTL_BUFFER (1024 - sizeof(void *))
108 struct dev_event_info {
109 	STAILQ_ENTRY(dev_event_info) dei_link;
110 	char dei_data[DEVCTL_BUFFER];
111 };
112 
113 
114 static struct cdevsw dev_cdevsw = {
115 	.d_version =	D_VERSION,
116 	.d_open =	devopen,
117 	.d_close =	devclose,
118 	.d_read =	devread,
119 	.d_ioctl =	devioctl,
120 	.d_poll =	devpoll,
121 	.d_kqfilter =	devkqfilter,
122 	.d_name =	"devctl",
123 };
124 
125 static void	filt_devctl_detach(struct knote *kn);
126 static int	filt_devctl_read(struct knote *kn, long hint);
127 
128 static struct filterops devctl_rfiltops = {
129 	.f_isfd = 1,
130 	.f_detach = filt_devctl_detach,
131 	.f_event = filt_devctl_read,
132 };
133 
134 static struct cdev *devctl_dev;
135 static void devaddq(const char *type, const char *what, device_t dev);
136 
137 static struct devctlbridge {
138 	send_event_f *send_f;
139 } devctl_notify_hook = { .send_f = NULL };
140 
141 static void
142 devctl_init(void)
143 {
144 	int reserve;
145 	uma_zone_t z;
146 
147 	devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL,
148 	    UID_ROOT, GID_WHEEL, 0600, "devctl");
149 	mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
150 	cv_init(&devsoftc.cv, "dev cv");
151 	STAILQ_INIT(&devsoftc.devq);
152 	knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
153 	if (devctl_queue_length > 0) {
154 		/*
155 		 * Allocate a zone for the messages. Preallocate 2% of these for
156 		 * a reserve. Allow only devctl_queue_length slabs to cap memory
157 		 * usage.  The reserve usually allows coverage of surges of
158 		 * events during memory shortages. Normally we won't have to
159 		 * re-use events from the queue, but will in extreme shortages.
160 		 */
161 		z = devsoftc.zone = uma_zcreate("DEVCTL",
162 		    sizeof(struct dev_event_info), NULL, NULL, NULL, NULL,
163 		    UMA_ALIGN_PTR, 0);
164 		reserve = max(devctl_queue_length / 50, 100);	/* 2% reserve */
165 		uma_zone_set_max(z, devctl_queue_length);
166 		uma_zone_set_maxcache(z, 0);
167 		uma_zone_reserve(z, reserve);
168 		uma_prealloc(z, reserve);
169 	}
170 	EVENTHANDLER_REGISTER(device_attach, devctl_attach_handler,
171 	    NULL, EVENTHANDLER_PRI_LAST);
172 	EVENTHANDLER_REGISTER(device_detach, devctl_detach_handler,
173 	    NULL, EVENTHANDLER_PRI_LAST);
174 	EVENTHANDLER_REGISTER(device_nomatch, devctl_nomatch_handler,
175 	    NULL, EVENTHANDLER_PRI_LAST);
176 }
177 SYSINIT(devctl_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, devctl_init, NULL);
178 
179 /*
180  * A device was added to the tree.  We are called just after it successfully
181  * attaches (that is, probe and attach success for this device).  No call
182  * is made if a device is merely parented into the tree.  See devnomatch
183  * if probe fails.  If attach fails, no notification is sent (but maybe
184  * we should have a different message for this).
185  */
186 static void
187 devctl_attach_handler(void *arg __unused, device_t dev)
188 {
189 	devaddq("+", device_get_nameunit(dev), dev);
190 }
191 
192 /*
193  * A device was removed from the tree.  We are called just before this
194  * happens.
195  */
196 static void
197 devctl_detach_handler(void *arg __unused, device_t dev, enum evhdev_detach state)
198 {
199 	if (state == EVHDEV_DETACH_COMPLETE)
200 		devaddq("-", device_get_nameunit(dev), dev);
201 }
202 
203 /*
204  * Called when there's no match for this device.  This is only called
205  * the first time that no match happens, so we don't keep getting this
206  * message.  Should that prove to be undesirable, we can change it.
207  * This is called when all drivers that can attach to a given bus
208  * decline to accept this device.  Other errors may not be detected.
209  */
210 static void
211 devctl_nomatch_handler(void *arg __unused, device_t dev)
212 {
213 	devaddq("?", "", dev);
214 }
215 
216 static int
217 devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
218 {
219 	mtx_lock(&devsoftc.mtx);
220 	if (devsoftc.inuse) {
221 		mtx_unlock(&devsoftc.mtx);
222 		return (EBUSY);
223 	}
224 	/* move to init */
225 	devsoftc.inuse = 1;
226 	mtx_unlock(&devsoftc.mtx);
227 	return (0);
228 }
229 
230 static int
231 devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
232 {
233 	mtx_lock(&devsoftc.mtx);
234 	devsoftc.inuse = 0;
235 	devsoftc.nonblock = 0;
236 	devsoftc.async = 0;
237 	cv_broadcast(&devsoftc.cv);
238 	funsetown(&devsoftc.sigio);
239 	mtx_unlock(&devsoftc.mtx);
240 	return (0);
241 }
242 
243 /*
244  * The read channel for this device is used to report changes to
245  * userland in realtime.  We are required to free the data as well as
246  * the n1 object because we allocate them separately.  Also note that
247  * we return one record at a time.  If you try to read this device a
248  * character at a time, you will lose the rest of the data.  Listening
249  * programs are expected to cope.
250  */
251 static int
252 devread(struct cdev *dev, struct uio *uio, int ioflag)
253 {
254 	struct dev_event_info *n1;
255 	int rv;
256 
257 	mtx_lock(&devsoftc.mtx);
258 	while (STAILQ_EMPTY(&devsoftc.devq)) {
259 		if (devsoftc.nonblock) {
260 			mtx_unlock(&devsoftc.mtx);
261 			return (EAGAIN);
262 		}
263 		rv = cv_wait_sig(&devsoftc.cv, &devsoftc.mtx);
264 		if (rv) {
265 			/*
266 			 * Need to translate ERESTART to EINTR here? -- jake
267 			 */
268 			mtx_unlock(&devsoftc.mtx);
269 			return (rv);
270 		}
271 	}
272 	n1 = STAILQ_FIRST(&devsoftc.devq);
273 	STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
274 	devsoftc.queued--;
275 	mtx_unlock(&devsoftc.mtx);
276 	rv = uiomove(n1->dei_data, strlen(n1->dei_data), uio);
277 	uma_zfree(devsoftc.zone, n1);
278 	return (rv);
279 }
280 
281 static	int
282 devioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
283 {
284 	switch (cmd) {
285 	case FIONBIO:
286 		if (*(int*)data)
287 			devsoftc.nonblock = 1;
288 		else
289 			devsoftc.nonblock = 0;
290 		return (0);
291 	case FIOASYNC:
292 		if (*(int*)data)
293 			devsoftc.async = 1;
294 		else
295 			devsoftc.async = 0;
296 		return (0);
297 	case FIOSETOWN:
298 		return fsetown(*(int *)data, &devsoftc.sigio);
299 	case FIOGETOWN:
300 		*(int *)data = fgetown(&devsoftc.sigio);
301 		return (0);
302 
303 		/* (un)Support for other fcntl() calls. */
304 	case FIOCLEX:
305 	case FIONCLEX:
306 	case FIONREAD:
307 	default:
308 		break;
309 	}
310 	return (ENOTTY);
311 }
312 
313 static	int
314 devpoll(struct cdev *dev, int events, struct thread *td)
315 {
316 	int	revents = 0;
317 
318 	mtx_lock(&devsoftc.mtx);
319 	if (events & (POLLIN | POLLRDNORM)) {
320 		if (!STAILQ_EMPTY(&devsoftc.devq))
321 			revents = events & (POLLIN | POLLRDNORM);
322 		else
323 			selrecord(td, &devsoftc.sel);
324 	}
325 	mtx_unlock(&devsoftc.mtx);
326 
327 	return (revents);
328 }
329 
330 static int
331 devkqfilter(struct cdev *dev, struct knote *kn)
332 {
333 	int error;
334 
335 	if (kn->kn_filter == EVFILT_READ) {
336 		kn->kn_fop = &devctl_rfiltops;
337 		knlist_add(&devsoftc.sel.si_note, kn, 0);
338 		error = 0;
339 	} else
340 		error = EINVAL;
341 	return (error);
342 }
343 
344 static void
345 filt_devctl_detach(struct knote *kn)
346 {
347 	knlist_remove(&devsoftc.sel.si_note, kn, 0);
348 }
349 
350 static int
351 filt_devctl_read(struct knote *kn, long hint)
352 {
353 	kn->kn_data = devsoftc.queued;
354 	return (kn->kn_data != 0);
355 }
356 
357 /**
358  * @brief Return whether the userland process is running
359  */
360 bool
361 devctl_process_running(void)
362 {
363 	return (devsoftc.inuse == 1);
364 }
365 
366 static struct dev_event_info *
367 devctl_alloc_dei(void)
368 {
369 	struct dev_event_info *dei = NULL;
370 
371 	mtx_lock(&devsoftc.mtx);
372 	if (devctl_queue_length == 0)
373 		goto out;
374 	dei = uma_zalloc(devsoftc.zone, M_NOWAIT);
375 	if (dei == NULL)
376 		dei = uma_zalloc(devsoftc.zone, M_NOWAIT | M_USE_RESERVE);
377 	if (dei == NULL) {
378 		/*
379 		 * Guard against no items in the queue. Normally, this won't
380 		 * happen, but if lots of events happen all at once and there's
381 		 * a chance we're out of allocated space but none have yet been
382 		 * queued when we get here, leaving nothing to steal. This can
383 		 * also happen with error injection. Fail safe by returning
384 		 * NULL in that case..
385 		 */
386 		if (devsoftc.queued == 0)
387 			goto out;
388 		dei = STAILQ_FIRST(&devsoftc.devq);
389 		STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
390 		devsoftc.queued--;
391 	}
392 	MPASS(dei != NULL);
393 	*dei->dei_data = '\0';
394 out:
395 	mtx_unlock(&devsoftc.mtx);
396 	return (dei);
397 }
398 
399 static struct dev_event_info *
400 devctl_alloc_dei_sb(struct sbuf *sb)
401 {
402 	struct dev_event_info *dei;
403 
404 	dei = devctl_alloc_dei();
405 	if (dei != NULL)
406 		sbuf_new(sb, dei->dei_data, sizeof(dei->dei_data), SBUF_FIXEDLEN);
407 	return (dei);
408 }
409 
410 static void
411 devctl_free_dei(struct dev_event_info *dei)
412 {
413 	uma_zfree(devsoftc.zone, dei);
414 }
415 
416 static void
417 devctl_queue(struct dev_event_info *dei)
418 {
419 	mtx_lock(&devsoftc.mtx);
420 	STAILQ_INSERT_TAIL(&devsoftc.devq, dei, dei_link);
421 	devsoftc.queued++;
422 	cv_broadcast(&devsoftc.cv);
423 	KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
424 	mtx_unlock(&devsoftc.mtx);
425 	selwakeup(&devsoftc.sel);
426 	if (devsoftc.async && devsoftc.sigio != NULL)
427 		pgsigio(&devsoftc.sigio, SIGIO, 0);
428 }
429 
430 /**
431  * @brief Send a 'notification' to userland, using standard ways
432  */
433 void
434 devctl_notify(const char *system, const char *subsystem, const char *type,
435     const char *data)
436 {
437 	struct dev_event_info *dei;
438 	struct sbuf sb;
439 
440 	if (system == NULL || subsystem == NULL || type == NULL)
441 		return;
442 	if (devctl_notify_hook.send_f != NULL)
443 		devctl_notify_hook.send_f(system, subsystem, type, data);
444 	dei = devctl_alloc_dei_sb(&sb);
445 	if (dei == NULL)
446 		return;
447 	sbuf_cpy(&sb, "!system=");
448 	sbuf_cat(&sb, system);
449 	sbuf_cat(&sb, " subsystem=");
450 	sbuf_cat(&sb, subsystem);
451 	sbuf_cat(&sb, " type=");
452 	sbuf_cat(&sb, type);
453 	if (data != NULL) {
454 		sbuf_putc(&sb, ' ');
455 		sbuf_cat(&sb, data);
456 	}
457 	sbuf_putc(&sb, '\n');
458 	if (sbuf_finish(&sb) != 0)
459 		devctl_free_dei(dei);	/* overflow -> drop it */
460 	else
461 		devctl_queue(dei);
462 }
463 
464 /*
465  * Common routine that tries to make sending messages as easy as possible.
466  * We allocate memory for the data, copy strings into that, but do not
467  * free it unless there's an error.  The dequeue part of the driver should
468  * free the data.  We don't send data when the device is disabled.  We do
469  * send data, even when we have no listeners, because we wish to avoid
470  * races relating to startup and restart of listening applications.
471  *
472  * devaddq is designed to string together the type of event, with the
473  * object of that event, plus the plug and play info and location info
474  * for that event.  This is likely most useful for devices, but less
475  * useful for other consumers of this interface.  Those should use
476  * the devctl_notify() interface instead.
477  *
478  * Output:
479  *	${type}${what} at $(location dev) $(pnp-info dev) on $(parent dev)
480  */
481 static void
482 devaddq(const char *type, const char *what, device_t dev)
483 {
484 	struct dev_event_info *dei;
485 	const char *parstr;
486 	struct sbuf sb;
487 	size_t beginlen;
488 
489 	dei = devctl_alloc_dei_sb(&sb);
490 	if (dei == NULL)
491 		return;
492 	sbuf_cpy(&sb, type);
493 	sbuf_cat(&sb, what);
494 	sbuf_cat(&sb, " at ");
495 	beginlen = sbuf_len(&sb);
496 
497 	/* Add in the location */
498 	bus_child_location(dev, &sb);
499 	sbuf_putc(&sb, ' ');
500 
501 	/* Add in pnpinfo */
502 	bus_child_pnpinfo(dev, &sb);
503 
504 	/* Get the parent of this device, or / if high enough in the tree. */
505 	if (device_get_parent(dev) == NULL)
506 		parstr = ".";	/* Or '/' ? */
507 	else
508 		parstr = device_get_nameunit(device_get_parent(dev));
509 	sbuf_cat(&sb, " on ");
510 	sbuf_cat(&sb, parstr);
511 	sbuf_putc(&sb, '\n');
512 	if (sbuf_finish(&sb) != 0)
513 		goto bad;
514 	if (devctl_notify_hook.send_f != NULL) {
515 		const char *t;
516 
517 		switch (*type) {
518 		case '+':
519 			t = "ATTACH";
520 			break;
521 		case '-':
522 			t = "DETACH";
523 			break;
524 		default:
525 			t = "NOMATCH";
526 			break;
527 		}
528 		devctl_notify_hook.send_f("device",
529 		    what, t, sbuf_data(&sb) + beginlen);
530 	}
531 	devctl_queue(dei);
532 	return;
533 bad:
534 	devctl_free_dei(dei);
535 }
536 
537 static int
538 sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
539 {
540 	int q, error;
541 
542 	q = devctl_queue_length;
543 	error = sysctl_handle_int(oidp, &q, 0, req);
544 	if (error || !req->newptr)
545 		return (error);
546 	if (q < 0)
547 		return (EINVAL);
548 
549 	/*
550 	 * When set as a tunable, we've not yet initialized the mutex.
551 	 * It is safe to just assign to devctl_queue_length and return
552 	 * as we're racing no one. We'll use whatever value set in
553 	 * devinit.
554 	 */
555 	if (!mtx_initialized(&devsoftc.mtx)) {
556 		devctl_queue_length = q;
557 		return (0);
558 	}
559 
560 	/*
561 	 * XXX It's hard to grow or shrink the UMA zone. Only allow
562 	 * disabling the queue size for the moment until underlying
563 	 * UMA issues can be sorted out.
564 	 */
565 	if (q != 0)
566 		return (EINVAL);
567 	if (q == devctl_queue_length)
568 		return (0);
569 	mtx_lock(&devsoftc.mtx);
570 	devctl_queue_length = 0;
571 	uma_zdestroy(devsoftc.zone);
572 	devsoftc.zone = 0;
573 	mtx_unlock(&devsoftc.mtx);
574 	return (0);
575 }
576 
/**
 * @brief Append a string to an sbuf, escaping quotes and backslashes.
 *
 * The devctl protocol relies on quoted strings having matching quotes.
 * Every double quote and every backslash in the source is therefore
 * prefixed with a backslash, so the result can be embedded safely in, for
 * example, pnp info strings.
 *
 * @param sb	sbuf to place the characters into
 * @param src	Original, NUL-terminated string.
 */
void
devctl_safe_quote_sb(struct sbuf *sb, const char *src)
{
	const char *p;

	for (p = src; *p != '\0'; p++) {
		if (*p == '\\' || *p == '"')
			sbuf_putc(sb, '\\');
		sbuf_putc(sb, *p);
	}
}
596 
597 void
598 devctl_set_notify_hook(send_event_f *hook)
599 {
600 	devctl_notify_hook.send_f = hook;
601 }
602 
603 void
604 devctl_unset_notify_hook(void)
605 {
606 	devctl_notify_hook.send_f = NULL;
607 }
608 
609