xref: /freebsd/sys/kern/kern_devctl.c (revision 97aedd3395b4e9b017e29823096771aff0835ff0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002-2020 M. Warner Losh <imp@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 #include <sys/cdefs.h>
29 #include "opt_bus.h"
30 #include "opt_ddb.h"
31 
32 #include <sys/param.h>
33 #include <sys/conf.h>
34 #include <sys/eventhandler.h>
35 #include <sys/filio.h>
36 #include <sys/lock.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/poll.h>
41 #include <sys/priv.h>
42 #include <sys/proc.h>
43 #include <sys/condvar.h>
44 #include <sys/queue.h>
45 #include <machine/bus.h>
46 #include <sys/sbuf.h>
47 #include <sys/selinfo.h>
48 #include <sys/smp.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/uio.h>
52 #include <sys/bus.h>
53 
54 #include <machine/cpu.h>
55 #include <machine/stdarg.h>
56 
57 #include <vm/uma.h>
58 #include <vm/vm.h>
59 
60 #include <ddb/ddb.h>
61 
62 STAILQ_HEAD(devq, dev_event_info);
63 
64 static struct dev_softc {
65 	int		inuse;
66 	int		nonblock;
67 	int		queued;
68 	int		async;
69 	struct mtx	mtx;
70 	struct cv	cv;
71 	struct selinfo	sel;
72 	struct devq	devq;
73 	struct sigio	*sigio;
74 	uma_zone_t	zone;
75 } devsoftc;
76 
77 /*
78  * This design allows only one reader for /dev/devctl.  This is not desirable
79  * in the long run, but will get a lot of hair out of this implementation.
80  * Maybe we should make this device a clonable device.
81  *
82  * Also note: we specifically do not attach a device to the device_t tree
83  * to avoid potential chicken and egg problems.  One could argue that all
84  * of this belongs to the root node.
85  */
86 
87 #define DEVCTL_DEFAULT_QUEUE_LEN 1000
88 static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
89 static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
90 SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RWTUN |
91     CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
92 static bool nomatch_enabled = true;
93 SYSCTL_BOOL(_hw_bus, OID_AUTO, devctl_nomatch_enabled, CTLFLAG_RWTUN,
94     &nomatch_enabled, 0, "enable nomatch events");
95 
96 static void devctl_attach_handler(void *arg __unused, device_t dev);
97 static void devctl_detach_handler(void *arg __unused, device_t dev,
98     enum evhdev_detach state);
99 static void devctl_nomatch_handler(void *arg __unused, device_t dev);
100 
101 static d_open_t		devopen;
102 static d_close_t	devclose;
103 static d_read_t		devread;
104 static d_ioctl_t	devioctl;
105 static d_poll_t		devpoll;
106 static d_kqfilter_t	devkqfilter;
107 
108 #define DEVCTL_BUFFER (1024 - sizeof(void *))
109 struct dev_event_info {
110 	STAILQ_ENTRY(dev_event_info) dei_link;
111 	char dei_data[DEVCTL_BUFFER];
112 };
113 
114 
115 static struct cdevsw dev_cdevsw = {
116 	.d_version =	D_VERSION,
117 	.d_open =	devopen,
118 	.d_close =	devclose,
119 	.d_read =	devread,
120 	.d_ioctl =	devioctl,
121 	.d_poll =	devpoll,
122 	.d_kqfilter =	devkqfilter,
123 	.d_name =	"devctl",
124 };
125 
126 static void	filt_devctl_detach(struct knote *kn);
127 static int	filt_devctl_read(struct knote *kn, long hint);
128 
129 static struct filterops devctl_rfiltops = {
130 	.f_isfd = 1,
131 	.f_detach = filt_devctl_detach,
132 	.f_event = filt_devctl_read,
133 };
134 
135 static struct cdev *devctl_dev;
136 static void devaddq(const char *type, const char *what, device_t dev);
137 
138 static struct devctlbridge {
139 	send_event_f *send_f;
140 } devctl_notify_hook = { .send_f = NULL };
141 
142 static void
devctl_init(void)143 devctl_init(void)
144 {
145 	int reserve;
146 	uma_zone_t z;
147 
148 	devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL,
149 	    UID_ROOT, GID_WHEEL, 0600, "devctl");
150 	mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
151 	cv_init(&devsoftc.cv, "dev cv");
152 	STAILQ_INIT(&devsoftc.devq);
153 	knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
154 	if (devctl_queue_length > 0) {
155 		/*
156 		 * Allocate a zone for the messages. Preallocate 2% of these for
157 		 * a reserve. Allow only devctl_queue_length slabs to cap memory
158 		 * usage.  The reserve usually allows coverage of surges of
159 		 * events during memory shortages. Normally we won't have to
160 		 * re-use events from the queue, but will in extreme shortages.
161 		 */
162 		z = devsoftc.zone = uma_zcreate("DEVCTL",
163 		    sizeof(struct dev_event_info), NULL, NULL, NULL, NULL,
164 		    UMA_ALIGN_PTR, 0);
165 		reserve = max(devctl_queue_length / 50, 100);	/* 2% reserve */
166 		uma_zone_set_max(z, devctl_queue_length);
167 		uma_zone_set_maxcache(z, 0);
168 		uma_zone_reserve(z, reserve);
169 		uma_prealloc(z, reserve);
170 	}
171 	EVENTHANDLER_REGISTER(device_attach, devctl_attach_handler,
172 	    NULL, EVENTHANDLER_PRI_LAST);
173 	EVENTHANDLER_REGISTER(device_detach, devctl_detach_handler,
174 	    NULL, EVENTHANDLER_PRI_LAST);
175 	EVENTHANDLER_REGISTER(device_nomatch, devctl_nomatch_handler,
176 	    NULL, EVENTHANDLER_PRI_LAST);
177 }
178 SYSINIT(devctl_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, devctl_init, NULL);
179 
180 /*
181  * A device was added to the tree.  We are called just after it successfully
182  * attaches (that is, probe and attach success for this device).  No call
183  * is made if a device is merely parented into the tree.  See devnomatch
184  * if probe fails.  If attach fails, no notification is sent (but maybe
185  * we should have a different message for this).
186  */
187 static void
devctl_attach_handler(void * arg __unused,device_t dev)188 devctl_attach_handler(void *arg __unused, device_t dev)
189 {
190 	devaddq("+", device_get_nameunit(dev), dev);
191 }
192 
193 /*
194  * A device was removed from the tree.  We are called just before this
195  * happens.
196  */
197 static void
devctl_detach_handler(void * arg __unused,device_t dev,enum evhdev_detach state)198 devctl_detach_handler(void *arg __unused, device_t dev, enum evhdev_detach state)
199 {
200 	if (state == EVHDEV_DETACH_COMPLETE)
201 		devaddq("-", device_get_nameunit(dev), dev);
202 }
203 
204 /*
205  * Called when there's no match for this device.  This is only called
206  * the first time that no match happens, so we don't keep getting this
207  * message.  Should that prove to be undesirable, we can change it.
208  * This is called when all drivers that can attach to a given bus
209  * decline to accept this device.  Other errors may not be detected.
210  */
211 static void
devctl_nomatch_handler(void * arg __unused,device_t dev)212 devctl_nomatch_handler(void *arg __unused, device_t dev)
213 {
214 	if (nomatch_enabled)
215 		devaddq("?", "", dev);
216 }
217 
218 static int
devopen(struct cdev * dev,int oflags,int devtype,struct thread * td)219 devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
220 {
221 	mtx_lock(&devsoftc.mtx);
222 	if (devsoftc.inuse) {
223 		mtx_unlock(&devsoftc.mtx);
224 		return (EBUSY);
225 	}
226 	/* move to init */
227 	devsoftc.inuse = 1;
228 	mtx_unlock(&devsoftc.mtx);
229 	return (0);
230 }
231 
232 static int
devclose(struct cdev * dev,int fflag,int devtype,struct thread * td)233 devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
234 {
235 	mtx_lock(&devsoftc.mtx);
236 	devsoftc.inuse = 0;
237 	devsoftc.nonblock = 0;
238 	devsoftc.async = 0;
239 	cv_broadcast(&devsoftc.cv);
240 	funsetown(&devsoftc.sigio);
241 	mtx_unlock(&devsoftc.mtx);
242 	return (0);
243 }
244 
245 /*
246  * The read channel for this device is used to report changes to
247  * userland in realtime.  We are required to free the data as well as
248  * the n1 object because we allocate them separately.  Also note that
249  * we return one record at a time.  If you try to read this device a
250  * character at a time, you will lose the rest of the data.  Listening
251  * programs are expected to cope.
252  */
253 static int
devread(struct cdev * dev,struct uio * uio,int ioflag)254 devread(struct cdev *dev, struct uio *uio, int ioflag)
255 {
256 	struct dev_event_info *n1;
257 	int rv;
258 
259 	mtx_lock(&devsoftc.mtx);
260 	while (STAILQ_EMPTY(&devsoftc.devq)) {
261 		if (devsoftc.nonblock) {
262 			mtx_unlock(&devsoftc.mtx);
263 			return (EAGAIN);
264 		}
265 		rv = cv_wait_sig(&devsoftc.cv, &devsoftc.mtx);
266 		if (rv) {
267 			/*
268 			 * Need to translate ERESTART to EINTR here? -- jake
269 			 */
270 			mtx_unlock(&devsoftc.mtx);
271 			return (rv);
272 		}
273 	}
274 	n1 = STAILQ_FIRST(&devsoftc.devq);
275 	STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
276 	devsoftc.queued--;
277 	mtx_unlock(&devsoftc.mtx);
278 	rv = uiomove(n1->dei_data, strlen(n1->dei_data), uio);
279 	uma_zfree(devsoftc.zone, n1);
280 	return (rv);
281 }
282 
283 static	int
devioctl(struct cdev * dev,u_long cmd,caddr_t data,int fflag,struct thread * td)284 devioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
285 {
286 	switch (cmd) {
287 	case FIONBIO:
288 		if (*(int*)data)
289 			devsoftc.nonblock = 1;
290 		else
291 			devsoftc.nonblock = 0;
292 		return (0);
293 	case FIOASYNC:
294 		if (*(int*)data)
295 			devsoftc.async = 1;
296 		else
297 			devsoftc.async = 0;
298 		return (0);
299 	case FIOSETOWN:
300 		return fsetown(*(int *)data, &devsoftc.sigio);
301 	case FIOGETOWN:
302 		*(int *)data = fgetown(&devsoftc.sigio);
303 		return (0);
304 
305 		/* (un)Support for other fcntl() calls. */
306 	case FIOCLEX:
307 	case FIONCLEX:
308 	case FIONREAD:
309 	default:
310 		break;
311 	}
312 	return (ENOTTY);
313 }
314 
315 static	int
devpoll(struct cdev * dev,int events,struct thread * td)316 devpoll(struct cdev *dev, int events, struct thread *td)
317 {
318 	int	revents = 0;
319 
320 	mtx_lock(&devsoftc.mtx);
321 	if (events & (POLLIN | POLLRDNORM)) {
322 		if (!STAILQ_EMPTY(&devsoftc.devq))
323 			revents = events & (POLLIN | POLLRDNORM);
324 		else
325 			selrecord(td, &devsoftc.sel);
326 	}
327 	mtx_unlock(&devsoftc.mtx);
328 
329 	return (revents);
330 }
331 
332 static int
devkqfilter(struct cdev * dev,struct knote * kn)333 devkqfilter(struct cdev *dev, struct knote *kn)
334 {
335 	int error;
336 
337 	if (kn->kn_filter == EVFILT_READ) {
338 		kn->kn_fop = &devctl_rfiltops;
339 		knlist_add(&devsoftc.sel.si_note, kn, 0);
340 		error = 0;
341 	} else
342 		error = EINVAL;
343 	return (error);
344 }
345 
346 static void
filt_devctl_detach(struct knote * kn)347 filt_devctl_detach(struct knote *kn)
348 {
349 	knlist_remove(&devsoftc.sel.si_note, kn, 0);
350 }
351 
352 static int
filt_devctl_read(struct knote * kn,long hint)353 filt_devctl_read(struct knote *kn, long hint)
354 {
355 	kn->kn_data = devsoftc.queued;
356 	return (kn->kn_data != 0);
357 }
358 
359 /**
360  * @brief Return whether the userland process is running
361  */
362 bool
devctl_process_running(void)363 devctl_process_running(void)
364 {
365 	return (devsoftc.inuse == 1);
366 }
367 
368 static struct dev_event_info *
devctl_alloc_dei(void)369 devctl_alloc_dei(void)
370 {
371 	struct dev_event_info *dei = NULL;
372 
373 	mtx_lock(&devsoftc.mtx);
374 	if (devctl_queue_length == 0)
375 		goto out;
376 	dei = uma_zalloc(devsoftc.zone, M_NOWAIT);
377 	if (dei == NULL)
378 		dei = uma_zalloc(devsoftc.zone, M_NOWAIT | M_USE_RESERVE);
379 	if (dei == NULL) {
380 		/*
381 		 * Guard against no items in the queue. Normally, this won't
382 		 * happen, but if lots of events happen all at once and there's
383 		 * a chance we're out of allocated space but none have yet been
384 		 * queued when we get here, leaving nothing to steal. This can
385 		 * also happen with error injection. Fail safe by returning
386 		 * NULL in that case..
387 		 */
388 		if (devsoftc.queued == 0)
389 			goto out;
390 		dei = STAILQ_FIRST(&devsoftc.devq);
391 		STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
392 		devsoftc.queued--;
393 	}
394 	MPASS(dei != NULL);
395 	*dei->dei_data = '\0';
396 out:
397 	mtx_unlock(&devsoftc.mtx);
398 	return (dei);
399 }
400 
401 static struct dev_event_info *
devctl_alloc_dei_sb(struct sbuf * sb)402 devctl_alloc_dei_sb(struct sbuf *sb)
403 {
404 	struct dev_event_info *dei;
405 
406 	dei = devctl_alloc_dei();
407 	if (dei != NULL)
408 		sbuf_new(sb, dei->dei_data, sizeof(dei->dei_data), SBUF_FIXEDLEN);
409 	return (dei);
410 }
411 
412 static void
devctl_free_dei(struct dev_event_info * dei)413 devctl_free_dei(struct dev_event_info *dei)
414 {
415 	uma_zfree(devsoftc.zone, dei);
416 }
417 
418 static void
devctl_queue(struct dev_event_info * dei)419 devctl_queue(struct dev_event_info *dei)
420 {
421 	mtx_lock(&devsoftc.mtx);
422 	STAILQ_INSERT_TAIL(&devsoftc.devq, dei, dei_link);
423 	devsoftc.queued++;
424 	cv_broadcast(&devsoftc.cv);
425 	KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
426 	mtx_unlock(&devsoftc.mtx);
427 	selwakeup(&devsoftc.sel);
428 	if (devsoftc.async && devsoftc.sigio != NULL)
429 		pgsigio(&devsoftc.sigio, SIGIO, 0);
430 }
431 
432 /**
433  * @brief Send a 'notification' to userland, using standard ways
434  */
435 void
devctl_notify(const char * system,const char * subsystem,const char * type,const char * data)436 devctl_notify(const char *system, const char *subsystem, const char *type,
437     const char *data)
438 {
439 	struct dev_event_info *dei;
440 	struct sbuf sb;
441 
442 	if (system == NULL || subsystem == NULL || type == NULL)
443 		return;
444 	if (devctl_notify_hook.send_f != NULL)
445 		devctl_notify_hook.send_f(system, subsystem, type, data);
446 	dei = devctl_alloc_dei_sb(&sb);
447 	if (dei == NULL)
448 		return;
449 	sbuf_cpy(&sb, "!system=");
450 	sbuf_cat(&sb, system);
451 	sbuf_cat(&sb, " subsystem=");
452 	sbuf_cat(&sb, subsystem);
453 	sbuf_cat(&sb, " type=");
454 	sbuf_cat(&sb, type);
455 	if (data != NULL) {
456 		sbuf_putc(&sb, ' ');
457 		sbuf_cat(&sb, data);
458 	}
459 	sbuf_putc(&sb, '\n');
460 	if (sbuf_finish(&sb) != 0)
461 		devctl_free_dei(dei);	/* overflow -> drop it */
462 	else
463 		devctl_queue(dei);
464 }
465 
466 /*
467  * Common routine that tries to make sending messages as easy as possible.
468  * We allocate memory for the data, copy strings into that, but do not
469  * free it unless there's an error.  The dequeue part of the driver should
470  * free the data.  We don't send data when the device is disabled.  We do
471  * send data, even when we have no listeners, because we wish to avoid
472  * races relating to startup and restart of listening applications.
473  *
474  * devaddq is designed to string together the type of event, with the
475  * object of that event, plus the plug and play info and location info
476  * for that event.  This is likely most useful for devices, but less
477  * useful for other consumers of this interface.  Those should use
478  * the devctl_notify() interface instead.
479  *
480  * Output:
481  *	${type}${what} at $(location dev) $(pnp-info dev) on $(parent dev)
482  */
483 static void
devaddq(const char * type,const char * what,device_t dev)484 devaddq(const char *type, const char *what, device_t dev)
485 {
486 	struct dev_event_info *dei;
487 	const char *parstr;
488 	struct sbuf sb;
489 	size_t beginlen;
490 
491 	dei = devctl_alloc_dei_sb(&sb);
492 	if (dei == NULL)
493 		return;
494 	sbuf_cpy(&sb, type);
495 	sbuf_cat(&sb, what);
496 	sbuf_cat(&sb, " at ");
497 	beginlen = sbuf_len(&sb);
498 
499 	/* Add in the location */
500 	bus_child_location(dev, &sb);
501 	sbuf_putc(&sb, ' ');
502 
503 	/* Add in pnpinfo */
504 	bus_child_pnpinfo(dev, &sb);
505 
506 	/* Get the parent of this device, or / if high enough in the tree. */
507 	if (device_get_parent(dev) == NULL)
508 		parstr = ".";	/* Or '/' ? */
509 	else
510 		parstr = device_get_nameunit(device_get_parent(dev));
511 	sbuf_cat(&sb, " on ");
512 	sbuf_cat(&sb, parstr);
513 	sbuf_putc(&sb, '\n');
514 	if (sbuf_finish(&sb) != 0)
515 		goto bad;
516 	if (devctl_notify_hook.send_f != NULL) {
517 		const char *t;
518 
519 		switch (*type) {
520 		case '+':
521 			t = "ATTACH";
522 			break;
523 		case '-':
524 			t = "DETACH";
525 			break;
526 		default:
527 			t = "NOMATCH";
528 			break;
529 		}
530 		devctl_notify_hook.send_f("device",
531 		    what, t, sbuf_data(&sb) + beginlen);
532 	}
533 	devctl_queue(dei);
534 	return;
535 bad:
536 	devctl_free_dei(dei);
537 }
538 
539 static int
sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)540 sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
541 {
542 	int q, error;
543 
544 	q = devctl_queue_length;
545 	error = sysctl_handle_int(oidp, &q, 0, req);
546 	if (error || !req->newptr)
547 		return (error);
548 	if (q < 0)
549 		return (EINVAL);
550 
551 	/*
552 	 * When set as a tunable, we've not yet initialized the mutex.
553 	 * It is safe to just assign to devctl_queue_length and return
554 	 * as we're racing no one. We'll use whatever value set in
555 	 * devinit.
556 	 */
557 	if (!mtx_initialized(&devsoftc.mtx)) {
558 		devctl_queue_length = q;
559 		return (0);
560 	}
561 
562 	/*
563 	 * XXX It's hard to grow or shrink the UMA zone. Only allow
564 	 * disabling the queue size for the moment until underlying
565 	 * UMA issues can be sorted out.
566 	 */
567 	if (q != 0)
568 		return (EINVAL);
569 	if (q == devctl_queue_length)
570 		return (0);
571 	mtx_lock(&devsoftc.mtx);
572 	devctl_queue_length = 0;
573 	uma_zdestroy(devsoftc.zone);
574 	devsoftc.zone = 0;
575 	mtx_unlock(&devsoftc.mtx);
576 	return (0);
577 }
578 
/**
 * @brief safely quotes strings that might have double quotes in them.
 *
 * The devctl protocol relies on quoted strings having matching quotes.
 * This routine copies src into sb and puts a backslash in front of every
 * double quote and every backslash, so that the result can be embedded in
 * a quoted value, for example when constructing pnp info strings.  The
 * surrounding quotes themselves are not added.
 *
 * @param sb	sbuf to place the characters into
 * @param src	Original buffer (NUL-terminated string).
 */
void
devctl_safe_quote_sb(struct sbuf *sb, const char *src)
{
	const char *p;

	for (p = src; *p != '\0'; p++) {
		switch (*p) {
		case '"':
		case '\\':
			/* Needs an escaping backslash in front of it. */
			sbuf_putc(sb, '\\');
			break;
		default:
			break;
		}
		sbuf_putc(sb, *p);
	}
}
598 
599 void
devctl_set_notify_hook(send_event_f * hook)600 devctl_set_notify_hook(send_event_f *hook)
601 {
602 	devctl_notify_hook.send_f = hook;
603 }
604 
605 void
devctl_unset_notify_hook(void)606 devctl_unset_notify_hook(void)
607 {
608 	devctl_notify_hook.send_f = NULL;
609 }
610 
611