xref: /freebsd/sys/kern/kern_devctl.c (revision 36138969847b231cd98f48272e2bdf88a8dc08dd)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2002-2020 M. Warner Losh <imp@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 #include <sys/cdefs.h>
29 #include "opt_bus.h"
30 #include "opt_ddb.h"
31 
32 #include <sys/param.h>
33 #include <sys/conf.h>
34 #include <sys/eventhandler.h>
35 #include <sys/filio.h>
36 #include <sys/lock.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/poll.h>
41 #include <sys/priv.h>
42 #include <sys/proc.h>
43 #include <sys/condvar.h>
44 #include <sys/queue.h>
45 #include <machine/bus.h>
46 #include <sys/sbuf.h>
47 #include <sys/selinfo.h>
48 #include <sys/smp.h>
49 #include <sys/stdarg.h>
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
52 #include <sys/uio.h>
53 #include <sys/bus.h>
54 
55 #include <machine/cpu.h>
56 
57 #include <vm/uma.h>
58 #include <vm/vm.h>
59 
60 #include <ddb/ddb.h>
61 
/* FIFO of pending event messages, drained by devread(). */
STAILQ_HEAD(devq, dev_event_info);

/*
 * Global state for /dev/devctl.  Only one reader may have the device open
 * at a time (see the comment below), so a single static softc suffices.
 */
static struct dev_softc {
	int		inuse;		/* Device is open (single reader). */
	int		nonblock;	/* FIONBIO: non-blocking reads. */
	int		queued;		/* Number of messages in devq. */
	int		async;		/* FIOASYNC: deliver SIGIO on events. */
	struct mtx	mtx;		/* Protects this struct and devq. */
	struct cv	cv;		/* Readers sleep here for new events. */
	struct selinfo	sel;		/* select/poll/kqueue bookkeeping. */
	struct devq	devq;		/* Queue of pending messages. */
	struct sigio	*sigio;		/* FIOSETOWN owner for SIGIO. */
	uma_zone_t	zone;		/* Allocator for dev_event_info. */
} devsoftc;
76 
77 /*
78  * This design allows only one reader for /dev/devctl.  This is not desirable
79  * in the long run, but will get a lot of hair out of this implementation.
80  * Maybe we should make this device a clonable device.
81  *
82  * Also note: we specifically do not attach a device to the device_t tree
83  * to avoid potential chicken and egg problems.  One could argue that all
84  * of this belongs to the root node.
85  */
86 
/* Default maximum number of messages kept in the queue. */
#define DEVCTL_DEFAULT_QUEUE_LEN 1000
static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RWTUN |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
/* Whether '?' (no driver matched) events are generated at all. */
static bool nomatch_enabled = true;
SYSCTL_BOOL(_hw_bus, OID_AUTO, devctl_nomatch_enabled, CTLFLAG_RWTUN,
    &nomatch_enabled, 0, "enable nomatch events");

/* Eventhandler callbacks that translate device-tree events to messages. */
static void devctl_attach_handler(void *arg __unused, device_t dev);
static void devctl_detach_handler(void *arg __unused, device_t dev,
    enum evhdev_detach state);
static void devctl_nomatch_handler(void *arg __unused, device_t dev);

/* cdevsw entry points for /dev/devctl. */
static d_open_t		devopen;
static d_close_t	devclose;
static d_read_t		devread;
static d_ioctl_t	devioctl;
static d_poll_t		devpoll;
static d_kqfilter_t	devkqfilter;

/*
 * One queued event message.  DEVCTL_BUFFER is sized so that the structure
 * (queue linkage pointer plus NUL-terminated message text) fits in 1024
 * bytes.
 */
#define DEVCTL_BUFFER (1024 - sizeof(void *))
struct dev_event_info {
	STAILQ_ENTRY(dev_event_info) dei_link;	/* Linkage in devsoftc.devq. */
	char dei_data[DEVCTL_BUFFER];		/* Message text. */
};
113 
114 
/* Character device switch for /dev/devctl. */
static struct cdevsw dev_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	devopen,
	.d_close =	devclose,
	.d_read =	devread,
	.d_ioctl =	devioctl,
	.d_poll =	devpoll,
	.d_kqfilter =	devkqfilter,
	.d_name =	"devctl",
};

static void	filt_devctl_detach(struct knote *kn);
static int	filt_devctl_read(struct knote *kn, long hint);

/* kqueue EVFILT_READ filter operations for /dev/devctl. */
static const struct filterops devctl_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_devctl_detach,
	.f_event = filt_devctl_read,
	.f_copy = knote_triv_copy,
};

static struct cdev *devctl_dev;		/* The /dev/devctl device node. */
static void devaddq(const char *type, const char *what, device_t dev);

/*
 * Optional bridge: when a hook is installed via devctl_set_notify_hook(),
 * every event is also handed to that callback in addition to being queued
 * for /dev/devctl readers.
 */
static struct devctlbridge {
	send_event_f *send_f;	/* NULL when no bridge is registered. */
} devctl_notify_hook = { .send_f = NULL };
142 
/*
 * Create /dev/devctl, initialize the softc state, and register the
 * eventhandlers that generate attach/detach/nomatch messages.  Runs once
 * at boot via the SYSINIT below.
 */
static void
devctl_init(void *dummy __unused)
{
	int reserve;
	uma_zone_t z;

	devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "devctl");
	mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
	cv_init(&devsoftc.cv, "dev cv");
	STAILQ_INIT(&devsoftc.devq);
	/* The knote list shares devsoftc.mtx; see KNOTE_LOCKED in devctl_queue(). */
	knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
	if (devctl_queue_length > 0) {
		/*
		 * Allocate a zone for the messages. Preallocate 2% of these for
		 * a reserve. Allow only devctl_queue_length slabs to cap memory
		 * usage.  The reserve usually allows coverage of surges of
		 * events during memory shortages. Normally we won't have to
		 * re-use events from the queue, but will in extreme shortages.
		 */
		z = devsoftc.zone = uma_zcreate("DEVCTL",
		    sizeof(struct dev_event_info), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		reserve = max(devctl_queue_length / 50, 100);	/* 2% reserve */
		uma_zone_set_max(z, devctl_queue_length);
		uma_zone_set_maxcache(z, 0);
		uma_zone_reserve(z, reserve);
		uma_prealloc(z, reserve);
	}
	EVENTHANDLER_REGISTER(device_attach, devctl_attach_handler,
	    NULL, EVENTHANDLER_PRI_LAST);
	EVENTHANDLER_REGISTER(device_detach, devctl_detach_handler,
	    NULL, EVENTHANDLER_PRI_LAST);
	EVENTHANDLER_REGISTER(device_nomatch, devctl_nomatch_handler,
	    NULL, EVENTHANDLER_PRI_LAST);
}
SYSINIT(devctl_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, devctl_init, NULL);
180 
/*
 * A device was added to the tree.  We are called just after it successfully
 * attaches (that is, probe and attach success for this device).  No call
 * is made if a device is merely parented into the tree.  See devnomatch
 * if probe fails.  If attach fails, no notification is sent (but maybe
 * we should have a different message for this).
 */
static void
devctl_attach_handler(void *arg __unused, device_t dev)
{
	/* '+' marks an attach event in the devctl protocol. */
	devaddq("+", device_get_nameunit(dev), dev);
}
193 
/*
 * A device was removed from the tree.  We are called just before this
 * happens.
 */
static void
devctl_detach_handler(void *arg __unused, device_t dev, enum evhdev_detach state)
{
	/* Only report once the detach has actually completed; '-' = detach. */
	if (state == EVHDEV_DETACH_COMPLETE)
		devaddq("-", device_get_nameunit(dev), dev);
}
204 
/*
 * Called when there's no match for this device.  This is only called
 * the first time that no match happens, so we don't keep getting this
 * message.  Should that prove to be undesirable, we can change it.
 * This is called when all drivers that can attach to a given bus
 * decline to accept this device.  Other errors may not be detected.
 */
static void
devctl_nomatch_handler(void *arg __unused, device_t dev)
{
	/* '?' events may be suppressed via hw.bus.devctl_nomatch_enabled. */
	if (nomatch_enabled)
		devaddq("?", "", dev);
}
218 
219 static int
devopen(struct cdev * dev,int oflags,int devtype,struct thread * td)220 devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
221 {
222 	mtx_lock(&devsoftc.mtx);
223 	if (devsoftc.inuse) {
224 		mtx_unlock(&devsoftc.mtx);
225 		return (EBUSY);
226 	}
227 	/* move to init */
228 	devsoftc.inuse = 1;
229 	mtx_unlock(&devsoftc.mtx);
230 	return (0);
231 }
232 
/*
 * Close /dev/devctl: release the single-reader slot and reset the
 * per-open modes so the next opener starts with defaults (blocking,
 * non-async).
 */
static int
devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	mtx_lock(&devsoftc.mtx);
	devsoftc.inuse = 0;
	devsoftc.nonblock = 0;
	devsoftc.async = 0;
	/* Wake any thread still sleeping in devread(). */
	cv_broadcast(&devsoftc.cv);
	/* Drop SIGIO ownership established via FIOSETOWN. */
	funsetown(&devsoftc.sigio);
	mtx_unlock(&devsoftc.mtx);
	return (0);
}
245 
/*
 * The read channel for this device is used to report changes to
 * userland in realtime.  Each read returns exactly one complete record,
 * which is freed back to the zone after being copied out.  If you try
 * to read this device a character at a time, you will lose the rest of
 * the data.  Listening programs are expected to cope.
 */
static int
devread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct dev_event_info *n1;
	int rv;

	mtx_lock(&devsoftc.mtx);
	/* Sleep until a message arrives, unless FIONBIO was requested. */
	while (STAILQ_EMPTY(&devsoftc.devq)) {
		if (devsoftc.nonblock) {
			mtx_unlock(&devsoftc.mtx);
			return (EAGAIN);
		}
		rv = cv_wait_sig(&devsoftc.cv, &devsoftc.mtx);
		if (rv) {
			/*
			 * Need to translate ERESTART to EINTR here? -- jake
			 */
			mtx_unlock(&devsoftc.mtx);
			return (rv);
		}
	}
	/* Dequeue one record; the copyout happens after dropping the lock. */
	n1 = STAILQ_FIRST(&devsoftc.devq);
	STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
	devsoftc.queued--;
	mtx_unlock(&devsoftc.mtx);
	rv = uiomove(n1->dei_data, strlen(n1->dei_data), uio);
	uma_zfree(devsoftc.zone, n1);
	return (rv);
}
283 
284 static	int
devioctl(struct cdev * dev,u_long cmd,caddr_t data,int fflag,struct thread * td)285 devioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
286 {
287 	switch (cmd) {
288 	case FIONBIO:
289 		if (*(int*)data)
290 			devsoftc.nonblock = 1;
291 		else
292 			devsoftc.nonblock = 0;
293 		return (0);
294 	case FIOASYNC:
295 		if (*(int*)data)
296 			devsoftc.async = 1;
297 		else
298 			devsoftc.async = 0;
299 		return (0);
300 	case FIOSETOWN:
301 		return fsetown(*(int *)data, &devsoftc.sigio);
302 	case FIOGETOWN:
303 		*(int *)data = fgetown(&devsoftc.sigio);
304 		return (0);
305 
306 		/* (un)Support for other fcntl() calls. */
307 	case FIOCLEX:
308 	case FIONCLEX:
309 	case FIONREAD:
310 	default:
311 		break;
312 	}
313 	return (ENOTTY);
314 }
315 
316 static	int
devpoll(struct cdev * dev,int events,struct thread * td)317 devpoll(struct cdev *dev, int events, struct thread *td)
318 {
319 	int	revents = 0;
320 
321 	mtx_lock(&devsoftc.mtx);
322 	if (events & (POLLIN | POLLRDNORM)) {
323 		if (!STAILQ_EMPTY(&devsoftc.devq))
324 			revents = events & (POLLIN | POLLRDNORM);
325 		else
326 			selrecord(td, &devsoftc.sel);
327 	}
328 	mtx_unlock(&devsoftc.mtx);
329 
330 	return (revents);
331 }
332 
333 static int
devkqfilter(struct cdev * dev,struct knote * kn)334 devkqfilter(struct cdev *dev, struct knote *kn)
335 {
336 	int error;
337 
338 	if (kn->kn_filter == EVFILT_READ) {
339 		kn->kn_fop = &devctl_rfiltops;
340 		knlist_add(&devsoftc.sel.si_note, kn, 0);
341 		error = 0;
342 	} else
343 		error = EINVAL;
344 	return (error);
345 }
346 
/* kqueue detach: unhook the knote from the softc's note list. */
static void
filt_devctl_detach(struct knote *kn)
{
	knlist_remove(&devsoftc.sel.si_note, kn, 0);
}
352 
/*
 * kqueue event test: readable when at least one message is queued.
 * The knote list was initialized with devsoftc.mtx (knlist_init_mtx),
 * so this runs with that mutex held via KNOTE_LOCKED.
 */
static int
filt_devctl_read(struct knote *kn, long hint)
{
	kn->kn_data = devsoftc.queued;
	return (kn->kn_data != 0);
}
359 
/**
 * @brief Return whether the userland process is running
 *
 * True while some process (normally devd) holds /dev/devctl open.
 */
bool
devctl_process_running(void)
{
	return (devsoftc.inuse == 1);
}
368 
/*
 * Allocate a buffer for a new event message.  Returns NULL when queueing
 * is disabled or no memory is available; otherwise returns a buffer whose
 * dei_data is an empty string.  When the zone is exhausted, the oldest
 * queued message is recycled so newer events are favored over older ones.
 */
static struct dev_event_info *
devctl_alloc_dei(void)
{
	struct dev_event_info *dei = NULL;

	mtx_lock(&devsoftc.mtx);
	if (devctl_queue_length == 0)
		goto out;
	/* Try a normal allocation first, then dip into the reserve pool. */
	dei = uma_zalloc(devsoftc.zone, M_NOWAIT);
	if (dei == NULL)
		dei = uma_zalloc(devsoftc.zone, M_NOWAIT | M_USE_RESERVE);
	if (dei == NULL) {
		/*
		 * Guard against no items in the queue. Normally, this won't
		 * happen, but if lots of events happen all at once and there's
		 * a chance we're out of allocated space but none have yet been
		 * queued when we get here, leaving nothing to steal. This can
		 * also happen with error injection. Fail safe by returning
		 * NULL in that case..
		 */
		if (devsoftc.queued == 0)
			goto out;
		/* Steal (and discard) the oldest queued message. */
		dei = STAILQ_FIRST(&devsoftc.devq);
		STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
		devsoftc.queued--;
	}
	MPASS(dei != NULL);
	*dei->dei_data = '\0';
out:
	mtx_unlock(&devsoftc.mtx);
	return (dei);
}
401 
/*
 * Allocate an event buffer and initialize *sb as a fixed-length sbuf that
 * writes directly into its dei_data.  Returns NULL (leaving *sb untouched)
 * when no buffer is available.
 */
static struct dev_event_info *
devctl_alloc_dei_sb(struct sbuf *sb)
{
	struct dev_event_info *dei;

	dei = devctl_alloc_dei();
	if (dei != NULL)
		sbuf_new(sb, dei->dei_data, sizeof(dei->dei_data), SBUF_FIXEDLEN);
	return (dei);
}
412 
/* Return an unqueued event buffer to the zone. */
static void
devctl_free_dei(struct dev_event_info *dei)
{
	uma_zfree(devsoftc.zone, dei);
}
418 
/*
 * Append a finished message to the queue and wake up every notification
 * channel: sleeping readers, kqueue, select/poll, and (when FIOASYNC was
 * requested) SIGIO.
 */
static void
devctl_queue(struct dev_event_info *dei)
{
	mtx_lock(&devsoftc.mtx);
	STAILQ_INSERT_TAIL(&devsoftc.devq, dei, dei_link);
	devsoftc.queued++;
	cv_broadcast(&devsoftc.cv);
	KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
	mtx_unlock(&devsoftc.mtx);
	selwakeup(&devsoftc.sel);
	/*
	 * NOTE(review): async and sigio are read after the mutex is dropped
	 * while devclose() clears them under the lock -- presumably a benign
	 * race, but confirm.
	 */
	if (devsoftc.async && devsoftc.sigio != NULL)
		pgsigio(&devsoftc.sigio, SIGIO, 0);
}
432 
433 /**
434  * @brief Send a 'notification' to userland, using standard ways
435  */
436 void
devctl_notify(const char * system,const char * subsystem,const char * type,const char * data)437 devctl_notify(const char *system, const char *subsystem, const char *type,
438     const char *data)
439 {
440 	struct dev_event_info *dei;
441 	struct sbuf sb;
442 
443 	if (system == NULL || subsystem == NULL || type == NULL)
444 		return;
445 	if (devctl_notify_hook.send_f != NULL)
446 		devctl_notify_hook.send_f(system, subsystem, type, data);
447 	dei = devctl_alloc_dei_sb(&sb);
448 	if (dei == NULL)
449 		return;
450 	sbuf_cpy(&sb, "!system=");
451 	sbuf_cat(&sb, system);
452 	sbuf_cat(&sb, " subsystem=");
453 	sbuf_cat(&sb, subsystem);
454 	sbuf_cat(&sb, " type=");
455 	sbuf_cat(&sb, type);
456 	if (data != NULL) {
457 		sbuf_putc(&sb, ' ');
458 		sbuf_cat(&sb, data);
459 	}
460 	sbuf_putc(&sb, '\n');
461 	if (sbuf_finish(&sb) != 0)
462 		devctl_free_dei(dei);	/* overflow -> drop it */
463 	else
464 		devctl_queue(dei);
465 }
466 
/*
 * Common routine that tries to make sending messages as easy as possible.
 * We allocate memory for the data, copy strings into that, but do not
 * free it unless there's an error.  The dequeue part of the driver should
 * free the data.  We don't send data when the device is disabled.  We do
 * send data, even when we have no listeners, because we wish to avoid
 * races relating to startup and restart of listening applications.
 *
 * devaddq is designed to string together the type of event, with the
 * object of that event, plus the plug and play info and location info
 * for that event.  This is likely most useful for devices, but less
 * useful for other consumers of this interface.  Those should use
 * the devctl_notify() interface instead.
 *
 * Output:
 *	${type}${what} at $(location dev) $(pnp-info dev) on $(parent dev)
 */
static void
devaddq(const char *type, const char *what, device_t dev)
{
	struct dev_event_info *dei;
	const char *parstr;
	struct sbuf sb;
	size_t beginlen;

	dei = devctl_alloc_dei_sb(&sb);
	if (dei == NULL)
		return;
	sbuf_cpy(&sb, type);
	sbuf_cat(&sb, what);
	sbuf_cat(&sb, " at ");
	/* Remember where the location/pnp details begin, for the hook below. */
	beginlen = sbuf_len(&sb);

	/* Add in the location */
	bus_child_location(dev, &sb);
	sbuf_putc(&sb, ' ');

	/* Add in pnpinfo */
	bus_child_pnpinfo(dev, &sb);

	/* Get the parent of this device, or / if high enough in the tree. */
	if (device_get_parent(dev) == NULL)
		parstr = ".";	/* Or '/' ? */
	else
		parstr = device_get_nameunit(device_get_parent(dev));
	sbuf_cat(&sb, " on ");
	sbuf_cat(&sb, parstr);
	sbuf_putc(&sb, '\n');
	if (sbuf_finish(&sb) != 0)
		goto bad;	/* Message overflowed the fixed buffer; drop. */
	if (devctl_notify_hook.send_f != NULL) {
		const char *t;

		/* Map the one-character type code to a bridge event name. */
		switch (*type) {
		case '+':
			t = "ATTACH";
			break;
		case '-':
			t = "DETACH";
			break;
		default:
			t = "NOMATCH";
			break;
		}
		/* Pass only the part after " at " (location/pnp/parent). */
		devctl_notify_hook.send_f("device",
		    what, t, sbuf_data(&sb) + beginlen);
	}
	devctl_queue(dei);
	return;
bad:
	devctl_free_dei(dei);
}
539 
540 static int
sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)541 sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
542 {
543 	int q, error;
544 
545 	q = devctl_queue_length;
546 	error = sysctl_handle_int(oidp, &q, 0, req);
547 	if (error || !req->newptr)
548 		return (error);
549 	if (q < 0)
550 		return (EINVAL);
551 
552 	/*
553 	 * When set as a tunable, we've not yet initialized the mutex.
554 	 * It is safe to just assign to devctl_queue_length and return
555 	 * as we're racing no one. We'll use whatever value set in
556 	 * devinit.
557 	 */
558 	if (!mtx_initialized(&devsoftc.mtx)) {
559 		devctl_queue_length = q;
560 		return (0);
561 	}
562 
563 	/*
564 	 * XXX It's hard to grow or shrink the UMA zone. Only allow
565 	 * disabling the queue size for the moment until underlying
566 	 * UMA issues can be sorted out.
567 	 */
568 	if (q != 0)
569 		return (EINVAL);
570 	if (q == devctl_queue_length)
571 		return (0);
572 	mtx_lock(&devsoftc.mtx);
573 	devctl_queue_length = 0;
574 	uma_zdestroy(devsoftc.zone);
575 	devsoftc.zone = 0;
576 	mtx_unlock(&devsoftc.mtx);
577 	return (0);
578 }
579 
/**
 * @brief safely quotes strings that might have double quotes in them.
 *
 * The devctl protocol relies on quoted strings having matching quotes.
 * This routine quotes any internal quotes so the resulting string
 * is safe to pass to snprintf to construct, for example pnp info strings.
 *
 * @param sb	sbuf to place the characters into
 * @param src	Original buffer.
 */
void
devctl_safe_quote_sb(struct sbuf *sb, const char *src)
{
	char c;

	for (; (c = *src) != '\0'; src++) {
		/* Escape characters that would break a quoted string. */
		if (c == '"' || c == '\\')
			sbuf_putc(sb, '\\');
		sbuf_putc(sb, c);
	}
}
599 
/*
 * Install a bridge callback that receives a copy of every devctl event
 * (see devctl_notify() and devaddq()).
 */
void
devctl_set_notify_hook(send_event_f *hook)
{
	devctl_notify_hook.send_f = hook;
}
605 
/* Remove the bridge callback installed by devctl_set_notify_hook(). */
void
devctl_unset_notify_hook(void)
{
	devctl_notify_hook.send_f = NULL;
}
611 
612