xref: /freebsd/sys/dev/hwt/hwt.c (revision df114daef4c48548c3c2b86717612761185ae18f)
1*df114daeSRuslan Bukin /*-
2*df114daeSRuslan Bukin  * SPDX-License-Identifier: BSD-2-Clause
3*df114daeSRuslan Bukin  *
4*df114daeSRuslan Bukin  * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com>
5*df114daeSRuslan Bukin  *
6*df114daeSRuslan Bukin  * This work was supported by Innovate UK project 105694, "Digital Security
7*df114daeSRuslan Bukin  * by Design (DSbD) Technology Platform Prototype".
8*df114daeSRuslan Bukin  *
9*df114daeSRuslan Bukin  * Redistribution and use in source and binary forms, with or without
10*df114daeSRuslan Bukin  * modification, are permitted provided that the following conditions
11*df114daeSRuslan Bukin  * are met:
12*df114daeSRuslan Bukin  * 1. Redistributions of source code must retain the above copyright
13*df114daeSRuslan Bukin  *    notice, this list of conditions and the following disclaimer.
14*df114daeSRuslan Bukin  * 2. Redistributions in binary form must reproduce the above copyright
15*df114daeSRuslan Bukin  *    notice, this list of conditions and the following disclaimer in the
16*df114daeSRuslan Bukin  *    documentation and/or other materials provided with the distribution.
17*df114daeSRuslan Bukin  *
18*df114daeSRuslan Bukin  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19*df114daeSRuslan Bukin  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20*df114daeSRuslan Bukin  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21*df114daeSRuslan Bukin  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22*df114daeSRuslan Bukin  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23*df114daeSRuslan Bukin  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24*df114daeSRuslan Bukin  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25*df114daeSRuslan Bukin  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26*df114daeSRuslan Bukin  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27*df114daeSRuslan Bukin  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28*df114daeSRuslan Bukin  * SUCH DAMAGE.
29*df114daeSRuslan Bukin  */
30*df114daeSRuslan Bukin 
31*df114daeSRuslan Bukin /*
32*df114daeSRuslan Bukin  * Hardware Tracing framework.
33*df114daeSRuslan Bukin  *
34*df114daeSRuslan Bukin  *    The framework manages hardware tracing units that collect information
35*df114daeSRuslan Bukin  * about software execution and store it as events in highly compressed format
36*df114daeSRuslan Bukin  * into DRAM. The events cover information about control flow changes of a
37*df114daeSRuslan Bukin  * program, whether branches taken or not, exceptions taken, timing information,
38*df114daeSRuslan Bukin  * cycles elapsed and more. That allows us to restore entire program flow of a
39*df114daeSRuslan Bukin  * given application without performance impact.
40*df114daeSRuslan Bukin  *
41*df114daeSRuslan Bukin  * Design overview.
42*df114daeSRuslan Bukin  *
43*df114daeSRuslan Bukin  *    The framework provides character devices for mmap(2) and ioctl(2) system
44*df114daeSRuslan Bukin  * calls to allow user to manage CPU (hardware) tracing units.
45*df114daeSRuslan Bukin  *
46*df114daeSRuslan Bukin  * /dev/hwt:
47*df114daeSRuslan Bukin  *    .ioctl:
48*df114daeSRuslan Bukin  *        hwt_ioctl():
49*df114daeSRuslan Bukin  *               a) HWT_IOC_ALLOC
50*df114daeSRuslan Bukin  *                  Allocates kernel tracing context CTX based on requested mode
51*df114daeSRuslan Bukin  *                  of operation. Verifies the information that comes with the
52*df114daeSRuslan Bukin  *                  request (pid, cpus), allocates unique ID for the context.
53*df114daeSRuslan Bukin  *                  Creates a new character device for CTX management.
54*df114daeSRuslan Bukin  *
55*df114daeSRuslan Bukin  * /dev/hwt_%d[_%d], ident[, thread_id]
56*df114daeSRuslan Bukin  *    .mmap
57*df114daeSRuslan Bukin  *        Maps tracing buffers of the corresponding thread to userspace.
58*df114daeSRuslan Bukin  *    .ioctl
59*df114daeSRuslan Bukin  *        hwt_thread_ioctl():
60*df114daeSRuslan Bukin  *               a) HWT_IOC_START
61*df114daeSRuslan Bukin  *                  Enables tracing unit for a given context.
62*df114daeSRuslan Bukin  *               b) HWT_IOC_RECORD_GET
63*df114daeSRuslan Bukin  *                  Transfers (small) record entries collected during program
64*df114daeSRuslan Bukin  *                  execution for a given context to userspace, such as mmaping
65*df114daeSRuslan Bukin  *                  tables of executable and dynamic libraries, interpreter,
66*df114daeSRuslan Bukin  *                  kernel mappings, tid of threads created, etc.
67*df114daeSRuslan Bukin  *               c) HWT_IOC_SET_CONFIG
68*df114daeSRuslan Bukin  *                  Allows to specify backend-specific configuration of the
69*df114daeSRuslan Bukin  *                  trace unit.
70*df114daeSRuslan Bukin  *               d) HWT_IOC_WAKEUP
71*df114daeSRuslan Bukin  *                  Wakes up a thread that is currently sleeping.
72*df114daeSRuslan Bukin  *               e) HWT_IOC_BUFPTR_GET
73*df114daeSRuslan Bukin  *                  Transfers current hardware pointer in the filling buffer
74*df114daeSRuslan Bukin  *                  to the userspace.
75*df114daeSRuslan Bukin  *               f) HWT_IOC_SVC_BUF
 *                  To avoid data loss, userspace may notify kernel it has
 *                  copied out the given buffer, so kernel is ok to overwrite
 *                  it.
78*df114daeSRuslan Bukin  *
79*df114daeSRuslan Bukin  * HWT context lifecycle in THREAD mode of operation:
80*df114daeSRuslan Bukin  * 1. User invokes HWT_IOC_ALLOC ioctl with information about pid to trace and
81*df114daeSRuslan Bukin  *    size of the buffers for the trace data to allocate.
82*df114daeSRuslan Bukin  *    Some architectures may have different tracing units supported, so user
83*df114daeSRuslan Bukin  *    also provides backend name to use for this context, e.g. "coresight".
 * 2. Kernel allocates context, looks up the proc for the given pid. Then it
 *    creates first hwt_thread in the context and allocates trace buffers for
 *    it. Immediately, kernel initializes tracing backend.
 *    Kernel creates character device and returns unique identifier of
 *    trace context to the user.
89*df114daeSRuslan Bukin  * 3. To manage the new context, user opens the character device created.
90*df114daeSRuslan Bukin  *    User invokes HWT_IOC_START ioctl, kernel marks context as RUNNING.
91*df114daeSRuslan Bukin  *    At this point any HWT hook invocation by scheduler enables/disables
92*df114daeSRuslan Bukin  *    tracing for threads associated with the context (threads of the proc).
 *    Any new thread creation (in the target proc) invokes the corresponding
 *    hooks in HWT framework, so that a new hwt_thread and its buffers are
 *    allocated, and a character device for mmap(2) is created on the fly.
96*df114daeSRuslan Bukin  * 4. User issues HWT_IOC_RECORD_GET ioctl to fetch information about mmaping
97*df114daeSRuslan Bukin  *    tables and threads created during application startup.
98*df114daeSRuslan Bukin  * 5. User mmaps tracing buffers of each thread to userspace (using
99*df114daeSRuslan Bukin  *    /dev/hwt_%d_%d % (ident, thread_id) character devices).
100*df114daeSRuslan Bukin  * 6. User can repeat 4 if expected thread is not yet created during target
101*df114daeSRuslan Bukin  *    application execution.
102*df114daeSRuslan Bukin  * 7. User issues HWT_IOC_BUFPTR_GET ioctl to get current filling level of the
103*df114daeSRuslan Bukin  *    hardware buffer of a given thread.
104*df114daeSRuslan Bukin  * 8. User invokes trace decoder library to process available data and see the
105*df114daeSRuslan Bukin  *    results in human readable form.
106*df114daeSRuslan Bukin  * 9. User repeats 7 if needed.
107*df114daeSRuslan Bukin  *
108*df114daeSRuslan Bukin  * HWT context lifecycle in CPU mode of operation:
109*df114daeSRuslan Bukin  * 1. User invokes HWT_IOC_ALLOC ioctl providing a set of CPU to trace within
110*df114daeSRuslan Bukin  *    single CTX.
111*df114daeSRuslan Bukin  * 2. Kernel verifies the set of CPU and allocates tracing context, creates
112*df114daeSRuslan Bukin  *    a buffer for each CPU.
113*df114daeSRuslan Bukin  *    Kernel creates a character device for every CPU provided in the request.
 *    Kernel initializes tracing backend.
115*df114daeSRuslan Bukin  * 3. User opens character devices of interest to map the buffers to userspace.
116*df114daeSRuslan Bukin  *    User can start tracing by invoking HWT_IOC_START on any of character
117*df114daeSRuslan Bukin  *    device within the context, entire context will be marked as RUNNING.
118*df114daeSRuslan Bukin  * 4. The rest is similar to the THREAD mode.
119*df114daeSRuslan Bukin  *
120*df114daeSRuslan Bukin  */
121*df114daeSRuslan Bukin 
122*df114daeSRuslan Bukin #include <sys/param.h>
123*df114daeSRuslan Bukin #include <sys/conf.h>
124*df114daeSRuslan Bukin #include <sys/eventhandler.h>
125*df114daeSRuslan Bukin #include <sys/kernel.h>
126*df114daeSRuslan Bukin #include <sys/module.h>
127*df114daeSRuslan Bukin 
128*df114daeSRuslan Bukin #include <dev/hwt/hwt_context.h>
129*df114daeSRuslan Bukin #include <dev/hwt/hwt_contexthash.h>
130*df114daeSRuslan Bukin #include <dev/hwt/hwt_thread.h>
131*df114daeSRuslan Bukin #include <dev/hwt/hwt_owner.h>
132*df114daeSRuslan Bukin #include <dev/hwt/hwt_ownerhash.h>
133*df114daeSRuslan Bukin #include <dev/hwt/hwt_backend.h>
134*df114daeSRuslan Bukin #include <dev/hwt/hwt_record.h>
135*df114daeSRuslan Bukin #include <dev/hwt/hwt_ioctl.h>
136*df114daeSRuslan Bukin #include <dev/hwt/hwt_hook.h>
137*df114daeSRuslan Bukin 
/*
 * Debug output switch.  HWT_DEBUG is defined and then immediately undefined,
 * so by default dprintf() expands to nothing and its arguments are never
 * evaluated.  Drop the #undef line to route dprintf() to printf().
 */
#define	HWT_DEBUG
#undef	HWT_DEBUG

#if defined(HWT_DEBUG)
#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
#else
#define	dprintf(fmt, ...)
#endif
146*df114daeSRuslan Bukin 
147*df114daeSRuslan Bukin static eventhandler_tag hwt_exit_tag;
148*df114daeSRuslan Bukin static struct cdev *hwt_cdev;
149*df114daeSRuslan Bukin static struct cdevsw hwt_cdevsw = {
150*df114daeSRuslan Bukin 	.d_version	= D_VERSION,
151*df114daeSRuslan Bukin 	.d_name		= "hwt",
152*df114daeSRuslan Bukin 	.d_mmap_single	= NULL,
153*df114daeSRuslan Bukin 	.d_ioctl	= hwt_ioctl
154*df114daeSRuslan Bukin };
155*df114daeSRuslan Bukin 
156*df114daeSRuslan Bukin static void
hwt_process_exit(void * arg __unused,struct proc * p)157*df114daeSRuslan Bukin hwt_process_exit(void *arg __unused, struct proc *p)
158*df114daeSRuslan Bukin {
159*df114daeSRuslan Bukin 	struct hwt_owner *ho;
160*df114daeSRuslan Bukin 
161*df114daeSRuslan Bukin 	/* Stop HWTs associated with exiting owner, if any. */
162*df114daeSRuslan Bukin 	ho = hwt_ownerhash_lookup(p);
163*df114daeSRuslan Bukin 	if (ho)
164*df114daeSRuslan Bukin 		hwt_owner_shutdown(ho);
165*df114daeSRuslan Bukin }
166*df114daeSRuslan Bukin 
167*df114daeSRuslan Bukin static int
hwt_load(void)168*df114daeSRuslan Bukin hwt_load(void)
169*df114daeSRuslan Bukin {
170*df114daeSRuslan Bukin 	struct make_dev_args args;
171*df114daeSRuslan Bukin 	int error;
172*df114daeSRuslan Bukin 
173*df114daeSRuslan Bukin 	make_dev_args_init(&args);
174*df114daeSRuslan Bukin 	args.mda_devsw = &hwt_cdevsw;
175*df114daeSRuslan Bukin 	args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
176*df114daeSRuslan Bukin 	args.mda_uid = UID_ROOT;
177*df114daeSRuslan Bukin 	args.mda_gid = GID_WHEEL;
178*df114daeSRuslan Bukin 	args.mda_mode = 0660;
179*df114daeSRuslan Bukin 	args.mda_si_drv1 = NULL;
180*df114daeSRuslan Bukin 
181*df114daeSRuslan Bukin 	hwt_backend_load();
182*df114daeSRuslan Bukin 	hwt_ctx_load();
183*df114daeSRuslan Bukin 	hwt_contexthash_load();
184*df114daeSRuslan Bukin 	hwt_ownerhash_load();
185*df114daeSRuslan Bukin 	hwt_record_load();
186*df114daeSRuslan Bukin 
187*df114daeSRuslan Bukin 	error = make_dev_s(&args, &hwt_cdev, "hwt");
188*df114daeSRuslan Bukin 	if (error != 0)
189*df114daeSRuslan Bukin 		return (error);
190*df114daeSRuslan Bukin 
191*df114daeSRuslan Bukin 	hwt_exit_tag = EVENTHANDLER_REGISTER(process_exit, hwt_process_exit,
192*df114daeSRuslan Bukin 	    NULL, EVENTHANDLER_PRI_ANY);
193*df114daeSRuslan Bukin 
194*df114daeSRuslan Bukin 	hwt_hook_load();
195*df114daeSRuslan Bukin 
196*df114daeSRuslan Bukin 	return (0);
197*df114daeSRuslan Bukin }
198*df114daeSRuslan Bukin 
199*df114daeSRuslan Bukin static int
hwt_unload(void)200*df114daeSRuslan Bukin hwt_unload(void)
201*df114daeSRuslan Bukin {
202*df114daeSRuslan Bukin 
203*df114daeSRuslan Bukin 	hwt_hook_unload();
204*df114daeSRuslan Bukin 	EVENTHANDLER_DEREGISTER(process_exit, hwt_exit_tag);
205*df114daeSRuslan Bukin 	destroy_dev(hwt_cdev);
206*df114daeSRuslan Bukin 	hwt_record_unload();
207*df114daeSRuslan Bukin 	hwt_ownerhash_unload();
208*df114daeSRuslan Bukin 	hwt_contexthash_unload();
209*df114daeSRuslan Bukin 	hwt_ctx_unload();
210*df114daeSRuslan Bukin 	hwt_backend_unload();
211*df114daeSRuslan Bukin 
212*df114daeSRuslan Bukin 	return (0);
213*df114daeSRuslan Bukin }
214*df114daeSRuslan Bukin 
215*df114daeSRuslan Bukin static int
hwt_modevent(module_t mod,int type,void * data)216*df114daeSRuslan Bukin hwt_modevent(module_t mod, int type, void *data)
217*df114daeSRuslan Bukin {
218*df114daeSRuslan Bukin 	int error;
219*df114daeSRuslan Bukin 
220*df114daeSRuslan Bukin 	switch (type) {
221*df114daeSRuslan Bukin 	case MOD_LOAD:
222*df114daeSRuslan Bukin 		error = hwt_load();
223*df114daeSRuslan Bukin 		break;
224*df114daeSRuslan Bukin 	case MOD_UNLOAD:
225*df114daeSRuslan Bukin 		error = hwt_unload();
226*df114daeSRuslan Bukin 		break;
227*df114daeSRuslan Bukin 	default:
228*df114daeSRuslan Bukin 		error = 0;
229*df114daeSRuslan Bukin 		break;
230*df114daeSRuslan Bukin 	}
231*df114daeSRuslan Bukin 
232*df114daeSRuslan Bukin 	return (error);
233*df114daeSRuslan Bukin }
234*df114daeSRuslan Bukin 
235*df114daeSRuslan Bukin static moduledata_t hwt_mod = {
236*df114daeSRuslan Bukin 	"hwt",
237*df114daeSRuslan Bukin 	hwt_modevent,
238*df114daeSRuslan Bukin 	NULL
239*df114daeSRuslan Bukin };
240*df114daeSRuslan Bukin 
241*df114daeSRuslan Bukin DECLARE_MODULE(hwt, hwt_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
242*df114daeSRuslan Bukin MODULE_VERSION(hwt, 1);
243