1*df114daeSRuslan Bukin /*-
2*df114daeSRuslan Bukin * SPDX-License-Identifier: BSD-2-Clause
3*df114daeSRuslan Bukin *
4*df114daeSRuslan Bukin * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com>
5*df114daeSRuslan Bukin *
6*df114daeSRuslan Bukin * This work was supported by Innovate UK project 105694, "Digital Security
7*df114daeSRuslan Bukin * by Design (DSbD) Technology Platform Prototype".
8*df114daeSRuslan Bukin *
9*df114daeSRuslan Bukin * Redistribution and use in source and binary forms, with or without
10*df114daeSRuslan Bukin * modification, are permitted provided that the following conditions
11*df114daeSRuslan Bukin * are met:
12*df114daeSRuslan Bukin * 1. Redistributions of source code must retain the above copyright
13*df114daeSRuslan Bukin * notice, this list of conditions and the following disclaimer.
14*df114daeSRuslan Bukin * 2. Redistributions in binary form must reproduce the above copyright
15*df114daeSRuslan Bukin * notice, this list of conditions and the following disclaimer in the
16*df114daeSRuslan Bukin * documentation and/or other materials provided with the distribution.
17*df114daeSRuslan Bukin *
18*df114daeSRuslan Bukin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19*df114daeSRuslan Bukin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20*df114daeSRuslan Bukin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21*df114daeSRuslan Bukin * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22*df114daeSRuslan Bukin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23*df114daeSRuslan Bukin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24*df114daeSRuslan Bukin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25*df114daeSRuslan Bukin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26*df114daeSRuslan Bukin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27*df114daeSRuslan Bukin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28*df114daeSRuslan Bukin * SUCH DAMAGE.
29*df114daeSRuslan Bukin */
30*df114daeSRuslan Bukin
31*df114daeSRuslan Bukin /*
32*df114daeSRuslan Bukin * Hardware Tracing framework.
33*df114daeSRuslan Bukin *
34*df114daeSRuslan Bukin * The framework manages hardware tracing units that collect information
35*df114daeSRuslan Bukin * about software execution and store it as events in highly compressed format
36*df114daeSRuslan Bukin * into DRAM. The events cover information about control flow changes of a
37*df114daeSRuslan Bukin * program, whether branches taken or not, exceptions taken, timing information,
38*df114daeSRuslan Bukin * cycles elapsed and more. That allows us to restore entire program flow of a
39*df114daeSRuslan Bukin * given application without performance impact.
40*df114daeSRuslan Bukin *
41*df114daeSRuslan Bukin * Design overview.
42*df114daeSRuslan Bukin *
43*df114daeSRuslan Bukin * The framework provides character devices for mmap(2) and ioctl(2) system
44*df114daeSRuslan Bukin * calls to allow user to manage CPU (hardware) tracing units.
45*df114daeSRuslan Bukin *
46*df114daeSRuslan Bukin * /dev/hwt:
47*df114daeSRuslan Bukin * .ioctl:
48*df114daeSRuslan Bukin * hwt_ioctl():
49*df114daeSRuslan Bukin * a) HWT_IOC_ALLOC
50*df114daeSRuslan Bukin * Allocates kernel tracing context CTX based on requested mode
51*df114daeSRuslan Bukin * of operation. Verifies the information that comes with the
52*df114daeSRuslan Bukin * request (pid, cpus), allocates unique ID for the context.
53*df114daeSRuslan Bukin * Creates a new character device for CTX management.
54*df114daeSRuslan Bukin *
55*df114daeSRuslan Bukin * /dev/hwt_%d[_%d], ident[, thread_id]
56*df114daeSRuslan Bukin * .mmap
57*df114daeSRuslan Bukin * Maps tracing buffers of the corresponding thread to userspace.
58*df114daeSRuslan Bukin * .ioctl
59*df114daeSRuslan Bukin * hwt_thread_ioctl():
60*df114daeSRuslan Bukin * a) HWT_IOC_START
61*df114daeSRuslan Bukin * Enables tracing unit for a given context.
62*df114daeSRuslan Bukin * b) HWT_IOC_RECORD_GET
63*df114daeSRuslan Bukin * Transfers (small) record entries collected during program
64*df114daeSRuslan Bukin * execution for a given context to userspace, such as mmaping
65*df114daeSRuslan Bukin * tables of executable and dynamic libraries, interpreter,
66*df114daeSRuslan Bukin * kernel mappings, tid of threads created, etc.
67*df114daeSRuslan Bukin * c) HWT_IOC_SET_CONFIG
68*df114daeSRuslan Bukin * Allows specifying backend-specific configuration of the
69*df114daeSRuslan Bukin * trace unit.
70*df114daeSRuslan Bukin * d) HWT_IOC_WAKEUP
71*df114daeSRuslan Bukin * Wakes up a thread that is currently sleeping.
72*df114daeSRuslan Bukin * e) HWT_IOC_BUFPTR_GET
73*df114daeSRuslan Bukin * Transfers current hardware pointer in the filling buffer
74*df114daeSRuslan Bukin * to the userspace.
75*df114daeSRuslan Bukin * f) HWT_IOC_SVC_BUF
76*df114daeSRuslan Bukin * To avoid data loss, userspace may notify kernel it has
77*df114daeSRuslan Bukin * copied out the given buffer, so kernel is ok to overwrite it.
78*df114daeSRuslan Bukin *
79*df114daeSRuslan Bukin * HWT context lifecycle in THREAD mode of operation:
80*df114daeSRuslan Bukin * 1. User invokes HWT_IOC_ALLOC ioctl with information about pid to trace and
81*df114daeSRuslan Bukin * size of the buffers for the trace data to allocate.
82*df114daeSRuslan Bukin * Some architectures may have different tracing units supported, so user
83*df114daeSRuslan Bukin * also provides backend name to use for this context, e.g. "coresight".
84*df114daeSRuslan Bukin * 2. Kernel allocates context, looks up the proc for the given pid. Then it
85*df114daeSRuslan Bukin * creates first hwt_thread in the context and allocates trace buffers for
86*df114daeSRuslan Bukin * it. Immediately, kernel initializes tracing backend.
87*df114daeSRuslan Bukin * Kernel creates character device and returns unique identifier of
88*df114daeSRuslan Bukin * trace context to the user.
89*df114daeSRuslan Bukin * 3. To manage the new context, user opens the character device created.
90*df114daeSRuslan Bukin * User invokes HWT_IOC_START ioctl, kernel marks context as RUNNING.
91*df114daeSRuslan Bukin * At this point any HWT hook invocation by scheduler enables/disables
92*df114daeSRuslan Bukin * tracing for threads associated with the context (threads of the proc).
93*df114daeSRuslan Bukin * Creation of any new thread (of the target proc) invokes the
94*df114daeSRuslan Bukin * corresponding hooks in HWT framework, so that a new hwt_thread and buffers
95*df114daeSRuslan Bukin * are allocated and a character device for mmap(2) is created on the fly.
96*df114daeSRuslan Bukin * 4. User issues HWT_IOC_RECORD_GET ioctl to fetch information about mmaping
97*df114daeSRuslan Bukin * tables and threads created during application startup.
98*df114daeSRuslan Bukin * 5. User mmaps tracing buffers of each thread to userspace (using
99*df114daeSRuslan Bukin * /dev/hwt_%d_%d % (ident, thread_id) character devices).
100*df114daeSRuslan Bukin * 6. User can repeat 4 if expected thread is not yet created during target
101*df114daeSRuslan Bukin * application execution.
102*df114daeSRuslan Bukin * 7. User issues HWT_IOC_BUFPTR_GET ioctl to get current filling level of the
103*df114daeSRuslan Bukin * hardware buffer of a given thread.
104*df114daeSRuslan Bukin * 8. User invokes trace decoder library to process available data and see the
105*df114daeSRuslan Bukin * results in human readable form.
106*df114daeSRuslan Bukin * 9. User repeats 7 if needed.
107*df114daeSRuslan Bukin *
108*df114daeSRuslan Bukin * HWT context lifecycle in CPU mode of operation:
109*df114daeSRuslan Bukin * 1. User invokes HWT_IOC_ALLOC ioctl providing a set of CPUs to trace within
110*df114daeSRuslan Bukin * single CTX.
111*df114daeSRuslan Bukin * 2. Kernel verifies the set of CPUs and allocates tracing context, creates
112*df114daeSRuslan Bukin * a buffer for each CPU.
113*df114daeSRuslan Bukin * Kernel creates a character device for every CPU provided in the request.
114*df114daeSRuslan Bukin * Kernel initializes tracing backend.
115*df114daeSRuslan Bukin * 3. User opens character devices of interest to map the buffers to userspace.
116*df114daeSRuslan Bukin * User can start tracing by invoking HWT_IOC_START on any character
117*df114daeSRuslan Bukin * device within the context; the entire context will be marked as RUNNING.
118*df114daeSRuslan Bukin * 4. The rest is similar to the THREAD mode.
119*df114daeSRuslan Bukin *
120*df114daeSRuslan Bukin */
121*df114daeSRuslan Bukin
122*df114daeSRuslan Bukin #include <sys/param.h>
123*df114daeSRuslan Bukin #include <sys/conf.h>
124*df114daeSRuslan Bukin #include <sys/eventhandler.h>
125*df114daeSRuslan Bukin #include <sys/kernel.h>
126*df114daeSRuslan Bukin #include <sys/module.h>
127*df114daeSRuslan Bukin
128*df114daeSRuslan Bukin #include <dev/hwt/hwt_context.h>
129*df114daeSRuslan Bukin #include <dev/hwt/hwt_contexthash.h>
130*df114daeSRuslan Bukin #include <dev/hwt/hwt_thread.h>
131*df114daeSRuslan Bukin #include <dev/hwt/hwt_owner.h>
132*df114daeSRuslan Bukin #include <dev/hwt/hwt_ownerhash.h>
133*df114daeSRuslan Bukin #include <dev/hwt/hwt_backend.h>
134*df114daeSRuslan Bukin #include <dev/hwt/hwt_record.h>
135*df114daeSRuslan Bukin #include <dev/hwt/hwt_ioctl.h>
136*df114daeSRuslan Bukin #include <dev/hwt/hwt_hook.h>
137*df114daeSRuslan Bukin
/*
 * Debug output switch.
 *
 * HWT_DEBUG is defined and immediately undefined, so as written dprintf()
 * expands to nothing and its arguments are not evaluated. Removing the
 * #undef line turns dprintf() into a plain printf() call.
 */
#define	HWT_DEBUG
#undef	HWT_DEBUG

#if defined(HWT_DEBUG)
#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
#else
#define	dprintf(fmt, ...)
#endif
146*df114daeSRuslan Bukin
147*df114daeSRuslan Bukin static eventhandler_tag hwt_exit_tag;
148*df114daeSRuslan Bukin static struct cdev *hwt_cdev;
149*df114daeSRuslan Bukin static struct cdevsw hwt_cdevsw = {
150*df114daeSRuslan Bukin .d_version = D_VERSION,
151*df114daeSRuslan Bukin .d_name = "hwt",
152*df114daeSRuslan Bukin .d_mmap_single = NULL,
153*df114daeSRuslan Bukin .d_ioctl = hwt_ioctl
154*df114daeSRuslan Bukin };
155*df114daeSRuslan Bukin
156*df114daeSRuslan Bukin static void
hwt_process_exit(void * arg __unused,struct proc * p)157*df114daeSRuslan Bukin hwt_process_exit(void *arg __unused, struct proc *p)
158*df114daeSRuslan Bukin {
159*df114daeSRuslan Bukin struct hwt_owner *ho;
160*df114daeSRuslan Bukin
161*df114daeSRuslan Bukin /* Stop HWTs associated with exiting owner, if any. */
162*df114daeSRuslan Bukin ho = hwt_ownerhash_lookup(p);
163*df114daeSRuslan Bukin if (ho)
164*df114daeSRuslan Bukin hwt_owner_shutdown(ho);
165*df114daeSRuslan Bukin }
166*df114daeSRuslan Bukin
167*df114daeSRuslan Bukin static int
hwt_load(void)168*df114daeSRuslan Bukin hwt_load(void)
169*df114daeSRuslan Bukin {
170*df114daeSRuslan Bukin struct make_dev_args args;
171*df114daeSRuslan Bukin int error;
172*df114daeSRuslan Bukin
173*df114daeSRuslan Bukin make_dev_args_init(&args);
174*df114daeSRuslan Bukin args.mda_devsw = &hwt_cdevsw;
175*df114daeSRuslan Bukin args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
176*df114daeSRuslan Bukin args.mda_uid = UID_ROOT;
177*df114daeSRuslan Bukin args.mda_gid = GID_WHEEL;
178*df114daeSRuslan Bukin args.mda_mode = 0660;
179*df114daeSRuslan Bukin args.mda_si_drv1 = NULL;
180*df114daeSRuslan Bukin
181*df114daeSRuslan Bukin hwt_backend_load();
182*df114daeSRuslan Bukin hwt_ctx_load();
183*df114daeSRuslan Bukin hwt_contexthash_load();
184*df114daeSRuslan Bukin hwt_ownerhash_load();
185*df114daeSRuslan Bukin hwt_record_load();
186*df114daeSRuslan Bukin
187*df114daeSRuslan Bukin error = make_dev_s(&args, &hwt_cdev, "hwt");
188*df114daeSRuslan Bukin if (error != 0)
189*df114daeSRuslan Bukin return (error);
190*df114daeSRuslan Bukin
191*df114daeSRuslan Bukin hwt_exit_tag = EVENTHANDLER_REGISTER(process_exit, hwt_process_exit,
192*df114daeSRuslan Bukin NULL, EVENTHANDLER_PRI_ANY);
193*df114daeSRuslan Bukin
194*df114daeSRuslan Bukin hwt_hook_load();
195*df114daeSRuslan Bukin
196*df114daeSRuslan Bukin return (0);
197*df114daeSRuslan Bukin }
198*df114daeSRuslan Bukin
199*df114daeSRuslan Bukin static int
hwt_unload(void)200*df114daeSRuslan Bukin hwt_unload(void)
201*df114daeSRuslan Bukin {
202*df114daeSRuslan Bukin
203*df114daeSRuslan Bukin hwt_hook_unload();
204*df114daeSRuslan Bukin EVENTHANDLER_DEREGISTER(process_exit, hwt_exit_tag);
205*df114daeSRuslan Bukin destroy_dev(hwt_cdev);
206*df114daeSRuslan Bukin hwt_record_unload();
207*df114daeSRuslan Bukin hwt_ownerhash_unload();
208*df114daeSRuslan Bukin hwt_contexthash_unload();
209*df114daeSRuslan Bukin hwt_ctx_unload();
210*df114daeSRuslan Bukin hwt_backend_unload();
211*df114daeSRuslan Bukin
212*df114daeSRuslan Bukin return (0);
213*df114daeSRuslan Bukin }
214*df114daeSRuslan Bukin
215*df114daeSRuslan Bukin static int
hwt_modevent(module_t mod,int type,void * data)216*df114daeSRuslan Bukin hwt_modevent(module_t mod, int type, void *data)
217*df114daeSRuslan Bukin {
218*df114daeSRuslan Bukin int error;
219*df114daeSRuslan Bukin
220*df114daeSRuslan Bukin switch (type) {
221*df114daeSRuslan Bukin case MOD_LOAD:
222*df114daeSRuslan Bukin error = hwt_load();
223*df114daeSRuslan Bukin break;
224*df114daeSRuslan Bukin case MOD_UNLOAD:
225*df114daeSRuslan Bukin error = hwt_unload();
226*df114daeSRuslan Bukin break;
227*df114daeSRuslan Bukin default:
228*df114daeSRuslan Bukin error = 0;
229*df114daeSRuslan Bukin break;
230*df114daeSRuslan Bukin }
231*df114daeSRuslan Bukin
232*df114daeSRuslan Bukin return (error);
233*df114daeSRuslan Bukin }
234*df114daeSRuslan Bukin
235*df114daeSRuslan Bukin static moduledata_t hwt_mod = {
236*df114daeSRuslan Bukin "hwt",
237*df114daeSRuslan Bukin hwt_modevent,
238*df114daeSRuslan Bukin NULL
239*df114daeSRuslan Bukin };
240*df114daeSRuslan Bukin
241*df114daeSRuslan Bukin DECLARE_MODULE(hwt, hwt_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
242*df114daeSRuslan Bukin MODULE_VERSION(hwt, 1);
243