/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com>
 *
 * This work was supported by Innovate UK project 105694, "Digital Security
 * by Design (DSbD) Technology Platform Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

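/*
 * Hardware Trace (HWT) trace buffer management: buffer page allocation,
 * the per-buffer character device, and its mmap/ioctl handlers.
 */
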
#include <sys/param.h>
#include <sys/ioccom.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
#include <sys/hwt.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>

#include <dev/hwt/hwt_hook.h>
#include <dev/hwt/hwt_context.h>
#include <dev/hwt/hwt_contexthash.h>
#include <dev/hwt/hwt_config.h>
#include <dev/hwt/hwt_cpu.h>
#include <dev/hwt/hwt_owner.h>
#include <dev/hwt/hwt_ownerhash.h>
#include <dev/hwt/hwt_thread.h>
#include <dev/hwt/hwt_backend.h>
#include <dev/hwt/hwt_vm.h>
#include <dev/hwt/hwt_record.h>

#define	HWT_THREAD_DEBUG
#undef	HWT_THREAD_DEBUG

#ifdef	HWT_THREAD_DEBUG
#define	dprintf(fmt, ...)	printf(fmt, ##__VA_ARGS__)
#else
#define	dprintf(fmt, ...)
#endif

static MALLOC_DEFINE(M_HWT_VM, "hwt_vm", "Hardware Trace");

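/*
 * Minimal cdev pager callbacks.  All buffer pages are populated up front in
 * hwt_vm_alloc_pages(), so the fault, constructor and destructor hooks have
 * nothing left to do.
 */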
static int
hwt_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{

	return (0);
}

static int
hwt_vm_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	*color = 0;

	return (0);
}

static void
hwt_vm_dtor(void *handle)
{

}

static struct cdev_pager_ops hwt_vm_pager_ops = {
	.cdev_pg_fault = hwt_vm_fault,
	.cdev_pg_ctor = hwt_vm_ctor,
	.cdev_pg_dtor = hwt_vm_dtor
};

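/*
 * Allocate and wire the physical pages backing the trace buffer, insert
 * them into the cdev pager object, and, when the backend requires kernel
 * access (kva_req), map them into a contiguous kernel virtual address range.
 */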
static int
hwt_vm_alloc_pages(struct hwt_vm *vm, int kva_req)
{
	vm_paddr_t low, high, boundary;
	vm_memattr_t memattr;
#ifdef  __aarch64__
	uintptr_t va;
#endif
	int alignment;
	vm_page_t m;
	int pflags;
	int tries;
	int i;

	alignment = PAGE_SIZE;
	low = 0;
	high = -1UL;
	boundary = 0;
	pflags = VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO;
	memattr = VM_MEMATTR_DEVICE;

	if (kva_req) {
		vm->kvaddr = kva_alloc(vm->npages * PAGE_SIZE);
		if (!vm->kvaddr)
			return (ENOMEM);
	}

	vm->obj = cdev_pager_allocate(vm, OBJT_MGTDEVICE,
	    &hwt_vm_pager_ops, vm->npages * PAGE_SIZE, PROT_READ, 0,
	    curthread->td_ucred);

	for (i = 0; i < vm->npages; i++) {
		tries = 0;
retry:
		m = vm_page_alloc_noobj_contig(pflags, 1, low, high,
		    alignment, boundary, memattr);
		if (m == NULL) {
			if (tries < 3) {
				if (!vm_page_reclaim_contig(pflags, 1, low,
				    high, alignment, boundary))
					vm_wait(NULL);
				tries++;
				goto retry;
			}

			return (ENOMEM);
		}

#if 0
		/* TODO: could not clean device memory on arm64. */
		if ((m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
#endif

#ifdef __aarch64__
		va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
		cpu_dcache_wb_range((void *)va, PAGE_SIZE);
#endif

		m->valid = VM_PAGE_BITS_ALL;
		m->oflags &= ~VPO_UNMANAGED;
		m->flags |= PG_FICTITIOUS;
		vm->pages[i] = m;

		VM_OBJECT_WLOCK(vm->obj);
		vm_page_insert(m, vm->obj, i);
		if (kva_req)
			pmap_qenter(vm->kvaddr + i * PAGE_SIZE, &m, 1);
		VM_OBJECT_WUNLOCK(vm->obj);
	}

	return (0);
}

static int
hwt_vm_open(struct cdev *cdev, int oflags, int devtype, struct thread *td)
{

	dprintf("%s\n", __func__);

	return (0);
}

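/*
 * Hand the pre-populated buffer object to the VM system for a read-only
 * userspace mapping of the trace buffer.
 */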
static int
hwt_vm_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
    vm_size_t mapsize, struct vm_object **objp, int nprot)
{
	struct hwt_vm *vm;

	vm = cdev->si_drv1;

	if (nprot != PROT_READ || *offset != 0)
		return (ENXIO);

	vm_object_reference(vm->obj);
	*objp = vm->obj;

	return (0);
}

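/*
 * Configure and enable the tracing backend on every CPU in the context's
 * CPU map, skipping halted CPUs.  Backends that cannot enable CPUs
 * individually are enabled for all CPUs in a single call.
 */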
static void
hwt_vm_start_cpu_mode(struct hwt_context *ctx)
{
	cpuset_t enable_cpus;
	int cpu_id;

	CPU_ZERO(&enable_cpus);

	CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
#ifdef SMP
		/* Ensure CPU is not halted. */
		if (CPU_ISSET(cpu_id, &hlt_cpus_mask))
			continue;
#endif

		hwt_backend_configure(ctx, cpu_id, cpu_id);

		CPU_SET(cpu_id, &enable_cpus);
	}

	if (ctx->hwt_backend->ops->hwt_backend_enable_smp == NULL) {
		CPU_FOREACH_ISSET(cpu_id, &enable_cpus)
			hwt_backend_enable(ctx, cpu_id);
	} else {
		/* Some backends require enabling all CPUs at once. */
		hwt_backend_enable_smp(ctx);
	}
}

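/*
 * Per-buffer ioctl handler.  Only the registered owner of the tracing
 * context may start or stop tracing, fetch records and buffer pointers,
 * change the configuration, wake a traced thread, or service the buffer.
 */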
static int
hwt_vm_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct hwt_record_get *rget;
	struct hwt_set_config *sconf;
	struct hwt_bufptr_get *ptr_get;
	struct hwt_svc_buf *sbuf;

	struct hwt_context *ctx;
	struct hwt_vm *vm;
	struct hwt_owner *ho;

	vm_offset_t offset;
	int ident;
	int error;
	uint64_t data = 0;
	void *data2;
	size_t data_size;
	int data_version;

	vm = dev->si_drv1;
	KASSERT(vm != NULL, ("si_drv1 is NULL"));

	ctx = vm->ctx;

	/* Ensure process is registered owner of this HWT. */
	ho = hwt_ownerhash_lookup(td->td_proc);
	if (ho == NULL)
		return (ENXIO);

	if (ctx->hwt_owner != ho)
		return (EPERM);

	switch (cmd) {
	case HWT_IOC_START:
		dprintf("%s: start tracing\n", __func__);

		HWT_CTX_LOCK(ctx);
		if (ctx->state == CTX_STATE_RUNNING) {
			/* Already running? */
			HWT_CTX_UNLOCK(ctx);
			return (ENXIO);
		}
		ctx->state = CTX_STATE_RUNNING;
		HWT_CTX_UNLOCK(ctx);

		if (ctx->mode == HWT_MODE_CPU)
			hwt_vm_start_cpu_mode(ctx);
		else {
			/*
			 * Tracing backend will be configured and enabled
			 * during hook invocation. See hwt_hook.c.
			 */
		}

		break;

	case HWT_IOC_STOP:
		if (ctx->state == CTX_STATE_STOPPED)
			return (ENXIO);
		hwt_backend_stop(ctx);
		ctx->state = CTX_STATE_STOPPED;
		break;

	case HWT_IOC_RECORD_GET:
		rget = (struct hwt_record_get *)addr;
		error = hwt_record_send(ctx, rget);
		if (error)
			return (error);
		break;

	case HWT_IOC_SET_CONFIG:
		if (ctx->state == CTX_STATE_RUNNING) {
			return (ENXIO);
		}
		sconf = (struct hwt_set_config *)addr;
		error = hwt_config_set(td, ctx, sconf);
		if (error)
			return (error);
		ctx->pause_on_mmap = sconf->pause_on_mmap ? 1 : 0;
		break;

	case HWT_IOC_WAKEUP:

		if (ctx->mode == HWT_MODE_CPU)
			return (ENXIO);

		KASSERT(vm->thr != NULL, ("thr is NULL"));

		wakeup(vm->thr);

		break;

	case HWT_IOC_BUFPTR_GET:
		ptr_get = (struct hwt_bufptr_get *)addr;

		error = hwt_backend_read(ctx, vm, &ident, &offset, &data);
		if (error)
			return (error);

		if (ptr_get->ident)
			error = copyout(&ident, ptr_get->ident, sizeof(int));
		if (error)
			return (error);

		if (ptr_get->offset)
			error = copyout(&offset, ptr_get->offset,
			    sizeof(vm_offset_t));
		if (error)
			return (error);

		if (ptr_get->data)
			error = copyout(&data, ptr_get->data, sizeof(uint64_t));
		if (error)
			return (error);

		break;

	case HWT_IOC_SVC_BUF:
		if (ctx->state == CTX_STATE_STOPPED) {
			return (ENXIO);
		}

		sbuf = (struct hwt_svc_buf *)addr;
		data_size = sbuf->data_size;
		data_version = sbuf->data_version;

		if (data_size == 0 || data_size > PAGE_SIZE)
			return (EINVAL);

		data2 = malloc(data_size, M_HWT_VM, M_WAITOK | M_ZERO);
		error = copyin(sbuf->data, data2, data_size);
		if (error) {
			free(data2, M_HWT_VM);
			return (error);
		}

		error = hwt_backend_svc_buf(ctx, data2, data_size,
		    data_version);
		if (error) {
			free(data2, M_HWT_VM);
			return (error);
		}

		free(data2, M_HWT_VM);
		break;

	default:
		break;
	}

	return (0);
}

static struct cdevsw hwt_vm_cdevsw = {
	.d_version	= D_VERSION,
	.d_name		= "hwt",
	.d_open		= hwt_vm_open,
	.d_mmap_single	= hwt_vm_mmap_single,
	.d_ioctl	= hwt_vm_ioctl,
};

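/*
 * Create the character device node for this trace buffer so the owner can
 * mmap() it and issue ioctls against it.
 */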
static int
hwt_vm_create_cdev(struct hwt_vm *vm, char *path)
{
	struct make_dev_args args;
	int error;

	dprintf("%s: path %s\n", __func__, path);

	make_dev_args_init(&args);
	args.mda_devsw = &hwt_vm_cdevsw;
	args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
	args.mda_uid = UID_ROOT;
	args.mda_gid = GID_WHEEL;
	args.mda_mode = 0660;
	args.mda_si_drv1 = vm;

	error = make_dev_s(&args, &vm->cdev, "%s", path);
	if (error != 0)
		return (error);

	return (0);
}

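/*
 * Allocate the page pointer array and back it with wired physical pages.
 */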
static int
hwt_vm_alloc_buffers(struct hwt_vm *vm, int kva_req)
{
	int error;

	vm->pages = malloc(sizeof(struct vm_page *) * vm->npages,
	    M_HWT_VM, M_WAITOK | M_ZERO);

	error = hwt_vm_alloc_pages(vm, kva_req);
	if (error) {
		printf("%s: could not alloc pages\n", __func__);
		return (error);
	}

	return (0);
}

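/*
 * Tear down the buffer: remove any kernel mapping, release each page from
 * the pager object, unwire and free the pages, then drop the object and
 * the page pointer array.
 */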
static void
hwt_vm_destroy_buffers(struct hwt_vm *vm)
{
	vm_page_t m;
	int i;

	if (vm->ctx->hwt_backend->kva_req && vm->kvaddr != 0) {
		pmap_qremove(vm->kvaddr, vm->npages);
		kva_free(vm->kvaddr, vm->npages * PAGE_SIZE);
	}
	VM_OBJECT_WLOCK(vm->obj);
	for (i = 0; i < vm->npages; i++) {
		m = vm->pages[i];
		if (m == NULL)
			break;

		vm_page_busy_acquire(m, 0);
		cdev_pager_free_page(vm->obj, m);
		m->flags &= ~PG_FICTITIOUS;
		vm_page_unwire_noq(m);
		vm_page_free(m);
	}
	vm_pager_deallocate(vm->obj);
	VM_OBJECT_WUNLOCK(vm->obj);

	free(vm->pages, M_HWT_VM);
}

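/*
 * Destroy the character device (if created) and release the trace buffer
 * and the hwt_vm descriptor itself.
 */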
void
hwt_vm_free(struct hwt_vm *vm)
{

	dprintf("%s\n", __func__);

	if (vm->cdev)
		destroy_dev_sched(vm->cdev);
	hwt_vm_destroy_buffers(vm);
	free(vm, M_HWT_VM);
}

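/*
 * Allocate a trace buffer of bufsize bytes and expose it through a new
 * character device at the given path.
 */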
int
hwt_vm_alloc(size_t bufsize, int kva_req, char *path, struct hwt_vm **vm0)
{
	struct hwt_vm *vm;
	int error;

	vm = malloc(sizeof(struct hwt_vm), M_HWT_VM, M_WAITOK | M_ZERO);
	vm->npages = bufsize / PAGE_SIZE;

	error = hwt_vm_alloc_buffers(vm, kva_req);
	if (error) {
		free(vm, M_HWT_VM);
		return (error);
	}

	error = hwt_vm_create_cdev(vm, path);
	if (error) {
		hwt_vm_free(vm);
		return (error);
	}

	*vm0 = vm;

	return (0);
}