xref: /linux/arch/arm64/kvm/hyp_trace.c (revision 65b09bfa8aa7ebe087093b591525385efb2d58b0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2025 Google LLC
4  * Author: Vincent Donnefort <vdonnefort@google.com>
5  */
6 
7 #include <linux/cpumask.h>
8 #include <linux/trace_remote.h>
9 #include <linux/tracefs.h>
10 #include <linux/simple_ring_buffer.h>
11 
12 #include <asm/arch_timer.h>
13 #include <asm/kvm_host.h>
14 #include <asm/kvm_hyptrace.h>
15 #include <asm/kvm_mmu.h>
16 
17 #include "hyp_trace.h"
18 
/*
 * Maximum interval, in seconds, handed to clocks_calc_mult_shift() when the
 * mult/shift pair is computed. Same 10min used by clocksource when width is
 * more than 32-bits.
 */
#define CLOCK_MAX_CONVERSION_S	600
/*
 * Delay, in milliseconds, before the first clock work runs once the clock is
 * enabled. Long enough to get a good mult/shift estimation. Short enough to
 * not delay the tracing start too much.
 */
#define CLOCK_INIT_MS		100
/*
 * Period, in milliseconds, between two clock checks. Must be small enough to
 * catch clock deviation when it is still tiny.
 */
#define CLOCK_UPDATE_MS		500
31 
/*
 * State used to keep the hypervisor trace clock aligned with the kernel boot
 * clock.
 *
 * @cycles:		Counter value taken at the current epoch.
 * @cyc_overflow64:	Cycle delta from which the work switches to a 128-bit
 *			multiplication when converting cycles with @mult.
 * @boot:		Boot clock value, in ns, taken at the current epoch.
 * @mult:		Cycles to ns multiplier. 0 means "not computed yet".
 * @shift:		Cycles to ns shift, paired with @mult.
 * @work:		Delayed work running __hyp_clock_work().
 * @ready:		Completed by the work each time a clock update has been
 *			sent to the hypervisor.
 * @lock:		NOTE(review): not used by any code visible in this file
 *			- confirm whether it is still needed.
 * @running:		True while the periodic work is armed.
 */
static struct hyp_trace_clock {
	u64			cycles;
	u64			cyc_overflow64;
	u64			boot;
	u32			mult;
	u32			shift;
	struct delayed_work	work;
	struct completion	ready;
	struct mutex		lock;
	bool			running;
} hyp_clock;
43 
/*
 * Periodic work comparing the hyp trace clock against the kernel boot clock.
 *
 * A time snapshot is taken and compared with the stored epoch (boot, cycles).
 * When a mult/shift pair already exists, the elapsed cycles are converted to
 * ns and checked against the boot clock:
 *   - no error: the epoch is only moved forward when the cycle delta reached
 *     half of cyc_overflow64, otherwise the work is simply rescheduled;
 *   - any error: mult/shift are recomputed from the observed rate.
 * Whenever the epoch or the mult/shift change, the new values are sent to the
 * hypervisor and the 'ready' completion is signalled.
 *
 * The work always re-arms itself with a CLOCK_UPDATE_MS delay.
 */
static void __hyp_clock_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct hyp_trace_clock *hyp_clock;
	struct system_time_snapshot snap;
	u64 rate, delta_cycles;
	u64 boot, delta_boot;

	/* Note: this local pointer shadows the file-scope hyp_clock object */
	hyp_clock = container_of(dwork, struct hyp_trace_clock, work);

	ktime_get_snapshot(&snap);
	boot = ktime_to_ns(snap.boot);

	/* Elapsed time and cycles since the stored epoch */
	delta_boot = boot - hyp_clock->boot;
	delta_cycles = snap.cycles - hyp_clock->cycles;

	/* Compare hyp clock with the kernel boot clock */
	if (hyp_clock->mult) {
		u64 err, cur = delta_cycles;

		/*
		 * Past cyc_overflow64 (not expected, hence the warning), do the
		 * multiplication on 128 bits.
		 */
		if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) {
			__uint128_t tmp = (__uint128_t)cur * hyp_clock->mult;

			cur = tmp >> hyp_clock->shift;
		} else {
			cur *= hyp_clock->mult;
			cur >>= hyp_clock->shift;
		}
		cur += hyp_clock->boot;

		err = abs_diff(cur, boot);
		/* No deviation, only update epoch if necessary */
		if (!err) {
			if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1))
				goto fast_forward;

			goto resched;
		}

		/* Warn if the error is above tracing precision (1us) */
		if (err > NSEC_PER_USEC)
			pr_warn_ratelimited("hyp trace clock off by %lluus\n",
					    err / NSEC_PER_USEC);
	}

	/*
	 * Observed counter rate, in cycles per second.
	 *
	 * NOTE(review): the multiplication is a plain 64-bit one; confirm that
	 * delta_cycles can never be large enough here for it to wrap.
	 */
	rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);

	clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
			       rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);

	/* Add a comfortable 50% margin */
	hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1;

fast_forward:
	/* Move the epoch to the snapshot and publish it to the hypervisor */
	hyp_clock->cycles = snap.cycles;
	hyp_clock->boot = boot;
	kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult,
			  hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
	complete(&hyp_clock->ready);

resched:
	schedule_delayed_work(&hyp_clock->work,
			      msecs_to_jiffies(CLOCK_UPDATE_MS));
}
108 
109 static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable)
110 {
111 	struct system_time_snapshot snap;
112 
113 	if (hyp_clock->running == enable)
114 		return;
115 
116 	if (!enable) {
117 		cancel_delayed_work_sync(&hyp_clock->work);
118 		hyp_clock->running = false;
119 	}
120 
121 	ktime_get_snapshot(&snap);
122 
123 	hyp_clock->boot = ktime_to_ns(snap.boot);
124 	hyp_clock->cycles = snap.cycles;
125 	hyp_clock->mult = 0;
126 
127 	init_completion(&hyp_clock->ready);
128 	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
129 	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
130 	wait_for_completion(&hyp_clock->ready);
131 	hyp_clock->running = true;
132 }
133 
/*
 * Host-side bookkeeping for the hypervisor trace buffer.
 *
 * @desc:	Descriptor allocated by hyp_trace_load(), NULL when no buffer is
 *		loaded.
 * @desc_size:	Size in bytes of the allocation backing @desc.
 *
 * Access to this struct within the trace_remote_callbacks are protected by the
 * trace_remote lock.
 */
static struct hyp_trace_buffer {
	struct hyp_trace_desc	*desc;
	size_t			desc_size;
} trace_buffer;
139 
140 static int __map_hyp(void *start, size_t size)
141 {
142 	if (is_protected_kvm_enabled())
143 		return 0;
144 
145 	return create_hyp_mappings(start, start + size, PAGE_HYP);
146 }
147 
/*
 * Share with the hypervisor the page containing @va, by passing the one byte
 * range [va, va + 1) to kvm_share_hyp(). Returns kvm_share_hyp()'s result.
 */
static int __share_page(unsigned long va)
{
	void *from = (void *)va;

	return kvm_share_hyp(from, from + 1);
}
152 
/*
 * Undo __share_page(): hand the one byte range [va, va + 1) to
 * kvm_unshare_hyp().
 */
static void __unshare_page(unsigned long va)
{
	void *from = (void *)va;

	kvm_unshare_hyp(from, from + 1);
}
157 
158 static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
159 {
160 	int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
161 	size_t backing_size;
162 	void *start;
163 
164 	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
165 				  num_possible_cpus());
166 
167 	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
168 	if (!start)
169 		return -ENOMEM;
170 
171 	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
172 	trace_buffer->desc->bpages_backing_size = backing_size;
173 
174 	return __map_hyp(start, backing_size);
175 }
176 
177 static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
178 {
179 	free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
180 			 trace_buffer->desc->bpages_backing_size);
181 }
182 
183 static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu)
184 {
185 	struct ring_buffer_desc *rb_desc;
186 	int cpu, p;
187 
188 	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
189 		if (cpu > last_cpu)
190 			break;
191 
192 		__unshare_page(rb_desc->meta_va);
193 		for (p = 0; p < rb_desc->nr_page_va; p++)
194 			__unshare_page(rb_desc->page_va[p]);
195 	}
196 }
197 
198 static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer)
199 {
200 	struct ring_buffer_desc *rb_desc;
201 	int cpu, p, ret = 0;
202 
203 	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
204 		ret = __share_page(rb_desc->meta_va);
205 		if (ret)
206 			break;
207 
208 		for (p = 0; p < rb_desc->nr_page_va; p++) {
209 			ret = __share_page(rb_desc->page_va[p]);
210 			if (ret)
211 				break;
212 		}
213 
214 		if (ret) {
215 			while (--p >= 0)
216 				__unshare_page(rb_desc->page_va[p]);
217 			__unshare_page(rb_desc->meta_va);
218 			break;
219 		}
220 	}
221 
222 	if (ret)
223 		hyp_trace_buffer_unshare_hyp(trace_buffer, --cpu);
224 
225 	return ret;
226 }
227 
228 static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
229 {
230 	struct hyp_trace_buffer *trace_buffer = priv;
231 	struct hyp_trace_desc *desc;
232 	size_t desc_size;
233 	int ret;
234 
235 	if (WARN_ON(trace_buffer->desc))
236 		return ERR_PTR(-EINVAL);
237 
238 	desc_size = trace_buffer_desc_size(size, num_possible_cpus());
239 	if (desc_size == SIZE_MAX)
240 		return ERR_PTR(-E2BIG);
241 
242 	desc_size = PAGE_ALIGN(desc_size);
243 	desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
244 	if (!desc)
245 		return ERR_PTR(-ENOMEM);
246 
247 	ret = __map_hyp(desc, desc_size);
248 	if (ret)
249 		goto err_free_desc;
250 
251 	trace_buffer->desc = desc;
252 	trace_buffer->desc_size = desc_size;
253 
254 	ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
255 	if (ret)
256 		goto err_free_desc;
257 
258 	ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
259 					cpu_possible_mask);
260 	if (ret)
261 		goto err_free_backing;
262 
263 	ret = hyp_trace_buffer_share_hyp(trace_buffer);
264 	if (ret)
265 		goto err_free_buffer;
266 
267 	ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
268 	if (ret)
269 		goto err_unload_pages;
270 
271 	return &desc->trace_buffer_desc;
272 
273 err_unload_pages:
274 	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
275 
276 err_free_buffer:
277 	trace_remote_free_buffer(&desc->trace_buffer_desc);
278 
279 err_free_backing:
280 	hyp_trace_buffer_free_bpages_backing(trace_buffer);
281 
282 err_free_desc:
283 	free_pages_exact(desc, desc_size);
284 	trace_buffer->desc = NULL;
285 
286 	return ERR_PTR(ret);
287 }
288 
289 static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
290 {
291 	struct hyp_trace_buffer *trace_buffer = priv;
292 
293 	if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
294 		return;
295 
296 	kvm_call_hyp_nvhe(__tracing_unload);
297 	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
298 	trace_remote_free_buffer(desc);
299 	hyp_trace_buffer_free_bpages_backing(trace_buffer);
300 	free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
301 	trace_buffer->desc = NULL;
302 	trace_buffer->desc_size = 0;
303 }
304 
305 static int hyp_trace_enable_tracing(bool enable, void *priv)
306 {
307 	hyp_trace_clock_enable(&hyp_clock, enable);
308 
309 	return kvm_call_hyp_nvhe(__tracing_enable, enable);
310 }
311 
312 static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
313 {
314 	return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu);
315 }
316 
317 static int hyp_trace_reset(unsigned int cpu, void *priv)
318 {
319 	return kvm_call_hyp_nvhe(__tracing_reset, cpu);
320 }
321 
322 static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
323 {
324 	struct hyp_event_id *event_id = lm_alias(&__hyp_event_ids_start[id]);
325 	struct page *page;
326 	atomic_t *enabled;
327 	void *map;
328 
329 	if (is_protected_kvm_enabled())
330 		return kvm_call_hyp_nvhe(__tracing_enable_event, id, enable);
331 
332 	enabled = &event_id->enabled;
333 	page = virt_to_page(enabled);
334 	map = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
335 	if (!map)
336 		return -ENOMEM;
337 
338 	enabled = map + offset_in_page(enabled);
339 	atomic_set(enabled, enable);
340 
341 	vunmap(map);
342 
343 	return 0;
344 }
345 
/*
 * seq_file show handler for the "trace_clock" file: the only clock listed is
 * "boot", printed between brackets.
 */
static int hyp_trace_clock_show(struct seq_file *m, void *v)
{
	static const char clock_name[] = "[boot]\n";

	seq_puts(m, clock_name);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);
353 
354 static ssize_t hyp_trace_write_event_write(struct file *f, const char __user *ubuf,
355 					   size_t cnt, loff_t *pos)
356 {
357 	unsigned long val;
358 	int ret;
359 
360 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
361 	if (ret)
362 		return ret;
363 
364 	kvm_call_hyp_nvhe(__tracing_write_event, val);
365 
366 	return cnt;
367 }
368 
/* File operations of the "write_event" file: write-only, no other handler */
static const struct file_operations hyp_trace_write_event_fops = {
	.write	= hyp_trace_write_event_write,
};
372 
373 static int hyp_trace_init_tracefs(struct dentry *d, void *priv)
374 {
375 	if (!tracefs_create_file("write_event", 0200, d, NULL, &hyp_trace_write_event_fops))
376 		return -ENOMEM;
377 
378 	return tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops) ?
379 		0 : -ENOMEM;
380 }
381 
/*
 * Callbacks handed to trace_remote_register() by kvm_hyp_trace_init(). Each
 * one receives the registered struct hyp_trace_buffer as its 'priv' argument.
 */
static struct trace_remote_callbacks trace_remote_callbacks = {
	.init			= hyp_trace_init_tracefs,
	.load_trace_buffer	= hyp_trace_load,
	.unload_trace_buffer	= hyp_trace_unload,
	.enable_tracing		= hyp_trace_enable_tracing,
	.swap_reader_page	= hyp_trace_swap_reader_page,
	.reset			= hyp_trace_reset,
	.enable_event		= hyp_trace_enable_event,
};
391 
392 static const char *__hyp_enter_exit_reason_str(u8 reason);
393 
394 #include <asm/kvm_define_hypevents.h>
395 
396 static const char *__hyp_enter_exit_reason_str(u8 reason)
397 {
398 	static const char strs[][12] = {
399 		"smc",
400 		"hvc",
401 		"psci",
402 		"host_abort",
403 		"guest_exit",
404 		"eret_host",
405 		"eret_guest",
406 		"unknown",
407 	};
408 
409 	return strs[min(reason, HYP_REASON_UNKNOWN)];
410 }
411 
412 static void __init hyp_trace_init_events(void)
413 {
414 	struct hyp_event_id *hyp_event_id = __hyp_event_ids_start;
415 	struct remote_event *event = __hyp_events_start;
416 	int id = 0;
417 
418 	/* Events on both sides hypervisor are sorted */
419 	for (; event < __hyp_events_end; event++, hyp_event_id++, id++)
420 		event->id = hyp_event_id->id = id;
421 }
422 
423 int __init kvm_hyp_trace_init(void)
424 {
425 	int cpu;
426 
427 	if (is_kernel_in_hyp_mode())
428 		return 0;
429 
430 	for_each_possible_cpu(cpu) {
431 		const struct arch_timer_erratum_workaround *wa =
432 			per_cpu(timer_unstable_counter_workaround, cpu);
433 
434 		if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) &&
435 		    wa && wa->read_cntvct_el0) {
436 			pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc);
437 			return -EOPNOTSUPP;
438 		}
439 	}
440 
441 	hyp_trace_init_events();
442 
443 	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
444 				     __hyp_events_start, __hyp_events_end - __hyp_events_start);
445 }
446