// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Google LLC
 * Author: Vincent Donnefort <vdonnefort@google.com>
 */

#include <linux/cpumask.h>
#include <linux/simple_ring_buffer.h>
#include <linux/trace_remote.h>
#include <linux/tracefs.h>

#include <asm/arch_timer.h>
#include <asm/kvm_host.h>
#include <asm/kvm_hyptrace.h>
#include <asm/kvm_mmu.h>

#include "hyp_trace.h"

/* Same 10min as used by the clocksource code when the counter width is more than 32-bit */
#define CLOCK_MAX_CONVERSION_S	600
/*
 * Time to give for the clock init. Long enough to get a good mult/shift
 * estimation. Short enough to not delay the tracing start too much.
 */
#define CLOCK_INIT_MS		100
/*
 * Time between clock checks. Must be small enough to catch clock deviation
 * while it is still tiny.
 */
#define CLOCK_UPDATE_MS		500

static struct hyp_trace_clock {
	u64			cycles;
	u64			cyc_overflow64;
	u64			boot;
	u32			mult;
	u32			shift;
	struct delayed_work	work;
	struct completion	ready;
	struct mutex		lock;
	bool			running;
} hyp_clock;

/*
 * Periodically compare the hyp clock (arch counter cycles converted with
 * mult/shift) against the kernel boot clock, refine mult/shift when they
 * deviate and push the result to the hypervisor. The epoch is fast-forwarded
 * before the 64-bit cycles-to-ns multiplication can overflow.
 */
static void __hyp_clock_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct hyp_trace_clock *hyp_clock;
	struct system_time_snapshot snap;
	u64 rate, delta_cycles;
	u64 boot, delta_boot;

	hyp_clock = container_of(dwork, struct hyp_trace_clock, work);

	ktime_get_snapshot(&snap);
	boot = ktime_to_ns(snap.boot);

	delta_boot = boot - hyp_clock->boot;
	delta_cycles = snap.cycles - hyp_clock->cycles;

	/* Compare the hyp clock with the kernel boot clock */
	if (hyp_clock->mult) {
		u64 err, cur = delta_cycles;

		/* Should not happen: the epoch is fast-forwarded well before overflow */
		if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) {
			__uint128_t tmp = (__uint128_t)cur * hyp_clock->mult;

			cur = tmp >> hyp_clock->shift;
		} else {
			cur *= hyp_clock->mult;
			cur >>= hyp_clock->shift;
		}
		cur += hyp_clock->boot;

		err = abs_diff(cur, boot);
		/* No deviation, only update the epoch if necessary */
		if (!err) {
			if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1))
				goto fast_forward;

			goto resched;
		}

		/* Warn if the error is above the tracing precision (1us) */
		if (err > NSEC_PER_USEC)
			pr_warn_ratelimited("hyp trace clock off by %lluus\n",
					    err / NSEC_PER_USEC);
	}

	rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);

	clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
			       rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);

	/* Add a comfortable 50% margin */
	hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1;

fast_forward:
	hyp_clock->cycles = snap.cycles;
	hyp_clock->boot = boot;
	kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult,
			  hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
	complete(&hyp_clock->ready);

resched:
	schedule_delayed_work(&hyp_clock->work,
			      msecs_to_jiffies(CLOCK_UPDATE_MS));
}

static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable)
{
	struct system_time_snapshot snap;

	if (hyp_clock->running == enable)
		return;

	if (!enable) {
		cancel_delayed_work_sync(&hyp_clock->work);
		hyp_clock->running = false;
		return;
	}

	ktime_get_snapshot(&snap);

	hyp_clock->boot = ktime_to_ns(snap.boot);
	hyp_clock->cycles = snap.cycles;
	hyp_clock->mult = 0;

	init_completion(&hyp_clock->ready);
	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
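	/*
	 * Kick the worker and wait for a first mult/shift estimation to be
	 * pushed to the hypervisor before reporting the clock as running.
	 */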
	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
	wait_for_completion(&hyp_clock->ready);
	hyp_clock->running = true;
}

/*
 * Access to this struct within the trace_remote_callbacks is protected by the
 * trace_remote lock.
 */
static struct hyp_trace_buffer {
	struct hyp_trace_desc	*desc;
	size_t			desc_size;
} trace_buffer;

static int __map_hyp(void *start, size_t size)
{
	if (is_protected_kvm_enabled())
		return 0;

	return create_hyp_mappings(start, start + size, PAGE_HYP);
}

static int __share_page(unsigned long va)
{
	return kvm_share_hyp((void *)va, (void *)va + 1);
}

static void __unshare_page(unsigned long va)
{
	kvm_unshare_hyp((void *)va, (void *)va + 1);
}

static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
{
	/* One struct simple_buffer_page per buffer page, plus one for the reader */
	int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
	size_t backing_size;
	void *start;

	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
				  num_possible_cpus());

	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
	if (!start)
		return -ENOMEM;

	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
	trace_buffer->desc->bpages_backing_size = backing_size;

	return __map_hyp(start, backing_size);
}

static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
{
	free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
			 trace_buffer->desc->bpages_backing_size);
}

static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu)
{
	struct ring_buffer_desc *rb_desc;
	int cpu, p;

	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
		if (cpu > last_cpu)
			break;

		__unshare_page(rb_desc->meta_va);
		for (p = 0; p < rb_desc->nr_page_va; p++)
			__unshare_page(rb_desc->page_va[p]);
	}
}

static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer)
{
	struct ring_buffer_desc *rb_desc;
	int cpu, p, ret = 0;

	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
		ret = __share_page(rb_desc->meta_va);
		if (ret)
			break;

		for (p = 0; p < rb_desc->nr_page_va; p++) {
			ret = __share_page(rb_desc->page_va[p]);
			if (ret)
				break;
		}

		if (ret) {
			/* Roll back what was shared for this partially done CPU */
			for (p--; p >= 0; p--)
				__unshare_page(rb_desc->page_va[p]);
			__unshare_page(rb_desc->meta_va);
			break;
		}
	}

	/* Unshare the CPUs that were fully shared before the failure */
	if (ret)
		hyp_trace_buffer_unshare_hyp(trace_buffer, cpu - 1);

	return ret;
}

static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
{
	struct hyp_trace_buffer *trace_buffer = priv;
	struct hyp_trace_desc *desc;
	size_t desc_size;
	int ret;

	if (WARN_ON(trace_buffer->desc))
		return ERR_PTR(-EINVAL);

	desc_size = trace_buffer_desc_size(size, num_possible_cpus());
	if (desc_size == SIZE_MAX)
		return ERR_PTR(-E2BIG);

	desc_size = PAGE_ALIGN(desc_size);
	desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
	if (!desc)
		return ERR_PTR(-ENOMEM);

	ret = __map_hyp(desc, desc_size);
	if (ret)
		goto err_free_desc;

	trace_buffer->desc = desc;
	trace_buffer->desc_size = desc_size;

	ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
	if (ret)
		goto err_free_desc;

	ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
					cpu_possible_mask);
	if (ret)
		goto err_free_backing;

	ret = hyp_trace_buffer_share_hyp(trace_buffer);
	if (ret)
		goto err_free_buffer;

	ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
	if (ret)
		goto err_unload_pages;

	return &desc->trace_buffer_desc;

err_unload_pages:
	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);

err_free_buffer:
	trace_remote_free_buffer(&desc->trace_buffer_desc);

err_free_backing:
	hyp_trace_buffer_free_bpages_backing(trace_buffer);

err_free_desc:
	free_pages_exact(desc, desc_size);
	trace_buffer->desc = NULL;

	return ERR_PTR(ret);
}

static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
{
	struct hyp_trace_buffer *trace_buffer = priv;

	if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
		return;

	kvm_call_hyp_nvhe(__tracing_unload);
	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
	trace_remote_free_buffer(desc);
	hyp_trace_buffer_free_bpages_backing(trace_buffer);
	free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
	trace_buffer->desc = NULL;
}

static int hyp_trace_enable_tracing(bool enable, void *priv)
{
	hyp_trace_clock_enable(&hyp_clock, enable);

	return kvm_call_hyp_nvhe(__tracing_enable, enable);
}

static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
{
	return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu);
}

static int hyp_trace_reset(unsigned int cpu, void *priv)
{
	return kvm_call_hyp_nvhe(__tracing_reset, cpu);
}

static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
{
	struct hyp_event_id *event_id = lm_alias(&__hyp_event_ids_start[id]);
	struct page *page;
	atomic_t *enabled;
	void *map;

	if (is_protected_kvm_enabled())
		return kvm_call_hyp_nvhe(__tracing_enable_event, id, enable);

	/*
	 * The hypervisor is not protected: flip the enable flag directly,
	 * through a temporary writable mapping of the page holding it.
	 */
	enabled = &event_id->enabled;
	page = virt_to_page(enabled);
	map = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
	if (!map)
		return -ENOMEM;

	enabled = map + offset_in_page(enabled);
	atomic_set(enabled, enable);

	vunmap(map);

	return 0;
}

static int hyp_trace_clock_show(struct seq_file *m, void *v)
{
	seq_puts(m, "[boot]\n");

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);

static ssize_t hyp_trace_write_event_write(struct file *f, const char __user *ubuf,
					   size_t cnt, loff_t *pos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	kvm_call_hyp_nvhe(__tracing_write_event, val);

	return cnt;
}

static const struct file_operations hyp_trace_write_event_fops = {
	.write = hyp_trace_write_event_write,
};

static int hyp_trace_init_tracefs(struct dentry *d, void *priv)
{
	if (!tracefs_create_file("write_event", 0200, d, NULL, &hyp_trace_write_event_fops))
		return -ENOMEM;

	if (!tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops))
		return -ENOMEM;

	return 0;
}

static struct trace_remote_callbacks trace_remote_callbacks = {
	.init = hyp_trace_init_tracefs,
	.load_trace_buffer = hyp_trace_load,
	.unload_trace_buffer = hyp_trace_unload,
	.enable_tracing = hyp_trace_enable_tracing,
	.swap_reader_page = hyp_trace_swap_reader_page,
	.reset = hyp_trace_reset,
	.enable_event = hyp_trace_enable_event,
};

/* Referenced by the event definitions pulled in just below */
static const char *__hyp_enter_exit_reason_str(u8 reason);

#include <asm/kvm_define_hypevents.h>

static const char *__hyp_enter_exit_reason_str(u8 reason)
{
	static const char strs[][12] = {
		"smc",
		"hvc",
		"psci",
		"host_abort",
		"guest_exit",
		"eret_host",
		"eret_guest",
		"unknown",
	};

	return strs[min(reason, HYP_REASON_UNKNOWN)];
}

static void __init hyp_trace_init_events(void)
{
	struct hyp_event_id *hyp_event_id = __hyp_event_ids_start;
	struct remote_event *event = __hyp_events_start;
	int id = 0;

	/* The host and hypervisor event tables are sorted in the same order */
	for (; event < __hyp_events_end; event++, hyp_event_id++, id++)
		event->id = hyp_event_id->id = id;
}

int __init kvm_hyp_trace_init(void)
{
	int cpu;

	if (is_kernel_in_hyp_mode())
		return 0;

	for_each_possible_cpu(cpu) {
		const struct arch_timer_erratum_workaround *wa =
			per_cpu(timer_unstable_counter_workaround, cpu);

		if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) &&
		    wa && wa->read_cntvct_el0) {
			pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc);
			return -EOPNOTSUPP;
		}
	}

	hyp_trace_init_events();

	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
				     __hyp_events_start, __hyp_events_end - __hyp_events_start);
}
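/*
 * Note: the trace_remote core invokes the callbacks above under its own lock
 * and is expected to expose this remote through tracefs (registered here
 * under the name "hypervisor"); the exact directory layout is owned by
 * trace_remote, not by this file.
 */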