1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2025 Google LLC 4 * Author: Vincent Donnefort <vdonnefort@google.com> 5 */ 6 7 #include <linux/cpumask.h> 8 #include <linux/trace_remote.h> 9 #include <linux/tracefs.h> 10 #include <linux/simple_ring_buffer.h> 11 12 #include <asm/arch_timer.h> 13 #include <asm/kvm_host.h> 14 #include <asm/kvm_hyptrace.h> 15 #include <asm/kvm_mmu.h> 16 17 #include "hyp_trace.h" 18 19 /* Same 10min used by clocksource when width is more than 32-bits */ 20 #define CLOCK_MAX_CONVERSION_S 600 21 /* 22 * Time to give for the clock init. Long enough to get a good mult/shift 23 * estimation. Short enough to not delay the tracing start too much. 24 */ 25 #define CLOCK_INIT_MS 100 26 /* 27 * Time between clock checks. Must be small enough to catch clock deviation when 28 * it is still tiny. 29 */ 30 #define CLOCK_UPDATE_MS 500 31 32 static struct hyp_trace_clock { 33 u64 cycles; 34 u64 cyc_overflow64; 35 u64 boot; 36 u32 mult; 37 u32 shift; 38 struct delayed_work work; 39 struct completion ready; 40 struct mutex lock; 41 bool running; 42 } hyp_clock; 43 44 static void __hyp_clock_work(struct work_struct *work) 45 { 46 struct delayed_work *dwork = to_delayed_work(work); 47 struct hyp_trace_clock *hyp_clock; 48 struct system_time_snapshot snap; 49 u64 rate, delta_cycles; 50 u64 boot, delta_boot; 51 52 hyp_clock = container_of(dwork, struct hyp_trace_clock, work); 53 54 ktime_get_snapshot(&snap); 55 boot = ktime_to_ns(snap.boot); 56 57 delta_boot = boot - hyp_clock->boot; 58 delta_cycles = snap.cycles - hyp_clock->cycles; 59 60 /* Compare hyp clock with the kernel boot clock */ 61 if (hyp_clock->mult) { 62 u64 err, cur = delta_cycles; 63 64 if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) { 65 __uint128_t tmp = (__uint128_t)cur * hyp_clock->mult; 66 67 cur = tmp >> hyp_clock->shift; 68 } else { 69 cur *= hyp_clock->mult; 70 cur >>= hyp_clock->shift; 71 } 72 cur += hyp_clock->boot; 73 74 err = abs_diff(cur, boot); 75 /* No deviation, only update epoch if necessary */ 76 if (!err) { 77 if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1)) 78 goto fast_forward; 79 80 goto resched; 81 } 82 83 /* Warn if the error is above tracing precision (1us) */ 84 if (err > NSEC_PER_USEC) 85 pr_warn_ratelimited("hyp trace clock off by %lluus\n", 86 err / NSEC_PER_USEC); 87 } 88 89 rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot); 90 91 clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift, 92 rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S); 93 94 /* Add a comfortable 50% margin */ 95 hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1; 96 97 fast_forward: 98 hyp_clock->cycles = snap.cycles; 99 hyp_clock->boot = boot; 100 kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult, 101 hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles); 102 complete(&hyp_clock->ready); 103 104 resched: 105 schedule_delayed_work(&hyp_clock->work, 106 msecs_to_jiffies(CLOCK_UPDATE_MS)); 107 } 108 109 static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable) 110 { 111 struct system_time_snapshot snap; 112 113 if (hyp_clock->running == enable) 114 return; 115 116 if (!enable) { 117 cancel_delayed_work_sync(&hyp_clock->work); 118 hyp_clock->running = false; 119 } 120 121 ktime_get_snapshot(&snap); 122 123 hyp_clock->boot = ktime_to_ns(snap.boot); 124 hyp_clock->cycles = snap.cycles; 125 hyp_clock->mult = 0; 126 127 init_completion(&hyp_clock->ready); 128 INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work); 129 schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS)); 130 wait_for_completion(&hyp_clock->ready); 131 hyp_clock->running = true; 132 } 133 134 /* Access to this struct within the trace_remote_callbacks are protected by the trace_remote lock */ 135 static struct hyp_trace_buffer { 136 struct hyp_trace_desc *desc; 137 size_t desc_size; 138 } trace_buffer; 139 140 static int __map_hyp(void *start, size_t size) 141 { 142 if (is_protected_kvm_enabled()) 143 return 0; 144 145 return create_hyp_mappings(start, start + size, PAGE_HYP); 146 } 147 148 static int __share_page(unsigned long va) 149 { 150 return kvm_share_hyp((void *)va, (void *)va + 1); 151 } 152 153 static void __unshare_page(unsigned long va) 154 { 155 kvm_unshare_hyp((void *)va, (void *)va + 1); 156 } 157 158 static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size) 159 { 160 int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1; 161 size_t backing_size; 162 void *start; 163 164 backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages * 165 num_possible_cpus()); 166 167 start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT); 168 if (!start) 169 return -ENOMEM; 170 171 trace_buffer->desc->bpages_backing_start = (unsigned long)start; 172 trace_buffer->desc->bpages_backing_size = backing_size; 173 174 return __map_hyp(start, backing_size); 175 } 176 177 static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer) 178 { 179 free_pages_exact((void *)trace_buffer->desc->bpages_backing_start, 180 trace_buffer->desc->bpages_backing_size); 181 } 182 183 static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu) 184 { 185 struct ring_buffer_desc *rb_desc; 186 int cpu, p; 187 188 for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) { 189 if (cpu > last_cpu) 190 break; 191 192 __share_page(rb_desc->meta_va); 193 for (p = 0; p < rb_desc->nr_page_va; p++) 194 __unshare_page(rb_desc->page_va[p]); 195 } 196 } 197 198 static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer) 199 { 200 struct ring_buffer_desc *rb_desc; 201 int cpu, p, ret = 0; 202 203 for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) { 204 ret = __share_page(rb_desc->meta_va); 205 if (ret) 206 break; 207 208 for (p = 0; p < rb_desc->nr_page_va; p++) { 209 ret = __share_page(rb_desc->page_va[p]); 210 if (ret) 211 break; 212 } 213 214 if (ret) { 215 for (p--; p >= 0; p--) 216 __unshare_page(rb_desc->page_va[p]); 217 break; 218 } 219 } 220 221 if (ret) 222 hyp_trace_buffer_unshare_hyp(trace_buffer, cpu--); 223 224 return ret; 225 } 226 227 static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv) 228 { 229 struct hyp_trace_buffer *trace_buffer = priv; 230 struct hyp_trace_desc *desc; 231 size_t desc_size; 232 int ret; 233 234 if (WARN_ON(trace_buffer->desc)) 235 return ERR_PTR(-EINVAL); 236 237 desc_size = trace_buffer_desc_size(size, num_possible_cpus()); 238 if (desc_size == SIZE_MAX) 239 return ERR_PTR(-E2BIG); 240 241 desc_size = PAGE_ALIGN(desc_size); 242 desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL); 243 if (!desc) 244 return ERR_PTR(-ENOMEM); 245 246 ret = __map_hyp(desc, desc_size); 247 if (ret) 248 goto err_free_desc; 249 250 trace_buffer->desc = desc; 251 252 ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size); 253 if (ret) 254 goto err_free_desc; 255 256 ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size, 257 cpu_possible_mask); 258 if (ret) 259 goto err_free_backing; 260 261 ret = hyp_trace_buffer_share_hyp(trace_buffer); 262 if (ret) 263 goto err_free_buffer; 264 265 ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size); 266 if (ret) 267 goto err_unload_pages; 268 269 return &desc->trace_buffer_desc; 270 271 err_unload_pages: 272 hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX); 273 274 err_free_buffer: 275 trace_remote_free_buffer(&desc->trace_buffer_desc); 276 277 err_free_backing: 278 hyp_trace_buffer_free_bpages_backing(trace_buffer); 279 280 err_free_desc: 281 free_pages_exact(desc, desc_size); 282 trace_buffer->desc = NULL; 283 284 return ERR_PTR(ret); 285 } 286 287 static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv) 288 { 289 struct hyp_trace_buffer *trace_buffer = priv; 290 291 if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc)) 292 return; 293 294 kvm_call_hyp_nvhe(__tracing_unload); 295 hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX); 296 trace_remote_free_buffer(desc); 297 hyp_trace_buffer_free_bpages_backing(trace_buffer); 298 free_pages_exact(trace_buffer->desc, trace_buffer->desc_size); 299 trace_buffer->desc = NULL; 300 } 301 302 static int hyp_trace_enable_tracing(bool enable, void *priv) 303 { 304 hyp_trace_clock_enable(&hyp_clock, enable); 305 306 return kvm_call_hyp_nvhe(__tracing_enable, enable); 307 } 308 309 static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv) 310 { 311 return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu); 312 } 313 314 static int hyp_trace_reset(unsigned int cpu, void *priv) 315 { 316 return 0; 317 } 318 319 static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv) 320 { 321 return 0; 322 } 323 324 static int hyp_trace_clock_show(struct seq_file *m, void *v) 325 { 326 seq_puts(m, "[boot]\n"); 327 328 return 0; 329 } 330 DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock); 331 332 static int hyp_trace_init_tracefs(struct dentry *d, void *priv) 333 { 334 return tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops) ? 335 0 : -ENOMEM; 336 } 337 338 static struct trace_remote_callbacks trace_remote_callbacks = { 339 .init = hyp_trace_init_tracefs, 340 .load_trace_buffer = hyp_trace_load, 341 .unload_trace_buffer = hyp_trace_unload, 342 .enable_tracing = hyp_trace_enable_tracing, 343 .swap_reader_page = hyp_trace_swap_reader_page, 344 .reset = hyp_trace_reset, 345 .enable_event = hyp_trace_enable_event, 346 }; 347 348 int __init kvm_hyp_trace_init(void) 349 { 350 int cpu; 351 352 if (is_kernel_in_hyp_mode()) 353 return 0; 354 355 #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND 356 for_each_possible_cpu(cpu) { 357 const struct arch_timer_erratum_workaround *wa = 358 per_cpu(timer_unstable_counter_workaround, cpu); 359 360 if (wa && wa->read_cntvct_el0) { 361 pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc); 362 return -EOPNOTSUPP; 363 } 364 } 365 #endif 366 367 return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer, NULL, 0); 368 } 369