xref: /linux/arch/arm64/kvm/arch_timer.c (revision 7f4f3b14e8079ecde096bd734af10e30d40c27b7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 ARM Ltd.
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  */
6 
7 #include <linux/cpu.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/interrupt.h>
11 #include <linux/irq.h>
12 #include <linux/irqdomain.h>
13 #include <linux/uaccess.h>
14 
15 #include <clocksource/arm_arch_timer.h>
16 #include <asm/arch_timer.h>
17 #include <asm/kvm_emulate.h>
18 #include <asm/kvm_hyp.h>
19 #include <asm/kvm_nested.h>
20 
21 #include <kvm/arm_vgic.h>
22 #include <kvm/arm_arch_timer.h>
23 
24 #include "trace.h"
25 
26 static struct timecounter *timecounter;
27 static unsigned int host_vtimer_irq;
28 static unsigned int host_ptimer_irq;
29 static u32 host_vtimer_irq_flags;
30 static u32 host_ptimer_irq_flags;
31 
32 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
33 
34 static const u8 default_ppi[] = {
35 	[TIMER_PTIMER]  = 30,
36 	[TIMER_VTIMER]  = 27,
37 	[TIMER_HPTIMER] = 26,
38 	[TIMER_HVTIMER] = 28,
39 };
40 
41 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
42 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
43 				 struct arch_timer_context *timer_ctx);
44 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
45 static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
46 				struct arch_timer_context *timer,
47 				enum kvm_arch_timer_regs treg,
48 				u64 val);
49 static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
50 			      struct arch_timer_context *timer,
51 			      enum kvm_arch_timer_regs treg);
52 static bool kvm_arch_timer_get_input_level(int vintid);
53 
54 static struct irq_ops arch_timer_irq_ops = {
55 	.get_input_level = kvm_arch_timer_get_input_level,
56 };
57 
58 static int nr_timers(struct kvm_vcpu *vcpu)
59 {
60 	if (!vcpu_has_nv(vcpu))
61 		return NR_KVM_EL0_TIMERS;
62 
63 	return NR_KVM_TIMERS;
64 }
65 
66 u32 timer_get_ctl(struct arch_timer_context *ctxt)
67 {
68 	struct kvm_vcpu *vcpu = ctxt->vcpu;
69 
70 	switch(arch_timer_ctx_index(ctxt)) {
71 	case TIMER_VTIMER:
72 		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
73 	case TIMER_PTIMER:
74 		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
75 	case TIMER_HVTIMER:
76 		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
77 	case TIMER_HPTIMER:
78 		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
79 	default:
80 		WARN_ON(1);
81 		return 0;
82 	}
83 }
84 
85 u64 timer_get_cval(struct arch_timer_context *ctxt)
86 {
87 	struct kvm_vcpu *vcpu = ctxt->vcpu;
88 
89 	switch(arch_timer_ctx_index(ctxt)) {
90 	case TIMER_VTIMER:
91 		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
92 	case TIMER_PTIMER:
93 		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
94 	case TIMER_HVTIMER:
95 		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
96 	case TIMER_HPTIMER:
97 		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
98 	default:
99 		WARN_ON(1);
100 		return 0;
101 	}
102 }
103 
104 static u64 timer_get_offset(struct arch_timer_context *ctxt)
105 {
106 	u64 offset = 0;
107 
108 	if (!ctxt)
109 		return 0;
110 
111 	if (ctxt->offset.vm_offset)
112 		offset += *ctxt->offset.vm_offset;
113 	if (ctxt->offset.vcpu_offset)
114 		offset += *ctxt->offset.vcpu_offset;
115 
116 	return offset;
117 }
118 
119 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
120 {
121 	struct kvm_vcpu *vcpu = ctxt->vcpu;
122 
123 	switch(arch_timer_ctx_index(ctxt)) {
124 	case TIMER_VTIMER:
125 		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
126 		break;
127 	case TIMER_PTIMER:
128 		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
129 		break;
130 	case TIMER_HVTIMER:
131 		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
132 		break;
133 	case TIMER_HPTIMER:
134 		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
135 		break;
136 	default:
137 		WARN_ON(1);
138 	}
139 }
140 
141 static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
142 {
143 	struct kvm_vcpu *vcpu = ctxt->vcpu;
144 
145 	switch(arch_timer_ctx_index(ctxt)) {
146 	case TIMER_VTIMER:
147 		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
148 		break;
149 	case TIMER_PTIMER:
150 		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
151 		break;
152 	case TIMER_HVTIMER:
153 		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
154 		break;
155 	case TIMER_HPTIMER:
156 		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
157 		break;
158 	default:
159 		WARN_ON(1);
160 	}
161 }
162 
163 static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
164 {
165 	if (!ctxt->offset.vm_offset) {
166 		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
167 		return;
168 	}
169 
170 	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
171 }
172 
173 u64 kvm_phys_timer_read(void)
174 {
175 	return timecounter->cc->read(timecounter->cc);
176 }
177 
178 void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
179 {
180 	if (vcpu_has_nv(vcpu)) {
181 		if (is_hyp_ctxt(vcpu)) {
182 			map->direct_vtimer = vcpu_hvtimer(vcpu);
183 			map->direct_ptimer = vcpu_hptimer(vcpu);
184 			map->emul_vtimer = vcpu_vtimer(vcpu);
185 			map->emul_ptimer = vcpu_ptimer(vcpu);
186 		} else {
187 			map->direct_vtimer = vcpu_vtimer(vcpu);
188 			map->direct_ptimer = vcpu_ptimer(vcpu);
189 			map->emul_vtimer = vcpu_hvtimer(vcpu);
190 			map->emul_ptimer = vcpu_hptimer(vcpu);
191 		}
192 	} else if (has_vhe()) {
193 		map->direct_vtimer = vcpu_vtimer(vcpu);
194 		map->direct_ptimer = vcpu_ptimer(vcpu);
195 		map->emul_vtimer = NULL;
196 		map->emul_ptimer = NULL;
197 	} else {
198 		map->direct_vtimer = vcpu_vtimer(vcpu);
199 		map->direct_ptimer = NULL;
200 		map->emul_vtimer = NULL;
201 		map->emul_ptimer = vcpu_ptimer(vcpu);
202 	}
203 
204 	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
205 }
206 
207 static inline bool userspace_irqchip(struct kvm *kvm)
208 {
209 	return unlikely(!irqchip_in_kernel(kvm));
210 }
211 
212 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
213 {
214 	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
215 		      HRTIMER_MODE_ABS_HARD);
216 }
217 
/* Counterpart of soft_timer_start(): cancel @hrt via hrtimer_cancel(). */
static void soft_timer_cancel(struct hrtimer *hrt)
{
	/* The return value of hrtimer_cancel() is deliberately unused. */
	hrtimer_cancel(hrt);
}
222 
223 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
224 {
225 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
226 	struct arch_timer_context *ctx;
227 	struct timer_map map;
228 
229 	/*
230 	 * We may see a timer interrupt after vcpu_put() has been called which
231 	 * sets the CPU's vcpu pointer to NULL, because even though the timer
232 	 * has been disabled in timer_save_state(), the hardware interrupt
233 	 * signal may not have been retired from the interrupt controller yet.
234 	 */
235 	if (!vcpu)
236 		return IRQ_HANDLED;
237 
238 	get_timer_map(vcpu, &map);
239 
240 	if (irq == host_vtimer_irq)
241 		ctx = map.direct_vtimer;
242 	else
243 		ctx = map.direct_ptimer;
244 
245 	if (kvm_timer_should_fire(ctx))
246 		kvm_timer_update_irq(vcpu, true, ctx);
247 
248 	if (userspace_irqchip(vcpu->kvm) &&
249 	    !static_branch_unlikely(&has_gic_active_state))
250 		disable_percpu_irq(host_vtimer_irq);
251 
252 	return IRQ_HANDLED;
253 }
254 
255 static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
256 				     u64 val)
257 {
258 	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
259 
260 	if (now < val) {
261 		u64 ns;
262 
263 		ns = cyclecounter_cyc2ns(timecounter->cc,
264 					 val - now,
265 					 timecounter->mask,
266 					 &timer_ctx->ns_frac);
267 		return ns;
268 	}
269 
270 	return 0;
271 }
272 
273 static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
274 {
275 	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
276 }
277 
278 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
279 {
280 	WARN_ON(timer_ctx && timer_ctx->loaded);
281 	return timer_ctx &&
282 		((timer_get_ctl(timer_ctx) &
283 		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
284 }
285 
286 static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
287 {
288 	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
289 		vcpu_get_flag(vcpu, IN_WFIT));
290 }
291 
292 static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
293 {
294 	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
295 	struct arch_timer_context *ctx;
296 
297 	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);
298 
299 	return kvm_counter_compute_delta(ctx, val);
300 }
301 
302 /*
303  * Returns the earliest expiration time in ns among guest timers.
304  * Note that it will return 0 if none of timers can fire.
305  */
306 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
307 {
308 	u64 min_delta = ULLONG_MAX;
309 	int i;
310 
311 	for (i = 0; i < nr_timers(vcpu); i++) {
312 		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
313 
314 		WARN(ctx->loaded, "timer %d loaded\n", i);
315 		if (kvm_timer_irq_can_fire(ctx))
316 			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
317 	}
318 
319 	if (vcpu_has_wfit_active(vcpu))
320 		min_delta = min(min_delta, wfit_delay_ns(vcpu));
321 
322 	/* If none of timers can fire, then return 0 */
323 	if (min_delta == ULLONG_MAX)
324 		return 0;
325 
326 	return min_delta;
327 }
328 
329 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
330 {
331 	struct arch_timer_cpu *timer;
332 	struct kvm_vcpu *vcpu;
333 	u64 ns;
334 
335 	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
336 	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
337 
338 	/*
339 	 * Check that the timer has really expired from the guest's
340 	 * PoV (NTP on the host may have forced it to expire
341 	 * early). If we should have slept longer, restart it.
342 	 */
343 	ns = kvm_timer_earliest_exp(vcpu);
344 	if (unlikely(ns)) {
345 		hrtimer_forward_now(hrt, ns_to_ktime(ns));
346 		return HRTIMER_RESTART;
347 	}
348 
349 	kvm_vcpu_wake_up(vcpu);
350 	return HRTIMER_NORESTART;
351 }
352 
353 static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
354 {
355 	struct arch_timer_context *ctx;
356 	struct kvm_vcpu *vcpu;
357 	u64 ns;
358 
359 	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
360 	vcpu = ctx->vcpu;
361 
362 	trace_kvm_timer_hrtimer_expire(ctx);
363 
364 	/*
365 	 * Check that the timer has really expired from the guest's
366 	 * PoV (NTP on the host may have forced it to expire
367 	 * early). If not ready, schedule for a later time.
368 	 */
369 	ns = kvm_timer_compute_delta(ctx);
370 	if (unlikely(ns)) {
371 		hrtimer_forward_now(hrt, ns_to_ktime(ns));
372 		return HRTIMER_RESTART;
373 	}
374 
375 	kvm_timer_update_irq(vcpu, true, ctx);
376 	return HRTIMER_NORESTART;
377 }
378 
379 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
380 {
381 	enum kvm_arch_timers index;
382 	u64 cval, now;
383 
384 	if (!timer_ctx)
385 		return false;
386 
387 	index = arch_timer_ctx_index(timer_ctx);
388 
389 	if (timer_ctx->loaded) {
390 		u32 cnt_ctl = 0;
391 
392 		switch (index) {
393 		case TIMER_VTIMER:
394 		case TIMER_HVTIMER:
395 			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
396 			break;
397 		case TIMER_PTIMER:
398 		case TIMER_HPTIMER:
399 			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
400 			break;
401 		case NR_KVM_TIMERS:
402 			/* GCC is braindead */
403 			cnt_ctl = 0;
404 			break;
405 		}
406 
407 		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
408 		        (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
409 		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
410 	}
411 
412 	if (!kvm_timer_irq_can_fire(timer_ctx))
413 		return false;
414 
415 	cval = timer_get_cval(timer_ctx);
416 	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
417 
418 	return cval <= now;
419 }
420 
/* Non-zero only when a WFIT is active for @vcpu and its deadline has passed. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_wfit_active(vcpu))
		return 0;

	return wfit_delay_ns(vcpu) == 0;
}
425 
426 /*
427  * Reflect the timer output level into the kvm_run structure
428  */
429 void kvm_timer_update_run(struct kvm_vcpu *vcpu)
430 {
431 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
432 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
433 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
434 
435 	/* Populate the device bitmap with the timer states */
436 	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
437 				    KVM_ARM_DEV_EL1_PTIMER);
438 	if (kvm_timer_should_fire(vtimer))
439 		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
440 	if (kvm_timer_should_fire(ptimer))
441 		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
442 }
443 
444 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
445 				 struct arch_timer_context *timer_ctx)
446 {
447 	int ret;
448 
449 	timer_ctx->irq.level = new_level;
450 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
451 				   timer_ctx->irq.level);
452 
453 	if (!userspace_irqchip(vcpu->kvm)) {
454 		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
455 					  timer_irq(timer_ctx),
456 					  timer_ctx->irq.level,
457 					  timer_ctx);
458 		WARN_ON(ret);
459 	}
460 }
461 
462 /* Only called for a fully emulated timer */
463 static void timer_emulate(struct arch_timer_context *ctx)
464 {
465 	bool should_fire = kvm_timer_should_fire(ctx);
466 
467 	trace_kvm_timer_emulate(ctx, should_fire);
468 
469 	if (should_fire != ctx->irq.level) {
470 		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
471 		return;
472 	}
473 
474 	/*
475 	 * If the timer can fire now, we don't need to have a soft timer
476 	 * scheduled for the future.  If the timer cannot fire at all,
477 	 * then we also don't need a soft timer.
478 	 */
479 	if (should_fire || !kvm_timer_irq_can_fire(ctx))
480 		return;
481 
482 	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
483 }
484 
485 static void set_cntvoff(u64 cntvoff)
486 {
487 	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
488 }
489 
490 static void set_cntpoff(u64 cntpoff)
491 {
492 	if (has_cntpoff())
493 		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
494 }
495 
496 static void timer_save_state(struct arch_timer_context *ctx)
497 {
498 	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
499 	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
500 	unsigned long flags;
501 
502 	if (!timer->enabled)
503 		return;
504 
505 	local_irq_save(flags);
506 
507 	if (!ctx->loaded)
508 		goto out;
509 
510 	switch (index) {
511 		u64 cval;
512 
513 	case TIMER_VTIMER:
514 	case TIMER_HVTIMER:
515 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
516 		timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
517 
518 		/* Disable the timer */
519 		write_sysreg_el0(0, SYS_CNTV_CTL);
520 		isb();
521 
522 		/*
523 		 * The kernel may decide to run userspace after
524 		 * calling vcpu_put, so we reset cntvoff to 0 to
525 		 * ensure a consistent read between user accesses to
526 		 * the virtual counter and kernel access to the
527 		 * physical counter of non-VHE case.
528 		 *
529 		 * For VHE, the virtual counter uses a fixed virtual
530 		 * offset of zero, so no need to zero CNTVOFF_EL2
531 		 * register, but this is actually useful when switching
532 		 * between EL1/vEL2 with NV.
533 		 *
534 		 * Do it unconditionally, as this is either unavoidable
535 		 * or dirt cheap.
536 		 */
537 		set_cntvoff(0);
538 		break;
539 	case TIMER_PTIMER:
540 	case TIMER_HPTIMER:
541 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
542 		cval = read_sysreg_el0(SYS_CNTP_CVAL);
543 
544 		cval -= timer_get_offset(ctx);
545 
546 		timer_set_cval(ctx, cval);
547 
548 		/* Disable the timer */
549 		write_sysreg_el0(0, SYS_CNTP_CTL);
550 		isb();
551 
552 		set_cntpoff(0);
553 		break;
554 	case NR_KVM_TIMERS:
555 		BUG();
556 	}
557 
558 	trace_kvm_timer_save_state(ctx);
559 
560 	ctx->loaded = false;
561 out:
562 	local_irq_restore(flags);
563 }
564 
565 /*
566  * Schedule the background timer before calling kvm_vcpu_halt, so that this
567  * thread is removed from its waitqueue and made runnable when there's a timer
568  * interrupt to handle.
569  */
570 static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
571 {
572 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
573 	struct timer_map map;
574 
575 	get_timer_map(vcpu, &map);
576 
577 	/*
578 	 * If no timers are capable of raising interrupts (disabled or
579 	 * masked), then there's no more work for us to do.
580 	 */
581 	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
582 	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
583 	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
584 	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
585 	    !vcpu_has_wfit_active(vcpu))
586 		return;
587 
588 	/*
589 	 * At least one guest time will expire. Schedule a background timer.
590 	 * Set the earliest expiration time among the guest timers.
591 	 */
592 	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
593 }
594 
595 static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
596 {
597 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
598 
599 	soft_timer_cancel(&timer->bg_timer);
600 }
601 
602 static void timer_restore_state(struct arch_timer_context *ctx)
603 {
604 	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
605 	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
606 	unsigned long flags;
607 
608 	if (!timer->enabled)
609 		return;
610 
611 	local_irq_save(flags);
612 
613 	if (ctx->loaded)
614 		goto out;
615 
616 	switch (index) {
617 		u64 cval, offset;
618 
619 	case TIMER_VTIMER:
620 	case TIMER_HVTIMER:
621 		set_cntvoff(timer_get_offset(ctx));
622 		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
623 		isb();
624 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
625 		break;
626 	case TIMER_PTIMER:
627 	case TIMER_HPTIMER:
628 		cval = timer_get_cval(ctx);
629 		offset = timer_get_offset(ctx);
630 		set_cntpoff(offset);
631 		cval += offset;
632 		write_sysreg_el0(cval, SYS_CNTP_CVAL);
633 		isb();
634 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
635 		break;
636 	case NR_KVM_TIMERS:
637 		BUG();
638 	}
639 
640 	trace_kvm_timer_restore_state(ctx);
641 
642 	ctx->loaded = true;
643 out:
644 	local_irq_restore(flags);
645 }
646 
647 static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
648 {
649 	int r;
650 	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
651 	WARN_ON(r);
652 }
653 
654 static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
655 {
656 	struct kvm_vcpu *vcpu = ctx->vcpu;
657 	bool phys_active = false;
658 
659 	/*
660 	 * Update the timer output so that it is likely to match the
661 	 * state we're about to restore. If the timer expires between
662 	 * this point and the register restoration, we'll take the
663 	 * interrupt anyway.
664 	 */
665 	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
666 
667 	if (irqchip_in_kernel(vcpu->kvm))
668 		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
669 
670 	phys_active |= ctx->irq.level;
671 
672 	set_timer_irq_phys_active(ctx, phys_active);
673 }
674 
675 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
676 {
677 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
678 
679 	/*
680 	 * Update the timer output so that it is likely to match the
681 	 * state we're about to restore. If the timer expires between
682 	 * this point and the register restoration, we'll take the
683 	 * interrupt anyway.
684 	 */
685 	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
686 
687 	/*
688 	 * When using a userspace irqchip with the architected timers and a
689 	 * host interrupt controller that doesn't support an active state, we
690 	 * must still prevent continuously exiting from the guest, and
691 	 * therefore mask the physical interrupt by disabling it on the host
692 	 * interrupt controller when the virtual level is high, such that the
693 	 * guest can make forward progress.  Once we detect the output level
694 	 * being de-asserted, we unmask the interrupt again so that we exit
695 	 * from the guest when the timer fires.
696 	 */
697 	if (vtimer->irq.level)
698 		disable_percpu_irq(host_vtimer_irq);
699 	else
700 		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
701 }
702 
/*
 * Accumulate _bit into one of two masks: into _set when _pred is true,
 * into _clr when it is false.
 */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (!(_pred))						\
			(_clr) |= (_bit);				\
		else							\
			(_set) |= (_bit);				\
	} while (0)
711 
712 static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
713 					      struct timer_map *map)
714 {
715 	int hw, ret;
716 
717 	if (!irqchip_in_kernel(vcpu->kvm))
718 		return;
719 
720 	/*
721 	 * We only ever unmap the vtimer irq on a VHE system that runs nested
722 	 * virtualization, in which case we have both a valid emul_vtimer,
723 	 * emul_ptimer, direct_vtimer, and direct_ptimer.
724 	 *
725 	 * Since this is called from kvm_timer_vcpu_load(), a change between
726 	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
727 	 * represent this, and therefore we switch the emul/direct mappings
728 	 * below.
729 	 */
730 	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
731 	if (hw < 0) {
732 		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
733 		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
734 
735 		ret = kvm_vgic_map_phys_irq(vcpu,
736 					    map->direct_vtimer->host_timer_irq,
737 					    timer_irq(map->direct_vtimer),
738 					    &arch_timer_irq_ops);
739 		WARN_ON_ONCE(ret);
740 		ret = kvm_vgic_map_phys_irq(vcpu,
741 					    map->direct_ptimer->host_timer_irq,
742 					    timer_irq(map->direct_ptimer),
743 					    &arch_timer_irq_ops);
744 		WARN_ON_ONCE(ret);
745 
746 		/*
747 		 * The virtual offset behaviour is "interesting", as it
748 		 * always applies when HCR_EL2.E2H==0, but only when
749 		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
750 		 * track E2H when putting the HV timer in "direct" mode.
751 		 */
752 		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
753 			struct arch_timer_offset *offs = &map->direct_vtimer->offset;
754 
755 			if (vcpu_el2_e2h_is_set(vcpu))
756 				offs->vcpu_offset = NULL;
757 			else
758 				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
759 		}
760 	}
761 }
762 
763 static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
764 {
765 	bool tpt, tpc;
766 	u64 clr, set;
767 
768 	/*
769 	 * No trapping gets configured here with nVHE. See
770 	 * __timer_enable_traps(), which is where the stuff happens.
771 	 */
772 	if (!has_vhe())
773 		return;
774 
775 	/*
776 	 * Our default policy is not to trap anything. As we progress
777 	 * within this function, reality kicks in and we start adding
778 	 * traps based on emulation requirements.
779 	 */
780 	tpt = tpc = false;
781 
782 	/*
783 	 * We have two possibility to deal with a physical offset:
784 	 *
785 	 * - Either we have CNTPOFF (yay!) or the offset is 0:
786 	 *   we let the guest freely access the HW
787 	 *
788 	 * - or neither of these condition apply:
789 	 *   we trap accesses to the HW, but still use it
790 	 *   after correcting the physical offset
791 	 */
792 	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
793 		tpt = tpc = true;
794 
795 	/*
796 	 * Apply the enable bits that the guest hypervisor has requested for
797 	 * its own guest. We can only add traps that wouldn't have been set
798 	 * above.
799 	 */
800 	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
801 		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
802 
803 		/* Use the VHE format for mental sanity */
804 		if (!vcpu_el2_e2h_is_set(vcpu))
805 			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
806 
807 		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
808 		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
809 	}
810 
811 	/*
812 	 * Now that we have collected our requirements, compute the
813 	 * trap and enable bits.
814 	 */
815 	set = 0;
816 	clr = 0;
817 
818 	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
819 	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
820 
821 	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
822 	sysreg_clear_set(cnthctl_el2, clr, set);
823 }
824 
825 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
826 {
827 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
828 	struct timer_map map;
829 
830 	if (unlikely(!timer->enabled))
831 		return;
832 
833 	get_timer_map(vcpu, &map);
834 
835 	if (static_branch_likely(&has_gic_active_state)) {
836 		if (vcpu_has_nv(vcpu))
837 			kvm_timer_vcpu_load_nested_switch(vcpu, &map);
838 
839 		kvm_timer_vcpu_load_gic(map.direct_vtimer);
840 		if (map.direct_ptimer)
841 			kvm_timer_vcpu_load_gic(map.direct_ptimer);
842 	} else {
843 		kvm_timer_vcpu_load_nogic(vcpu);
844 	}
845 
846 	kvm_timer_unblocking(vcpu);
847 
848 	timer_restore_state(map.direct_vtimer);
849 	if (map.direct_ptimer)
850 		timer_restore_state(map.direct_ptimer);
851 	if (map.emul_vtimer)
852 		timer_emulate(map.emul_vtimer);
853 	if (map.emul_ptimer)
854 		timer_emulate(map.emul_ptimer);
855 
856 	timer_set_traps(vcpu, &map);
857 }
858 
859 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
860 {
861 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
862 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
863 	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
864 	bool vlevel, plevel;
865 
866 	if (likely(irqchip_in_kernel(vcpu->kvm)))
867 		return false;
868 
869 	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
870 	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
871 
872 	return kvm_timer_should_fire(vtimer) != vlevel ||
873 	       kvm_timer_should_fire(ptimer) != plevel;
874 }
875 
876 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
877 {
878 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
879 	struct timer_map map;
880 
881 	if (unlikely(!timer->enabled))
882 		return;
883 
884 	get_timer_map(vcpu, &map);
885 
886 	timer_save_state(map.direct_vtimer);
887 	if (map.direct_ptimer)
888 		timer_save_state(map.direct_ptimer);
889 
890 	/*
891 	 * Cancel soft timer emulation, because the only case where we
892 	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
893 	 * in that case we already factor in the deadline for the physical
894 	 * timer when scheduling the bg_timer.
895 	 *
896 	 * In any case, we re-schedule the hrtimer for the physical timer when
897 	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
898 	 */
899 	if (map.emul_vtimer)
900 		soft_timer_cancel(&map.emul_vtimer->hrtimer);
901 	if (map.emul_ptimer)
902 		soft_timer_cancel(&map.emul_ptimer->hrtimer);
903 
904 	if (kvm_vcpu_is_blocking(vcpu))
905 		kvm_timer_blocking(vcpu);
906 }
907 
908 /*
909  * With a userspace irqchip we have to check if the guest de-asserted the
910  * timer and if so, unmask the timer irq signal on the host interrupt
911  * controller to ensure that we see future timer signals.
912  */
913 static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
914 {
915 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
916 
917 	if (!kvm_timer_should_fire(vtimer)) {
918 		kvm_timer_update_irq(vcpu, false, vtimer);
919 		if (static_branch_likely(&has_gic_active_state))
920 			set_timer_irq_phys_active(vtimer, false);
921 		else
922 			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
923 	}
924 }
925 
926 void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
927 {
928 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
929 
930 	if (unlikely(!timer->enabled))
931 		return;
932 
933 	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
934 		unmask_vtimer_irq_user(vcpu);
935 }
936 
937 void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
938 {
939 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
940 	struct timer_map map;
941 
942 	get_timer_map(vcpu, &map);
943 
944 	/*
945 	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
946 	 * and to 0 for ARMv7.  We provide an implementation that always
947 	 * resets the timer to be disabled and unmasked and is compliant with
948 	 * the ARMv7 architecture.
949 	 */
950 	for (int i = 0; i < nr_timers(vcpu); i++)
951 		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
952 
953 	/*
954 	 * A vcpu running at EL2 is in charge of the offset applied to
955 	 * the virtual timer, so use the physical VM offset, and point
956 	 * the vcpu offset to CNTVOFF_EL2.
957 	 */
958 	if (vcpu_has_nv(vcpu)) {
959 		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
960 
961 		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
962 		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
963 	}
964 
965 	if (timer->enabled) {
966 		for (int i = 0; i < nr_timers(vcpu); i++)
967 			kvm_timer_update_irq(vcpu, false,
968 					     vcpu_get_timer(vcpu, i));
969 
970 		if (irqchip_in_kernel(vcpu->kvm)) {
971 			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
972 			if (map.direct_ptimer)
973 				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
974 		}
975 	}
976 
977 	if (map.emul_vtimer)
978 		soft_timer_cancel(&map.emul_vtimer->hrtimer);
979 	if (map.emul_ptimer)
980 		soft_timer_cancel(&map.emul_ptimer->hrtimer);
981 }
982 
983 static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
984 {
985 	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
986 	struct kvm *kvm = vcpu->kvm;
987 
988 	ctxt->vcpu = vcpu;
989 
990 	if (timerid == TIMER_VTIMER)
991 		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
992 	else
993 		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
994 
995 	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
996 	ctxt->hrtimer.function = kvm_hrtimer_expire;
997 
998 	switch (timerid) {
999 	case TIMER_PTIMER:
1000 	case TIMER_HPTIMER:
1001 		ctxt->host_timer_irq = host_ptimer_irq;
1002 		break;
1003 	case TIMER_VTIMER:
1004 	case TIMER_HVTIMER:
1005 		ctxt->host_timer_irq = host_vtimer_irq;
1006 		break;
1007 	}
1008 }
1009 
1010 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
1011 {
1012 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1013 
1014 	for (int i = 0; i < NR_KVM_TIMERS; i++)
1015 		timer_context_init(vcpu, i);
1016 
1017 	/* Synchronize offsets across timers of a VM if not already provided */
1018 	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
1019 		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
1020 		timer_set_offset(vcpu_ptimer(vcpu), 0);
1021 	}
1022 
1023 	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1024 	timer->bg_timer.function = kvm_bg_timer_expire;
1025 }
1026 
1027 void kvm_timer_init_vm(struct kvm *kvm)
1028 {
1029 	for (int i = 0; i < NR_KVM_TIMERS; i++)
1030 		kvm->arch.timer_data.ppi[i] = default_ppi[i];
1031 }
1032 
1033 void kvm_timer_cpu_up(void)
1034 {
1035 	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1036 	if (host_ptimer_irq)
1037 		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
1038 }
1039 
1040 void kvm_timer_cpu_down(void)
1041 {
1042 	disable_percpu_irq(host_vtimer_irq);
1043 	if (host_ptimer_irq)
1044 		disable_percpu_irq(host_ptimer_irq);
1045 }
1046 
1047 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
1048 {
1049 	struct arch_timer_context *timer;
1050 
1051 	switch (regid) {
1052 	case KVM_REG_ARM_TIMER_CTL:
1053 		timer = vcpu_vtimer(vcpu);
1054 		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
1055 		break;
1056 	case KVM_REG_ARM_TIMER_CNT:
1057 		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
1058 			      &vcpu->kvm->arch.flags)) {
1059 			timer = vcpu_vtimer(vcpu);
1060 			timer_set_offset(timer, kvm_phys_timer_read() - value);
1061 		}
1062 		break;
1063 	case KVM_REG_ARM_TIMER_CVAL:
1064 		timer = vcpu_vtimer(vcpu);
1065 		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
1066 		break;
1067 	case KVM_REG_ARM_PTIMER_CTL:
1068 		timer = vcpu_ptimer(vcpu);
1069 		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
1070 		break;
1071 	case KVM_REG_ARM_PTIMER_CNT:
1072 		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
1073 			      &vcpu->kvm->arch.flags)) {
1074 			timer = vcpu_ptimer(vcpu);
1075 			timer_set_offset(timer, kvm_phys_timer_read() - value);
1076 		}
1077 		break;
1078 	case KVM_REG_ARM_PTIMER_CVAL:
1079 		timer = vcpu_ptimer(vcpu);
1080 		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
1081 		break;
1082 
1083 	default:
1084 		return -1;
1085 	}
1086 
1087 	return 0;
1088 }
1089 
1090 static u64 read_timer_ctl(struct arch_timer_context *timer)
1091 {
1092 	/*
1093 	 * Set ISTATUS bit if it's expired.
1094 	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
1095 	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
1096 	 * regardless of ENABLE bit for our implementation convenience.
1097 	 */
1098 	u32 ctl = timer_get_ctl(timer);
1099 
1100 	if (!kvm_timer_compute_delta(timer))
1101 		ctl |= ARCH_TIMER_CTRL_IT_STAT;
1102 
1103 	return ctl;
1104 }
1105 
1106 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
1107 {
1108 	switch (regid) {
1109 	case KVM_REG_ARM_TIMER_CTL:
1110 		return kvm_arm_timer_read(vcpu,
1111 					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
1112 	case KVM_REG_ARM_TIMER_CNT:
1113 		return kvm_arm_timer_read(vcpu,
1114 					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
1115 	case KVM_REG_ARM_TIMER_CVAL:
1116 		return kvm_arm_timer_read(vcpu,
1117 					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
1118 	case KVM_REG_ARM_PTIMER_CTL:
1119 		return kvm_arm_timer_read(vcpu,
1120 					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
1121 	case KVM_REG_ARM_PTIMER_CNT:
1122 		return kvm_arm_timer_read(vcpu,
1123 					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
1124 	case KVM_REG_ARM_PTIMER_CVAL:
1125 		return kvm_arm_timer_read(vcpu,
1126 					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
1127 	}
1128 	return (u64)-1;
1129 }
1130 
1131 static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
1132 			      struct arch_timer_context *timer,
1133 			      enum kvm_arch_timer_regs treg)
1134 {
1135 	u64 val;
1136 
1137 	switch (treg) {
1138 	case TIMER_REG_TVAL:
1139 		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
1140 		val = lower_32_bits(val);
1141 		break;
1142 
1143 	case TIMER_REG_CTL:
1144 		val = read_timer_ctl(timer);
1145 		break;
1146 
1147 	case TIMER_REG_CVAL:
1148 		val = timer_get_cval(timer);
1149 		break;
1150 
1151 	case TIMER_REG_CNT:
1152 		val = kvm_phys_timer_read() - timer_get_offset(timer);
1153 		break;
1154 
1155 	case TIMER_REG_VOFF:
1156 		val = *timer->offset.vcpu_offset;
1157 		break;
1158 
1159 	default:
1160 		BUG();
1161 	}
1162 
1163 	return val;
1164 }
1165 
1166 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
1167 			      enum kvm_arch_timers tmr,
1168 			      enum kvm_arch_timer_regs treg)
1169 {
1170 	struct arch_timer_context *timer;
1171 	struct timer_map map;
1172 	u64 val;
1173 
1174 	get_timer_map(vcpu, &map);
1175 	timer = vcpu_get_timer(vcpu, tmr);
1176 
1177 	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
1178 		return kvm_arm_timer_read(vcpu, timer, treg);
1179 
1180 	preempt_disable();
1181 	timer_save_state(timer);
1182 
1183 	val = kvm_arm_timer_read(vcpu, timer, treg);
1184 
1185 	timer_restore_state(timer);
1186 	preempt_enable();
1187 
1188 	return val;
1189 }
1190 
1191 static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
1192 				struct arch_timer_context *timer,
1193 				enum kvm_arch_timer_regs treg,
1194 				u64 val)
1195 {
1196 	switch (treg) {
1197 	case TIMER_REG_TVAL:
1198 		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
1199 		break;
1200 
1201 	case TIMER_REG_CTL:
1202 		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
1203 		break;
1204 
1205 	case TIMER_REG_CVAL:
1206 		timer_set_cval(timer, val);
1207 		break;
1208 
1209 	case TIMER_REG_VOFF:
1210 		*timer->offset.vcpu_offset = val;
1211 		break;
1212 
1213 	default:
1214 		BUG();
1215 	}
1216 }
1217 
1218 void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
1219 				enum kvm_arch_timers tmr,
1220 				enum kvm_arch_timer_regs treg,
1221 				u64 val)
1222 {
1223 	struct arch_timer_context *timer;
1224 	struct timer_map map;
1225 
1226 	get_timer_map(vcpu, &map);
1227 	timer = vcpu_get_timer(vcpu, tmr);
1228 	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
1229 		soft_timer_cancel(&timer->hrtimer);
1230 		kvm_arm_timer_write(vcpu, timer, treg, val);
1231 		timer_emulate(timer);
1232 	} else {
1233 		preempt_disable();
1234 		timer_save_state(timer);
1235 		kvm_arm_timer_write(vcpu, timer, treg, val);
1236 		timer_restore_state(timer);
1237 		preempt_enable();
1238 	}
1239 }
1240 
/*
 * irq_chip callback: mark the interrupt as forwarded to a vcpu when @vcpu
 * is non-NULL, and clear that marking otherwise. Always returns 0.
 */
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (!vcpu) {
		irqd_clr_forwarded_to_vcpu(d);
		return 0;
	}

	irqd_set_forwarded_to_vcpu(d);
	return 0;
}
1250 
1251 static int timer_irq_set_irqchip_state(struct irq_data *d,
1252 				       enum irqchip_irq_state which, bool val)
1253 {
1254 	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
1255 		return irq_chip_set_parent_state(d, which, val);
1256 
1257 	if (val)
1258 		irq_chip_mask_parent(d);
1259 	else
1260 		irq_chip_unmask_parent(d);
1261 
1262 	return 0;
1263 }
1264 
/*
 * irq_chip callback: pass the EOI on to the parent chip, except for
 * interrupts that are currently forwarded to a vcpu.
 */
static void timer_irq_eoi(struct irq_data *d)
{
	if (irqd_is_forwarded_to_vcpu(d))
		return;

	irq_chip_eoi_parent(d);
}
1270 
1271 static void timer_irq_ack(struct irq_data *d)
1272 {
1273 	d = d->parent_data;
1274 	if (d->chip->irq_ack)
1275 		d->chip->irq_ack(d);
1276 }
1277 
1278 static struct irq_chip timer_chip = {
1279 	.name			= "KVM",
1280 	.irq_ack		= timer_irq_ack,
1281 	.irq_mask		= irq_chip_mask_parent,
1282 	.irq_unmask		= irq_chip_unmask_parent,
1283 	.irq_eoi		= timer_irq_eoi,
1284 	.irq_set_type		= irq_chip_set_type_parent,
1285 	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
1286 	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
1287 };
1288 
1289 static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1290 				  unsigned int nr_irqs, void *arg)
1291 {
1292 	irq_hw_number_t hwirq = (uintptr_t)arg;
1293 
1294 	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
1295 					     &timer_chip, NULL);
1296 }
1297 
/*
 * irq_domain free callback. timer_irq_domain_alloc() only binds a hwirq
 * and chip (with no chip data), so there is nothing to release here.
 */
static void timer_irq_domain_free(struct irq_domain *domain,
				  unsigned int virq, unsigned int nr_irqs)
{
	/* Intentionally empty */
}
1302 
1303 static const struct irq_domain_ops timer_domain_ops = {
1304 	.alloc	= timer_irq_domain_alloc,
1305 	.free	= timer_irq_domain_free,
1306 };
1307 
1308 static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
1309 {
1310 	*flags = irq_get_trigger_type(virq);
1311 	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
1312 		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
1313 			virq);
1314 		*flags = IRQF_TRIGGER_LOW;
1315 	}
1316 }
1317 
1318 static int kvm_irq_init(struct arch_timer_kvm_info *info)
1319 {
1320 	struct irq_domain *domain = NULL;
1321 
1322 	if (info->virtual_irq <= 0) {
1323 		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
1324 			info->virtual_irq);
1325 		return -ENODEV;
1326 	}
1327 
1328 	host_vtimer_irq = info->virtual_irq;
1329 	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
1330 
1331 	if (kvm_vgic_global_state.no_hw_deactivation) {
1332 		struct fwnode_handle *fwnode;
1333 		struct irq_data *data;
1334 
1335 		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
1336 		if (!fwnode)
1337 			return -ENOMEM;
1338 
1339 		/* Assume both vtimer and ptimer in the same parent */
1340 		data = irq_get_irq_data(host_vtimer_irq);
1341 		domain = irq_domain_create_hierarchy(data->domain, 0,
1342 						     NR_KVM_TIMERS, fwnode,
1343 						     &timer_domain_ops, NULL);
1344 		if (!domain) {
1345 			irq_domain_free_fwnode(fwnode);
1346 			return -ENOMEM;
1347 		}
1348 
1349 		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
1350 		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
1351 					    (void *)TIMER_VTIMER));
1352 	}
1353 
1354 	if (info->physical_irq > 0) {
1355 		host_ptimer_irq = info->physical_irq;
1356 		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
1357 
1358 		if (domain)
1359 			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
1360 						    (void *)TIMER_PTIMER));
1361 	}
1362 
1363 	return 0;
1364 }
1365 
1366 int __init kvm_timer_hyp_init(bool has_gic)
1367 {
1368 	struct arch_timer_kvm_info *info;
1369 	int err;
1370 
1371 	info = arch_timer_get_kvm_info();
1372 	timecounter = &info->timecounter;
1373 
1374 	if (!timecounter->cc) {
1375 		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
1376 		return -ENODEV;
1377 	}
1378 
1379 	err = kvm_irq_init(info);
1380 	if (err)
1381 		return err;
1382 
1383 	/* First, do the virtual EL1 timer irq */
1384 
1385 	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
1386 				 "kvm guest vtimer", kvm_get_running_vcpus());
1387 	if (err) {
1388 		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
1389 			host_vtimer_irq, err);
1390 		return err;
1391 	}
1392 
1393 	if (has_gic) {
1394 		err = irq_set_vcpu_affinity(host_vtimer_irq,
1395 					    kvm_get_running_vcpus());
1396 		if (err) {
1397 			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1398 			goto out_free_vtimer_irq;
1399 		}
1400 
1401 		static_branch_enable(&has_gic_active_state);
1402 	}
1403 
1404 	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
1405 
1406 	/* Now let's do the physical EL1 timer irq */
1407 
1408 	if (info->physical_irq > 0) {
1409 		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
1410 					 "kvm guest ptimer", kvm_get_running_vcpus());
1411 		if (err) {
1412 			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
1413 				host_ptimer_irq, err);
1414 			goto out_free_vtimer_irq;
1415 		}
1416 
1417 		if (has_gic) {
1418 			err = irq_set_vcpu_affinity(host_ptimer_irq,
1419 						    kvm_get_running_vcpus());
1420 			if (err) {
1421 				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1422 				goto out_free_ptimer_irq;
1423 			}
1424 		}
1425 
1426 		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
1427 	} else if (has_vhe()) {
1428 		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
1429 			info->physical_irq);
1430 		err = -ENODEV;
1431 		goto out_free_vtimer_irq;
1432 	}
1433 
1434 	return 0;
1435 
1436 out_free_ptimer_irq:
1437 	if (info->physical_irq > 0)
1438 		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
1439 out_free_vtimer_irq:
1440 	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
1441 	return err;
1442 }
1443 
1444 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
1445 {
1446 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1447 
1448 	soft_timer_cancel(&timer->bg_timer);
1449 }
1450 
1451 static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
1452 {
1453 	u32 ppis = 0;
1454 	bool valid;
1455 
1456 	mutex_lock(&vcpu->kvm->arch.config_lock);
1457 
1458 	for (int i = 0; i < nr_timers(vcpu); i++) {
1459 		struct arch_timer_context *ctx;
1460 		int irq;
1461 
1462 		ctx = vcpu_get_timer(vcpu, i);
1463 		irq = timer_irq(ctx);
1464 		if (kvm_vgic_set_owner(vcpu, irq, ctx))
1465 			break;
1466 
1467 		/*
1468 		 * We know by construction that we only have PPIs, so
1469 		 * all values are less than 32.
1470 		 */
1471 		ppis |= BIT(irq);
1472 	}
1473 
1474 	valid = hweight32(ppis) == nr_timers(vcpu);
1475 
1476 	if (valid)
1477 		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
1478 
1479 	mutex_unlock(&vcpu->kvm->arch.config_lock);
1480 
1481 	return valid;
1482 }
1483 
1484 static bool kvm_arch_timer_get_input_level(int vintid)
1485 {
1486 	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
1487 
1488 	if (WARN(!vcpu, "No vcpu context!\n"))
1489 		return false;
1490 
1491 	for (int i = 0; i < nr_timers(vcpu); i++) {
1492 		struct arch_timer_context *ctx;
1493 
1494 		ctx = vcpu_get_timer(vcpu, i);
1495 		if (timer_irq(ctx) == vintid)
1496 			return kvm_timer_should_fire(ctx);
1497 	}
1498 
1499 	/* A timer IRQ has fired, but no matching timer was found? */
1500 	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
1501 
1502 	return false;
1503 }
1504 
1505 int kvm_timer_enable(struct kvm_vcpu *vcpu)
1506 {
1507 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1508 	struct timer_map map;
1509 	int ret;
1510 
1511 	if (timer->enabled)
1512 		return 0;
1513 
1514 	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
1515 	if (!irqchip_in_kernel(vcpu->kvm))
1516 		goto no_vgic;
1517 
1518 	/*
1519 	 * At this stage, we have the guarantee that the vgic is both
1520 	 * available and initialized.
1521 	 */
1522 	if (!timer_irqs_are_valid(vcpu)) {
1523 		kvm_debug("incorrectly configured timer irqs\n");
1524 		return -EINVAL;
1525 	}
1526 
1527 	get_timer_map(vcpu, &map);
1528 
1529 	ret = kvm_vgic_map_phys_irq(vcpu,
1530 				    map.direct_vtimer->host_timer_irq,
1531 				    timer_irq(map.direct_vtimer),
1532 				    &arch_timer_irq_ops);
1533 	if (ret)
1534 		return ret;
1535 
1536 	if (map.direct_ptimer) {
1537 		ret = kvm_vgic_map_phys_irq(vcpu,
1538 					    map.direct_ptimer->host_timer_irq,
1539 					    timer_irq(map.direct_ptimer),
1540 					    &arch_timer_irq_ops);
1541 	}
1542 
1543 	if (ret)
1544 		return ret;
1545 
1546 no_vgic:
1547 	timer->enabled = 1;
1548 	return 0;
1549 }
1550 
1551 /* If we have CNTPOFF, permanently set ECV to enable it */
1552 void kvm_timer_init_vhe(void)
1553 {
1554 	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
1555 		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
1556 }
1557 
1558 int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1559 {
1560 	int __user *uaddr = (int __user *)(long)attr->addr;
1561 	int irq, idx, ret = 0;
1562 
1563 	if (!irqchip_in_kernel(vcpu->kvm))
1564 		return -EINVAL;
1565 
1566 	if (get_user(irq, uaddr))
1567 		return -EFAULT;
1568 
1569 	if (!(irq_is_ppi(irq)))
1570 		return -EINVAL;
1571 
1572 	mutex_lock(&vcpu->kvm->arch.config_lock);
1573 
1574 	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
1575 		     &vcpu->kvm->arch.flags)) {
1576 		ret = -EBUSY;
1577 		goto out;
1578 	}
1579 
1580 	switch (attr->attr) {
1581 	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1582 		idx = TIMER_VTIMER;
1583 		break;
1584 	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1585 		idx = TIMER_PTIMER;
1586 		break;
1587 	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1588 		idx = TIMER_HVTIMER;
1589 		break;
1590 	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1591 		idx = TIMER_HPTIMER;
1592 		break;
1593 	default:
1594 		ret = -ENXIO;
1595 		goto out;
1596 	}
1597 
1598 	/*
1599 	 * We cannot validate the IRQ unicity before we run, so take it at
1600 	 * face value. The verdict will be given on first vcpu run, for each
1601 	 * vcpu. Yes this is late. Blame it on the stupid API.
1602 	 */
1603 	vcpu->kvm->arch.timer_data.ppi[idx] = irq;
1604 
1605 out:
1606 	mutex_unlock(&vcpu->kvm->arch.config_lock);
1607 	return ret;
1608 }
1609 
1610 int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1611 {
1612 	int __user *uaddr = (int __user *)(long)attr->addr;
1613 	struct arch_timer_context *timer;
1614 	int irq;
1615 
1616 	switch (attr->attr) {
1617 	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1618 		timer = vcpu_vtimer(vcpu);
1619 		break;
1620 	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1621 		timer = vcpu_ptimer(vcpu);
1622 		break;
1623 	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1624 		timer = vcpu_hvtimer(vcpu);
1625 		break;
1626 	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1627 		timer = vcpu_hptimer(vcpu);
1628 		break;
1629 	default:
1630 		return -ENXIO;
1631 	}
1632 
1633 	irq = timer_irq(timer);
1634 	return put_user(irq, uaddr);
1635 }
1636 
1637 int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1638 {
1639 	switch (attr->attr) {
1640 	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1641 	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1642 	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1643 	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1644 		return 0;
1645 	}
1646 
1647 	return -ENXIO;
1648 }
1649 
1650 int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
1651 				    struct kvm_arm_counter_offset *offset)
1652 {
1653 	int ret = 0;
1654 
1655 	if (offset->reserved)
1656 		return -EINVAL;
1657 
1658 	mutex_lock(&kvm->lock);
1659 
1660 	if (lock_all_vcpus(kvm)) {
1661 		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
1662 
1663 		/*
1664 		 * If userspace decides to set the offset using this
1665 		 * API rather than merely restoring the counter
1666 		 * values, the offset applies to both the virtual and
1667 		 * physical views.
1668 		 */
1669 		kvm->arch.timer_data.voffset = offset->counter_offset;
1670 		kvm->arch.timer_data.poffset = offset->counter_offset;
1671 
1672 		unlock_all_vcpus(kvm);
1673 	} else {
1674 		ret = -EBUSY;
1675 	}
1676 
1677 	mutex_unlock(&kvm->lock);
1678 
1679 	return ret;
1680 }
1681