xref: /linux/arch/arm64/kvm/arch_timer.c (revision adc4fb9c814b5d5cc6021022900fd5eb0b3c8165)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright (C) 2012 ARM Ltd.
4   * Author: Marc Zyngier <marc.zyngier@arm.com>
5   */
6  
7  #include <linux/cpu.h>
8  #include <linux/kvm.h>
9  #include <linux/kvm_host.h>
10  #include <linux/interrupt.h>
11  #include <linux/irq.h>
12  #include <linux/irqdomain.h>
13  #include <linux/uaccess.h>
14  
15  #include <clocksource/arm_arch_timer.h>
16  #include <asm/arch_timer.h>
17  #include <asm/kvm_emulate.h>
18  #include <asm/kvm_hyp.h>
19  #include <asm/kvm_nested.h>
20  
21  #include <kvm/arm_vgic.h>
22  #include <kvm/arm_arch_timer.h>
23  
24  #include "trace.h"
25  
26  static struct timecounter *timecounter;
27  static unsigned int host_vtimer_irq;
28  static unsigned int host_ptimer_irq;
29  static u32 host_vtimer_irq_flags;
30  static u32 host_ptimer_irq_flags;
31  
32  static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
33  DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
34  
35  static const u8 default_ppi[] = {
36  	[TIMER_PTIMER]  = 30,
37  	[TIMER_VTIMER]  = 27,
38  	[TIMER_HPTIMER] = 26,
39  	[TIMER_HVTIMER] = 28,
40  };
41  
42  static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
43  static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
44  				 struct arch_timer_context *timer_ctx);
45  static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
46  static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
47  				struct arch_timer_context *timer,
48  				enum kvm_arch_timer_regs treg,
49  				u64 val);
50  static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
51  			      struct arch_timer_context *timer,
52  			      enum kvm_arch_timer_regs treg);
53  static bool kvm_arch_timer_get_input_level(int vintid);
54  
55  static struct irq_ops arch_timer_irq_ops = {
56  	.get_input_level = kvm_arch_timer_get_input_level,
57  };
58  
nr_timers(struct kvm_vcpu * vcpu)59  static int nr_timers(struct kvm_vcpu *vcpu)
60  {
61  	if (!vcpu_has_nv(vcpu))
62  		return NR_KVM_EL0_TIMERS;
63  
64  	return NR_KVM_TIMERS;
65  }
66  
timer_get_ctl(struct arch_timer_context * ctxt)67  u32 timer_get_ctl(struct arch_timer_context *ctxt)
68  {
69  	struct kvm_vcpu *vcpu = ctxt->vcpu;
70  
71  	switch(arch_timer_ctx_index(ctxt)) {
72  	case TIMER_VTIMER:
73  		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
74  	case TIMER_PTIMER:
75  		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
76  	case TIMER_HVTIMER:
77  		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
78  	case TIMER_HPTIMER:
79  		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
80  	default:
81  		WARN_ON(1);
82  		return 0;
83  	}
84  }
85  
timer_get_cval(struct arch_timer_context * ctxt)86  u64 timer_get_cval(struct arch_timer_context *ctxt)
87  {
88  	struct kvm_vcpu *vcpu = ctxt->vcpu;
89  
90  	switch(arch_timer_ctx_index(ctxt)) {
91  	case TIMER_VTIMER:
92  		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
93  	case TIMER_PTIMER:
94  		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
95  	case TIMER_HVTIMER:
96  		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
97  	case TIMER_HPTIMER:
98  		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
99  	default:
100  		WARN_ON(1);
101  		return 0;
102  	}
103  }
104  
/*
 * Store @ctl as the control register value of the timer described by
 * @ctxt. An unknown timer index only triggers a warning.
 */
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		return;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		return;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		return;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		return;
	default:
		WARN_ON(1);
	}
}
126  
/*
 * Store @cval as the compare value of the timer described by @ctxt.
 * An unknown timer index only triggers a warning.
 */
static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		return;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		return;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		return;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		return;
	default:
		WARN_ON(1);
	}
}
148  
/*
 * Publish @offset through the context's vm_offset pointer. When the
 * context has no such pointer, nothing is written and a non-zero
 * @offset is reported with a warning.
 */
static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (ctxt->offset.vm_offset) {
		WRITE_ONCE(*ctxt->offset.vm_offset, offset);
		return;
	}

	WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
}
158  
kvm_phys_timer_read(void)159  u64 kvm_phys_timer_read(void)
160  {
161  	return timecounter->cc->read(timecounter->cc);
162  }
163  
get_timer_map(struct kvm_vcpu * vcpu,struct timer_map * map)164  void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
165  {
166  	if (vcpu_has_nv(vcpu)) {
167  		if (is_hyp_ctxt(vcpu)) {
168  			map->direct_vtimer = vcpu_hvtimer(vcpu);
169  			map->direct_ptimer = vcpu_hptimer(vcpu);
170  			map->emul_vtimer = vcpu_vtimer(vcpu);
171  			map->emul_ptimer = vcpu_ptimer(vcpu);
172  		} else {
173  			map->direct_vtimer = vcpu_vtimer(vcpu);
174  			map->direct_ptimer = vcpu_ptimer(vcpu);
175  			map->emul_vtimer = vcpu_hvtimer(vcpu);
176  			map->emul_ptimer = vcpu_hptimer(vcpu);
177  		}
178  	} else if (has_vhe()) {
179  		map->direct_vtimer = vcpu_vtimer(vcpu);
180  		map->direct_ptimer = vcpu_ptimer(vcpu);
181  		map->emul_vtimer = NULL;
182  		map->emul_ptimer = NULL;
183  	} else {
184  		map->direct_vtimer = vcpu_vtimer(vcpu);
185  		map->direct_ptimer = NULL;
186  		map->emul_vtimer = NULL;
187  		map->emul_ptimer = vcpu_ptimer(vcpu);
188  	}
189  
190  	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
191  }
192  
userspace_irqchip(struct kvm * kvm)193  static inline bool userspace_irqchip(struct kvm *kvm)
194  {
195  	return unlikely(!irqchip_in_kernel(kvm));
196  }
197  
/* Arm @hrt to expire @ns nanoseconds from now (absolute, hard mode). */
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	ktime_t expires = ktime_add_ns(ktime_get(), ns);

	hrtimer_start(hrt, expires, HRTIMER_MODE_ABS_HARD);
}
203  
/* Cancel @hrt; the result of hrtimer_cancel() is not used. */
static void soft_timer_cancel(struct hrtimer *hrt)
{
	(void)hrtimer_cancel(hrt);
}
208  
kvm_arch_timer_handler(int irq,void * dev_id)209  static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
210  {
211  	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
212  	struct arch_timer_context *ctx;
213  	struct timer_map map;
214  
215  	/*
216  	 * We may see a timer interrupt after vcpu_put() has been called which
217  	 * sets the CPU's vcpu pointer to NULL, because even though the timer
218  	 * has been disabled in timer_save_state(), the hardware interrupt
219  	 * signal may not have been retired from the interrupt controller yet.
220  	 */
221  	if (!vcpu)
222  		return IRQ_HANDLED;
223  
224  	get_timer_map(vcpu, &map);
225  
226  	if (irq == host_vtimer_irq)
227  		ctx = map.direct_vtimer;
228  	else
229  		ctx = map.direct_ptimer;
230  
231  	if (kvm_timer_should_fire(ctx))
232  		kvm_timer_update_irq(vcpu, true, ctx);
233  
234  	if (userspace_irqchip(vcpu->kvm) &&
235  	    !static_branch_unlikely(&has_gic_active_state))
236  		disable_percpu_irq(host_vtimer_irq);
237  
238  	return IRQ_HANDLED;
239  }
240  
/*
 * Return the number of nanoseconds until the counter seen through
 * @timer_ctx (physical count minus the context's offset) reaches @val,
 * or 0 if it already has.
 */
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (val <= now)
		return 0;

	return cyclecounter_cyc2ns(timecounter->cc, val - now,
				   timecounter->mask, &timer_ctx->ns_frac);
}
258  
kvm_timer_compute_delta(struct arch_timer_context * timer_ctx)259  static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
260  {
261  	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
262  }
263  
kvm_timer_irq_can_fire(struct arch_timer_context * timer_ctx)264  static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
265  {
266  	WARN_ON(timer_ctx && timer_ctx->loaded);
267  	return timer_ctx &&
268  		((timer_get_ctl(timer_ctx) &
269  		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
270  }
271  
vcpu_has_wfit_active(struct kvm_vcpu * vcpu)272  static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
273  {
274  	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
275  		vcpu_get_flag(vcpu, IN_WFIT));
276  }
277  
wfit_delay_ns(struct kvm_vcpu * vcpu)278  static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
279  {
280  	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
281  	struct arch_timer_context *ctx;
282  
283  	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);
284  
285  	return kvm_counter_compute_delta(ctx, val);
286  }
287  
288  /*
289   * Returns the earliest expiration time in ns among guest timers.
290   * Note that it will return 0 if none of timers can fire.
291   */
kvm_timer_earliest_exp(struct kvm_vcpu * vcpu)292  static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
293  {
294  	u64 min_delta = ULLONG_MAX;
295  	int i;
296  
297  	for (i = 0; i < nr_timers(vcpu); i++) {
298  		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
299  
300  		WARN(ctx->loaded, "timer %d loaded\n", i);
301  		if (kvm_timer_irq_can_fire(ctx))
302  			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
303  	}
304  
305  	if (vcpu_has_wfit_active(vcpu))
306  		min_delta = min(min_delta, wfit_delay_ns(vcpu));
307  
308  	/* If none of timers can fire, then return 0 */
309  	if (min_delta == ULLONG_MAX)
310  		return 0;
311  
312  	return min_delta;
313  }
314  
kvm_bg_timer_expire(struct hrtimer * hrt)315  static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
316  {
317  	struct arch_timer_cpu *timer;
318  	struct kvm_vcpu *vcpu;
319  	u64 ns;
320  
321  	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
322  	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
323  
324  	/*
325  	 * Check that the timer has really expired from the guest's
326  	 * PoV (NTP on the host may have forced it to expire
327  	 * early). If we should have slept longer, restart it.
328  	 */
329  	ns = kvm_timer_earliest_exp(vcpu);
330  	if (unlikely(ns)) {
331  		hrtimer_forward_now(hrt, ns_to_ktime(ns));
332  		return HRTIMER_RESTART;
333  	}
334  
335  	kvm_vcpu_wake_up(vcpu);
336  	return HRTIMER_NORESTART;
337  }
338  
kvm_hrtimer_expire(struct hrtimer * hrt)339  static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
340  {
341  	struct arch_timer_context *ctx;
342  	struct kvm_vcpu *vcpu;
343  	u64 ns;
344  
345  	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
346  	vcpu = ctx->vcpu;
347  
348  	trace_kvm_timer_hrtimer_expire(ctx);
349  
350  	/*
351  	 * Check that the timer has really expired from the guest's
352  	 * PoV (NTP on the host may have forced it to expire
353  	 * early). If not ready, schedule for a later time.
354  	 */
355  	ns = kvm_timer_compute_delta(ctx);
356  	if (unlikely(ns)) {
357  		hrtimer_forward_now(hrt, ns_to_ktime(ns));
358  		return HRTIMER_RESTART;
359  	}
360  
361  	kvm_timer_update_irq(vcpu, true, ctx);
362  	return HRTIMER_NORESTART;
363  }
364  
kvm_timer_should_fire(struct arch_timer_context * timer_ctx)365  static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
366  {
367  	enum kvm_arch_timers index;
368  	u64 cval, now;
369  
370  	if (!timer_ctx)
371  		return false;
372  
373  	index = arch_timer_ctx_index(timer_ctx);
374  
375  	if (timer_ctx->loaded) {
376  		u32 cnt_ctl = 0;
377  
378  		switch (index) {
379  		case TIMER_VTIMER:
380  		case TIMER_HVTIMER:
381  			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
382  			break;
383  		case TIMER_PTIMER:
384  		case TIMER_HPTIMER:
385  			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
386  			break;
387  		case NR_KVM_TIMERS:
388  			/* GCC is braindead */
389  			cnt_ctl = 0;
390  			break;
391  		}
392  
393  		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
394  		        (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
395  		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
396  	}
397  
398  	if (!kvm_timer_irq_can_fire(timer_ctx))
399  		return false;
400  
401  	cval = timer_get_cval(timer_ctx);
402  	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
403  
404  	return cval <= now;
405  }
406  
/* Non-zero when @vcpu has an active WFIT whose deadline has been reached. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_wfit_active(vcpu))
		return 0;

	return wfit_delay_ns(vcpu) == 0;
}
411  
412  /*
413   * Reflect the timer output level into the kvm_run structure
414   */
kvm_timer_update_run(struct kvm_vcpu * vcpu)415  void kvm_timer_update_run(struct kvm_vcpu *vcpu)
416  {
417  	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
418  	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
419  	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
420  
421  	/* Populate the device bitmap with the timer states */
422  	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
423  				    KVM_ARM_DEV_EL1_PTIMER);
424  	if (kvm_timer_should_fire(vtimer))
425  		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
426  	if (kvm_timer_should_fire(ptimer))
427  		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
428  }
429  
/*
 * Work around NV2 brokenness by publishing the interrupt status bit
 * into the stored control value of the EL1 timers while the vcpu is in
 * hyp context. The emulation remains of poor quality (guest writes only
 * take effect at the next exit), but it is fast.
 */
static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	unsigned long ctl;

	if (!is_hyp_ctxt(vcpu))
		return;

	/* Only the EL1 virtual and physical timers are concerned */
	if (ctx != vcpu_vtimer(vcpu) && ctx != vcpu_ptimer(vcpu))
		return;

	ctl = timer_get_ctl(ctx);
	__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, level);
	timer_set_ctl(ctx, ctl);
}
446  
/*
 * Record @new_level as the output level of @timer_ctx and, when the VM
 * uses an in-kernel irqchip, forward it to the vgic.
 */
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	/* With a userspace irqchip there is nothing to inject from here */
	if (!userspace_irqchip(vcpu->kvm))
		kvm_vgic_inject_irq(vcpu->kvm, vcpu,
				    timer_irq(timer_ctx),
				    timer_ctx->irq.level,
				    timer_ctx);
}
464  
465  /* Only called for a fully emulated timer */
timer_emulate(struct arch_timer_context * ctx)466  static void timer_emulate(struct arch_timer_context *ctx)
467  {
468  	bool should_fire = kvm_timer_should_fire(ctx);
469  
470  	trace_kvm_timer_emulate(ctx, should_fire);
471  
472  	if (should_fire != ctx->irq.level)
473  		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
474  
475  	kvm_timer_update_status(ctx, should_fire);
476  
477  	/*
478  	 * If the timer can fire now, we don't need to have a soft timer
479  	 * scheduled for the future.  If the timer cannot fire at all,
480  	 * then we also don't need a soft timer.
481  	 */
482  	if (should_fire || !kvm_timer_irq_can_fire(ctx))
483  		return;
484  
485  	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
486  }
487  
/* Hand @cntvoff to __kvm_timer_set_cntvoff by way of kvm_call_hyp(). */
static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
492  
/* Write @cntpoff to CNTPOFF_EL2; a no-op unless has_cntpoff() is true. */
static void set_cntpoff(u64 cntpoff)
{
	if (!has_cntpoff())
		return;

	write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}
498  
timer_save_state(struct arch_timer_context * ctx)499  static void timer_save_state(struct arch_timer_context *ctx)
500  {
501  	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
502  	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
503  	unsigned long flags;
504  
505  	if (!timer->enabled)
506  		return;
507  
508  	local_irq_save(flags);
509  
510  	if (!ctx->loaded)
511  		goto out;
512  
513  	switch (index) {
514  		u64 cval;
515  
516  	case TIMER_VTIMER:
517  	case TIMER_HVTIMER:
518  		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
519  		cval = read_sysreg_el0(SYS_CNTV_CVAL);
520  
521  		if (has_broken_cntvoff())
522  			cval -= timer_get_offset(ctx);
523  
524  		timer_set_cval(ctx, cval);
525  
526  		/* Disable the timer */
527  		write_sysreg_el0(0, SYS_CNTV_CTL);
528  		isb();
529  
530  		/*
531  		 * The kernel may decide to run userspace after
532  		 * calling vcpu_put, so we reset cntvoff to 0 to
533  		 * ensure a consistent read between user accesses to
534  		 * the virtual counter and kernel access to the
535  		 * physical counter of non-VHE case.
536  		 *
537  		 * For VHE, the virtual counter uses a fixed virtual
538  		 * offset of zero, so no need to zero CNTVOFF_EL2
539  		 * register, but this is actually useful when switching
540  		 * between EL1/vEL2 with NV.
541  		 *
542  		 * Do it unconditionally, as this is either unavoidable
543  		 * or dirt cheap.
544  		 */
545  		set_cntvoff(0);
546  		break;
547  	case TIMER_PTIMER:
548  	case TIMER_HPTIMER:
549  		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
550  		cval = read_sysreg_el0(SYS_CNTP_CVAL);
551  
552  		cval -= timer_get_offset(ctx);
553  
554  		timer_set_cval(ctx, cval);
555  
556  		/* Disable the timer */
557  		write_sysreg_el0(0, SYS_CNTP_CTL);
558  		isb();
559  
560  		set_cntpoff(0);
561  		break;
562  	case NR_KVM_TIMERS:
563  		BUG();
564  	}
565  
566  	trace_kvm_timer_save_state(ctx);
567  
568  	ctx->loaded = false;
569  out:
570  	local_irq_restore(flags);
571  }
572  
573  /*
574   * Schedule the background timer before calling kvm_vcpu_halt, so that this
575   * thread is removed from its waitqueue and made runnable when there's a timer
576   * interrupt to handle.
577   */
kvm_timer_blocking(struct kvm_vcpu * vcpu)578  static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
579  {
580  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
581  	struct timer_map map;
582  
583  	get_timer_map(vcpu, &map);
584  
585  	/*
586  	 * If no timers are capable of raising interrupts (disabled or
587  	 * masked), then there's no more work for us to do.
588  	 */
589  	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
590  	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
591  	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
592  	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
593  	    !vcpu_has_wfit_active(vcpu))
594  		return;
595  
596  	/*
597  	 * At least one guest time will expire. Schedule a background timer.
598  	 * Set the earliest expiration time among the guest timers.
599  	 */
600  	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
601  }
602  
kvm_timer_unblocking(struct kvm_vcpu * vcpu)603  static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
604  {
605  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
606  
607  	soft_timer_cancel(&timer->bg_timer);
608  }
609  
timer_restore_state(struct arch_timer_context * ctx)610  static void timer_restore_state(struct arch_timer_context *ctx)
611  {
612  	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
613  	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
614  	unsigned long flags;
615  
616  	if (!timer->enabled)
617  		return;
618  
619  	local_irq_save(flags);
620  
621  	if (ctx->loaded)
622  		goto out;
623  
624  	switch (index) {
625  		u64 cval, offset;
626  
627  	case TIMER_VTIMER:
628  	case TIMER_HVTIMER:
629  		cval = timer_get_cval(ctx);
630  		offset = timer_get_offset(ctx);
631  		if (has_broken_cntvoff()) {
632  			set_cntvoff(0);
633  			cval += offset;
634  		} else {
635  			set_cntvoff(offset);
636  		}
637  		write_sysreg_el0(cval, SYS_CNTV_CVAL);
638  		isb();
639  		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
640  		break;
641  	case TIMER_PTIMER:
642  	case TIMER_HPTIMER:
643  		cval = timer_get_cval(ctx);
644  		offset = timer_get_offset(ctx);
645  		set_cntpoff(offset);
646  		cval += offset;
647  		write_sysreg_el0(cval, SYS_CNTP_CVAL);
648  		isb();
649  		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
650  		break;
651  	case NR_KVM_TIMERS:
652  		BUG();
653  	}
654  
655  	trace_kvm_timer_restore_state(ctx);
656  
657  	ctx->loaded = true;
658  out:
659  	local_irq_restore(flags);
660  }
661  
/*
 * Set the active state of the host interrupt backing @ctx to @active,
 * warning if the irqchip reports an error.
 */
static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int err = irq_set_irqchip_state(ctx->host_timer_irq,
					IRQCHIP_STATE_ACTIVE, active);

	WARN_ON(err);
}
668  
kvm_timer_vcpu_load_gic(struct arch_timer_context * ctx)669  static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
670  {
671  	struct kvm_vcpu *vcpu = ctx->vcpu;
672  	bool phys_active = false;
673  
674  	/*
675  	 * Update the timer output so that it is likely to match the
676  	 * state we're about to restore. If the timer expires between
677  	 * this point and the register restoration, we'll take the
678  	 * interrupt anyway.
679  	 */
680  	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
681  
682  	if (irqchip_in_kernel(vcpu->kvm))
683  		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
684  
685  	phys_active |= ctx->irq.level;
686  
687  	set_timer_irq_phys_active(ctx, phys_active);
688  }
689  
kvm_timer_vcpu_load_nogic(struct kvm_vcpu * vcpu)690  static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
691  {
692  	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
693  
694  	/*
695  	 * Update the timer output so that it is likely to match the
696  	 * state we're about to restore. If the timer expires between
697  	 * this point and the register restoration, we'll take the
698  	 * interrupt anyway.
699  	 */
700  	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
701  
702  	/*
703  	 * When using a userspace irqchip with the architected timers and a
704  	 * host interrupt controller that doesn't support an active state, we
705  	 * must still prevent continuously exiting from the guest, and
706  	 * therefore mask the physical interrupt by disabling it on the host
707  	 * interrupt controller when the virtual level is high, such that the
708  	 * guest can make forward progress.  Once we detect the output level
709  	 * being de-asserted, we unmask the interrupt again so that we exit
710  	 * from the guest when the timer fires.
711  	 */
712  	if (vtimer->irq.level)
713  		disable_percpu_irq(host_vtimer_irq);
714  	else
715  		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
716  }
717  
/*
 * Accumulate _bit into _set when _pred is true, and into _clr when it
 * is false.
 */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (!(_pred))						\
			(_clr) |= (_bit);				\
		else							\
			(_set) |= (_bit);				\
	} while (0)
726  
kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu * vcpu,struct timer_map * map)727  static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
728  					      struct timer_map *map)
729  {
730  	int hw, ret;
731  
732  	if (!irqchip_in_kernel(vcpu->kvm))
733  		return;
734  
735  	/*
736  	 * We only ever unmap the vtimer irq on a VHE system that runs nested
737  	 * virtualization, in which case we have both a valid emul_vtimer,
738  	 * emul_ptimer, direct_vtimer, and direct_ptimer.
739  	 *
740  	 * Since this is called from kvm_timer_vcpu_load(), a change between
741  	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
742  	 * represent this, and therefore we switch the emul/direct mappings
743  	 * below.
744  	 */
745  	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
746  	if (hw < 0) {
747  		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
748  		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
749  
750  		ret = kvm_vgic_map_phys_irq(vcpu,
751  					    map->direct_vtimer->host_timer_irq,
752  					    timer_irq(map->direct_vtimer),
753  					    &arch_timer_irq_ops);
754  		WARN_ON_ONCE(ret);
755  		ret = kvm_vgic_map_phys_irq(vcpu,
756  					    map->direct_ptimer->host_timer_irq,
757  					    timer_irq(map->direct_ptimer),
758  					    &arch_timer_irq_ops);
759  		WARN_ON_ONCE(ret);
760  	}
761  }
762  
timer_set_traps(struct kvm_vcpu * vcpu,struct timer_map * map)763  static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
764  {
765  	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
766  	u64 clr, set;
767  
768  	/*
769  	 * No trapping gets configured here with nVHE. See
770  	 * __timer_enable_traps(), which is where the stuff happens.
771  	 */
772  	if (!has_vhe())
773  		return;
774  
775  	/*
776  	 * Our default policy is not to trap anything. As we progress
777  	 * within this function, reality kicks in and we start adding
778  	 * traps based on emulation requirements.
779  	 */
780  	tvt = tpt = tvc = tpc = false;
781  	tvt02 = tpt02 = false;
782  
783  	/*
784  	 * NV2 badly breaks the timer semantics by redirecting accesses to
785  	 * the EL1 timer state to memory, so let's call ECV to the rescue if
786  	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
787  	 *
788  	 * The treatment slightly varies depending whether we run a nVHE or
789  	 * VHE guest: nVHE will use the _EL0 registers directly, while VHE
790  	 * will use the _EL02 accessors. This translates in different trap
791  	 * bits.
792  	 *
793  	 * None of the trapping is required when running in non-HYP context,
794  	 * unless required by the L1 hypervisor settings once we advertise
795  	 * ECV+NV in the guest, or that we need trapping for other reasons.
796  	 */
797  	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
798  		if (vcpu_el2_e2h_is_set(vcpu))
799  			tvt02 = tpt02 = true;
800  		else
801  			tvt = tpt = true;
802  	}
803  
804  	/*
805  	 * We have two possibility to deal with a physical offset:
806  	 *
807  	 * - Either we have CNTPOFF (yay!) or the offset is 0:
808  	 *   we let the guest freely access the HW
809  	 *
810  	 * - or neither of these condition apply:
811  	 *   we trap accesses to the HW, but still use it
812  	 *   after correcting the physical offset
813  	 */
814  	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
815  		tpt = tpc = true;
816  
817  	/*
818  	 * For the poor sods that could not correctly substract one value
819  	 * from another, trap the full virtual timer and counter.
820  	 */
821  	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
822  		tvt = tvc = true;
823  
824  	/*
825  	 * Apply the enable bits that the guest hypervisor has requested for
826  	 * its own guest. We can only add traps that wouldn't have been set
827  	 * above.
828  	 * Implementation choices: we do not support NV when E2H=0 in the
829  	 * guest, and we don't support configuration where E2H is writable
830  	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
831  	 * not both). This simplifies the handling of the EL1NV* bits.
832  	 */
833  	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
834  		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
835  
836  		/* Use the VHE format for mental sanity */
837  		if (!vcpu_el2_e2h_is_set(vcpu))
838  			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
839  
840  		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
841  		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
842  
843  		tpt02 |= (val & CNTHCTL_EL1NVPCT);
844  		tvt02 |= (val & CNTHCTL_EL1NVVCT);
845  	}
846  
847  	/*
848  	 * Now that we have collected our requirements, compute the
849  	 * trap and enable bits.
850  	 */
851  	set = 0;
852  	clr = 0;
853  
854  	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
855  	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
856  	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
857  	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
858  	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
859  	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);
860  
861  	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
862  	sysreg_clear_set(cnthctl_el2, clr, set);
863  }
864  
kvm_timer_vcpu_load(struct kvm_vcpu * vcpu)865  void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
866  {
867  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
868  	struct timer_map map;
869  
870  	if (unlikely(!timer->enabled))
871  		return;
872  
873  	get_timer_map(vcpu, &map);
874  
875  	if (static_branch_likely(&has_gic_active_state)) {
876  		if (vcpu_has_nv(vcpu))
877  			kvm_timer_vcpu_load_nested_switch(vcpu, &map);
878  
879  		kvm_timer_vcpu_load_gic(map.direct_vtimer);
880  		if (map.direct_ptimer)
881  			kvm_timer_vcpu_load_gic(map.direct_ptimer);
882  	} else {
883  		kvm_timer_vcpu_load_nogic(vcpu);
884  	}
885  
886  	kvm_timer_unblocking(vcpu);
887  
888  	timer_restore_state(map.direct_vtimer);
889  	if (map.direct_ptimer)
890  		timer_restore_state(map.direct_ptimer);
891  	if (map.emul_vtimer)
892  		timer_emulate(map.emul_vtimer);
893  	if (map.emul_ptimer)
894  		timer_emulate(map.emul_ptimer);
895  
896  	timer_set_traps(vcpu, &map);
897  }
898  
kvm_timer_should_notify_user(struct kvm_vcpu * vcpu)899  bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
900  {
901  	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
902  	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
903  	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
904  	bool vlevel, plevel;
905  
906  	if (likely(irqchip_in_kernel(vcpu->kvm)))
907  		return false;
908  
909  	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
910  	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
911  
912  	return kvm_timer_should_fire(vtimer) != vlevel ||
913  	       kvm_timer_should_fire(ptimer) != plevel;
914  }
915  
kvm_timer_vcpu_put(struct kvm_vcpu * vcpu)916  void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
917  {
918  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
919  	struct timer_map map;
920  
921  	if (unlikely(!timer->enabled))
922  		return;
923  
924  	get_timer_map(vcpu, &map);
925  
926  	timer_save_state(map.direct_vtimer);
927  	if (map.direct_ptimer)
928  		timer_save_state(map.direct_ptimer);
929  
930  	/*
931  	 * Cancel soft timer emulation, because the only case where we
932  	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
933  	 * in that case we already factor in the deadline for the physical
934  	 * timer when scheduling the bg_timer.
935  	 *
936  	 * In any case, we re-schedule the hrtimer for the physical timer when
937  	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
938  	 */
939  	if (map.emul_vtimer)
940  		soft_timer_cancel(&map.emul_vtimer->hrtimer);
941  	if (map.emul_ptimer)
942  		soft_timer_cancel(&map.emul_ptimer->hrtimer);
943  
944  	if (kvm_vcpu_is_blocking(vcpu))
945  		kvm_timer_blocking(vcpu);
946  }
947  
kvm_timer_sync_nested(struct kvm_vcpu * vcpu)948  void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
949  {
950  	/*
951  	 * When NV2 is on, guest hypervisors have their EL1 timer register
952  	 * accesses redirected to the VNCR page. Any guest action taken on
953  	 * the timer is postponed until the next exit, leading to a very
954  	 * poor quality of emulation.
955  	 *
956  	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
957  	 * which allows trapping of the timer registers even with NV2.
958  	 * Still, this is still worse than FEAT_NV on its own. Meh.
959  	 */
960  	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
961  		/*
962  		 * For a VHE guest hypervisor, the EL2 state is directly
963  		 * stored in the host EL1 timers, while the emulated EL1
964  		 * state is stored in the VNCR page. The latter could have
965  		 * been updated behind our back, and we must reset the
966  		 * emulation of the timers.
967  		 *
968  		 * A non-VHE guest hypervisor doesn't have any direct access
969  		 * to its timers: the EL2 registers trap despite being
970  		 * notionally direct (we use the EL1 HW, as for VHE), while
971  		 * the EL1 registers access memory.
972  		 *
973  		 * In both cases, process the emulated timers on each guest
974  		 * exit. Boo.
975  		 */
976  		struct timer_map map;
977  		get_timer_map(vcpu, &map);
978  
979  		soft_timer_cancel(&map.emul_vtimer->hrtimer);
980  		soft_timer_cancel(&map.emul_ptimer->hrtimer);
981  		timer_emulate(map.emul_vtimer);
982  		timer_emulate(map.emul_ptimer);
983  	}
984  }
985  
986  /*
987   * With a userspace irqchip we have to check if the guest de-asserted the
988   * timer and if so, unmask the timer irq signal on the host interrupt
989   * controller to ensure that we see future timer signals.
990   */
unmask_vtimer_irq_user(struct kvm_vcpu * vcpu)991  static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
992  {
993  	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
994  
995  	if (!kvm_timer_should_fire(vtimer)) {
996  		kvm_timer_update_irq(vcpu, false, vtimer);
997  		if (static_branch_likely(&has_gic_active_state))
998  			set_timer_irq_phys_active(vtimer, false);
999  		else
1000  			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1001  	}
1002  }
1003  
kvm_timer_sync_user(struct kvm_vcpu * vcpu)1004  void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
1005  {
1006  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1007  
1008  	if (unlikely(!timer->enabled))
1009  		return;
1010  
1011  	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
1012  		unmask_vtimer_irq_user(vcpu);
1013  }
1014  
kvm_timer_vcpu_reset(struct kvm_vcpu * vcpu)1015  void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
1016  {
1017  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1018  	struct timer_map map;
1019  
1020  	get_timer_map(vcpu, &map);
1021  
1022  	/*
1023  	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
1024  	 * and to 0 for ARMv7.  We provide an implementation that always
1025  	 * resets the timer to be disabled and unmasked and is compliant with
1026  	 * the ARMv7 architecture.
1027  	 */
1028  	for (int i = 0; i < nr_timers(vcpu); i++)
1029  		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
1030  
1031  	/*
1032  	 * A vcpu running at EL2 is in charge of the offset applied to
1033  	 * the virtual timer, so use the physical VM offset, and point
1034  	 * the vcpu offset to CNTVOFF_EL2.
1035  	 */
1036  	if (vcpu_has_nv(vcpu)) {
1037  		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
1038  
1039  		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
1040  		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
1041  	}
1042  
1043  	if (timer->enabled) {
1044  		for (int i = 0; i < nr_timers(vcpu); i++)
1045  			kvm_timer_update_irq(vcpu, false,
1046  					     vcpu_get_timer(vcpu, i));
1047  
1048  		if (irqchip_in_kernel(vcpu->kvm)) {
1049  			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
1050  			if (map.direct_ptimer)
1051  				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
1052  		}
1053  	}
1054  
1055  	if (map.emul_vtimer)
1056  		soft_timer_cancel(&map.emul_vtimer->hrtimer);
1057  	if (map.emul_ptimer)
1058  		soft_timer_cancel(&map.emul_ptimer->hrtimer);
1059  }
1060  
timer_context_init(struct kvm_vcpu * vcpu,int timerid)1061  static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
1062  {
1063  	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
1064  	struct kvm *kvm = vcpu->kvm;
1065  
1066  	ctxt->vcpu = vcpu;
1067  
1068  	if (timerid == TIMER_VTIMER)
1069  		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
1070  	else
1071  		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
1072  
1073  	hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1074  
1075  	switch (timerid) {
1076  	case TIMER_PTIMER:
1077  	case TIMER_HPTIMER:
1078  		ctxt->host_timer_irq = host_ptimer_irq;
1079  		break;
1080  	case TIMER_VTIMER:
1081  	case TIMER_HVTIMER:
1082  		ctxt->host_timer_irq = host_vtimer_irq;
1083  		break;
1084  	}
1085  }
1086  
kvm_timer_vcpu_init(struct kvm_vcpu * vcpu)1087  void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
1088  {
1089  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1090  
1091  	for (int i = 0; i < NR_KVM_TIMERS; i++)
1092  		timer_context_init(vcpu, i);
1093  
1094  	/* Synchronize offsets across timers of a VM if not already provided */
1095  	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
1096  		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
1097  		timer_set_offset(vcpu_ptimer(vcpu), 0);
1098  	}
1099  
1100  	hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
1101  		      HRTIMER_MODE_ABS_HARD);
1102  }
1103  
kvm_timer_init_vm(struct kvm * kvm)1104  void kvm_timer_init_vm(struct kvm *kvm)
1105  {
1106  	for (int i = 0; i < NR_KVM_TIMERS; i++)
1107  		kvm->arch.timer_data.ppi[i] = default_ppi[i];
1108  }
1109  
kvm_timer_cpu_up(void)1110  void kvm_timer_cpu_up(void)
1111  {
1112  	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1113  	if (host_ptimer_irq)
1114  		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
1115  }
1116  
kvm_timer_cpu_down(void)1117  void kvm_timer_cpu_down(void)
1118  {
1119  	disable_percpu_irq(host_vtimer_irq);
1120  	if (host_ptimer_irq)
1121  		disable_percpu_irq(host_ptimer_irq);
1122  }
1123  
/*
 * Write one of the KVM_REG_ARM_{TIMER,PTIMER}_{CTL,CNT,CVAL} registers.
 *
 * CTL and CVAL are forwarded to kvm_arm_timer_write(). A CNT write is
 * turned into a timer offset (current physical counter minus @value),
 * and is silently ignored once KVM_ARCH_FLAG_VM_COUNTER_OFFSET is set.
 *
 * Returns 0 on success, -1 for an unknown @regid.
 */
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *ctx;

	/* First pick the timer the register belongs to... */
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
	case KVM_REG_ARM_TIMER_CNT:
	case KVM_REG_ARM_TIMER_CVAL:
		ctx = vcpu_vtimer(vcpu);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
	case KVM_REG_ARM_PTIMER_CNT:
	case KVM_REG_ARM_PTIMER_CVAL:
		ctx = vcpu_ptimer(vcpu);
		break;
	default:
		return -1;
	}

	/* ...then act on the kind of register. */
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
	case KVM_REG_ARM_PTIMER_CTL:
		kvm_arm_timer_write(vcpu, ctx, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CVAL:
	case KVM_REG_ARM_PTIMER_CVAL:
		kvm_arm_timer_write(vcpu, ctx, TIMER_REG_CVAL, value);
		break;
	default:
		/* Only the two CNT registers can reach this point. */
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags))
			timer_set_offset(ctx, kvm_phys_timer_read() - value);
		break;
	}

	return 0;
}
1166  
read_timer_ctl(struct arch_timer_context * timer)1167  static u64 read_timer_ctl(struct arch_timer_context *timer)
1168  {
1169  	/*
1170  	 * Set ISTATUS bit if it's expired.
1171  	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
1172  	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
1173  	 * regardless of ENABLE bit for our implementation convenience.
1174  	 */
1175  	u32 ctl = timer_get_ctl(timer);
1176  
1177  	if (!kvm_timer_compute_delta(timer))
1178  		ctl |= ARCH_TIMER_CTRL_IT_STAT;
1179  
1180  	return ctl;
1181  }
1182  
/*
 * Read one of the KVM_REG_ARM_{TIMER,PTIMER}_{CTL,CNT,CVAL} registers
 * through kvm_arm_timer_read().
 *
 * Returns the register value, or (u64)-1 for an unknown @regid.
 */
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	struct arch_timer_context *ctx;
	enum kvm_arch_timer_regs treg;
	bool is_virtual;

	/* Which register of the timer is being asked for? */
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
	case KVM_REG_ARM_PTIMER_CTL:
		treg = TIMER_REG_CTL;
		break;
	case KVM_REG_ARM_TIMER_CNT:
	case KVM_REG_ARM_PTIMER_CNT:
		treg = TIMER_REG_CNT;
		break;
	case KVM_REG_ARM_TIMER_CVAL:
	case KVM_REG_ARM_PTIMER_CVAL:
		treg = TIMER_REG_CVAL;
		break;
	default:
		return (u64)-1;
	}

	/* Anything that is not a vtimer register is a ptimer one here. */
	is_virtual = regid == KVM_REG_ARM_TIMER_CTL ||
		     regid == KVM_REG_ARM_TIMER_CNT ||
		     regid == KVM_REG_ARM_TIMER_CVAL;
	ctx = is_virtual ? vcpu_vtimer(vcpu) : vcpu_ptimer(vcpu);

	return kvm_arm_timer_read(vcpu, ctx, treg);
}
1207  
kvm_arm_timer_read(struct kvm_vcpu * vcpu,struct arch_timer_context * timer,enum kvm_arch_timer_regs treg)1208  static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
1209  			      struct arch_timer_context *timer,
1210  			      enum kvm_arch_timer_regs treg)
1211  {
1212  	u64 val;
1213  
1214  	switch (treg) {
1215  	case TIMER_REG_TVAL:
1216  		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
1217  		val = lower_32_bits(val);
1218  		break;
1219  
1220  	case TIMER_REG_CTL:
1221  		val = read_timer_ctl(timer);
1222  		break;
1223  
1224  	case TIMER_REG_CVAL:
1225  		val = timer_get_cval(timer);
1226  		break;
1227  
1228  	case TIMER_REG_CNT:
1229  		val = kvm_phys_timer_read() - timer_get_offset(timer);
1230  		break;
1231  
1232  	case TIMER_REG_VOFF:
1233  		val = *timer->offset.vcpu_offset;
1234  		break;
1235  
1236  	default:
1237  		BUG();
1238  	}
1239  
1240  	return val;
1241  }
1242  
kvm_arm_timer_read_sysreg(struct kvm_vcpu * vcpu,enum kvm_arch_timers tmr,enum kvm_arch_timer_regs treg)1243  u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
1244  			      enum kvm_arch_timers tmr,
1245  			      enum kvm_arch_timer_regs treg)
1246  {
1247  	struct arch_timer_context *timer;
1248  	struct timer_map map;
1249  	u64 val;
1250  
1251  	get_timer_map(vcpu, &map);
1252  	timer = vcpu_get_timer(vcpu, tmr);
1253  
1254  	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
1255  		return kvm_arm_timer_read(vcpu, timer, treg);
1256  
1257  	preempt_disable();
1258  	timer_save_state(timer);
1259  
1260  	val = kvm_arm_timer_read(vcpu, timer, treg);
1261  
1262  	timer_restore_state(timer);
1263  	preempt_enable();
1264  
1265  	return val;
1266  }
1267  
/*
 * Store @val into register @treg of the software context of @timer.
 *
 *   TVAL: CVAL = (physical counter - offset) + sign-extended low 32 bits
 *   CTL:  stored with ARCH_TIMER_CTRL_IT_STAT masked out
 *   CVAL: stored as is
 *   VOFF: stored behind offset.vcpu_offset
 *
 * Any other @treg hits BUG().
 */
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL: {
		u64 guest_now = kvm_phys_timer_read() - timer_get_offset(timer);

		/* TVAL is a signed 32bit quantity relative to the counter. */
		timer_set_cval(timer, guest_now + (s32)val);
		break;
	}

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		/* vcpu_offset is dereferenced without a NULL check. */
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}
1294  
/*
 * Write @val to register @treg of timer @tmr.
 *
 * For an emulated timer, the hrtimer is cancelled, the context updated
 * and the emulation restarted. Any other timer has the write bracketed
 * by timer_save_state()/timer_restore_state() with preemption disabled.
 */
void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *ctx;
	struct timer_map tmap;
	bool emulated;

	get_timer_map(vcpu, &tmap);
	ctx = vcpu_get_timer(vcpu, tmr);
	emulated = (ctx == tmap.emul_vtimer || ctx == tmap.emul_ptimer);

	if (!emulated) {
		preempt_disable();
		timer_save_state(ctx);
		kvm_arm_timer_write(vcpu, ctx, treg, val);
		timer_restore_state(ctx);
		preempt_enable();
		return;
	}

	soft_timer_cancel(&ctx->hrtimer);
	kvm_arm_timer_write(vcpu, ctx, treg, val);
	timer_emulate(ctx);
}
1317  
/*
 * irq_chip callback: mark the interrupt as forwarded to a vcpu when
 * @vcpu is non-NULL, and clear that marking otherwise. Always returns 0.
 */
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (!vcpu) {
		irqd_clr_forwarded_to_vcpu(d);
		return 0;
	}

	irqd_set_forwarded_to_vcpu(d);
	return 0;
}
1327  
/*
 * irq_chip callback: for an interrupt forwarded to a vcpu, the ACTIVE
 * state is implemented by masking (active) or unmasking (inactive) the
 * parent interrupt. Every other request goes to the parent chip as is.
 */
static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which == IRQCHIP_STATE_ACTIVE && irqd_is_forwarded_to_vcpu(d)) {
		if (val)
			irq_chip_mask_parent(d);
		else
			irq_chip_unmask_parent(d);

		return 0;
	}

	return irq_chip_set_parent_state(d, which, val);
}
1341  
/* irq_chip callback: EOI the parent, except for interrupts forwarded to a vcpu. */
static void timer_irq_eoi(struct irq_data *d)
{
	if (irqd_is_forwarded_to_vcpu(d))
		return;

	irq_chip_eoi_parent(d);
}
1347  
timer_irq_ack(struct irq_data * d)1348  static void timer_irq_ack(struct irq_data *d)
1349  {
1350  	d = d->parent_data;
1351  	if (d->chip->irq_ack)
1352  		d->chip->irq_ack(d);
1353  }
1354  
1355  static struct irq_chip timer_chip = {
1356  	.name			= "KVM",
1357  	.irq_ack		= timer_irq_ack,
1358  	.irq_mask		= irq_chip_mask_parent,
1359  	.irq_unmask		= irq_chip_unmask_parent,
1360  	.irq_eoi		= timer_irq_eoi,
1361  	.irq_set_type		= irq_chip_set_type_parent,
1362  	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
1363  	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
1364  };
1365  
timer_irq_domain_alloc(struct irq_domain * domain,unsigned int virq,unsigned int nr_irqs,void * arg)1366  static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1367  				  unsigned int nr_irqs, void *arg)
1368  {
1369  	irq_hw_number_t hwirq = (uintptr_t)arg;
1370  
1371  	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
1372  					     &timer_chip, NULL);
1373  }
1374  
/* irq_domain .free callback: intentionally empty. */
static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
	/* Nothing to do. */
}
1379  
1380  static const struct irq_domain_ops timer_domain_ops = {
1381  	.alloc	= timer_irq_domain_alloc,
1382  	.free	= timer_irq_domain_free,
1383  };
1384  
/*
 * Store the trigger type of @virq in @flags. Anything other than
 * IRQF_TRIGGER_HIGH or IRQF_TRIGGER_LOW is reported and replaced by
 * IRQF_TRIGGER_LOW.
 */
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	u32 trigger = irq_get_trigger_type(virq);

	if (trigger != IRQF_TRIGGER_HIGH && trigger != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		trigger = IRQF_TRIGGER_LOW;
	}

	*flags = trigger;
}
1394  
kvm_irq_init(struct arch_timer_kvm_info * info)1395  static int kvm_irq_init(struct arch_timer_kvm_info *info)
1396  {
1397  	struct irq_domain *domain = NULL;
1398  
1399  	if (info->virtual_irq <= 0) {
1400  		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
1401  			info->virtual_irq);
1402  		return -ENODEV;
1403  	}
1404  
1405  	host_vtimer_irq = info->virtual_irq;
1406  	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
1407  
1408  	if (kvm_vgic_global_state.no_hw_deactivation) {
1409  		struct fwnode_handle *fwnode;
1410  		struct irq_data *data;
1411  
1412  		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
1413  		if (!fwnode)
1414  			return -ENOMEM;
1415  
1416  		/* Assume both vtimer and ptimer in the same parent */
1417  		data = irq_get_irq_data(host_vtimer_irq);
1418  		domain = irq_domain_create_hierarchy(data->domain, 0,
1419  						     NR_KVM_TIMERS, fwnode,
1420  						     &timer_domain_ops, NULL);
1421  		if (!domain) {
1422  			irq_domain_free_fwnode(fwnode);
1423  			return -ENOMEM;
1424  		}
1425  
1426  		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
1427  		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
1428  					    (void *)TIMER_VTIMER));
1429  	}
1430  
1431  	if (info->physical_irq > 0) {
1432  		host_ptimer_irq = info->physical_irq;
1433  		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
1434  
1435  		if (domain)
1436  			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
1437  						    (void *)TIMER_PTIMER));
1438  	}
1439  
1440  	return 0;
1441  }
1442  
kvm_timer_handle_errata(void)1443  static void kvm_timer_handle_errata(void)
1444  {
1445  	u64 mmfr0, mmfr1, mmfr4;
1446  
1447  	/*
1448  	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
1449  	 * all virtual timer/counter accesses, requiring FEAT_ECV.
1450  	 *
1451  	 * However, a hypervisor supporting nesting is likely to mitigate the
1452  	 * erratum at L0, and not require other levels to mitigate it (which
1453  	 * would otherwise be a terrible performance sink due to trap
1454  	 * amplification).
1455  	 *
1456  	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
1457  	 * and that NV is likely not to (because of limitations of the
1458  	 * architecture), only enable the workaround when FEAT_VHE and
1459  	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
1460  	 */
1461  	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
1462  	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
1463  	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
1464  	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1)		&&
1465  	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4)	&&
1466  	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0)		&&
1467  	    (has_vhe() || has_hvhe())				&&
1468  	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
1469  		static_branch_enable(&broken_cntvoff_key);
1470  		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
1471  	}
1472  }
1473  
kvm_timer_hyp_init(bool has_gic)1474  int __init kvm_timer_hyp_init(bool has_gic)
1475  {
1476  	struct arch_timer_kvm_info *info;
1477  	int err;
1478  
1479  	info = arch_timer_get_kvm_info();
1480  	timecounter = &info->timecounter;
1481  
1482  	if (!timecounter->cc) {
1483  		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
1484  		return -ENODEV;
1485  	}
1486  
1487  	err = kvm_irq_init(info);
1488  	if (err)
1489  		return err;
1490  
1491  	/* First, do the virtual EL1 timer irq */
1492  
1493  	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
1494  				 "kvm guest vtimer", kvm_get_running_vcpus());
1495  	if (err) {
1496  		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
1497  			host_vtimer_irq, err);
1498  		return err;
1499  	}
1500  
1501  	if (has_gic) {
1502  		err = irq_set_vcpu_affinity(host_vtimer_irq,
1503  					    kvm_get_running_vcpus());
1504  		if (err) {
1505  			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1506  			goto out_free_vtimer_irq;
1507  		}
1508  
1509  		static_branch_enable(&has_gic_active_state);
1510  	}
1511  
1512  	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
1513  
1514  	/* Now let's do the physical EL1 timer irq */
1515  
1516  	if (info->physical_irq > 0) {
1517  		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
1518  					 "kvm guest ptimer", kvm_get_running_vcpus());
1519  		if (err) {
1520  			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
1521  				host_ptimer_irq, err);
1522  			goto out_free_vtimer_irq;
1523  		}
1524  
1525  		if (has_gic) {
1526  			err = irq_set_vcpu_affinity(host_ptimer_irq,
1527  						    kvm_get_running_vcpus());
1528  			if (err) {
1529  				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1530  				goto out_free_ptimer_irq;
1531  			}
1532  		}
1533  
1534  		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
1535  	} else if (has_vhe()) {
1536  		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
1537  			info->physical_irq);
1538  		err = -ENODEV;
1539  		goto out_free_vtimer_irq;
1540  	}
1541  
1542  	kvm_timer_handle_errata();
1543  	return 0;
1544  
1545  out_free_ptimer_irq:
1546  	if (info->physical_irq > 0)
1547  		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
1548  out_free_vtimer_irq:
1549  	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
1550  	return err;
1551  }
1552  
kvm_timer_vcpu_terminate(struct kvm_vcpu * vcpu)1553  void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
1554  {
1555  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1556  
1557  	soft_timer_cancel(&timer->bg_timer);
1558  }
1559  
timer_irqs_are_valid(struct kvm_vcpu * vcpu)1560  static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
1561  {
1562  	u32 ppis = 0;
1563  	bool valid;
1564  
1565  	mutex_lock(&vcpu->kvm->arch.config_lock);
1566  
1567  	for (int i = 0; i < nr_timers(vcpu); i++) {
1568  		struct arch_timer_context *ctx;
1569  		int irq;
1570  
1571  		ctx = vcpu_get_timer(vcpu, i);
1572  		irq = timer_irq(ctx);
1573  		if (kvm_vgic_set_owner(vcpu, irq, ctx))
1574  			break;
1575  
1576  		/*
1577  		 * We know by construction that we only have PPIs, so
1578  		 * all values are less than 32.
1579  		 */
1580  		ppis |= BIT(irq);
1581  	}
1582  
1583  	valid = hweight32(ppis) == nr_timers(vcpu);
1584  
1585  	if (valid)
1586  		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
1587  
1588  	mutex_unlock(&vcpu->kvm->arch.config_lock);
1589  
1590  	return valid;
1591  }
1592  
kvm_arch_timer_get_input_level(int vintid)1593  static bool kvm_arch_timer_get_input_level(int vintid)
1594  {
1595  	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
1596  
1597  	if (WARN(!vcpu, "No vcpu context!\n"))
1598  		return false;
1599  
1600  	for (int i = 0; i < nr_timers(vcpu); i++) {
1601  		struct arch_timer_context *ctx;
1602  
1603  		ctx = vcpu_get_timer(vcpu, i);
1604  		if (timer_irq(ctx) == vintid)
1605  			return kvm_timer_should_fire(ctx);
1606  	}
1607  
1608  	/* A timer IRQ has fired, but no matching timer was found? */
1609  	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
1610  
1611  	return false;
1612  }
1613  
kvm_timer_enable(struct kvm_vcpu * vcpu)1614  int kvm_timer_enable(struct kvm_vcpu *vcpu)
1615  {
1616  	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1617  	struct timer_map map;
1618  	int ret;
1619  
1620  	if (timer->enabled)
1621  		return 0;
1622  
1623  	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
1624  	if (!irqchip_in_kernel(vcpu->kvm))
1625  		goto no_vgic;
1626  
1627  	/*
1628  	 * At this stage, we have the guarantee that the vgic is both
1629  	 * available and initialized.
1630  	 */
1631  	if (!timer_irqs_are_valid(vcpu)) {
1632  		kvm_debug("incorrectly configured timer irqs\n");
1633  		return -EINVAL;
1634  	}
1635  
1636  	get_timer_map(vcpu, &map);
1637  
1638  	ret = kvm_vgic_map_phys_irq(vcpu,
1639  				    map.direct_vtimer->host_timer_irq,
1640  				    timer_irq(map.direct_vtimer),
1641  				    &arch_timer_irq_ops);
1642  	if (ret)
1643  		return ret;
1644  
1645  	if (map.direct_ptimer) {
1646  		ret = kvm_vgic_map_phys_irq(vcpu,
1647  					    map.direct_ptimer->host_timer_irq,
1648  					    timer_irq(map.direct_ptimer),
1649  					    &arch_timer_irq_ops);
1650  	}
1651  
1652  	if (ret)
1653  		return ret;
1654  
1655  no_vgic:
1656  	timer->enabled = 1;
1657  	return 0;
1658  }
1659  
1660  /* If we have CNTPOFF, permanently set ECV to enable it */
kvm_timer_init_vhe(void)1661  void kvm_timer_init_vhe(void)
1662  {
1663  	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
1664  		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
1665  }
1666  
kvm_arm_timer_set_attr(struct kvm_vcpu * vcpu,struct kvm_device_attr * attr)1667  int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1668  {
1669  	int __user *uaddr = (int __user *)(long)attr->addr;
1670  	int irq, idx, ret = 0;
1671  
1672  	if (!irqchip_in_kernel(vcpu->kvm))
1673  		return -EINVAL;
1674  
1675  	if (get_user(irq, uaddr))
1676  		return -EFAULT;
1677  
1678  	if (!(irq_is_ppi(irq)))
1679  		return -EINVAL;
1680  
1681  	mutex_lock(&vcpu->kvm->arch.config_lock);
1682  
1683  	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
1684  		     &vcpu->kvm->arch.flags)) {
1685  		ret = -EBUSY;
1686  		goto out;
1687  	}
1688  
1689  	switch (attr->attr) {
1690  	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1691  		idx = TIMER_VTIMER;
1692  		break;
1693  	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1694  		idx = TIMER_PTIMER;
1695  		break;
1696  	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1697  		idx = TIMER_HVTIMER;
1698  		break;
1699  	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1700  		idx = TIMER_HPTIMER;
1701  		break;
1702  	default:
1703  		ret = -ENXIO;
1704  		goto out;
1705  	}
1706  
1707  	/*
1708  	 * We cannot validate the IRQ unicity before we run, so take it at
1709  	 * face value. The verdict will be given on first vcpu run, for each
1710  	 * vcpu. Yes this is late. Blame it on the stupid API.
1711  	 */
1712  	vcpu->kvm->arch.timer_data.ppi[idx] = irq;
1713  
1714  out:
1715  	mutex_unlock(&vcpu->kvm->arch.config_lock);
1716  	return ret;
1717  }
1718  
kvm_arm_timer_get_attr(struct kvm_vcpu * vcpu,struct kvm_device_attr * attr)1719  int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1720  {
1721  	int __user *uaddr = (int __user *)(long)attr->addr;
1722  	struct arch_timer_context *timer;
1723  	int irq;
1724  
1725  	switch (attr->attr) {
1726  	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1727  		timer = vcpu_vtimer(vcpu);
1728  		break;
1729  	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1730  		timer = vcpu_ptimer(vcpu);
1731  		break;
1732  	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1733  		timer = vcpu_hvtimer(vcpu);
1734  		break;
1735  	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1736  		timer = vcpu_hptimer(vcpu);
1737  		break;
1738  	default:
1739  		return -ENXIO;
1740  	}
1741  
1742  	irq = timer_irq(timer);
1743  	return put_user(irq, uaddr);
1744  }
1745  
kvm_arm_timer_has_attr(struct kvm_vcpu * vcpu,struct kvm_device_attr * attr)1746  int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1747  {
1748  	switch (attr->attr) {
1749  	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1750  	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1751  	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1752  	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1753  		return 0;
1754  	}
1755  
1756  	return -ENXIO;
1757  }
1758  
kvm_vm_ioctl_set_counter_offset(struct kvm * kvm,struct kvm_arm_counter_offset * offset)1759  int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
1760  				    struct kvm_arm_counter_offset *offset)
1761  {
1762  	int ret = 0;
1763  
1764  	if (offset->reserved)
1765  		return -EINVAL;
1766  
1767  	mutex_lock(&kvm->lock);
1768  
1769  	if (lock_all_vcpus(kvm)) {
1770  		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
1771  
1772  		/*
1773  		 * If userspace decides to set the offset using this
1774  		 * API rather than merely restoring the counter
1775  		 * values, the offset applies to both the virtual and
1776  		 * physical views.
1777  		 */
1778  		kvm->arch.timer_data.voffset = offset->counter_offset;
1779  		kvm->arch.timer_data.poffset = offset->counter_offset;
1780  
1781  		unlock_all_vcpus(kvm);
1782  	} else {
1783  		ret = -EBUSY;
1784  	}
1785  
1786  	mutex_unlock(&kvm->lock);
1787  
1788  	return ret;
1789  }
1790