xref: /linux/arch/riscv/kvm/vcpu.c (revision c7de79e662b8681f54196c107281f1e63c26a3db)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2019 Western Digital Corporation or its affiliates.
4  *
5  * Authors:
6  *     Anup Patel <anup.patel@wdc.com>
7  */
8 
9 #include <linux/bitops.h>
10 #include <linux/entry-kvm.h>
11 #include <linux/errno.h>
12 #include <linux/err.h>
13 #include <linux/kdebug.h>
14 #include <linux/module.h>
15 #include <linux/percpu.h>
16 #include <linux/vmalloc.h>
17 #include <linux/sched/signal.h>
18 #include <linux/fs.h>
19 #include <linux/kvm_host.h>
20 #include <asm/cacheflush.h>
21 #include <asm/kvm_nacl.h>
22 #include <asm/kvm_vcpu_vector.h>
23 
24 #define CREATE_TRACE_POINTS
25 #include "trace.h"
26 
/*
 * Descriptors for the per-VCPU statistics: the generic KVM VCPU stats
 * followed by the counters specific to this architecture.
 * kvm_vcpu_stats_header derives its descriptor count and data offset from
 * this array.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits),
	STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
	STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, load_access_exits),
	STATS_DESC_COUNTER(VCPU, store_access_exits),
};
44 
/*
 * Layout header for the VCPU stats: the offsets place a
 * KVM_STATS_NAME_SIZE-byte id area right after the header, then the
 * descriptor array (kvm_vcpu_stats_desc), then the stats data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
53 
54 static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu,
55 					 bool kvm_sbi_reset)
56 {
57 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
58 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
59 	void *vector_datap = cntx->vector.datap;
60 
61 	memset(cntx, 0, sizeof(*cntx));
62 	memset(csr, 0, sizeof(*csr));
63 	memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr));
64 
65 	/* Restore datap as it's not a part of the guest context. */
66 	cntx->vector.datap = vector_datap;
67 
68 	if (kvm_sbi_reset)
69 		kvm_riscv_vcpu_sbi_load_reset_state(vcpu);
70 
71 	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
72 	cntx->sstatus = SR_SPP | SR_SPIE;
73 
74 	cntx->hstatus |= HSTATUS_VTW;
75 	cntx->hstatus |= HSTATUS_SPVP;
76 	cntx->hstatus |= HSTATUS_SPV;
77 }
78 
79 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset)
80 {
81 	bool loaded;
82 
83 	/**
84 	 * The preemption should be disabled here because it races with
85 	 * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
86 	 * also calls vcpu_load/put.
87 	 */
88 	get_cpu();
89 	loaded = (vcpu->cpu != -1);
90 	if (loaded)
91 		kvm_arch_vcpu_put(vcpu);
92 
93 	vcpu->arch.last_exit_cpu = -1;
94 
95 	kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset);
96 
97 	kvm_riscv_vcpu_fp_reset(vcpu);
98 
99 	kvm_riscv_vcpu_vector_reset(vcpu);
100 
101 	kvm_riscv_vcpu_timer_reset(vcpu);
102 
103 	kvm_riscv_vcpu_aia_reset(vcpu);
104 
105 	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
106 	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);
107 
108 	kvm_riscv_vcpu_pmu_reset(vcpu);
109 
110 	vcpu->arch.hfence_head = 0;
111 	vcpu->arch.hfence_tail = 0;
112 	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
113 
114 	kvm_riscv_vcpu_sbi_sta_reset(vcpu);
115 
116 	/* Reset the guest CSRs for hotplug usecase */
117 	if (loaded)
118 		kvm_arch_vcpu_load(vcpu, smp_processor_id());
119 	put_cpu();
120 }
121 
/*
 * Pre-creation hook for a VCPU with the given @id.
 *
 * Nothing is checked or prepared here; the hook always succeeds.
 */
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	/* Unconditional success. */
	return 0;
}
126 
127 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
128 {
129 	int rc;
130 
131 	spin_lock_init(&vcpu->arch.mp_state_lock);
132 
133 	/* Mark this VCPU never ran */
134 	vcpu->arch.ran_atleast_once = false;
135 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
136 	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
137 
138 	/* Setup ISA features available to VCPU */
139 	kvm_riscv_vcpu_setup_isa(vcpu);
140 
141 	/* Setup vendor, arch, and implementation details */
142 	vcpu->arch.mvendorid = sbi_get_mvendorid();
143 	vcpu->arch.marchid = sbi_get_marchid();
144 	vcpu->arch.mimpid = sbi_get_mimpid();
145 
146 	/* Setup VCPU hfence queue */
147 	spin_lock_init(&vcpu->arch.hfence_lock);
148 
149 	spin_lock_init(&vcpu->arch.reset_state.lock);
150 
151 	if (kvm_riscv_vcpu_alloc_vector_context(vcpu))
152 		return -ENOMEM;
153 
154 	/* Setup VCPU timer */
155 	kvm_riscv_vcpu_timer_init(vcpu);
156 
157 	/* setup performance monitoring */
158 	kvm_riscv_vcpu_pmu_init(vcpu);
159 
160 	/* Setup VCPU AIA */
161 	rc = kvm_riscv_vcpu_aia_init(vcpu);
162 	if (rc)
163 		return rc;
164 
165 	/*
166 	 * Setup SBI extensions
167 	 * NOTE: This must be the last thing to be initialized.
168 	 */
169 	kvm_riscv_vcpu_sbi_init(vcpu);
170 
171 	/* Reset VCPU */
172 	kvm_riscv_reset_vcpu(vcpu, false);
173 
174 	return 0;
175 }
176 
177 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
178 {
179 	/**
180 	 * vcpu with id 0 is the designated boot cpu.
181 	 * Keep all vcpus with non-zero id in power-off state so that
182 	 * they can be brought up using SBI HSM extension.
183 	 */
184 	if (vcpu->vcpu_idx != 0)
185 		kvm_riscv_vcpu_power_off(vcpu);
186 }
187 
188 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
189 {
190 	/* Cleanup VCPU AIA context */
191 	kvm_riscv_vcpu_aia_deinit(vcpu);
192 
193 	/* Cleanup VCPU timer */
194 	kvm_riscv_vcpu_timer_deinit(vcpu);
195 
196 	kvm_riscv_vcpu_pmu_deinit(vcpu);
197 
198 	/* Free unused pages pre-allocated for G-stage page table mappings */
199 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
200 
201 	/* Free vector context space for host and guest kernel */
202 	kvm_riscv_vcpu_free_vector_context(vcpu);
203 }
204 
/*
 * Report whether @vcpu has a pending timer; the answer comes straight from
 * kvm_riscv_vcpu_timer_pending().
 */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	int pending = kvm_riscv_vcpu_timer_pending(vcpu);

	return pending;
}
209 
210 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
211 {
212 	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
213 		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
214 }
215 
216 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
217 {
218 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
219 }
220 
221 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
222 {
223 	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
224 }
225 
#ifdef CONFIG_GUEST_PERF_EVENTS
/*
 * Return the guest instruction pointer, taken from the saved guest SEPC.
 */
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	const struct kvm_cpu_context *guest_ctx = &vcpu->arch.guest_context;

	return guest_ctx->sepc;
}
#endif
232 
233 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
234 {
235 	return VM_FAULT_SIGBUS;
236 }
237 
238 long kvm_arch_vcpu_async_ioctl(struct file *filp,
239 			       unsigned int ioctl, unsigned long arg)
240 {
241 	struct kvm_vcpu *vcpu = filp->private_data;
242 	void __user *argp = (void __user *)arg;
243 
244 	if (ioctl == KVM_INTERRUPT) {
245 		struct kvm_interrupt irq;
246 
247 		if (copy_from_user(&irq, argp, sizeof(irq)))
248 			return -EFAULT;
249 
250 		if (irq.irq == KVM_INTERRUPT_SET)
251 			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
252 		else
253 			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
254 	}
255 
256 	return -ENOIOCTLCMD;
257 }
258 
259 long kvm_arch_vcpu_ioctl(struct file *filp,
260 			 unsigned int ioctl, unsigned long arg)
261 {
262 	struct kvm_vcpu *vcpu = filp->private_data;
263 	void __user *argp = (void __user *)arg;
264 	long r = -EINVAL;
265 
266 	switch (ioctl) {
267 	case KVM_SET_ONE_REG:
268 	case KVM_GET_ONE_REG: {
269 		struct kvm_one_reg reg;
270 
271 		r = -EFAULT;
272 		if (copy_from_user(&reg, argp, sizeof(reg)))
273 			break;
274 
275 		if (ioctl == KVM_SET_ONE_REG)
276 			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
277 		else
278 			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
279 		break;
280 	}
281 	case KVM_GET_REG_LIST: {
282 		struct kvm_reg_list __user *user_list = argp;
283 		struct kvm_reg_list reg_list;
284 		unsigned int n;
285 
286 		r = -EFAULT;
287 		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
288 			break;
289 		n = reg_list.n;
290 		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
291 		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
292 			break;
293 		r = -E2BIG;
294 		if (n < reg_list.n)
295 			break;
296 		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
297 		break;
298 	}
299 	default:
300 		break;
301 	}
302 
303 	return r;
304 }
305 
/*
 * Reading special registers through this ioctl is not implemented;
 * @sregs is left untouched and -EINVAL is always returned.
 */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	/* Unsupported request. */
	return -EINVAL;
}
311 
/*
 * Writing special registers through this ioctl is not implemented;
 * @sregs is ignored and -EINVAL is always returned.
 */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	/* Unsupported request. */
	return -EINVAL;
}
317 
/*
 * Reading FPU state through this ioctl is not implemented; @fpu is left
 * untouched and -EINVAL is always returned.
 */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* Unsupported request. */
	return -EINVAL;
}
322 
/*
 * Writing FPU state through this ioctl is not implemented; @fpu is ignored
 * and -EINVAL is always returned.
 */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* Unsupported request. */
	return -EINVAL;
}
327 
/*
 * Address translation through this ioctl is not implemented; @tr is left
 * untouched and -EINVAL is always returned.
 */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	/* Unsupported request. */
	return -EINVAL;
}
333 
/*
 * Bulk register read is not implemented; @regs is left untouched and
 * -EINVAL is always returned. Register access in this file goes through
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG in kvm_arch_vcpu_ioctl().
 */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	/* Unsupported request. */
	return -EINVAL;
}
338 
/*
 * Bulk register write is not implemented; @regs is ignored and -EINVAL is
 * always returned. Register access in this file goes through
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG in kvm_arch_vcpu_ioctl().
 */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	/* Unsupported request. */
	return -EINVAL;
}
343 
344 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
345 {
346 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
347 	unsigned long mask, val;
348 
349 	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
350 		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
351 		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;
352 
353 		csr->hvip &= ~mask;
354 		csr->hvip |= val;
355 	}
356 
357 	/* Flush AIA high interrupts */
358 	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
359 }
360 
361 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
362 {
363 	unsigned long hvip;
364 	struct kvm_vcpu_arch *v = &vcpu->arch;
365 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
366 
367 	/* Read current HVIP and VSIE CSRs */
368 	csr->vsie = ncsr_read(CSR_VSIE);
369 
370 	/* Sync-up HVIP.VSSIP bit changes does by Guest */
371 	hvip = ncsr_read(CSR_HVIP);
372 	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
373 		if (hvip & (1UL << IRQ_VS_SOFT)) {
374 			if (!test_and_set_bit(IRQ_VS_SOFT,
375 					      v->irqs_pending_mask))
376 				set_bit(IRQ_VS_SOFT, v->irqs_pending);
377 		} else {
378 			if (!test_and_set_bit(IRQ_VS_SOFT,
379 					      v->irqs_pending_mask))
380 				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
381 		}
382 	}
383 
384 	/* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
385 	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
386 		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
387 		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
388 			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
389 	}
390 
391 	/* Sync-up AIA high interrupts */
392 	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
393 
394 	/* Sync-up timer CSRs */
395 	kvm_riscv_vcpu_timer_sync(vcpu);
396 }
397 
398 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
399 {
400 	/*
401 	 * We only allow VS-mode software, timer, and external
402 	 * interrupts when irq is one of the local interrupts
403 	 * defined by RISC-V privilege specification.
404 	 */
405 	if (irq < IRQ_LOCAL_MAX &&
406 	    irq != IRQ_VS_SOFT &&
407 	    irq != IRQ_VS_TIMER &&
408 	    irq != IRQ_VS_EXT &&
409 	    irq != IRQ_PMU_OVF)
410 		return -EINVAL;
411 
412 	set_bit(irq, vcpu->arch.irqs_pending);
413 	smp_mb__before_atomic();
414 	set_bit(irq, vcpu->arch.irqs_pending_mask);
415 
416 	kvm_vcpu_kick(vcpu);
417 
418 	return 0;
419 }
420 
421 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
422 {
423 	/*
424 	 * We only allow VS-mode software, timer, counter overflow and external
425 	 * interrupts when irq is one of the local interrupts
426 	 * defined by RISC-V privilege specification.
427 	 */
428 	if (irq < IRQ_LOCAL_MAX &&
429 	    irq != IRQ_VS_SOFT &&
430 	    irq != IRQ_VS_TIMER &&
431 	    irq != IRQ_VS_EXT &&
432 	    irq != IRQ_PMU_OVF)
433 		return -EINVAL;
434 
435 	clear_bit(irq, vcpu->arch.irqs_pending);
436 	smp_mb__before_atomic();
437 	set_bit(irq, vcpu->arch.irqs_pending_mask);
438 
439 	return 0;
440 }
441 
442 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
443 {
444 	unsigned long ie;
445 
446 	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
447 		<< VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
448 	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
449 		(unsigned long)mask;
450 	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
451 		return true;
452 
453 	/* Check AIA high interrupts */
454 	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
455 }
456 
457 void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
458 {
459 	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
460 	kvm_make_request(KVM_REQ_SLEEP, vcpu);
461 	kvm_vcpu_kick(vcpu);
462 }
463 
464 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
465 {
466 	spin_lock(&vcpu->arch.mp_state_lock);
467 	__kvm_riscv_vcpu_power_off(vcpu);
468 	spin_unlock(&vcpu->arch.mp_state_lock);
469 }
470 
471 void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
472 {
473 	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
474 	kvm_vcpu_wake_up(vcpu);
475 }
476 
477 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
478 {
479 	spin_lock(&vcpu->arch.mp_state_lock);
480 	__kvm_riscv_vcpu_power_on(vcpu);
481 	spin_unlock(&vcpu->arch.mp_state_lock);
482 }
483 
484 bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
485 {
486 	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
487 }
488 
489 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
490 				    struct kvm_mp_state *mp_state)
491 {
492 	*mp_state = READ_ONCE(vcpu->arch.mp_state);
493 
494 	return 0;
495 }
496 
497 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
498 				    struct kvm_mp_state *mp_state)
499 {
500 	int ret = 0;
501 
502 	spin_lock(&vcpu->arch.mp_state_lock);
503 
504 	switch (mp_state->mp_state) {
505 	case KVM_MP_STATE_RUNNABLE:
506 		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
507 		break;
508 	case KVM_MP_STATE_STOPPED:
509 		__kvm_riscv_vcpu_power_off(vcpu);
510 		break;
511 	case KVM_MP_STATE_INIT_RECEIVED:
512 		if (vcpu->kvm->arch.mp_state_reset)
513 			kvm_riscv_reset_vcpu(vcpu, false);
514 		else
515 			ret = -EINVAL;
516 		break;
517 	default:
518 		ret = -EINVAL;
519 	}
520 
521 	spin_unlock(&vcpu->arch.mp_state_lock);
522 
523 	return ret;
524 }
525 
526 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
527 					struct kvm_guest_debug *dbg)
528 {
529 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
530 		vcpu->guest_debug = dbg->control;
531 		vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
532 	} else {
533 		vcpu->guest_debug = 0;
534 		vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
535 	}
536 
537 	return 0;
538 }
539 
540 static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
541 {
542 	const unsigned long *isa = vcpu->arch.isa;
543 	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
544 
545 	if (riscv_isa_extension_available(isa, SVPBMT))
546 		cfg->henvcfg |= ENVCFG_PBMTE;
547 
548 	if (riscv_isa_extension_available(isa, SSTC))
549 		cfg->henvcfg |= ENVCFG_STCE;
550 
551 	if (riscv_isa_extension_available(isa, ZICBOM))
552 		cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
553 
554 	if (riscv_isa_extension_available(isa, ZICBOZ))
555 		cfg->henvcfg |= ENVCFG_CBZE;
556 
557 	if (riscv_isa_extension_available(isa, SVADU) &&
558 	    !riscv_isa_extension_available(isa, SVADE))
559 		cfg->henvcfg |= ENVCFG_ADUE;
560 
561 	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
562 		cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
563 		if (riscv_isa_extension_available(isa, SSAIA))
564 			cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
565 					  SMSTATEEN0_AIA |
566 					  SMSTATEEN0_AIA_ISEL;
567 		if (riscv_isa_extension_available(isa, SMSTATEEN))
568 			cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
569 	}
570 
571 	cfg->hedeleg = KVM_HEDELEG_DEFAULT;
572 	if (vcpu->guest_debug)
573 		cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
574 }
575 
576 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
577 {
578 	void *nsh;
579 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
580 	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
581 
582 	if (kvm_riscv_nacl_sync_csr_available()) {
583 		nsh = nacl_shmem();
584 		nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
585 		nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
586 		nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
587 		nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
588 		nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
589 		nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
590 		nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
591 		nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
592 		nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
593 		nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
594 		nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
595 		if (IS_ENABLED(CONFIG_32BIT))
596 			nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
597 		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
598 			nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
599 			if (IS_ENABLED(CONFIG_32BIT))
600 				nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
601 		}
602 	} else {
603 		csr_write(CSR_VSSTATUS, csr->vsstatus);
604 		csr_write(CSR_VSIE, csr->vsie);
605 		csr_write(CSR_VSTVEC, csr->vstvec);
606 		csr_write(CSR_VSSCRATCH, csr->vsscratch);
607 		csr_write(CSR_VSEPC, csr->vsepc);
608 		csr_write(CSR_VSCAUSE, csr->vscause);
609 		csr_write(CSR_VSTVAL, csr->vstval);
610 		csr_write(CSR_HEDELEG, cfg->hedeleg);
611 		csr_write(CSR_HVIP, csr->hvip);
612 		csr_write(CSR_VSATP, csr->vsatp);
613 		csr_write(CSR_HENVCFG, cfg->henvcfg);
614 		if (IS_ENABLED(CONFIG_32BIT))
615 			csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
616 		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
617 			csr_write(CSR_HSTATEEN0, cfg->hstateen0);
618 			if (IS_ENABLED(CONFIG_32BIT))
619 				csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
620 		}
621 	}
622 
623 	kvm_riscv_gstage_update_hgatp(vcpu);
624 
625 	kvm_riscv_vcpu_timer_restore(vcpu);
626 
627 	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
628 	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
629 					vcpu->arch.isa);
630 	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
631 	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
632 					    vcpu->arch.isa);
633 
634 	kvm_riscv_vcpu_aia_load(vcpu, cpu);
635 
636 	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
637 
638 	vcpu->cpu = cpu;
639 }
640 
641 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
642 {
643 	void *nsh;
644 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
645 
646 	vcpu->cpu = -1;
647 
648 	kvm_riscv_vcpu_aia_put(vcpu);
649 
650 	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
651 				     vcpu->arch.isa);
652 	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
653 
654 	kvm_riscv_vcpu_timer_save(vcpu);
655 	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
656 					 vcpu->arch.isa);
657 	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);
658 
659 	if (kvm_riscv_nacl_available()) {
660 		nsh = nacl_shmem();
661 		csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
662 		csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
663 		csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
664 		csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
665 		csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
666 		csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
667 		csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
668 		csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
669 		csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
670 	} else {
671 		csr->vsstatus = csr_read(CSR_VSSTATUS);
672 		csr->vsie = csr_read(CSR_VSIE);
673 		csr->vstvec = csr_read(CSR_VSTVEC);
674 		csr->vsscratch = csr_read(CSR_VSSCRATCH);
675 		csr->vsepc = csr_read(CSR_VSEPC);
676 		csr->vscause = csr_read(CSR_VSCAUSE);
677 		csr->vstval = csr_read(CSR_VSTVAL);
678 		csr->hvip = csr_read(CSR_HVIP);
679 		csr->vsatp = csr_read(CSR_VSATP);
680 	}
681 }
682 
683 static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
684 {
685 	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
686 
687 	if (kvm_request_pending(vcpu)) {
688 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
689 			kvm_vcpu_srcu_read_unlock(vcpu);
690 			rcuwait_wait_event(wait,
691 				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
692 				TASK_INTERRUPTIBLE);
693 			kvm_vcpu_srcu_read_lock(vcpu);
694 
695 			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
696 				/*
697 				 * Awaken to handle a signal, request to
698 				 * sleep again later.
699 				 */
700 				kvm_make_request(KVM_REQ_SLEEP, vcpu);
701 			}
702 		}
703 
704 		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
705 			kvm_riscv_reset_vcpu(vcpu, true);
706 
707 		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
708 			kvm_riscv_gstage_update_hgatp(vcpu);
709 
710 		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
711 			kvm_riscv_fence_i_process(vcpu);
712 
713 		/*
714 		 * The generic KVM_REQ_TLB_FLUSH is same as
715 		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
716 		 */
717 		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
718 			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
719 
720 		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
721 			kvm_riscv_hfence_vvma_all_process(vcpu);
722 
723 		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
724 			kvm_riscv_hfence_process(vcpu);
725 
726 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
727 			kvm_riscv_vcpu_record_steal_time(vcpu);
728 	}
729 }
730 
731 static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
732 {
733 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
734 
735 	ncsr_write(CSR_HVIP, csr->hvip);
736 	kvm_riscv_vcpu_aia_update_hvip(vcpu);
737 }
738 
739 static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu)
740 {
741 	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
742 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
743 	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
744 
745 	vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
746 	vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
747 	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
748 	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
749 		vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0,
750 						     smcsr->sstateen0);
751 }
752 
753 static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
754 {
755 	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
756 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
757 	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
758 
759 	csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
760 	csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
761 	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
762 	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
763 		smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0,
764 					    vcpu->arch.host_sstateen0);
765 }
766 
/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 *
 * On return @trap holds the trap CSR values (sepc, scause, stval, htval,
 * htinst) captured right after the guest stopped running, and
 * vcpu->arch.last_exit_cpu is set to vcpu->cpu.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					      struct kvm_cpu_trap *trap)
{
	void *nsh;
	struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;

	/*
	 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
	 * HTINST) here because we do local_irq_enable() after this
	 * function in kvm_arch_vcpu_ioctl_run() which can result in
	 * an interrupt immediately after local_irq_enable() and can
	 * potentially change trap CSRs.
	 */

	kvm_riscv_vcpu_swap_in_guest_state(vcpu);
	guest_state_enter_irqoff();

	if (kvm_riscv_nacl_sync_sret_available()) {
		/* World switch through the NACL sync-SRET SBI call. */
		nsh = nacl_shmem();

		/*
		 * Hand the guest HSTATUS over using the best mechanism
		 * available: autoswap area in the shared memory, synced CSR
		 * in the shared memory, or a direct CSR swap.
		 */
		if (kvm_riscv_nacl_autoswap_csr_available()) {
			hcntx->hstatus =
				nacl_csr_read(nsh, CSR_HSTATUS);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
						SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
						gcntx->hstatus);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
		} else if (kvm_riscv_nacl_sync_csr_available()) {
			hcntx->hstatus = nacl_csr_swap(nsh,
						       CSR_HSTATUS, gcntx->hstatus);
		} else {
			hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
		}

		/* Copy the guest registers, starting at ra, into the SRET area. */
		nacl_scratch_write_longs(nsh,
					 SBI_NACL_SHMEM_SRET_OFFSET +
					 SBI_NACL_SHMEM_SRET_X(1),
					 &gcntx->ra,
					 SBI_NACL_SHMEM_SRET_X_LAST);

		__kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
					   SBI_EXT_NACL_SYNC_SRET);

		/* Back from the guest: recover the guest HSTATUS. */
		if (kvm_riscv_nacl_autoswap_csr_available()) {
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						0);
			gcntx->hstatus = nacl_scratch_read_long(nsh,
								SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
								SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
		} else {
			gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
		}

		trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
		trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
	} else {
		/* Plain world switch with direct CSR accesses. */
		hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);

		__kvm_riscv_switch_to(&vcpu->arch);

		gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);

		trap->htval = csr_read(CSR_HTVAL);
		trap->htinst = csr_read(CSR_HTINST);
	}

	/* Remaining trap details are captured the same way on both paths. */
	trap->sepc = gcntx->sepc;
	trap->scause = csr_read(CSR_SCAUSE);
	trap->stval = csr_read(CSR_STVAL);

	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
	kvm_riscv_vcpu_swap_in_host_state(vcpu);
}
853 
/*
 * Run @vcpu until an exit has to be handled outside of this loop.
 *
 * First completes a previous MMIO, SBI or CSR exit with the values
 * returned from user-space, then loops: handle pending work and requests,
 * enter the guest, and process the resulting trap. The loop continues
 * while the handlers return a positive value.
 *
 * Returns the first non-positive value produced inside the loop (by
 * xfer_to_guest_mode_handle_work(), kvm_riscv_vcpu_aia_update() or
 * kvm_riscv_vcpu_exit()), the non-zero result of the MMIO/SBI/CSR return
 * handler, or -EINTR when vcpu->wants_to_run is not set.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	/* The configuration is derived once, on the very first run. */
	if (!vcpu->arch.ran_atleast_once)
		kvm_riscv_vcpu_setup_config(vcpu);

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (!vcpu->wants_to_run) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			/* Leaves the loop: the while condition fails. */
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * We might have got VCPU interrupts updated asynchronously
		 * so update it in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		/*
		 * Final check with interrupts off: back out and retry if the
		 * VMID version changed, a request arrived or work is pending.
		 * ret is still positive here, so the loop goes around again.
		 */
		if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Cleanup stale TLB entries
		 *
		 * Note: This should be done after G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		trace_kvm_entry(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu, &trap);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/* Syncup interrupts state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(&trap);

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		/* A positive result keeps the loop running. */
		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}
1006 }
1007