xref: /linux/arch/riscv/kvm/vcpu.c (revision 92481c7d14b8030418f00c4b4ec65556565d892d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2019 Western Digital Corporation or its affiliates.
4  *
5  * Authors:
6  *     Anup Patel <anup.patel@wdc.com>
7  */
8 
9 #include <linux/bitops.h>
10 #include <linux/errno.h>
11 #include <linux/err.h>
12 #include <linux/kdebug.h>
13 #include <linux/module.h>
14 #include <linux/percpu.h>
15 #include <linux/uaccess.h>
16 #include <linux/vmalloc.h>
17 #include <linux/sched/signal.h>
18 #include <linux/fs.h>
19 #include <linux/kvm_host.h>
20 #include <asm/csr.h>
21 #include <asm/hwcap.h>
22 
/*
 * Descriptors for the per-VCPU statistics: the generic KVM VCPU stats
 * followed by the counters listed explicitly below.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, exits)
};
33 
/*
 * Header describing the layout of the VCPU stats: the id string
 * (KVM_STATS_NAME_SIZE bytes) starts right after the header, the
 * descriptors follow the id string and the data follows the descriptors.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
42 
/*
 * Bits 25:0 of the first word of the ISA bitmap. This file uses it to
 * select the single letter extensions of a VCPU's ISA.
 */
#define KVM_RISCV_BASE_ISA_MASK		GENMASK(25, 0)

/*
 * Mapping between KVM ISA Extension ID & Host ISA extension ID.
 *
 * The array is indexed by the KVM ISA extension ID and each entry holds
 * the matching host ISA extension ID, so the order of the entries matters.
 */
static const unsigned long kvm_isa_ext_arr[] = {
	RISCV_ISA_EXT_a,
	RISCV_ISA_EXT_c,
	RISCV_ISA_EXT_d,
	RISCV_ISA_EXT_f,
	RISCV_ISA_EXT_h,
	RISCV_ISA_EXT_i,
	RISCV_ISA_EXT_m,
	RISCV_ISA_EXT_SVPBMT,
	RISCV_ISA_EXT_SSTC,
};
57 
58 static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
59 {
60 	unsigned long i;
61 
62 	for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
63 		if (kvm_isa_ext_arr[i] == base_ext)
64 			return i;
65 	}
66 
67 	return KVM_RISCV_ISA_EXT_MAX;
68 }
69 
70 static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
71 {
72 	switch (ext) {
73 	case KVM_RISCV_ISA_EXT_H:
74 		return false;
75 	default:
76 		break;
77 	}
78 
79 	return true;
80 }
81 
82 static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
83 {
84 	switch (ext) {
85 	case KVM_RISCV_ISA_EXT_A:
86 	case KVM_RISCV_ISA_EXT_C:
87 	case KVM_RISCV_ISA_EXT_I:
88 	case KVM_RISCV_ISA_EXT_M:
89 	case KVM_RISCV_ISA_EXT_SSTC:
90 		return false;
91 	default:
92 		break;
93 	}
94 
95 	return true;
96 }
97 
98 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
99 {
100 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
101 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
102 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
103 	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
104 	bool loaded;
105 
106 	/**
107 	 * The preemption should be disabled here because it races with
108 	 * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
109 	 * also calls vcpu_load/put.
110 	 */
111 	get_cpu();
112 	loaded = (vcpu->cpu != -1);
113 	if (loaded)
114 		kvm_arch_vcpu_put(vcpu);
115 
116 	vcpu->arch.last_exit_cpu = -1;
117 
118 	memcpy(csr, reset_csr, sizeof(*csr));
119 
120 	memcpy(cntx, reset_cntx, sizeof(*cntx));
121 
122 	kvm_riscv_vcpu_fp_reset(vcpu);
123 
124 	kvm_riscv_vcpu_timer_reset(vcpu);
125 
126 	WRITE_ONCE(vcpu->arch.irqs_pending, 0);
127 	WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
128 
129 	vcpu->arch.hfence_head = 0;
130 	vcpu->arch.hfence_tail = 0;
131 	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
132 
133 	/* Reset the guest CSRs for hotplug usecase */
134 	if (loaded)
135 		kvm_arch_vcpu_load(vcpu, smp_processor_id());
136 	put_cpu();
137 }
138 
/* Nothing is checked or set up before a VCPU is created: always returns 0. */
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}
143 
144 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
145 {
146 	struct kvm_cpu_context *cntx;
147 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
148 	unsigned long host_isa, i;
149 
150 	/* Mark this VCPU never ran */
151 	vcpu->arch.ran_atleast_once = false;
152 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
153 	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
154 
155 	/* Setup ISA features available to VCPU */
156 	for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
157 		host_isa = kvm_isa_ext_arr[i];
158 		if (__riscv_isa_extension_available(NULL, host_isa) &&
159 		    kvm_riscv_vcpu_isa_enable_allowed(i))
160 			set_bit(host_isa, vcpu->arch.isa);
161 	}
162 
163 	/* Setup VCPU hfence queue */
164 	spin_lock_init(&vcpu->arch.hfence_lock);
165 
166 	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
167 	cntx = &vcpu->arch.guest_reset_context;
168 	cntx->sstatus = SR_SPP | SR_SPIE;
169 	cntx->hstatus = 0;
170 	cntx->hstatus |= HSTATUS_VTW;
171 	cntx->hstatus |= HSTATUS_SPVP;
172 	cntx->hstatus |= HSTATUS_SPV;
173 
174 	/* By default, make CY, TM, and IR counters accessible in VU mode */
175 	reset_csr->scounteren = 0x7;
176 
177 	/* Setup VCPU timer */
178 	kvm_riscv_vcpu_timer_init(vcpu);
179 
180 	/* Reset VCPU */
181 	kvm_riscv_reset_vcpu(vcpu);
182 
183 	return 0;
184 }
185 
186 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
187 {
188 	/**
189 	 * vcpu with id 0 is the designated boot cpu.
190 	 * Keep all vcpus with non-zero id in power-off state so that
191 	 * they can be brought up using SBI HSM extension.
192 	 */
193 	if (vcpu->vcpu_idx != 0)
194 		kvm_riscv_vcpu_power_off(vcpu);
195 }
196 
/* Release the architecture specific resources held by @vcpu. */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
205 
/* Report whether @vcpu has a pending timer, as told by the VCPU timer code. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}
210 
/* Intentionally empty: no architecture specific work is done here. */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
}
214 
/* Intentionally empty: no architecture specific work is done here. */
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
}
218 
219 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
220 {
221 	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
222 		!vcpu->arch.power_off && !vcpu->arch.pause);
223 }
224 
/*
 * Returns non-zero when kvm_vcpu_exiting_guest_mode() reports that @vcpu
 * was in IN_GUEST_MODE.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
229 
230 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
231 {
232 	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
233 }
234 
/* No fault is handled here: every fault is answered with VM_FAULT_SIGBUS. */
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
239 
240 static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
241 					 const struct kvm_one_reg *reg)
242 {
243 	unsigned long __user *uaddr =
244 			(unsigned long __user *)(unsigned long)reg->addr;
245 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
246 					    KVM_REG_SIZE_MASK |
247 					    KVM_REG_RISCV_CONFIG);
248 	unsigned long reg_val;
249 
250 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
251 		return -EINVAL;
252 
253 	switch (reg_num) {
254 	case KVM_REG_RISCV_CONFIG_REG(isa):
255 		reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
256 		break;
257 	default:
258 		return -EINVAL;
259 	}
260 
261 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
262 		return -EFAULT;
263 
264 	return 0;
265 }
266 
267 static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
268 					 const struct kvm_one_reg *reg)
269 {
270 	unsigned long __user *uaddr =
271 			(unsigned long __user *)(unsigned long)reg->addr;
272 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
273 					    KVM_REG_SIZE_MASK |
274 					    KVM_REG_RISCV_CONFIG);
275 	unsigned long i, isa_ext, reg_val;
276 
277 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
278 		return -EINVAL;
279 
280 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
281 		return -EFAULT;
282 
283 	/* This ONE REG interface is only defined for single letter extensions */
284 	if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
285 		return -EINVAL;
286 
287 	switch (reg_num) {
288 	case KVM_REG_RISCV_CONFIG_REG(isa):
289 		if (!vcpu->arch.ran_atleast_once) {
290 			/* Ignore the enable/disable request for certain extensions */
291 			for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
292 				isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
293 				if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
294 					reg_val &= ~BIT(i);
295 					continue;
296 				}
297 				if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
298 					if (reg_val & BIT(i))
299 						reg_val &= ~BIT(i);
300 				if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
301 					if (!(reg_val & BIT(i)))
302 						reg_val |= BIT(i);
303 			}
304 			reg_val &= riscv_isa_extension_base(NULL);
305 			/* Do not modify anything beyond single letter extensions */
306 			reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
307 				  (reg_val & KVM_RISCV_BASE_ISA_MASK);
308 			vcpu->arch.isa[0] = reg_val;
309 			kvm_riscv_vcpu_fp_reset(vcpu);
310 		} else {
311 			return -EOPNOTSUPP;
312 		}
313 		break;
314 	default:
315 		return -EINVAL;
316 	}
317 
318 	return 0;
319 }
320 
321 static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
322 				       const struct kvm_one_reg *reg)
323 {
324 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
325 	unsigned long __user *uaddr =
326 			(unsigned long __user *)(unsigned long)reg->addr;
327 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
328 					    KVM_REG_SIZE_MASK |
329 					    KVM_REG_RISCV_CORE);
330 	unsigned long reg_val;
331 
332 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
333 		return -EINVAL;
334 	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
335 		return -EINVAL;
336 
337 	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
338 		reg_val = cntx->sepc;
339 	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
340 		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
341 		reg_val = ((unsigned long *)cntx)[reg_num];
342 	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
343 		reg_val = (cntx->sstatus & SR_SPP) ?
344 				KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
345 	else
346 		return -EINVAL;
347 
348 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
349 		return -EFAULT;
350 
351 	return 0;
352 }
353 
354 static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
355 				       const struct kvm_one_reg *reg)
356 {
357 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
358 	unsigned long __user *uaddr =
359 			(unsigned long __user *)(unsigned long)reg->addr;
360 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
361 					    KVM_REG_SIZE_MASK |
362 					    KVM_REG_RISCV_CORE);
363 	unsigned long reg_val;
364 
365 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
366 		return -EINVAL;
367 	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
368 		return -EINVAL;
369 
370 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
371 		return -EFAULT;
372 
373 	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
374 		cntx->sepc = reg_val;
375 	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
376 		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
377 		((unsigned long *)cntx)[reg_num] = reg_val;
378 	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
379 		if (reg_val == KVM_RISCV_MODE_S)
380 			cntx->sstatus |= SR_SPP;
381 		else
382 			cntx->sstatus &= ~SR_SPP;
383 	} else
384 		return -EINVAL;
385 
386 	return 0;
387 }
388 
389 static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
390 				      const struct kvm_one_reg *reg)
391 {
392 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
393 	unsigned long __user *uaddr =
394 			(unsigned long __user *)(unsigned long)reg->addr;
395 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
396 					    KVM_REG_SIZE_MASK |
397 					    KVM_REG_RISCV_CSR);
398 	unsigned long reg_val;
399 
400 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
401 		return -EINVAL;
402 	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
403 		return -EINVAL;
404 
405 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
406 		kvm_riscv_vcpu_flush_interrupts(vcpu);
407 		reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
408 	} else
409 		reg_val = ((unsigned long *)csr)[reg_num];
410 
411 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
412 		return -EFAULT;
413 
414 	return 0;
415 }
416 
417 static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
418 				      const struct kvm_one_reg *reg)
419 {
420 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
421 	unsigned long __user *uaddr =
422 			(unsigned long __user *)(unsigned long)reg->addr;
423 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
424 					    KVM_REG_SIZE_MASK |
425 					    KVM_REG_RISCV_CSR);
426 	unsigned long reg_val;
427 
428 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
429 		return -EINVAL;
430 	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
431 		return -EINVAL;
432 
433 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
434 		return -EFAULT;
435 
436 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
437 		reg_val &= VSIP_VALID_MASK;
438 		reg_val <<= VSIP_TO_HVIP_SHIFT;
439 	}
440 
441 	((unsigned long *)csr)[reg_num] = reg_val;
442 
443 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
444 		WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
445 
446 	return 0;
447 }
448 
449 static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
450 					  const struct kvm_one_reg *reg)
451 {
452 	unsigned long __user *uaddr =
453 			(unsigned long __user *)(unsigned long)reg->addr;
454 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
455 					    KVM_REG_SIZE_MASK |
456 					    KVM_REG_RISCV_ISA_EXT);
457 	unsigned long reg_val = 0;
458 	unsigned long host_isa_ext;
459 
460 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
461 		return -EINVAL;
462 
463 	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
464 	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
465 		return -EINVAL;
466 
467 	host_isa_ext = kvm_isa_ext_arr[reg_num];
468 	if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
469 		reg_val = 1; /* Mark the given extension as available */
470 
471 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
472 		return -EFAULT;
473 
474 	return 0;
475 }
476 
477 static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
478 					  const struct kvm_one_reg *reg)
479 {
480 	unsigned long __user *uaddr =
481 			(unsigned long __user *)(unsigned long)reg->addr;
482 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
483 					    KVM_REG_SIZE_MASK |
484 					    KVM_REG_RISCV_ISA_EXT);
485 	unsigned long reg_val;
486 	unsigned long host_isa_ext;
487 
488 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
489 		return -EINVAL;
490 
491 	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
492 	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
493 		return -EINVAL;
494 
495 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
496 		return -EFAULT;
497 
498 	host_isa_ext = kvm_isa_ext_arr[reg_num];
499 	if (!__riscv_isa_extension_available(NULL, host_isa_ext))
500 		return	-EOPNOTSUPP;
501 
502 	if (!vcpu->arch.ran_atleast_once) {
503 		/*
504 		 * All multi-letter extension and a few single letter
505 		 * extension can be disabled
506 		 */
507 		if (reg_val == 1 &&
508 		    kvm_riscv_vcpu_isa_enable_allowed(reg_num))
509 			set_bit(host_isa_ext, vcpu->arch.isa);
510 		else if (!reg_val &&
511 			 kvm_riscv_vcpu_isa_disable_allowed(reg_num))
512 			clear_bit(host_isa_ext, vcpu->arch.isa);
513 		else
514 			return -EINVAL;
515 		kvm_riscv_vcpu_fp_reset(vcpu);
516 	} else {
517 		return -EOPNOTSUPP;
518 	}
519 
520 	return 0;
521 }
522 
523 static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
524 				  const struct kvm_one_reg *reg)
525 {
526 	if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
527 		return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
528 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
529 		return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
530 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
531 		return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
532 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
533 		return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
534 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
535 		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
536 						 KVM_REG_RISCV_FP_F);
537 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
538 		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
539 						 KVM_REG_RISCV_FP_D);
540 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
541 		return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
542 
543 	return -EINVAL;
544 }
545 
546 static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
547 				  const struct kvm_one_reg *reg)
548 {
549 	if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
550 		return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
551 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
552 		return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
553 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
554 		return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
555 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
556 		return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
557 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
558 		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
559 						 KVM_REG_RISCV_FP_F);
560 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
561 		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
562 						 KVM_REG_RISCV_FP_D);
563 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
564 		return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
565 
566 	return -EINVAL;
567 }
568 
569 long kvm_arch_vcpu_async_ioctl(struct file *filp,
570 			       unsigned int ioctl, unsigned long arg)
571 {
572 	struct kvm_vcpu *vcpu = filp->private_data;
573 	void __user *argp = (void __user *)arg;
574 
575 	if (ioctl == KVM_INTERRUPT) {
576 		struct kvm_interrupt irq;
577 
578 		if (copy_from_user(&irq, argp, sizeof(irq)))
579 			return -EFAULT;
580 
581 		if (irq.irq == KVM_INTERRUPT_SET)
582 			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
583 		else
584 			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
585 	}
586 
587 	return -ENOIOCTLCMD;
588 }
589 
590 long kvm_arch_vcpu_ioctl(struct file *filp,
591 			 unsigned int ioctl, unsigned long arg)
592 {
593 	struct kvm_vcpu *vcpu = filp->private_data;
594 	void __user *argp = (void __user *)arg;
595 	long r = -EINVAL;
596 
597 	switch (ioctl) {
598 	case KVM_SET_ONE_REG:
599 	case KVM_GET_ONE_REG: {
600 		struct kvm_one_reg reg;
601 
602 		r = -EFAULT;
603 		if (copy_from_user(&reg, argp, sizeof(reg)))
604 			break;
605 
606 		if (ioctl == KVM_SET_ONE_REG)
607 			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
608 		else
609 			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
610 		break;
611 	}
612 	default:
613 		break;
614 	}
615 
616 	return r;
617 }
618 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
624 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
630 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
635 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
640 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
646 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
651 
/* Not implemented here: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
656 
657 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
658 {
659 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
660 	unsigned long mask, val;
661 
662 	if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
663 		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
664 		val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
665 
666 		csr->hvip &= ~mask;
667 		csr->hvip |= val;
668 	}
669 }
670 
671 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
672 {
673 	unsigned long hvip;
674 	struct kvm_vcpu_arch *v = &vcpu->arch;
675 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
676 
677 	/* Read current HVIP and VSIE CSRs */
678 	csr->vsie = csr_read(CSR_VSIE);
679 
680 	/* Sync-up HVIP.VSSIP bit changes does by Guest */
681 	hvip = csr_read(CSR_HVIP);
682 	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
683 		if (hvip & (1UL << IRQ_VS_SOFT)) {
684 			if (!test_and_set_bit(IRQ_VS_SOFT,
685 					      &v->irqs_pending_mask))
686 				set_bit(IRQ_VS_SOFT, &v->irqs_pending);
687 		} else {
688 			if (!test_and_set_bit(IRQ_VS_SOFT,
689 					      &v->irqs_pending_mask))
690 				clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
691 		}
692 	}
693 }
694 
695 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
696 {
697 	if (irq != IRQ_VS_SOFT &&
698 	    irq != IRQ_VS_TIMER &&
699 	    irq != IRQ_VS_EXT)
700 		return -EINVAL;
701 
702 	set_bit(irq, &vcpu->arch.irqs_pending);
703 	smp_mb__before_atomic();
704 	set_bit(irq, &vcpu->arch.irqs_pending_mask);
705 
706 	kvm_vcpu_kick(vcpu);
707 
708 	return 0;
709 }
710 
711 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
712 {
713 	if (irq != IRQ_VS_SOFT &&
714 	    irq != IRQ_VS_TIMER &&
715 	    irq != IRQ_VS_EXT)
716 		return -EINVAL;
717 
718 	clear_bit(irq, &vcpu->arch.irqs_pending);
719 	smp_mb__before_atomic();
720 	set_bit(irq, &vcpu->arch.irqs_pending_mask);
721 
722 	return 0;
723 }
724 
725 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask)
726 {
727 	unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
728 			    << VSIP_TO_HVIP_SHIFT) & mask;
729 
730 	return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false;
731 }
732 
/*
 * Power off @vcpu: set its power_off flag, raise KVM_REQ_SLEEP on it and
 * kick it.
 */
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}
739 
/* Power on @vcpu: clear its power_off flag and wake it up. */
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = false;
	kvm_vcpu_wake_up(vcpu);
}
745 
746 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
747 				    struct kvm_mp_state *mp_state)
748 {
749 	if (vcpu->arch.power_off)
750 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
751 	else
752 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
753 
754 	return 0;
755 }
756 
757 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
758 				    struct kvm_mp_state *mp_state)
759 {
760 	int ret = 0;
761 
762 	switch (mp_state->mp_state) {
763 	case KVM_MP_STATE_RUNNABLE:
764 		vcpu->arch.power_off = false;
765 		break;
766 	case KVM_MP_STATE_STOPPED:
767 		kvm_riscv_vcpu_power_off(vcpu);
768 		break;
769 	default:
770 		ret = -EINVAL;
771 	}
772 
773 	return ret;
774 }
775 
/* Guest debug is not implemented yet: unconditionally returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	/* TODO: To be implemented later. */
	return -EINVAL;
}
782 
783 static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
784 {
785 	u64 henvcfg = 0;
786 
787 	if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT))
788 		henvcfg |= ENVCFG_PBMTE;
789 
790 	if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SSTC))
791 		henvcfg |= ENVCFG_STCE;
792 	csr_write(CSR_HENVCFG, henvcfg);
793 #ifdef CONFIG_32BIT
794 	csr_write(CSR_HENVCFGH, henvcfg >> 32);
795 #endif
796 }
797 
/*
 * Load the state of @vcpu onto the current CPU: the saved guest CSRs are
 * written to the hardware, the henvcfg and hgatp configuration is updated,
 * the timer is restored, the host FP state is saved and the guest FP state
 * restored. Finally @cpu is recorded in vcpu->cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Write the saved guest CSR values to the hardware CSRs */
	csr_write(CSR_VSSTATUS, csr->vsstatus);
	csr_write(CSR_VSIE, csr->vsie);
	csr_write(CSR_VSTVEC, csr->vstvec);
	csr_write(CSR_VSSCRATCH, csr->vsscratch);
	csr_write(CSR_VSEPC, csr->vsepc);
	csr_write(CSR_VSCAUSE, csr->vscause);
	csr_write(CSR_VSTVAL, csr->vstval);
	csr_write(CSR_HVIP, csr->hvip);
	csr_write(CSR_VSATP, csr->vsatp);

	kvm_riscv_vcpu_update_config(vcpu->arch.isa);

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	/* Swap the FP state: host out, guest in */
	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);

	/* vcpu->cpu != -1 is what marks the VCPU as loaded */
	vcpu->cpu = cpu;
}
824 
/*
 * Unload the state of @vcpu from the current CPU: vcpu->cpu is set to -1,
 * the guest FP state is saved and the host FP state restored, the timer is
 * saved and the hardware guest CSRs are read back into the saved copies.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* vcpu->cpu == -1 is what marks the VCPU as not loaded */
	vcpu->cpu = -1;

	/* Swap the FP state: guest out, host in */
	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);

	/* Read the hardware CSRs back into the saved guest CSR values */
	csr->vsstatus = csr_read(CSR_VSSTATUS);
	csr->vsie = csr_read(CSR_VSIE);
	csr->vstvec = csr_read(CSR_VSTVEC);
	csr->vsscratch = csr_read(CSR_VSSCRATCH);
	csr->vsepc = csr_read(CSR_VSEPC);
	csr->vscause = csr_read(CSR_VSCAUSE);
	csr->vstval = csr_read(CSR_VSTVAL);
	csr->hvip = csr_read(CSR_HVIP);
	csr->vsatp = csr_read(CSR_VSATP);
}
847 
848 static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
849 {
850 	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
851 
852 	if (kvm_request_pending(vcpu)) {
853 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
854 			kvm_vcpu_srcu_read_unlock(vcpu);
855 			rcuwait_wait_event(wait,
856 				(!vcpu->arch.power_off) && (!vcpu->arch.pause),
857 				TASK_INTERRUPTIBLE);
858 			kvm_vcpu_srcu_read_lock(vcpu);
859 
860 			if (vcpu->arch.power_off || vcpu->arch.pause) {
861 				/*
862 				 * Awaken to handle a signal, request to
863 				 * sleep again later.
864 				 */
865 				kvm_make_request(KVM_REQ_SLEEP, vcpu);
866 			}
867 		}
868 
869 		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
870 			kvm_riscv_reset_vcpu(vcpu);
871 
872 		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
873 			kvm_riscv_gstage_update_hgatp(vcpu);
874 
875 		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
876 			kvm_riscv_fence_i_process(vcpu);
877 
878 		/*
879 		 * The generic KVM_REQ_TLB_FLUSH is same as
880 		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
881 		 */
882 		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
883 			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
884 
885 		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
886 			kvm_riscv_hfence_vvma_all_process(vcpu);
887 
888 		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
889 			kvm_riscv_hfence_process(vcpu);
890 	}
891 }
892 
893 static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
894 {
895 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
896 
897 	csr_write(CSR_HVIP, csr->hvip);
898 }
899 
/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 *
 * The CPU the guest ran on is recorded in last_exit_cpu right after the
 * switch returns.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	guest_state_enter_irqoff();
	__kvm_riscv_switch_to(&vcpu->arch);
	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
}
914 
/*
 * Run @vcpu until an exit has to be handled outside of this loop.
 *
 * Any MMIO, SBI or CSR result left in vcpu->run by a previous exit is
 * processed first. The guest is then entered repeatedly for as long as
 * kvm_riscv_vcpu_exit() returns a value greater than zero.
 *
 * Returns the first non-positive value that ends the loop, the error of
 * the MMIO/SBI/CSR return handler, or -EINTR when run->immediate_exit is
 * set or a signal is pending.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	/* Complete the exit that was handed to user-space last time, if any */
	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	/* User-space asked to return without entering the guest */
	if (run->immediate_exit) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	/* ret > 0 keeps the loop going, ret <= 0 ends it */
	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		cond_resched();

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		local_irq_disable();

		/*
		 * Exit if we have a signal pending so that we can deliver
		 * the signal to user space.
		 */
		if (signal_pending(current)) {
			ret = -EINTR;
			run->exit_reason = KVM_EXIT_INTR;
		}

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * We might have got VCPU interrupts updated asynchronously
		 * so update it in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		/*
		 * Final check: do not enter the guest if we are about to
		 * exit, the VMID version changed or a request came in. The
		 * loop condition then decides whether to retry or to leave.
		 */
		if (ret <= 0 ||
		    kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu)) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Cleanup stale TLB entries
		 *
		 * Note: This should be done after G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/*
		 * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
		 * get an interrupt between __kvm_riscv_switch_to() and
		 * local_irq_enable() which can potentially change CSRs.
		 */
		trap.sepc = vcpu->arch.guest_context.sepc;
		trap.scause = csr_read(CSR_SCAUSE);
		trap.stval = csr_read(CSR_STVAL);
		trap.htval = csr_read(CSR_HTVAL);
		trap.htinst = csr_read(CSR_HTINST);

		/* Syncup interrupts state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		preempt_disable();

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		/* Handle the trap; the result decides whether to loop again */
		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}
1070