// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *     Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

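/*
 * The last vCPU that ran on each physical CPU; used by
 * kvm_arch_vcpu_load() to skip the expensive guest CSR and HGATP
 * restore when the same vCPU is reloaded on the same CPU and nothing
 * else has run there in between.
 */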
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu);

const struct kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits),
	STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
	STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, load_access_exits),
	STATS_DESC_COUNTER(VCPU, store_access_exits),
};

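/*
 * The binary stats blob exposed to userspace is laid out as: header,
 * name, descriptor table, then the data itself; the offsets below
 * encode exactly that ordering.
 */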
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu,
					 bool kvm_sbi_reset)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	void *vector_datap = cntx->vector.datap;

	memset(cntx, 0, sizeof(*cntx));
	memset(csr, 0, sizeof(*csr));
	memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr));

	/* Restore datap as it's not a part of the guest context. */
	cntx->vector.datap = vector_datap;

	if (kvm_sbi_reset)
		kvm_riscv_vcpu_sbi_load_reset_state(vcpu);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	cntx->sstatus = SR_SPP | SR_SPIE;

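	/*
	 * HSTATUS reset bits: VTW traps guest WFI to the hypervisor,
	 * SPV (together with sstatus.SPP above) makes the first sret
	 * enter the guest with V=1, and SPVP makes hypervisor HLV/HSV
	 * accesses to guest memory behave as VS-mode S-privilege accesses.
	 */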
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;
}

static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset)
{
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_put()/vcpu_load().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset);

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	kvm_riscv_vcpu_sbi_reset(vcpu);

	/* Reset the guest CSRs for the hotplug usecase */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;

	spin_lock_init(&vcpu->arch.mp_state_lock);

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;

	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup VCPU config */
	kvm_riscv_vcpu_config_init(vcpu);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	spin_lock_init(&vcpu->arch.reset_state.lock);

	rc = kvm_riscv_vcpu_alloc_vector_context(vcpu);
	if (rc)
		return rc;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	kvm_riscv_vcpu_aia_init(vcpu);

	/*
	 * Setup SBI extensions
	 * NOTE: This must be the last thing to be initialized.
	 */
	kvm_riscv_vcpu_sbi_init(vcpu);

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu, false);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The VCPU with index 0 is the designated boot CPU.
	 * Keep all other VCPUs in the powered-off state so that they
	 * can be brought up later using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}
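
/*
 * Illustrative guest-side sketch (assumed, not from this file): a guest
 * kernel brings a secondary hart up through the SBI HSM extension, which
 * KVM's SBI layer services by powering the target vCPU back on:
 *
 *	sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_START,
 *		  hartid, start_addr, opaque, 0, 0, 0);
 */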

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_sbi_deinit(vcpu);

	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) &&
		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.guest_context.sepc;
}
#endif

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
				  unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}
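
/*
 * Illustrative userspace sketch (assumed, not from this file): raising
 * and lowering the VS-level external interrupt line via the vCPU fd:
 *
 *	struct kvm_interrupt irq = { .irq = KVM_INTERRUPT_SET };
 *	ioctl(vcpu_fd, KVM_INTERRUPT, &irq);	// assert IRQ_VS_EXT
 *	irq.irq = KVM_INTERRUPT_UNSET;
 *	ioctl(vcpu_fd, KVM_INTERRUPT, &irq);	// deassert IRQ_VS_EXT
 */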

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned int n;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	default:
		break;
	}

	return r;
}
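
/*
 * Illustrative userspace sketch (assumed, not from this file): the usual
 * two-pass KVM_GET_REG_LIST protocol - the first call with n = 0 fails
 * with E2BIG but writes back the real count, the second call retrieves
 * the register indices:
 *
 *	struct kvm_reg_list hdr = { .n = 0 }, *list;
 *
 *	ioctl(vcpu_fd, KVM_GET_REG_LIST, &hdr);
 *	list = malloc(sizeof(*list) + hdr.n * sizeof(__u64));
 *	list->n = hdr.n;
 *	ioctl(vcpu_fd, KVM_GET_REG_LIST, list);
 */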

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

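	/*
	 * Producers (kvm_riscv_vcpu_set_interrupt() and
	 * kvm_riscv_vcpu_unset_interrupt() below) update irqs_pending
	 * first and then flag the IRQ in irqs_pending_mask; here we
	 * atomically consume the mask and fold the flagged bits into
	 * the shadow HVIP.
	 */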
	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = ncsr_read(CSR_VSIE);

	/* Sync up the HVIP.VSSIP bit changes done by the guest */
	hvip = ncsr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync up the HVIP.LCOFIP bit changes (only clear) done by the guest */
	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
	}

	/* Sync up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privileged specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privileged specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

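	/*
	 * Like the set path above, flag the IRQ in irqs_pending_mask so
	 * that kvm_riscv_vcpu_flush_interrupts() propagates the cleared
	 * irqs_pending bit into the shadow HVIP.
	 */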
	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
	      << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
	      (unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_off(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
	kvm_vcpu_wake_up(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_on(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	spin_lock(&vcpu->arch.mp_state_lock);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
		__kvm_riscv_vcpu_power_off(vcpu);
		break;
	case KVM_MP_STATE_INIT_RECEIVED:
		if (vcpu->kvm->arch.mp_state_reset)
			kvm_riscv_reset_vcpu(vcpu, false);
		else
			ret = -EINVAL;
		break;
	default:
		ret = -EINVAL;
	}

	spin_unlock(&vcpu->arch.mp_state_lock);

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	if (dbg->control & KVM_GUESTDBG_ENABLE)
		vcpu->guest_debug = dbg->control;
	else
		vcpu->guest_debug = 0;

	return 0;
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/*
	 * If the VCPU is being reloaded on the same physical CPU and no
	 * other KVM VCPU has run on this CPU since it was last put,
	 * we can skip the expensive CSR and HGATP writes.
	 *
	 * Note: If a new CSR is added to this fast-path skip block,
	 * make sure that 'csr_dirty' is set to true in any
	 * ioctl (e.g., KVM_SET_ONE_REG) that modifies it.
	 */
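	/*
	 * For example (illustrative, assumed call site), an ioctl that
	 * modifies a CSR restored below while the VCPU is loaded would do:
	 *
	 *	vcpu->arch.guest_csr.vsepc = new_val;
	 *	vcpu->arch.csr_dirty = true;
	 */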
	if (vcpu != __this_cpu_read(kvm_former_vcpu))
		__this_cpu_write(kvm_former_vcpu, vcpu);
	else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty)
		goto csr_restore_done;

	vcpu->arch.csr_dirty = false;

	/*
	 * Load VCPU config CSRs before other CSRs because
	 * the read/write behaviour of certain CSRs changes
	 * based on VCPU config CSRs.
	 */
	kvm_riscv_vcpu_config_load(vcpu);

	if (kvm_riscv_nacl_sync_csr_available()) {
		nsh = nacl_shmem();
		nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
		nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
		nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
		nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
		nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
		nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
		nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
		nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
		nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
	} else {
		csr_write(CSR_VSSTATUS, csr->vsstatus);
		csr_write(CSR_VSIE, csr->vsie);
		csr_write(CSR_VSTVEC, csr->vstvec);
		csr_write(CSR_VSSCRATCH, csr->vsscratch);
		csr_write(CSR_VSEPC, csr->vsepc);
		csr_write(CSR_VSCAUSE, csr->vscause);
		csr_write(CSR_VSTVAL, csr->vstval);
		csr_write(CSR_HVIP, csr->hvip);
		csr_write(CSR_VSATP, csr->vsatp);
	}

	kvm_riscv_mmu_update_hgatp(vcpu);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

csr_restore_done:
	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

	if (kvm_riscv_nacl_available()) {
		nsh = nacl_shmem();
		csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
		csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
		csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
		csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
		csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
		csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
		csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
		csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
		csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
	} else {
		csr->vsstatus = csr_read(CSR_VSSTATUS);
		csr->vsie = csr_read(CSR_VSIE);
		csr->vstvec = csr_read(CSR_VSTVEC);
		csr->vsscratch = csr_read(CSR_VSSCRATCH);
		csr->vsepc = csr_read(CSR_VSEPC);
		csr->vscause = csr_read(CSR_VSCAUSE);
		csr->vstval = csr_read(CSR_VSTVAL);
		csr->hvip = csr_read(CSR_HVIP);
		csr->vsatp = csr_read(CSR_VSATP);
	}
}

/**
 * kvm_riscv_check_vcpu_requests - check and handle pending vCPU requests
 * @vcpu: the VCPU pointer
 *
 * Return: 1 if we should enter the guest
 *	   0 if we should exit to userspace
 */
static int kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
				/*
				 * Awakened to handle a signal; request to
				 * sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu, true);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_mmu_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
			kvm_riscv_tlb_flush_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);

		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
			kvm_riscv_vcpu_record_steal_time(vcpu);

		if (kvm_dirty_ring_check_request(vcpu))
			return 0;
	}

	return 1;
}

static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	ncsr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

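/*
 * The csr_swap() helpers below compile down to a single csrrw: the new
 * value is written and the previous one returned, so each per-context
 * CSR is saved and restored in one instruction.
 */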
static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
	vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
		vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0);
}

static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
	csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
		smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					      struct kvm_cpu_trap *trap)
{
	void *nsh;
	struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;

	/*
	 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
	 * HTINST) here because we do local_irq_enable() after this
	 * function in kvm_arch_vcpu_ioctl_run() which can result in
	 * an interrupt immediately after local_irq_enable() and can
	 * potentially change trap CSRs.
	 */

	kvm_riscv_vcpu_swap_in_guest_state(vcpu);
	guest_state_enter_irqoff();

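	/*
	 * Three ways of swapping HSTATUS for the world switch, in
	 * decreasing order of preference: let the SBI NACL autoswap
	 * feature swap it around the sync-sret call, write it via the
	 * NACL shared-memory CSR area, or fall back to a plain
	 * csr_swap().
	 */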
	if (kvm_riscv_nacl_sync_sret_available()) {
		nsh = nacl_shmem();

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			hcntx->hstatus =
				nacl_csr_read(nsh, CSR_HSTATUS);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
						SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
						gcntx->hstatus);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
		} else if (kvm_riscv_nacl_sync_csr_available()) {
			hcntx->hstatus = nacl_csr_swap(nsh,
						       CSR_HSTATUS, gcntx->hstatus);
		} else {
			hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
		}

		nacl_scratch_write_longs(nsh,
					 SBI_NACL_SHMEM_SRET_OFFSET +
					 SBI_NACL_SHMEM_SRET_X(1),
					 &gcntx->ra,
					 SBI_NACL_SHMEM_SRET_X_LAST);

		__kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
					   SBI_EXT_NACL_SYNC_SRET);

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						0);
			gcntx->hstatus = nacl_scratch_read_long(nsh,
								SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
								SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
		} else {
			gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
		}

		trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
		trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
	} else {
		hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);

		__kvm_riscv_switch_to(&vcpu->arch);

		gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);

		trap->htval = csr_read(CSR_HTVAL);
		trap->htinst = csr_read(CSR_HTINST);
	}

	trap->sepc = gcntx->sepc;
	trap->scause = csr_read(CSR_SCAUSE);
	trap->stval = csr_read(CSR_STVAL);

	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
	kvm_riscv_vcpu_swap_in_host_state(vcpu);
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	if (!vcpu->arch.ran_atleast_once)
		kvm_riscv_vcpu_config_ran_once(vcpu);

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (!vcpu->wants_to_run) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = kvm_xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		ret = kvm_riscv_check_vcpu_requests(vcpu);
		if (ret <= 0)
			continue;

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * We might have got VCPU interrupts updated asynchronously,
		 * so update them in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Sanitize VMID mappings cached (TLB) on current CPU
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		trace_kvm_entry(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu, &trap);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/* Sync up interrupt state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(&trap);

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}