xref: /linux/arch/x86/kvm/vmx/main.c (revision f694f30e81c4ade358eb8c75273bac1a48f0cb8f)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/moduleparam.h>
3 
4 #include "x86_ops.h"
5 #include "vmx.h"
6 #include "mmu.h"
7 #include "nested.h"
8 #include "pmu.h"
9 #include "posted_intr.h"
10 #include "tdx.h"
11 #include "tdx_arch.h"
12 
13 #ifdef CONFIG_KVM_INTEL_TDX
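/*
 * Note, the common struct vcpu_vt must be at the same offset in struct
 * vcpu_vmx and struct vcpu_tdx so that code shared between VMX and TDX can
 * reach it without knowing which flavor of vCPU it is dealing with.
 */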
14 static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt));
15 #endif
16 
17 static void vt_disable_virtualization_cpu(void)
18 {
19 	/* Note, TDX *and* VMX need to be disabled if TDX is enabled. */
20 	if (enable_tdx)
21 		tdx_disable_virtualization_cpu();
22 	vmx_disable_virtualization_cpu();
23 }
24 
25 static __init int vt_hardware_setup(void)
26 {
27 	int ret;
28 
29 	ret = vmx_hardware_setup();
30 	if (ret)
31 		return ret;
32 
33 	/*
34 	 * Update vt_x86_ops::vm_size here so it is ready before
35 	 * kvm_ops_update() is called in kvm_x86_vendor_init().
36 	 *
37 	 * Note, the actual bringing up of TDX must be done after
38 	 * kvm_ops_update() because enabling TDX requires enabling
39 	 * hardware virtualization first, i.e., all online CPUs must
40 	 * be in post-VMXON state.  This means the @vm_size here
41 	 * may be updated to TDX's size but TDX may fail to enable
42 	 * at a later time.
43 	 *
44 	 * The VMX/VT code could update kvm_x86_ops::vm_size again
45 	 * after bringing up TDX, but this would require exporting
46 	 * either kvm_x86_ops or kvm_ops_update() from the base KVM
47 	 * module, which seems like overkill.  Anyway, the worst case here
48 	 * is that KVM may allocate a couple more bytes than needed for
49 	 * each VM.
50 	 */
51 	if (enable_tdx) {
52 		vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
53 				sizeof(struct kvm_tdx));
54 		/*
55 		 * Note, TDX may fail to initialize at a later time in
56 		 * vt_init(), in which case it is not necessary to set up
57 		 * those callbacks.  But making them valid here even
58 		 * when TDX fails to init later is fine because those
59 		 * callbacks won't be called if the VM isn't a TDX guest.
60 		 */
61 		vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
62 		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
63 		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
64 		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
65 		vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
66 	}
67 
68 	return 0;
69 }
70 
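/*
 * The vt_* wrappers below dispatch to the TDX implementation for TD guests
 * and to the plain VMX implementation otherwise.  Operations that don't apply
 * to a TD, e.g. because the relevant state is managed by the TDX module or is
 * inaccessible to KVM, are no-ops or return a fixed value for TD vCPUs.
 */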
71 static int vt_vm_init(struct kvm *kvm)
72 {
73 	if (is_td(kvm))
74 		return tdx_vm_init(kvm);
75 
76 	return vmx_vm_init(kvm);
77 }
78 
79 static void vt_vm_pre_destroy(struct kvm *kvm)
80 {
81 	if (is_td(kvm))
82 		return tdx_mmu_release_hkid(kvm);
83 }
84 
85 static void vt_vm_destroy(struct kvm *kvm)
86 {
87 	if (is_td(kvm))
88 		return tdx_vm_destroy(kvm);
89 
90 	vmx_vm_destroy(kvm);
91 }
92 
93 static int vt_vcpu_precreate(struct kvm *kvm)
94 {
95 	if (is_td(kvm))
96 		return 0;
97 
98 	return vmx_vcpu_precreate(kvm);
99 }
100 
101 static int vt_vcpu_create(struct kvm_vcpu *vcpu)
102 {
103 	if (is_td_vcpu(vcpu))
104 		return tdx_vcpu_create(vcpu);
105 
106 	return vmx_vcpu_create(vcpu);
107 }
108 
109 static void vt_vcpu_free(struct kvm_vcpu *vcpu)
110 {
111 	if (is_td_vcpu(vcpu)) {
112 		tdx_vcpu_free(vcpu);
113 		return;
114 	}
115 
116 	vmx_vcpu_free(vcpu);
117 }
118 
119 static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
120 {
121 	if (is_td_vcpu(vcpu)) {
122 		tdx_vcpu_reset(vcpu, init_event);
123 		return;
124 	}
125 
126 	vmx_vcpu_reset(vcpu, init_event);
127 }
128 
129 static void vt_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
130 {
131 	if (is_td_vcpu(vcpu)) {
132 		tdx_vcpu_load(vcpu, cpu);
133 		return;
134 	}
135 
136 	vmx_vcpu_load(vcpu, cpu);
137 }
138 
139 static void vt_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
140 {
141 	/*
142 	 * Basic TDX does not support the PML feature. KVM does not enable PML
143 	 * in the TD's VMCS, nor does it allocate or flush a PML buffer for TDX.
144 	 */
145 	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
146 		return;
147 
148 	vmx_update_cpu_dirty_logging(vcpu);
149 }
150 
151 static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
152 {
153 	if (is_td_vcpu(vcpu)) {
154 		tdx_prepare_switch_to_guest(vcpu);
155 		return;
156 	}
157 
158 	vmx_prepare_switch_to_guest(vcpu);
159 }
160 
161 static void vt_vcpu_put(struct kvm_vcpu *vcpu)
162 {
163 	if (is_td_vcpu(vcpu)) {
164 		tdx_vcpu_put(vcpu);
165 		return;
166 	}
167 
168 	vmx_vcpu_put(vcpu);
169 }
170 
171 static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
172 {
173 	if (is_td_vcpu(vcpu))
174 		return tdx_vcpu_pre_run(vcpu);
175 
176 	return vmx_vcpu_pre_run(vcpu);
177 }
178 
179 static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
180 {
181 	if (is_td_vcpu(vcpu))
182 		return tdx_vcpu_run(vcpu, force_immediate_exit);
183 
184 	return vmx_vcpu_run(vcpu, force_immediate_exit);
185 }
186 
187 static int vt_handle_exit(struct kvm_vcpu *vcpu,
188 			  enum exit_fastpath_completion fastpath)
189 {
190 	if (is_td_vcpu(vcpu))
191 		return tdx_handle_exit(vcpu, fastpath);
192 
193 	return vmx_handle_exit(vcpu, fastpath);
194 }
195 
196 static int vt_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
197 {
198 	if (unlikely(is_td_vcpu(vcpu)))
199 		return tdx_set_msr(vcpu, msr_info);
200 
201 	return vmx_set_msr(vcpu, msr_info);
202 }
203 
204 /*
205  * The kvm parameter can be NULL (module initialization, or invocation before
206  * VM creation). Be sure to check the kvm parameter before using it.
207  */
208 static bool vt_has_emulated_msr(struct kvm *kvm, u32 index)
209 {
210 	if (kvm && is_td(kvm))
211 		return tdx_has_emulated_msr(index);
212 
213 	return vmx_has_emulated_msr(kvm, index);
214 }
215 
216 static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
217 {
218 	if (unlikely(is_td_vcpu(vcpu)))
219 		return tdx_get_msr(vcpu, msr_info);
220 
221 	return vmx_get_msr(vcpu, msr_info);
222 }
223 
224 static void vt_msr_filter_changed(struct kvm_vcpu *vcpu)
225 {
226 	/*
227 	 * TDX doesn't allow the VMM to configure interception of MSR accesses.
228 	 * A TDX guest requests MSR accesses by calling TDVMCALL.  The MSR
229 	 * filters will be applied when handling the TDVMCALL for RDMSR/WRMSR
230 	 * if userspace has set any.
231 	 */
232 	if (is_td_vcpu(vcpu))
233 		return;
234 
235 	vmx_msr_filter_changed(vcpu);
236 }
237 
238 static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
239 {
240 	if (is_td_vcpu(vcpu))
241 		return tdx_complete_emulated_msr(vcpu, err);
242 
243 	return kvm_complete_insn_gp(vcpu, err);
244 }
245 
246 #ifdef CONFIG_KVM_SMM
247 static int vt_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
248 {
249 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
250 		return 0;
251 
252 	return vmx_smi_allowed(vcpu, for_injection);
253 }
254 
255 static int vt_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
256 {
257 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
258 		return 0;
259 
260 	return vmx_enter_smm(vcpu, smram);
261 }
262 
263 static int vt_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
264 {
265 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
266 		return 0;
267 
268 	return vmx_leave_smm(vcpu, smram);
269 }
270 
271 static void vt_enable_smi_window(struct kvm_vcpu *vcpu)
272 {
273 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
274 		return;
275 
276 	/* RSM will cause a vmexit anyway.  */
277 	vmx_enable_smi_window(vcpu);
278 }
279 #endif
280 
281 static int vt_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
282 					void *insn, int insn_len)
283 {
284 	/*
285 	 * For TDX, this can only be triggered for MMIO emulation.  Let the
286 	 * guest retry after installing the SPTE with the suppress #VE bit
287 	 * cleared, so that the guest receives a #VE when it retries.  The guest
288 	 * is expected to call TDG.VP.VMCALL<MMIO> to request the VMM to do MMIO
289 	 * emulation upon the #VE.
290 	 */
291 	if (is_td_vcpu(vcpu))
292 		return X86EMUL_RETRY_INSTR;
293 
294 	return vmx_check_emulate_instruction(vcpu, emul_type, insn, insn_len);
295 }
296 
297 static bool vt_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
298 {
299 	/*
300 	 * INIT and SIPI are always blocked for TDX, i.e., INIT handling and
301 	 * the OP vcpu_deliver_sipi_vector() won't be called.
302 	 */
303 	if (is_td_vcpu(vcpu))
304 		return true;
305 
306 	return vmx_apic_init_signal_blocked(vcpu);
307 }
308 
309 static void vt_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
310 {
311 	/* Only x2APIC mode is supported for a TD. */
312 	if (is_td_vcpu(vcpu))
313 		return;
314 
315 	return vmx_set_virtual_apic_mode(vcpu);
316 }
317 
318 static void vt_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
319 {
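	/*
	 * No TD check is needed here; vcpu_to_pi_desc() works for both VMX
	 * and TDX vCPUs, and clearing the PI descriptor is safe for either.
	 */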
320 	struct pi_desc *pi = vcpu_to_pi_desc(vcpu);
321 
322 	pi_clear_on(pi);
323 	memset(pi->pir, 0, sizeof(pi->pir));
324 }
325 
326 static void vt_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
327 {
328 	if (is_td_vcpu(vcpu))
329 		return;
330 
331 	return vmx_hwapic_isr_update(vcpu, max_isr);
332 }
333 
334 static int vt_sync_pir_to_irr(struct kvm_vcpu *vcpu)
335 {
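	/*
	 * A TD's APIC state is protected and can't be read by KVM, so there
	 * is nothing to sync; return -1 as no pending vector is visible.
	 */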
336 	if (is_td_vcpu(vcpu))
337 		return -1;
338 
339 	return vmx_sync_pir_to_irr(vcpu);
340 }
341 
342 static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
343 			   int trig_mode, int vector)
344 {
345 	if (is_td_vcpu(apic->vcpu)) {
346 		tdx_deliver_interrupt(apic, delivery_mode, trig_mode,
347 					     vector);
348 		return;
349 	}
350 
351 	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
352 }
353 
354 static void vt_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
355 {
356 	if (is_td_vcpu(vcpu))
357 		return;
358 
359 	vmx_vcpu_after_set_cpuid(vcpu);
360 }
361 
362 static void vt_update_exception_bitmap(struct kvm_vcpu *vcpu)
363 {
364 	if (is_td_vcpu(vcpu))
365 		return;
366 
367 	vmx_update_exception_bitmap(vcpu);
368 }
369 
370 static u64 vt_get_segment_base(struct kvm_vcpu *vcpu, int seg)
371 {
372 	if (is_td_vcpu(vcpu))
373 		return 0;
374 
375 	return vmx_get_segment_base(vcpu, seg);
376 }
377 
378 static void vt_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var,
379 			      int seg)
380 {
381 	if (is_td_vcpu(vcpu)) {
382 		memset(var, 0, sizeof(*var));
383 		return;
384 	}
385 
386 	vmx_get_segment(vcpu, var, seg);
387 }
388 
389 static void vt_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var,
390 			      int seg)
391 {
392 	if (is_td_vcpu(vcpu))
393 		return;
394 
395 	vmx_set_segment(vcpu, var, seg);
396 }
397 
398 static int vt_get_cpl(struct kvm_vcpu *vcpu)
399 {
400 	if (is_td_vcpu(vcpu))
401 		return 0;
402 
403 	return vmx_get_cpl(vcpu);
404 }
405 
406 static int vt_get_cpl_no_cache(struct kvm_vcpu *vcpu)
407 {
408 	if (is_td_vcpu(vcpu))
409 		return 0;
410 
411 	return vmx_get_cpl_no_cache(vcpu);
412 }
413 
414 static void vt_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
415 {
416 	if (is_td_vcpu(vcpu)) {
417 		*db = 0;
418 		*l = 0;
419 		return;
420 	}
421 
422 	vmx_get_cs_db_l_bits(vcpu, db, l);
423 }
424 
425 static bool vt_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
426 {
427 	if (is_td_vcpu(vcpu))
428 		return true;
429 
430 	return vmx_is_valid_cr0(vcpu, cr0);
431 }
432 
433 static void vt_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
434 {
435 	if (is_td_vcpu(vcpu))
436 		return;
437 
438 	vmx_set_cr0(vcpu, cr0);
439 }
440 
441 static bool vt_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
442 {
443 	if (is_td_vcpu(vcpu))
444 		return true;
445 
446 	return vmx_is_valid_cr4(vcpu, cr4);
447 }
448 
449 static void vt_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
450 {
451 	if (is_td_vcpu(vcpu))
452 		return;
453 
454 	vmx_set_cr4(vcpu, cr4);
455 }
456 
457 static int vt_set_efer(struct kvm_vcpu *vcpu, u64 efer)
458 {
459 	if (is_td_vcpu(vcpu))
460 		return 0;
461 
462 	return vmx_set_efer(vcpu, efer);
463 }
464 
465 static void vt_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
466 {
467 	if (is_td_vcpu(vcpu)) {
468 		memset(dt, 0, sizeof(*dt));
469 		return;
470 	}
471 
472 	vmx_get_idt(vcpu, dt);
473 }
474 
475 static void vt_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
476 {
477 	if (is_td_vcpu(vcpu))
478 		return;
479 
480 	vmx_set_idt(vcpu, dt);
481 }
482 
483 static void vt_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
484 {
485 	if (is_td_vcpu(vcpu)) {
486 		memset(dt, 0, sizeof(*dt));
487 		return;
488 	}
489 
490 	vmx_get_gdt(vcpu, dt);
491 }
492 
493 static void vt_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
494 {
495 	if (is_td_vcpu(vcpu))
496 		return;
497 
498 	vmx_set_gdt(vcpu, dt);
499 }
500 
501 static void vt_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
502 {
503 	if (is_td_vcpu(vcpu))
504 		return;
505 
506 	vmx_set_dr6(vcpu, val);
507 }
508 
509 static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
510 {
511 	if (is_td_vcpu(vcpu))
512 		return;
513 
514 	vmx_set_dr7(vcpu, val);
515 }
516 
517 static void vt_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
518 {
519 	/*
520 	 * MOV-DR exiting is always cleared for TD guests, even in debug mode.
521 	 * Thus KVM_DEBUGREG_WONT_EXIT can never be set and it should never
522 	 * reach here for a TD vCPU.
523 	 */
524 	if (is_td_vcpu(vcpu))
525 		return;
526 
527 	vmx_sync_dirty_debug_regs(vcpu);
528 }
529 
530 static void vt_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
531 {
532 	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
533 		return;
534 
535 	vmx_cache_reg(vcpu, reg);
536 }
537 
538 static unsigned long vt_get_rflags(struct kvm_vcpu *vcpu)
539 {
540 	if (is_td_vcpu(vcpu))
541 		return 0;
542 
543 	return vmx_get_rflags(vcpu);
544 }
545 
546 static void vt_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
547 {
548 	if (is_td_vcpu(vcpu))
549 		return;
550 
551 	vmx_set_rflags(vcpu, rflags);
552 }
553 
554 static bool vt_get_if_flag(struct kvm_vcpu *vcpu)
555 {
556 	if (is_td_vcpu(vcpu))
557 		return false;
558 
559 	return vmx_get_if_flag(vcpu);
560 }
561 
562 static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
563 {
564 	if (is_td_vcpu(vcpu)) {
565 		tdx_flush_tlb_all(vcpu);
566 		return;
567 	}
568 
569 	vmx_flush_tlb_all(vcpu);
570 }
571 
572 static void vt_flush_tlb_current(struct kvm_vcpu *vcpu)
573 {
574 	if (is_td_vcpu(vcpu)) {
575 		tdx_flush_tlb_current(vcpu);
576 		return;
577 	}
578 
579 	vmx_flush_tlb_current(vcpu);
580 }
581 
582 static void vt_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
583 {
584 	if (is_td_vcpu(vcpu))
585 		return;
586 
587 	vmx_flush_tlb_gva(vcpu, addr);
588 }
589 
590 static void vt_flush_tlb_guest(struct kvm_vcpu *vcpu)
591 {
592 	if (is_td_vcpu(vcpu))
593 		return;
594 
595 	vmx_flush_tlb_guest(vcpu);
596 }
597 
598 static void vt_inject_nmi(struct kvm_vcpu *vcpu)
599 {
600 	if (is_td_vcpu(vcpu)) {
601 		tdx_inject_nmi(vcpu);
602 		return;
603 	}
604 
605 	vmx_inject_nmi(vcpu);
606 }
607 
608 static int vt_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
609 {
610 	/*
611 	 * The TDX module manages NMI windows and NMI reinjection, and hides NMI
612 	 * blocking; all KVM can do is throw an NMI over the wall.
613 	 */
614 	if (is_td_vcpu(vcpu))
615 		return true;
616 
617 	return vmx_nmi_allowed(vcpu, for_injection);
618 }
619 
620 static bool vt_get_nmi_mask(struct kvm_vcpu *vcpu)
621 {
622 	/*
623 	 * KVM can't get the NMI blocking status of a TDX guest; assume NMIs are
624 	 * always unmasked.
625 	 */
626 	if (is_td_vcpu(vcpu))
627 		return false;
628 
629 	return vmx_get_nmi_mask(vcpu);
630 }
631 
632 static void vt_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
633 {
634 	if (is_td_vcpu(vcpu))
635 		return;
636 
637 	vmx_set_nmi_mask(vcpu, masked);
638 }
639 
640 static void vt_enable_nmi_window(struct kvm_vcpu *vcpu)
641 {
642 	/* Refer to the comments in tdx_inject_nmi(). */
643 	if (is_td_vcpu(vcpu))
644 		return;
645 
646 	vmx_enable_nmi_window(vcpu);
647 }
648 
649 static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
650 			    int pgd_level)
651 {
652 	if (is_td_vcpu(vcpu)) {
653 		tdx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
654 		return;
655 	}
656 
657 	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
658 }
659 
660 static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
661 {
662 	if (is_td_vcpu(vcpu))
663 		return;
664 
665 	vmx_set_interrupt_shadow(vcpu, mask);
666 }
667 
668 static u32 vt_get_interrupt_shadow(struct kvm_vcpu *vcpu)
669 {
670 	if (is_td_vcpu(vcpu))
671 		return 0;
672 
673 	return vmx_get_interrupt_shadow(vcpu);
674 }
675 
676 static void vt_patch_hypercall(struct kvm_vcpu *vcpu,
677 				  unsigned char *hypercall)
678 {
679 	/*
680 	 * Because guest memory is protected, the guest can't be patched. The TD
681 	 * kernel is modified to use TDG.VP.VMCALL for hypercalls.
682 	 */
683 	if (is_td_vcpu(vcpu))
684 		return;
685 
686 	vmx_patch_hypercall(vcpu, hypercall);
687 }
688 
689 static void vt_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
690 {
691 	if (is_td_vcpu(vcpu))
692 		return;
693 
694 	vmx_inject_irq(vcpu, reinjected);
695 }
696 
697 static void vt_inject_exception(struct kvm_vcpu *vcpu)
698 {
699 	if (is_td_vcpu(vcpu))
700 		return;
701 
702 	vmx_inject_exception(vcpu);
703 }
704 
705 static void vt_cancel_injection(struct kvm_vcpu *vcpu)
706 {
707 	if (is_td_vcpu(vcpu))
708 		return;
709 
710 	vmx_cancel_injection(vcpu);
711 }
712 
713 static int vt_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
714 {
715 	if (is_td_vcpu(vcpu))
716 		return tdx_interrupt_allowed(vcpu);
717 
718 	return vmx_interrupt_allowed(vcpu, for_injection);
719 }
720 
721 static void vt_enable_irq_window(struct kvm_vcpu *vcpu)
722 {
723 	if (is_td_vcpu(vcpu))
724 		return;
725 
726 	vmx_enable_irq_window(vcpu);
727 }
728 
729 static void vt_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
730 {
731 	*intr_info = 0;
732 	*error_code = 0;
733 
734 	if (is_td_vcpu(vcpu))
735 		return;
736 
737 	vmx_get_entry_info(vcpu, intr_info, error_code);
738 }
739 
740 static void vt_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
741 			u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
742 {
743 	if (is_td_vcpu(vcpu)) {
744 		tdx_get_exit_info(vcpu, reason, info1, info2, intr_info,
745 				  error_code);
746 		return;
747 	}
748 
749 	vmx_get_exit_info(vcpu, reason, info1, info2, intr_info, error_code);
750 }
751 
752 static void vt_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
753 {
754 	if (is_td_vcpu(vcpu))
755 		return;
756 
757 	vmx_update_cr8_intercept(vcpu, tpr, irr);
758 }
759 
760 static void vt_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
761 {
762 	if (is_td_vcpu(vcpu))
763 		return;
764 
765 	vmx_set_apic_access_page_addr(vcpu);
766 }
767 
768 static void vt_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
769 {
770 	if (is_td_vcpu(vcpu)) {
771 		KVM_BUG_ON(!kvm_vcpu_apicv_active(vcpu), vcpu->kvm);
772 		return;
773 	}
774 
775 	vmx_refresh_apicv_exec_ctrl(vcpu);
776 }
777 
778 static void vt_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
779 {
780 	if (is_td_vcpu(vcpu))
781 		return;
782 
783 	vmx_load_eoi_exitmap(vcpu, eoi_exit_bitmap);
784 }
785 
786 static int vt_set_tss_addr(struct kvm *kvm, unsigned int addr)
787 {
788 	if (is_td(kvm))
789 		return 0;
790 
791 	return vmx_set_tss_addr(kvm, addr);
792 }
793 
794 static int vt_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
795 {
796 	if (is_td(kvm))
797 		return 0;
798 
799 	return vmx_set_identity_map_addr(kvm, ident_addr);
800 }
801 
802 static u64 vt_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
803 {
804 	/* TDX doesn't support L2 guests at the moment. */
805 	if (is_td_vcpu(vcpu))
806 		return 0;
807 
808 	return vmx_get_l2_tsc_offset(vcpu);
809 }
810 
811 static u64 vt_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
812 {
813 	/* TDX doesn't support L2 guests at the moment. */
814 	if (is_td_vcpu(vcpu))
815 		return 0;
816 
817 	return vmx_get_l2_tsc_multiplier(vcpu);
818 }
819 
820 static void vt_write_tsc_offset(struct kvm_vcpu *vcpu)
821 {
822 	/* In TDX, the TSC offset can't be changed. */
823 	if (is_td_vcpu(vcpu))
824 		return;
825 
826 	vmx_write_tsc_offset(vcpu);
827 }
828 
829 static void vt_write_tsc_multiplier(struct kvm_vcpu *vcpu)
830 {
831 	/* In TDX, the TSC multiplier can't be changed. */
832 	if (is_td_vcpu(vcpu))
833 		return;
834 
835 	vmx_write_tsc_multiplier(vcpu);
836 }
837 
838 #ifdef CONFIG_X86_64
839 static int vt_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
840 			      bool *expired)
841 {
842 	/* VMX-preemption timer isn't available for TDX. */
843 	if (is_td_vcpu(vcpu))
844 		return -EINVAL;
845 
846 	return vmx_set_hv_timer(vcpu, guest_deadline_tsc, expired);
847 }
848 
849 static void vt_cancel_hv_timer(struct kvm_vcpu *vcpu)
850 {
851 	/* VMX-preemption timer can't be set.  See vt_set_hv_timer(). */
852 	if (is_td_vcpu(vcpu))
853 		return;
854 
855 	vmx_cancel_hv_timer(vcpu);
856 }
857 #endif
858 
859 static void vt_setup_mce(struct kvm_vcpu *vcpu)
860 {
861 	if (is_td_vcpu(vcpu))
862 		return;
863 
864 	vmx_setup_mce(vcpu);
865 }
866 
867 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
868 {
869 	if (!is_td(kvm))
870 		return -ENOTTY;
871 
872 	return tdx_vm_ioctl(kvm, argp);
873 }
874 
875 static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
876 {
877 	if (!is_td_vcpu(vcpu))
878 		return -EINVAL;
879 
880 	return tdx_vcpu_ioctl(vcpu, argp);
881 }
882 
883 static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
884 {
885 	if (is_td(kvm))
886 		return tdx_gmem_private_max_mapping_level(kvm, pfn);
887 
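	/* Returning 0 places no additional limit on the mapping level. */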
888 	return 0;
889 }
890 
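/* The set of APICv inhibit reasons that VMX requires KVM to honor. */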
891 #define VMX_REQUIRED_APICV_INHIBITS				\
892 	(BIT(APICV_INHIBIT_REASON_DISABLED) |			\
893 	 BIT(APICV_INHIBIT_REASON_ABSENT) |			\
894 	 BIT(APICV_INHIBIT_REASON_HYPERV) |			\
895 	 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |			\
896 	 BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |	\
897 	 BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |		\
898 	 BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED))
899 
900 struct kvm_x86_ops vt_x86_ops __initdata = {
901 	.name = KBUILD_MODNAME,
902 
903 	.check_processor_compatibility = vmx_check_processor_compat,
904 
905 	.hardware_unsetup = vmx_hardware_unsetup,
906 
907 	.enable_virtualization_cpu = vmx_enable_virtualization_cpu,
908 	.disable_virtualization_cpu = vt_disable_virtualization_cpu,
909 	.emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,
910 
911 	.has_emulated_msr = vt_has_emulated_msr,
912 
913 	.vm_size = sizeof(struct kvm_vmx),
914 
915 	.vm_init = vt_vm_init,
916 	.vm_pre_destroy = vt_vm_pre_destroy,
917 	.vm_destroy = vt_vm_destroy,
918 
919 	.vcpu_precreate = vt_vcpu_precreate,
920 	.vcpu_create = vt_vcpu_create,
921 	.vcpu_free = vt_vcpu_free,
922 	.vcpu_reset = vt_vcpu_reset,
923 
924 	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
925 	.vcpu_load = vt_vcpu_load,
926 	.vcpu_put = vt_vcpu_put,
927 
928 	.update_exception_bitmap = vt_update_exception_bitmap,
929 	.get_feature_msr = vmx_get_feature_msr,
930 	.get_msr = vt_get_msr,
931 	.set_msr = vt_set_msr,
932 
933 	.get_segment_base = vt_get_segment_base,
934 	.get_segment = vt_get_segment,
935 	.set_segment = vt_set_segment,
936 	.get_cpl = vt_get_cpl,
937 	.get_cpl_no_cache = vt_get_cpl_no_cache,
938 	.get_cs_db_l_bits = vt_get_cs_db_l_bits,
939 	.is_valid_cr0 = vt_is_valid_cr0,
940 	.set_cr0 = vt_set_cr0,
941 	.is_valid_cr4 = vt_is_valid_cr4,
942 	.set_cr4 = vt_set_cr4,
943 	.set_efer = vt_set_efer,
944 	.get_idt = vt_get_idt,
945 	.set_idt = vt_set_idt,
946 	.get_gdt = vt_get_gdt,
947 	.set_gdt = vt_set_gdt,
948 	.set_dr6 = vt_set_dr6,
949 	.set_dr7 = vt_set_dr7,
950 	.sync_dirty_debug_regs = vt_sync_dirty_debug_regs,
951 	.cache_reg = vt_cache_reg,
952 	.get_rflags = vt_get_rflags,
953 	.set_rflags = vt_set_rflags,
954 	.get_if_flag = vt_get_if_flag,
955 
956 	.flush_tlb_all = vt_flush_tlb_all,
957 	.flush_tlb_current = vt_flush_tlb_current,
958 	.flush_tlb_gva = vt_flush_tlb_gva,
959 	.flush_tlb_guest = vt_flush_tlb_guest,
960 
961 	.vcpu_pre_run = vt_vcpu_pre_run,
962 	.vcpu_run = vt_vcpu_run,
963 	.handle_exit = vt_handle_exit,
964 	.skip_emulated_instruction = vmx_skip_emulated_instruction,
965 	.update_emulated_instruction = vmx_update_emulated_instruction,
966 	.set_interrupt_shadow = vt_set_interrupt_shadow,
967 	.get_interrupt_shadow = vt_get_interrupt_shadow,
968 	.patch_hypercall = vt_patch_hypercall,
969 	.inject_irq = vt_inject_irq,
970 	.inject_nmi = vt_inject_nmi,
971 	.inject_exception = vt_inject_exception,
972 	.cancel_injection = vt_cancel_injection,
973 	.interrupt_allowed = vt_interrupt_allowed,
974 	.nmi_allowed = vt_nmi_allowed,
975 	.get_nmi_mask = vt_get_nmi_mask,
976 	.set_nmi_mask = vt_set_nmi_mask,
977 	.enable_nmi_window = vt_enable_nmi_window,
978 	.enable_irq_window = vt_enable_irq_window,
979 	.update_cr8_intercept = vt_update_cr8_intercept,
980 
981 	.x2apic_icr_is_split = false,
982 	.set_virtual_apic_mode = vt_set_virtual_apic_mode,
983 	.set_apic_access_page_addr = vt_set_apic_access_page_addr,
984 	.refresh_apicv_exec_ctrl = vt_refresh_apicv_exec_ctrl,
985 	.load_eoi_exitmap = vt_load_eoi_exitmap,
986 	.apicv_pre_state_restore = vt_apicv_pre_state_restore,
987 	.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
988 	.hwapic_isr_update = vt_hwapic_isr_update,
989 	.sync_pir_to_irr = vt_sync_pir_to_irr,
990 	.deliver_interrupt = vt_deliver_interrupt,
991 	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
992 
993 	.set_tss_addr = vt_set_tss_addr,
994 	.set_identity_map_addr = vt_set_identity_map_addr,
995 	.get_mt_mask = vmx_get_mt_mask,
996 
997 	.get_exit_info = vt_get_exit_info,
998 	.get_entry_info = vt_get_entry_info,
999 
1000 	.vcpu_after_set_cpuid = vt_vcpu_after_set_cpuid,
1001 
1002 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
1003 
1004 	.get_l2_tsc_offset = vt_get_l2_tsc_offset,
1005 	.get_l2_tsc_multiplier = vt_get_l2_tsc_multiplier,
1006 	.write_tsc_offset = vt_write_tsc_offset,
1007 	.write_tsc_multiplier = vt_write_tsc_multiplier,
1008 
1009 	.load_mmu_pgd = vt_load_mmu_pgd,
1010 
1011 	.check_intercept = vmx_check_intercept,
1012 	.handle_exit_irqoff = vmx_handle_exit_irqoff,
1013 
1014 	.update_cpu_dirty_logging = vt_update_cpu_dirty_logging,
1015 
1016 	.nested_ops = &vmx_nested_ops,
1017 
1018 	.pi_update_irte = vmx_pi_update_irte,
1019 	.pi_start_assignment = vmx_pi_start_assignment,
1020 
1021 #ifdef CONFIG_X86_64
1022 	.set_hv_timer = vt_set_hv_timer,
1023 	.cancel_hv_timer = vt_cancel_hv_timer,
1024 #endif
1025 
1026 	.setup_mce = vt_setup_mce,
1027 
1028 #ifdef CONFIG_KVM_SMM
1029 	.smi_allowed = vt_smi_allowed,
1030 	.enter_smm = vt_enter_smm,
1031 	.leave_smm = vt_leave_smm,
1032 	.enable_smi_window = vt_enable_smi_window,
1033 #endif
1034 
1035 	.check_emulate_instruction = vt_check_emulate_instruction,
1036 	.apic_init_signal_blocked = vt_apic_init_signal_blocked,
1037 	.migrate_timers = vmx_migrate_timers,
1038 
1039 	.msr_filter_changed = vt_msr_filter_changed,
1040 	.complete_emulated_msr = vt_complete_emulated_msr,
1041 
1042 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
1043 
1044 	.get_untagged_addr = vmx_get_untagged_addr,
1045 
1046 	.mem_enc_ioctl = vt_mem_enc_ioctl,
1047 	.vcpu_mem_enc_ioctl = vt_vcpu_mem_enc_ioctl,
1048 
1049 	.private_max_mapping_level = vt_gmem_private_max_mapping_level,
1050 };
1051 
1052 struct kvm_x86_init_ops vt_init_ops __initdata = {
1053 	.hardware_setup = vt_hardware_setup,
1054 	.handle_intel_pt_intr = NULL,
1055 
1056 	.runtime_ops = &vt_x86_ops,
1057 	.pmu_ops = &intel_pmu_ops,
1058 };
1059 
1060 static void __exit vt_exit(void)
1061 {
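	/*
	 * Tear down in the reverse order of vt_init(): kvm_exit() first so
	 * that /dev/kvm is gone before TDX and VMX support is torn down.
	 */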
1062 	kvm_exit();
1063 	tdx_cleanup();
1064 	vmx_exit();
1065 }
1066 module_exit(vt_exit);
1067 
1068 static int __init vt_init(void)
1069 {
1070 	unsigned vcpu_size, vcpu_align;
1071 	int r;
1072 
1073 	r = vmx_init();
1074 	if (r)
1075 		return r;
1076 
1077 	/* tdx_init() has been taken */
1078 	r = tdx_bringup();
1079 	if (r)
1080 		goto err_tdx_bringup;
1081 
1082 	/*
1083 	 * TDX and VMX have different vCPU structures.  Calculate the
1084 	 * maximum size/align so that kvm_init() can use the larger
1085 	 * values to create the kmem_vcpu_cache.
1086 	 */
1087 	vcpu_size = sizeof(struct vcpu_vmx);
1088 	vcpu_align = __alignof__(struct vcpu_vmx);
1089 	if (enable_tdx) {
1090 		vcpu_size = max_t(unsigned, vcpu_size,
1091 				sizeof(struct vcpu_tdx));
1092 		vcpu_align = max_t(unsigned, vcpu_align,
1093 				__alignof__(struct vcpu_tdx));
1094 		kvm_caps.supported_vm_types |= BIT(KVM_X86_TDX_VM);
1095 	}
1096 
1097 	/*
1098 	 * Common KVM initialization _must_ come last; after this, /dev/kvm is
1099 	 * exposed to userspace!
1100 	 */
1101 	r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
1102 	if (r)
1103 		goto err_kvm_init;
1104 
1105 	return 0;
1106 
1107 err_kvm_init:
1108 	tdx_cleanup();
1109 err_tdx_bringup:
1110 	vmx_exit();
1111 	return r;
1112 }
1113 module_init(vt_init);
1114