xref: /linux/arch/x86/kvm/vmx/main.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/moduleparam.h>
3 
4 #include "x86_ops.h"
5 #include "vmx.h"
6 #include "mmu.h"
7 #include "nested.h"
8 #include "pmu.h"
9 #include "posted_intr.h"
10 #include "tdx.h"
11 #include "tdx_arch.h"
12 
13 #ifdef CONFIG_KVM_INTEL_TDX
14 static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt));
15 
16 static void vt_disable_virtualization_cpu(void)
17 {
18 	/* Note, TDX *and* VMX need to be disabled if TDX is enabled. */
19 	if (enable_tdx)
20 		tdx_disable_virtualization_cpu();
21 	vmx_disable_virtualization_cpu();
22 }
23 
24 static __init int vt_hardware_setup(void)
25 {
26 	int ret;
27 
28 	ret = vmx_hardware_setup();
29 	if (ret)
30 		return ret;
31 
32 	/*
33 	 * Update vt_x86_ops::vm_size here so it is ready before
34 	 * kvm_ops_update() is called in kvm_x86_vendor_init().
35 	 *
36 	 * Note, the actual bringing up of TDX must be done after
37 	 * kvm_ops_update() because enabling TDX requires enabling
38 	 * hardware virtualization first, i.e., all online CPUs must
39 	 * be in post-VMXON state.  This means the @vm_size here
40 	 * may be updated to TDX's size but TDX may fail to enable
41 	 * at later time.
42 	 *
43 	 * The VMX/VT code could update kvm_x86_ops::vm_size again
44 	 * after bringing up TDX, but this would require exporting
45 	 * either kvm_x86_ops or kvm_ops_update() from the base KVM
46 	 * module, which looks overkill.  Anyway, the worst case here
47 	 * is KVM may allocate couple of more bytes than needed for
48 	 * each VM.
49 	 */
50 	if (enable_tdx) {
51 		vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
52 				sizeof(struct kvm_tdx));
53 		/*
54 		 * Note, TDX may fail to initialize in a later time in
55 		 * vt_init(), in which case it is not necessary to setup
56 		 * those callbacks.  But making them valid here even
57 		 * when TDX fails to init later is fine because those
58 		 * callbacks won't be called if the VM isn't TDX guest.
59 		 */
60 		vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
61 		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
62 		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
63 		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
64 		vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
65 	}
66 
67 	return 0;
68 }
69 
/* VM init: tdx_vm_init() for a TD, vmx_vm_init() for everything else. */
static int vt_vm_init(struct kvm *kvm)
{
	return is_td(kvm) ? tdx_vm_init(kvm) : vmx_vm_init(kvm);
}
77 
/*
 * Pre-destroy hook: for a TD, release its HKID via tdx_mmu_release_hkid().
 * Nothing is done for non-TD VMs.
 */
static void vt_vm_pre_destroy(struct kvm *kvm)
{
	/*
	 * Plain call instead of "return <void expression>;", which ISO C
	 * does not permit in a function returning void.
	 */
	if (is_td(kvm))
		tdx_mmu_release_hkid(kvm);
}
83 
/* VM destroy: tdx_vm_destroy() for a TD, vmx_vm_destroy() otherwise. */
static void vt_vm_destroy(struct kvm *kvm)
{
	/*
	 * Call-then-return, matching the other void dispatchers in this file;
	 * "return <void expression>;" is not valid ISO C in a void function.
	 */
	if (is_td(kvm)) {
		tdx_vm_destroy(kvm);
		return;
	}

	vmx_vm_destroy(kvm);
}
91 
/*
 * vCPU pre-create hook: returns 0 right away for a TD, otherwise defers to
 * vmx_vcpu_precreate().
 */
static int vt_vcpu_precreate(struct kvm *kvm)
{
	if (!is_td(kvm))
		return vmx_vcpu_precreate(kvm);

	return 0;
}
99 
/* vCPU create: tdx_vcpu_create() for a TD vCPU, vmx_vcpu_create() otherwise. */
static int vt_vcpu_create(struct kvm_vcpu *vcpu)
{
	return is_td_vcpu(vcpu) ? tdx_vcpu_create(vcpu) : vmx_vcpu_create(vcpu);
}
107 
/* vCPU free: tdx_vcpu_free() for a TD vCPU, vmx_vcpu_free() otherwise. */
static void vt_vcpu_free(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		tdx_vcpu_free(vcpu);
	else
		vmx_vcpu_free(vcpu);
}
117 
/*
 * vCPU reset: forwards @init_event to tdx_vcpu_reset() for a TD vCPU and to
 * vmx_vcpu_reset() otherwise.
 */
static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	if (is_td_vcpu(vcpu))
		tdx_vcpu_reset(vcpu, init_event);
	else
		vmx_vcpu_reset(vcpu, init_event);
}
127 
/*
 * vCPU load: forwards @cpu to tdx_vcpu_load() for a TD vCPU and to
 * vmx_vcpu_load() otherwise.
 */
static void vt_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	if (is_td_vcpu(vcpu))
		tdx_vcpu_load(vcpu, cpu);
	else
		vmx_vcpu_load(vcpu, cpu);
}
137 
/*
 * Update CPU dirty logging via vmx_update_cpu_dirty_logging().  Being called
 * for a TD vCPU is unexpected: it triggers a one-time warning and is ignored.
 */
static void vt_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
	/*
	 * Basic TDX does not support PML.  KVM neither enables PML in a TD's
	 * VMCS, nor allocates or flushes a PML buffer for TDX.
	 */
	if (!WARN_ON_ONCE(is_td_vcpu(vcpu)))
		vmx_update_cpu_dirty_logging(vcpu);
}
149 
/*
 * Prepare for switching to the guest: tdx_prepare_switch_to_guest() for a TD
 * vCPU, vmx_prepare_switch_to_guest() otherwise.
 */
static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		tdx_prepare_switch_to_guest(vcpu);
	else
		vmx_prepare_switch_to_guest(vcpu);
}
159 
/* vCPU put: tdx_vcpu_put() for a TD vCPU, vmx_vcpu_put() otherwise. */
static void vt_vcpu_put(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		tdx_vcpu_put(vcpu);
	else
		vmx_vcpu_put(vcpu);
}
169 
/*
 * Pre-run hook: returns the result of tdx_vcpu_pre_run() for a TD vCPU and of
 * vmx_vcpu_pre_run() otherwise.
 */
static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	return is_td_vcpu(vcpu) ? tdx_vcpu_pre_run(vcpu) : vmx_vcpu_pre_run(vcpu);
}
177 
178 static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
179 {
180 	if (is_td_vcpu(vcpu))
181 		return tdx_vcpu_run(vcpu, force_immediate_exit);
182 
183 	return vmx_vcpu_run(vcpu, force_immediate_exit);
184 }
185 
186 static int vt_handle_exit(struct kvm_vcpu *vcpu,
187 			  enum exit_fastpath_completion fastpath)
188 {
189 	if (is_td_vcpu(vcpu))
190 		return tdx_handle_exit(vcpu, fastpath);
191 
192 	return vmx_handle_exit(vcpu, fastpath);
193 }
194 
/*
 * MSR write: vmx_set_msr() for non-TD vCPUs (hinted as the common case),
 * tdx_set_msr() for TD vCPUs.
 */
static int vt_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	if (likely(!is_td_vcpu(vcpu)))
		return vmx_set_msr(vcpu, msr_info);

	return tdx_set_msr(vcpu, msr_info);
}
202 
203 /*
204  * The kvm parameter can be NULL (module initialization, or invocation before
205  * VM creation). Be sure to check the kvm parameter before using it.
206  */
207 static bool vt_has_emulated_msr(struct kvm *kvm, u32 index)
208 {
209 	if (kvm && is_td(kvm))
210 		return tdx_has_emulated_msr(index);
211 
212 	return vmx_has_emulated_msr(kvm, index);
213 }
214 
/*
 * MSR read: vmx_get_msr() for non-TD vCPUs (hinted as the common case),
 * tdx_get_msr() for TD vCPUs.
 */
static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	if (likely(!is_td_vcpu(vcpu)))
		return vmx_get_msr(vcpu, msr_info);

	return tdx_get_msr(vcpu, msr_info);
}
222 
/* MSR filter change notification; a no-op for TD vCPUs. */
static void vt_msr_filter_changed(struct kvm_vcpu *vcpu)
{
	/*
	 * TDX doesn't let the VMM configure interception of MSR accesses.
	 * A TDX guest requests MSR accesses through TDVMCALL, and any MSR
	 * filters set by userspace are applied when that TDVMCALL for
	 * RDMSR/WRMSR is handled.
	 */
	if (!is_td_vcpu(vcpu))
		vmx_msr_filter_changed(vcpu);
}
236 
/*
 * Complete an emulated MSR access: tdx_complete_emulated_msr() for a TD vCPU,
 * vmx_complete_emulated_msr() otherwise.  @err is passed through unchanged.
 */
static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
	return is_td_vcpu(vcpu) ? tdx_complete_emulated_msr(vcpu, err)
				: vmx_complete_emulated_msr(vcpu, err);
}
244 
245 #ifdef CONFIG_KVM_SMM
246 static int vt_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
247 {
248 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
249 		return 0;
250 
251 	return vmx_smi_allowed(vcpu, for_injection);
252 }
253 
254 static int vt_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
255 {
256 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
257 		return 0;
258 
259 	return vmx_enter_smm(vcpu, smram);
260 }
261 
262 static int vt_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
263 {
264 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
265 		return 0;
266 
267 	return vmx_leave_smm(vcpu, smram);
268 }
269 
270 static void vt_enable_smi_window(struct kvm_vcpu *vcpu)
271 {
272 	if (KVM_BUG_ON(is_td_vcpu(vcpu), vcpu->kvm))
273 		return;
274 
275 	/* RSM will cause a vmexit anyway.  */
276 	vmx_enable_smi_window(vcpu);
277 }
278 #endif
279 
280 static int vt_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
281 					void *insn, int insn_len)
282 {
283 	/*
284 	 * For TDX, this can only be triggered for MMIO emulation.  Let the
285 	 * guest retry after installing the SPTE with suppress #VE bit cleared,
286 	 * so that the guest will receive #VE when retry.  The guest is expected
287 	 * to call TDG.VP.VMCALL<MMIO> to request VMM to do MMIO emulation on
288 	 * #VE.
289 	 */
290 	if (is_td_vcpu(vcpu))
291 		return X86EMUL_RETRY_INSTR;
292 
293 	return vmx_check_emulate_instruction(vcpu, emul_type, insn, insn_len);
294 }
295 
/*
 * Report whether INIT signals are blocked: always true for a TD vCPU,
 * otherwise whatever vmx_apic_init_signal_blocked() says.
 */
static bool vt_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
	/*
	 * INIT and SIPI are always blocked for TDX, i.e. INIT handling and
	 * the vcpu_deliver_sipi_vector() op won't be called.
	 */
	return is_td_vcpu(vcpu) ? true : vmx_apic_init_signal_blocked(vcpu);
}
307 
/*
 * Set the virtual APIC mode via vmx_set_virtual_apic_mode(); a no-op for TD
 * vCPUs.
 */
static void vt_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	/* Only x2APIC mode is supported for TD. */
	if (is_td_vcpu(vcpu))
		return;

	/*
	 * Plain call: "return <void expression>;" is not valid ISO C in a
	 * function returning void.
	 */
	vmx_set_virtual_apic_mode(vcpu);
}
316 
/*
 * Forward @max_isr to vmx_hwapic_isr_update(); a no-op for TD vCPUs.
 */
static void vt_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
	if (is_td_vcpu(vcpu))
		return;

	/*
	 * Plain call: "return <void expression>;" is not valid ISO C in a
	 * function returning void.
	 */
	vmx_hwapic_isr_update(vcpu, max_isr);
}
324 
/*
 * Sync PIR to IRR: returns -1 for a TD vCPU, otherwise the result of
 * vmx_sync_pir_to_irr().
 */
static int vt_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		return vmx_sync_pir_to_irr(vcpu);

	return -1;
}
332 
333 static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
334 			   int trig_mode, int vector)
335 {
336 	if (is_td_vcpu(apic->vcpu)) {
337 		tdx_deliver_interrupt(apic, delivery_mode, trig_mode,
338 					     vector);
339 		return;
340 	}
341 
342 	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
343 }
344 
/* Post-CPUID-update hook; only non-TD vCPUs reach vmx_vcpu_after_set_cpuid(). */
static void vt_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_vcpu_after_set_cpuid(vcpu);
}
352 
/* Exception bitmap update; a no-op for TD vCPUs. */
static void vt_update_exception_bitmap(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_update_exception_bitmap(vcpu);
}
360 
361 static u64 vt_get_segment_base(struct kvm_vcpu *vcpu, int seg)
362 {
363 	if (is_td_vcpu(vcpu))
364 		return 0;
365 
366 	return vmx_get_segment_base(vcpu, seg);
367 }
368 
369 static void vt_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var,
370 			      int seg)
371 {
372 	if (is_td_vcpu(vcpu)) {
373 		memset(var, 0, sizeof(*var));
374 		return;
375 	}
376 
377 	vmx_get_segment(vcpu, var, seg);
378 }
379 
/* Write segment @seg from @var; a no-op for TD vCPUs. */
static void vt_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var,
			      int seg)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_segment(vcpu, var, seg);
}
388 
/* CPL query: 0 for a TD vCPU, otherwise the result of vmx_get_cpl(). */
static int vt_get_cpl(struct kvm_vcpu *vcpu)
{
	return is_td_vcpu(vcpu) ? 0 : vmx_get_cpl(vcpu);
}
396 
/*
 * CPL query through vmx_get_cpl_no_cache(); a TD vCPU gets 0 without calling
 * into VMX.
 */
static int vt_get_cpl_no_cache(struct kvm_vcpu *vcpu)
{
	return is_td_vcpu(vcpu) ? 0 : vmx_get_cpl_no_cache(vcpu);
}
404 
/*
 * Fetch the CS.DB and CS.L bits into @db and @l.  A TD vCPU reports both as 0;
 * otherwise vmx_get_cs_db_l_bits() fills them in.
 */
static void vt_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{
	if (!is_td_vcpu(vcpu)) {
		vmx_get_cs_db_l_bits(vcpu, db, l);
		return;
	}

	*db = 0;
	*l = 0;
}
415 
/*
 * CR0 validity check: every value is accepted for a TD vCPU, otherwise
 * vmx_is_valid_cr0() decides.
 */
static bool vt_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	if (!is_td_vcpu(vcpu))
		return vmx_is_valid_cr0(vcpu, cr0);

	return true;
}
423 
/* CR0 write; a no-op for TD vCPUs. */
static void vt_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_cr0(vcpu, cr0);
}
431 
/*
 * CR4 validity check: every value is accepted for a TD vCPU, otherwise
 * vmx_is_valid_cr4() decides.
 */
static bool vt_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	if (!is_td_vcpu(vcpu))
		return vmx_is_valid_cr4(vcpu, cr4);

	return true;
}
439 
/* CR4 write; a no-op for TD vCPUs. */
static void vt_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_cr4(vcpu, cr4);
}
447 
448 static int vt_set_efer(struct kvm_vcpu *vcpu, u64 efer)
449 {
450 	if (is_td_vcpu(vcpu))
451 		return 0;
452 
453 	return vmx_set_efer(vcpu, efer);
454 }
455 
456 static void vt_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
457 {
458 	if (is_td_vcpu(vcpu)) {
459 		memset(dt, 0, sizeof(*dt));
460 		return;
461 	}
462 
463 	vmx_get_idt(vcpu, dt);
464 }
465 
/* IDT descriptor write; a no-op for TD vCPUs. */
static void vt_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_idt(vcpu, dt);
}
473 
474 static void vt_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
475 {
476 	if (is_td_vcpu(vcpu)) {
477 		memset(dt, 0, sizeof(*dt));
478 		return;
479 	}
480 
481 	vmx_get_gdt(vcpu, dt);
482 }
483 
/* GDT descriptor write; a no-op for TD vCPUs. */
static void vt_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_gdt(vcpu, dt);
}
491 
/* DR6 write; a no-op for TD vCPUs. */
static void vt_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_dr6(vcpu, val);
}
499 
/* DR7 write; a no-op for TD vCPUs. */
static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_dr7(vcpu, val);
}
507 
/* Sync dirty debug registers; a no-op for TD vCPUs. */
static void vt_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
	/*
	 * MOV-DR exiting is always cleared for a TD guest, even in debug
	 * mode.  KVM_DEBUGREG_WONT_EXIT can therefore never be set, and this
	 * should never be reached for a TD vCPU.
	 */
	if (!is_td_vcpu(vcpu))
		vmx_sync_dirty_debug_regs(vcpu);
}
520 
521 static void vt_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
522 {
523 	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
524 		return;
525 
526 	vmx_cache_reg(vcpu, reg);
527 }
528 
/* RFLAGS read: 0 for a TD vCPU, otherwise the value from vmx_get_rflags(). */
static unsigned long vt_get_rflags(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		return vmx_get_rflags(vcpu);

	return 0;
}
536 
/* RFLAGS write; a no-op for TD vCPUs. */
static void vt_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_rflags(vcpu, rflags);
}
544 
/*
 * IF flag query: false for a TD vCPU, otherwise the result of
 * vmx_get_if_flag().
 */
static bool vt_get_if_flag(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		return vmx_get_if_flag(vcpu);

	return false;
}
552 
/* TLB flush (all): tdx_flush_tlb_all() for a TD vCPU, vmx_flush_tlb_all() otherwise. */
static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		tdx_flush_tlb_all(vcpu);
	else
		vmx_flush_tlb_all(vcpu);
}
562 
/*
 * TLB flush (current): tdx_flush_tlb_current() for a TD vCPU,
 * vmx_flush_tlb_current() otherwise.
 */
static void vt_flush_tlb_current(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		tdx_flush_tlb_current(vcpu);
	else
		vmx_flush_tlb_current(vcpu);
}
572 
573 static void vt_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
574 {
575 	if (is_td_vcpu(vcpu))
576 		return;
577 
578 	vmx_flush_tlb_gva(vcpu, addr);
579 }
580 
/* Guest TLB flush; a no-op for TD vCPUs. */
static void vt_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_flush_tlb_guest(vcpu);
}
588 
/* NMI injection: tdx_inject_nmi() for a TD vCPU, vmx_inject_nmi() otherwise. */
static void vt_inject_nmi(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		tdx_inject_nmi(vcpu);
	else
		vmx_inject_nmi(vcpu);
}
598 
/*
 * NMI-allowed query: always true for a TD vCPU, otherwise the result of
 * vmx_nmi_allowed().
 */
static int vt_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	if (!is_td_vcpu(vcpu))
		return vmx_nmi_allowed(vcpu, for_injection);

	/*
	 * The TDX module manages NMI windows and NMI reinjection, and hides
	 * NMI blocking; all KVM can do is throw an NMI over the wall.
	 */
	return true;
}
610 
/*
 * NMI mask query: false for a TD vCPU, otherwise the result of
 * vmx_get_nmi_mask().
 */
static bool vt_get_nmi_mask(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		return vmx_get_nmi_mask(vcpu);

	/*
	 * KVM can't get the NMI blocking status of a TDX guest; assume NMIs
	 * are always unmasked.
	 */
	return false;
}
622 
/* NMI mask write; a no-op for TD vCPUs. */
static void vt_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_nmi_mask(vcpu, masked);
}
630 
/* Enable the NMI window; a no-op for TD vCPUs. */
static void vt_enable_nmi_window(struct kvm_vcpu *vcpu)
{
	/* Refer to the comments in tdx_inject_nmi(). */
	if (!is_td_vcpu(vcpu))
		vmx_enable_nmi_window(vcpu);
}
639 
640 static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
641 			    int pgd_level)
642 {
643 	if (is_td_vcpu(vcpu)) {
644 		tdx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
645 		return;
646 	}
647 
648 	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
649 }
650 
/* Interrupt shadow write; a no-op for TD vCPUs. */
static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_interrupt_shadow(vcpu, mask);
}
658 
659 static u32 vt_get_interrupt_shadow(struct kvm_vcpu *vcpu)
660 {
661 	if (is_td_vcpu(vcpu))
662 		return 0;
663 
664 	return vmx_get_interrupt_shadow(vcpu);
665 }
666 
/* Hypercall patching via vmx_patch_hypercall(); a no-op for TD vCPUs. */
static void vt_patch_hypercall(struct kvm_vcpu *vcpu,
				  unsigned char *hypercall)
{
	/*
	 * Guest memory is protected, so the guest can't be patched.  The TD
	 * kernel is modified to use TDG.VP.VMCALL for hypercalls instead.
	 */
	if (!is_td_vcpu(vcpu))
		vmx_patch_hypercall(vcpu, hypercall);
}
679 
/* IRQ injection via vmx_inject_irq(); a no-op for TD vCPUs. */
static void vt_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
	if (!is_td_vcpu(vcpu))
		vmx_inject_irq(vcpu, reinjected);
}
687 
/* Exception injection via vmx_inject_exception(); a no-op for TD vCPUs. */
static void vt_inject_exception(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_inject_exception(vcpu);
}
695 
/* Cancel a pending injection via vmx_cancel_injection(); a no-op for TD vCPUs. */
static void vt_cancel_injection(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_cancel_injection(vcpu);
}
703 
/*
 * Interrupt-allowed query: tdx_interrupt_allowed() for a TD vCPU (which does
 * not take @for_injection), vmx_interrupt_allowed() otherwise.
 */
static int vt_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	if (!is_td_vcpu(vcpu))
		return vmx_interrupt_allowed(vcpu, for_injection);

	return tdx_interrupt_allowed(vcpu);
}
711 
/* Enable the IRQ window; a no-op for TD vCPUs. */
static void vt_enable_irq_window(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_enable_irq_window(vcpu);
}
719 
720 static void vt_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
721 {
722 	*intr_info = 0;
723 	*error_code = 0;
724 
725 	if (is_td_vcpu(vcpu))
726 		return;
727 
728 	vmx_get_entry_info(vcpu, intr_info, error_code);
729 }
730 
731 static void vt_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
732 			u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
733 {
734 	if (is_td_vcpu(vcpu)) {
735 		tdx_get_exit_info(vcpu, reason, info1, info2, intr_info,
736 				  error_code);
737 		return;
738 	}
739 
740 	vmx_get_exit_info(vcpu, reason, info1, info2, intr_info, error_code);
741 }
742 
/* CR8 intercept update via vmx_update_cr8_intercept(); a no-op for TD vCPUs. */
static void vt_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
	if (!is_td_vcpu(vcpu))
		vmx_update_cr8_intercept(vcpu, tpr, irr);
}
750 
/*
 * Set the APIC access page address via vmx_set_apic_access_page_addr();
 * a no-op for TD vCPUs.
 */
static void vt_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_set_apic_access_page_addr(vcpu);
}
758 
759 static void vt_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
760 {
761 	if (is_td_vcpu(vcpu)) {
762 		KVM_BUG_ON(!kvm_vcpu_apicv_active(vcpu), vcpu->kvm);
763 		return;
764 	}
765 
766 	vmx_refresh_apicv_exec_ctrl(vcpu);
767 }
768 
769 static void vt_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
770 {
771 	if (is_td_vcpu(vcpu))
772 		return;
773 
774 	vmx_load_eoi_exitmap(vcpu, eoi_exit_bitmap);
775 }
776 
/*
 * Set the TSS address: returns 0 without doing anything for a TD, otherwise
 * the result of vmx_set_tss_addr().
 */
static int vt_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
	if (!is_td(kvm))
		return vmx_set_tss_addr(kvm, addr);

	return 0;
}
784 
785 static int vt_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
786 {
787 	if (is_td(kvm))
788 		return 0;
789 
790 	return vmx_set_identity_map_addr(kvm, ident_addr);
791 }
792 
793 static u64 vt_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
794 {
795 	/* TDX doesn't support L2 guest at the moment. */
796 	if (is_td_vcpu(vcpu))
797 		return 0;
798 
799 	return vmx_get_l2_tsc_offset(vcpu);
800 }
801 
802 static u64 vt_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
803 {
804 	/* TDX doesn't support L2 guest at the moment. */
805 	if (is_td_vcpu(vcpu))
806 		return 0;
807 
808 	return vmx_get_l2_tsc_multiplier(vcpu);
809 }
810 
/* TSC offset write via vmx_write_tsc_offset(); a no-op for TD vCPUs. */
static void vt_write_tsc_offset(struct kvm_vcpu *vcpu)
{
	/* In TDX, tsc offset can't be changed. */
	if (!is_td_vcpu(vcpu))
		vmx_write_tsc_offset(vcpu);
}
819 
/* TSC multiplier write via vmx_write_tsc_multiplier(); a no-op for TD vCPUs. */
static void vt_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
	/* In TDX, tsc multiplier can't be changed. */
	if (!is_td_vcpu(vcpu))
		vmx_write_tsc_multiplier(vcpu);
}
828 
829 #ifdef CONFIG_X86_64
830 static int vt_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
831 			      bool *expired)
832 {
833 	/* VMX-preemption timer isn't available for TDX. */
834 	if (is_td_vcpu(vcpu))
835 		return -EINVAL;
836 
837 	return vmx_set_hv_timer(vcpu, guest_deadline_tsc, expired);
838 }
839 
/* Cancel the hypervisor timer via vmx_cancel_hv_timer(); a no-op for TD vCPUs. */
static void vt_cancel_hv_timer(struct kvm_vcpu *vcpu)
{
	/* VMX-preemption timer can't be set.  See vt_set_hv_timer(). */
	if (!is_td_vcpu(vcpu))
		vmx_cancel_hv_timer(vcpu);
}
848 #endif
849 
/* MCE setup via vmx_setup_mce(); a no-op for TD vCPUs. */
static void vt_setup_mce(struct kvm_vcpu *vcpu)
{
	if (!is_td_vcpu(vcpu))
		vmx_setup_mce(vcpu);
}
857 
858 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
859 {
860 	if (!is_td(kvm))
861 		return -ENOTTY;
862 
863 	return tdx_vm_ioctl(kvm, argp);
864 }
865 
866 static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
867 {
868 	if (!is_td_vcpu(vcpu))
869 		return -EINVAL;
870 
871 	return tdx_vcpu_ioctl(vcpu, argp);
872 }
873 
874 static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
875 {
876 	if (is_td(kvm))
877 		return tdx_gmem_private_max_mapping_level(kvm, pfn);
878 
879 	return 0;
880 }
881 
882 #define vt_op(name) vt_##name
883 #define vt_op_tdx_only(name) vt_##name
884 #else /* CONFIG_KVM_INTEL_TDX */
885 #define vt_op(name) vmx_##name
886 #define vt_op_tdx_only(name) NULL
887 #endif /* CONFIG_KVM_INTEL_TDX */
888 
/*
 * Bitmask of APICv inhibit reasons, one BIT() per reason.  Used as
 * vt_x86_ops.required_apicv_inhibits.
 */
#define VMX_REQUIRED_APICV_INHIBITS				\
(								\
	BIT(APICV_INHIBIT_REASON_DISABLED)		|	\
	BIT(APICV_INHIBIT_REASON_ABSENT)		|	\
	BIT(APICV_INHIBIT_REASON_HYPERV)		|	\
	BIT(APICV_INHIBIT_REASON_BLOCKIRQ)		|	\
	BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED)	|	\
	BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED)	|	\
	BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED)		\
)
897 
898 struct kvm_x86_ops vt_x86_ops __initdata = {
899 	.name = KBUILD_MODNAME,
900 
901 	.check_processor_compatibility = vmx_check_processor_compat,
902 
903 	.hardware_unsetup = vmx_hardware_unsetup,
904 
905 	.enable_virtualization_cpu = vmx_enable_virtualization_cpu,
906 	.disable_virtualization_cpu = vt_op(disable_virtualization_cpu),
907 	.emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,
908 
909 	.has_emulated_msr = vt_op(has_emulated_msr),
910 
911 	.vm_size = sizeof(struct kvm_vmx),
912 
913 	.vm_init = vt_op(vm_init),
914 	.vm_destroy = vt_op(vm_destroy),
915 	.vm_pre_destroy = vt_op_tdx_only(vm_pre_destroy),
916 
917 	.vcpu_precreate = vt_op(vcpu_precreate),
918 	.vcpu_create = vt_op(vcpu_create),
919 	.vcpu_free = vt_op(vcpu_free),
920 	.vcpu_reset = vt_op(vcpu_reset),
921 
922 	.prepare_switch_to_guest = vt_op(prepare_switch_to_guest),
923 	.vcpu_load = vt_op(vcpu_load),
924 	.vcpu_put = vt_op(vcpu_put),
925 
926 	.update_exception_bitmap = vt_op(update_exception_bitmap),
927 	.get_feature_msr = vmx_get_feature_msr,
928 	.get_msr = vt_op(get_msr),
929 	.set_msr = vt_op(set_msr),
930 
931 	.get_segment_base = vt_op(get_segment_base),
932 	.get_segment = vt_op(get_segment),
933 	.set_segment = vt_op(set_segment),
934 	.get_cpl = vt_op(get_cpl),
935 	.get_cpl_no_cache = vt_op(get_cpl_no_cache),
936 	.get_cs_db_l_bits = vt_op(get_cs_db_l_bits),
937 	.is_valid_cr0 = vt_op(is_valid_cr0),
938 	.set_cr0 = vt_op(set_cr0),
939 	.is_valid_cr4 = vt_op(is_valid_cr4),
940 	.set_cr4 = vt_op(set_cr4),
941 	.set_efer = vt_op(set_efer),
942 	.get_idt = vt_op(get_idt),
943 	.set_idt = vt_op(set_idt),
944 	.get_gdt = vt_op(get_gdt),
945 	.set_gdt = vt_op(set_gdt),
946 	.set_dr6 = vt_op(set_dr6),
947 	.set_dr7 = vt_op(set_dr7),
948 	.sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs),
949 	.cache_reg = vt_op(cache_reg),
950 	.get_rflags = vt_op(get_rflags),
951 	.set_rflags = vt_op(set_rflags),
952 	.get_if_flag = vt_op(get_if_flag),
953 
954 	.flush_tlb_all = vt_op(flush_tlb_all),
955 	.flush_tlb_current = vt_op(flush_tlb_current),
956 	.flush_tlb_gva = vt_op(flush_tlb_gva),
957 	.flush_tlb_guest = vt_op(flush_tlb_guest),
958 
959 	.vcpu_pre_run = vt_op(vcpu_pre_run),
960 	.vcpu_run = vt_op(vcpu_run),
961 	.handle_exit = vt_op(handle_exit),
962 	.skip_emulated_instruction = vmx_skip_emulated_instruction,
963 	.update_emulated_instruction = vmx_update_emulated_instruction,
964 	.set_interrupt_shadow = vt_op(set_interrupt_shadow),
965 	.get_interrupt_shadow = vt_op(get_interrupt_shadow),
966 	.patch_hypercall = vt_op(patch_hypercall),
967 	.inject_irq = vt_op(inject_irq),
968 	.inject_nmi = vt_op(inject_nmi),
969 	.inject_exception = vt_op(inject_exception),
970 	.cancel_injection = vt_op(cancel_injection),
971 	.interrupt_allowed = vt_op(interrupt_allowed),
972 	.nmi_allowed = vt_op(nmi_allowed),
973 	.get_nmi_mask = vt_op(get_nmi_mask),
974 	.set_nmi_mask = vt_op(set_nmi_mask),
975 	.enable_nmi_window = vt_op(enable_nmi_window),
976 	.enable_irq_window = vt_op(enable_irq_window),
977 	.update_cr8_intercept = vt_op(update_cr8_intercept),
978 
979 	.x2apic_icr_is_split = false,
980 	.set_virtual_apic_mode = vt_op(set_virtual_apic_mode),
981 	.set_apic_access_page_addr = vt_op(set_apic_access_page_addr),
982 	.refresh_apicv_exec_ctrl = vt_op(refresh_apicv_exec_ctrl),
983 	.load_eoi_exitmap = vt_op(load_eoi_exitmap),
984 	.apicv_pre_state_restore = pi_apicv_pre_state_restore,
985 	.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
986 	.hwapic_isr_update = vt_op(hwapic_isr_update),
987 	.sync_pir_to_irr = vt_op(sync_pir_to_irr),
988 	.deliver_interrupt = vt_op(deliver_interrupt),
989 	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
990 
991 	.set_tss_addr = vt_op(set_tss_addr),
992 	.set_identity_map_addr = vt_op(set_identity_map_addr),
993 	.get_mt_mask = vmx_get_mt_mask,
994 
995 	.get_exit_info = vt_op(get_exit_info),
996 	.get_entry_info = vt_op(get_entry_info),
997 
998 	.vcpu_after_set_cpuid = vt_op(vcpu_after_set_cpuid),
999 
1000 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
1001 
1002 	.get_l2_tsc_offset = vt_op(get_l2_tsc_offset),
1003 	.get_l2_tsc_multiplier = vt_op(get_l2_tsc_multiplier),
1004 	.write_tsc_offset = vt_op(write_tsc_offset),
1005 	.write_tsc_multiplier = vt_op(write_tsc_multiplier),
1006 
1007 	.load_mmu_pgd = vt_op(load_mmu_pgd),
1008 
1009 	.check_intercept = vmx_check_intercept,
1010 	.handle_exit_irqoff = vmx_handle_exit_irqoff,
1011 
1012 	.update_cpu_dirty_logging = vt_op(update_cpu_dirty_logging),
1013 
1014 	.nested_ops = &vmx_nested_ops,
1015 
1016 	.pi_update_irte = vmx_pi_update_irte,
1017 	.pi_start_assignment = vmx_pi_start_assignment,
1018 
1019 #ifdef CONFIG_X86_64
1020 	.set_hv_timer = vt_op(set_hv_timer),
1021 	.cancel_hv_timer = vt_op(cancel_hv_timer),
1022 #endif
1023 
1024 	.setup_mce = vt_op(setup_mce),
1025 
1026 #ifdef CONFIG_KVM_SMM
1027 	.smi_allowed = vt_op(smi_allowed),
1028 	.enter_smm = vt_op(enter_smm),
1029 	.leave_smm = vt_op(leave_smm),
1030 	.enable_smi_window = vt_op(enable_smi_window),
1031 #endif
1032 
1033 	.check_emulate_instruction = vt_op(check_emulate_instruction),
1034 	.apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
1035 	.migrate_timers = vmx_migrate_timers,
1036 
1037 	.msr_filter_changed = vt_op(msr_filter_changed),
1038 	.complete_emulated_msr = vt_op(complete_emulated_msr),
1039 
1040 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
1041 
1042 	.get_untagged_addr = vmx_get_untagged_addr,
1043 
1044 	.mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl),
1045 	.vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl),
1046 
1047 	.private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level)
1048 };
1049 
1050 struct kvm_x86_init_ops vt_init_ops __initdata = {
1051 	.hardware_setup = vt_op(hardware_setup),
1052 	.handle_intel_pt_intr = NULL,
1053 
1054 	.runtime_ops = &vt_x86_ops,
1055 	.pmu_ops = &intel_pmu_ops,
1056 };
1057 
1058 static void __exit vt_exit(void)
1059 {
1060 	kvm_exit();
1061 	tdx_cleanup();
1062 	vmx_exit();
1063 }
1064 module_exit(vt_exit);
1065 
1066 static int __init vt_init(void)
1067 {
1068 	unsigned vcpu_size, vcpu_align;
1069 	int r;
1070 
1071 	r = vmx_init();
1072 	if (r)
1073 		return r;
1074 
1075 	/* tdx_init() has been taken */
1076 	r = tdx_bringup();
1077 	if (r)
1078 		goto err_tdx_bringup;
1079 
1080 	/*
1081 	 * TDX and VMX have different vCPU structures.  Calculate the
1082 	 * maximum size/align so that kvm_init() can use the larger
1083 	 * values to create the kmem_vcpu_cache.
1084 	 */
1085 	vcpu_size = sizeof(struct vcpu_vmx);
1086 	vcpu_align = __alignof__(struct vcpu_vmx);
1087 	if (enable_tdx) {
1088 		vcpu_size = max_t(unsigned, vcpu_size,
1089 				sizeof(struct vcpu_tdx));
1090 		vcpu_align = max_t(unsigned, vcpu_align,
1091 				__alignof__(struct vcpu_tdx));
1092 		kvm_caps.supported_vm_types |= BIT(KVM_X86_TDX_VM);
1093 	}
1094 
1095 	/*
1096 	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
1097 	 * exposed to userspace!
1098 	 */
1099 	r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
1100 	if (r)
1101 		goto err_kvm_init;
1102 
1103 	return 0;
1104 
1105 err_kvm_init:
1106 	tdx_cleanup();
1107 err_tdx_bringup:
1108 	vmx_exit();
1109 	return r;
1110 }
1111 module_init(vt_init);
1112