1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Kernel-based Virtual Machine driver for Linux 4 * 5 * AMD SVM support 6 * 7 * Copyright (C) 2006 Qumranet, Inc. 8 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 9 * 10 * Authors: 11 * Yaniv Kamay <yaniv@qumranet.com> 12 * Avi Kivity <avi@qumranet.com> 13 */ 14 15 #ifndef __SVM_SVM_H 16 #define __SVM_SVM_H 17 18 #include <linux/kvm_types.h> 19 #include <linux/kvm_host.h> 20 #include <linux/bits.h> 21 22 #include <asm/svm.h> 23 #include <asm/sev-common.h> 24 25 #include "cpuid.h" 26 #include "regs.h" 27 #include "x86.h" 28 #include "pmu.h" 29 30 /* 31 * Helpers to convert to/from physical addresses for pages whose address is 32 * consumed directly by hardware. Even though it's a physical address, SVM 33 * often restricts the address to the natural width, hence 'unsigned long' 34 * instead of 'hpa_t'. 35 */ 36 static inline unsigned long __sme_page_pa(struct page *page) 37 { 38 return __sme_set(page_to_pfn(page) << PAGE_SHIFT); 39 } 40 41 static inline struct page *__sme_pa_to_page(unsigned long pa) 42 { 43 return pfn_to_page(__sme_clr(pa) >> PAGE_SHIFT); 44 } 45 46 #define IOPM_SIZE PAGE_SIZE * 3 47 #define MSRPM_SIZE PAGE_SIZE * 2 48 49 extern bool gmet_enabled; 50 extern bool npt_enabled; 51 extern int nrips; 52 extern int vgif; 53 extern bool intercept_smi; 54 extern bool vnmi; 55 extern int lbrv; 56 57 extern int tsc_aux_uret_slot __ro_after_init; 58 59 extern struct kvm_x86_ops svm_x86_ops __initdata; 60 61 /* 62 * Clean bits in VMCB. 63 * VMCB_ALL_CLEAN_MASK might also need to 64 * be updated if this enum is modified. 65 */ 66 enum { 67 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, 68 pause filter count */ 69 VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ 70 VMCB_ASID, /* ASID */ 71 VMCB_INTR, /* int_ctl, int_vector */ 72 VMCB_NPT, /* npt_en, nCR3, gPAT */ 73 VMCB_CR, /* CR0, CR3, CR4, EFER */ 74 VMCB_DR, /* DR6, DR7 */ 75 VMCB_DT, /* GDT, IDT */ 76 VMCB_SEG, /* CS, DS, SS, ES, CPL */ 77 VMCB_CR2, /* CR2 only */ 78 VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ 79 VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE, 80 * AVIC PHYSICAL_TABLE pointer, 81 * AVIC LOGICAL_TABLE pointer 82 */ 83 VMCB_CET, /* S_CET, SSP, ISST_ADDR */ 84 VMCB_SW = 31, /* Reserved for hypervisor/software use */ 85 }; 86 87 #define VMCB_ALL_CLEAN_MASK ( \ 88 (1U << VMCB_INTERCEPTS) | (1U << VMCB_PERM_MAP) | \ 89 (1U << VMCB_ASID) | (1U << VMCB_INTR) | \ 90 (1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) | \ 91 (1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) | \ 92 (1U << VMCB_LBR) | (1U << VMCB_AVIC) | (1U << VMCB_CET) | \ 93 (1U << VMCB_SW)) 94 95 /* TPR and CR2 are always written before VMRUN */ 96 #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) 97 98 #ifdef CONFIG_KVM_AMD_SEV 99 struct kvm_sev_info { 100 bool active; /* SEV enabled guest */ 101 bool es_active; /* SEV-ES enabled guest */ 102 bool need_init; /* waiting for SEV_INIT2 */ 103 unsigned int asid; /* ASID used for this guest */ 104 unsigned int handle; /* SEV firmware handle */ 105 int fd; /* SEV device fd */ 106 unsigned long policy; 107 unsigned long pages_locked; /* Number of pages locked */ 108 struct list_head regions_list; /* List of registered regions */ 109 u64 ap_jump_table; /* SEV-ES AP Jump Table address */ 110 u64 vmsa_features; 111 u16 ghcb_version; /* Highest guest GHCB protocol version allowed */ 112 struct kvm *enc_context_owner; /* Owner of copied encryption context */ 113 struct list_head mirror_vms; /* List of VMs mirroring */ 114 struct list_head mirror_entry; /* Use as a list entry of mirrors */ 115 struct misc_cg *misc_cg; /* For misc cgroup accounting */ 116 atomic_t migration_in_progress; 117 void *snp_context; /* SNP guest context page */ 118 void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */ 119 void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */ 120 struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */ 121 cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */ 122 bool snp_certs_enabled; /* SNP certificate-fetching support. */ 123 }; 124 #endif 125 126 struct kvm_svm { 127 struct kvm kvm; 128 129 /* Struct members for AVIC */ 130 u32 avic_vm_id; 131 u32 *avic_logical_id_table; 132 u64 *avic_physical_id_table; 133 struct hlist_node hnode; 134 135 #ifdef CONFIG_KVM_AMD_SEV 136 struct kvm_sev_info sev_info; 137 #endif 138 }; 139 140 struct kvm_vcpu; 141 142 struct kvm_vmcb_info { 143 struct vmcb *ptr; 144 unsigned long pa; 145 int cpu; 146 uint64_t asid_generation; 147 }; 148 149 struct vmcb_save_area_cached { 150 struct vmcb_seg es; 151 struct vmcb_seg cs; 152 struct vmcb_seg ss; 153 struct vmcb_seg ds; 154 struct vmcb_seg gdtr; 155 struct vmcb_seg idtr; 156 u8 cpl; 157 u64 efer; 158 u64 cr4; 159 u64 cr3; 160 u64 cr0; 161 u64 dr7; 162 u64 dr6; 163 u64 rflags; 164 u64 rip; 165 u64 rsp; 166 u64 s_cet; 167 u64 ssp; 168 u64 isst_addr; 169 u64 rax; 170 u64 cr2; 171 u64 g_pat; 172 u64 dbgctl; 173 u64 br_from; 174 u64 br_to; 175 u64 last_excp_from; 176 u64 last_excp_to; 177 }; 178 179 struct vmcb_ctrl_area_cached { 180 u32 intercepts[MAX_INTERCEPT]; 181 u16 pause_filter_thresh; 182 u16 pause_filter_count; 183 u64 iopm_base_pa; 184 u64 msrpm_base_pa; 185 u64 tsc_offset; 186 u32 asid; 187 u8 tlb_ctl; 188 u8 erap_ctl; 189 u32 int_ctl; 190 u32 int_vector; 191 u32 int_state; 192 u64 exit_code; 193 u64 exit_info_1; 194 u64 exit_info_2; 195 u32 exit_int_info; 196 u32 exit_int_info_err; 197 u64 misc_ctl; 198 u32 event_inj; 199 u32 event_inj_err; 200 u64 next_rip; 201 u64 nested_cr3; 202 u64 misc_ctl2; 203 u32 clean; 204 union { 205 #if IS_ENABLED(CONFIG_HYPERV) || IS_ENABLED(CONFIG_KVM_HYPERV) 206 struct hv_vmcb_enlightenments hv_enlightenments; 207 #endif 208 u8 reserved_sw[32]; 209 }; 210 }; 211 212 struct svm_nested_state { 213 struct kvm_vmcb_info vmcb02; 214 u64 hsave_msr; 215 u64 vm_cr_msr; 216 u64 vmcb12_gpa; 217 u64 last_vmcb12_gpa; 218 u64 last_bus_lock_rip; 219 220 /* 221 * The MSR permissions map used for vmcb02, which is the merge result 222 * of vmcb01 and vmcb12 223 */ 224 void *msrpm; 225 226 /* cache for control fields of the guest */ 227 struct vmcb_ctrl_area_cached ctl; 228 229 /* 230 * Note: this struct is not kept up-to-date while L2 runs; it is only 231 * valid within nested_svm_vmrun. 232 */ 233 struct vmcb_save_area_cached save; 234 235 bool initialized; 236 237 /* 238 * Indicates whether MSR bitmap for L2 needs to be rebuilt due to 239 * changes in MSR bitmap for L1 or switching to a different L2. Note, 240 * this flag can only be used reliably in conjunction with a paravirt L1 241 * which informs L0 whether any changes to MSR bitmap for L2 were done 242 * on its side. 243 */ 244 bool force_msr_bitmap_recalc; 245 }; 246 247 struct vcpu_sev_es_state { 248 /* SEV-ES support */ 249 struct sev_es_save_area *vmsa; 250 struct ghcb *ghcb; 251 u8 valid_bitmap[16]; 252 struct kvm_host_map ghcb_map; 253 bool received_first_sipi; 254 unsigned int ap_reset_hold_type; 255 256 /* SEV-ES scratch area support */ 257 u64 sw_scratch; 258 void *ghcb_sa; 259 u32 ghcb_sa_len; 260 bool ghcb_sa_sync; 261 bool ghcb_sa_free; 262 263 /* SNP Page-State-Change buffer entries currently being processed */ 264 struct { 265 u16 cur_idx; 266 u16 end_idx; 267 u16 batch_size; 268 bool is_2m; 269 } psc; 270 271 u64 ghcb_registered_gpa; 272 273 struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */ 274 gpa_t snp_vmsa_gpa; 275 bool snp_ap_waiting_for_reset; 276 bool snp_has_guest_vmsa; 277 }; 278 279 struct vcpu_svm { 280 struct kvm_vcpu vcpu; 281 /* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */ 282 struct vmcb *vmcb; 283 struct kvm_vmcb_info vmcb01; 284 struct kvm_vmcb_info *current_vmcb; 285 u32 asid; 286 u32 sysenter_esp_hi; 287 u32 sysenter_eip_hi; 288 uint64_t tsc_aux; 289 290 u64 msr_decfg; 291 292 u64 next_rip; 293 294 u64 spec_ctrl; 295 296 u64 tsc_ratio_msr; 297 /* 298 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be 299 * translated into the appropriate L2_CFG bits on the host to 300 * perform speculative control. 301 */ 302 u64 virt_spec_ctrl; 303 304 void *msrpm; 305 306 ulong nmi_iret_rip; 307 308 struct svm_nested_state nested; 309 310 /* NMI mask value, used when vNMI is not enabled */ 311 bool nmi_masked; 312 313 /* 314 * True when NMIs are still masked but guest IRET was just intercepted 315 * and KVM is waiting for RIP to change, which will signal that the 316 * intercepted IRET was retired and thus NMI can be unmasked. 317 */ 318 bool awaiting_iret_completion; 319 320 /* 321 * Set when KVM is awaiting IRET completion and needs to inject NMIs as 322 * soon as the IRET completes (e.g. NMI is pending injection). KVM 323 * temporarily steals RFLAGS.TF to single-step the guest in this case 324 * in order to regain control as soon as the NMI-blocking condition 325 * goes away. 326 */ 327 bool nmi_singlestep; 328 u64 nmi_singlestep_guest_rflags; 329 330 bool nmi_l1_to_l2; 331 332 unsigned long soft_int_csbase; 333 unsigned long soft_int_old_rip; 334 unsigned long soft_int_next_rip; 335 bool soft_int_injected; 336 337 u32 ldr_reg; 338 u32 dfr_reg; 339 340 /* This is essentially a shadow of the vCPU's actual entry in the 341 * Physical ID table that is programmed into the VMCB, i.e. that is 342 * seen by the CPU. If IPI virtualization is disabled, IsRunning is 343 * only ever set in the shadow, i.e. is never propagated to the "real" 344 * table, so that hardware never sees IsRunning=1. 345 */ 346 u64 avic_physical_id_entry; 347 348 /* 349 * Per-vCPU list of irqfds that are eligible to post IRQs directly to 350 * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass). The list 351 * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the 352 * target pCPU), when AVIC is toggled on/off (to (de)activate bypass), 353 * and if the irqfd becomes ineligible for posting (to put the IRTE 354 * back into remapped mode). 355 */ 356 struct list_head ir_list; 357 raw_spinlock_t ir_list_lock; 358 359 struct vcpu_sev_es_state sev_es; 360 361 bool guest_state_loaded; 362 363 bool avic_irq_window; 364 bool x2avic_msrs_intercepted; 365 bool lbr_msrs_intercepted; 366 367 /* Guest GIF value, used when vGIF is not enabled */ 368 bool guest_gif; 369 }; 370 371 struct svm_cpu_data { 372 u64 asid_generation; 373 u32 max_asid; 374 u32 next_asid; 375 u32 min_asid; 376 377 bool bp_spec_reduce_set; 378 379 struct vmcb *save_area; 380 unsigned long save_area_pa; 381 382 /* index = sev_asid, value = vmcb pointer */ 383 struct vmcb **sev_vmcbs; 384 }; 385 386 DECLARE_PER_CPU(struct svm_cpu_data, svm_data); 387 388 static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) 389 { 390 return container_of(kvm, struct kvm_svm, kvm); 391 } 392 393 #ifdef CONFIG_KVM_AMD_SEV 394 static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm) 395 { 396 return &to_kvm_svm(kvm)->sev_info; 397 } 398 399 static __always_inline bool ____sev_guest(struct kvm *kvm) 400 { 401 return to_kvm_sev_info(kvm)->active; 402 } 403 static __always_inline bool ____sev_es_guest(struct kvm *kvm) 404 { 405 struct kvm_sev_info *sev = to_kvm_sev_info(kvm); 406 407 return sev->es_active && !WARN_ON_ONCE(!sev->active); 408 } 409 410 static __always_inline bool ____sev_snp_guest(struct kvm *kvm) 411 { 412 struct kvm_sev_info *sev = to_kvm_sev_info(kvm); 413 414 return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) && 415 !WARN_ON_ONCE(!____sev_es_guest(kvm)); 416 } 417 418 static __always_inline bool is_sev_guest(struct kvm_vcpu *vcpu) 419 { 420 return ____sev_guest(vcpu->kvm); 421 } 422 static __always_inline bool is_sev_es_guest(struct kvm_vcpu *vcpu) 423 { 424 return ____sev_es_guest(vcpu->kvm); 425 } 426 427 static __always_inline bool is_sev_snp_guest(struct kvm_vcpu *vcpu) 428 { 429 return ____sev_snp_guest(vcpu->kvm); 430 } 431 #else 432 static __always_inline bool is_sev_guest(struct kvm_vcpu *vcpu) 433 { 434 return false; 435 } 436 static __always_inline bool is_sev_es_guest(struct kvm_vcpu *vcpu) 437 { 438 return false; 439 } 440 441 static __always_inline bool is_sev_snp_guest(struct kvm_vcpu *vcpu) 442 { 443 return false; 444 } 445 #endif 446 447 static inline bool ghcb_gpa_is_registered(struct vcpu_svm *svm, u64 val) 448 { 449 return svm->sev_es.ghcb_registered_gpa == val; 450 } 451 452 static inline void vmcb_mark_all_dirty(struct vmcb *vmcb) 453 { 454 vmcb->control.clean = 0; 455 } 456 457 static inline void vmcb_mark_all_clean(struct vmcb *vmcb) 458 { 459 vmcb->control.clean = VMCB_ALL_CLEAN_MASK 460 & ~VMCB_ALWAYS_DIRTY_MASK; 461 } 462 463 static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit) 464 { 465 vmcb->control.clean &= ~(1 << bit); 466 } 467 468 static inline bool vmcb12_is_dirty(struct vmcb_ctrl_area_cached *control, int bit) 469 { 470 return !test_bit(bit, (unsigned long *)&control->clean); 471 } 472 473 static inline void vmcb_set_gpat(struct vmcb *vmcb, u64 data) 474 { 475 vmcb->save.g_pat = data; 476 vmcb_mark_dirty(vmcb, VMCB_NPT); 477 } 478 479 static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 480 { 481 return container_of(vcpu, struct vcpu_svm, vcpu); 482 } 483 484 static inline bool svm_is_vmrun_failure(u64 exit_code) 485 { 486 if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) 487 return (u32)exit_code == (u32)SVM_EXIT_ERR; 488 489 return exit_code == SVM_EXIT_ERR; 490 } 491 492 /* 493 * Only the PDPTRs are loaded on demand into the shadow MMU. All other 494 * fields are synchronized on VM-Exit, because accessing the VMCB is cheap. 495 * 496 * CR3 might be out of date in the VMCB but it is not marked dirty; instead, 497 * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3 498 * is changed. svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB. 499 */ 500 #define SVM_REGS_LAZY_LOAD_SET (BIT(VCPU_REG_PDPTR)) 501 502 static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit) 503 { 504 WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT); 505 __set_bit(bit, intercepts); 506 } 507 508 static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit) 509 { 510 WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT); 511 __clear_bit(bit, intercepts); 512 } 513 514 static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit) 515 { 516 WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT); 517 return test_bit(bit, intercepts); 518 } 519 520 static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit) 521 { 522 __vmcb_set_intercept((unsigned long *)&control->intercepts, bit); 523 } 524 525 static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit) 526 { 527 __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit); 528 } 529 530 static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit) 531 { 532 return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit); 533 } 534 535 static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit) 536 { 537 __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit); 538 } 539 540 static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit) 541 { 542 return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit); 543 } 544 545 void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm); 546 547 static inline void svm_mark_intercepts_dirty(struct vcpu_svm *svm) 548 { 549 vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS); 550 551 /* 552 * If L2 is active, recalculate the intercepts for vmcb02 to account 553 * for the changes made to vmcb01. All intercept configuration is done 554 * for vmcb01 and then propagated to vmcb02 to combine KVM's intercepts 555 * with L1's intercepts (from the vmcb12 snapshot). 556 */ 557 if (is_guest_mode(&svm->vcpu)) 558 nested_vmcb02_recalc_intercepts(svm); 559 } 560 561 static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit) 562 { 563 struct vmcb *vmcb = svm->vmcb01.ptr; 564 565 WARN_ON_ONCE(bit >= 32); 566 vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit); 567 568 svm_mark_intercepts_dirty(svm); 569 } 570 571 static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit) 572 { 573 struct vmcb *vmcb = svm->vmcb01.ptr; 574 575 WARN_ON_ONCE(bit >= 32); 576 vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit); 577 578 svm_mark_intercepts_dirty(svm); 579 } 580 581 static inline void svm_set_intercept(struct vcpu_svm *svm, int bit) 582 { 583 struct vmcb *vmcb = svm->vmcb01.ptr; 584 585 vmcb_set_intercept(&vmcb->control, bit); 586 587 svm_mark_intercepts_dirty(svm); 588 } 589 590 static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit) 591 { 592 struct vmcb *vmcb = svm->vmcb01.ptr; 593 594 vmcb_clr_intercept(&vmcb->control, bit); 595 596 svm_mark_intercepts_dirty(svm); 597 } 598 599 static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit) 600 { 601 return vmcb_is_intercept(&svm->vmcb->control, bit); 602 } 603 604 static inline bool nested_vgif_enabled(struct vcpu_svm *svm) 605 { 606 return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VGIF) && 607 (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK); 608 } 609 610 static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm) 611 { 612 if (!vgif) 613 return NULL; 614 615 if (is_guest_mode(&svm->vcpu) && !nested_vgif_enabled(svm)) 616 return svm->nested.vmcb02.ptr; 617 else 618 return svm->vmcb01.ptr; 619 } 620 621 static inline void enable_gif(struct vcpu_svm *svm) 622 { 623 struct vmcb *vmcb = get_vgif_vmcb(svm); 624 625 if (vmcb) 626 vmcb->control.int_ctl |= V_GIF_MASK; 627 else 628 svm->guest_gif = true; 629 } 630 631 static inline void disable_gif(struct vcpu_svm *svm) 632 { 633 struct vmcb *vmcb = get_vgif_vmcb(svm); 634 635 if (vmcb) 636 vmcb->control.int_ctl &= ~V_GIF_MASK; 637 else 638 svm->guest_gif = false; 639 } 640 641 static inline bool gif_set(struct vcpu_svm *svm) 642 { 643 struct vmcb *vmcb = get_vgif_vmcb(svm); 644 645 if (vmcb) 646 return !!(vmcb->control.int_ctl & V_GIF_MASK); 647 else 648 return svm->guest_gif; 649 } 650 651 static inline bool nested_npt_enabled(struct vcpu_svm *svm) 652 { 653 return svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_NP; 654 } 655 656 static inline bool l2_has_separate_pat(struct kvm_vcpu *vcpu) 657 { 658 /* 659 * If KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT is disabled while a vCPU 660 * is running, the L2 IA32_PAT semantics for that vCPU are undefined. 661 */ 662 return nested_npt_enabled(to_svm(vcpu)) && 663 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT); 664 } 665 666 static inline bool nested_vnmi_enabled(struct vcpu_svm *svm) 667 { 668 return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VNMI) && 669 (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK); 670 } 671 672 static inline bool is_x2apic_msrpm_offset(u32 offset) 673 { 674 /* 4 msrs per u8, and 4 u8 in u32 */ 675 u32 msr = offset * 16; 676 677 return (msr >= APIC_BASE_MSR) && 678 (msr < (APIC_BASE_MSR + 0x100)); 679 } 680 681 static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm) 682 { 683 if (!vnmi) 684 return NULL; 685 686 if (is_guest_mode(&svm->vcpu)) 687 return NULL; 688 else 689 return svm->vmcb01.ptr; 690 } 691 692 static inline bool is_vnmi_enabled(struct vcpu_svm *svm) 693 { 694 struct vmcb *vmcb = get_vnmi_vmcb_l1(svm); 695 696 if (vmcb) 697 return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK); 698 else 699 return false; 700 } 701 702 static inline void svm_vmgexit_set_return_code(struct vcpu_svm *svm, 703 u64 response, u64 data) 704 { 705 ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, response); 706 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, data); 707 } 708 709 static inline void svm_vmgexit_inject_exception(struct vcpu_svm *svm, u8 vector) 710 { 711 u64 data = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT | vector; 712 713 svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_ISSUE_EXCEPTION, data); 714 } 715 716 static inline void svm_vmgexit_bad_input(struct vcpu_svm *svm, u64 suberror) 717 { 718 svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_MALFORMED_INPUT, suberror); 719 } 720 721 static inline void svm_vmgexit_success(struct vcpu_svm *svm, u64 data) 722 { 723 svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data); 724 } 725 726 static inline void svm_vmgexit_no_action(struct vcpu_svm *svm, u64 data) 727 { 728 svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data); 729 } 730 731 /* 732 * The MSRPM is 8KiB in size, divided into four 2KiB ranges (the fourth range 733 * is reserved). Each MSR within a range is covered by two bits, one each for 734 * read (bit 0) and write (bit 1), where a bit value of '1' means intercepted. 735 */ 736 #define SVM_MSRPM_BYTES_PER_RANGE 2048 737 #define SVM_BITS_PER_MSR 2 738 #define SVM_MSRS_PER_BYTE (BITS_PER_BYTE / SVM_BITS_PER_MSR) 739 #define SVM_MSRS_PER_RANGE (SVM_MSRPM_BYTES_PER_RANGE * SVM_MSRS_PER_BYTE) 740 static_assert(SVM_MSRS_PER_RANGE == 8192); 741 #define SVM_MSRPM_OFFSET_MASK (SVM_MSRS_PER_RANGE - 1) 742 743 static __always_inline int svm_msrpm_bit_nr(u32 msr) 744 { 745 int range_nr; 746 747 switch (msr & ~SVM_MSRPM_OFFSET_MASK) { 748 case 0: 749 range_nr = 0; 750 break; 751 case 0xc0000000: 752 range_nr = 1; 753 break; 754 case 0xc0010000: 755 range_nr = 2; 756 break; 757 default: 758 return -EINVAL; 759 } 760 761 return range_nr * SVM_MSRPM_BYTES_PER_RANGE * BITS_PER_BYTE + 762 (msr & SVM_MSRPM_OFFSET_MASK) * SVM_BITS_PER_MSR; 763 } 764 765 #define __BUILD_SVM_MSR_BITMAP_HELPER(rtype, action, bitop, access, bit_rw) \ 766 static inline rtype svm_##action##_msr_bitmap_##access(unsigned long *bitmap, \ 767 u32 msr) \ 768 { \ 769 int bit_nr; \ 770 \ 771 bit_nr = svm_msrpm_bit_nr(msr); \ 772 if (bit_nr < 0) \ 773 return (rtype)true; \ 774 \ 775 return bitop##_bit(bit_nr + bit_rw, bitmap); \ 776 } 777 778 #define BUILD_SVM_MSR_BITMAP_HELPERS(ret_type, action, bitop) \ 779 __BUILD_SVM_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0) \ 780 __BUILD_SVM_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 1) 781 782 BUILD_SVM_MSR_BITMAP_HELPERS(bool, test, test) 783 BUILD_SVM_MSR_BITMAP_HELPERS(void, clear, __clear) 784 BUILD_SVM_MSR_BITMAP_HELPERS(void, set, __set) 785 786 #define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR) 787 788 /* svm.c */ 789 extern bool dump_invalid_vmcb; 790 791 void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask); 792 793 static inline void *svm_vcpu_alloc_msrpm(void) 794 { 795 return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT); 796 } 797 798 #define svm_copy_lbrs(to, from) \ 799 do { \ 800 (to)->dbgctl = (from)->dbgctl; \ 801 (to)->br_from = (from)->br_from; \ 802 (to)->br_to = (from)->br_to; \ 803 (to)->last_excp_from = (from)->last_excp_from; \ 804 (to)->last_excp_to = (from)->last_excp_to; \ 805 } while (0) 806 807 void svm_vcpu_free_msrpm(void *msrpm); 808 void svm_enable_lbrv(struct kvm_vcpu *vcpu); 809 void svm_update_lbrv(struct kvm_vcpu *vcpu); 810 811 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); 812 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 813 void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); 814 void disable_nmi_singlestep(struct vcpu_svm *svm); 815 bool svm_smi_blocked(struct kvm_vcpu *vcpu); 816 bool svm_nmi_blocked(struct kvm_vcpu *vcpu); 817 bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); 818 void svm_set_gif(struct vcpu_svm *svm, bool value); 819 int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code); 820 void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, 821 int read, int write); 822 void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, 823 int trig_mode, int vec); 824 825 void svm_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set); 826 827 static inline void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu, 828 u32 msr, int type) 829 { 830 svm_set_intercept_for_msr(vcpu, msr, type, false); 831 } 832 833 static inline void svm_enable_intercept_for_msr(struct kvm_vcpu *vcpu, 834 u32 msr, int type) 835 { 836 svm_set_intercept_for_msr(vcpu, msr, type, true); 837 } 838 839 int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu); 840 841 /* nested.c */ 842 843 #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ 844 #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ 845 #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ 846 847 static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu) 848 { 849 struct vcpu_svm *svm = to_svm(vcpu); 850 851 return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK); 852 } 853 854 static inline bool nested_exit_on_smi(struct vcpu_svm *svm) 855 { 856 return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI); 857 } 858 859 static inline bool nested_exit_on_intr(struct vcpu_svm *svm) 860 { 861 return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR); 862 } 863 864 static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) 865 { 866 return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI); 867 } 868 869 int __init nested_svm_init_msrpm_merge_offsets(void); 870 871 int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, bool from_vmrun); 872 void svm_leave_nested(struct kvm_vcpu *vcpu); 873 void svm_free_nested(struct vcpu_svm *svm); 874 int svm_allocate_nested(struct vcpu_svm *svm); 875 int nested_svm_vmrun(struct kvm_vcpu *vcpu); 876 void svm_copy_vmrun_state(struct vmcb_save_area *to_save, 877 struct vmcb_save_area *from_save); 878 void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb); 879 void nested_svm_vmexit(struct vcpu_svm *svm); 880 881 static inline void nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) 882 { 883 svm->vmcb->control.exit_code = exit_code; 884 svm->vmcb->control.exit_info_1 = 0; 885 svm->vmcb->control.exit_info_2 = 0; 886 nested_svm_vmexit(svm); 887 } 888 889 int nested_svm_exit_handled(struct vcpu_svm *svm); 890 int nested_svm_check_permissions(struct kvm_vcpu *vcpu); 891 int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu); 892 int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 893 bool has_error_code, u32 error_code); 894 int nested_svm_exit_special(struct vcpu_svm *svm); 895 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); 896 void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu); 897 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, 898 struct vmcb_control_area *control); 899 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, 900 struct vmcb_save_area *save); 901 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm); 902 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb); 903 904 905 static inline void __svm_pmu_handle_nested_transition(struct vcpu_svm *svm, 906 bool defer) 907 { 908 struct kvm_pmu *pmu = vcpu_to_pmu(&svm->vcpu); 909 u64 counters = *(u64 *)pmu->pmc_has_mode_specific_enables; 910 911 __kvm_pmu_reprogram_counters(pmu, counters, defer); 912 } 913 914 static inline void svm_pmu_handle_nested_transition(struct vcpu_svm *svm) 915 { 916 /* 917 * Do NOT defer reprogramming the counters by default. Instructions 918 * causing a state change are counted based on the _new_ CPU state 919 * (e.g. a successful VMRUN is counted in guest mode). Hence, the 920 * counters should be reprogrammed with the new state _before_ the 921 * instruction is potentially counted upon emulation completion. 922 */ 923 __svm_pmu_handle_nested_transition(svm, false); 924 } 925 926 extern struct kvm_x86_nested_ops svm_nested_ops; 927 928 /* avic.c */ 929 #define AVIC_REQUIRED_APICV_INHIBITS \ 930 ( \ 931 BIT(APICV_INHIBIT_REASON_DISABLED) | \ 932 BIT(APICV_INHIBIT_REASON_ABSENT) | \ 933 BIT(APICV_INHIBIT_REASON_HYPERV) | \ 934 BIT(APICV_INHIBIT_REASON_NESTED) | \ 935 BIT(APICV_INHIBIT_REASON_IRQWIN) | \ 936 BIT(APICV_INHIBIT_REASON_PIT_REINJ) | \ 937 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \ 938 BIT(APICV_INHIBIT_REASON_SEV) | \ 939 BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \ 940 BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \ 941 BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) | \ 942 BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED) | \ 943 BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG) \ 944 ) 945 946 bool __init avic_hardware_setup(void); 947 void avic_hardware_unsetup(void); 948 int avic_alloc_physical_id_table(struct kvm *kvm); 949 void avic_vm_destroy(struct kvm *kvm); 950 int avic_vm_init(struct kvm *kvm); 951 void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); 952 int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); 953 int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); 954 int avic_init_vcpu(struct vcpu_svm *svm); 955 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 956 void avic_vcpu_put(struct kvm_vcpu *vcpu); 957 void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); 958 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); 959 int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, 960 unsigned int host_irq, uint32_t guest_irq, 961 struct kvm_vcpu *vcpu, u32 vector); 962 void avic_vcpu_blocking(struct kvm_vcpu *vcpu); 963 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu); 964 void avic_ring_doorbell(struct kvm_vcpu *vcpu); 965 unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu); 966 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu); 967 968 969 /* sev.c */ 970 971 int pre_sev_run(struct vcpu_svm *svm, int cpu); 972 void sev_init_vmcb(struct vcpu_svm *svm, bool init_event); 973 void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm); 974 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); 975 void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu); 976 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); 977 void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa); 978 void sev_es_unmap_ghcb(struct vcpu_svm *svm); 979 980 #ifdef CONFIG_KVM_AMD_SEV 981 int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp); 982 int sev_mem_enc_register_region(struct kvm *kvm, 983 struct kvm_enc_region *range); 984 int sev_mem_enc_unregister_region(struct kvm *kvm, 985 struct kvm_enc_region *range); 986 int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd); 987 int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd); 988 void sev_guest_memory_reclaimed(struct kvm *kvm); 989 int sev_handle_vmgexit(struct kvm_vcpu *vcpu); 990 991 /* These symbols are used in common code and are stubbed below. */ 992 993 struct page *snp_safe_alloc_page_node(int node, gfp_t gfp); 994 static inline struct page *snp_safe_alloc_page(void) 995 { 996 return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT); 997 } 998 999 int sev_vcpu_create(struct kvm_vcpu *vcpu); 1000 void sev_free_vcpu(struct kvm_vcpu *vcpu); 1001 void sev_vm_init(struct kvm *kvm); 1002 void sev_vm_destroy(struct kvm *kvm); 1003 void __init sev_set_cpu_caps(void); 1004 void __init sev_hardware_setup(void); 1005 void sev_hardware_unsetup(void); 1006 int sev_cpu_init(struct svm_cpu_data *sd); 1007 int sev_dev_get_attr(u32 group, u64 attr, u64 *val); 1008 extern unsigned int max_sev_asid; 1009 void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code); 1010 int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); 1011 void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); 1012 int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private); 1013 struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu); 1014 void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa); 1015 #else 1016 static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp) 1017 { 1018 return alloc_pages_node(node, gfp | __GFP_ZERO, 0); 1019 } 1020 1021 static inline struct page *snp_safe_alloc_page(void) 1022 { 1023 return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT); 1024 } 1025 1026 static inline int sev_vcpu_create(struct kvm_vcpu *vcpu) { return 0; } 1027 static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {} 1028 static inline void sev_vm_init(struct kvm *kvm) {} 1029 static inline void sev_vm_destroy(struct kvm *kvm) {} 1030 static inline void __init sev_set_cpu_caps(void) {} 1031 static inline void __init sev_hardware_setup(void) {} 1032 static inline void sev_hardware_unsetup(void) {} 1033 static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; } 1034 static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; } 1035 #define max_sev_asid 0 1036 static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {} 1037 static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order) 1038 { 1039 return 0; 1040 } 1041 static inline void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) {} 1042 static inline int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private) 1043 { 1044 return 0; 1045 } 1046 1047 static inline struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu) 1048 { 1049 return NULL; 1050 } 1051 static inline void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) {} 1052 #endif 1053 1054 /* vmenter.S */ 1055 1056 void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, unsigned int flags, 1057 struct sev_es_save_area *hostsa); 1058 void __svm_vcpu_run(struct vcpu_svm *svm, unsigned int flags); 1059 1060 #define DEFINE_KVM_GHCB_ACCESSORS(field) \ 1061 static __always_inline u64 kvm_ghcb_get_##field(struct vcpu_svm *svm) \ 1062 { \ 1063 return READ_ONCE(svm->sev_es.ghcb->save.field); \ 1064 } \ 1065 \ 1066 static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \ 1067 { \ 1068 return test_bit(GHCB_BITMAP_IDX(field), \ 1069 (unsigned long *)&svm->sev_es.valid_bitmap); \ 1070 } \ 1071 \ 1072 static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm) \ 1073 { \ 1074 return kvm_ghcb_##field##_is_valid(svm) ? kvm_ghcb_get_##field(svm) : 0; \ 1075 } 1076 1077 DEFINE_KVM_GHCB_ACCESSORS(cpl) 1078 DEFINE_KVM_GHCB_ACCESSORS(rax) 1079 DEFINE_KVM_GHCB_ACCESSORS(rcx) 1080 DEFINE_KVM_GHCB_ACCESSORS(rdx) 1081 DEFINE_KVM_GHCB_ACCESSORS(rbx) 1082 DEFINE_KVM_GHCB_ACCESSORS(rsi) 1083 DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code) 1084 DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1) 1085 DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2) 1086 DEFINE_KVM_GHCB_ACCESSORS(sw_scratch) 1087 DEFINE_KVM_GHCB_ACCESSORS(xcr0) 1088 DEFINE_KVM_GHCB_ACCESSORS(xss) 1089 1090 #endif 1091