/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
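/*
 * The struct layouts mirror the 512-byte SMRAM state-save area that
 * enter_smm() and emulator_leave_smm() access at SMBASE + 0xFE00, which is
 * why the architectural offsets used below have 0xFE00 subtracted.  The
 * checks are compile-time asserts; check_smram_offsets() generates no code.
 */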
static void check_smram_offsets(void)
{
	/* 32 bit SMRAM image */
	CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
	CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
	CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
	CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
	CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
	CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
	CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
	CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
	CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
	CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
	CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
	CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
	CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
	CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
	CHECK_SMRAM32_OFFSET(fs, 0xFF38);
	CHECK_SMRAM32_OFFSET(gs, 0xFF44);
	CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
	CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
	CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
	CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
	CHECK_SMRAM32_OFFSET(es, 0xFF84);
	CHECK_SMRAM32_OFFSET(cs, 0xFF90);
	CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
	CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
	CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
	CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
	CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
	CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
	CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
	CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
	CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
	CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
	CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
	CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
	CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
	CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
	CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
	CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

	/* 64 bit SMRAM image */
	CHECK_SMRAM64_OFFSET(es, 0xFE00);
	CHECK_SMRAM64_OFFSET(cs, 0xFE10);
	CHECK_SMRAM64_OFFSET(ss, 0xFE20);
	CHECK_SMRAM64_OFFSET(ds, 0xFE30);
	CHECK_SMRAM64_OFFSET(fs, 0xFE40);
	CHECK_SMRAM64_OFFSET(gs, 0xFE50);
	CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
	CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
	CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
	CHECK_SMRAM64_OFFSET(tr, 0xFE90);
	CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
	CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
	CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
	CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
	CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
	CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
	CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
	CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
	CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
	CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
	CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
	CHECK_SMRAM64_OFFSET(efer, 0xFED0);
	CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
	CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
	CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
	CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
	CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
	CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
	CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
	CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
	CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
	CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
	CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
	CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
	CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
	CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
	CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
	CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
	CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
	CHECK_SMRAM64_OFFSET(rip, 0xFF78);
	CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

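/*
 * Toggle the vCPU's SMM state.  HF_SMM_MASK gates KVM's SMM handling, and the
 * MMU context must be reset because KVM tracks SMM in the MMU role (SMM can
 * have its own view of guest memory, e.g. a separate memslot address space).
 */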
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

	if (entering_smm) {
		vcpu->arch.hflags |= HF_SMM_MASK;
	} else {
		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

		/* Process a latched INIT or SMI, if any. */
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory
		 */
		vcpu->arch.pdptrs_from_userspace = false;
	}

	kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_smm_changed);

void process_smi(struct kvm_vcpu *vcpu)
{
	vcpu->arch.smi_pending = true;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
	u32 flags = 0;
	flags |= seg->g << 23;
	flags |= seg->db << 22;
	flags |= seg->l << 21;
	flags |= seg->avl << 20;
	flags |= seg->present << 15;
	flags |= seg->dpl << 13;
	flags |= seg->s << 12;
	flags |= seg->type << 8;
	return flags;
}

static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_32 *state,
				  u32 *selector, int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	*selector = seg.selector;
	state->base = seg.base;
	state->limit = seg.limit;
	state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_64 *state,
				  int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	state->selector = seg.selector;
	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
	state->limit = seg.limit;
	state->base = seg.base;
}
#endif

static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_32 *smram)
{
	struct desc_ptr dt;
	int i;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->eflags = kvm_get_rflags(vcpu);
	smram->eip = kvm_rip_read(vcpu);

	for (i = 0; i < 8; i++)
		smram->gprs[i] = kvm_register_read_raw(vcpu, i);

	smram->dr6 = (u32)vcpu->arch.dr6;
	smram->dr7 = (u32)vcpu->arch.dr7;

	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.base = dt.address;
	smram->gdtr.limit = dt.size;

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.base = dt.address;
	smram->idtr.limit = dt.size;

	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

	smram->cr4 = kvm_read_cr4(vcpu);
	smram->smm_revision = 0x00020000;
	smram->smbase = vcpu->arch.smbase;

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_64 *smram)
{
	struct desc_ptr dt;
	int i;

	for (i = 0; i < 16; i++)
		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

	smram->rip = kvm_rip_read(vcpu);
	smram->rflags = kvm_get_rflags(vcpu);

	smram->dr6 = vcpu->arch.dr6;
	smram->dr7 = vcpu->arch.dr7;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->cr4 = kvm_read_cr4(vcpu);

	smram->smbase = vcpu->arch.smbase;
	smram->smm_revison = 0x00020064;

	smram->efer = vcpu->arch.efer;

	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.limit = dt.size;
	smram->idtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.limit = dt.size;
	smram->gdtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
#endif
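/*
 * Emulate SMI delivery: save the current vCPU state into the 512-byte
 * state-save area at SMBASE + 0xFE00, then switch to the SMM execution
 * environment: flat 4GiB segments with CS.base = SMBASE, RIP = 0x8000 (the
 * SMI handler entry point, i.e. SMBASE + 0x8000), RFLAGS reduced to the
 * fixed bit, CR0.PE/PG cleared, CR4 = 0 and, for long-mode capable guests,
 * EFER = 0.
 */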
void enter_smm(struct kvm_vcpu *vcpu)
{
	struct kvm_segment cs, ds;
	struct desc_ptr dt;
	unsigned long cr0;
	union kvm_smram smram;

	check_smram_offsets();

	memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		enter_smm_save_state_64(vcpu, &smram.smram64);
	else
#endif
		enter_smm_save_state_32(vcpu, &smram.smram32);

	/*
	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. leave guest mode) after we've saved the state into the
	 * SMM state-save area.
	 *
	 * Kill the VM in the unlikely case of failure, because the VM
	 * can be in undefined state in this case.
	 */
	if (kvm_x86_call(enter_smm)(vcpu, &smram))
		goto error;

	kvm_smm_changed(vcpu, true);

	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
		goto error;

	if (kvm_x86_call(get_nmi_mask)(vcpu))
		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
	else
		kvm_x86_call(set_nmi_mask)(vcpu, true);

	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
	kvm_rip_write(vcpu, 0x8000);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
	kvm_x86_call(set_cr0)(vcpu, cr0);

	kvm_x86_call(set_cr4)(vcpu, 0);

	/* Undocumented: IDT limit is set to zero on entry to SMM. */
	dt.address = dt.size = 0;
	kvm_x86_call(set_idt)(vcpu, &dt);

	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
		goto error;

	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
	cs.base = vcpu->arch.smbase;

	ds.selector = 0;
	ds.base = 0;

	cs.limit = ds.limit = 0xffffffff;
	cs.type = ds.type = 0x3;
	cs.dpl = ds.dpl = 0;
	cs.db = ds.db = 0;
	cs.s = ds.s = 1;
	cs.l = ds.l = 0;
	cs.g = ds.g = 1;
	cs.avl = ds.avl = 0;
	cs.present = ds.present = 1;
	cs.unusable = ds.unusable = 0;
	cs.padding = ds.padding = 0;

	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		if (kvm_x86_call(set_efer)(vcpu, 0))
			goto error;
#endif

	vcpu->arch.cpuid_dynamic_bits_dirty = true;
	kvm_mmu_reset_context(vcpu);
	return;
error:
	kvm_vm_dead(vcpu->kvm);
}

static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
	desc->g = (flags >> 23) & 1;
	desc->db = (flags >> 22) & 1;
	desc->l = (flags >> 21) & 1;
	desc->avl = (flags >> 20) & 1;
	desc->present = (flags >> 15) & 1;
	desc->dpl = (flags >> 13) & 3;
	desc->s = (flags >> 12) & 1;
	desc->type = (flags >> 8) & 15;

	desc->unusable = !desc->present;
	desc->padding = 0;
}

static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_32 *state,
			   u16 selector, int n)
{
	struct kvm_segment desc;

	desc.selector = selector;
	desc.base = state->base;
	desc.limit = state->limit;
	rsm_set_desc_flags(&desc, state->flags);
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_64 *state,
			   int n)
{
	struct kvm_segment desc;

	desc.selector = state->selector;
	rsm_set_desc_flags(&desc, state->attributes << 8);
	desc.limit = state->limit;
	desc.base = state->base;
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}
#endif

static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
				    u64 cr0, u64 cr3, u64 cr4)
{
	int bad;
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = kvm_set_cr3(vcpu, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode. However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = kvm_set_cr0(vcpu, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = kvm_set_cr4(vcpu, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
		if (pcid) {
			bad = kvm_set_cr3(vcpu, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}

	}

	return X86EMUL_CONTINUE;
}
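/*
 * Load the 32-bit saved state back into the vCPU.  Values that fail
 * validation (e.g. reserved bits set in the DR or CR images) are reported
 * as X86EMUL_UNHANDLEABLE, which the RSM emulation treats as fatal (see the
 * shutdown handling in emulator_leave_smm() below).
 */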
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_32 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
	ctxt->_eip = smstate->eip;

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = smstate->gprs[i];

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

	dt.address = smstate->gdtr.base;
	dt.size = smstate->gdtr.limit;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	dt.address = smstate->idtr.base;
	dt.size = smstate->idtr.limit;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

	vcpu->arch.smbase = smstate->smbase;

	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
				     smstate->cr3, smstate->cr4);

	if (r != X86EMUL_CONTINUE)
		return r;

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return r;
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_64 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = smstate->gprs[15 - i];

	ctxt->_eip = smstate->rip;
	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	vcpu->arch.smbase = smstate->smbase;

	if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

	dt.size = smstate->idtr.limit;
	dt.address = smstate->idtr.base;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

	dt.size = smstate->gdtr.limit;
	dt.address = smstate->gdtr.base;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return X86EMUL_CONTINUE;
}
#endif
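/*
 * Emulate RSM: re-read the 512-byte state-save area from SMBASE + 0xFE00,
 * unmask NMIs unless they were already masked when the SMI was taken, and
 * step the vCPU back to a state in which CR0/CR3/CR4/EFER can be safely
 * reloaded from the image (clear CR4.PCIDE, then CR0.PG/PE, then CR4.PAE
 * and EFER.LME for long-mode capable guests).
 */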
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	unsigned long cr0;
	union kvm_smram smram;
	u64 smbase;
	int ret;

	smbase = vcpu->arch.smbase;

	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
	if (ret < 0)
		return X86EMUL_UNHANDLEABLE;

	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
		kvm_x86_call(set_nmi_mask)(vcpu, false);

	kvm_smm_changed(vcpu, false);

	/*
	 * Get back to real mode, to prepare a safe state in which to load
	 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
	 * supports long mode.
	 */
#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
		struct kvm_segment cs_desc;
		unsigned long cr4;

		/* Zero CR4.PCIDE before CR0.PG. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PCIDE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA. */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.present = 1;
		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
	}
#endif

	/* For the 64-bit case, this will clear EFER.LMA. */
	cr0 = kvm_read_cr0(vcpu);
	if (cr0 & X86_CR0_PE)
		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
		unsigned long cr4, efer;

		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PAE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode. */
		efer = 0;
		kvm_set_msr(vcpu, MSR_EFER, efer);
	}
#endif

	/*
	 * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
	 * mode should happen _after_ loading state from SMRAM. However, KVM
	 * piggybacks the nested VM-Enter flows (which is wrong for many other
	 * reasons), and so nSVM/nVMX would clobber state that is loaded from
	 * SMRAM and from the VMCS/VMCB.
	 */
	if (kvm_x86_call(leave_smm)(vcpu, &smram))
		return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		ret = rsm_load_state_64(ctxt, &smram.smram64);
	else
#endif
		ret = rsm_load_state_32(ctxt, &smram.smram32);

	/*
	 * If RSM fails and triggers shutdown, architecturally the shutdown
	 * occurs *before* the transition to guest mode. But due to KVM's
	 * flawed handling of RSM to L2 (see above), the vCPU may already be
	 * in_guest_mode(). Force the vCPU out of guest mode before delivering
	 * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
	 * that architecturally shouldn't be possible.
	 */
	if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
		kvm_leave_nested(vcpu);
	return ret;
}