/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

static void check_smram_offsets(void)
{
        /* 32 bit SMRAM image */
        CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
        CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
        CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
        CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
        CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
        CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
        CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
        CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
        CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
        CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
        CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
        CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
        CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
        CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
        CHECK_SMRAM32_OFFSET(fs, 0xFF38);
        CHECK_SMRAM32_OFFSET(gs, 0xFF44);
        CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
        CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
        CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
        CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
        CHECK_SMRAM32_OFFSET(es, 0xFF84);
        CHECK_SMRAM32_OFFSET(cs, 0xFF90);
        CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
        CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
        CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
        CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
        CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
        CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
        CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
        CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
        CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
        CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
        CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
        CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
        CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
        CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
        CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
        CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

        /* 64 bit SMRAM image */
        CHECK_SMRAM64_OFFSET(es, 0xFE00);
        CHECK_SMRAM64_OFFSET(cs, 0xFE10);
        CHECK_SMRAM64_OFFSET(ss, 0xFE20);
        CHECK_SMRAM64_OFFSET(ds, 0xFE30);
        CHECK_SMRAM64_OFFSET(fs, 0xFE40);
        CHECK_SMRAM64_OFFSET(gs, 0xFE50);
        CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
        CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
        CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
        CHECK_SMRAM64_OFFSET(tr, 0xFE90);
        CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
        CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
        CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
        CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
        CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
        CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
        CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
        CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
        CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
        CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
        CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
        CHECK_SMRAM64_OFFSET(efer, 0xFED0);
        CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
        CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
        CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
        CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
        CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
        CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
        CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
        CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
        CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
        CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
        CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
        CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
        CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
        CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
        CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
        CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
        CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
        CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
        CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
        CHECK_SMRAM64_OFFSET(rip, 0xFF78);
        CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

        BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

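/*
 * Toggle HF_SMM_MASK as the vCPU enters or leaves SMM.  On SMM exit, also
 * re-evaluate pending events (a latched INIT or SMI) and force the PDPTRs to
 * be reloaded from guest memory.  The MMU context is reset in both
 * directions, as the SMM flag is part of the MMU role.
 */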
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
        trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

        if (entering_smm) {
                vcpu->arch.hflags |= HF_SMM_MASK;
        } else {
                vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

                /* Process a latched INIT or SMI, if any. */
                kvm_make_request(KVM_REQ_EVENT, vcpu);

                /*
                 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
                 * on SMM exit we still need to reload them from
                 * guest memory
                 */
                vcpu->arch.pdptrs_from_userspace = false;
        }

        kvm_mmu_reset_context(vcpu);
}

void process_smi(struct kvm_vcpu *vcpu)
{
        vcpu->arch.smi_pending = true;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}

static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
        u32 flags = 0;
        flags |= seg->g << 23;
        flags |= seg->db << 22;
        flags |= seg->l << 21;
        flags |= seg->avl << 20;
        flags |= seg->present << 15;
        flags |= seg->dpl << 13;
        flags |= seg->s << 12;
        flags |= seg->type << 8;
        return flags;
}

static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_32 *state,
                                  u32 *selector, int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        *selector = seg.selector;
        state->base = seg.base;
        state->limit = seg.limit;
        state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_64 *state,
                                  int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        state->selector = seg.selector;
        state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
        state->limit = seg.limit;
        state->base = seg.base;
}
#endif

static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
{
        struct desc_ptr dt;
        int i;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->eflags = kvm_get_rflags(vcpu);
        smram->eip = kvm_rip_read(vcpu);

        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);

        smram->dr6 = (u32)vcpu->arch.dr6;
        smram->dr7 = (u32)vcpu->arch.dr7;

        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.base = dt.address;
        smram->gdtr.limit = dt.size;

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.base = dt.address;
        smram->idtr.limit = dt.size;

        enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
        enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
        enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

        enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
        enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
        enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

        smram->cr4 = kvm_read_cr4(vcpu);
        smram->smm_revision = 0x00020000;
        smram->smbase = vcpu->arch.smbase;

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
{
        struct desc_ptr dt;
        int i;

        for (i = 0; i < 16; i++)
                smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

        smram->rip = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);

        smram->dr6 = vcpu->arch.dr6;
        smram->dr7 = vcpu->arch.dr7;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->cr4 = kvm_read_cr4(vcpu);

        smram->smbase = vcpu->arch.smbase;
        smram->smm_revison = 0x00020064;

        smram->efer = vcpu->arch.efer;

        enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.limit = dt.size;
        smram->idtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.limit = dt.size;
        smram->gdtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
        enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
        enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
        enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
        enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
        enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
#endif

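/*
 * Emulate SMI delivery: save the current vCPU state into the SMM state-save
 * area at SMBASE + 0xFE00, then switch the vCPU to the SMM execution
 * environment: RIP = 0x8000, CS.base = SMBASE, flat 4GiB segments, paging
 * and protection disabled, and NMIs masked (HF_SMM_INSIDE_NMI_MASK records
 * the case where NMIs were already masked on entry).
 */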
void enter_smm(struct kvm_vcpu *vcpu)
{
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        unsigned long cr0;
        union kvm_smram smram;

        check_smram_offsets();

        memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, &smram.smram64);
        else
#endif
                enter_smm_save_state_32(vcpu, &smram.smram32);

        /*
         * Give enter_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. leave guest mode) after we've saved the state into the
         * SMM state-save area.
         *
         * Kill the VM in the unlikely case of failure, because the VM
         * can be in undefined state in this case.
         */
        if (kvm_x86_call(enter_smm)(vcpu, &smram))
                goto error;

        kvm_smm_changed(vcpu, true);

        if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
                goto error;

        if (kvm_x86_call(get_nmi_mask)(vcpu))
                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
        else
                kvm_x86_call(set_nmi_mask)(vcpu, true);

        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0x8000);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

        cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
        kvm_x86_call(set_cr0)(vcpu, cr0);

        kvm_x86_call(set_cr4)(vcpu, 0);

        /* Undocumented: IDT limit is set to zero on entry to SMM. */
        dt.address = dt.size = 0;
        kvm_x86_call(set_idt)(vcpu, &dt);

        if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
                goto error;

        cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
        cs.base = vcpu->arch.smbase;

        ds.selector = 0;
        ds.base = 0;

        cs.limit = ds.limit = 0xffffffff;
        cs.type = ds.type = 0x3;
        cs.dpl = ds.dpl = 0;
        cs.db = ds.db = 0;
        cs.s = ds.s = 1;
        cs.l = ds.l = 0;
        cs.g = ds.g = 1;
        cs.avl = ds.avl = 0;
        cs.present = ds.present = 1;
        cs.unusable = ds.unusable = 0;
        cs.padding = ds.padding = 0;

        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                if (kvm_x86_call(set_efer)(vcpu, 0))
                        goto error;
#endif

        kvm_update_cpuid_runtime(vcpu);
        kvm_mmu_reset_context(vcpu);
        return;
error:
        kvm_vm_dead(vcpu->kvm);
}

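/*
 * Unpack segment attributes from the SMRAM image format, i.e. the inverse of
 * enter_smm_get_segment_flags(); a non-present segment is also marked
 * unusable.
 */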
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
        desc->g = (flags >> 23) & 1;
        desc->db = (flags >> 22) & 1;
        desc->l = (flags >> 21) & 1;
        desc->avl = (flags >> 20) & 1;
        desc->present = (flags >> 15) & 1;
        desc->dpl = (flags >> 13) & 3;
        desc->s = (flags >> 12) & 1;
        desc->type = (flags >> 8) & 15;

        desc->unusable = !desc->present;
        desc->padding = 0;
}

static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_32 *state,
                           u16 selector, int n)
{
        struct kvm_segment desc;

        desc.selector = selector;
        desc.base = state->base;
        desc.limit = state->limit;
        rsm_set_desc_flags(&desc, state->flags);
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_64 *state,
                           int n)
{
        struct kvm_segment desc;

        desc.selector = state->selector;
        rsm_set_desc_flags(&desc, state->attributes << 8);
        desc.limit = state->limit;
        desc.base = state->base;
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
#endif

static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
                                    u64 cr0, u64 cr3, u64 cr4)
{
        int bad;
        u64 pcid;

        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
        pcid = 0;
        if (cr4 & X86_CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfff;
        }

        bad = kvm_set_cr3(vcpu, cr3);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode. However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = kvm_set_cr0(vcpu, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = kvm_set_cr4(vcpu, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
                        bad = kvm_set_cr3(vcpu, cr3 | pcid);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }

        }

        return X86EMUL_CONTINUE;
}

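/*
 * Reload the register, descriptor-table, segment and control-register state
 * that enter_smm_save_state_32() stashed in the 32-bit SMRAM image.
 */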
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_32 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
        ctxt->_eip = smstate->eip;

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = smstate->gprs[i];

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
        rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

        dt.address = smstate->gdtr.base;
        dt.size = smstate->gdtr.limit;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        dt.address = smstate->idtr.base;
        dt.size = smstate->idtr.limit;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
        rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
        rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

        rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
        rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
        rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

        vcpu->arch.smbase = smstate->smbase;

        r = rsm_enter_protected_mode(vcpu, smstate->cr0,
                                     smstate->cr3, smstate->cr4);

        if (r != X86EMUL_CONTINUE)
                return r;

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return r;
}

#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_64 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = smstate->gprs[15 - i];

        ctxt->_eip = smstate->rip;
        ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        vcpu->arch.smbase = smstate->smbase;

        if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

        dt.size = smstate->idtr.limit;
        dt.address = smstate->idtr.base;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

        dt.size = smstate->gdtr.limit;
        dt.address = smstate->gdtr.base;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
        rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
        rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
        rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
        rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
        rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return X86EMUL_CONTINUE;
}
#endif

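/*
 * Emulate RSM: read the state-save area back from SMRAM, drop the vCPU into
 * a state from which CR0/CR3/CR4/EFER can be restored safely (paging and
 * protection off; PCIDE, PAE and LME cleared for long-mode capable guests),
 * then reload the saved state in the 32-bit or 64-bit format.
 */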
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        unsigned long cr0;
        union kvm_smram smram;
        u64 smbase;
        int ret;

        smbase = vcpu->arch.smbase;

        ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
        if (ret < 0)
                return X86EMUL_UNHANDLEABLE;

        if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
                kvm_x86_call(set_nmi_mask)(vcpu, false);

        kvm_smm_changed(vcpu, false);

        /*
         * Get back to real mode, to prepare a safe state in which to load
         * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
         * supports long mode.
         */
#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                struct kvm_segment cs_desc;
                unsigned long cr4;

                /* Zero CR4.PCIDE before CR0.PG. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PCIDE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

                /* A 32-bit code segment is required to clear EFER.LMA. */
                memset(&cs_desc, 0, sizeof(cs_desc));
                cs_desc.type = 0xb;
                cs_desc.s = cs_desc.g = cs_desc.present = 1;
                kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
        }
#endif

        /* For the 64-bit case, this will clear EFER.LMA. */
        cr0 = kvm_read_cr0(vcpu);
        if (cr0 & X86_CR0_PE)
                kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                unsigned long cr4, efer;

                /* Clear CR4.PAE before clearing EFER.LME. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PAE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

                /* And finally go back to 32-bit mode. */
                efer = 0;
                kvm_set_msr(vcpu, MSR_EFER, efer);
        }
#endif

        /*
         * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
         * mode should happen _after_ loading state from SMRAM. However, KVM
         * piggybacks the nested VM-Enter flows (which is wrong for many other
         * reasons), and so nSVM/nVMX would clobber state that is loaded from
         * SMRAM and from the VMCS/VMCB.
         */
        if (kvm_x86_call(leave_smm)(vcpu, &smram))
                return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                ret = rsm_load_state_64(ctxt, &smram.smram64);
        else
#endif
                ret = rsm_load_state_32(ctxt, &smram.smram32);

        /*
         * If RSM fails and triggers shutdown, architecturally the shutdown
         * occurs *before* the transition to guest mode. But due to KVM's
         * flawed handling of RSM to L2 (see above), the vCPU may already be
         * in_guest_mode(). Force the vCPU out of guest mode before delivering
         * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
         * that architecturally shouldn't be possible.
         */
        if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
                kvm_leave_nested(vcpu);
        return ret;
}