/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

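/*
 * Compile-time checks that the kvm_smram_state_{32,64} layouts match the
 * architectural SMM state-save area: every field sits at a fixed offset,
 * relative to SMBASE, in the 0xFE00..0xFFFF range.
 */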
static void check_smram_offsets(void)
{
	/* 32 bit SMRAM image */
	CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
	CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
	CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
	CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
	CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
	CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
	CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
	CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
	CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
	CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
	CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
	CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
	CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
	CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
	CHECK_SMRAM32_OFFSET(fs, 0xFF38);
	CHECK_SMRAM32_OFFSET(gs, 0xFF44);
	CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
	CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
	CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
	CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
	CHECK_SMRAM32_OFFSET(es, 0xFF84);
	CHECK_SMRAM32_OFFSET(cs, 0xFF90);
	CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
	CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
	CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
	CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
	CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
	CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
	CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
	CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
	CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
	CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
	CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
	CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
	CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
	CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
	CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
	CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);

	/* 64 bit SMRAM image */
	CHECK_SMRAM64_OFFSET(es, 0xFE00);
	CHECK_SMRAM64_OFFSET(cs, 0xFE10);
	CHECK_SMRAM64_OFFSET(ss, 0xFE20);
	CHECK_SMRAM64_OFFSET(ds, 0xFE30);
	CHECK_SMRAM64_OFFSET(fs, 0xFE40);
	CHECK_SMRAM64_OFFSET(gs, 0xFE50);
	CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
	CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
	CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
	CHECK_SMRAM64_OFFSET(tr, 0xFE90);
	CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
	CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
	CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
	CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
	CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
	CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
	CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
	CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
	CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
	CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
	CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
	CHECK_SMRAM64_OFFSET(efer, 0xFED0);
	CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
	CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
	CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
	CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
	CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
	CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
	CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
	CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
	CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
	CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
	CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
	CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
	CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
	CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
	CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
	CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
	CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
	CHECK_SMRAM64_OFFSET(rip, 0xFF78);
	CHECK_SMRAM64_OFFSET(gprs, 0xFF80);

	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

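/*
 * Track SMM entry/exit in vcpu->arch.hflags.  The MMU context is reset on
 * both transitions because the SMM flag is part of the MMU role.
 */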
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

	if (entering_smm) {
		vcpu->arch.hflags |= HF_SMM_MASK;
	} else {
		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

		/* Process a latched INIT or SMI, if any. */
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory
		 */
		vcpu->arch.pdptrs_from_userspace = false;
	}

	kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_smm_changed);

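/* Record a pending SMI; it is injected from the event handling path. */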
void process_smi(struct kvm_vcpu *vcpu)
{
	vcpu->arch.smi_pending = true;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

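/*
 * Pack segment attributes into the "flags" format used by the 32-bit SMRAM
 * segment state; bits 8..23 mirror a descriptor's attribute bits
 * (type, S, DPL, P, AVL, L, D/B, G).
 */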
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
	u32 flags = 0;
	flags |= seg->g << 23;
	flags |= seg->db << 22;
	flags |= seg->l << 21;
	flags |= seg->avl << 20;
	flags |= seg->present << 15;
	flags |= seg->dpl << 13;
	flags |= seg->s << 12;
	flags |= seg->type << 8;
	return flags;
}

static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_32 *state,
				  u32 *selector, int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	*selector = seg.selector;
	state->base = seg.base;
	state->limit = seg.limit;
	state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_64 *state,
				  int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	state->selector = seg.selector;
	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
	state->limit = seg.limit;
	state->base = seg.base;
}
#endif

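/*
 * Snapshot the current vCPU state into the 32-bit SMRAM state-save image:
 * control registers, GPRs, debug registers, segments, descriptor tables,
 * SMBASE and the SMM revision ID.
 */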
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_32 *smram)
{
	struct desc_ptr dt;
	int i;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->eflags = kvm_get_rflags(vcpu);
	smram->eip = kvm_rip_read(vcpu);

	for (i = 0; i < 8; i++)
		smram->gprs[i] = kvm_register_read_raw(vcpu, i);

	smram->dr6 = (u32)vcpu->arch.dr6;
	smram->dr7 = (u32)vcpu->arch.dr7;

	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.base = dt.address;
	smram->gdtr.limit = dt.size;

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.base = dt.address;
	smram->idtr.limit = dt.size;

	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

	smram->cr4 = kvm_read_cr4(vcpu);
	smram->smm_revision = 0x00020000;
	smram->smbase = vcpu->arch.smbase;

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
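/*
 * 64-bit counterpart of enter_smm_save_state_32(): additionally saves EFER,
 * all 16 GPRs and, for SHSTK-capable guests, the shadow stack pointer.
 */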
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_64 *smram)
{
	struct desc_ptr dt;
	int i;

	for (i = 0; i < 16; i++)
		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

	smram->rip = kvm_rip_read(vcpu);
	smram->rflags = kvm_get_rflags(vcpu);

	smram->dr6 = vcpu->arch.dr6;
	smram->dr7 = vcpu->arch.dr7;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->cr4 = kvm_read_cr4(vcpu);

	smram->smbase = vcpu->arch.smbase;
	smram->smm_revison = 0x00020064;

	smram->efer = vcpu->arch.efer;

	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.limit = dt.size;
	smram->idtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.limit = dt.size;
	smram->gdtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
	    kvm_msr_read(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, &smram->ssp))
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
#endif

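/*
 * Emulate SMI delivery: save the current vCPU state into an SMRAM image,
 * let the vendor module adjust state (e.g. leave nested guest mode), write
 * the image to guest memory at SMBASE + 0xFE00, and then load the
 * architectural SMM entry state (CS.base = SMBASE, RIP = 0x8000, flat data
 * segments, paging and protection disabled).
 */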
void enter_smm(struct kvm_vcpu *vcpu)
{
	struct kvm_segment cs, ds;
	struct desc_ptr dt;
	unsigned long cr0;
	union kvm_smram smram;

	check_smram_offsets();

	memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		enter_smm_save_state_64(vcpu, &smram.smram64);
	else
#endif
		enter_smm_save_state_32(vcpu, &smram.smram32);

	/*
	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. leave guest mode) after we've saved the state into the
	 * SMM state-save area.
	 *
	 * Kill the VM in the unlikely case of failure, because the VM
	 * can be left in an undefined state.
	 */
	if (kvm_x86_call(enter_smm)(vcpu, &smram))
		goto error;

	kvm_smm_changed(vcpu, true);

	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
		goto error;

	if (kvm_x86_call(get_nmi_mask)(vcpu))
		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
	else
		kvm_x86_call(set_nmi_mask)(vcpu, true);

	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
	kvm_rip_write(vcpu, 0x8000);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
	kvm_x86_call(set_cr0)(vcpu, cr0);

	kvm_x86_call(set_cr4)(vcpu, 0);

	/* Undocumented: IDT limit is set to zero on entry to SMM. */
	dt.address = dt.size = 0;
	kvm_x86_call(set_idt)(vcpu, &dt);

	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
		goto error;

	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
	cs.base = vcpu->arch.smbase;

	ds.selector = 0;
	ds.base = 0;

	cs.limit = ds.limit = 0xffffffff;
	cs.type = ds.type = 0x3;
	cs.dpl = ds.dpl = 0;
	cs.db = ds.db = 0;
	cs.s = ds.s = 1;
	cs.l = ds.l = 0;
	cs.g = ds.g = 1;
	cs.avl = ds.avl = 0;
	cs.present = ds.present = 1;
	cs.unusable = ds.unusable = 0;
	cs.padding = ds.padding = 0;

	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		if (kvm_x86_call(set_efer)(vcpu, 0))
			goto error;
#endif

	vcpu->arch.cpuid_dynamic_bits_dirty = true;
	kvm_mmu_reset_context(vcpu);
	return;
error:
	kvm_vm_dead(vcpu->kvm);
}

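/*
 * Inverse of enter_smm_get_segment_flags(): unpack the SMRAM segment flags
 * into a struct kvm_segment, marking non-present segments as unusable.
 */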
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
	desc->g = (flags >> 23) & 1;
	desc->db = (flags >> 22) & 1;
	desc->l = (flags >> 21) & 1;
	desc->avl = (flags >> 20) & 1;
	desc->present = (flags >> 15) & 1;
	desc->dpl = (flags >> 13) & 3;
	desc->s = (flags >> 12) & 1;
	desc->type = (flags >> 8) & 15;

	desc->unusable = !desc->present;
	desc->padding = 0;
}

static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_32 *state,
			   u16 selector, int n)
{
	struct kvm_segment desc;

	desc.selector = selector;
	desc.base = state->base;
	desc.limit = state->limit;
	rsm_set_desc_flags(&desc, state->flags);
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_64 *state,
			   int n)
{
	struct kvm_segment desc;

	desc.selector = state->selector;
	rsm_set_desc_flags(&desc, state->attributes << 8);
	desc.limit = state->limit;
	desc.base = state->base;
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}
#endif

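/*
 * Restore CR0/CR3/CR4 in an order that keeps the intermediate states
 * architecturally valid: CR3 first (PCID bits masked off if PCIDE will be
 * set), then CR4 without PCIDE, then CR0, and finally CR4 with PCIDE
 * followed by CR3 with the saved PCID.
 */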
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
				    u64 cr0, u64 cr3, u64 cr4)
{
	int bad;
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = kvm_set_cr3(vcpu, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE; long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode. However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = kvm_set_cr0(vcpu, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = kvm_set_cr4(vcpu, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
		if (pcid) {
			bad = kvm_set_cr3(vcpu, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}
	}

	return X86EMUL_CONTINUE;
}

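/*
 * Load the vCPU state saved in the 32-bit SMRAM image: GPRs, EIP/EFLAGS,
 * debug registers, descriptor tables, segments, SMBASE, control registers
 * and the interrupt shadow.
 */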
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_32 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
	ctxt->_eip = smstate->eip;

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = smstate->gprs[i];

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

	dt.address = smstate->gdtr.base;
	dt.size = smstate->gdtr.limit;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	dt.address = smstate->idtr.base;
	dt.size = smstate->idtr.limit;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

	vcpu->arch.smbase = smstate->smbase;

	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
				     smstate->cr3, smstate->cr4);

	if (r != X86EMUL_CONTINUE)
		return r;

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return r;
}

#ifdef CONFIG_X86_64
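/*
 * 64-bit counterpart of rsm_load_state_32(): additionally restores EFER
 * and, for SHSTK-capable guests, the shadow stack pointer.
 */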
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_64 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = smstate->gprs[15 - i];

	ctxt->_eip = smstate->rip;
	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	vcpu->arch.smbase = smstate->smbase;

	if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

	dt.size = smstate->idtr.limit;
	dt.address = smstate->idtr.base;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

	dt.size = smstate->gdtr.limit;
	dt.address = smstate->gdtr.base;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
	    kvm_msr_write(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, smstate->ssp))
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}
#endif

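/*
 * Emulate RSM: read the SMRAM image back from guest memory, unmask NMIs
 * unless they were masked when the SMI was taken, drop to an unpaged,
 * unprotected state so that CR0/CR3/CR4/EFER can be reloaded safely, give
 * the vendor module a chance to react, and then load the saved state.
 */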
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	unsigned long cr0;
	union kvm_smram smram;
	u64 smbase;
	int ret;

	smbase = vcpu->arch.smbase;

	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
	if (ret < 0)
		return X86EMUL_UNHANDLEABLE;

	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
		kvm_x86_call(set_nmi_mask)(vcpu, false);

	kvm_smm_changed(vcpu, false);

	/*
	 * Get back to real mode, to prepare a safe state in which to load
	 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
	 * supports long mode.
	 */
#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
		struct kvm_segment cs_desc;
		unsigned long cr4;

		/* Zero CR4.PCIDE before CR0.PG. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PCIDE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA. */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.present = 1;
		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
	}
#endif

	/* For the 64-bit case, this will clear EFER.LMA. */
	cr0 = kvm_read_cr0(vcpu);
	if (cr0 & X86_CR0_PE)
		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
		unsigned long cr4, efer;

		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PAE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode. */
		efer = 0;
		__kvm_emulate_msr_write(vcpu, MSR_EFER, efer);
	}
#endif

	/*
	 * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
	 * mode should happen _after_ loading state from SMRAM.  However, KVM
	 * piggybacks the nested VM-Enter flows (which is wrong for many other
	 * reasons), and so nSVM/nVMX would clobber state that is loaded from
	 * SMRAM and from the VMCS/VMCB.
	 */
	if (kvm_x86_call(leave_smm)(vcpu, &smram))
		return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		ret = rsm_load_state_64(ctxt, &smram.smram64);
	else
#endif
		ret = rsm_load_state_32(ctxt, &smram.smram32);

	/*
	 * If RSM fails and triggers shutdown, architecturally the shutdown
	 * occurs *before* the transition to guest mode.  But due to KVM's
	 * flawed handling of RSM to L2 (see above), the vCPU may already be
	 * in_guest_mode().  Force the vCPU out of guest mode before delivering
	 * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
	 * that architecturally shouldn't be possible.
	 */
	if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
		kvm_leave_nested(vcpu);
	return ret;
}