xref: /linux/arch/x86/kvm/smm.c (revision 256e3417065b2721f77bcd37331796b59483ef3b)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3 
4 #include <linux/kvm_host.h>
5 #include "x86.h"
6 #include "kvm_cache_regs.h"
7 #include "kvm_emulate.h"
8 #include "smm.h"
9 #include "cpuid.h"
10 #include "trace.h"
11 
12 #define CHECK_SMRAM32_OFFSET(field, offset) \
13 	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
14 
15 #define CHECK_SMRAM64_OFFSET(field, offset) \
16 	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
17 
/*
 * Compile-time layout verification: kvm_smram_state_32/64 must exactly
 * mirror the architectural SMRAM state-save area, which occupies the top
 * 512 bytes ([0xFE00, 0xFFFF]) of the SMM state-save region.  Offsets
 * below are the architectural ones; the macros subtract the 0xFE00 base.
 */
static void check_smram_offsets(void)
{
	/* 32 bit SMRAM image */
	CHECK_SMRAM32_OFFSET(reserved1,			0xFE00);
	CHECK_SMRAM32_OFFSET(smbase,			0xFEF8);
	CHECK_SMRAM32_OFFSET(smm_revision,		0xFEFC);
	CHECK_SMRAM32_OFFSET(io_inst_restart,		0xFF00);
	CHECK_SMRAM32_OFFSET(auto_hlt_restart,		0xFF02);
	CHECK_SMRAM32_OFFSET(io_restart_rdi,		0xFF04);
	CHECK_SMRAM32_OFFSET(io_restart_rcx,		0xFF08);
	CHECK_SMRAM32_OFFSET(io_restart_rsi,		0xFF0C);
	CHECK_SMRAM32_OFFSET(io_restart_rip,		0xFF10);
	CHECK_SMRAM32_OFFSET(cr4,			0xFF14);
	CHECK_SMRAM32_OFFSET(reserved2,			0xFF18);
	CHECK_SMRAM32_OFFSET(int_shadow,		0xFF1A);
	CHECK_SMRAM32_OFFSET(reserved3,			0xFF1B);
	CHECK_SMRAM32_OFFSET(ds,			0xFF2C);
	CHECK_SMRAM32_OFFSET(fs,			0xFF38);
	CHECK_SMRAM32_OFFSET(gs,			0xFF44);
	CHECK_SMRAM32_OFFSET(idtr,			0xFF50);
	CHECK_SMRAM32_OFFSET(tr,			0xFF5C);
	CHECK_SMRAM32_OFFSET(gdtr,			0xFF6C);
	CHECK_SMRAM32_OFFSET(ldtr,			0xFF78);
	CHECK_SMRAM32_OFFSET(es,			0xFF84);
	CHECK_SMRAM32_OFFSET(cs,			0xFF90);
	CHECK_SMRAM32_OFFSET(ss,			0xFF9C);
	CHECK_SMRAM32_OFFSET(es_sel,			0xFFA8);
	CHECK_SMRAM32_OFFSET(cs_sel,			0xFFAC);
	CHECK_SMRAM32_OFFSET(ss_sel,			0xFFB0);
	CHECK_SMRAM32_OFFSET(ds_sel,			0xFFB4);
	CHECK_SMRAM32_OFFSET(fs_sel,			0xFFB8);
	CHECK_SMRAM32_OFFSET(gs_sel,			0xFFBC);
	CHECK_SMRAM32_OFFSET(ldtr_sel,			0xFFC0);
	CHECK_SMRAM32_OFFSET(tr_sel,			0xFFC4);
	CHECK_SMRAM32_OFFSET(dr7,			0xFFC8);
	CHECK_SMRAM32_OFFSET(dr6,			0xFFCC);
	CHECK_SMRAM32_OFFSET(gprs,			0xFFD0);
	CHECK_SMRAM32_OFFSET(eip,			0xFFF0);
	CHECK_SMRAM32_OFFSET(eflags,			0xFFF4);
	CHECK_SMRAM32_OFFSET(cr3,			0xFFF8);
	CHECK_SMRAM32_OFFSET(cr0,			0xFFFC);

	/* 64 bit SMRAM image */
	CHECK_SMRAM64_OFFSET(es,			0xFE00);
	CHECK_SMRAM64_OFFSET(cs,			0xFE10);
	CHECK_SMRAM64_OFFSET(ss,			0xFE20);
	CHECK_SMRAM64_OFFSET(ds,			0xFE30);
	CHECK_SMRAM64_OFFSET(fs,			0xFE40);
	CHECK_SMRAM64_OFFSET(gs,			0xFE50);
	CHECK_SMRAM64_OFFSET(gdtr,			0xFE60);
	CHECK_SMRAM64_OFFSET(ldtr,			0xFE70);
	CHECK_SMRAM64_OFFSET(idtr,			0xFE80);
	CHECK_SMRAM64_OFFSET(tr,			0xFE90);
	CHECK_SMRAM64_OFFSET(io_restart_rip,		0xFEA0);
	CHECK_SMRAM64_OFFSET(io_restart_rcx,		0xFEA8);
	CHECK_SMRAM64_OFFSET(io_restart_rsi,		0xFEB0);
	CHECK_SMRAM64_OFFSET(io_restart_rdi,		0xFEB8);
	CHECK_SMRAM64_OFFSET(io_restart_dword,		0xFEC0);
	CHECK_SMRAM64_OFFSET(reserved1,			0xFEC4);
	CHECK_SMRAM64_OFFSET(io_inst_restart,		0xFEC8);
	CHECK_SMRAM64_OFFSET(auto_hlt_restart,		0xFEC9);
	CHECK_SMRAM64_OFFSET(amd_nmi_mask,		0xFECA);
	CHECK_SMRAM64_OFFSET(int_shadow,		0xFECB);
	CHECK_SMRAM64_OFFSET(reserved2,			0xFECC);
	CHECK_SMRAM64_OFFSET(efer,			0xFED0);
	CHECK_SMRAM64_OFFSET(svm_guest_flag,		0xFED8);
	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,	0xFEE0);
	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,	0xFEE8);
	CHECK_SMRAM64_OFFSET(reserved3,			0xFEF0);
	CHECK_SMRAM64_OFFSET(smm_revison,		0xFEFC);
	CHECK_SMRAM64_OFFSET(smbase,			0xFF00);
	CHECK_SMRAM64_OFFSET(reserved4,			0xFF04);
	CHECK_SMRAM64_OFFSET(ssp,			0xFF18);
	CHECK_SMRAM64_OFFSET(svm_guest_pat,		0xFF20);
	CHECK_SMRAM64_OFFSET(svm_host_efer,		0xFF28);
	CHECK_SMRAM64_OFFSET(svm_host_cr4,		0xFF30);
	CHECK_SMRAM64_OFFSET(svm_host_cr3,		0xFF38);
	CHECK_SMRAM64_OFFSET(svm_host_cr0,		0xFF40);
	CHECK_SMRAM64_OFFSET(cr4,			0xFF48);
	CHECK_SMRAM64_OFFSET(cr3,			0xFF50);
	CHECK_SMRAM64_OFFSET(cr0,			0xFF58);
	CHECK_SMRAM64_OFFSET(dr7,			0xFF60);
	CHECK_SMRAM64_OFFSET(dr6,			0xFF68);
	CHECK_SMRAM64_OFFSET(rflags,			0xFF70);
	CHECK_SMRAM64_OFFSET(rip,			0xFF78);
	CHECK_SMRAM64_OFFSET(gprs,			0xFF80);

	/* Both images must fill the 512-byte state-save area exactly. */
	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}
107 
108 #undef CHECK_SMRAM64_OFFSET
109 #undef CHECK_SMRAM32_OFFSET
110 
111 
/*
 * Update vCPU bookkeeping for a transition into or out of SMM.  On exit,
 * re-arm latched events and force the PDPTRs to be re-read from guest
 * memory.  SMM-ness is part of the MMU role, so the MMU context is reset
 * in both directions.
 */
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

	if (entering_smm) {
		vcpu->arch.hflags |= HF_SMM_MASK;
	} else {
		/* Leaving SMM also clears the "SMM entered from NMI" flag. */
		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

		/* Process a latched INIT or SMI, if any.  */
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory
		 */
		vcpu->arch.pdptrs_from_userspace = false;
	}

	kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_smm_changed);
135 
/*
 * Latch a pending SMI and request event processing; the actual entry to
 * SMM happens later on the event-injection path.
 */
void process_smi(struct kvm_vcpu *vcpu)
{
	vcpu->arch.smi_pending = true;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}
141 
enter_smm_get_segment_flags(struct kvm_segment * seg)142 static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
143 {
144 	u32 flags = 0;
145 	flags |= seg->g       << 23;
146 	flags |= seg->db      << 22;
147 	flags |= seg->l       << 21;
148 	flags |= seg->avl     << 20;
149 	flags |= seg->present << 15;
150 	flags |= seg->dpl     << 13;
151 	flags |= seg->s       << 12;
152 	flags |= seg->type    << 8;
153 	return flags;
154 }
155 
/*
 * Save segment register @n into the 32-bit SMRAM image: base/limit/flags
 * go into @state, while the selector is written to the separate @selector
 * slot (the 32-bit layout keeps selectors apart from descriptor state).
 */
static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_32 *state,
				  u32 *selector, int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	*selector = seg.selector;
	state->base = seg.base;
	state->limit = seg.limit;
	state->flags = enter_smm_get_segment_flags(&seg);
}
168 
#ifdef CONFIG_X86_64
/*
 * Save segment register @n into the 64-bit SMRAM image.  The 64-bit
 * layout stores the selector inline and only the upper 16 bits of the
 * packed flags word (the "attributes"), hence the >> 8.
 */
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_64 *state,
				  int n)
{
	struct kvm_segment seg;

	kvm_get_segment(vcpu, &seg, n);
	state->selector = seg.selector;
	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
	state->limit = seg.limit;
	state->base = seg.base;
}
#endif
183 
/*
 * Fill the 32-bit SMRAM state-save image from the current vCPU state.
 * Called on SMM entry when the guest doesn't support long mode.
 */
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_32 *smram)
{
	struct desc_ptr dt;
	int i;

	smram->cr0     = kvm_read_cr0(vcpu);
	smram->cr3     = kvm_read_cr3(vcpu);
	smram->eflags  = kvm_get_rflags(vcpu);
	smram->eip     = kvm_rip_read(vcpu);

	/* Only the eight legacy GPRs exist in the 32-bit image. */
	for (i = 0; i < 8; i++)
		smram->gprs[i] = kvm_register_read_raw(vcpu, i);

	smram->dr6     = (u32)vcpu->arch.dr6;
	smram->dr7     = (u32)vcpu->arch.dr7;

	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.base = dt.address;
	smram->gdtr.limit = dt.size;

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.base = dt.address;
	smram->idtr.limit = dt.size;

	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

	smram->cr4 = kvm_read_cr4(vcpu);
	/* Revision 0x00020000: 32-bit save format, SMBASE relocation supported. */
	smram->smm_revision = 0x00020000;
	smram->smbase = vcpu->arch.smbase;

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
226 
#ifdef CONFIG_X86_64
/*
 * Fill the 64-bit SMRAM state-save image from the current vCPU state.
 * Called on SMM entry when the guest supports long mode.
 */
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_64 *smram)
{
	struct desc_ptr dt;
	int i;

	/* GPRs are stored in reverse order: gprs[0] = R15 ... gprs[15] = RAX. */
	for (i = 0; i < 16; i++)
		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

	smram->rip    = kvm_rip_read(vcpu);
	smram->rflags = kvm_get_rflags(vcpu);

	smram->dr6 = vcpu->arch.dr6;
	smram->dr7 = vcpu->arch.dr7;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->cr4 = kvm_read_cr4(vcpu);

	smram->smbase = vcpu->arch.smbase;
	/* Revision 0x00020064: 64-bit save format, SMBASE relocation supported. */
	smram->smm_revison = 0x00020064;

	smram->efer = vcpu->arch.efer;

	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

	kvm_x86_call(get_idt)(vcpu, &dt);
	smram->idtr.limit = dt.size;
	smram->idtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &dt);
	smram->gdtr.limit = dt.size;
	smram->gdtr.base = dt.address;

	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);

	/* Save the shadow-stack pointer; failure to read it is fatal. */
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
	    kvm_msr_read(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, &smram->ssp))
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
#endif
278 
/*
 * Emulate entry into System Management Mode: save the current vCPU state
 * into the SMRAM state-save area, then load the architectural SMM
 * execution environment (paging/protection off, flat 4GiB segments,
 * RIP = 0x8000, NMIs masked).  The VM is killed on any failure, since
 * the vCPU could otherwise be left half-switched.
 */
void enter_smm(struct kvm_vcpu *vcpu)
{
	struct kvm_segment cs, ds;
	struct desc_ptr dt;
	unsigned long cr0;
	union kvm_smram smram;

	check_smram_offsets();

	memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		enter_smm_save_state_64(vcpu, &smram.smram64);
	else
#endif
		enter_smm_save_state_32(vcpu, &smram.smram32);

	/*
	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
	 * state (e.g. leave guest mode) after we've saved the state into the
	 * SMM state-save area.
	 *
	 * Kill the VM in the unlikely case of failure, because the VM
	 * can be in undefined state in this case.
	 */
	if (kvm_x86_call(enter_smm)(vcpu, &smram))
		goto error;

	kvm_smm_changed(vcpu, true);

	/* The state-save area lives at the top of SMRAM, at SMBASE + 0xfe00. */
	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
		goto error;

	/* NMIs are blocked in SMM; remember if one was already in progress. */
	if (kvm_x86_call(get_nmi_mask)(vcpu))
		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
	else
		kvm_x86_call(set_nmi_mask)(vcpu, true);

	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
	/* The SMI handler's entry point is at offset 0x8000 from SMBASE. */
	kvm_rip_write(vcpu, 0x8000);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

	/* Clear PE/EM/TS/PG: SMM starts in a real-mode-like environment. */
	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
	kvm_x86_call(set_cr0)(vcpu, cr0);

	kvm_x86_call(set_cr4)(vcpu, 0);

	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
	dt.address = dt.size = 0;
	kvm_x86_call(set_idt)(vcpu, &dt);

	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
		goto error;

	/* CS base is SMBASE; the selector mirrors it real-mode style. */
	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
	cs.base = vcpu->arch.smbase;

	ds.selector = 0;
	ds.base = 0;

	/* Flat 4GiB read/write data-style segments ("big real mode"). */
	cs.limit    = ds.limit = 0xffffffff;
	cs.type     = ds.type = 0x3;
	cs.dpl      = ds.dpl = 0;
	cs.db       = ds.db = 0;
	cs.s        = ds.s = 1;
	cs.l        = ds.l = 0;
	cs.g        = ds.g = 1;
	cs.avl      = ds.avl = 0;
	cs.present  = ds.present = 1;
	cs.unusable = ds.unusable = 0;
	cs.padding  = ds.padding = 0;

	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
	/* Leave long mode entirely; EFER is cleared on SMM entry. */
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		if (kvm_x86_call(set_efer)(vcpu, 0))
			goto error;
#endif

	vcpu->arch.cpuid_dynamic_bits_dirty = true;
	kvm_mmu_reset_context(vcpu);
	return;
error:
	kvm_vm_dead(vcpu->kvm);
}
372 
/*
 * Unpack the 32-bit SMRAM "flags" word into a kvm_segment's attribute
 * fields; the inverse of enter_smm_get_segment_flags().
 */
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
	desc->type = (flags >> 8)  & 0xf;
	desc->s    = (flags >> 12) & 1;
	desc->dpl  = (flags >> 13) & 3;
	desc->present = (flags >> 15) & 1;
	desc->avl  = (flags >> 20) & 1;
	desc->l    = (flags >> 21) & 1;
	desc->db   = (flags >> 22) & 1;
	desc->g    = (flags >> 23) & 1;

	/* A non-present segment is marked unusable for the VMCS/VMCB. */
	desc->unusable = !desc->present;
	desc->padding = 0;
}
387 
/*
 * Load segment register @n from the 32-bit SMRAM image, combining the
 * descriptor state in @state with the separately-stored @selector.
 * Always succeeds; returns X86EMUL_CONTINUE for caller convenience.
 */
static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_32 *state,
			   u16 selector, int n)
{
	struct kvm_segment desc;

	desc.selector =           selector;
	desc.base =               state->base;
	desc.limit =              state->limit;
	rsm_set_desc_flags(&desc, state->flags);
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}
401 
#ifdef CONFIG_X86_64

/*
 * Load segment register @n from the 64-bit SMRAM image.  The stored
 * "attributes" are the upper 16 bits of the packed flags word, hence
 * the << 8 before unpacking.  Always returns X86EMUL_CONTINUE.
 */
static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_64 *state,
			   int n)
{
	struct kvm_segment desc;

	desc.selector =           state->selector;
	rsm_set_desc_flags(&desc, state->attributes << 8);
	desc.limit =              state->limit;
	desc.base =               state->base;
	kvm_set_segment(vcpu, &desc, n);
	return X86EMUL_CONTINUE;
}
#endif
418 
/*
 * Restore CR3/CR4/CR0 from SMRAM in an order that keeps every
 * intermediate state architecturally valid: CR3 first (with the PCID
 * bits masked off if PCIDE will be set), then CR4 without PCIDE, then
 * CR0, and only then PCIDE and the PCID itself.  Do NOT reorder these
 * writes.  Returns X86EMUL_UNHANDLEABLE on the first failed write.
 */
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
				    u64 cr0, u64 cr3, u64 cr4)
{
	int bad;
	u64 pcid;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
	pcid = 0;
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	bad = kvm_set_cr3(vcpu, cr3);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode.	However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = kvm_set_cr0(vcpu, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = kvm_set_cr4(vcpu, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
		if (pcid) {
			bad = kvm_set_cr3(vcpu, cr3 | pcid);
			if (bad)
				return X86EMUL_UNHANDLEABLE;
		}

	}

	return X86EMUL_CONTINUE;
}
463 
/*
 * Restore vCPU state from the 32-bit SMRAM state-save image on RSM.
 * EIP and EFLAGS go into the emulation context (@ctxt) rather than
 * directly into the vCPU; everything else is loaded immediately.
 * Returns X86EMUL_CONTINUE on success, X86EMUL_UNHANDLEABLE otherwise.
 */
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_32 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	/* X86_EFLAGS_FIXED keeps the always-set reserved bit 1 set. */
	ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
	ctxt->_eip =  smstate->eip;

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = smstate->gprs[i];

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

	dt.address =               smstate->gdtr.base;
	dt.size =                  smstate->gdtr.limit;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	dt.address =               smstate->idtr.base;
	dt.size =                  smstate->idtr.limit;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

	vcpu->arch.smbase = smstate->smbase;

	/* CRs last: rsm_enter_protected_mode() handles the tricky ordering. */
	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
					smstate->cr3, smstate->cr4);

	if (r != X86EMUL_CONTINUE)
		return r;

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return r;
}
514 
#ifdef CONFIG_X86_64
/*
 * Restore vCPU state from the 64-bit SMRAM state-save image on RSM.
 * RIP/RFLAGS go into the emulation context (@ctxt); EFER is restored
 * with LMA masked off, which is recomputed when CR0/CR4 are loaded.
 * Returns X86EMUL_CONTINUE on success, X86EMUL_UNHANDLEABLE otherwise.
 */
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_64 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr dt;
	int i, r;

	/* GPRs are stored in reverse order: gprs[0] = R15 ... gprs[15] = RAX. */
	for (i = 0; i < 16; i++)
		*reg_write(ctxt, i) = smstate->gprs[15 - i];

	ctxt->_eip   = smstate->rip;
	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

	if (kvm_set_dr(vcpu, 6, smstate->dr6))
		return X86EMUL_UNHANDLEABLE;
	if (kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	vcpu->arch.smbase =         smstate->smbase;

	if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

	dt.size =                   smstate->idtr.limit;
	dt.address =                smstate->idtr.base;
	kvm_x86_call(set_idt)(vcpu, &dt);

	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

	dt.size =                   smstate->gdtr.limit;
	dt.address =                smstate->gdtr.base;
	kvm_x86_call(set_gdt)(vcpu, &dt);

	/* CRs before segments: segment checks depend on the new mode. */
	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
	if (r != X86EMUL_CONTINUE)
		return r;

	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	/* Restore the shadow-stack pointer saved on SMM entry. */
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
	    kvm_msr_write(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, smstate->ssp))
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}
#endif
572 
/*
 * Emulate RSM: read the state-save area back from SMRAM, unwind the vCPU
 * to an unpaged, unprotected state (so CR0/CR3/CR4/EFER can be loaded in
 * a valid order), then restore the saved state.  Returns an X86EMUL_*
 * status for the emulator.
 */
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	unsigned long cr0;
	union kvm_smram smram;
	u64 smbase;
	int ret;

	smbase = vcpu->arch.smbase;

	/* The state-save area lives at the top of SMRAM, at SMBASE + 0xfe00. */
	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
	if (ret < 0)
		return X86EMUL_UNHANDLEABLE;

	/* Unmask NMIs unless this SMM was itself entered from an NMI. */
	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
		kvm_x86_call(set_nmi_mask)(vcpu, false);

	kvm_smm_changed(vcpu, false);

	/*
	 * Get back to real mode, to prepare a safe state in which to load
	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
	 * supports long mode.
	 */
#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
		struct kvm_segment cs_desc;
		unsigned long cr4;

		/* Zero CR4.PCIDE before CR0.PG.  */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PCIDE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

		/* A 32-bit code segment is required to clear EFER.LMA.  */
		memset(&cs_desc, 0, sizeof(cs_desc));
		cs_desc.type = 0xb;
		cs_desc.s = cs_desc.g = cs_desc.present = 1;
		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
	}
#endif

	/* For the 64-bit case, this will clear EFER.LMA.  */
	cr0 = kvm_read_cr0(vcpu);
	if (cr0 & X86_CR0_PE)
		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
		unsigned long cr4, efer;

		/* Clear CR4.PAE before clearing EFER.LME. */
		cr4 = kvm_read_cr4(vcpu);
		if (cr4 & X86_CR4_PAE)
			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

		/* And finally go back to 32-bit mode.  */
		efer = 0;
		__kvm_emulate_msr_write(vcpu, MSR_EFER, efer);
	}
#endif

	/*
	 * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
	 * mode should happen _after_ loading state from SMRAM.  However, KVM
	 * piggybacks the nested VM-Enter flows (which is wrong for many other
	 * reasons), and so nSVM/nVMX would clobber state that is loaded from
	 * SMRAM and from the VMCS/VMCB.
	 */
	if (kvm_x86_call(leave_smm)(vcpu, &smram))
		return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
		ret = rsm_load_state_64(ctxt, &smram.smram64);
	else
#endif
		ret = rsm_load_state_32(ctxt, &smram.smram32);

	/*
	 * If RSM fails and triggers shutdown, architecturally the shutdown
	 * occurs *before* the transition to guest mode.  But due to KVM's
	 * flawed handling of RSM to L2 (see above), the vCPU may already be
	 * in_guest_mode().  Force the vCPU out of guest mode before delivering
	 * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
	 * that architecturally shouldn't be possible.
	 */
	if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
		kvm_leave_nested(vcpu);
	return ret;
}
664