1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
3 * emulate.c
4 *
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 *
7 * Copyright (c) 2005 Keir Fraser
8 *
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
11 *
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
14 *
15 * Avi Kivity <avi@qumranet.com>
16 * Yaniv Kamay <yaniv@qumranet.com>
17 *
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
19 */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22 #include <linux/kvm_host.h>
23 #include "kvm_cache_regs.h"
24 #include "kvm_emulate.h"
25 #include <linux/stringify.h>
26 #include <asm/debugreg.h>
27 #include <asm/nospec-branch.h>
28 #include <asm/ibt.h>
29
30 #include "x86.h"
31 #include "tss.h"
32 #include "mmu.h"
33 #include "pmu.h"
34
35 /*
36 * Operand types
37 */
38 #define OpNone 0ull
39 #define OpImplicit 1ull /* No generic decode */
40 #define OpReg 2ull /* Register */
41 #define OpMem 3ull /* Memory */
42 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
43 #define OpDI 5ull /* ES:DI/EDI/RDI */
44 #define OpMem64 6ull /* Memory, 64-bit */
45 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
46 #define OpDX 8ull /* DX register */
47 #define OpCL 9ull /* CL register (for shifts) */
48 #define OpImmByte 10ull /* 8-bit sign extended immediate */
49 #define OpOne 11ull /* Implied 1 */
50 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
51 #define OpMem16 13ull /* Memory operand (16-bit). */
52 #define OpMem32 14ull /* Memory operand (32-bit). */
53 #define OpImmU 15ull /* Immediate operand, zero extended */
54 #define OpSI 16ull /* SI/ESI/RSI */
55 #define OpImmFAddr 17ull /* Immediate far address */
56 #define OpMemFAddr 18ull /* Far address in memory */
57 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
58 #define OpES 20ull /* ES */
59 #define OpCS 21ull /* CS */
60 #define OpSS 22ull /* SS */
61 #define OpDS 23ull /* DS */
62 #define OpFS 24ull /* FS */
63 #define OpGS 25ull /* GS */
64 #define OpMem8 26ull /* 8-bit zero extended memory operand */
65 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
66 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
67 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
68 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
69
70 #define OpBits 5 /* Width of operand field */
71 #define OpMask ((1ull << OpBits) - 1)
72
73 /*
74 * Opcode effective-address decode tables.
75 * Note that we only emulate instructions that have at least one memory
76 * operand (excluding implicit stack references). We assume that stack
77 * references and instruction fetches will never occur in special memory
78 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
79 * not be handled.
80 */
81
82 /* Operand sizes: 8-bit operands or specified/overridden size. */
83 #define ByteOp (1<<0) /* 8-bit operands. */
84 /* Destination operand type. */
85 #define DstShift 1
86 #define ImplicitOps (OpImplicit << DstShift)
87 #define DstReg (OpReg << DstShift)
88 #define DstMem (OpMem << DstShift)
89 #define DstAcc (OpAcc << DstShift)
90 #define DstDI (OpDI << DstShift)
91 #define DstMem64 (OpMem64 << DstShift)
92 #define DstMem16 (OpMem16 << DstShift)
93 #define DstImmUByte (OpImmUByte << DstShift)
94 #define DstDX (OpDX << DstShift)
95 #define DstAccLo (OpAccLo << DstShift)
96 #define DstMask (OpMask << DstShift)
97 /* Source operand type. */
98 #define SrcShift 6
99 #define SrcNone (OpNone << SrcShift)
100 #define SrcReg (OpReg << SrcShift)
101 #define SrcMem (OpMem << SrcShift)
102 #define SrcMem16 (OpMem16 << SrcShift)
103 #define SrcMem32 (OpMem32 << SrcShift)
104 #define SrcImm (OpImm << SrcShift)
105 #define SrcImmByte (OpImmByte << SrcShift)
106 #define SrcOne (OpOne << SrcShift)
107 #define SrcImmUByte (OpImmUByte << SrcShift)
108 #define SrcImmU (OpImmU << SrcShift)
109 #define SrcSI (OpSI << SrcShift)
110 #define SrcXLat (OpXLat << SrcShift)
111 #define SrcImmFAddr (OpImmFAddr << SrcShift)
112 #define SrcMemFAddr (OpMemFAddr << SrcShift)
113 #define SrcAcc (OpAcc << SrcShift)
114 #define SrcImmU16 (OpImmU16 << SrcShift)
115 #define SrcImm64 (OpImm64 << SrcShift)
116 #define SrcDX (OpDX << SrcShift)
117 #define SrcMem8 (OpMem8 << SrcShift)
118 #define SrcAccHi (OpAccHi << SrcShift)
119 #define SrcMask (OpMask << SrcShift)
120 #define BitOp (1<<11)
121 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
122 #define String (1<<13) /* String instruction (rep capable) */
123 #define Stack (1<<14) /* Stack instruction (push/pop) */
124 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
125 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
126 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
127 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
128 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
129 #define Escape (5<<15) /* Escape to coprocessor instruction */
130 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
131 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
132 #define Sse (1<<18) /* SSE Vector instruction */
133 /* Generic ModRM decode. */
134 #define ModRM (1<<19)
135 /* Destination is only written; never read. */
136 #define Mov (1<<20)
137 /* Misc flags */
138 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
139 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
140 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
141 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
142 #define Undefined (1<<25) /* No Such Instruction */
143 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
144 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
145 #define No64 (1<<28)
146 #define PageTable (1 << 29) /* instruction used to write page table */
147 #define NotImpl (1 << 30) /* instruction is not implemented */
148 /* Source 2 operand type */
149 #define Src2Shift (31)
150 #define Src2None (OpNone << Src2Shift)
151 #define Src2Mem (OpMem << Src2Shift)
152 #define Src2CL (OpCL << Src2Shift)
153 #define Src2ImmByte (OpImmByte << Src2Shift)
154 #define Src2One (OpOne << Src2Shift)
155 #define Src2Imm (OpImm << Src2Shift)
156 #define Src2ES (OpES << Src2Shift)
157 #define Src2CS (OpCS << Src2Shift)
158 #define Src2SS (OpSS << Src2Shift)
159 #define Src2DS (OpDS << Src2Shift)
160 #define Src2FS (OpFS << Src2Shift)
161 #define Src2GS (OpGS << Src2Shift)
162 #define Src2Mask (OpMask << Src2Shift)
163 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
164 #define AlignMask ((u64)7 << 41)
165 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
166 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
167 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
168 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
169 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
170 #define NoWrite ((u64)1 << 45) /* No writeback */
171 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
172 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
173 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
174 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
175 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
176 #define NearBranch ((u64)1 << 52) /* Near branches */
177 #define No16 ((u64)1 << 53) /* No 16 bit operand */
178 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
179 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
180 #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
181 #define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
182
183 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
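/*
 * Worked example (derived from the definitions above): a table entry
 * declared as DstReg | SrcMem | ModRM packs OpReg (2) into bits 5:1 and
 * OpMem (3) into bits 10:6 of the flags word, so the decoder can recover
 * the operand types with:
 *
 *	(flags & DstMask) >> DstShift == OpReg
 *	(flags & SrcMask) >> SrcShift == OpMem
 */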
184
185 #define X2(x...) x, x
186 #define X3(x...) X2(x), x
187 #define X4(x...) X2(x), X2(x)
188 #define X5(x...) X4(x), x
189 #define X6(x...) X4(x), X2(x)
190 #define X7(x...) X4(x), X3(x)
191 #define X8(x...) X4(x), X4(x)
192 #define X16(x...) X8(x), X8(x)
193
194 struct opcode {
195 u64 flags;
196 u8 intercept;
197 u8 pad[7];
198 union {
199 int (*execute)(struct x86_emulate_ctxt *ctxt);
200 const struct opcode *group;
201 const struct group_dual *gdual;
202 const struct gprefix *gprefix;
203 const struct escape *esc;
204 const struct instr_dual *idual;
205 const struct mode_dual *mdual;
206 void (*fastop)(struct fastop *fake);
207 } u;
208 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
209 };
210
211 struct group_dual {
212 struct opcode mod012[8];
213 struct opcode mod3[8];
214 };
215
216 struct gprefix {
217 struct opcode pfx_no;
218 struct opcode pfx_66;
219 struct opcode pfx_f2;
220 struct opcode pfx_f3;
221 };
222
223 struct escape {
224 struct opcode op[8];
225 struct opcode high[64];
226 };
227
228 struct instr_dual {
229 struct opcode mod012;
230 struct opcode mod3;
231 };
232
233 struct mode_dual {
234 struct opcode mode32;
235 struct opcode mode64;
236 };
237
238 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
239
240 enum x86_transfer_type {
241 X86_TRANSFER_NONE,
242 X86_TRANSFER_CALL_JMP,
243 X86_TRANSFER_RET,
244 X86_TRANSFER_TASK_SWITCH,
245 };
246
247 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
248 {
249 unsigned long dirty = ctxt->regs_dirty;
250 unsigned reg;
251
252 for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
253 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
254 }
255
256 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
257 {
258 ctxt->regs_dirty = 0;
259 ctxt->regs_valid = 0;
260 }
261
262 /*
263 * These EFLAGS bits are restored from saved value during emulation, and
264 * any changes are written back to the saved value after emulation.
265 */
266 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
267 X86_EFLAGS_PF|X86_EFLAGS_CF)
268
269 #ifdef CONFIG_X86_64
270 #define ON64(x) x
271 #else
272 #define ON64(x)
273 #endif
274
275 /*
276 * fastop functions have a special calling convention:
277 *
278 * dst: rax (in/out)
279 * src: rdx (in/out)
280 * src2: rcx (in)
281 * flags: rflags (in/out)
282 * ex: rsi (in:fastop pointer, out:zero if exception)
283 *
284 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
285 * different operand sizes can be reached by calculation, rather than a jump
286 * table (which would be bigger than the code).
287 *
288 * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
289 * and 1 for the straight line speculation INT3, leaves 7 bytes for the
290 * body of the function. Currently none is larger than 4.
291 */
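/*
 * Rough sketch of how a size variant is reached (the actual dispatch is
 * assumed to live further down in this file): the FASTOP* macros below lay
 * out the byte/word/dword/qword bodies back to back, FASTOP_SIZE bytes
 * apart, so for instance the 32-bit body of em_add (generated by
 * FASTOP2(add) below) sits at
 *
 *	em_add + 2 * FASTOP_SIZE	/* index 0=b, 1=w, 2=l, 3=q */
 */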
292 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
293
294 #define FASTOP_SIZE 16
295
296 #define __FOP_FUNC(name) \
297 ".align " __stringify(FASTOP_SIZE) " \n\t" \
298 ".type " name ", @function \n\t" \
299 name ":\n\t" \
300 ASM_ENDBR \
301 IBT_NOSEAL(name)
302
303 #define FOP_FUNC(name) \
304 __FOP_FUNC(#name)
305
306 #define __FOP_RET(name) \
307 "11: " ASM_RET \
308 ".size " name ", .-" name "\n\t"
309
310 #define FOP_RET(name) \
311 __FOP_RET(#name)
312
313 #define __FOP_START(op, align) \
314 extern void em_##op(struct fastop *fake); \
315 asm(".pushsection .text, \"ax\" \n\t" \
316 ".global em_" #op " \n\t" \
317 ".align " __stringify(align) " \n\t" \
318 "em_" #op ":\n\t"
319
320 #define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
321
322 #define FOP_END \
323 ".popsection")
324
325 #define __FOPNOP(name) \
326 __FOP_FUNC(name) \
327 __FOP_RET(name)
328
329 #define FOPNOP() \
330 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
331
332 #define FOP1E(op, dst) \
333 __FOP_FUNC(#op "_" #dst) \
334 "10: " #op " %" #dst " \n\t" \
335 __FOP_RET(#op "_" #dst)
336
337 #define FOP1EEX(op, dst) \
338 FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
339
340 #define FASTOP1(op) \
341 FOP_START(op) \
342 FOP1E(op##b, al) \
343 FOP1E(op##w, ax) \
344 FOP1E(op##l, eax) \
345 ON64(FOP1E(op##q, rax)) \
346 FOP_END
347
348 /* 1-operand, using src2 (for MUL/DIV r/m) */
349 #define FASTOP1SRC2(op, name) \
350 FOP_START(name) \
351 FOP1E(op, cl) \
352 FOP1E(op, cx) \
353 FOP1E(op, ecx) \
354 ON64(FOP1E(op, rcx)) \
355 FOP_END
356
357 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
358 #define FASTOP1SRC2EX(op, name) \
359 FOP_START(name) \
360 FOP1EEX(op, cl) \
361 FOP1EEX(op, cx) \
362 FOP1EEX(op, ecx) \
363 ON64(FOP1EEX(op, rcx)) \
364 FOP_END
365
366 #define FOP2E(op, dst, src) \
367 __FOP_FUNC(#op "_" #dst "_" #src) \
368 #op " %" #src ", %" #dst " \n\t" \
369 __FOP_RET(#op "_" #dst "_" #src)
370
371 #define FASTOP2(op) \
372 FOP_START(op) \
373 FOP2E(op##b, al, dl) \
374 FOP2E(op##w, ax, dx) \
375 FOP2E(op##l, eax, edx) \
376 ON64(FOP2E(op##q, rax, rdx)) \
377 FOP_END
378
379 /* 2 operand, word only */
380 #define FASTOP2W(op) \
381 FOP_START(op) \
382 FOPNOP() \
383 FOP2E(op##w, ax, dx) \
384 FOP2E(op##l, eax, edx) \
385 ON64(FOP2E(op##q, rax, rdx)) \
386 FOP_END
387
388 /* 2 operand, src is CL */
389 #define FASTOP2CL(op) \
390 FOP_START(op) \
391 FOP2E(op##b, al, cl) \
392 FOP2E(op##w, ax, cl) \
393 FOP2E(op##l, eax, cl) \
394 ON64(FOP2E(op##q, rax, cl)) \
395 FOP_END
396
397 /* 2 operand, src and dest are reversed */
398 #define FASTOP2R(op, name) \
399 FOP_START(name) \
400 FOP2E(op##b, dl, al) \
401 FOP2E(op##w, dx, ax) \
402 FOP2E(op##l, edx, eax) \
403 ON64(FOP2E(op##q, rdx, rax)) \
404 FOP_END
405
406 #define FOP3E(op, dst, src, src2) \
407 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
408 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
409 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
410
411 /* 3-operand, word-only, src2=cl */
412 #define FASTOP3WCL(op) \
413 FOP_START(op) \
414 FOPNOP() \
415 FOP3E(op##w, ax, dx, cl) \
416 FOP3E(op##l, eax, edx, cl) \
417 ON64(FOP3E(op##q, rax, rdx, cl)) \
418 FOP_END
419
420 /* Special case for SETcc - 1 instruction per cc */
421 #define FOP_SETCC(op) \
422 FOP_FUNC(op) \
423 #op " %al \n\t" \
424 FOP_RET(op)
425
426 FOP_START(setcc)
427 FOP_SETCC(seto)
428 FOP_SETCC(setno)
429 FOP_SETCC(setc)
430 FOP_SETCC(setnc)
431 FOP_SETCC(setz)
432 FOP_SETCC(setnz)
433 FOP_SETCC(setbe)
434 FOP_SETCC(setnbe)
435 FOP_SETCC(sets)
436 FOP_SETCC(setns)
437 FOP_SETCC(setp)
438 FOP_SETCC(setnp)
439 FOP_SETCC(setl)
440 FOP_SETCC(setnl)
441 FOP_SETCC(setle)
442 FOP_SETCC(setnle)
443 FOP_END;
444
445 FOP_START(salc)
446 FOP_FUNC(salc)
447 "pushf; sbb %al, %al; popf \n\t"
448 FOP_RET(salc)
449 FOP_END;
450
451 /*
452 * XXX: inoutclob user must know where the argument is being expanded.
453 * Using asm goto would allow us to remove _fault.
454 */
455 #define asm_safe(insn, inoutclob...) \
456 ({ \
457 int _fault = 0; \
458 \
459 asm volatile("1:" insn "\n" \
460 "2:\n" \
461 _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
462 : [_fault] "+r"(_fault) inoutclob ); \
463 \
464 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
465 })
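/*
 * Typical use (an assumed, minimal sketch): wrap a single instruction that
 * may fault and turn the fault into an emulator return code, e.g.
 *
 *	rc = asm_safe("fwait");
 *
 * yields X86EMUL_UNHANDLEABLE if the instruction faulted, otherwise
 * X86EMUL_CONTINUE.
 */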
466
467 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
468 enum x86_intercept intercept,
469 enum x86_intercept_stage stage)
470 {
471 struct x86_instruction_info info = {
472 .intercept = intercept,
473 .rep_prefix = ctxt->rep_prefix,
474 .modrm_mod = ctxt->modrm_mod,
475 .modrm_reg = ctxt->modrm_reg,
476 .modrm_rm = ctxt->modrm_rm,
477 .src_val = ctxt->src.val64,
478 .dst_val = ctxt->dst.val64,
479 .src_bytes = ctxt->src.bytes,
480 .dst_bytes = ctxt->dst.bytes,
481 .src_type = ctxt->src.type,
482 .dst_type = ctxt->dst.type,
483 .ad_bytes = ctxt->ad_bytes,
484 .rip = ctxt->eip,
485 .next_rip = ctxt->_eip,
486 };
487
488 return ctxt->ops->intercept(ctxt, &info, stage);
489 }
490
491 static void assign_masked(ulong *dest, ulong src, ulong mask)
492 {
493 *dest = (*dest & ~mask) | (src & mask);
494 }
495
496 static void assign_register(unsigned long *reg, u64 val, int bytes)
497 {
498 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
499 switch (bytes) {
500 case 1:
501 *(u8 *)reg = (u8)val;
502 break;
503 case 2:
504 *(u16 *)reg = (u16)val;
505 break;
506 case 4:
507 *reg = (u32)val;
508 break; /* 64b: zero-extend */
509 case 8:
510 *reg = val;
511 break;
512 }
513 }
514
515 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
516 {
517 return (1UL << (ctxt->ad_bytes << 3)) - 1;
518 }
519
520 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
521 {
522 u16 sel;
523 struct desc_struct ss;
524
525 if (ctxt->mode == X86EMUL_MODE_PROT64)
526 return ~0UL;
527 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
528 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
529 }
530
531 static int stack_size(struct x86_emulate_ctxt *ctxt)
532 {
533 return (__fls(stack_mask(ctxt)) + 1) >> 3;
534 }
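/*
 * Example values (following the helpers above): with ctxt->ad_bytes == 2,
 * ad_mask() is 0xffff; with 4 it is 0xffffffff.  Likewise a stack_mask()
 * of 0xffff gives stack_size() == 2, and ~0UL in 64-bit mode gives 8.
 */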
535
536 /* Access/update address held in a register, based on addressing mode. */
537 static inline unsigned long
538 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
539 {
540 if (ctxt->ad_bytes == sizeof(unsigned long))
541 return reg;
542 else
543 return reg & ad_mask(ctxt);
544 }
545
546 static inline unsigned long
547 register_address(struct x86_emulate_ctxt *ctxt, int reg)
548 {
549 return address_mask(ctxt, reg_read(ctxt, reg));
550 }
551
552 static void masked_increment(ulong *reg, ulong mask, int inc)
553 {
554 assign_masked(reg, *reg + inc, mask);
555 }
556
557 static inline void
558 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
559 {
560 ulong *preg = reg_rmw(ctxt, reg);
561
562 assign_register(preg, *preg + inc, ctxt->ad_bytes);
563 }
564
565 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
566 {
567 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
568 }
569
570 static u32 desc_limit_scaled(struct desc_struct *desc)
571 {
572 u32 limit = get_desc_limit(desc);
573
574 return desc->g ? (limit << 12) | 0xfff : limit;
575 }
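/*
 * For example, a descriptor with g=1 and a raw limit of 0xfffff scales to
 * 0xffffffff (4 GiB - 1), while g=0 leaves the limit in bytes as-is.
 */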
576
577 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
578 {
579 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
580 return 0;
581
582 return ctxt->ops->get_cached_segment_base(ctxt, seg);
583 }
584
585 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
586 u32 error, bool valid)
587 {
588 if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
589 return X86EMUL_UNHANDLEABLE;
590
591 ctxt->exception.vector = vec;
592 ctxt->exception.error_code = error;
593 ctxt->exception.error_code_valid = valid;
594 return X86EMUL_PROPAGATE_FAULT;
595 }
596
597 static int emulate_db(struct x86_emulate_ctxt *ctxt)
598 {
599 return emulate_exception(ctxt, DB_VECTOR, 0, false);
600 }
601
602 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
603 {
604 return emulate_exception(ctxt, GP_VECTOR, err, true);
605 }
606
607 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
608 {
609 return emulate_exception(ctxt, SS_VECTOR, err, true);
610 }
611
612 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
613 {
614 return emulate_exception(ctxt, UD_VECTOR, 0, false);
615 }
616
617 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
618 {
619 return emulate_exception(ctxt, TS_VECTOR, err, true);
620 }
621
622 static int emulate_de(struct x86_emulate_ctxt *ctxt)
623 {
624 return emulate_exception(ctxt, DE_VECTOR, 0, false);
625 }
626
627 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
628 {
629 return emulate_exception(ctxt, NM_VECTOR, 0, false);
630 }
631
632 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
633 {
634 u16 selector;
635 struct desc_struct desc;
636
637 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
638 return selector;
639 }
640
641 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
642 unsigned seg)
643 {
644 u16 dummy;
645 u32 base3;
646 struct desc_struct desc;
647
648 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
649 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
650 }
651
652 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
653 {
654 return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
655 }
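/*
 * With 48 virtual-address bits (no LA57), an address is canonical when
 * bits 63:47 are all copies of bit 47; e.g. 0xffff800000000000 is
 * canonical, 0x0000800000000000 is not.
 */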
656
657 static inline bool emul_is_noncanonical_address(u64 la,
658 struct x86_emulate_ctxt *ctxt,
659 unsigned int flags)
660 {
661 return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
662 }
663
664 /*
665 * x86 defines three classes of vector instructions: explicitly
666 * aligned, explicitly unaligned, and the rest, which change behaviour
667 * depending on whether they're AVX encoded or not.
668 *
669 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
670 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
671 * 512 bytes of data must be aligned to a 16 byte boundary.
672 */
673 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
674 {
675 u64 alignment = ctxt->d & AlignMask;
676
677 if (likely(size < 16))
678 return 1;
679
680 switch (alignment) {
681 case Unaligned:
682 case Avx:
683 return 1;
684 case Aligned16:
685 return 16;
686 case Aligned:
687 default:
688 return size;
689 }
690 }
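/*
 * Example: a 16-byte MOVDQA access is flagged Aligned, so this returns 16
 * and __linearize() below injects #GP(0) if any of the low four bits of
 * the linear address are set; MOVDQU is flagged Unaligned and gets 1, i.e.
 * no alignment check at all.
 */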
691
692 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
693 struct segmented_address addr,
694 unsigned *max_size, unsigned size,
695 enum x86emul_mode mode, ulong *linear,
696 unsigned int flags)
697 {
698 struct desc_struct desc;
699 bool usable;
700 ulong la;
701 u32 lim;
702 u16 sel;
703 u8 va_bits;
704
705 la = seg_base(ctxt, addr.seg) + addr.ea;
706 *max_size = 0;
707 switch (mode) {
708 case X86EMUL_MODE_PROT64:
709 *linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
710 va_bits = ctxt_virt_addr_bits(ctxt);
711 if (!__is_canonical_address(la, va_bits))
712 goto bad;
713
714 *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
715 if (size > *max_size)
716 goto bad;
717 break;
718 default:
719 *linear = la = (u32)la;
720 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
721 addr.seg);
722 if (!usable)
723 goto bad;
724 /* code segment in protected mode or read-only data segment */
725 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
726 (flags & X86EMUL_F_WRITE))
727 goto bad;
728 /* unreadable code segment */
729 if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
730 goto bad;
731 lim = desc_limit_scaled(&desc);
732 if (!(desc.type & 8) && (desc.type & 4)) {
733 /* expand-down segment */
734 if (addr.ea <= lim)
735 goto bad;
736 lim = desc.d ? 0xffffffff : 0xffff;
737 }
738 if (addr.ea > lim)
739 goto bad;
740 if (lim == 0xffffffff)
741 *max_size = ~0u;
742 else {
743 *max_size = (u64)lim + 1 - addr.ea;
744 if (size > *max_size)
745 goto bad;
746 }
747 break;
748 }
749 if (la & (insn_alignment(ctxt, size) - 1))
750 return emulate_gp(ctxt, 0);
751 return X86EMUL_CONTINUE;
752 bad:
753 if (addr.seg == VCPU_SREG_SS)
754 return emulate_ss(ctxt, 0);
755 else
756 return emulate_gp(ctxt, 0);
757 }
758
759 static int linearize(struct x86_emulate_ctxt *ctxt,
760 struct segmented_address addr,
761 unsigned size, bool write,
762 ulong *linear)
763 {
764 unsigned max_size;
765 return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
766 write ? X86EMUL_F_WRITE : 0);
767 }
768
769 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
770 {
771 ulong linear;
772 int rc;
773 unsigned max_size;
774 struct segmented_address addr = { .seg = VCPU_SREG_CS,
775 .ea = dst };
776
777 if (ctxt->op_bytes != sizeof(unsigned long))
778 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
779 rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
780 X86EMUL_F_FETCH);
781 if (rc == X86EMUL_CONTINUE)
782 ctxt->_eip = addr.ea;
783 return rc;
784 }
785
786 static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
787 {
788 u64 efer;
789 struct desc_struct cs;
790 u16 selector;
791 u32 base3;
792
793 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
794
795 if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
796 /* Real mode. cpu must not have long mode active */
797 if (efer & EFER_LMA)
798 return X86EMUL_UNHANDLEABLE;
799 ctxt->mode = X86EMUL_MODE_REAL;
800 return X86EMUL_CONTINUE;
801 }
802
803 if (ctxt->eflags & X86_EFLAGS_VM) {
804 /* Protected/VM86 mode. cpu must not have long mode active */
805 if (efer & EFER_LMA)
806 return X86EMUL_UNHANDLEABLE;
807 ctxt->mode = X86EMUL_MODE_VM86;
808 return X86EMUL_CONTINUE;
809 }
810
811 if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
812 return X86EMUL_UNHANDLEABLE;
813
814 if (efer & EFER_LMA) {
815 if (cs.l) {
816 /* Proper long mode */
817 ctxt->mode = X86EMUL_MODE_PROT64;
818 } else if (cs.d) {
819 /* 32 bit compatibility mode */
820 ctxt->mode = X86EMUL_MODE_PROT32;
821 } else {
822 ctxt->mode = X86EMUL_MODE_PROT16;
823 }
824 } else {
825 /* Legacy 32 bit / 16 bit mode */
826 ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
827 }
828
829 return X86EMUL_CONTINUE;
830 }
831
832 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
833 {
834 return assign_eip(ctxt, dst);
835 }
836
837 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
838 {
839 int rc = emulator_recalc_and_set_mode(ctxt);
840
841 if (rc != X86EMUL_CONTINUE)
842 return rc;
843
844 return assign_eip(ctxt, dst);
845 }
846
847 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
848 {
849 return assign_eip_near(ctxt, ctxt->_eip + rel);
850 }
851
852 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
853 void *data, unsigned size)
854 {
855 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
856 }
857
858 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
859 ulong linear, void *data,
860 unsigned int size)
861 {
862 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
863 }
864
865 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
866 struct segmented_address addr,
867 void *data,
868 unsigned size)
869 {
870 int rc;
871 ulong linear;
872
873 rc = linearize(ctxt, addr, size, false, &linear);
874 if (rc != X86EMUL_CONTINUE)
875 return rc;
876 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
877 }
878
879 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
880 struct segmented_address addr,
881 void *data,
882 unsigned int size)
883 {
884 int rc;
885 ulong linear;
886
887 rc = linearize(ctxt, addr, size, true, &linear);
888 if (rc != X86EMUL_CONTINUE)
889 return rc;
890 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
891 }
892
893 /*
894 * Prefetch the remaining bytes of the instruction without crossing page
895 * boundary if they are not in fetch_cache yet.
896 */
897 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
898 {
899 int rc;
900 unsigned size, max_size;
901 unsigned long linear;
902 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
903 struct segmented_address addr = { .seg = VCPU_SREG_CS,
904 .ea = ctxt->eip + cur_size };
905
906 /*
907 * We do not know exactly how many bytes will be needed, and
908 * __linearize is expensive, so fetch as much as possible. We
909 * just have to avoid going beyond the 15 byte limit, the end
910 * of the segment, or the end of the page.
911 *
912 * __linearize is called with size 0 so that it does not do any
913 * boundary check itself. Instead, we use max_size to check
914 * against op_size.
915 */
916 rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
917 X86EMUL_F_FETCH);
918 if (unlikely(rc != X86EMUL_CONTINUE))
919 return rc;
920
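/*
 * Note: cur_size is at most 15 here, so "15UL ^ cur_size" below is simply
 * 15 - cur_size, i.e. the number of bytes still allowed before the
 * 15-byte instruction-length limit.
 */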
921 size = min_t(unsigned, 15UL ^ cur_size, max_size);
922 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
923
924 /*
925 * One instruction can only straddle two pages,
926 * and one has been loaded at the beginning of
927 * x86_decode_insn. So, if there still are not enough
928 * bytes, we must have hit the 15-byte instruction-length limit.
929 */
930 if (unlikely(size < op_size))
931 return emulate_gp(ctxt, 0);
932
933 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
934 size, &ctxt->exception);
935 if (unlikely(rc != X86EMUL_CONTINUE))
936 return rc;
937 ctxt->fetch.end += size;
938 return X86EMUL_CONTINUE;
939 }
940
941 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
942 unsigned size)
943 {
944 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
945
946 if (unlikely(done_size < size))
947 return __do_insn_fetch_bytes(ctxt, size - done_size);
948 else
949 return X86EMUL_CONTINUE;
950 }
951
952 /* Fetch next part of the instruction being emulated. */
953 #define insn_fetch(_type, _ctxt) \
954 ({ _type _x; \
955 \
956 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
957 if (rc != X86EMUL_CONTINUE) \
958 goto done; \
959 ctxt->_eip += sizeof(_type); \
960 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
961 ctxt->fetch.ptr += sizeof(_type); \
962 _x; \
963 })
964
965 #define insn_fetch_arr(_arr, _size, _ctxt) \
966 ({ \
967 rc = do_insn_fetch_bytes(_ctxt, _size); \
968 if (rc != X86EMUL_CONTINUE) \
969 goto done; \
970 ctxt->_eip += (_size); \
971 memcpy(_arr, ctxt->fetch.ptr, _size); \
972 ctxt->fetch.ptr += (_size); \
973 })
974
975 /*
976 * Given the 'reg' portion of a ModRM byte, and a register block, return a
977 * pointer into the block that addresses the relevant register.
978 * @byteop specifies whether the operand is a byte register (AH, CH, DH and BH are only decodable without a REX prefix).
979 */
980 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
981 int byteop)
982 {
983 void *p;
984 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
985
986 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
987 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
988 else
989 p = reg_rmw(ctxt, modrm_reg);
990 return p;
991 }
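/*
 * Example: with byteop set and no REX prefix, modrm_reg == 7 resolves to
 * BH, i.e. byte 1 of the RBX slot; with any REX prefix present the same
 * encoding means DIL, the low byte of RDI.
 */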
992
993 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
994 struct segmented_address addr,
995 u16 *size, unsigned long *address, int op_bytes)
996 {
997 int rc;
998
999 if (op_bytes == 2)
1000 op_bytes = 3;
1001 *address = 0;
1002 rc = segmented_read_std(ctxt, addr, size, 2);
1003 if (rc != X86EMUL_CONTINUE)
1004 return rc;
1005 addr.ea += 2;
1006 rc = segmented_read_std(ctxt, addr, address, op_bytes);
1007 return rc;
1008 }
1009
1010 FASTOP2(add);
1011 FASTOP2(or);
1012 FASTOP2(adc);
1013 FASTOP2(sbb);
1014 FASTOP2(and);
1015 FASTOP2(sub);
1016 FASTOP2(xor);
1017 FASTOP2(cmp);
1018 FASTOP2(test);
1019
1020 FASTOP1SRC2(mul, mul_ex);
1021 FASTOP1SRC2(imul, imul_ex);
1022 FASTOP1SRC2EX(div, div_ex);
1023 FASTOP1SRC2EX(idiv, idiv_ex);
1024
1025 FASTOP3WCL(shld);
1026 FASTOP3WCL(shrd);
1027
1028 FASTOP2W(imul);
1029
1030 FASTOP1(not);
1031 FASTOP1(neg);
1032 FASTOP1(inc);
1033 FASTOP1(dec);
1034
1035 FASTOP2CL(rol);
1036 FASTOP2CL(ror);
1037 FASTOP2CL(rcl);
1038 FASTOP2CL(rcr);
1039 FASTOP2CL(shl);
1040 FASTOP2CL(shr);
1041 FASTOP2CL(sar);
1042
1043 FASTOP2W(bsf);
1044 FASTOP2W(bsr);
1045 FASTOP2W(bt);
1046 FASTOP2W(bts);
1047 FASTOP2W(btr);
1048 FASTOP2W(btc);
1049
1050 FASTOP2(xadd);
1051
1052 FASTOP2R(cmp, cmp_r);
1053
1054 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1055 {
1056 /* If src is zero, do not writeback, but update flags */
1057 if (ctxt->src.val == 0)
1058 ctxt->dst.type = OP_NONE;
1059 return fastop(ctxt, em_bsf);
1060 }
1061
1062 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1063 {
1064 /* If src is zero, do not writeback, but update flags */
1065 if (ctxt->src.val == 0)
1066 ctxt->dst.type = OP_NONE;
1067 return fastop(ctxt, em_bsr);
1068 }
1069
1070 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1071 {
1072 u8 rc;
1073 void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
1074
1075 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1076 asm("push %[flags]; popf; " CALL_NOSPEC
1077 : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags));
1078 return rc;
1079 }
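/*
 * Example: for Jcc/SETcc/CMOVcc opcodes the low nibble is the condition
 * code, so condition 0x5 (e.g. opcode 0x75, JNZ) lands on the setnz stub
 * of the em_setcc table above and the AL result (0 or 1) comes back in rc.
 */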
1080
1081 static void fetch_register_operand(struct operand *op)
1082 {
1083 switch (op->bytes) {
1084 case 1:
1085 op->val = *(u8 *)op->addr.reg;
1086 break;
1087 case 2:
1088 op->val = *(u16 *)op->addr.reg;
1089 break;
1090 case 4:
1091 op->val = *(u32 *)op->addr.reg;
1092 break;
1093 case 8:
1094 op->val = *(u64 *)op->addr.reg;
1095 break;
1096 }
1097 }
1098
1099 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1100 {
1101 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1102 return emulate_nm(ctxt);
1103
1104 kvm_fpu_get();
1105 asm volatile("fninit");
1106 kvm_fpu_put();
1107 return X86EMUL_CONTINUE;
1108 }
1109
1110 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1111 {
1112 u16 fcw;
1113
1114 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1115 return emulate_nm(ctxt);
1116
1117 kvm_fpu_get();
1118 asm volatile("fnstcw %0": "+m"(fcw));
1119 kvm_fpu_put();
1120
1121 ctxt->dst.val = fcw;
1122
1123 return X86EMUL_CONTINUE;
1124 }
1125
1126 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1127 {
1128 u16 fsw;
1129
1130 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1131 return emulate_nm(ctxt);
1132
1133 kvm_fpu_get();
1134 asm volatile("fnstsw %0": "+m"(fsw));
1135 kvm_fpu_put();
1136
1137 ctxt->dst.val = fsw;
1138
1139 return X86EMUL_CONTINUE;
1140 }
1141
1142 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1143 struct operand *op)
1144 {
1145 unsigned int reg;
1146
1147 if (ctxt->d & ModRM)
1148 reg = ctxt->modrm_reg;
1149 else
1150 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1151
1152 if (ctxt->d & Sse) {
1153 op->type = OP_XMM;
1154 op->bytes = 16;
1155 op->addr.xmm = reg;
1156 kvm_read_sse_reg(reg, &op->vec_val);
1157 return;
1158 }
1159 if (ctxt->d & Mmx) {
1160 reg &= 7;
1161 op->type = OP_MM;
1162 op->bytes = 8;
1163 op->addr.mm = reg;
1164 return;
1165 }
1166
1167 op->type = OP_REG;
1168 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1169 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1170
1171 fetch_register_operand(op);
1172 op->orig_val = op->val;
1173 }
1174
1175 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1176 {
1177 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1178 ctxt->modrm_seg = VCPU_SREG_SS;
1179 }
1180
1181 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1182 struct operand *op)
1183 {
1184 u8 sib;
1185 int index_reg, base_reg, scale;
1186 int rc = X86EMUL_CONTINUE;
1187 ulong modrm_ea = 0;
1188
1189 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1190 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1191 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1192
1193 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1194 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1195 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1196 ctxt->modrm_seg = VCPU_SREG_DS;
1197
1198 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1199 op->type = OP_REG;
1200 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1201 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1202 ctxt->d & ByteOp);
1203 if (ctxt->d & Sse) {
1204 op->type = OP_XMM;
1205 op->bytes = 16;
1206 op->addr.xmm = ctxt->modrm_rm;
1207 kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1208 return rc;
1209 }
1210 if (ctxt->d & Mmx) {
1211 op->type = OP_MM;
1212 op->bytes = 8;
1213 op->addr.mm = ctxt->modrm_rm & 7;
1214 return rc;
1215 }
1216 fetch_register_operand(op);
1217 return rc;
1218 }
1219
1220 op->type = OP_MEM;
1221
1222 if (ctxt->ad_bytes == 2) {
1223 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1224 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1225 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1226 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1227
1228 /* 16-bit ModR/M decode. */
1229 switch (ctxt->modrm_mod) {
1230 case 0:
1231 if (ctxt->modrm_rm == 6)
1232 modrm_ea += insn_fetch(u16, ctxt);
1233 break;
1234 case 1:
1235 modrm_ea += insn_fetch(s8, ctxt);
1236 break;
1237 case 2:
1238 modrm_ea += insn_fetch(u16, ctxt);
1239 break;
1240 }
1241 switch (ctxt->modrm_rm) {
1242 case 0:
1243 modrm_ea += bx + si;
1244 break;
1245 case 1:
1246 modrm_ea += bx + di;
1247 break;
1248 case 2:
1249 modrm_ea += bp + si;
1250 break;
1251 case 3:
1252 modrm_ea += bp + di;
1253 break;
1254 case 4:
1255 modrm_ea += si;
1256 break;
1257 case 5:
1258 modrm_ea += di;
1259 break;
1260 case 6:
1261 if (ctxt->modrm_mod != 0)
1262 modrm_ea += bp;
1263 break;
1264 case 7:
1265 modrm_ea += bx;
1266 break;
1267 }
1268 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1269 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1270 ctxt->modrm_seg = VCPU_SREG_SS;
1271 modrm_ea = (u16)modrm_ea;
1272 } else {
1273 /* 32/64-bit ModR/M decode. */
1274 if ((ctxt->modrm_rm & 7) == 4) {
1275 sib = insn_fetch(u8, ctxt);
1276 index_reg |= (sib >> 3) & 7;
1277 base_reg |= sib & 7;
1278 scale = sib >> 6;
1279
1280 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1281 modrm_ea += insn_fetch(s32, ctxt);
1282 else {
1283 modrm_ea += reg_read(ctxt, base_reg);
1284 adjust_modrm_seg(ctxt, base_reg);
1285 /* Increment ESP on POP [ESP] */
1286 if ((ctxt->d & IncSP) &&
1287 base_reg == VCPU_REGS_RSP)
1288 modrm_ea += ctxt->op_bytes;
1289 }
1290 if (index_reg != 4)
1291 modrm_ea += reg_read(ctxt, index_reg) << scale;
1292 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1293 modrm_ea += insn_fetch(s32, ctxt);
1294 if (ctxt->mode == X86EMUL_MODE_PROT64)
1295 ctxt->rip_relative = 1;
1296 } else {
1297 base_reg = ctxt->modrm_rm;
1298 modrm_ea += reg_read(ctxt, base_reg);
1299 adjust_modrm_seg(ctxt, base_reg);
1300 }
1301 switch (ctxt->modrm_mod) {
1302 case 1:
1303 modrm_ea += insn_fetch(s8, ctxt);
1304 break;
1305 case 2:
1306 modrm_ea += insn_fetch(s32, ctxt);
1307 break;
1308 }
1309 }
1310 op->addr.mem.ea = modrm_ea;
1311 if (ctxt->ad_bytes != 8)
1312 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1313
1314 done:
1315 return rc;
1316 }
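/*
 * Worked examples for the decode above: in 16-bit addressing, a ModRM byte
 * of 0x42 (mod=01, rm=2) yields ea = BP + SI + disp8 with an SS default
 * segment; in 64-bit mode, mod=00 with rm=101 takes a disp32 and marks the
 * access RIP-relative.
 */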
1317
1318 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1319 struct operand *op)
1320 {
1321 int rc = X86EMUL_CONTINUE;
1322
1323 op->type = OP_MEM;
1324 switch (ctxt->ad_bytes) {
1325 case 2:
1326 op->addr.mem.ea = insn_fetch(u16, ctxt);
1327 break;
1328 case 4:
1329 op->addr.mem.ea = insn_fetch(u32, ctxt);
1330 break;
1331 case 8:
1332 op->addr.mem.ea = insn_fetch(u64, ctxt);
1333 break;
1334 }
1335 done:
1336 return rc;
1337 }
1338
1339 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1340 {
1341 long sv = 0, mask;
1342
1343 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1344 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1345
1346 if (ctxt->src.bytes == 2)
1347 sv = (s16)ctxt->src.val & (s16)mask;
1348 else if (ctxt->src.bytes == 4)
1349 sv = (s32)ctxt->src.val & (s32)mask;
1350 else
1351 sv = (s64)ctxt->src.val & (s64)mask;
1352
1353 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1354 ctxt->dst.addr.mem.ea + (sv >> 3));
1355 }
1356
1357 /* only subword offset */
1358 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1359 }
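/*
 * Example: BT m16, r16 with a source value of 35 gives sv = 32, so the
 * memory operand is moved forward by 32 >> 3 = 4 bytes and the remaining
 * bit offset becomes 35 & 15 = 3.
 */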
1360
1361 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1362 unsigned long addr, void *dest, unsigned size)
1363 {
1364 int rc;
1365 struct read_cache *mc = &ctxt->mem_read;
1366
1367 if (mc->pos < mc->end)
1368 goto read_cached;
1369
1370 if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1371 return X86EMUL_UNHANDLEABLE;
1372
1373 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1374 &ctxt->exception);
1375 if (rc != X86EMUL_CONTINUE)
1376 return rc;
1377
1378 mc->end += size;
1379
1380 read_cached:
1381 memcpy(dest, mc->data + mc->pos, size);
1382 mc->pos += size;
1383 return X86EMUL_CONTINUE;
1384 }
1385
1386 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1387 struct segmented_address addr,
1388 void *data,
1389 unsigned size)
1390 {
1391 int rc;
1392 ulong linear;
1393
1394 rc = linearize(ctxt, addr, size, false, &linear);
1395 if (rc != X86EMUL_CONTINUE)
1396 return rc;
1397 return read_emulated(ctxt, linear, data, size);
1398 }
1399
1400 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1401 struct segmented_address addr,
1402 const void *data,
1403 unsigned size)
1404 {
1405 int rc;
1406 ulong linear;
1407
1408 rc = linearize(ctxt, addr, size, true, &linear);
1409 if (rc != X86EMUL_CONTINUE)
1410 return rc;
1411 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1412 &ctxt->exception);
1413 }
1414
1415 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1416 struct segmented_address addr,
1417 const void *orig_data, const void *data,
1418 unsigned size)
1419 {
1420 int rc;
1421 ulong linear;
1422
1423 rc = linearize(ctxt, addr, size, true, &linear);
1424 if (rc != X86EMUL_CONTINUE)
1425 return rc;
1426 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1427 size, &ctxt->exception);
1428 }
1429
1430 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1431 unsigned int size, unsigned short port,
1432 void *dest)
1433 {
1434 struct read_cache *rc = &ctxt->io_read;
1435
1436 if (rc->pos == rc->end) { /* refill pio read ahead */
1437 unsigned int in_page, n;
1438 unsigned int count = ctxt->rep_prefix ?
1439 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1440 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1441 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1442 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1443 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1444 if (n == 0)
1445 n = 1;
1446 rc->pos = rc->end = 0;
1447 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1448 return 0;
1449 rc->end = n * size;
1450 }
1451
1452 if (ctxt->rep_prefix && (ctxt->d & String) &&
1453 !(ctxt->eflags & X86_EFLAGS_DF)) {
1454 ctxt->dst.data = rc->data + rc->pos;
1455 ctxt->dst.type = OP_MEM_STR;
1456 ctxt->dst.count = (rc->end - rc->pos) / size;
1457 rc->pos = rc->end;
1458 } else {
1459 memcpy(dest, rc->data + rc->pos, size);
1460 rc->pos += size;
1461 }
1462 return 1;
1463 }
1464
1465 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1466 u16 index, struct desc_struct *desc)
1467 {
1468 struct desc_ptr dt;
1469 ulong addr;
1470
1471 ctxt->ops->get_idt(ctxt, &dt);
1472
1473 if (dt.size < index * 8 + 7)
1474 return emulate_gp(ctxt, index << 3 | 0x2);
1475
1476 addr = dt.address + index * 8;
1477 return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1478 }
1479
1480 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1481 u16 selector, struct desc_ptr *dt)
1482 {
1483 const struct x86_emulate_ops *ops = ctxt->ops;
1484 u32 base3 = 0;
1485
1486 if (selector & 1 << 2) {
1487 struct desc_struct desc;
1488 u16 sel;
1489
1490 memset(dt, 0, sizeof(*dt));
1491 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1492 VCPU_SREG_LDTR))
1493 return;
1494
1495 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1496 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1497 } else
1498 ops->get_gdt(ctxt, dt);
1499 }
1500
1501 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1502 u16 selector, ulong *desc_addr_p)
1503 {
1504 struct desc_ptr dt;
1505 u16 index = selector >> 3;
1506 ulong addr;
1507
1508 get_descriptor_table_ptr(ctxt, selector, &dt);
1509
1510 if (dt.size < index * 8 + 7)
1511 return emulate_gp(ctxt, selector & 0xfffc);
1512
1513 addr = dt.address + index * 8;
1514
1515 #ifdef CONFIG_X86_64
1516 if (addr >> 32 != 0) {
1517 u64 efer = 0;
1518
1519 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1520 if (!(efer & EFER_LMA))
1521 addr &= (u32)-1;
1522 }
1523 #endif
1524
1525 *desc_addr_p = addr;
1526 return X86EMUL_CONTINUE;
1527 }
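/*
 * Example: selector 0x2b has index 5, TI = 0 and RPL = 3, so the address
 * computed above is gdt.address + 5 * 8; with TI = 1 the LDT base fetched
 * by get_descriptor_table_ptr() is used instead.
 */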
1528
1529 /* allowed just for 8 bytes segments */
1530 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1531 u16 selector, struct desc_struct *desc,
1532 ulong *desc_addr_p)
1533 {
1534 int rc;
1535
1536 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1537 if (rc != X86EMUL_CONTINUE)
1538 return rc;
1539
1540 return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1541 }
1542
1543 /* allowed just for 8 bytes segments */
1544 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1545 u16 selector, struct desc_struct *desc)
1546 {
1547 int rc;
1548 ulong addr;
1549
1550 rc = get_descriptor_ptr(ctxt, selector, &addr);
1551 if (rc != X86EMUL_CONTINUE)
1552 return rc;
1553
1554 return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1555 }
1556
1557 static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl)
1558 {
1559 const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET;
1560 u64 efer = 0, cet = 0, ssp = 0;
1561
1562 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET))
1563 return false;
1564
1565 if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer))
1566 return true;
1567
1568 /* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */
1569 if (!(efer & EFER_LMA))
1570 return false;
1571
1572 if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet))
1573 return true;
1574
1575 if (!(cet & CET_SHSTK_EN))
1576 return false;
1577
1578 if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp))
1579 return true;
1580
1581 /*
1582 * On transfer from 64-bit mode to compatibility mode, SSP[63:32] must
1583 * be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode.
1584 */
1585 return ssp >> 32;
1586 }
1587
1588 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1589 u16 selector, int seg, u8 cpl,
1590 enum x86_transfer_type transfer,
1591 struct desc_struct *desc)
1592 {
1593 struct desc_struct seg_desc, old_desc;
1594 u8 dpl, rpl;
1595 unsigned err_vec = GP_VECTOR;
1596 u32 err_code = 0;
1597 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1598 ulong desc_addr;
1599 int ret;
1600 u16 dummy;
1601 u32 base3 = 0;
1602
1603 memset(&seg_desc, 0, sizeof(seg_desc));
1604
1605 if (ctxt->mode == X86EMUL_MODE_REAL) {
1606 /* set real mode segment descriptor (keep limit etc. for
1607 * unreal mode) */
1608 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1609 set_desc_base(&seg_desc, selector << 4);
1610 goto load;
1611 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1612 /* VM86 needs a clean new segment descriptor */
1613 set_desc_base(&seg_desc, selector << 4);
1614 set_desc_limit(&seg_desc, 0xffff);
1615 seg_desc.type = 3;
1616 seg_desc.p = 1;
1617 seg_desc.s = 1;
1618 seg_desc.dpl = 3;
1619 goto load;
1620 }
1621
1622 rpl = selector & 3;
1623
1624 /* TR should be in GDT only */
1625 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1626 goto exception;
1627
1628 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1629 if (null_selector) {
1630 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1631 goto exception;
1632
1633 if (seg == VCPU_SREG_SS) {
1634 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1635 goto exception;
1636
1637 /*
1638 * ctxt->ops->set_segment expects the CPL to be in
1639 * SS.DPL, so fake an expand-up 32-bit data segment.
1640 */
1641 seg_desc.type = 3;
1642 seg_desc.p = 1;
1643 seg_desc.s = 1;
1644 seg_desc.dpl = cpl;
1645 seg_desc.d = 1;
1646 seg_desc.g = 1;
1647 }
1648
1649 /* Skip all following checks */
1650 goto load;
1651 }
1652
1653 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1654 if (ret != X86EMUL_CONTINUE)
1655 return ret;
1656
1657 err_code = selector & 0xfffc;
1658 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1659 GP_VECTOR;
1660
1661 /* can't load system descriptor into segment selector */
1662 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1663 if (transfer == X86_TRANSFER_CALL_JMP)
1664 return X86EMUL_UNHANDLEABLE;
1665 goto exception;
1666 }
1667
1668 dpl = seg_desc.dpl;
1669
1670 switch (seg) {
1671 case VCPU_SREG_SS:
1672 /*
1673 * segment is not a writable data segment or segment
1674 * selector's RPL != CPL or DPL != CPL
1675 */
1676 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1677 goto exception;
1678 break;
1679 case VCPU_SREG_CS:
1680 /*
1681 * KVM uses "none" when loading CS as part of emulating Real
1682 * Mode exceptions and IRET (handled above). In all other
1683 * cases, loading CS without a control transfer is a KVM bug.
1684 */
1685 if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1686 goto exception;
1687
1688 if (!(seg_desc.type & 8))
1689 goto exception;
1690
1691 if (transfer == X86_TRANSFER_RET) {
1692 /* RET can never return to an inner privilege level. */
1693 if (rpl < cpl)
1694 goto exception;
1695 /* Outer-privilege level return is not implemented */
1696 if (rpl > cpl)
1697 return X86EMUL_UNHANDLEABLE;
1698 }
1699 if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1700 if (seg_desc.type & 4) {
1701 /* conforming */
1702 if (dpl > rpl)
1703 goto exception;
1704 } else {
1705 /* nonconforming */
1706 if (dpl != rpl)
1707 goto exception;
1708 }
1709 } else { /* X86_TRANSFER_CALL_JMP */
1710 if (seg_desc.type & 4) {
1711 /* conforming */
1712 if (dpl > cpl)
1713 goto exception;
1714 } else {
1715 /* nonconforming */
1716 if (rpl > cpl || dpl != cpl)
1717 goto exception;
1718 }
1719 }
1720 /* in long-mode d/b must be clear if l is set */
1721 if (seg_desc.d && seg_desc.l) {
1722 u64 efer = 0;
1723
1724 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1725 if (efer & EFER_LMA)
1726 goto exception;
1727 }
1728 if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) {
1729 err_code = 0;
1730 goto exception;
1731 }
1732
1733 /* CS(RPL) <- CPL */
1734 selector = (selector & 0xfffc) | cpl;
1735 break;
1736 case VCPU_SREG_TR:
1737 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1738 goto exception;
1739 break;
1740 case VCPU_SREG_LDTR:
1741 if (seg_desc.s || seg_desc.type != 2)
1742 goto exception;
1743 break;
1744 default: /* DS, ES, FS, or GS */
1745 /*
1746 * segment is not a data or readable code segment or
1747 * ((segment is a data or nonconforming code segment)
1748 * and ((RPL > DPL) or (CPL > DPL)))
1749 */
1750 if ((seg_desc.type & 0xa) == 0x8 ||
1751 (((seg_desc.type & 0xc) != 0xc) &&
1752 (rpl > dpl || cpl > dpl)))
1753 goto exception;
1754 break;
1755 }
1756
1757 if (!seg_desc.p) {
1758 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1759 goto exception;
1760 }
1761
1762 if (seg_desc.s) {
1763 /* mark segment as accessed */
1764 if (!(seg_desc.type & 1)) {
1765 seg_desc.type |= 1;
1766 ret = write_segment_descriptor(ctxt, selector,
1767 &seg_desc);
1768 if (ret != X86EMUL_CONTINUE)
1769 return ret;
1770 }
1771 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1772 ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1773 if (ret != X86EMUL_CONTINUE)
1774 return ret;
1775 if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1776 ((u64)base3 << 32), ctxt,
1777 X86EMUL_F_DT_LOAD))
1778 return emulate_gp(ctxt, err_code);
1779 }
1780
1781 if (seg == VCPU_SREG_TR) {
1782 old_desc = seg_desc;
1783 seg_desc.type |= 2; /* busy */
1784 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1785 sizeof(seg_desc), &ctxt->exception);
1786 if (ret != X86EMUL_CONTINUE)
1787 return ret;
1788 }
1789 load:
1790 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1791 if (desc)
1792 *desc = seg_desc;
1793 return X86EMUL_CONTINUE;
1794 exception:
1795 return emulate_exception(ctxt, err_vec, err_code, true);
1796 }
1797
1798 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1799 u16 selector, int seg)
1800 {
1801 u8 cpl = ctxt->ops->cpl(ctxt);
1802
1803 /*
1804 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1805 * they can load it at CPL<3 (Intel's manual says only LSS can,
1806 * but it's wrong).
1807 *
1808 * However, the Intel manual says that putting IST=1/DPL=3 in
1809 * an interrupt gate will result in SS=3 (the AMD manual instead
1810 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1811 * and only forbid it here.
1812 */
1813 if (seg == VCPU_SREG_SS && selector == 3 &&
1814 ctxt->mode == X86EMUL_MODE_PROT64)
1815 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1816
1817 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1818 X86_TRANSFER_NONE, NULL);
1819 }
1820
1821 static void write_register_operand(struct operand *op)
1822 {
1823 return assign_register(op->addr.reg, op->val, op->bytes);
1824 }
1825
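/*
 * Write the result back to the destination operand.  Register, XMM and MMX
 * destinations are updated directly; memory destinations go through the
 * segmented write helpers, using a cmpxchg when the instruction carried a
 * LOCK prefix so the store stays atomic with respect to the guest.
 */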
1826 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1827 {
1828 switch (op->type) {
1829 case OP_REG:
1830 write_register_operand(op);
1831 break;
1832 case OP_MEM:
1833 if (ctxt->lock_prefix)
1834 return segmented_cmpxchg(ctxt,
1835 op->addr.mem,
1836 &op->orig_val,
1837 &op->val,
1838 op->bytes);
1839 else
1840 return segmented_write(ctxt,
1841 op->addr.mem,
1842 &op->val,
1843 op->bytes);
1844 case OP_MEM_STR:
1845 return segmented_write(ctxt,
1846 op->addr.mem,
1847 op->data,
1848 op->bytes * op->count);
1849 case OP_XMM:
1850 kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1851 break;
1852 case OP_MM:
1853 kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1854 break;
1855 case OP_NONE:
1856 /* no writeback */
1857 break;
1858 default:
1859 break;
1860 }
1861 return X86EMUL_CONTINUE;
1862 }
1863
1864 static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
1865 {
1866 struct segmented_address addr;
1867
1868 rsp_increment(ctxt, -len);
1869 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1870 addr.seg = VCPU_SREG_SS;
1871
1872 return segmented_write(ctxt, addr, data, len);
1873 }
1874
1875 static int em_push(struct x86_emulate_ctxt *ctxt)
1876 {
1877 /* Disable writeback. */
1878 ctxt->dst.type = OP_NONE;
1879 return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1880 }
1881
1882 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1883 void *dest, int len)
1884 {
1885 int rc;
1886 struct segmented_address addr;
1887
1888 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1889 addr.seg = VCPU_SREG_SS;
1890 rc = segmented_read(ctxt, addr, dest, len);
1891 if (rc != X86EMUL_CONTINUE)
1892 return rc;
1893
1894 rsp_increment(ctxt, len);
1895 return rc;
1896 }
1897
1898 static int em_pop(struct x86_emulate_ctxt *ctxt)
1899 {
1900 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1901 }
1902
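/*
 * POPF: only a subset of EFLAGS may actually change, depending on the
 * current mode and privilege level.  IOPL is writable only at CPL 0, IF
 * only when CPL <= IOPL, and a POPF in VM86 mode with IOPL < 3 raises #GP.
 */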
1903 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1904 void *dest, int len)
1905 {
1906 int rc;
1907 unsigned long val = 0;
1908 unsigned long change_mask;
1909 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1910 int cpl = ctxt->ops->cpl(ctxt);
1911
1912 rc = emulate_pop(ctxt, &val, len);
1913 if (rc != X86EMUL_CONTINUE)
1914 return rc;
1915
1916 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1917 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1918 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1919 X86_EFLAGS_AC | X86_EFLAGS_ID;
1920
1921 switch(ctxt->mode) {
1922 case X86EMUL_MODE_PROT64:
1923 case X86EMUL_MODE_PROT32:
1924 case X86EMUL_MODE_PROT16:
1925 if (cpl == 0)
1926 change_mask |= X86_EFLAGS_IOPL;
1927 if (cpl <= iopl)
1928 change_mask |= X86_EFLAGS_IF;
1929 break;
1930 case X86EMUL_MODE_VM86:
1931 if (iopl < 3)
1932 return emulate_gp(ctxt, 0);
1933 change_mask |= X86_EFLAGS_IF;
1934 break;
1935 default: /* real mode */
1936 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1937 break;
1938 }
1939
1940 *(unsigned long *)dest =
1941 (ctxt->eflags & ~change_mask) | (val & change_mask);
1942
1943 return rc;
1944 }
1945
1946 static int em_popf(struct x86_emulate_ctxt *ctxt)
1947 {
1948 ctxt->dst.type = OP_REG;
1949 ctxt->dst.addr.reg = &ctxt->eflags;
1950 ctxt->dst.bytes = ctxt->op_bytes;
1951 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1952 }
1953
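/*
 * ENTER: only nesting level 0 is emulated.  Push the old frame pointer,
 * make RBP point at it, then reserve 'frame_size' bytes of stack.
 */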
1954 static int em_enter(struct x86_emulate_ctxt *ctxt)
1955 {
1956 int rc;
1957 unsigned frame_size = ctxt->src.val;
1958 unsigned nesting_level = ctxt->src2.val & 31;
1959 ulong rbp;
1960
1961 if (nesting_level)
1962 return X86EMUL_UNHANDLEABLE;
1963
1964 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1965 rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
1966 if (rc != X86EMUL_CONTINUE)
1967 return rc;
1968 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1969 stack_mask(ctxt));
1970 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1971 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1972 stack_mask(ctxt));
1973 return X86EMUL_CONTINUE;
1974 }
1975
1976 static int em_leave(struct x86_emulate_ctxt *ctxt)
1977 {
1978 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1979 stack_mask(ctxt));
1980 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1981 }
1982
1983 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1984 {
1985 int seg = ctxt->src2.val;
1986
1987 ctxt->src.val = get_segment_selector(ctxt, seg);
1988 if (ctxt->op_bytes == 4) {
1989 rsp_increment(ctxt, -2);
1990 ctxt->op_bytes = 2;
1991 }
1992
1993 return em_push(ctxt);
1994 }
1995
1996 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1997 {
1998 int seg = ctxt->src2.val;
1999 unsigned long selector = 0;
2000 int rc;
2001
2002 rc = emulate_pop(ctxt, &selector, 2);
2003 if (rc != X86EMUL_CONTINUE)
2004 return rc;
2005
2006 if (seg == VCPU_SREG_SS)
2007 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
2008 if (ctxt->op_bytes > 2)
2009 rsp_increment(ctxt, ctxt->op_bytes - 2);
2010
2011 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
2012 return rc;
2013 }
2014
2015 static int em_pusha(struct x86_emulate_ctxt *ctxt)
2016 {
2017 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
2018 int rc = X86EMUL_CONTINUE;
2019 int reg = VCPU_REGS_RAX;
2020
2021 while (reg <= VCPU_REGS_RDI) {
2022 (reg == VCPU_REGS_RSP) ?
2023 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
2024
2025 rc = em_push(ctxt);
2026 if (rc != X86EMUL_CONTINUE)
2027 return rc;
2028
2029 ++reg;
2030 }
2031
2032 return rc;
2033 }
2034
2035 static int em_pushf(struct x86_emulate_ctxt *ctxt)
2036 {
2037 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
2038 return em_push(ctxt);
2039 }
2040
2041 static int em_popa(struct x86_emulate_ctxt *ctxt)
2042 {
2043 int rc = X86EMUL_CONTINUE;
2044 int reg = VCPU_REGS_RDI;
2045 u32 val = 0;
2046
2047 while (reg >= VCPU_REGS_RAX) {
2048 if (reg == VCPU_REGS_RSP) {
2049 rsp_increment(ctxt, ctxt->op_bytes);
2050 --reg;
2051 }
2052
2053 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2054 if (rc != X86EMUL_CONTINUE)
2055 break;
2056 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2057 --reg;
2058 }
2059 return rc;
2060 }
2061
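/*
 * Real-mode interrupt (INT n): push FLAGS, CS and IP, clear IF/TF/AC, then
 * fetch the new CS:IP from the 4-byte IVT entry at IDT base + irq * 4
 * (IP at offset 0, CS at offset 2).
 */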
2062 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2063 {
2064 const struct x86_emulate_ops *ops = ctxt->ops;
2065 int rc;
2066 struct desc_ptr dt;
2067 gva_t cs_addr;
2068 gva_t eip_addr;
2069 u16 cs, eip;
2070
2071 /* TODO: Add limit checks */
2072 ctxt->src.val = ctxt->eflags;
2073 rc = em_push(ctxt);
2074 if (rc != X86EMUL_CONTINUE)
2075 return rc;
2076
2077 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2078
2079 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2080 rc = em_push(ctxt);
2081 if (rc != X86EMUL_CONTINUE)
2082 return rc;
2083
2084 ctxt->src.val = ctxt->_eip;
2085 rc = em_push(ctxt);
2086 if (rc != X86EMUL_CONTINUE)
2087 return rc;
2088
2089 ops->get_idt(ctxt, &dt);
2090
2091 eip_addr = dt.address + (irq << 2);
2092 cs_addr = dt.address + (irq << 2) + 2;
2093
2094 rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2095 if (rc != X86EMUL_CONTINUE)
2096 return rc;
2097
2098 rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2099 if (rc != X86EMUL_CONTINUE)
2100 return rc;
2101
2102 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2103 if (rc != X86EMUL_CONTINUE)
2104 return rc;
2105
2106 ctxt->_eip = eip;
2107
2108 return rc;
2109 }
2110
2111 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2112 {
2113 int rc;
2114
2115 invalidate_registers(ctxt);
2116 rc = __emulate_int_real(ctxt, irq);
2117 if (rc == X86EMUL_CONTINUE)
2118 writeback_registers(ctxt);
2119 return rc;
2120 }
2121
2122 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2123 {
2124 switch(ctxt->mode) {
2125 case X86EMUL_MODE_REAL:
2126 return __emulate_int_real(ctxt, irq);
2127 case X86EMUL_MODE_VM86:
2128 case X86EMUL_MODE_PROT16:
2129 case X86EMUL_MODE_PROT32:
2130 case X86EMUL_MODE_PROT64:
2131 default:
2132 		/* Protected-mode interrupts are not implemented yet. */
2133 return X86EMUL_UNHANDLEABLE;
2134 }
2135 }
2136
2137 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2138 {
2139 int rc = X86EMUL_CONTINUE;
2140 unsigned long temp_eip = 0;
2141 unsigned long temp_eflags = 0;
2142 unsigned long cs = 0;
2143 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2144 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2145 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2146 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2147 X86_EFLAGS_AC | X86_EFLAGS_ID |
2148 X86_EFLAGS_FIXED;
2149 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2150 X86_EFLAGS_VIP;
2151
2152 /* TODO: Add stack limit check */
2153
2154 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2155
2156 if (rc != X86EMUL_CONTINUE)
2157 return rc;
2158
2159 if (temp_eip & ~0xffff)
2160 return emulate_gp(ctxt, 0);
2161
2162 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2163
2164 if (rc != X86EMUL_CONTINUE)
2165 return rc;
2166
2167 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2168
2169 if (rc != X86EMUL_CONTINUE)
2170 return rc;
2171
2172 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2173
2174 if (rc != X86EMUL_CONTINUE)
2175 return rc;
2176
2177 ctxt->_eip = temp_eip;
2178
2179 if (ctxt->op_bytes == 4)
2180 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2181 else if (ctxt->op_bytes == 2) {
2182 ctxt->eflags &= ~0xffff;
2183 ctxt->eflags |= temp_eflags;
2184 }
2185
2186 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2187 ctxt->eflags |= X86_EFLAGS_FIXED;
2188 ctxt->ops->set_nmi_mask(ctxt, false);
2189
2190 return rc;
2191 }
2192
2193 static int em_iret(struct x86_emulate_ctxt *ctxt)
2194 {
2195 switch(ctxt->mode) {
2196 case X86EMUL_MODE_REAL:
2197 return emulate_iret_real(ctxt);
2198 case X86EMUL_MODE_VM86:
2199 case X86EMUL_MODE_PROT16:
2200 case X86EMUL_MODE_PROT32:
2201 case X86EMUL_MODE_PROT64:
2202 default:
2203 		/* IRET from protected mode is not implemented yet. */
2204 return X86EMUL_UNHANDLEABLE;
2205 }
2206 }
2207
2208 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2209 {
2210 int rc;
2211 unsigned short sel;
2212 struct desc_struct new_desc;
2213 u8 cpl = ctxt->ops->cpl(ctxt);
2214
2215 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2216
2217 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2218 X86_TRANSFER_CALL_JMP,
2219 &new_desc);
2220 if (rc != X86EMUL_CONTINUE)
2221 return rc;
2222
2223 rc = assign_eip_far(ctxt, ctxt->src.val);
2224 /* Error handling is not implemented. */
2225 if (rc != X86EMUL_CONTINUE)
2226 return X86EMUL_UNHANDLEABLE;
2227
2228 return rc;
2229 }
2230
2231 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2232 {
2233 return assign_eip_near(ctxt, ctxt->src.val);
2234 }
2235
2236 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2237 {
2238 int rc;
2239 long int old_eip;
2240
2241 old_eip = ctxt->_eip;
2242 rc = assign_eip_near(ctxt, ctxt->src.val);
2243 if (rc != X86EMUL_CONTINUE)
2244 return rc;
2245 ctxt->src.val = old_eip;
2246 rc = em_push(ctxt);
2247 return rc;
2248 }
2249
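/*
 * CMPXCHG8B: compare EDX:EAX with the 64-bit memory operand.  On a match,
 * store ECX:EBX and set ZF; on a mismatch, load the old value into EDX:EAX
 * and clear ZF.  The 16-byte CMPXCHG16B form is not handled here.
 */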
2250 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2251 {
2252 u64 old = ctxt->dst.orig_val64;
2253
2254 if (ctxt->dst.bytes == 16)
2255 return X86EMUL_UNHANDLEABLE;
2256
2257 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2258 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2259 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2260 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2261 ctxt->eflags &= ~X86_EFLAGS_ZF;
2262 } else {
2263 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2264 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2265
2266 ctxt->eflags |= X86_EFLAGS_ZF;
2267 }
2268 return X86EMUL_CONTINUE;
2269 }
2270
2271 static int em_ret(struct x86_emulate_ctxt *ctxt)
2272 {
2273 int rc;
2274 unsigned long eip = 0;
2275
2276 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2277 if (rc != X86EMUL_CONTINUE)
2278 return rc;
2279
2280 return assign_eip_near(ctxt, eip);
2281 }
2282
2283 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2284 {
2285 int rc;
2286 unsigned long eip = 0;
2287 unsigned long cs = 0;
2288 int cpl = ctxt->ops->cpl(ctxt);
2289 struct desc_struct new_desc;
2290
2291 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2292 if (rc != X86EMUL_CONTINUE)
2293 return rc;
2294 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2295 if (rc != X86EMUL_CONTINUE)
2296 return rc;
2297 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2298 X86_TRANSFER_RET,
2299 &new_desc);
2300 if (rc != X86EMUL_CONTINUE)
2301 return rc;
2302 rc = assign_eip_far(ctxt, eip);
2303 /* Error handling is not implemented. */
2304 if (rc != X86EMUL_CONTINUE)
2305 return X86EMUL_UNHANDLEABLE;
2306
2307 return rc;
2308 }
2309
2310 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2311 {
2312 int rc;
2313
2314 rc = em_ret_far(ctxt);
2315 if (rc != X86EMUL_CONTINUE)
2316 return rc;
2317 rsp_increment(ctxt, ctxt->src.val);
2318 return X86EMUL_CONTINUE;
2319 }
2320
2321 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2322 {
2323 /* Save real source value, then compare EAX against destination. */
2324 ctxt->dst.orig_val = ctxt->dst.val;
2325 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2326 ctxt->src.orig_val = ctxt->src.val;
2327 ctxt->src.val = ctxt->dst.orig_val;
2328 fastop(ctxt, em_cmp);
2329
2330 if (ctxt->eflags & X86_EFLAGS_ZF) {
2331 /* Success: write back to memory; no update of EAX */
2332 ctxt->src.type = OP_NONE;
2333 ctxt->dst.val = ctxt->src.orig_val;
2334 } else {
2335 /* Failure: write the value we saw to EAX. */
2336 ctxt->src.type = OP_REG;
2337 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2338 ctxt->src.val = ctxt->dst.orig_val;
2339 /* Create write-cycle to dest by writing the same value */
2340 ctxt->dst.val = ctxt->dst.orig_val;
2341 }
2342 return X86EMUL_CONTINUE;
2343 }
2344
2345 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2346 {
2347 int seg = ctxt->src2.val;
2348 unsigned short sel;
2349 int rc;
2350
2351 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2352
2353 rc = load_segment_descriptor(ctxt, sel, seg);
2354 if (rc != X86EMUL_CONTINUE)
2355 return rc;
2356
2357 ctxt->dst.val = ctxt->src.val;
2358 return rc;
2359 }
2360
2361 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2362 {
2363 if (!ctxt->ops->is_smm(ctxt))
2364 return emulate_ud(ctxt);
2365
2366 if (ctxt->ops->leave_smm(ctxt))
2367 ctxt->ops->triple_fault(ctxt);
2368
2369 return emulator_recalc_and_set_mode(ctxt);
2370 }
2371
2372 static void
2373 setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2374 {
2375 cs->l = 0; /* will be adjusted later */
2376 set_desc_base(cs, 0); /* flat segment */
2377 cs->g = 1; /* 4kb granularity */
2378 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2379 cs->type = 0x0b; /* Read, Execute, Accessed */
2380 cs->s = 1;
2381 cs->dpl = 0; /* will be adjusted later */
2382 cs->p = 1;
2383 cs->d = 1;
2384 cs->avl = 0;
2385
2386 set_desc_base(ss, 0); /* flat segment */
2387 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2388 ss->g = 1; /* 4kb granularity */
2389 ss->s = 1;
2390 ss->type = 0x03; /* Read/Write, Accessed */
2391 ss->d = 1; /* 32bit stack segment */
2392 ss->dpl = 0;
2393 ss->p = 1;
2394 ss->l = 0;
2395 ss->avl = 0;
2396 }
2397
2398 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2399 {
2400 const struct x86_emulate_ops *ops = ctxt->ops;
2401 struct desc_struct cs, ss;
2402 u64 msr_data;
2403 u16 cs_sel, ss_sel;
2404 u64 efer = 0;
2405
2406 /* syscall is not available in real mode */
2407 if (ctxt->mode == X86EMUL_MODE_REAL ||
2408 ctxt->mode == X86EMUL_MODE_VM86)
2409 return emulate_ud(ctxt);
2410
2411 /*
2412 * Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
2413 * AMD allows SYSCALL in any flavor of protected mode. Note, it's
2414 * infeasible to emulate Intel behavior when running on AMD hardware,
2415 * as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
2416 * for KVM to trap-and-emulate, unlike emulating AMD on Intel.
2417 */
2418 if (ctxt->mode != X86EMUL_MODE_PROT64 &&
2419 ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
2420 return emulate_ud(ctxt);
2421
2422 ops->get_msr(ctxt, MSR_EFER, &efer);
2423 if (!(efer & EFER_SCE))
2424 return emulate_ud(ctxt);
2425
2426 setup_syscalls_segments(&cs, &ss);
2427 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2428 msr_data >>= 32;
2429 cs_sel = (u16)(msr_data & 0xfffc);
2430 ss_sel = (u16)(msr_data + 8);
2431
2432 if (efer & EFER_LMA) {
2433 cs.d = 0;
2434 cs.l = 1;
2435 }
2436 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2437 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2438
2439 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2440 if (efer & EFER_LMA) {
2441 #ifdef CONFIG_X86_64
2442 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2443
2444 ops->get_msr(ctxt,
2445 ctxt->mode == X86EMUL_MODE_PROT64 ?
2446 MSR_LSTAR : MSR_CSTAR, &msr_data);
2447 ctxt->_eip = msr_data;
2448
2449 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2450 ctxt->eflags &= ~msr_data;
2451 ctxt->eflags |= X86_EFLAGS_FIXED;
2452 #endif
2453 } else {
2454 /* legacy mode */
2455 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2456 ctxt->_eip = (u32)msr_data;
2457
2458 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2459 }
2460
2461 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2462 return X86EMUL_CONTINUE;
2463 }
2464
2465 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2466 {
2467 const struct x86_emulate_ops *ops = ctxt->ops;
2468 struct desc_struct cs, ss;
2469 u64 msr_data;
2470 u16 cs_sel, ss_sel;
2471 u64 efer = 0;
2472
2473 ops->get_msr(ctxt, MSR_EFER, &efer);
2474 /* inject #GP if in real mode */
2475 if (ctxt->mode == X86EMUL_MODE_REAL)
2476 return emulate_gp(ctxt, 0);
2477
2478 /*
2479 * Intel's architecture allows SYSENTER in compatibility mode, but AMD
2480 * does not. Note, AMD does allow SYSENTER in legacy protected mode.
2481 */
2482 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
2483 !ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
2484 return emulate_ud(ctxt);
2485
2486 /* sysenter/sysexit have not been tested in 64bit mode. */
2487 if (ctxt->mode == X86EMUL_MODE_PROT64)
2488 return X86EMUL_UNHANDLEABLE;
2489
2490 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2491 if ((msr_data & 0xfffc) == 0x0)
2492 return emulate_gp(ctxt, 0);
2493
2494 setup_syscalls_segments(&cs, &ss);
2495 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2496 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2497 ss_sel = cs_sel + 8;
2498 if (efer & EFER_LMA) {
2499 cs.d = 0;
2500 cs.l = 1;
2501 }
2502
2503 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2504 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2505
2506 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2507 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2508
2509 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2510 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2511 (u32)msr_data;
2512 if (efer & EFER_LMA)
2513 ctxt->mode = X86EMUL_MODE_PROT64;
2514
2515 return X86EMUL_CONTINUE;
2516 }
2517
2518 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2519 {
2520 const struct x86_emulate_ops *ops = ctxt->ops;
2521 struct desc_struct cs, ss;
2522 u64 msr_data, rcx, rdx;
2523 int usermode;
2524 u16 cs_sel = 0, ss_sel = 0;
2525
2526 /* inject #GP if in real mode or Virtual 8086 mode */
2527 if (ctxt->mode == X86EMUL_MODE_REAL ||
2528 ctxt->mode == X86EMUL_MODE_VM86)
2529 return emulate_gp(ctxt, 0);
2530
2531 setup_syscalls_segments(&cs, &ss);
2532
2533 if ((ctxt->rex_prefix & 0x8) != 0x0)
2534 usermode = X86EMUL_MODE_PROT64;
2535 else
2536 usermode = X86EMUL_MODE_PROT32;
2537
2538 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2539 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2540
2541 cs.dpl = 3;
2542 ss.dpl = 3;
2543 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2544 switch (usermode) {
2545 case X86EMUL_MODE_PROT32:
2546 cs_sel = (u16)(msr_data + 16);
2547 if ((msr_data & 0xfffc) == 0x0)
2548 return emulate_gp(ctxt, 0);
2549 ss_sel = (u16)(msr_data + 24);
2550 rcx = (u32)rcx;
2551 rdx = (u32)rdx;
2552 break;
2553 case X86EMUL_MODE_PROT64:
2554 cs_sel = (u16)(msr_data + 32);
2555 if (msr_data == 0x0)
2556 return emulate_gp(ctxt, 0);
2557 ss_sel = cs_sel + 8;
2558 cs.d = 0;
2559 cs.l = 1;
2560 if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
2561 emul_is_noncanonical_address(rdx, ctxt, 0))
2562 return emulate_gp(ctxt, 0);
2563 break;
2564 }
2565 cs_sel |= SEGMENT_RPL_MASK;
2566 ss_sel |= SEGMENT_RPL_MASK;
2567
2568 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2569 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2570
2571 ctxt->_eip = rdx;
2572 ctxt->mode = usermode;
2573 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2574
2575 return X86EMUL_CONTINUE;
2576 }
2577
2578 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2579 {
2580 int iopl;
2581 if (ctxt->mode == X86EMUL_MODE_REAL)
2582 return false;
2583 if (ctxt->mode == X86EMUL_MODE_VM86)
2584 return true;
2585 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2586 return ctxt->ops->cpl(ctxt) > iopl;
2587 }
2588
2589 #define VMWARE_PORT_VMPORT (0x5658)
2590 #define VMWARE_PORT_VMRPC (0x5659)
2591
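/*
 * Check the TSS I/O permission bitmap for an I/O access of 'len' bytes at
 * 'port'.  The bitmap offset is read from the TSS at byte 102; the access
 * is allowed only if every bit covering the port range is clear.
 */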
2592 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2593 u16 port, u16 len)
2594 {
2595 const struct x86_emulate_ops *ops = ctxt->ops;
2596 struct desc_struct tr_seg;
2597 u32 base3;
2598 int r;
2599 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2600 unsigned mask = (1 << len) - 1;
2601 unsigned long base;
2602
2603 /*
2604 * VMware allows access to these ports even if denied
2605 * by TSS I/O permission bitmap. Mimic behavior.
2606 */
2607 if (enable_vmware_backdoor &&
2608 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2609 return true;
2610
2611 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2612 if (!tr_seg.p)
2613 return false;
2614 if (desc_limit_scaled(&tr_seg) < 103)
2615 return false;
2616 base = get_desc_base(&tr_seg);
2617 #ifdef CONFIG_X86_64
2618 base |= ((u64)base3) << 32;
2619 #endif
2620 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2621 if (r != X86EMUL_CONTINUE)
2622 return false;
2623 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2624 return false;
2625 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2626 if (r != X86EMUL_CONTINUE)
2627 return false;
2628 if ((perm >> bit_idx) & mask)
2629 return false;
2630 return true;
2631 }
2632
2633 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2634 u16 port, u16 len)
2635 {
2636 if (ctxt->perm_ok)
2637 return true;
2638
2639 if (emulator_bad_iopl(ctxt))
2640 if (!emulator_io_port_access_allowed(ctxt, port, len))
2641 return false;
2642
2643 ctxt->perm_ok = true;
2644
2645 return true;
2646 }
2647
2648 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2649 {
2650 /*
2651 	 * Intel CPUs mask the counter and pointers in a rather strange
2652 	 * manner when ECX is zero due to REP-string optimizations.
2653 */
2654 #ifdef CONFIG_X86_64
2655 u32 eax, ebx, ecx, edx;
2656
2657 if (ctxt->ad_bytes != 4)
2658 return;
2659
2660 eax = ecx = 0;
2661 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2662 if (!is_guest_vendor_intel(ebx, ecx, edx))
2663 return;
2664
2665 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2666
2667 switch (ctxt->b) {
2668 case 0xa4: /* movsb */
2669 case 0xa5: /* movsd/w */
2670 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2671 fallthrough;
2672 case 0xaa: /* stosb */
2673 case 0xab: /* stosd/w */
2674 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2675 }
2676 #endif
2677 }
2678
2679 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2680 struct tss_segment_16 *tss)
2681 {
2682 tss->ip = ctxt->_eip;
2683 tss->flag = ctxt->eflags;
2684 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2685 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2686 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2687 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2688 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2689 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2690 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2691 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2692
2693 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2694 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2695 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2696 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2697 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2698 }
2699
2700 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2701 struct tss_segment_16 *tss)
2702 {
2703 int ret;
2704 u8 cpl;
2705
2706 ctxt->_eip = tss->ip;
2707 ctxt->eflags = tss->flag | 2;
2708 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2709 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2710 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2711 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2712 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2713 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2714 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2715 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2716
2717 /*
2718 	 * The SDM says that segment selectors are loaded before segment
2719 * descriptors
2720 */
2721 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2722 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2723 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2724 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2725 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2726
2727 cpl = tss->cs & 3;
2728
2729 /*
2730 	 * Now load the segment descriptors. If a fault happens at this stage,
2731 	 * it is handled in the context of the new task.
2732 */
2733 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2734 X86_TRANSFER_TASK_SWITCH, NULL);
2735 if (ret != X86EMUL_CONTINUE)
2736 return ret;
2737 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2738 X86_TRANSFER_TASK_SWITCH, NULL);
2739 if (ret != X86EMUL_CONTINUE)
2740 return ret;
2741 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2742 X86_TRANSFER_TASK_SWITCH, NULL);
2743 if (ret != X86EMUL_CONTINUE)
2744 return ret;
2745 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2746 X86_TRANSFER_TASK_SWITCH, NULL);
2747 if (ret != X86EMUL_CONTINUE)
2748 return ret;
2749 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2750 X86_TRANSFER_TASK_SWITCH, NULL);
2751 if (ret != X86EMUL_CONTINUE)
2752 return ret;
2753
2754 return X86EMUL_CONTINUE;
2755 }
2756
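/*
 * 16-bit task switch: save the outgoing state into the old TSS, read the
 * new TSS, optionally record the back link to the old task, then load the
 * incoming state from the new TSS.
 */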
2757 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2758 ulong old_tss_base, struct desc_struct *new_desc)
2759 {
2760 struct tss_segment_16 tss_seg;
2761 int ret;
2762 u32 new_tss_base = get_desc_base(new_desc);
2763
2764 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2765 if (ret != X86EMUL_CONTINUE)
2766 return ret;
2767
2768 save_state_to_tss16(ctxt, &tss_seg);
2769
2770 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2771 if (ret != X86EMUL_CONTINUE)
2772 return ret;
2773
2774 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2775 if (ret != X86EMUL_CONTINUE)
2776 return ret;
2777
2778 if (old_tss_sel != 0xffff) {
2779 tss_seg.prev_task_link = old_tss_sel;
2780
2781 ret = linear_write_system(ctxt, new_tss_base,
2782 &tss_seg.prev_task_link,
2783 sizeof(tss_seg.prev_task_link));
2784 if (ret != X86EMUL_CONTINUE)
2785 return ret;
2786 }
2787
2788 return load_state_from_tss16(ctxt, &tss_seg);
2789 }
2790
2791 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2792 struct tss_segment_32 *tss)
2793 {
2794 	/* CR3 and the LDT selector are intentionally not saved. */
2795 tss->eip = ctxt->_eip;
2796 tss->eflags = ctxt->eflags;
2797 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2798 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2799 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2800 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2801 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2802 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2803 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2804 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2805
2806 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2807 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2808 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2809 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2810 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2811 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2812 }
2813
2814 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2815 struct tss_segment_32 *tss)
2816 {
2817 int ret;
2818 u8 cpl;
2819
2820 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2821 return emulate_gp(ctxt, 0);
2822 ctxt->_eip = tss->eip;
2823 ctxt->eflags = tss->eflags | 2;
2824
2825 /* General purpose registers */
2826 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2827 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2828 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2829 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2830 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2831 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2832 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2833 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2834
2835 /*
2836 	 * The SDM says that segment selectors are loaded before segment
2837 * descriptors. This is important because CPL checks will
2838 * use CS.RPL.
2839 */
2840 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2841 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2842 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2843 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2844 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2845 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2846 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2847
2848 /*
2849 * If we're switching between Protected Mode and VM86, we need to make
2850 * sure to update the mode before loading the segment descriptors so
2851 * that the selectors are interpreted correctly.
2852 */
2853 if (ctxt->eflags & X86_EFLAGS_VM) {
2854 ctxt->mode = X86EMUL_MODE_VM86;
2855 cpl = 3;
2856 } else {
2857 ctxt->mode = X86EMUL_MODE_PROT32;
2858 cpl = tss->cs & 3;
2859 }
2860
2861 /*
2862 	 * Now load the segment descriptors. If a fault happens at this stage,
2863 	 * it is handled in the context of the new task.
2864 */
2865 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2866 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2867 if (ret != X86EMUL_CONTINUE)
2868 return ret;
2869 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2870 X86_TRANSFER_TASK_SWITCH, NULL);
2871 if (ret != X86EMUL_CONTINUE)
2872 return ret;
2873 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2874 X86_TRANSFER_TASK_SWITCH, NULL);
2875 if (ret != X86EMUL_CONTINUE)
2876 return ret;
2877 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2878 X86_TRANSFER_TASK_SWITCH, NULL);
2879 if (ret != X86EMUL_CONTINUE)
2880 return ret;
2881 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2882 X86_TRANSFER_TASK_SWITCH, NULL);
2883 if (ret != X86EMUL_CONTINUE)
2884 return ret;
2885 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2886 X86_TRANSFER_TASK_SWITCH, NULL);
2887 if (ret != X86EMUL_CONTINUE)
2888 return ret;
2889 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2890 X86_TRANSFER_TASK_SWITCH, NULL);
2891
2892 return ret;
2893 }
2894
2895 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2896 ulong old_tss_base, struct desc_struct *new_desc)
2897 {
2898 struct tss_segment_32 tss_seg;
2899 int ret;
2900 u32 new_tss_base = get_desc_base(new_desc);
2901 u32 eip_offset = offsetof(struct tss_segment_32, eip);
2902 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2903
2904 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2905 if (ret != X86EMUL_CONTINUE)
2906 return ret;
2907
2908 save_state_to_tss32(ctxt, &tss_seg);
2909
2910 /* Only GP registers and segment selectors are saved */
2911 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2912 ldt_sel_offset - eip_offset);
2913 if (ret != X86EMUL_CONTINUE)
2914 return ret;
2915
2916 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2917 if (ret != X86EMUL_CONTINUE)
2918 return ret;
2919
2920 if (old_tss_sel != 0xffff) {
2921 tss_seg.prev_task_link = old_tss_sel;
2922
2923 ret = linear_write_system(ctxt, new_tss_base,
2924 &tss_seg.prev_task_link,
2925 sizeof(tss_seg.prev_task_link));
2926 if (ret != X86EMUL_CONTINUE)
2927 return ret;
2928 }
2929
2930 return load_state_from_tss32(ctxt, &tss_seg);
2931 }
2932
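/*
 * Common task-switch path: read the old and new TSS descriptors, apply the
 * privilege and limit checks, update the busy bits and the NT flag as the
 * switch reason dictates, load the new state (16- or 32-bit TSS), set
 * CR0.TS, push an error code if one was supplied, and clear the local
 * breakpoint enables in DR7.
 */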
2933 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2934 u16 tss_selector, int idt_index, int reason,
2935 bool has_error_code, u32 error_code)
2936 {
2937 const struct x86_emulate_ops *ops = ctxt->ops;
2938 struct desc_struct curr_tss_desc, next_tss_desc;
2939 int ret;
2940 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2941 ulong old_tss_base =
2942 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2943 u32 desc_limit;
2944 ulong desc_addr, dr7;
2945
2946 /* FIXME: old_tss_base == ~0 ? */
2947
2948 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2949 if (ret != X86EMUL_CONTINUE)
2950 return ret;
2951 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2952 if (ret != X86EMUL_CONTINUE)
2953 return ret;
2954
2955 /* FIXME: check that next_tss_desc is tss */
2956
2957 /*
2958 * Check privileges. The three cases are task switch caused by...
2959 *
2960 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2961 * 2. Exception/IRQ/iret: No check is performed
2962 * 3. jmp/call to TSS/task-gate: No check is performed since the
2963 * hardware checks it before exiting.
2964 */
2965 if (reason == TASK_SWITCH_GATE) {
2966 if (idt_index != -1) {
2967 /* Software interrupts */
2968 struct desc_struct task_gate_desc;
2969 int dpl;
2970
2971 ret = read_interrupt_descriptor(ctxt, idt_index,
2972 &task_gate_desc);
2973 if (ret != X86EMUL_CONTINUE)
2974 return ret;
2975
2976 dpl = task_gate_desc.dpl;
2977 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2978 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2979 }
2980 }
2981
2982 desc_limit = desc_limit_scaled(&next_tss_desc);
2983 if (!next_tss_desc.p ||
2984 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2985 desc_limit < 0x2b)) {
2986 return emulate_ts(ctxt, tss_selector & 0xfffc);
2987 }
2988
2989 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2990 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2991 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2992 }
2993
2994 if (reason == TASK_SWITCH_IRET)
2995 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2996
2997 	/* Set the back link to the previous task only if the NT bit is set in
2998 	   EFLAGS; note that old_tss_sel is not used after this point. */
2999 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3000 old_tss_sel = 0xffff;
3001
3002 if (next_tss_desc.type & 8)
3003 ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
3004 else
3005 ret = task_switch_16(ctxt, old_tss_sel,
3006 old_tss_base, &next_tss_desc);
3007 if (ret != X86EMUL_CONTINUE)
3008 return ret;
3009
3010 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3011 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3012
3013 if (reason != TASK_SWITCH_IRET) {
3014 next_tss_desc.type |= (1 << 1); /* set busy flag */
3015 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3016 }
3017
3018 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3019 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3020
3021 if (has_error_code) {
3022 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3023 ctxt->lock_prefix = 0;
3024 ctxt->src.val = (unsigned long) error_code;
3025 ret = em_push(ctxt);
3026 }
3027
3028 dr7 = ops->get_dr(ctxt, 7);
3029 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3030
3031 return ret;
3032 }
3033
3034 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3035 u16 tss_selector, int idt_index, int reason,
3036 bool has_error_code, u32 error_code)
3037 {
3038 int rc;
3039
3040 invalidate_registers(ctxt);
3041 ctxt->_eip = ctxt->eip;
3042 ctxt->dst.type = OP_NONE;
3043
3044 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3045 has_error_code, error_code);
3046
3047 if (rc == X86EMUL_CONTINUE) {
3048 ctxt->eip = ctxt->_eip;
3049 writeback_registers(ctxt);
3050 }
3051
3052 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3053 }
3054
3055 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3056 struct operand *op)
3057 {
3058 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3059
3060 register_address_increment(ctxt, reg, df * op->bytes);
3061 op->addr.mem.ea = register_address(ctxt, reg);
3062 }
3063
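/*
 * DAS: decimal-adjust AL after a packed BCD subtraction.  For example,
 * 0x31 - 0x05 leaves AL = 0x2c; the low nibble is > 9, so 6 is subtracted
 * and AF set, giving the correct BCD result 0x26.
 */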
3064 static int em_das(struct x86_emulate_ctxt *ctxt)
3065 {
3066 u8 al, old_al;
3067 bool af, cf, old_cf;
3068
3069 cf = ctxt->eflags & X86_EFLAGS_CF;
3070 al = ctxt->dst.val;
3071
3072 old_al = al;
3073 old_cf = cf;
3074 cf = false;
3075 af = ctxt->eflags & X86_EFLAGS_AF;
3076 if ((al & 0x0f) > 9 || af) {
3077 al -= 6;
3078 cf = old_cf | (al >= 250);
3079 af = true;
3080 } else {
3081 af = false;
3082 }
3083 if (old_al > 0x99 || old_cf) {
3084 al -= 0x60;
3085 cf = true;
3086 }
3087
3088 ctxt->dst.val = al;
3089 /* Set PF, ZF, SF */
3090 ctxt->src.type = OP_IMM;
3091 ctxt->src.val = 0;
3092 ctxt->src.bytes = 1;
3093 fastop(ctxt, em_or);
3094 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3095 if (cf)
3096 ctxt->eflags |= X86_EFLAGS_CF;
3097 if (af)
3098 ctxt->eflags |= X86_EFLAGS_AF;
3099 return X86EMUL_CONTINUE;
3100 }
3101
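/*
 * AAM: split AL into two unpacked BCD digits using the immediate as the
 * base (10 by default): AH = AL / base, AL = AL % base.  For example,
 * AL = 0x4f (79) with base 10 yields AH = 7, AL = 9.  A base of zero
 * raises #DE.
 */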
3102 static int em_aam(struct x86_emulate_ctxt *ctxt)
3103 {
3104 u8 al, ah;
3105
3106 if (ctxt->src.val == 0)
3107 return emulate_de(ctxt);
3108
3109 al = ctxt->dst.val & 0xff;
3110 ah = al / ctxt->src.val;
3111 al %= ctxt->src.val;
3112
3113 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3114
3115 /* Set PF, ZF, SF */
3116 ctxt->src.type = OP_IMM;
3117 ctxt->src.val = 0;
3118 ctxt->src.bytes = 1;
3119 fastop(ctxt, em_or);
3120
3121 return X86EMUL_CONTINUE;
3122 }
3123
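/*
 * AAD: combine the unpacked BCD digits in AH:AL back into a binary value,
 * AL = AH * base + AL, and clear AH.  For example, AH = 7, AL = 9 with
 * base 10 yields AL = 79 (0x4f).
 */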
3124 static int em_aad(struct x86_emulate_ctxt *ctxt)
3125 {
3126 u8 al = ctxt->dst.val & 0xff;
3127 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3128
3129 al = (al + (ah * ctxt->src.val)) & 0xff;
3130
3131 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3132
3133 /* Set PF, ZF, SF */
3134 ctxt->src.type = OP_IMM;
3135 ctxt->src.val = 0;
3136 ctxt->src.bytes = 1;
3137 fastop(ctxt, em_or);
3138
3139 return X86EMUL_CONTINUE;
3140 }
3141
3142 static int em_call(struct x86_emulate_ctxt *ctxt)
3143 {
3144 int rc;
3145 long rel = ctxt->src.val;
3146
3147 ctxt->src.val = (unsigned long)ctxt->_eip;
3148 rc = jmp_rel(ctxt, rel);
3149 if (rc != X86EMUL_CONTINUE)
3150 return rc;
3151 return em_push(ctxt);
3152 }
3153
3154 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3155 {
3156 u16 sel, old_cs;
3157 ulong old_eip;
3158 int rc;
3159 struct desc_struct old_desc, new_desc;
3160 const struct x86_emulate_ops *ops = ctxt->ops;
3161 int cpl = ctxt->ops->cpl(ctxt);
3162 enum x86emul_mode prev_mode = ctxt->mode;
3163
3164 old_eip = ctxt->_eip;
3165 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3166
3167 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3168 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3169 X86_TRANSFER_CALL_JMP, &new_desc);
3170 if (rc != X86EMUL_CONTINUE)
3171 return rc;
3172
3173 rc = assign_eip_far(ctxt, ctxt->src.val);
3174 if (rc != X86EMUL_CONTINUE)
3175 goto fail;
3176
3177 ctxt->src.val = old_cs;
3178 rc = em_push(ctxt);
3179 if (rc != X86EMUL_CONTINUE)
3180 goto fail;
3181
3182 ctxt->src.val = old_eip;
3183 rc = em_push(ctxt);
3184 	/* If we failed, we tainted the memory, but at the very least we should
3185 	   restore cs. */
3186 if (rc != X86EMUL_CONTINUE) {
3187 pr_warn_once("faulting far call emulation tainted memory\n");
3188 goto fail;
3189 }
3190 return rc;
3191 fail:
3192 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3193 ctxt->mode = prev_mode;
3194 return rc;
3195
3196 }
3197
3198 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3199 {
3200 int rc;
3201 unsigned long eip = 0;
3202
3203 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3204 if (rc != X86EMUL_CONTINUE)
3205 return rc;
3206 rc = assign_eip_near(ctxt, eip);
3207 if (rc != X86EMUL_CONTINUE)
3208 return rc;
3209 rsp_increment(ctxt, ctxt->src.val);
3210 return X86EMUL_CONTINUE;
3211 }
3212
3213 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3214 {
3215 /* Write back the register source. */
3216 ctxt->src.val = ctxt->dst.val;
3217 write_register_operand(&ctxt->src);
3218
3219 /* Write back the memory destination with implicit LOCK prefix. */
3220 ctxt->dst.val = ctxt->src.orig_val;
3221 ctxt->lock_prefix = 1;
3222 return X86EMUL_CONTINUE;
3223 }
3224
3225 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3226 {
3227 ctxt->dst.val = ctxt->src2.val;
3228 return fastop(ctxt, em_imul);
3229 }
3230
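/*
 * CWD/CDQ/CQO: sign-extend the accumulator into DX/EDX/RDX.  The
 * expression below isolates the sign bit; the result is all ones when the
 * source is negative and zero otherwise.
 */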
3231 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3232 {
3233 ctxt->dst.type = OP_REG;
3234 ctxt->dst.bytes = ctxt->src.bytes;
3235 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3236 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3237
3238 return X86EMUL_CONTINUE;
3239 }
3240
3241 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3242 {
3243 u64 tsc_aux = 0;
3244
3245 if (!ctxt->ops->guest_has_rdpid(ctxt))
3246 return emulate_ud(ctxt);
3247
3248 ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3249 ctxt->dst.val = tsc_aux;
3250 return X86EMUL_CONTINUE;
3251 }
3252
3253 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3254 {
3255 u64 tsc = 0;
3256
3257 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3258 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3259 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3260 return X86EMUL_CONTINUE;
3261 }
3262
3263 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3264 {
3265 u64 pmc;
3266
3267 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3268 return emulate_gp(ctxt, 0);
3269 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3270 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3271 return X86EMUL_CONTINUE;
3272 }
3273
3274 static int em_mov(struct x86_emulate_ctxt *ctxt)
3275 {
3276 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3277 return X86EMUL_CONTINUE;
3278 }
3279
3280 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3281 {
3282 u16 tmp;
3283
3284 if (!ctxt->ops->guest_has_movbe(ctxt))
3285 return emulate_ud(ctxt);
3286
3287 switch (ctxt->op_bytes) {
3288 case 2:
3289 /*
3290 * From MOVBE definition: "...When the operand size is 16 bits,
3291 * the upper word of the destination register remains unchanged
3292 * ..."
3293 *
3294 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3295 	 * rules, so we have to do the operation almost by hand.
3296 */
3297 tmp = (u16)ctxt->src.val;
3298 ctxt->dst.val &= ~0xffffUL;
3299 ctxt->dst.val |= (unsigned long)swab16(tmp);
3300 break;
3301 case 4:
3302 ctxt->dst.val = swab32((u32)ctxt->src.val);
3303 break;
3304 case 8:
3305 ctxt->dst.val = swab64(ctxt->src.val);
3306 break;
3307 default:
3308 BUG();
3309 }
3310 return X86EMUL_CONTINUE;
3311 }
3312
3313 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3314 {
3315 int cr_num = ctxt->modrm_reg;
3316 int r;
3317
3318 if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3319 return emulate_gp(ctxt, 0);
3320
3321 /* Disable writeback. */
3322 ctxt->dst.type = OP_NONE;
3323
3324 if (cr_num == 0) {
3325 /*
3326 * CR0 write might have updated CR0.PE and/or CR0.PG
3327 * which can affect the cpu's execution mode.
3328 */
3329 r = emulator_recalc_and_set_mode(ctxt);
3330 if (r != X86EMUL_CONTINUE)
3331 return r;
3332 }
3333
3334 return X86EMUL_CONTINUE;
3335 }
3336
3337 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3338 {
3339 unsigned long val;
3340
3341 if (ctxt->mode == X86EMUL_MODE_PROT64)
3342 val = ctxt->src.val & ~0ULL;
3343 else
3344 val = ctxt->src.val & ~0U;
3345
3346 /* #UD condition is already handled. */
3347 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3348 return emulate_gp(ctxt, 0);
3349
3350 /* Disable writeback. */
3351 ctxt->dst.type = OP_NONE;
3352 return X86EMUL_CONTINUE;
3353 }
3354
3355 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3356 {
3357 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3358 u64 msr_data;
3359 int r;
3360
3361 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3362 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3363 r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3364
3365 if (r == X86EMUL_PROPAGATE_FAULT)
3366 return emulate_gp(ctxt, 0);
3367
3368 return r;
3369 }
3370
3371 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3372 {
3373 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3374 u64 msr_data;
3375 int r;
3376
3377 r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3378
3379 if (r == X86EMUL_PROPAGATE_FAULT)
3380 return emulate_gp(ctxt, 0);
3381
3382 if (r == X86EMUL_CONTINUE) {
3383 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3384 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3385 }
3386 return r;
3387 }
3388
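/*
 * Store a segment selector (used for MOV r/m, sreg as well as SLDT/STR).
 * SLDT and STR (segment > VCPU_SREG_GS) fault with #GP when CR4.UMIP is
 * set and CPL > 0.  Only 16 bits are written to a memory destination.
 */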
3389 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3390 {
3391 if (segment > VCPU_SREG_GS &&
3392 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3393 ctxt->ops->cpl(ctxt) > 0)
3394 return emulate_gp(ctxt, 0);
3395
3396 ctxt->dst.val = get_segment_selector(ctxt, segment);
3397 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3398 ctxt->dst.bytes = 2;
3399 return X86EMUL_CONTINUE;
3400 }
3401
3402 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3403 {
3404 if (ctxt->modrm_reg > VCPU_SREG_GS)
3405 return emulate_ud(ctxt);
3406
3407 return em_store_sreg(ctxt, ctxt->modrm_reg);
3408 }
3409
3410 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3411 {
3412 u16 sel = ctxt->src.val;
3413
3414 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3415 return emulate_ud(ctxt);
3416
3417 if (ctxt->modrm_reg == VCPU_SREG_SS)
3418 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3419
3420 /* Disable writeback. */
3421 ctxt->dst.type = OP_NONE;
3422 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3423 }
3424
3425 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3426 {
3427 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3428 }
3429
3430 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3431 {
3432 u16 sel = ctxt->src.val;
3433
3434 /* Disable writeback. */
3435 ctxt->dst.type = OP_NONE;
3436 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3437 }
3438
3439 static int em_str(struct x86_emulate_ctxt *ctxt)
3440 {
3441 return em_store_sreg(ctxt, VCPU_SREG_TR);
3442 }
3443
3444 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3445 {
3446 u16 sel = ctxt->src.val;
3447
3448 /* Disable writeback. */
3449 ctxt->dst.type = OP_NONE;
3450 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3451 }
3452
3453 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3454 {
3455 int rc;
3456 ulong linear;
3457 unsigned int max_size;
3458
3459 rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3460 &linear, X86EMUL_F_INVLPG);
3461 if (rc == X86EMUL_CONTINUE)
3462 ctxt->ops->invlpg(ctxt, linear);
3463 /* Disable writeback. */
3464 ctxt->dst.type = OP_NONE;
3465 return X86EMUL_CONTINUE;
3466 }
3467
3468 static int em_clts(struct x86_emulate_ctxt *ctxt)
3469 {
3470 ulong cr0;
3471
3472 cr0 = ctxt->ops->get_cr(ctxt, 0);
3473 cr0 &= ~X86_CR0_TS;
3474 ctxt->ops->set_cr(ctxt, 0, cr0);
3475 return X86EMUL_CONTINUE;
3476 }
3477
3478 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3479 {
3480 int rc = ctxt->ops->fix_hypercall(ctxt);
3481
3482 if (rc != X86EMUL_CONTINUE)
3483 return rc;
3484
3485 /* Let the processor re-execute the fixed hypercall */
3486 ctxt->_eip = ctxt->eip;
3487 /* Disable writeback. */
3488 ctxt->dst.type = OP_NONE;
3489 return X86EMUL_CONTINUE;
3490 }
3491
3492 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3493 void (*get)(struct x86_emulate_ctxt *ctxt,
3494 struct desc_ptr *ptr))
3495 {
3496 struct desc_ptr desc_ptr;
3497
3498 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3499 ctxt->ops->cpl(ctxt) > 0)
3500 return emulate_gp(ctxt, 0);
3501
3502 if (ctxt->mode == X86EMUL_MODE_PROT64)
3503 ctxt->op_bytes = 8;
3504 get(ctxt, &desc_ptr);
3505 if (ctxt->op_bytes == 2) {
3506 ctxt->op_bytes = 4;
3507 desc_ptr.address &= 0x00ffffff;
3508 }
3509 /* Disable writeback. */
3510 ctxt->dst.type = OP_NONE;
3511 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3512 &desc_ptr, 2 + ctxt->op_bytes);
3513 }
3514
3515 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3516 {
3517 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3518 }
3519
3520 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3521 {
3522 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3523 }
3524
3525 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3526 {
3527 struct desc_ptr desc_ptr;
3528 int rc;
3529
3530 if (ctxt->mode == X86EMUL_MODE_PROT64)
3531 ctxt->op_bytes = 8;
3532 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3533 &desc_ptr.size, &desc_ptr.address,
3534 ctxt->op_bytes);
3535 if (rc != X86EMUL_CONTINUE)
3536 return rc;
3537 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3538 emul_is_noncanonical_address(desc_ptr.address, ctxt,
3539 X86EMUL_F_DT_LOAD))
3540 return emulate_gp(ctxt, 0);
3541 if (lgdt)
3542 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3543 else
3544 ctxt->ops->set_idt(ctxt, &desc_ptr);
3545 /* Disable writeback. */
3546 ctxt->dst.type = OP_NONE;
3547 return X86EMUL_CONTINUE;
3548 }
3549
3550 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3551 {
3552 return em_lgdt_lidt(ctxt, true);
3553 }
3554
3555 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3556 {
3557 return em_lgdt_lidt(ctxt, false);
3558 }
3559
3560 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3561 {
3562 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3563 ctxt->ops->cpl(ctxt) > 0)
3564 return emulate_gp(ctxt, 0);
3565
3566 if (ctxt->dst.type == OP_MEM)
3567 ctxt->dst.bytes = 2;
3568 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3569 return X86EMUL_CONTINUE;
3570 }
3571
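/*
 * LMSW: update only the low four bits of CR0 (PE, MP, EM, TS).  Because
 * the existing PE bit is preserved and then OR-ed with the source, LMSW
 * can set PE but never clear it.
 */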
3572 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3573 {
3574 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3575 | (ctxt->src.val & 0x0f));
3576 ctxt->dst.type = OP_NONE;
3577 return X86EMUL_CONTINUE;
3578 }
3579
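/*
 * LOOP/LOOPE/LOOPNE: decrement (E)CX and branch while it is non-zero.
 * For opcodes 0xe0/0xe1, the xor with 0x5 turns the opcode into the
 * matching NZ/Z condition code, adding the LOOPNE/LOOPE test on ZF.
 */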
3580 static int em_loop(struct x86_emulate_ctxt *ctxt)
3581 {
3582 int rc = X86EMUL_CONTINUE;
3583
3584 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3585 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3586 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3587 rc = jmp_rel(ctxt, ctxt->src.val);
3588
3589 return rc;
3590 }
3591
3592 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3593 {
3594 int rc = X86EMUL_CONTINUE;
3595
3596 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3597 rc = jmp_rel(ctxt, ctxt->src.val);
3598
3599 return rc;
3600 }
3601
3602 static int em_in(struct x86_emulate_ctxt *ctxt)
3603 {
3604 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3605 &ctxt->dst.val))
3606 return X86EMUL_IO_NEEDED;
3607
3608 return X86EMUL_CONTINUE;
3609 }
3610
3611 static int em_out(struct x86_emulate_ctxt *ctxt)
3612 {
3613 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3614 &ctxt->src.val, 1);
3615 /* Disable writeback. */
3616 ctxt->dst.type = OP_NONE;
3617 return X86EMUL_CONTINUE;
3618 }
3619
3620 static int em_cli(struct x86_emulate_ctxt *ctxt)
3621 {
3622 if (emulator_bad_iopl(ctxt))
3623 return emulate_gp(ctxt, 0);
3624
3625 ctxt->eflags &= ~X86_EFLAGS_IF;
3626 return X86EMUL_CONTINUE;
3627 }
3628
3629 static int em_sti(struct x86_emulate_ctxt *ctxt)
3630 {
3631 if (emulator_bad_iopl(ctxt))
3632 return emulate_gp(ctxt, 0);
3633
3634 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3635 ctxt->eflags |= X86_EFLAGS_IF;
3636 return X86EMUL_CONTINUE;
3637 }
3638
3639 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3640 {
3641 u32 eax, ebx, ecx, edx;
3642 u64 msr = 0;
3643
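	/*
	 * If CPUID faulting is enabled in MSR_MISC_FEATURES_ENABLES, CPUID
	 * executed at CPL > 0 must raise #GP(0) instead of being emulated.
	 */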
3644 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3645 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3646 ctxt->ops->cpl(ctxt)) {
3647 return emulate_gp(ctxt, 0);
3648 }
3649
3650 eax = reg_read(ctxt, VCPU_REGS_RAX);
3651 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3652 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3653 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3654 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3655 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3656 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3657 return X86EMUL_CONTINUE;
3658 }
3659
3660 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3661 {
3662 u32 flags;
3663
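	/*
	 * SAHF loads SF, ZF, AF, PF and CF from AH; the reserved bit 1
	 * (X86_EFLAGS_FIXED) always reads as 1, bits 3 and 5 stay clear.
	 */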
3664 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3665 X86_EFLAGS_SF;
3666 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3667
3668 ctxt->eflags &= ~0xffUL;
3669 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3670 return X86EMUL_CONTINUE;
3671 }
3672
3673 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3674 {
3675 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3676 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3677 return X86EMUL_CONTINUE;
3678 }
3679
3680 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3681 {
3682 switch (ctxt->op_bytes) {
3683 #ifdef CONFIG_X86_64
3684 case 8:
3685 asm("bswap %0" : "+r"(ctxt->dst.val));
3686 break;
3687 #endif
3688 default:
3689 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3690 break;
3691 }
3692 return X86EMUL_CONTINUE;
3693 }
3694
3695 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3696 {
3697 /* emulating clflush regardless of cpuid */
3698 return X86EMUL_CONTINUE;
3699 }
3700
3701 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3702 {
3703 /* emulating clflushopt regardless of cpuid */
3704 return X86EMUL_CONTINUE;
3705 }
3706
3707 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3708 {
3709 ctxt->dst.val = (s32) ctxt->src.val;
3710 return X86EMUL_CONTINUE;
3711 }
3712
3713 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3714 {
3715 if (!ctxt->ops->guest_has_fxsr(ctxt))
3716 return emulate_ud(ctxt);
3717
3718 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3719 return emulate_nm(ctxt);
3720
3721 /*
3722 * Don't emulate a case that should never be hit, instead of working
3723 * around a lack of fxsave64/fxrstor64 on old compilers.
3724 */
3725 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3726 return X86EMUL_UNHANDLEABLE;
3727
3728 return X86EMUL_CONTINUE;
3729 }
3730
3731 /*
3732 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3733 * and restore MXCSR.
3734 */
3735 static size_t __fxstate_size(int nregs)
3736 {
3737 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3738 }
3739
3740 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3741 {
3742 bool cr4_osfxsr;
3743 if (ctxt->mode == X86EMUL_MODE_PROT64)
3744 return __fxstate_size(16);
3745
3746 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3747 return __fxstate_size(cr4_osfxsr ? 8 : 0);
3748 }
3749
3750 /*
3751 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3752 * 1) 16 bit mode
3753 * 2) 32 bit mode
3754 * - like (1), but FIP and FDP (the x87 instruction and data pointers) are only 16 bit. At least Intel CPUs
3755 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3756 * save and restore
3757 * 3) 64-bit mode with REX.W prefix
3758 * - like (2), but XMM 8-15 are being saved and restored
3759 * 4) 64-bit mode without REX.W prefix
3760 * - like (3), but FIP and FDP are 64 bit
3761 *
3762 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3763 * desired result. (4) is not emulated.
3764 *
3765 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3766 * and FPU DS) should match.
3767 */
3768 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3769 {
3770 struct fxregs_state fx_state;
3771 int rc;
3772
3773 rc = check_fxsr(ctxt);
3774 if (rc != X86EMUL_CONTINUE)
3775 return rc;
3776
3777 kvm_fpu_get();
3778
3779 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3780
3781 kvm_fpu_put();
3782
3783 if (rc != X86EMUL_CONTINUE)
3784 return rc;
3785
3786 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3787 fxstate_size(ctxt));
3788 }
3789
3790 /*
3791 * FXRSTOR might restore XMM registers not provided by the guest. Fill
3792 * in the host registers (via FXSAVE) instead, so they won't be modified.
3793 * (preemption has to stay disabled until FXRSTOR).
3794 *
3795 * Use noinline to keep the stack for other functions called by callers small.
3796 */
3797 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3798 const size_t used_size)
3799 {
3800 struct fxregs_state fx_tmp;
3801 int rc;
3802
3803 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3804 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3805 __fxstate_size(16) - used_size);
3806
3807 return rc;
3808 }
3809
3810 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3811 {
3812 struct fxregs_state fx_state;
3813 int rc;
3814 size_t size;
3815
3816 rc = check_fxsr(ctxt);
3817 if (rc != X86EMUL_CONTINUE)
3818 return rc;
3819
3820 size = fxstate_size(ctxt);
3821 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3822 if (rc != X86EMUL_CONTINUE)
3823 return rc;
3824
3825 kvm_fpu_get();
3826
3827 if (size < __fxstate_size(16)) {
3828 rc = fxregs_fixup(&fx_state, size);
3829 if (rc != X86EMUL_CONTINUE)
3830 goto out;
3831 }
3832
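	/*
	 * MXCSR bits 31:16 are reserved; FXRSTOR would #GP on an image with
	 * any of them set, so inject #GP(0) instead of attempting the restore.
	 */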
3833 if (fx_state.mxcsr >> 16) {
3834 rc = emulate_gp(ctxt, 0);
3835 goto out;
3836 }
3837
3838 if (rc == X86EMUL_CONTINUE)
3839 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3840
3841 out:
3842 kvm_fpu_put();
3843
3844 return rc;
3845 }
3846
3847 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3848 {
3849 u32 eax, ecx, edx;
3850
3851 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3852 return emulate_ud(ctxt);
3853
3854 eax = reg_read(ctxt, VCPU_REGS_RAX);
3855 edx = reg_read(ctxt, VCPU_REGS_RDX);
3856 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3857
3858 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3859 return emulate_gp(ctxt, 0);
3860
3861 return X86EMUL_CONTINUE;
3862 }
3863
3864 static bool valid_cr(int nr)
3865 {
3866 switch (nr) {
3867 case 0:
3868 case 2 ... 4:
3869 case 8:
3870 return true;
3871 default:
3872 return false;
3873 }
3874 }
3875
3876 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3877 {
3878 if (!valid_cr(ctxt->modrm_reg))
3879 return emulate_ud(ctxt);
3880
3881 return X86EMUL_CONTINUE;
3882 }
3883
3884 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3885 {
3886 int dr = ctxt->modrm_reg;
3887 u64 cr4;
3888
3889 if (dr > 7)
3890 return emulate_ud(ctxt);
3891
3892 cr4 = ctxt->ops->get_cr(ctxt, 4);
3893 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3894 return emulate_ud(ctxt);
3895
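	/*
	 * With DR7.GD set, any access to a debug register raises #DB with
	 * DR6.BD set, so the emulator must do the same.
	 */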
3896 if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
3897 ulong dr6;
3898
3899 dr6 = ctxt->ops->get_dr(ctxt, 6);
3900 dr6 &= ~DR_TRAP_BITS;
3901 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3902 ctxt->ops->set_dr(ctxt, 6, dr6);
3903 return emulate_db(ctxt);
3904 }
3905
3906 return X86EMUL_CONTINUE;
3907 }
3908
3909 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3910 {
3911 u64 new_val = ctxt->src.val64;
3912 int dr = ctxt->modrm_reg;
3913
3914 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3915 return emulate_gp(ctxt, 0);
3916
3917 return check_dr_read(ctxt);
3918 }
3919
3920 static int check_svme(struct x86_emulate_ctxt *ctxt)
3921 {
3922 u64 efer = 0;
3923
3924 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3925
3926 if (!(efer & EFER_SVME))
3927 return emulate_ud(ctxt);
3928
3929 return X86EMUL_CONTINUE;
3930 }
3931
3932 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3933 {
3934 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3935
3936 /* Valid physical address? */
3937 if (rax & 0xffff000000000000ULL)
3938 return emulate_gp(ctxt, 0);
3939
3940 return check_svme(ctxt);
3941 }
3942
3943 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3944 {
3945 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3946
3947 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3948 return emulate_gp(ctxt, 0);
3949
3950 return X86EMUL_CONTINUE;
3951 }
3952
3953 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3954 {
3955 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3956 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3957
3958 /*
3959 * VMware allows access to these pseudo-PMCs even when read via RDPMC
3960 * in Ring3 when CR4.PCE=0.
3961 */
3962 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3963 return X86EMUL_CONTINUE;
3964
3965 /*
3966 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3967 * check however is unnecessary because CPL is always 0 outside
3968 * protected mode.
3969 */
3970 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3971 ctxt->ops->check_rdpmc_early(ctxt, rcx))
3972 return emulate_gp(ctxt, 0);
3973
3974 return X86EMUL_CONTINUE;
3975 }
3976
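/*
 * I/O port accesses are at most 4 bytes wide; emulator_io_permitted()
 * checks IOPL and the TSS I/O permission bitmap for the given port.
 */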
3977 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3978 {
3979 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3980 if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3981 return emulate_gp(ctxt, 0);
3982
3983 return X86EMUL_CONTINUE;
3984 }
3985
3986 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3987 {
3988 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3989 if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3990 return emulate_gp(ctxt, 0);
3991
3992 return X86EMUL_CONTINUE;
3993 }
3994
3995 #define D(_y) { .flags = (_y) }
3996 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3997 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3998 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3999 #define N D(NotImpl)
4000 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4001 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4002 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4003 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4004 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4005 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4006 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4007 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4008 #define II(_f, _e, _i) \
4009 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4010 #define IIP(_f, _e, _i, _p) \
4011 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4012 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4013 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4014
4015 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4016 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4017 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4018 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4019 #define I2bvIP(_f, _e, _i, _p) \
4020 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4021
4022 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4023 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4024 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
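
/*
 * For example, "F(Lock, em_add)" builds a Fastop entry that allows a LOCK
 * prefix and dispatches to em_add, while "I2bv(DstMem | SrcReg | ModRM,
 * em_mov)" expands to two adjacent entries, a ByteOp form and a
 * word/dword/qword form, both executed by em_mov().
 */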
4025
4026 static const struct opcode group7_rm0[] = {
4027 N,
4028 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4029 N, N, N, N, N, N,
4030 };
4031
4032 static const struct opcode group7_rm1[] = {
4033 DI(SrcNone | Priv, monitor),
4034 DI(SrcNone | Priv, mwait),
4035 N, N, N, N, N, N,
4036 };
4037
4038 static const struct opcode group7_rm2[] = {
4039 N,
4040 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4041 N, N, N, N, N, N,
4042 };
4043
4044 static const struct opcode group7_rm3[] = {
4045 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4046 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4047 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4048 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4049 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4050 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4051 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4052 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4053 };
4054
4055 static const struct opcode group7_rm7[] = {
4056 N,
4057 DIP(SrcNone, rdtscp, check_rdtsc),
4058 N, N, N, N, N, N,
4059 };
4060
4061 static const struct opcode group1[] = {
4062 F(Lock, em_add),
4063 F(Lock | PageTable, em_or),
4064 F(Lock, em_adc),
4065 F(Lock, em_sbb),
4066 F(Lock | PageTable, em_and),
4067 F(Lock, em_sub),
4068 F(Lock, em_xor),
4069 F(NoWrite, em_cmp),
4070 };
4071
4072 static const struct opcode group1A[] = {
4073 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4074 };
4075
4076 static const struct opcode group2[] = {
4077 F(DstMem | ModRM, em_rol),
4078 F(DstMem | ModRM, em_ror),
4079 F(DstMem | ModRM, em_rcl),
4080 F(DstMem | ModRM, em_rcr),
4081 F(DstMem | ModRM, em_shl),
4082 F(DstMem | ModRM, em_shr),
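	/* /6 is SAL, an alias of SHL, hence em_shl appears twice. */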
4083 F(DstMem | ModRM, em_shl),
4084 F(DstMem | ModRM, em_sar),
4085 };
4086
4087 static const struct opcode group3[] = {
4088 F(DstMem | SrcImm | NoWrite, em_test),
4089 F(DstMem | SrcImm | NoWrite, em_test),
4090 F(DstMem | SrcNone | Lock, em_not),
4091 F(DstMem | SrcNone | Lock, em_neg),
4092 F(DstXacc | Src2Mem, em_mul_ex),
4093 F(DstXacc | Src2Mem, em_imul_ex),
4094 F(DstXacc | Src2Mem, em_div_ex),
4095 F(DstXacc | Src2Mem, em_idiv_ex),
4096 };
4097
4098 static const struct opcode group4[] = {
4099 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4100 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4101 N, N, N, N, N, N,
4102 };
4103
4104 static const struct opcode group5[] = {
4105 F(DstMem | SrcNone | Lock, em_inc),
4106 F(DstMem | SrcNone | Lock, em_dec),
4107 I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs),
4108 I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far),
4109 I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4110 I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4111 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4112 };
4113
4114 static const struct opcode group6[] = {
4115 II(Prot | DstMem, em_sldt, sldt),
4116 II(Prot | DstMem, em_str, str),
4117 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4118 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4119 N, N, N, N,
4120 };
4121
4122 static const struct group_dual group7 = { {
4123 II(Mov | DstMem, em_sgdt, sgdt),
4124 II(Mov | DstMem, em_sidt, sidt),
4125 II(SrcMem | Priv, em_lgdt, lgdt),
4126 II(SrcMem | Priv, em_lidt, lidt),
4127 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4128 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4129 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4130 }, {
4131 EXT(0, group7_rm0),
4132 EXT(0, group7_rm1),
4133 EXT(0, group7_rm2),
4134 EXT(0, group7_rm3),
4135 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4136 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4137 EXT(0, group7_rm7),
4138 } };
4139
4140 static const struct opcode group8[] = {
4141 N, N, N, N,
4142 F(DstMem | SrcImmByte | NoWrite, em_bt),
4143 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4144 F(DstMem | SrcImmByte | Lock, em_btr),
4145 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4146 };
4147
4148 /*
4149 * The "memory" destination is actually always a register, since we come
4150 * from the register case of group9.
4151 */
4152 static const struct gprefix pfx_0f_c7_7 = {
4153 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4154 };
4155
4156
4157 static const struct group_dual group9 = { {
4158 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4159 }, {
4160 N, N, N, N, N, N, N,
4161 GP(0, &pfx_0f_c7_7),
4162 } };
4163
4164 static const struct opcode group11[] = {
4165 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4166 X7(D(Undefined)),
4167 };
4168
4169 static const struct gprefix pfx_0f_ae_7 = {
4170 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4171 };
4172
4173 static const struct group_dual group15 = { {
4174 I(ModRM | Aligned16, em_fxsave),
4175 I(ModRM | Aligned16, em_fxrstor),
4176 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4177 }, {
4178 N, N, N, N, N, N, N, N,
4179 } };
4180
4181 static const struct gprefix pfx_0f_6f_0f_7f = {
4182 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4183 };
4184
4185 static const struct instr_dual instr_dual_0f_2b = {
4186 I(0, em_mov), N
4187 };
4188
4189 static const struct gprefix pfx_0f_2b = {
4190 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4191 };
4192
4193 static const struct gprefix pfx_0f_10_0f_11 = {
4194 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4195 };
4196
4197 static const struct gprefix pfx_0f_28_0f_29 = {
4198 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4199 };
4200
4201 static const struct gprefix pfx_0f_e7 = {
4202 N, I(Sse, em_mov), N, N,
4203 };
4204
4205 static const struct escape escape_d9 = { {
4206 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4207 }, {
4208 /* 0xC0 - 0xC7 */
4209 N, N, N, N, N, N, N, N,
4210 /* 0xC8 - 0xCF */
4211 N, N, N, N, N, N, N, N,
4212 /* 0xD0 - 0xD7 */
4213 N, N, N, N, N, N, N, N,
4214 /* 0xD8 - 0xDF */
4215 N, N, N, N, N, N, N, N,
4216 /* 0xE0 - 0xE7 */
4217 N, N, N, N, N, N, N, N,
4218 /* 0xE8 - 0xEF */
4219 N, N, N, N, N, N, N, N,
4220 /* 0xF0 - 0xF7 */
4221 N, N, N, N, N, N, N, N,
4222 /* 0xF8 - 0xFF */
4223 N, N, N, N, N, N, N, N,
4224 } };
4225
4226 static const struct escape escape_db = { {
4227 N, N, N, N, N, N, N, N,
4228 }, {
4229 /* 0xC0 - 0xC7 */
4230 N, N, N, N, N, N, N, N,
4231 /* 0xC8 - 0xCF */
4232 N, N, N, N, N, N, N, N,
4233 /* 0xD0 - 0xD7 */
4234 N, N, N, N, N, N, N, N,
4235 /* 0xD8 - 0xDF */
4236 N, N, N, N, N, N, N, N,
4237 /* 0xE0 - 0xE7 */
4238 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4239 /* 0xE8 - 0xEF */
4240 N, N, N, N, N, N, N, N,
4241 /* 0xF0 - 0xF7 */
4242 N, N, N, N, N, N, N, N,
4243 /* 0xF8 - 0xFF */
4244 N, N, N, N, N, N, N, N,
4245 } };
4246
4247 static const struct escape escape_dd = { {
4248 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4249 }, {
4250 /* 0xC0 - 0xC7 */
4251 N, N, N, N, N, N, N, N,
4252 /* 0xC8 - 0xCF */
4253 N, N, N, N, N, N, N, N,
4254 /* 0xD0 - 0xD7 */
4255 N, N, N, N, N, N, N, N,
4256 /* 0xD8 - 0xDF */
4257 N, N, N, N, N, N, N, N,
4258 /* 0xE0 - 0xE7 */
4259 N, N, N, N, N, N, N, N,
4260 /* 0xE8 - 0xEF */
4261 N, N, N, N, N, N, N, N,
4262 /* 0xF0 - 0xF7 */
4263 N, N, N, N, N, N, N, N,
4264 /* 0xF8 - 0xFF */
4265 N, N, N, N, N, N, N, N,
4266 } };
4267
4268 static const struct instr_dual instr_dual_0f_c3 = {
4269 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4270 };
4271
4272 static const struct mode_dual mode_dual_63 = {
4273 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4274 };
4275
4276 static const struct instr_dual instr_dual_8d = {
4277 D(DstReg | SrcMem | ModRM | NoAccess), N
4278 };
4279
4280 static const struct opcode opcode_table[256] = {
4281 /* 0x00 - 0x07 */
4282 F6ALU(Lock, em_add),
4283 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4284 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4285 /* 0x08 - 0x0F */
4286 F6ALU(Lock | PageTable, em_or),
4287 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4288 N,
4289 /* 0x10 - 0x17 */
4290 F6ALU(Lock, em_adc),
4291 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4292 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4293 /* 0x18 - 0x1F */
4294 F6ALU(Lock, em_sbb),
4295 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4296 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4297 /* 0x20 - 0x27 */
4298 F6ALU(Lock | PageTable, em_and), N, N,
4299 /* 0x28 - 0x2F */
4300 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4301 /* 0x30 - 0x37 */
4302 F6ALU(Lock, em_xor), N, N,
4303 /* 0x38 - 0x3F */
4304 F6ALU(NoWrite, em_cmp), N, N,
4305 /* 0x40 - 0x4F */
4306 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4307 /* 0x50 - 0x57 */
4308 X8(I(SrcReg | Stack, em_push)),
4309 /* 0x58 - 0x5F */
4310 X8(I(DstReg | Stack, em_pop)),
4311 /* 0x60 - 0x67 */
4312 I(ImplicitOps | Stack | No64, em_pusha),
4313 I(ImplicitOps | Stack | No64, em_popa),
4314 N, MD(ModRM, &mode_dual_63),
4315 N, N, N, N,
4316 /* 0x68 - 0x6F */
4317 I(SrcImm | Mov | Stack, em_push),
4318 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4319 I(SrcImmByte | Mov | Stack, em_push),
4320 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4321 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4322 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4323 /* 0x70 - 0x7F */
4324 X16(D(SrcImmByte | NearBranch | IsBranch)),
4325 /* 0x80 - 0x87 */
4326 G(ByteOp | DstMem | SrcImm, group1),
4327 G(DstMem | SrcImm, group1),
4328 G(ByteOp | DstMem | SrcImm | No64, group1),
4329 G(DstMem | SrcImmByte, group1),
4330 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4331 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4332 /* 0x88 - 0x8F */
4333 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4334 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4335 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4336 ID(0, &instr_dual_8d),
4337 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4338 G(0, group1A),
4339 /* 0x90 - 0x97 */
4340 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4341 /* 0x98 - 0x9F */
4342 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4343 I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N,
4344 II(ImplicitOps | Stack, em_pushf, pushf),
4345 II(ImplicitOps | Stack, em_popf, popf),
4346 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4347 /* 0xA0 - 0xA7 */
4348 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4349 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4350 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4351 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4352 /* 0xA8 - 0xAF */
4353 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4354 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4355 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4356 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4357 /* 0xB0 - 0xB7 */
4358 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4359 /* 0xB8 - 0xBF */
4360 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4361 /* 0xC0 - 0xC7 */
4362 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4363 I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm),
4364 I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret),
4365 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4366 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4367 G(ByteOp, group11), G(0, group11),
4368 /* 0xC8 - 0xCF */
4369 I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
4370 I(Stack, em_leave),
4371 I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
4372 I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
4373 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
4374 D(ImplicitOps | No64 | IsBranch),
4375 II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
4376 /* 0xD0 - 0xD7 */
4377 G(Src2One | ByteOp, group2), G(Src2One, group2),
4378 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4379 I(DstAcc | SrcImmUByte | No64, em_aam),
4380 I(DstAcc | SrcImmUByte | No64, em_aad),
4381 F(DstAcc | ByteOp | No64, em_salc),
4382 I(DstAcc | SrcXLat | ByteOp, em_mov),
4383 /* 0xD8 - 0xDF */
4384 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4385 /* 0xE0 - 0xE7 */
4386 X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4387 I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4388 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4389 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4390 /* 0xE8 - 0xEF */
4391 I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
4392 D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4393 I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4394 D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4395 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4396 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4397 /* 0xF0 - 0xF7 */
4398 N, DI(ImplicitOps, icebp), N, N,
4399 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4400 G(ByteOp, group3), G(0, group3),
4401 /* 0xF8 - 0xFF */
4402 D(ImplicitOps), D(ImplicitOps),
4403 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4404 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4405 };
4406
4407 static const struct opcode twobyte_table[256] = {
4408 /* 0x00 - 0x0F */
4409 G(0, group6), GD(0, &group7), N, N,
4410 N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall),
4411 II(ImplicitOps | Priv, em_clts, clts), N,
4412 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4413 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4414 /* 0x10 - 0x1F */
4415 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4416 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4417 N, N, N, N, N, N,
4418 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4419 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4420 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4421 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4422 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4423 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4424 /* 0x20 - 0x2F */
4425 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4426 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4427 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4428 check_cr_access),
4429 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4430 check_dr_write),
4431 N, N, N, N,
4432 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4433 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4434 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4435 N, N, N, N,
4436 /* 0x30 - 0x3F */
4437 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4438 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4439 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4440 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4441 I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
4442 I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
4443 N, N,
4444 N, N, N, N, N, N, N, N,
4445 /* 0x40 - 0x4F */
4446 X16(D(DstReg | SrcMem | ModRM)),
4447 /* 0x50 - 0x5F */
4448 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4449 /* 0x60 - 0x6F */
4450 N, N, N, N,
4451 N, N, N, N,
4452 N, N, N, N,
4453 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4454 /* 0x70 - 0x7F */
4455 N, N, N, N,
4456 N, N, N, N,
4457 N, N, N, N,
4458 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4459 /* 0x80 - 0x8F */
4460 X16(D(SrcImm | NearBranch | IsBranch)),
4461 /* 0x90 - 0x9F */
4462 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4463 /* 0xA0 - 0xA7 */
4464 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4465 II(ImplicitOps, em_cpuid, cpuid),
4466 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4467 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4468 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4469 /* 0xA8 - 0xAF */
4470 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4471 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4472 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4473 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4474 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4475 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4476 /* 0xB0 - 0xB7 */
4477 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4478 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4479 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4480 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4481 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4482 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4483 /* 0xB8 - 0xBF */
4484 N, N,
4485 G(BitOp, group8),
4486 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4487 I(DstReg | SrcMem | ModRM, em_bsf_c),
4488 I(DstReg | SrcMem | ModRM, em_bsr_c),
4489 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4490 /* 0xC0 - 0xC7 */
4491 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4492 N, ID(0, &instr_dual_0f_c3),
4493 N, N, N, GD(0, &group9),
4494 /* 0xC8 - 0xCF */
4495 X8(I(DstReg, em_bswap)),
4496 /* 0xD0 - 0xDF */
4497 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4498 /* 0xE0 - 0xEF */
4499 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4500 N, N, N, N, N, N, N, N,
4501 /* 0xF0 - 0xFF */
4502 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4503 };
4504
4505 static const struct instr_dual instr_dual_0f_38_f0 = {
4506 I(DstReg | SrcMem | Mov, em_movbe), N
4507 };
4508
4509 static const struct instr_dual instr_dual_0f_38_f1 = {
4510 I(DstMem | SrcReg | Mov, em_movbe), N
4511 };
4512
4513 static const struct gprefix three_byte_0f_38_f0 = {
4514 ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
4515 };
4516
4517 static const struct gprefix three_byte_0f_38_f1 = {
4518 ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
4519 };
4520
4521 /*
4522 * Insns below are indexed by the third opcode byte and then selected by the
4523 * mandatory (SIMD) prefix.
4524 */
4525 static const struct opcode opcode_map_0f_38[256] = {
4526 /* 0x00 - 0x7f */
4527 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4528 /* 0x80 - 0xef */
4529 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4530 /* 0xf0 - 0xf1 */
4531 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4532 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4533 /* 0xf2 - 0xff */
4534 N, N, X4(N), X8(N)
4535 };
4536
4537 #undef D
4538 #undef N
4539 #undef G
4540 #undef GD
4541 #undef I
4542 #undef GP
4543 #undef EXT
4544 #undef MD
4545 #undef ID
4546
4547 #undef D2bv
4548 #undef D2bvIP
4549 #undef I2bv
4550 #undef I2bvIP
4551 #undef I6ALU
4552
4553 static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt)
4554 {
4555 return ctxt->d & ShadowStack;
4556 }
4557
4558 static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
4559 {
4560 u64 flags = ctxt->d;
4561
4562 if (!(flags & IsBranch))
4563 return false;
4564
4565 /*
4566 * All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are
4567 * indirect and thus affect IBT state. All far RETs (including SYSEXIT
4568 * and IRET) are protected via Shadow Stacks and thus don't affect IBT
4569 * state. IRET #GPs when returning to virtual-8086 and IBT or SHSTK is
4570 * enabled, but that should be handled by IRET emulation (in the very
4571 * unlikely scenario that KVM adds support for fully emulating IRET).
4572 */
4573 if (!(flags & NearBranch))
4574 return ctxt->execute != em_iret &&
4575 ctxt->execute != em_ret_far &&
4576 ctxt->execute != em_ret_far_imm &&
4577 ctxt->execute != em_sysexit;
4578
4579 switch (flags & SrcMask) {
4580 case SrcReg:
4581 case SrcMem:
4582 case SrcMem16:
4583 case SrcMem32:
4584 return true;
4585 case SrcMemFAddr:
4586 case SrcImmFAddr:
4587 /* Far branches should be handled above. */
4588 WARN_ON_ONCE(1);
4589 return true;
4590 case SrcNone:
4591 case SrcImm:
4592 case SrcImmByte:
4593 /*
4594 * Note, ImmU16 is used only for the stack adjustment operand on ENTER
4595 * and RET instructions. ENTER isn't a branch and RET FAR is handled
4596 * by the NearBranch check above. RET itself isn't an indirect branch.
4597 */
4598 case SrcImmU16:
4599 return false;
4600 default:
4601 WARN_ONCE(1, "Unexpected Src operand '%llx' on branch",
4602 flags & SrcMask);
4603 return false;
4604 }
4605 }
4606
4607 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4608 {
4609 unsigned size;
4610
4611 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4612 if (size == 8)
4613 size = 4;
4614 return size;
4615 }
4616
4617 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4618 unsigned size, bool sign_extension)
4619 {
4620 int rc = X86EMUL_CONTINUE;
4621
4622 op->type = OP_IMM;
4623 op->bytes = size;
4624 op->addr.mem.ea = ctxt->_eip;
4625 /* NB. Immediates are sign-extended as necessary. */
4626 switch (op->bytes) {
4627 case 1:
4628 op->val = insn_fetch(s8, ctxt);
4629 break;
4630 case 2:
4631 op->val = insn_fetch(s16, ctxt);
4632 break;
4633 case 4:
4634 op->val = insn_fetch(s32, ctxt);
4635 break;
4636 case 8:
4637 op->val = insn_fetch(s64, ctxt);
4638 break;
4639 }
4640 if (!sign_extension) {
4641 switch (op->bytes) {
4642 case 1:
4643 op->val &= 0xff;
4644 break;
4645 case 2:
4646 op->val &= 0xffff;
4647 break;
4648 case 4:
4649 op->val &= 0xffffffff;
4650 break;
4651 }
4652 }
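	/* insn_fetch() jumps here if fetching the immediate bytes fails. */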
4653 done:
4654 return rc;
4655 }
4656
4657 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4658 unsigned d)
4659 {
4660 int rc = X86EMUL_CONTINUE;
4661
4662 switch (d) {
4663 case OpReg:
4664 decode_register_operand(ctxt, op);
4665 break;
4666 case OpImmUByte:
4667 rc = decode_imm(ctxt, op, 1, false);
4668 break;
4669 case OpMem:
4670 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4671 mem_common:
4672 *op = ctxt->memop;
4673 ctxt->memopp = op;
4674 if (ctxt->d & BitOp)
4675 fetch_bit_operand(ctxt);
4676 op->orig_val = op->val;
4677 break;
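	/*
	 * OpMem64 is used by CMPXCHG8B/CMPXCHG16B: 16 bytes with a 64-bit
	 * operand size (REX.W), otherwise 8 bytes.
	 */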
4678 case OpMem64:
4679 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4680 goto mem_common;
4681 case OpAcc:
4682 op->type = OP_REG;
4683 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4684 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4685 fetch_register_operand(op);
4686 op->orig_val = op->val;
4687 break;
4688 case OpAccLo:
4689 op->type = OP_REG;
4690 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4691 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4692 fetch_register_operand(op);
4693 op->orig_val = op->val;
4694 break;
4695 case OpAccHi:
4696 if (ctxt->d & ByteOp) {
4697 op->type = OP_NONE;
4698 break;
4699 }
4700 op->type = OP_REG;
4701 op->bytes = ctxt->op_bytes;
4702 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4703 fetch_register_operand(op);
4704 op->orig_val = op->val;
4705 break;
4706 case OpDI:
4707 op->type = OP_MEM;
4708 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4709 op->addr.mem.ea =
4710 register_address(ctxt, VCPU_REGS_RDI);
4711 op->addr.mem.seg = VCPU_SREG_ES;
4712 op->val = 0;
4713 op->count = 1;
4714 break;
4715 case OpDX:
4716 op->type = OP_REG;
4717 op->bytes = 2;
4718 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4719 fetch_register_operand(op);
4720 break;
4721 case OpCL:
4722 op->type = OP_IMM;
4723 op->bytes = 1;
4724 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4725 break;
4726 case OpImmByte:
4727 rc = decode_imm(ctxt, op, 1, true);
4728 break;
4729 case OpOne:
4730 op->type = OP_IMM;
4731 op->bytes = 1;
4732 op->val = 1;
4733 break;
4734 case OpImm:
4735 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4736 break;
4737 case OpImm64:
4738 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4739 break;
4740 case OpMem8:
4741 ctxt->memop.bytes = 1;
4742 if (ctxt->memop.type == OP_REG) {
4743 ctxt->memop.addr.reg = decode_register(ctxt,
4744 ctxt->modrm_rm, true);
4745 fetch_register_operand(&ctxt->memop);
4746 }
4747 goto mem_common;
4748 case OpMem16:
4749 ctxt->memop.bytes = 2;
4750 goto mem_common;
4751 case OpMem32:
4752 ctxt->memop.bytes = 4;
4753 goto mem_common;
4754 case OpImmU16:
4755 rc = decode_imm(ctxt, op, 2, false);
4756 break;
4757 case OpImmU:
4758 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4759 break;
4760 case OpSI:
4761 op->type = OP_MEM;
4762 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4763 op->addr.mem.ea =
4764 register_address(ctxt, VCPU_REGS_RSI);
4765 op->addr.mem.seg = ctxt->seg_override;
4766 op->val = 0;
4767 op->count = 1;
4768 break;
4769 case OpXLat:
4770 op->type = OP_MEM;
4771 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4772 op->addr.mem.ea =
4773 address_mask(ctxt,
4774 reg_read(ctxt, VCPU_REGS_RBX) +
4775 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4776 op->addr.mem.seg = ctxt->seg_override;
4777 op->val = 0;
4778 break;
4779 case OpImmFAddr:
4780 op->type = OP_IMM;
4781 op->addr.mem.ea = ctxt->_eip;
4782 op->bytes = ctxt->op_bytes + 2;
4783 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4784 break;
4785 case OpMemFAddr:
4786 ctxt->memop.bytes = ctxt->op_bytes + 2;
4787 goto mem_common;
4788 case OpES:
4789 op->type = OP_IMM;
4790 op->val = VCPU_SREG_ES;
4791 break;
4792 case OpCS:
4793 op->type = OP_IMM;
4794 op->val = VCPU_SREG_CS;
4795 break;
4796 case OpSS:
4797 op->type = OP_IMM;
4798 op->val = VCPU_SREG_SS;
4799 break;
4800 case OpDS:
4801 op->type = OP_IMM;
4802 op->val = VCPU_SREG_DS;
4803 break;
4804 case OpFS:
4805 op->type = OP_IMM;
4806 op->val = VCPU_SREG_FS;
4807 break;
4808 case OpGS:
4809 op->type = OP_IMM;
4810 op->val = VCPU_SREG_GS;
4811 break;
4812 case OpImplicit:
4813 /* Special instructions do their own operand decoding. */
4814 default:
4815 op->type = OP_NONE; /* Disable writeback. */
4816 break;
4817 }
4818
4819 done:
4820 return rc;
4821 }
4822
4823 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
4824 {
4825 int rc = X86EMUL_CONTINUE;
4826 int mode = ctxt->mode;
4827 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4828 bool op_prefix = false;
4829 bool has_seg_override = false;
4830 struct opcode opcode;
4831 u16 dummy;
4832 struct desc_struct desc;
4833
4834 ctxt->memop.type = OP_NONE;
4835 ctxt->memopp = NULL;
4836 ctxt->_eip = ctxt->eip;
4837 ctxt->fetch.ptr = ctxt->fetch.data;
4838 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4839 ctxt->opcode_len = 1;
4840 ctxt->intercept = x86_intercept_none;
4841 if (insn_len > 0)
4842 memcpy(ctxt->fetch.data, insn, insn_len);
4843 else {
4844 rc = __do_insn_fetch_bytes(ctxt, 1);
4845 if (rc != X86EMUL_CONTINUE)
4846 goto done;
4847 }
4848
4849 switch (mode) {
4850 case X86EMUL_MODE_REAL:
4851 case X86EMUL_MODE_VM86:
4852 def_op_bytes = def_ad_bytes = 2;
4853 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
4854 if (desc.d)
4855 def_op_bytes = def_ad_bytes = 4;
4856 break;
4857 case X86EMUL_MODE_PROT16:
4858 def_op_bytes = def_ad_bytes = 2;
4859 break;
4860 case X86EMUL_MODE_PROT32:
4861 def_op_bytes = def_ad_bytes = 4;
4862 break;
4863 #ifdef CONFIG_X86_64
4864 case X86EMUL_MODE_PROT64:
4865 def_op_bytes = 4;
4866 def_ad_bytes = 8;
4867 break;
4868 #endif
4869 default:
4870 return EMULATION_FAILED;
4871 }
4872
4873 ctxt->op_bytes = def_op_bytes;
4874 ctxt->ad_bytes = def_ad_bytes;
4875
4876 /* Legacy prefixes. */
4877 for (;;) {
4878 switch (ctxt->b = insn_fetch(u8, ctxt)) {
4879 case 0x66: /* operand-size override */
4880 op_prefix = true;
4881 /* switch between 2/4 bytes */
4882 ctxt->op_bytes = def_op_bytes ^ 6;
4883 break;
4884 case 0x67: /* address-size override */
4885 if (mode == X86EMUL_MODE_PROT64)
4886 /* switch between 4/8 bytes */
4887 ctxt->ad_bytes = def_ad_bytes ^ 12;
4888 else
4889 /* switch between 2/4 bytes */
4890 ctxt->ad_bytes = def_ad_bytes ^ 6;
4891 break;
4892 case 0x26: /* ES override */
4893 has_seg_override = true;
4894 ctxt->seg_override = VCPU_SREG_ES;
4895 break;
4896 case 0x2e: /* CS override */
4897 has_seg_override = true;
4898 ctxt->seg_override = VCPU_SREG_CS;
4899 break;
4900 case 0x36: /* SS override */
4901 has_seg_override = true;
4902 ctxt->seg_override = VCPU_SREG_SS;
4903 break;
4904 case 0x3e: /* DS override */
4905 has_seg_override = true;
4906 ctxt->seg_override = VCPU_SREG_DS;
4907 break;
4908 case 0x64: /* FS override */
4909 has_seg_override = true;
4910 ctxt->seg_override = VCPU_SREG_FS;
4911 break;
4912 case 0x65: /* GS override */
4913 has_seg_override = true;
4914 ctxt->seg_override = VCPU_SREG_GS;
4915 break;
4916 case 0x40 ... 0x4f: /* REX */
4917 if (mode != X86EMUL_MODE_PROT64)
4918 goto done_prefixes;
4919 ctxt->rex_prefix = ctxt->b;
4920 continue;
4921 case 0xf0: /* LOCK */
4922 ctxt->lock_prefix = 1;
4923 break;
4924 case 0xf2: /* REPNE/REPNZ */
4925 case 0xf3: /* REP/REPE/REPZ */
4926 ctxt->rep_prefix = ctxt->b;
4927 break;
4928 default:
4929 goto done_prefixes;
4930 }
4931
4932 /* Any legacy prefix after a REX prefix nullifies its effect. */
4933
4934 ctxt->rex_prefix = 0;
4935 }
4936
4937 done_prefixes:
4938
4939 /* REX prefix. */
4940 if (ctxt->rex_prefix & 8)
4941 ctxt->op_bytes = 8; /* REX.W */
4942
4943 /* Opcode byte(s). */
4944 opcode = opcode_table[ctxt->b];
4945 /* Two-byte opcode? */
4946 if (ctxt->b == 0x0f) {
4947 ctxt->opcode_len = 2;
4948 ctxt->b = insn_fetch(u8, ctxt);
4949 opcode = twobyte_table[ctxt->b];
4950
4951 /* 0F_38 opcode map */
4952 if (ctxt->b == 0x38) {
4953 ctxt->opcode_len = 3;
4954 ctxt->b = insn_fetch(u8, ctxt);
4955 opcode = opcode_map_0f_38[ctxt->b];
4956 }
4957 }
4958 ctxt->d = opcode.flags;
4959
4960 if (ctxt->d & ModRM)
4961 ctxt->modrm = insn_fetch(u8, ctxt);
4962
4963 /* vex-prefix instructions are not implemented */
4964 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
4965 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
4966 ctxt->d = NotImpl;
4967 }
4968
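	/*
	 * Walk nested decode tables until a leaf entry is found: Group and
	 * RMExt index by ModRM reg/rm bits, GroupDual and InstrDual switch on
	 * ModRM.mod, Prefix selects by the mandatory SIMD prefix, Escape
	 * covers the x87 opcode space, and ModeDual picks the 64-bit vs.
	 * legacy-mode form.
	 */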
4969 while (ctxt->d & GroupMask) {
4970 switch (ctxt->d & GroupMask) {
4971 case Group:
4972 goffset = (ctxt->modrm >> 3) & 7;
4973 opcode = opcode.u.group[goffset];
4974 break;
4975 case GroupDual:
4976 goffset = (ctxt->modrm >> 3) & 7;
4977 if ((ctxt->modrm >> 6) == 3)
4978 opcode = opcode.u.gdual->mod3[goffset];
4979 else
4980 opcode = opcode.u.gdual->mod012[goffset];
4981 break;
4982 case RMExt:
4983 goffset = ctxt->modrm & 7;
4984 opcode = opcode.u.group[goffset];
4985 break;
4986 case Prefix:
4987 if (ctxt->rep_prefix && op_prefix)
4988 return EMULATION_FAILED;
4989 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
4990 switch (simd_prefix) {
4991 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
4992 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
4993 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
4994 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4995 }
4996 break;
4997 case Escape:
4998 if (ctxt->modrm > 0xbf) {
4999 size_t size = ARRAY_SIZE(opcode.u.esc->high);
5000 u32 index = array_index_nospec(
5001 ctxt->modrm - 0xc0, size);
5002
5003 opcode = opcode.u.esc->high[index];
5004 } else {
5005 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5006 }
5007 break;
5008 case InstrDual:
5009 if ((ctxt->modrm >> 6) == 3)
5010 opcode = opcode.u.idual->mod3;
5011 else
5012 opcode = opcode.u.idual->mod012;
5013 break;
5014 case ModeDual:
5015 if (ctxt->mode == X86EMUL_MODE_PROT64)
5016 opcode = opcode.u.mdual->mode64;
5017 else
5018 opcode = opcode.u.mdual->mode32;
5019 break;
5020 default:
5021 return EMULATION_FAILED;
5022 }
5023
5024 ctxt->d &= ~(u64)GroupMask;
5025 ctxt->d |= opcode.flags;
5026 }
5027
5028 ctxt->is_branch = opcode.flags & IsBranch;
5029
5030 /* Unrecognised? */
5031 if (ctxt->d == 0)
5032 return EMULATION_FAILED;
5033
5034 ctxt->execute = opcode.u.execute;
5035
5036 /*
5037 * Reject emulation if KVM might need to emulate shadow stack updates
5038 * and/or indirect branch tracking enforcement, which the emulator
5039 * doesn't support.
5040 */
5041 if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) &&
5042 ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
5043 u64 u_cet = 0, s_cet = 0;
5044
5045 /*
5046 * Check both User and Supervisor on far transfers as inter-
5047 * privilege level transfers are impacted by CET at the target
5048 * privilege level, and that is not known at this time. The
5049 * expectation is that the guest will not require emulation of
5050 * any CET-affected instructions at any privilege level.
5051 */
5052 if (!(ctxt->d & NearBranch))
5053 u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5054 else if (ctxt->ops->cpl(ctxt) == 3)
5055 u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5056 else
5057 s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5058
5059 if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
5060 (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
5061 return EMULATION_FAILED;
5062
5063 if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt))
5064 return EMULATION_FAILED;
5065
5066 if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt))
5067 return EMULATION_FAILED;
5068 }
5069
5070 if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5071 likely(!(ctxt->d & EmulateOnUD)))
5072 return EMULATION_FAILED;
5073
5074 if (unlikely(ctxt->d &
5075 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5076 No16))) {
5077 /*
5078 * These are copied unconditionally here, and checked unconditionally
5079 * in x86_emulate_insn.
5080 */
5081 ctxt->check_perm = opcode.check_perm;
5082 ctxt->intercept = opcode.intercept;
5083
5084 if (ctxt->d & NotImpl)
5085 return EMULATION_FAILED;
5086
5087 if (mode == X86EMUL_MODE_PROT64) {
5088 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5089 ctxt->op_bytes = 8;
5090 else if (ctxt->d & NearBranch)
5091 ctxt->op_bytes = 8;
5092 }
5093
5094 if (ctxt->d & Op3264) {
5095 if (mode == X86EMUL_MODE_PROT64)
5096 ctxt->op_bytes = 8;
5097 else
5098 ctxt->op_bytes = 4;
5099 }
5100
5101 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5102 ctxt->op_bytes = 4;
5103
5104 if (ctxt->d & Sse)
5105 ctxt->op_bytes = 16;
5106 else if (ctxt->d & Mmx)
5107 ctxt->op_bytes = 8;
5108 }
5109
5110 /* ModRM and SIB bytes. */
5111 if (ctxt->d & ModRM) {
5112 rc = decode_modrm(ctxt, &ctxt->memop);
5113 if (!has_seg_override) {
5114 has_seg_override = true;
5115 ctxt->seg_override = ctxt->modrm_seg;
5116 }
5117 } else if (ctxt->d & MemAbs)
5118 rc = decode_abs(ctxt, &ctxt->memop);
5119 if (rc != X86EMUL_CONTINUE)
5120 goto done;
5121
5122 if (!has_seg_override)
5123 ctxt->seg_override = VCPU_SREG_DS;
5124
5125 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5126
5127 /*
5128 * Decode and fetch the source operand: register, memory
5129 * or immediate.
5130 */
5131 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5132 if (rc != X86EMUL_CONTINUE)
5133 goto done;
5134
5135 /*
5136 * Decode and fetch the second source operand: register, memory
5137 * or immediate.
5138 */
5139 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5140 if (rc != X86EMUL_CONTINUE)
5141 goto done;
5142
5143 /* Decode and fetch the destination operand: register or memory. */
5144 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5145
5146 if (ctxt->rip_relative && likely(ctxt->memopp))
5147 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5148 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5149
5150 done:
5151 if (rc == X86EMUL_PROPAGATE_FAULT)
5152 ctxt->have_exception = true;
5153 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5154 }
5155
5156 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5157 {
5158 return ctxt->d & PageTable;
5159 }
5160
5161 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5162 {
5163 /* The second termination condition only applies to REPE and REPNE.
5164 * If the repeat string operation prefix is REPE/REPZ or
5165 * REPNE/REPNZ, test the corresponding termination condition:
5166 *
5167 * - if REPE/REPZ and ZF = 0 then done
5168 * - if REPNE/REPNZ and ZF = 1 then done
5169 */
5170 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5171 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5172 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5173 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5174 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5175 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5176 return true;
5177
5178 return false;
5179 }
5180
5181 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5182 {
5183 int rc;
5184
5185 kvm_fpu_get();
5186 rc = asm_safe("fwait");
5187 kvm_fpu_put();
5188
5189 if (unlikely(rc != X86EMUL_CONTINUE))
5190 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5191
5192 return X86EMUL_CONTINUE;
5193 }
5194
5195 static void fetch_possible_mmx_operand(struct operand *op)
5196 {
5197 if (op->type == OP_MM)
5198 kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5199 }
5200
5201 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5202 {
5203 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5204
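	/*
	 * Each fastop is emitted in four size variants (byte, word, dword,
	 * qword), FASTOP_SIZE bytes apart; non-ByteOp instructions index by
	 * log2 of the destination size.
	 */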
5205 if (!(ctxt->d & ByteOp))
5206 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5207
5208 asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5209 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5210 [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5211 : "c"(ctxt->src2.val));
5212
5213 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5214 if (!fop) /* exception is returned in fop variable */
5215 return emulate_de(ctxt);
5216 return X86EMUL_CONTINUE;
5217 }
5218
5219 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5220 {
5221 /* Clear fields that are set conditionally but read without a guard. */
5222 ctxt->rip_relative = false;
5223 ctxt->rex_prefix = 0;
5224 ctxt->lock_prefix = 0;
5225 ctxt->rep_prefix = 0;
5226 ctxt->regs_valid = 0;
5227 ctxt->regs_dirty = 0;
5228
5229 ctxt->io_read.pos = 0;
5230 ctxt->io_read.end = 0;
5231 ctxt->mem_read.end = 0;
5232 }
5233
5234 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
5235 {
5236 const struct x86_emulate_ops *ops = ctxt->ops;
5237 int rc = X86EMUL_CONTINUE;
5238 int saved_dst_type = ctxt->dst.type;
5239
5240 ctxt->mem_read.pos = 0;
5241
5242 /* LOCK prefix is allowed only with some instructions */
5243 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5244 rc = emulate_ud(ctxt);
5245 goto done;
5246 }
5247
5248 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5249 rc = emulate_ud(ctxt);
5250 goto done;
5251 }
5252
5253 if (unlikely(ctxt->d &
5254 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5255 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5256 (ctxt->d & Undefined)) {
5257 rc = emulate_ud(ctxt);
5258 goto done;
5259 }
5260
5261 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5262 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5263 rc = emulate_ud(ctxt);
5264 goto done;
5265 }
5266
5267 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5268 rc = emulate_nm(ctxt);
5269 goto done;
5270 }
5271
5272 if (ctxt->d & Mmx) {
5273 rc = flush_pending_x87_faults(ctxt);
5274 if (rc != X86EMUL_CONTINUE)
5275 goto done;
5276 /*
5277 * Now that we know the fpu is exception safe, we can fetch
5278 * operands from it.
5279 */
5280 fetch_possible_mmx_operand(&ctxt->src);
5281 fetch_possible_mmx_operand(&ctxt->src2);
5282 if (!(ctxt->d & Mov))
5283 fetch_possible_mmx_operand(&ctxt->dst);
5284 }
5285
5286 if (unlikely(check_intercepts) && ctxt->intercept) {
5287 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5288 X86_ICPT_PRE_EXCEPT);
5289 if (rc != X86EMUL_CONTINUE)
5290 goto done;
5291 }
5292
5293 /* Instruction can only be executed in protected mode */
5294 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5295 rc = emulate_ud(ctxt);
5296 goto done;
5297 }
5298
5299 /* Privileged instruction can be executed only in CPL=0 */
5300 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5301 if (ctxt->d & PrivUD)
5302 rc = emulate_ud(ctxt);
5303 else
5304 rc = emulate_gp(ctxt, 0);
5305 goto done;
5306 }
5307
5308 /* Do instruction specific permission checks */
5309 if (ctxt->d & CheckPerm) {
5310 rc = ctxt->check_perm(ctxt);
5311 if (rc != X86EMUL_CONTINUE)
5312 goto done;
5313 }
5314
5315 if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
5316 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5317 X86_ICPT_POST_EXCEPT);
5318 if (rc != X86EMUL_CONTINUE)
5319 goto done;
5320 }
5321
5322 if (ctxt->rep_prefix && (ctxt->d & String)) {
5323 /* All REP prefixes have the same first termination condition */
5324 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5325 string_registers_quirk(ctxt);
5326 ctxt->eip = ctxt->_eip;
5327 ctxt->eflags &= ~X86_EFLAGS_RF;
5328 goto done;
5329 }
5330 }
5331 }
5332
5333 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5334 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5335 ctxt->src.valptr, ctxt->src.bytes);
5336 if (rc != X86EMUL_CONTINUE)
5337 goto done;
5338 ctxt->src.orig_val64 = ctxt->src.val64;
5339 }
5340
5341 if (ctxt->src2.type == OP_MEM) {
5342 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5343 &ctxt->src2.val, ctxt->src2.bytes);
5344 if (rc != X86EMUL_CONTINUE)
5345 goto done;
5346 }
5347
5348 if ((ctxt->d & DstMask) == ImplicitOps)
5349 goto special_insn;
5350
5351
5352 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5353 /* optimisation - avoid slow emulated read if Mov */
5354 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5355 &ctxt->dst.val, ctxt->dst.bytes);
5356 if (rc != X86EMUL_CONTINUE) {
5357 if (!(ctxt->d & NoWrite) &&
5358 rc == X86EMUL_PROPAGATE_FAULT &&
5359 ctxt->exception.vector == PF_VECTOR)
5360 ctxt->exception.error_code |= PFERR_WRITE_MASK;
5361 goto done;
5362 }
5363 }
5364 /* Copy full 64-bit value for CMPXCHG8B. */
5365 ctxt->dst.orig_val64 = ctxt->dst.val64;
5366
5367 special_insn:
5368
5369 if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
5370 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5371 X86_ICPT_POST_MEMACCESS);
5372 if (rc != X86EMUL_CONTINUE)
5373 goto done;
5374 }
5375
5376 if (ctxt->rep_prefix && (ctxt->d & String))
5377 ctxt->eflags |= X86_EFLAGS_RF;
5378 else
5379 ctxt->eflags &= ~X86_EFLAGS_RF;
5380
	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

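	/*
	 * Commit results: write back the (possibly modified) source operand
	 * for SrcWrite instructions, then the destination unless writes are
	 * suppressed by NoWrite.
	 */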
writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * Restore the destination type in case the decode results are reused
	 * (happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

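	/*
	 * Per-iteration bookkeeping for REP-prefixed string instructions:
	 * decrement RCX by the number of elements processed and decide
	 * whether to restart the instruction or drop back into the guest.
	 */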
	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;

		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter the guest when the PIO read-ahead buffer is
			 * empty or, if it is not used, after every 1024
			 * iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset the read cache. This usually happens
				 * before decode, but since the instruction is
				 * restarted we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

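	/*
	 * Commit the new RIP.  Outside 64-bit mode the architectural
	 * instruction pointer is at most 32 bits wide, so truncate it.
	 */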
	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

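	/*
	 * A propagated fault is recorded in ctxt->exception so that the
	 * caller can inject it; vectors above 0x1f would indicate an
	 * emulator bug rather than an architectural exception.
	 */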
done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
			return EMULATION_FAILED;
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

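	/*
	 * Two-byte (0x0F-escaped) opcodes that are not handled through an
	 * ->execute callback.
	 */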
twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jcc (near) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}

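/*
 * Thin wrappers that let the rest of KVM manage the emulator's register
 * cache without reaching into emulator internals.
 */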
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}

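/*
 * A single cached guest physical address is only reusable if the instruction
 * touches exactly one memory location: REP-prefixed string instructions and
 * instructions with two memory operands may access several, so decline.
 */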
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)
		return false;

	return true;
}