xref: /linux/arch/x86/kvm/emulate.c (revision f6d6be78b24426eba424e24d06718f59a860e38a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
3  * emulate.c
4  *
5  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6  *
7  * Copyright (c) 2005 Keir Fraser
8  *
9  * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10  * privileged instructions:
11  *
12  * Copyright (C) 2006 Qumranet
13  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
14  *
15  *   Avi Kivity <avi@qumranet.com>
16  *   Yaniv Kamay <yaniv@qumranet.com>
17  *
18  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
19  */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 
22 #include <linux/kvm_host.h>
23 #include "regs.h"
24 #include "kvm_emulate.h"
25 #include <linux/stringify.h>
26 #include <asm/debugreg.h>
27 #include <asm/nospec-branch.h>
28 #include <asm/ibt.h>
29 #include <asm/text-patching.h>
30 
31 #include "x86.h"
32 #include "tss.h"
33 #include "mmu.h"
34 #include "pmu.h"
35 
/*
 * Operand types
 *
 * Each Op* value names one way of locating an operand.  The values are
 * shifted into the destination, source and second-source fields of the
 * opcode flags (see DstShift, SrcShift and Src2Shift), so every value must
 * fit in OpBits bits, i.e. be no larger than OpMask.
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

/* With OpBits == 5 the largest encodable operand type is 31. */
#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
73 
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 *
 * The definitions below are OR-ed together into one flags word per opcode.
 * Bit 0 is ByteOp; bits 1-5 hold the destination operand type and bits
 * 6-10 the source operand type, each encoded as one of the Op* values.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)      /* 8-bit operands. */
/* Destination operand: an Op* value placed at DstShift. */
#define DstShift    1           /* Destination operand type at bits 1-5 */
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand: an Op* value placed at SrcShift. */
#define SrcShift    6           /* Source operand type at bits 6-10 */
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
/*
 * Instruction attributes at bits 11-31, followed by the second source
 * operand field at bits 32-36.  Bits 15-17 form a 3-bit selector
 * (GroupMask) rather than independent flags: exactly one of Group ..
 * ModeDual, or none, can be encoded at a time.
 */
#define BitOp       (1<<11)
#define MemAbs      (1<<12)     /* Memory operand is absolute displacement */
#define String      (1<<13)     /* String instruction (rep capable) */
#define Stack       (1<<14)     /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)     /* Group mechanisms, at bits 15-17 */
#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)     /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
#define Sse         (1<<18)     /* SSE Vector instruction */
#define ModRM       (1<<19)     /* Generic ModRM decode. */
#define Mov         (1<<20)     /* Destination is only written; never read. */
#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)     /* Instruction generates #UD in 64-bit mode */
#define PageTable   (1 << 29)   /* instruction used to write page table */
#define NotImpl     (1 << 30)   /* instruction is not implemented */
/* From bit 31 upwards the constant is widened to u64 before shifting. */
#define Avx         ((u64)1 << 31)   /* Instruction uses VEX prefix */
/* Second source operand: an Op* value (already 64-bit) placed at Src2Shift. */
#define Src2Shift   (32)        /* Source 2 operand type at bits 32-36 */
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
/* free: 37-39 */
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
/* Bits 41-42 are a 2-bit field: compare (flags & AlignMask) for equality. */
#define AlignMask   ((u64)3 << 41)  /* Memory alignment requirement at bits 41-42 */
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Aligned16   ((u64)3 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
/* free: 43-44 */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
/* NOTE(review): bit 50 is not assigned by any definition in this list. */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
#define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
#define ShadowStack ((u64)1 << 57)  /* Instruction affects Shadow Stacks. */

/*
 * Extended accumulator pair: the destination is the low part (OpAccLo),
 * the source is the high part (OpAccHi), and the source is written back.
 */
#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
180 
/*
 * Repetition helpers: XN(args) expands to N comma-separated copies of the
 * argument list.  The larger counts are built from the smaller ones.
 * Intended for filling runs of identical initializer entries.
 */
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
189 
/*
 * One decode-table entry.
 *
 * @flags holds the decode flags defined above (ByteOp, Dst and Src fields,
 * GroupMask selector, ...).  @intercept is meaningful when the Intercept
 * flag is set and @check_perm when CheckPerm is set, per those flags'
 * descriptions.
 */
struct opcode {
	u64 flags;
	u8 intercept;
	/* Explicit padding: 8 + 1 + 7 bytes puts the union at offset 16. */
	u8 pad[7];
	/*
	 * Either the handler for the instruction or a pointer to a secondary
	 * table.  NOTE(review): presumably the GroupMask bits of @flags select
	 * which member is valid (Group -> group, GroupDual -> gdual,
	 * Prefix -> gprefix, Escape -> esc, InstrDual -> idual,
	 * ModeDual -> mdual) - confirm against the decoder.
	 */
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
205 
/*
 * Secondary table with two sets of eight entries: one for ModRM mod values
 * 0-2 and one for mod == 3 (see the GroupDual flag).
 * NOTE(review): presumably indexed by ModRM bits 3:5 - confirm in decoder.
 */
struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};
210 
/*
 * Secondary table for instructions that vary with the 66/f2/f3 prefix
 * (see the Prefix flag): one entry for no prefix and one per prefix byte.
 */
struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};
217 
/*
 * Secondary table for coprocessor escape opcodes (see the Escape flag).
 * NOTE(review): presumably @op covers the memory forms (indexed by ModRM
 * bits 3:5) and @high the 64 register-form ModRM bytes 0xC0-0xFF - confirm
 * against the decoder.
 */
struct escape {
	struct opcode op[8];
	struct opcode high[64];
};
222 
/*
 * Pair of entries for an opcode that decodes to a different instruction
 * when ModRM mod == 3 (see the InstrDual flag).
 */
struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};
227 
/*
 * Pair of entries for an opcode that is a different instruction in 32-bit
 * and 64-bit mode (see the ModeDual flag).
 */
struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};
232 
/*
 * EFLAGS bits 1, 3, 5, 15 and 22-31.
 * NOTE(review): the name suggests these are bits that must stay zero; the
 * users of this mask are outside this part of the file - confirm there.
 */
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
234 
/*
 * Kind of control transfer being emulated.  The consumers of this enum are
 * not in this part of the file.
 */
enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};
241 
/* Individual REX prefix bits (B, X, R, W), each as a single-bit mask. */
enum rex_bits {
	REX_B = 1,
	REX_X = 2,
	REX_R = 4,
	REX_W = 8,
};
248 
249 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
250 {
251 	unsigned long dirty = ctxt->regs_dirty;
252 	unsigned reg;
253 
254 	for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
255 		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
256 }
257 
258 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
259 {
260 	ctxt->regs_dirty = 0;
261 	ctxt->regs_valid = 0;
262 }
263 
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 *
 * The set is the six arithmetic status flags: OF, SF, ZF, AF, PF and CF.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
		     X86_EFLAGS_PF|X86_EFLAGS_CF)
270 
/*
 * ON64(x) expands to x when building for CONFIG_X86_64 and to nothing
 * otherwise.  The EM_ASM_* macros use it to drop their 8-byte cases on
 * 32-bit builds.
 */
#ifdef CONFIG_X86_64
#define ON64(x...) x
#else
#define ON64(x...)
#endif
276 
/*
 * EM_ASM_START(op) opens the definition of em_<op>().  It seeds 'flags'
 * with the EFLAGS_MASK bits of ctxt->eflags plus X86_EFLAGS_IF, chooses the
 * operand width (1 for ByteOp instructions, otherwise ctxt->dst.bytes) and
 * opens a switch on that width.  The caller supplies the 'case' arms and
 * must close the function with EM_ASM_END.
 */
#define EM_ASM_START(op) \
static int em_##op(struct x86_emulate_ctxt *ctxt) \
{ \
	unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \
	int bytes = 1, ok = 1; \
	if (!(ctxt->d & ByteOp)) \
		bytes = ctxt->dst.bytes; \
	switch (bytes) {

/*
 * Execute the instruction text 'str' on the host with the prepared flags:
 * push/popf installs 'flags', 'str' runs at local label 10, and pushf/pop
 * captures the resulting flags before local label 11.
 *
 * Operand binding (register constraints):
 *   "a" ctxt->dst.val (read/write)    "d" ctxt->src.val (read/write)
 *   "D" flags         (read/write)    "S" ok            (read/write)
 *   "c" ctxt->src2.val (input only)
 */
#define __EM_ASM(str) \
		asm("push %[flags]; popf \n\t" \
		    "10: " str \
		    "pushf; pop %[flags] \n\t" \
		    "11: \n\t" \
		    : "+a" (ctxt->dst.val), \
		      "+d" (ctxt->src.val), \
		      [flags] "+D" (flags), \
		      "+S" (ok) \
		    : "c" (ctxt->src2.val))

/* One-operand form: "<op> %<dst>". */
#define __EM_ASM_1(op, dst) \
		__EM_ASM(#op " %%" #dst " \n\t")

/*
 * One-operand form with an exception-table entry for the instruction at
 * label 10, resuming at label 11 with EX_TYPE_ZERO_REG applied to %esi
 * (the register holding 'ok').
 * NOTE(review): assumes the fixup zeroes %esi so that EM_ASM_END sees
 * ok == 0 - confirm against the exception-table implementation.
 */
#define __EM_ASM_1_EX(op, dst) \
		__EM_ASM(#op " %%" #dst " \n\t" \
			 _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi))

/* Two-operand form: "<op> %<src>, %<dst>". */
#define __EM_ASM_2(op, dst, src) \
		__EM_ASM(#op " %%" #src ", %%" #dst " \n\t")

/* Three-operand form: "<op> %<src2>, %<src>, %<dst>". */
#define __EM_ASM_3(op, dst, src, src2) \
		__EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t")

/*
 * Close the switch and the function opened by EM_ASM_START: copy the
 * EFLAGS_MASK bits of 'flags' back into ctxt->eflags, then return
 * emulate_de() if 'ok' was cleared and X86EMUL_CONTINUE otherwise.
 */
#define EM_ASM_END \
	} \
	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \
	return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE; \
}
315 
/*
 * Generators for complete em_<name>() handlers.  Each one emits a case per
 * operand width (1, 2, 4 and, on 64-bit builds, 8 bytes) that runs the
 * width-suffixed instruction on the matching sub-registers.
 */

/* 1-operand, using "a" (dst) */
#define EM_ASM_1(op) \
	EM_ASM_START(op) \
	case 1: __EM_ASM_1(op##b, al); break; \
	case 2: __EM_ASM_1(op##w, ax); break; \
	case 4: __EM_ASM_1(op##l, eax); break; \
	ON64(case 8: __EM_ASM_1(op##q, rax); break;) \
	EM_ASM_END

/* 1-operand, using "c" (src2); the handler is named em_<name>, not em_<op> */
#define EM_ASM_1SRC2(op, name) \
	EM_ASM_START(name) \
	case 1: __EM_ASM_1(op##b, cl); break; \
	case 2: __EM_ASM_1(op##w, cx); break; \
	case 4: __EM_ASM_1(op##l, ecx); break; \
	ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \
	EM_ASM_END

/* 1-operand, using "c" (src2) with exception */
#define EM_ASM_1SRC2EX(op, name) \
	EM_ASM_START(name) \
	case 1: __EM_ASM_1_EX(op##b, cl); break; \
	case 2: __EM_ASM_1_EX(op##w, cx); break; \
	case 4: __EM_ASM_1_EX(op##l, ecx); break; \
	ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \
	EM_ASM_END

/* 2-operand, using "a" (dst), "d" (src) */
#define EM_ASM_2(op) \
	EM_ASM_START(op) \
	case 1: __EM_ASM_2(op##b, al, dl); break; \
	case 2: __EM_ASM_2(op##w, ax, dx); break; \
	case 4: __EM_ASM_2(op##l, eax, edx); break; \
	ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \
	EM_ASM_END

/* 2-operand, reversed: "d" (src) is the instruction's destination */
#define EM_ASM_2R(op, name) \
	EM_ASM_START(name) \
	case 1: __EM_ASM_2(op##b, dl, al); break; \
	case 2: __EM_ASM_2(op##w, dx, ax); break; \
	case 4: __EM_ASM_2(op##l, edx, eax); break; \
	ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \
	EM_ASM_END

/* 2-operand, word only (no byte op): the 1-byte case executes nothing */
#define EM_ASM_2W(op) \
	EM_ASM_START(op) \
	case 1: break; \
	case 2: __EM_ASM_2(op##w, ax, dx); break; \
	case 4: __EM_ASM_2(op##l, eax, edx); break; \
	ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \
	EM_ASM_END

/* 2-operand, using "a" (dst) and CL (src2) */
#define EM_ASM_2CL(op) \
	EM_ASM_START(op) \
	case 1: __EM_ASM_2(op##b, al, cl); break; \
	case 2: __EM_ASM_2(op##w, ax, cl); break; \
	case 4: __EM_ASM_2(op##l, eax, cl); break; \
	ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \
	EM_ASM_END

/*
 * 3-operand, using "a" (dst), "d" (src) and CL (src2); no byte form, so
 * the 1-byte case executes nothing.
 */
#define EM_ASM_3WCL(op) \
	EM_ASM_START(op) \
	case 1: break; \
	case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \
	case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \
	ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \
	EM_ASM_END
387 
388 static int em_salc(struct x86_emulate_ctxt *ctxt)
389 {
390 	/*
391 	 * Set AL 0xFF if CF is set, or 0x00 when clear.
392 	 */
393 	ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF);
394 	return X86EMUL_CONTINUE;
395 }
396 
/*
 * asm_safe(insn, inoutclob...) - run one instruction under an exception
 * table entry and report whether it faulted.
 *
 * 'insn' is placed at local label 1 with an EX_TYPE_ONE_REG fixup on the
 * _fault operand that resumes at label 2.  The expression evaluates to
 * X86EMUL_UNHANDLEABLE if _fault ended up non-zero and X86EMUL_CONTINUE
 * otherwise.
 *
 * XXX: inoutclob user must know where the argument is being expanded:
 *      it is pasted directly after the single output operand, so it has to
 *      begin with whatever separator (',' or ':') the asm statement needs
 *      at that point.
 *      Using asm goto would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
	int _fault = 0; \
 \
	asm volatile("1:" insn "\n" \
	             "2:\n" \
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
	             : [_fault] "+r"(_fault) inoutclob ); \
 \
	_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})
412 
413 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
414 				    enum x86_intercept intercept,
415 				    enum x86_intercept_stage stage)
416 {
417 	struct x86_instruction_info info = {
418 		.intercept  = intercept,
419 		.rep_prefix = ctxt->rep_prefix,
420 		.modrm_mod  = ctxt->modrm_mod,
421 		.modrm_reg  = ctxt->modrm_reg,
422 		.modrm_rm   = ctxt->modrm_rm,
423 		.src_val    = ctxt->src.val64,
424 		.dst_val    = ctxt->dst.val64,
425 		.src_bytes  = ctxt->src.bytes,
426 		.dst_bytes  = ctxt->dst.bytes,
427 		.src_type   = ctxt->src.type,
428 		.dst_type   = ctxt->dst.type,
429 		.ad_bytes   = ctxt->ad_bytes,
430 		.rip	    = ctxt->eip,
431 		.next_rip   = ctxt->_eip,
432 	};
433 
434 	return ctxt->ops->intercept(ctxt, &info, stage);
435 }
436 
/*
 * Replace only the bits of *@dest selected by @mask with the corresponding
 * bits of @src; all other bits of *@dest are preserved.
 */
static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	ulong preserved = *dest & ~mask;
	ulong incoming = src & mask;

	*dest = preserved | incoming;
}
441 
/*
 * Store the low @bytes bytes of @val into the register at @reg.
 *
 * 1- and 2-byte stores touch only the first one or two bytes of the
 * register storage and leave the rest alone.  A 4-byte store deliberately
 * assigns the whole register, which zero-extends the value (the 64-bit
 * mode rule for 32-bit writes).  Any other width is ignored.
 */
static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	if (bytes == 1)
		*(u8 *)reg = (u8)val;
	else if (bytes == 2)
		*(u16 *)reg = (u16)val;
	else if (bytes == 4)
		*reg = (u32)val;	/* 64b: zero-extend */
	else if (bytes == 8)
		*reg = val;
}
460 
461 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
462 {
463 	return (1UL << (ctxt->ad_bytes << 3)) - 1;
464 }
465 
466 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
467 {
468 	u16 sel;
469 	struct desc_struct ss;
470 
471 	if (ctxt->mode == X86EMUL_MODE_PROT64)
472 		return ~0UL;
473 	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
474 	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
475 }
476 
477 static int stack_size(struct x86_emulate_ctxt *ctxt)
478 {
479 	return (__fls(stack_mask(ctxt)) + 1) >> 3;
480 }
481 
482 /* Access/update address held in a register, based on addressing mode. */
483 static inline unsigned long
484 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
485 {
486 	if (ctxt->ad_bytes == sizeof(unsigned long))
487 		return reg;
488 	else
489 		return reg & ad_mask(ctxt);
490 }
491 
/* Read register @reg and truncate its value to the current address size. */
static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	unsigned long raw = reg_read(ctxt, reg);

	return address_mask(ctxt, raw);
}
497 
498 static void masked_increment(ulong *reg, ulong mask, int inc)
499 {
500 	assign_masked(reg, *reg + inc, mask);
501 }
502 
503 static inline void
504 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
505 {
506 	ulong *preg = reg_rmw(ctxt, reg);
507 
508 	assign_register(preg, *preg + inc, ctxt->ad_bytes);
509 }
510 
511 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
512 {
513 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
514 }
515 
516 static u32 desc_limit_scaled(struct desc_struct *desc)
517 {
518 	u32 limit = get_desc_limit(desc);
519 
520 	return desc->g ? (limit << 12) | 0xfff : limit;
521 }
522 
523 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
524 {
525 	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
526 		return 0;
527 
528 	return ctxt->ops->get_cached_segment_base(ctxt, seg);
529 }
530 
531 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
532 			     u32 error, bool valid)
533 {
534 	if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
535 		return X86EMUL_UNHANDLEABLE;
536 
537 	ctxt->exception.vector = vec;
538 	ctxt->exception.error_code = error;
539 	ctxt->exception.error_code_valid = valid;
540 	return X86EMUL_PROPAGATE_FAULT;
541 }
542 
543 static int emulate_db(struct x86_emulate_ctxt *ctxt, unsigned long dr6)
544 {
545 	ctxt->exception.dr6 = dr6;
546 	return emulate_exception(ctxt, DB_VECTOR, 0, false);
547 }
548 
549 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
550 {
551 	return emulate_exception(ctxt, GP_VECTOR, err, true);
552 }
553 
554 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
555 {
556 	return emulate_exception(ctxt, SS_VECTOR, err, true);
557 }
558 
559 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
560 {
561 	return emulate_exception(ctxt, UD_VECTOR, 0, false);
562 }
563 
564 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
565 {
566 	return emulate_exception(ctxt, TS_VECTOR, err, true);
567 }
568 
569 static int emulate_de(struct x86_emulate_ctxt *ctxt)
570 {
571 	return emulate_exception(ctxt, DE_VECTOR, 0, false);
572 }
573 
574 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
575 {
576 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
577 }
578 
579 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
580 {
581 	u16 selector;
582 	struct desc_struct desc;
583 
584 	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
585 	return selector;
586 }
587 
588 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
589 				 unsigned seg)
590 {
591 	u16 dummy;
592 	u32 base3;
593 	struct desc_struct desc;
594 
595 	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
596 	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
597 }
598 
599 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
600 {
601 	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
602 }
603 
604 static inline bool emul_is_noncanonical_address(u64 la,
605 						struct x86_emulate_ctxt *ctxt,
606 						unsigned int flags)
607 {
608 	return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
609 }
610 
611 /*
612  * x86 defines three classes of vector instructions: explicitly
613  * aligned, explicitly unaligned, and the rest, which change behaviour
614  * depending on whether they're AVX encoded or not.
615  *
616  * Also included is CMPXCHG16B which is not a vector instruction, yet it is
617  * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
618  * 512 bytes of data must be aligned to a 16 byte boundary.
619  */
620 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
621 {
622 	u64 alignment = ctxt->d & AlignMask;
623 
624 	if (likely(size < 16))
625 		return 1;
626 
627 	switch (alignment) {
628 	case Unaligned:
629 		return 1;
630 	case Aligned16:
631 		return 16;
632 	case Aligned:
633 	default:
634 		return size;
635 	}
636 }
637 
/*
 * Translate segmented address @addr into a linear address and validate an
 * access of @size bytes against the rules of @mode.
 *
 * @max_size: out; number of bytes accessible starting at @addr (0 on the
 *            early failure paths).
 * @linear:   out; the linear address.  It is written before the checks
 *            run, so it may be set even when an error is returned.
 * @flags:    X86EMUL_F_* access flags (WRITE and FETCH are tested here).
 *
 * Returns X86EMUL_CONTINUE on success.  On a failed segment, limit or
 * canonical check the result is emulate_ss(0) for SS-relative accesses and
 * emulate_gp(0) otherwise; a misaligned access yields emulate_gp(0).
 */
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       enum x86emul_mode mode, ulong *linear,
				       unsigned int flags)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	u8  va_bits;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		/* 64-bit mode: no limit checks, only the canonical check. */
		*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
		va_bits = ctxt_virt_addr_bits(ctxt);
		if (!__is_canonical_address(la, va_bits))
			goto bad;

		/* Clamp to what fits in the unsigned *max_size. */
		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		/* All other modes: linear addresses are 32 bits wide. */
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
		    (flags & X86EMUL_F_WRITE))
			goto bad;
		/* unreadable code segment */
		if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment */
			/* Valid offsets lie strictly above the limit ... */
			if (addr.ea <= lim)
				goto bad;
			/* ... up to a ceiling chosen by the D bit. */
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			/* lim + 1 - ea could exceed 32 bits; saturate. */
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	/* Alignment is checked on the linear address, after the limits. */
	if (la & (insn_alignment(ctxt, size) - 1))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}
704 
705 static int linearize(struct x86_emulate_ctxt *ctxt,
706 		     struct segmented_address addr,
707 		     unsigned size, bool write,
708 		     ulong *linear)
709 {
710 	unsigned max_size;
711 	return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
712 			   write ? X86EMUL_F_WRITE : 0);
713 }
714 
715 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
716 {
717 	ulong linear;
718 	int rc;
719 	unsigned max_size;
720 	struct segmented_address addr = { .seg = VCPU_SREG_CS,
721 					   .ea = dst };
722 
723 	if (ctxt->op_bytes != sizeof(unsigned long))
724 		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
725 	rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
726 			 X86EMUL_F_FETCH);
727 	if (rc == X86EMUL_CONTINUE)
728 		ctxt->_eip = addr.ea;
729 	return rc;
730 }
731 
732 static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
733 {
734 	u64 efer;
735 	struct desc_struct cs;
736 	u16 selector;
737 	u32 base3;
738 
739 	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
740 
741 	if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
742 		/* Real mode. cpu must not have long mode active */
743 		if (efer & EFER_LMA)
744 			return X86EMUL_UNHANDLEABLE;
745 		ctxt->mode = X86EMUL_MODE_REAL;
746 		return X86EMUL_CONTINUE;
747 	}
748 
749 	if (ctxt->eflags & X86_EFLAGS_VM) {
750 		/* Protected/VM86 mode. cpu must not have long mode active */
751 		if (efer & EFER_LMA)
752 			return X86EMUL_UNHANDLEABLE;
753 		ctxt->mode = X86EMUL_MODE_VM86;
754 		return X86EMUL_CONTINUE;
755 	}
756 
757 	if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
758 		return X86EMUL_UNHANDLEABLE;
759 
760 	if (efer & EFER_LMA) {
761 		if (cs.l) {
762 			/* Proper long mode */
763 			ctxt->mode = X86EMUL_MODE_PROT64;
764 		} else if (cs.d) {
765 			/* 32 bit compatibility mode*/
766 			ctxt->mode = X86EMUL_MODE_PROT32;
767 		} else {
768 			ctxt->mode = X86EMUL_MODE_PROT16;
769 		}
770 	} else {
771 		/* Legacy 32 bit / 16 bit mode */
772 		ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
773 	}
774 
775 	return X86EMUL_CONTINUE;
776 }
777 
778 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
779 {
780 	return assign_eip(ctxt, dst);
781 }
782 
783 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
784 {
785 	int rc = emulator_recalc_and_set_mode(ctxt);
786 
787 	if (rc != X86EMUL_CONTINUE)
788 		return rc;
789 
790 	return assign_eip(ctxt, dst);
791 }
792 
793 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
794 {
795 	return assign_eip_near(ctxt, ctxt->_eip + rel);
796 }
797 
798 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
799 			      void *data, unsigned size)
800 {
801 	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
802 }
803 
804 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
805 			       ulong linear, void *data,
806 			       unsigned int size)
807 {
808 	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
809 }
810 
811 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
812 			      struct segmented_address addr,
813 			      void *data,
814 			      unsigned size)
815 {
816 	int rc;
817 	ulong linear;
818 
819 	rc = linearize(ctxt, addr, size, false, &linear);
820 	if (rc != X86EMUL_CONTINUE)
821 		return rc;
822 	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
823 }
824 
825 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
826 			       struct segmented_address addr,
827 			       void *data,
828 			       unsigned int size)
829 {
830 	int rc;
831 	ulong linear;
832 
833 	rc = linearize(ctxt, addr, size, true, &linear);
834 	if (rc != X86EMUL_CONTINUE)
835 		return rc;
836 	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
837 }
838 
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 *
 * @op_size is the number of additional bytes the caller needs.  On success
 * the newly fetched bytes are appended at ctxt->fetch.end, which is
 * advanced accordingly.  Returns X86EMUL_CONTINUE, the error from
 * __linearize() or ->fetch(), or emulate_gp(0) when fewer than @op_size
 * bytes can be fetched.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	/* Bytes already in the fetch buffer; the fetch resumes right after. */
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					   .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible.  We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself.  Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
			 X86EMUL_F_FETCH);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	/*
	 * For cur_size in 0..15, 15 ^ cur_size equals 15 - cur_size, i.e.
	 * the room left under the 15 byte limit.
	 * NOTE(review): assumes cur_size never exceeds 15 - confirm.
	 */
	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn.  So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}
886 
887 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
888 					       unsigned size)
889 {
890 	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
891 
892 	if (unlikely(done_size < size))
893 		return __do_insn_fetch_bytes(ctxt, size - done_size);
894 	else
895 		return X86EMUL_CONTINUE;
896 }
897 
/*
 * Fetch next part of the instruction being emulated.
 *
 * Evaluates to the next sizeof(_type) bytes of the instruction stream,
 * read as a value of type _type, and advances both _eip and the fetch
 * pointer of @_ctxt past them.
 *
 * The macro relies on the caller's scope: it assigns the status to a
 * variable named 'rc' and jumps to a label named 'done' when the bytes
 * cannot be fetched.  All context accesses go through @_ctxt, so the macro
 * no longer depends on the caller's variable being called 'ctxt'.
 */
#define insn_fetch(_type, _ctxt)					\
({	_type _x;							\
									\
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_ctxt)->_eip += sizeof(_type);					\
	memcpy(&_x, (_ctxt)->fetch.ptr, sizeof(_type));			\
	(_ctxt)->fetch.ptr += sizeof(_type);				\
	_x;								\
})
910 
/*
 * Copy the next _size bytes of the instruction stream into _arr and
 * advance both _eip and the fetch pointer of @_ctxt past them.
 *
 * Like insn_fetch(), this assigns the status to the caller's 'rc' and
 * jumps to the caller's 'done' label on failure.  All context accesses go
 * through @_ctxt, so the macro no longer depends on the caller's variable
 * being called 'ctxt'.
 */
#define insn_fetch_arr(_arr, _size, _ctxt)				\
({									\
	rc = do_insn_fetch_bytes(_ctxt, _size);				\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_ctxt)->_eip += (_size);					\
	memcpy(_arr, (_ctxt)->fetch.ptr, _size);			\
	(_ctxt)->fetch.ptr += (_size);					\
})
920 
921 /*
922  * Given the 'reg' portion of a ModRM byte, and a register block, return a
923  * pointer into the block that addresses the relevant register.
924  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
925  */
926 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
927 			     int byteop)
928 {
929 	void *p;
930 	int highbyte_regs = (ctxt->rex_prefix == REX_NONE) && byteop;
931 
932 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
933 		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
934 	else
935 		p = reg_rmw(ctxt, modrm_reg);
936 	return p;
937 }
938 
939 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
940 			   struct segmented_address addr,
941 			   u16 *size, unsigned long *address, int op_bytes)
942 {
943 	int rc;
944 
945 	if (op_bytes == 2)
946 		op_bytes = 3;
947 	*address = 0;
948 	rc = segmented_read_std(ctxt, addr, size, 2);
949 	if (rc != X86EMUL_CONTINUE)
950 		return rc;
951 	addr.ea += 2;
952 	rc = segmented_read_std(ctxt, addr, address, op_bytes);
953 	return rc;
954 }
955 
/*
 * Instantiate emulation handlers through the EM_ASM_* generator macros, which
 * are defined outside this section.
 *
 * NOTE(review): presumably each line emits an em_<name>() function (em_bsf()
 * and em_bsr() are called by the em_bsf_c()/em_bsr_c() wrappers) and the macro
 * suffix selects the operand shape of the generated handler -- confirm
 * against the macro definitions.
 */
956 EM_ASM_2(add);
957 EM_ASM_2(or);
958 EM_ASM_2(adc);
959 EM_ASM_2(sbb);
960 EM_ASM_2(and);
961 EM_ASM_2(sub);
962 EM_ASM_2(xor);
963 EM_ASM_2(cmp);
964 EM_ASM_2(test);
965 EM_ASM_2(xadd);
966 
967 EM_ASM_1SRC2(mul, mul_ex);
968 EM_ASM_1SRC2(imul, imul_ex);
969 EM_ASM_1SRC2EX(div, div_ex);
970 EM_ASM_1SRC2EX(idiv, idiv_ex);
971 
972 EM_ASM_3WCL(shld);
973 EM_ASM_3WCL(shrd);
974 
975 EM_ASM_2W(imul);
976 
977 EM_ASM_1(not);
978 EM_ASM_1(neg);
979 EM_ASM_1(inc);
980 EM_ASM_1(dec);
981 
982 EM_ASM_2CL(rol);
983 EM_ASM_2CL(ror);
984 EM_ASM_2CL(rcl);
985 EM_ASM_2CL(rcr);
986 EM_ASM_2CL(shl);
987 EM_ASM_2CL(shr);
988 EM_ASM_2CL(sar);
989 
990 EM_ASM_2W(bsf);
991 EM_ASM_2W(bsr);
992 EM_ASM_2W(bt);
993 EM_ASM_2W(bts);
994 EM_ASM_2W(btr);
995 EM_ASM_2W(btc);
996 
997 EM_ASM_2R(cmp, cmp_r);
998 
999 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1000 {
1001 	/* If src is zero, do not writeback, but update flags */
1002 	if (ctxt->src.val == 0)
1003 		ctxt->dst.type = OP_NONE;
1004 	return em_bsf(ctxt);
1005 }
1006 
1007 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1008 {
1009 	/* If src is zero, do not writeback, but update flags */
1010 	if (ctxt->src.val == 0)
1011 		ctxt->dst.type = OP_NONE;
1012 	return em_bsr(ctxt);
1013 }
1014 
1015 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1016 {
1017 	return __emulate_cc(flags, condition & 0xf);
1018 }
1019 
1020 static void fetch_register_operand(struct operand *op)
1021 {
1022 	switch (op->bytes) {
1023 	case 1:
1024 		op->val = *(u8 *)op->addr.reg;
1025 		break;
1026 	case 2:
1027 		op->val = *(u16 *)op->addr.reg;
1028 		break;
1029 	case 4:
1030 		op->val = *(u32 *)op->addr.reg;
1031 		break;
1032 	case 8:
1033 		op->val = *(u64 *)op->addr.reg;
1034 		break;
1035 	}
1036 	op->orig_val = op->val;
1037 }
1038 
1039 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1040 {
1041 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1042 		return emulate_nm(ctxt);
1043 
1044 	kvm_fpu_get();
1045 	asm volatile("fninit");
1046 	kvm_fpu_put();
1047 	return X86EMUL_CONTINUE;
1048 }
1049 
1050 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1051 {
1052 	u16 fcw;
1053 
1054 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1055 		return emulate_nm(ctxt);
1056 
1057 	kvm_fpu_get();
1058 	asm volatile("fnstcw %0": "+m"(fcw));
1059 	kvm_fpu_put();
1060 
1061 	ctxt->dst.val = fcw;
1062 
1063 	return X86EMUL_CONTINUE;
1064 }
1065 
1066 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1067 {
1068 	u16 fsw;
1069 
1070 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1071 		return emulate_nm(ctxt);
1072 
1073 	kvm_fpu_get();
1074 	asm volatile("fnstsw %0": "+m"(fsw));
1075 	kvm_fpu_put();
1076 
1077 	ctxt->dst.val = fsw;
1078 
1079 	return X86EMUL_CONTINUE;
1080 }
1081 
1082 static void __decode_register_operand(struct x86_emulate_ctxt *ctxt,
1083 				      struct operand *op, int reg)
1084 {
1085 	if ((ctxt->d & Avx) && ctxt->op_bytes == 32) {
1086 		op->type = OP_YMM;
1087 		op->bytes = 32;
1088 		op->addr.xmm = reg;
1089 		kvm_read_avx_reg(reg, &op->vec_val2);
1090 		return;
1091 	}
1092 	if (ctxt->d & (Avx|Sse)) {
1093 		op->type = OP_XMM;
1094 		op->bytes = 16;
1095 		op->addr.xmm = reg;
1096 		kvm_read_sse_reg(reg, &op->vec_val);
1097 		return;
1098 	}
1099 	if (ctxt->d & Mmx) {
1100 		reg &= 7;
1101 		op->type = OP_MM;
1102 		op->bytes = 8;
1103 		op->addr.mm = reg;
1104 		return;
1105 	}
1106 
1107 	op->type = OP_REG;
1108 	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1109 	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1110 	fetch_register_operand(op);
1111 }
1112 
1113 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1114 				    struct operand *op)
1115 {
1116 	unsigned int reg;
1117 
1118 	if (ctxt->d & ModRM)
1119 		reg = ctxt->modrm_reg;
1120 	else
1121 		reg = (ctxt->b & 7) | (ctxt->rex_bits & REX_B ? 8 : 0);
1122 
1123 	__decode_register_operand(ctxt, op, reg);
1124 }
1125 
1126 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1127 {
1128 	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1129 		ctxt->modrm_seg = VCPU_SREG_SS;
1130 }
1131 
/*
 * Decode the ModRM byte held in ctxt->modrm into @op.
 *
 * The byte is split into ctxt->modrm_mod, ctxt->modrm_reg and ctxt->modrm_rm,
 * with the REX.R/REX.B bits supplying bit 3 of reg/rm.  ctxt->modrm_seg starts
 * out as DS.
 *
 * If mod == 3, or the instruction is flagged NoMod, @op becomes the register
 * operand numbered modrm_rm.  Otherwise @op becomes an OP_MEM operand whose
 * effective address is computed here, fetching a SIB byte and/or displacement
 * from the instruction stream as the encoding requires, and modrm_seg is
 * switched to SS for BP/SP based addressing.
 *
 * Returns X86EMUL_CONTINUE on success.  insn_fetch() assigns 'rc' and jumps to
 * 'done' on failure, in which case that failure code is returned.
 */
1132 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1133 			struct operand *op)
1134 {
1135 	u8 sib;
1136 	int index_reg, base_reg, scale;
1137 	int rc = X86EMUL_CONTINUE;
1138 	ulong modrm_ea = 0;
1139 
	/* Seed bit 3 of each register number from the REX prefix bits. */
1140 	ctxt->modrm_reg = (ctxt->rex_bits & REX_R ? 8 : 0);
1141 	index_reg = (ctxt->rex_bits & REX_X ? 8 : 0);
1142 	base_reg = (ctxt->rex_bits & REX_B ? 8 : 0);
1143 
1144 	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1145 	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1146 	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1147 	ctxt->modrm_seg = VCPU_SREG_DS;
1148 
	/* Register-direct form: no address computation needed. */
1149 	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1150 		__decode_register_operand(ctxt, op, ctxt->modrm_rm);
1151 		return rc;
1152 	}
1153 
1154 	op->type = OP_MEM;
1155 
1156 	if (ctxt->ad_bytes == 2) {
1157 		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1158 		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1159 		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1160 		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1161 
1162 		/* 16-bit ModR/M decode. */
		/* Displacement first: disp16 for mod 0/rm 6 and mod 2, disp8 for mod 1. */
1163 		switch (ctxt->modrm_mod) {
1164 		case 0:
1165 			if (ctxt->modrm_rm == 6)
1166 				modrm_ea += insn_fetch(u16, ctxt);
1167 			break;
1168 		case 1:
1169 			modrm_ea += insn_fetch(s8, ctxt);
1170 			break;
1171 		case 2:
1172 			modrm_ea += insn_fetch(u16, ctxt);
1173 			break;
1174 		}
		/* Then the base/index register combination selected by rm. */
1175 		switch (ctxt->modrm_rm) {
1176 		case 0:
1177 			modrm_ea += bx + si;
1178 			break;
1179 		case 1:
1180 			modrm_ea += bx + di;
1181 			break;
1182 		case 2:
1183 			modrm_ea += bp + si;
1184 			break;
1185 		case 3:
1186 			modrm_ea += bp + di;
1187 			break;
1188 		case 4:
1189 			modrm_ea += si;
1190 			break;
1191 		case 5:
1192 			modrm_ea += di;
1193 			break;
1194 		case 6:
1195 			if (ctxt->modrm_mod != 0)
1196 				modrm_ea += bp;
1197 			break;
1198 		case 7:
1199 			modrm_ea += bx;
1200 			break;
1201 		}
		/* Every form that adds BP uses SS instead of DS. */
1202 		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1203 		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1204 			ctxt->modrm_seg = VCPU_SREG_SS;
		/* The 16-bit effective address wraps at 64K. */
1205 		modrm_ea = (u16)modrm_ea;
1206 	} else {
1207 		/* 32/64-bit ModR/M decode. */
1208 		if ((ctxt->modrm_rm & 7) == 4) {
			/* rm == 4: a SIB byte follows (scale in bits 7:6, index 5:3, base 2:0). */
1209 			sib = insn_fetch(u8, ctxt);
1210 			index_reg |= (sib >> 3) & 7;
1211 			base_reg |= sib & 7;
1212 			scale = sib >> 6;
1213 
			/* base == 5 with mod == 0: a disp32 replaces the base register. */
1214 			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1215 				modrm_ea += insn_fetch(s32, ctxt);
1216 			else {
1217 				modrm_ea += reg_read(ctxt, base_reg);
1218 				adjust_modrm_seg(ctxt, base_reg);
1219 				/* Increment ESP on POP [ESP] */
1220 				if ((ctxt->d & IncSP) &&
1221 				    base_reg == VCPU_REGS_RSP)
1222 					modrm_ea += ctxt->op_bytes;
1223 			}
			/* Index register number 4 means "no index". */
1224 			if (index_reg != 4)
1225 				modrm_ea += reg_read(ctxt, index_reg) << scale;
1226 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			/* disp32 only; flagged as RIP-relative in 64-bit mode. */
1227 			modrm_ea += insn_fetch(s32, ctxt);
1228 			if (ctxt->mode == X86EMUL_MODE_PROT64)
1229 				ctxt->rip_relative = 1;
1230 		} else {
1231 			base_reg = ctxt->modrm_rm;
1232 			modrm_ea += reg_read(ctxt, base_reg);
1233 			adjust_modrm_seg(ctxt, base_reg);
1234 		}
		/* Trailing displacement: disp8 for mod 1, disp32 for mod 2. */
1235 		switch (ctxt->modrm_mod) {
1236 		case 1:
1237 			modrm_ea += insn_fetch(s8, ctxt);
1238 			break;
1239 		case 2:
1240 			modrm_ea += insn_fetch(s32, ctxt);
1241 			break;
1242 		}
1243 	}
1244 	op->addr.mem.ea = modrm_ea;
	/*
	 * NOTE(review): the truncation below is applied to ctxt->memop rather
	 * than to @op; presumably callers always pass &ctxt->memop -- confirm.
	 */
1245 	if (ctxt->ad_bytes != 8)
1246 		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1247 
1248 done:
1249 	return rc;
1250 }
1251 
1252 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1253 		      struct operand *op)
1254 {
1255 	int rc = X86EMUL_CONTINUE;
1256 
1257 	op->type = OP_MEM;
1258 	switch (ctxt->ad_bytes) {
1259 	case 2:
1260 		op->addr.mem.ea = insn_fetch(u16, ctxt);
1261 		break;
1262 	case 4:
1263 		op->addr.mem.ea = insn_fetch(u32, ctxt);
1264 		break;
1265 	case 8:
1266 		op->addr.mem.ea = insn_fetch(u64, ctxt);
1267 		break;
1268 	}
1269 done:
1270 	return rc;
1271 }
1272 
1273 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1274 {
1275 	long sv = 0, mask;
1276 
1277 	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1278 		mask = ~((long)ctxt->dst.bytes * 8 - 1);
1279 
1280 		if (ctxt->src.bytes == 2)
1281 			sv = (s16)ctxt->src.val & (s16)mask;
1282 		else if (ctxt->src.bytes == 4)
1283 			sv = (s32)ctxt->src.val & (s32)mask;
1284 		else
1285 			sv = (s64)ctxt->src.val & (s64)mask;
1286 
1287 		ctxt->dst.addr.mem.ea = address_mask(ctxt,
1288 					   ctxt->dst.addr.mem.ea + (sv >> 3));
1289 	}
1290 
1291 	/* only subword offset */
1292 	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1293 }
1294 
1295 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1296 			 unsigned long addr, void *dest, unsigned size)
1297 {
1298 	int rc;
1299 	struct read_cache *mc = &ctxt->mem_read;
1300 
1301 	/*
1302 	 * If the read gets a cache hit, simply copy the value from the cache.
1303 	 * A "hit" here means that there is unused data in the cache, i.e. when
1304 	 * re-emulating an instruction to complete a userspace exit, KVM relies
1305 	 * on "no decode" to ensure the instruction is re-emulated in the same
1306 	 * sequence, so that multiple reads are fulfilled in the correct order.
1307 	 */
1308 	if (mc->pos < mc->end)
1309 		goto read_cached;
1310 
1311 	if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1312 		return X86EMUL_UNHANDLEABLE;
1313 
1314 	/*
1315 	 * Route all reads to the cache.  This allows @dest to be an on-stack
1316 	 * variable without triggering use-after-free if KVM needs to exit to
1317 	 * userspace to handle an MMIO read (the MMIO fragment will point at
1318 	 * the current location in the cache).
1319 	 */
1320 	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1321 				      &ctxt->exception);
1322 	if (rc != X86EMUL_CONTINUE)
1323 		return rc;
1324 
1325 	mc->end += size;
1326 
1327 read_cached:
1328 	memcpy(dest, mc->data + mc->pos, size);
1329 	mc->pos += size;
1330 	return X86EMUL_CONTINUE;
1331 }
1332 
1333 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1334 			  struct segmented_address addr,
1335 			  void *data,
1336 			  unsigned size)
1337 {
1338 	int rc;
1339 	ulong linear;
1340 
1341 	rc = linearize(ctxt, addr, size, false, &linear);
1342 	if (rc != X86EMUL_CONTINUE)
1343 		return rc;
1344 	return read_emulated(ctxt, linear, data, size);
1345 }
1346 
1347 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1348 			   struct segmented_address addr,
1349 			   const void *data,
1350 			   unsigned size)
1351 {
1352 	int rc;
1353 	ulong linear;
1354 
1355 	rc = linearize(ctxt, addr, size, true, &linear);
1356 	if (rc != X86EMUL_CONTINUE)
1357 		return rc;
1358 	return ctxt->ops->write_emulated(ctxt, linear, data, size,
1359 					 &ctxt->exception);
1360 }
1361 
1362 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1363 			     struct segmented_address addr,
1364 			     const void *orig_data, const void *data,
1365 			     unsigned size)
1366 {
1367 	int rc;
1368 	ulong linear;
1369 
1370 	rc = linearize(ctxt, addr, size, true, &linear);
1371 	if (rc != X86EMUL_CONTINUE)
1372 		return rc;
1373 	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1374 					   size, &ctxt->exception);
1375 }
1376 
1377 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1378 			   unsigned int size, unsigned short port,
1379 			   void *dest)
1380 {
1381 	struct read_cache *rc = &ctxt->io_read;
1382 
1383 	if (rc->pos == rc->end) { /* refill pio read ahead */
1384 		unsigned int in_page, n;
1385 		unsigned int count = ctxt->rep_prefix ?
1386 			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1387 		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1388 			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1389 			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1390 		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1391 		if (n == 0)
1392 			n = 1;
1393 		rc->pos = rc->end = 0;
1394 		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1395 			return 0;
1396 		rc->end = n * size;
1397 	}
1398 
1399 	if (ctxt->rep_prefix && (ctxt->d & String) &&
1400 	    !(ctxt->eflags & X86_EFLAGS_DF)) {
1401 		ctxt->dst.data = rc->data + rc->pos;
1402 		ctxt->dst.type = OP_MEM_STR;
1403 		ctxt->dst.count = (rc->end - rc->pos) / size;
1404 		rc->pos = rc->end;
1405 	} else {
1406 		memcpy(dest, rc->data + rc->pos, size);
1407 		rc->pos += size;
1408 	}
1409 	return 1;
1410 }
1411 
1412 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1413 				     u16 index, struct desc_struct *desc)
1414 {
1415 	struct desc_ptr dt;
1416 	ulong addr;
1417 
1418 	ctxt->ops->get_idt(ctxt, &dt);
1419 
1420 	if (dt.size < index * 8 + 7)
1421 		return emulate_gp(ctxt, index << 3 | 0x2);
1422 
1423 	addr = dt.address + index * 8;
1424 	return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1425 }
1426 
1427 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1428 				     u16 selector, struct desc_ptr *dt)
1429 {
1430 	const struct x86_emulate_ops *ops = ctxt->ops;
1431 	u32 base3 = 0;
1432 
1433 	if (selector & 1 << 2) {
1434 		struct desc_struct desc;
1435 		u16 sel;
1436 
1437 		memset(dt, 0, sizeof(*dt));
1438 		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1439 				      VCPU_SREG_LDTR))
1440 			return;
1441 
1442 		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1443 		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1444 	} else
1445 		ops->get_gdt(ctxt, dt);
1446 }
1447 
1448 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1449 			      u16 selector, ulong *desc_addr_p)
1450 {
1451 	struct desc_ptr dt;
1452 	u16 index = selector >> 3;
1453 	ulong addr;
1454 
1455 	get_descriptor_table_ptr(ctxt, selector, &dt);
1456 
1457 	if (dt.size < index * 8 + 7)
1458 		return emulate_gp(ctxt, selector & 0xfffc);
1459 
1460 	addr = dt.address + index * 8;
1461 
1462 #ifdef CONFIG_X86_64
1463 	if (addr >> 32 != 0) {
1464 		u64 efer = 0;
1465 
1466 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1467 		if (!(efer & EFER_LMA))
1468 			addr &= (u32)-1;
1469 	}
1470 #endif
1471 
1472 	*desc_addr_p = addr;
1473 	return X86EMUL_CONTINUE;
1474 }
1475 
1476 /* allowed just for 8 bytes segments */
1477 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1478 				   u16 selector, struct desc_struct *desc,
1479 				   ulong *desc_addr_p)
1480 {
1481 	int rc;
1482 
1483 	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1484 	if (rc != X86EMUL_CONTINUE)
1485 		return rc;
1486 
1487 	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1488 }
1489 
1490 /* allowed just for 8 bytes segments */
1491 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1492 				    u16 selector, struct desc_struct *desc)
1493 {
1494 	int rc;
1495 	ulong addr;
1496 
1497 	rc = get_descriptor_ptr(ctxt, selector, &addr);
1498 	if (rc != X86EMUL_CONTINUE)
1499 		return rc;
1500 
1501 	return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1502 }
1503 
1504 static bool emulator_is_ssp_invalid(struct x86_emulate_ctxt *ctxt, u8 cpl)
1505 {
1506 	const u32 MSR_IA32_X_CET = cpl == 3 ? MSR_IA32_U_CET : MSR_IA32_S_CET;
1507 	u64 efer = 0, cet = 0, ssp = 0;
1508 
1509 	if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET))
1510 		return false;
1511 
1512 	if (ctxt->ops->get_msr(ctxt, MSR_EFER, &efer))
1513 		return true;
1514 
1515 	/* SSP is guaranteed to be valid if the vCPU was already in 32-bit mode. */
1516 	if (!(efer & EFER_LMA))
1517 		return false;
1518 
1519 	if (ctxt->ops->get_msr(ctxt, MSR_IA32_X_CET, &cet))
1520 		return true;
1521 
1522 	if (!(cet & CET_SHSTK_EN))
1523 		return false;
1524 
1525 	if (ctxt->ops->get_msr(ctxt, MSR_KVM_INTERNAL_GUEST_SSP, &ssp))
1526 		return true;
1527 
1528 	/*
1529 	 * On transfer from 64-bit mode to compatibility mode, SSP[63:32] must
1530 	 * be 0, i.e. SSP must be a 32-bit value outside of 64-bit mode.
1531 	 */
1532 	return ssp >> 32;
1533 }
1534 
/*
 * Load segment register @seg with @selector, performing the checks that apply
 * to the current emulation mode and to the kind of control @transfer.
 *
 * @cpl is the privilege level the checks are made against.  If @desc is not
 * NULL, the descriptor that ends up being loaded is copied to it on success.
 *
 * In real mode, and in VM86 mode for segments up to GS, the descriptor is
 * synthesized from the selector and no checks are made.  A NULL selector is
 * accepted for everything except CS and TR, and for SS only in 64-bit mode
 * with RPL == CPL.  Otherwise the descriptor is read from the descriptor
 * table, validated per segment type, marked accessed (or busy, for TR) in
 * memory where needed, and passed to ops->set_segment().
 *
 * Returns X86EMUL_CONTINUE on success, X86EMUL_UNHANDLEABLE for cases that
 * are not implemented, the failure code of a memory access helper, or the
 * result of raising the selected exception (err_vec/err_code).
 */
1535 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1536 				     u16 selector, int seg, u8 cpl,
1537 				     enum x86_transfer_type transfer,
1538 				     struct desc_struct *desc)
1539 {
1540 	struct desc_struct seg_desc, old_desc;
1541 	u8 dpl, rpl;
1542 	unsigned err_vec = GP_VECTOR;
1543 	u32 err_code = 0;
1544 	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1545 	ulong desc_addr;
1546 	int ret;
1547 	u16 dummy;
1548 	u32 base3 = 0;
1549 
1550 	memset(&seg_desc, 0, sizeof(seg_desc));
1551 
1552 	if (ctxt->mode == X86EMUL_MODE_REAL) {
1553 		/* set real mode segment descriptor (keep limit etc. for
1554 		 * unreal mode) */
1555 		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1556 		set_desc_base(&seg_desc, selector << 4);
1557 		goto load;
1558 	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1559 		/* VM86 needs a clean new segment descriptor */
1560 		set_desc_base(&seg_desc, selector << 4);
1561 		set_desc_limit(&seg_desc, 0xffff);
1562 		seg_desc.type = 3;
1563 		seg_desc.p = 1;
1564 		seg_desc.s = 1;
1565 		seg_desc.dpl = 3;
1566 		goto load;
1567 	}
1568 
1569 	rpl = selector & 3;
1570 
1571 	/* TR should be in GDT only */
1572 	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1573 		goto exception;
1574 
1575 	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
1576 	if (null_selector) {
1577 		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1578 			goto exception;
1579 
1580 		if (seg == VCPU_SREG_SS) {
1581 			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1582 				goto exception;
1583 
1584 			/*
1585 			 * ctxt->ops->set_segment expects the CPL to be in
1586 			 * SS.DPL, so fake an expand-up 32-bit data segment.
1587 			 */
1588 			seg_desc.type = 3;
1589 			seg_desc.p = 1;
1590 			seg_desc.s = 1;
1591 			seg_desc.dpl = cpl;
1592 			seg_desc.d = 1;
1593 			seg_desc.g = 1;
1594 		}
1595 
1596 		/* Skip all following checks */
1597 		goto load;
1598 	}
1599 
1600 	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1601 	if (ret != X86EMUL_CONTINUE)
1602 		return ret;
1603 
	/*
	 * From here on, failures report the selector (RPL bits cleared) as
	 * error code, and use the TS vector instead of GP during a task switch.
	 */
1604 	err_code = selector & 0xfffc;
1605 	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1606 							   GP_VECTOR;
1607 
1608 	/* can't load system descriptor into segment selector */
1609 	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1610 		if (transfer == X86_TRANSFER_CALL_JMP)
1611 			return X86EMUL_UNHANDLEABLE;
1612 		goto exception;
1613 	}
1614 
1615 	dpl = seg_desc.dpl;
1616 
1617 	switch (seg) {
1618 	case VCPU_SREG_SS:
1619 		/*
1620 		 * segment is not a writable data segment or segment
1621 		 * selector's RPL != CPL or DPL != CPL
1622 		 */
1623 		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1624 			goto exception;
1625 		break;
1626 	case VCPU_SREG_CS:
1627 		/*
1628 		 * KVM uses "none" when loading CS as part of emulating Real
1629 		 * Mode exceptions and IRET (handled above).  In all other
1630 		 * cases, loading CS without a control transfer is a KVM bug.
1631 		 */
1632 		if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1633 			goto exception;
1634 
		/* Type bit 3 distinguishes code (set) from data (clear). */
1635 		if (!(seg_desc.type & 8))
1636 			goto exception;
1637 
1638 		if (transfer == X86_TRANSFER_RET) {
1639 			/* RET can never return to an inner privilege level. */
1640 			if (rpl < cpl)
1641 				goto exception;
1642 			/* Outer-privilege level return is not implemented */
1643 			if (rpl > cpl)
1644 				return X86EMUL_UNHANDLEABLE;
1645 		}
1646 		if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1647 			if (seg_desc.type & 4) {
1648 				/* conforming */
1649 				if (dpl > rpl)
1650 					goto exception;
1651 			} else {
1652 				/* nonconforming */
1653 				if (dpl != rpl)
1654 					goto exception;
1655 			}
1656 		} else { /* X86_TRANSFER_CALL_JMP */
1657 			if (seg_desc.type & 4) {
1658 				/* conforming */
1659 				if (dpl > cpl)
1660 					goto exception;
1661 			} else {
1662 				/* nonconforming */
1663 				if (rpl > cpl || dpl != cpl)
1664 					goto exception;
1665 			}
1666 		}
1667 		/* in long-mode d/b must be clear if l is set */
1668 		if (seg_desc.d && seg_desc.l) {
1669 			u64 efer = 0;
1670 
1671 			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1672 			if (efer & EFER_LMA)
1673 				goto exception;
1674 		}
		/* A non-64-bit code segment with an unusable SSP faults with error code 0. */
1675 		if (!seg_desc.l && emulator_is_ssp_invalid(ctxt, cpl)) {
1676 			err_code = 0;
1677 			goto exception;
1678 		}
1679 
1680 		/* CS(RPL) <- CPL */
1681 		selector = (selector & 0xfffc) | cpl;
1682 		break;
1683 	case VCPU_SREG_TR:
		/* Only system descriptors of type 1 or 9 are accepted for TR. */
1684 		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1685 			goto exception;
1686 		break;
1687 	case VCPU_SREG_LDTR:
		/* Only a system descriptor of type 2 is accepted for LDTR. */
1688 		if (seg_desc.s || seg_desc.type != 2)
1689 			goto exception;
1690 		break;
1691 	default: /*  DS, ES, FS, or GS */
1692 		/*
1693 		 * segment is not a data or readable code segment or
1694 		 * ((segment is a data or nonconforming code segment)
1695 		 * and ((RPL > DPL) or (CPL > DPL)))
1696 		 */
1697 		if ((seg_desc.type & 0xa) == 0x8 ||
1698 		    (((seg_desc.type & 0xc) != 0xc) &&
1699 		     (rpl > dpl || cpl > dpl)))
1700 			goto exception;
1701 		break;
1702 	}
1703 
	/* A not-present segment raises SS_VECTOR for SS and NP_VECTOR otherwise. */
1704 	if (!seg_desc.p) {
1705 		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1706 		goto exception;
1707 	}
1708 
1709 	if (seg_desc.s) {
1710 		/* mark segment as accessed */
1711 		if (!(seg_desc.type & 1)) {
1712 			seg_desc.type |= 1;
1713 			ret = write_segment_descriptor(ctxt, selector,
1714 						       &seg_desc);
1715 			if (ret != X86EMUL_CONTINUE)
1716 				return ret;
1717 		}
1718 	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		/*
		 * System descriptor in 64-bit mode: the upper 32 bits of the
		 * base are read from the 4 bytes following the 8-byte
		 * descriptor, and the full base must be canonical.
		 */
1719 		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1720 		if (ret != X86EMUL_CONTINUE)
1721 			return ret;
1722 		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1723 						 ((u64)base3 << 32), ctxt,
1724 						 X86EMUL_F_DT_LOAD))
1725 			return emulate_gp(ctxt, err_code);
1726 	}
1727 
1728 	if (seg == VCPU_SREG_TR) {
		/* Set the busy bit in memory with a compare-and-exchange against the descriptor just read. */
1729 		old_desc = seg_desc;
1730 		seg_desc.type |= 2; /* busy */
1731 		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1732 						  sizeof(seg_desc), &ctxt->exception);
1733 		if (ret != X86EMUL_CONTINUE)
1734 			return ret;
1735 	}
1736 load:
1737 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1738 	if (desc)
1739 		*desc = seg_desc;
1740 	return X86EMUL_CONTINUE;
1741 exception:
1742 	return emulate_exception(ctxt, err_vec, err_code, true);
1743 }
1744 
1745 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1746 				   u16 selector, int seg)
1747 {
1748 	u8 cpl = ctxt->ops->cpl(ctxt);
1749 
1750 	/*
1751 	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1752 	 * they can load it at CPL<3 (Intel's manual says only LSS can,
1753 	 * but it's wrong).
1754 	 *
1755 	 * However, the Intel manual says that putting IST=1/DPL=3 in
1756 	 * an interrupt gate will result in SS=3 (the AMD manual instead
1757 	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1758 	 * and only forbid it here.
1759 	 */
1760 	if (seg == VCPU_SREG_SS && selector == 3 &&
1761 	    ctxt->mode == X86EMUL_MODE_PROT64)
1762 		return emulate_exception(ctxt, GP_VECTOR, 0, true);
1763 
1764 	return __load_segment_descriptor(ctxt, selector, seg, cpl,
1765 					 X86_TRANSFER_NONE, NULL);
1766 }
1767 
1768 static void write_register_operand(struct operand *op)
1769 {
1770 	return assign_register(op->addr.reg, op->val, op->bytes);
1771 }
1772 
1773 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1774 {
1775 	switch (op->type) {
1776 	case OP_REG:
1777 		write_register_operand(op);
1778 		break;
1779 	case OP_MEM:
1780 		if (ctxt->lock_prefix)
1781 			return segmented_cmpxchg(ctxt,
1782 						 op->addr.mem,
1783 						 &op->orig_val,
1784 						 &op->val,
1785 						 op->bytes);
1786 		else
1787 			return segmented_write(ctxt,
1788 					       op->addr.mem,
1789 					       &op->val,
1790 					       op->bytes);
1791 	case OP_MEM_STR:
1792 		return segmented_write(ctxt,
1793 				       op->addr.mem,
1794 				       op->data,
1795 				       op->bytes * op->count);
1796 	case OP_XMM:
1797 		if (!(ctxt->d & Avx)) {
1798 			kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1799 			break;
1800 		}
1801 		/* full YMM write but with high bytes cleared */
1802 		memset(op->valptr + 16, 0, 16);
1803 		fallthrough;
1804 	case OP_YMM:
1805 		kvm_write_avx_reg(op->addr.xmm, &op->vec_val2);
1806 		break;
1807 	case OP_MM:
1808 		kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1809 		break;
1810 	case OP_NONE:
1811 		/* no writeback */
1812 		break;
1813 	default:
1814 		break;
1815 	}
1816 	return X86EMUL_CONTINUE;
1817 }
1818 
1819 static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
1820 {
1821 	struct segmented_address addr;
1822 
1823 	rsp_increment(ctxt, -len);
1824 	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1825 	addr.seg = VCPU_SREG_SS;
1826 
1827 	return segmented_write(ctxt, addr, data, len);
1828 }
1829 
1830 static int em_push(struct x86_emulate_ctxt *ctxt)
1831 {
1832 	/* Disable writeback. */
1833 	ctxt->dst.type = OP_NONE;
1834 	return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1835 }
1836 
1837 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1838 		       void *dest, int len)
1839 {
1840 	int rc;
1841 	struct segmented_address addr;
1842 
1843 	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1844 	addr.seg = VCPU_SREG_SS;
1845 	rc = segmented_read(ctxt, addr, dest, len);
1846 	if (rc != X86EMUL_CONTINUE)
1847 		return rc;
1848 
1849 	rsp_increment(ctxt, len);
1850 	return rc;
1851 }
1852 
1853 static int em_pop(struct x86_emulate_ctxt *ctxt)
1854 {
1855 	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1856 }
1857 
1858 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1859 			void *dest, int len)
1860 {
1861 	int rc;
1862 	unsigned long val = 0;
1863 	unsigned long change_mask;
1864 	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1865 	int cpl = ctxt->ops->cpl(ctxt);
1866 
1867 	rc = emulate_pop(ctxt, &val, len);
1868 	if (rc != X86EMUL_CONTINUE)
1869 		return rc;
1870 
1871 	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1872 		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1873 		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1874 		      X86_EFLAGS_AC | X86_EFLAGS_ID;
1875 
1876 	switch(ctxt->mode) {
1877 	case X86EMUL_MODE_PROT64:
1878 	case X86EMUL_MODE_PROT32:
1879 	case X86EMUL_MODE_PROT16:
1880 		if (cpl == 0)
1881 			change_mask |= X86_EFLAGS_IOPL;
1882 		if (cpl <= iopl)
1883 			change_mask |= X86_EFLAGS_IF;
1884 		break;
1885 	case X86EMUL_MODE_VM86:
1886 		if (iopl < 3)
1887 			return emulate_gp(ctxt, 0);
1888 		change_mask |= X86_EFLAGS_IF;
1889 		break;
1890 	default: /* real mode */
1891 		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1892 		break;
1893 	}
1894 
1895 	*(unsigned long *)dest =
1896 		(ctxt->eflags & ~change_mask) | (val & change_mask);
1897 
1898 	return rc;
1899 }
1900 
1901 static int em_popf(struct x86_emulate_ctxt *ctxt)
1902 {
1903 	ctxt->dst.type = OP_REG;
1904 	ctxt->dst.addr.reg = &ctxt->eflags;
1905 	ctxt->dst.bytes = ctxt->op_bytes;
1906 	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1907 }
1908 
1909 static int em_enter(struct x86_emulate_ctxt *ctxt)
1910 {
1911 	int rc;
1912 	unsigned frame_size = ctxt->src.val;
1913 	unsigned nesting_level = ctxt->src2.val & 31;
1914 	ulong rbp;
1915 
1916 	if (nesting_level)
1917 		return X86EMUL_UNHANDLEABLE;
1918 
1919 	rbp = reg_read(ctxt, VCPU_REGS_RBP);
1920 	rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
1921 	if (rc != X86EMUL_CONTINUE)
1922 		return rc;
1923 	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1924 		      stack_mask(ctxt));
1925 	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1926 		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1927 		      stack_mask(ctxt));
1928 	return X86EMUL_CONTINUE;
1929 }
1930 
1931 static int em_leave(struct x86_emulate_ctxt *ctxt)
1932 {
1933 	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1934 		      stack_mask(ctxt));
1935 	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1936 }
1937 
1938 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1939 {
1940 	int seg = ctxt->src2.val;
1941 
1942 	ctxt->src.val = get_segment_selector(ctxt, seg);
1943 	if (ctxt->op_bytes == 4) {
1944 		rsp_increment(ctxt, -2);
1945 		ctxt->op_bytes = 2;
1946 	}
1947 
1948 	return em_push(ctxt);
1949 }
1950 
1951 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1952 {
1953 	int seg = ctxt->src2.val;
1954 	unsigned long selector = 0;
1955 	int rc;
1956 
1957 	rc = emulate_pop(ctxt, &selector, 2);
1958 	if (rc != X86EMUL_CONTINUE)
1959 		return rc;
1960 
1961 	if (seg == VCPU_SREG_SS)
1962 		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1963 	if (ctxt->op_bytes > 2)
1964 		rsp_increment(ctxt, ctxt->op_bytes - 2);
1965 
1966 	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1967 	return rc;
1968 }
1969 
1970 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1971 {
1972 	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1973 	int rc = X86EMUL_CONTINUE;
1974 	int reg = VCPU_REGS_RAX;
1975 
1976 	while (reg <= VCPU_REGS_RDI) {
1977 		(reg == VCPU_REGS_RSP) ?
1978 		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
1979 
1980 		rc = em_push(ctxt);
1981 		if (rc != X86EMUL_CONTINUE)
1982 			return rc;
1983 
1984 		++reg;
1985 	}
1986 
1987 	return rc;
1988 }
1989 
1990 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1991 {
1992 	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1993 	return em_push(ctxt);
1994 }
1995 
1996 static int em_popa(struct x86_emulate_ctxt *ctxt)
1997 {
1998 	int rc = X86EMUL_CONTINUE;
1999 	int reg = VCPU_REGS_RDI;
2000 	u32 val = 0;
2001 
2002 	while (reg >= VCPU_REGS_RAX) {
2003 		if (reg == VCPU_REGS_RSP) {
2004 			rsp_increment(ctxt, ctxt->op_bytes);
2005 			--reg;
2006 		}
2007 
2008 		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2009 		if (rc != X86EMUL_CONTINUE)
2010 			break;
2011 		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2012 		--reg;
2013 	}
2014 	return rc;
2015 }
2016 
2017 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2018 {
2019 	const struct x86_emulate_ops *ops = ctxt->ops;
2020 	int rc;
2021 	struct desc_ptr dt;
2022 	gva_t cs_addr;
2023 	gva_t eip_addr;
2024 	u16 cs, eip;
2025 
2026 	/* TODO: Add limit checks */
2027 	ctxt->src.val = ctxt->eflags;
2028 	rc = em_push(ctxt);
2029 	if (rc != X86EMUL_CONTINUE)
2030 		return rc;
2031 
2032 	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2033 
2034 	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2035 	rc = em_push(ctxt);
2036 	if (rc != X86EMUL_CONTINUE)
2037 		return rc;
2038 
2039 	ctxt->src.val = ctxt->_eip;
2040 	rc = em_push(ctxt);
2041 	if (rc != X86EMUL_CONTINUE)
2042 		return rc;
2043 
2044 	ops->get_idt(ctxt, &dt);
2045 
2046 	eip_addr = dt.address + (irq << 2);
2047 	cs_addr = dt.address + (irq << 2) + 2;
2048 
2049 	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2050 	if (rc != X86EMUL_CONTINUE)
2051 		return rc;
2052 
2053 	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2054 	if (rc != X86EMUL_CONTINUE)
2055 		return rc;
2056 
2057 	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2058 	if (rc != X86EMUL_CONTINUE)
2059 		return rc;
2060 
2061 	ctxt->_eip = eip;
2062 
2063 	return rc;
2064 }
2065 
2066 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2067 {
2068 	int rc;
2069 
2070 	invalidate_registers(ctxt);
2071 	rc = __emulate_int_real(ctxt, irq);
2072 	if (rc == X86EMUL_CONTINUE)
2073 		writeback_registers(ctxt);
2074 	return rc;
2075 }
2076 
2077 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2078 {
2079 	switch(ctxt->mode) {
2080 	case X86EMUL_MODE_REAL:
2081 		return __emulate_int_real(ctxt, irq);
2082 	case X86EMUL_MODE_VM86:
2083 	case X86EMUL_MODE_PROT16:
2084 	case X86EMUL_MODE_PROT32:
2085 	case X86EMUL_MODE_PROT64:
2086 	default:
2087 		/* Protected mode interrupts unimplemented yet */
2088 		return X86EMUL_UNHANDLEABLE;
2089 	}
2090 }
2091 
2092 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2093 {
2094 	int rc = X86EMUL_CONTINUE;
2095 	unsigned long temp_eip = 0;
2096 	unsigned long temp_eflags = 0;
2097 	unsigned long cs = 0;
2098 	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2099 			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2100 			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2101 			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2102 			     X86_EFLAGS_AC | X86_EFLAGS_ID |
2103 			     X86_EFLAGS_FIXED;
2104 	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2105 				  X86_EFLAGS_VIP;
2106 
2107 	/* TODO: Add stack limit check */
2108 
2109 	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2110 
2111 	if (rc != X86EMUL_CONTINUE)
2112 		return rc;
2113 
2114 	if (temp_eip & ~0xffff)
2115 		return emulate_gp(ctxt, 0);
2116 
2117 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2118 
2119 	if (rc != X86EMUL_CONTINUE)
2120 		return rc;
2121 
2122 	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2123 
2124 	if (rc != X86EMUL_CONTINUE)
2125 		return rc;
2126 
2127 	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2128 
2129 	if (rc != X86EMUL_CONTINUE)
2130 		return rc;
2131 
2132 	ctxt->_eip = temp_eip;
2133 
2134 	if (ctxt->op_bytes == 4)
2135 		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2136 	else if (ctxt->op_bytes == 2) {
2137 		ctxt->eflags &= ~0xffff;
2138 		ctxt->eflags |= temp_eflags;
2139 	}
2140 
2141 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2142 	ctxt->eflags |= X86_EFLAGS_FIXED;
2143 	ctxt->ops->set_nmi_mask(ctxt, false);
2144 
2145 	return rc;
2146 }
2147 
2148 static int em_iret(struct x86_emulate_ctxt *ctxt)
2149 {
2150 	switch(ctxt->mode) {
2151 	case X86EMUL_MODE_REAL:
2152 		return emulate_iret_real(ctxt);
2153 	case X86EMUL_MODE_VM86:
2154 	case X86EMUL_MODE_PROT16:
2155 	case X86EMUL_MODE_PROT32:
2156 	case X86EMUL_MODE_PROT64:
2157 	default:
2158 		/* iret from protected mode unimplemented yet */
2159 		return X86EMUL_UNHANDLEABLE;
2160 	}
2161 }
2162 
2163 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2164 {
2165 	int rc;
2166 	unsigned short sel;
2167 	struct desc_struct new_desc;
2168 	u8 cpl = ctxt->ops->cpl(ctxt);
2169 
2170 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2171 
2172 	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2173 				       X86_TRANSFER_CALL_JMP,
2174 				       &new_desc);
2175 	if (rc != X86EMUL_CONTINUE)
2176 		return rc;
2177 
2178 	rc = assign_eip_far(ctxt, ctxt->src.val);
2179 	/* Error handling is not implemented. */
2180 	if (rc != X86EMUL_CONTINUE)
2181 		return X86EMUL_UNHANDLEABLE;
2182 
2183 	return rc;
2184 }
2185 
2186 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2187 {
2188 	return assign_eip_near(ctxt, ctxt->src.val);
2189 }
2190 
2191 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2192 {
2193 	int rc;
2194 	long int old_eip;
2195 
2196 	old_eip = ctxt->_eip;
2197 	rc = assign_eip_near(ctxt, ctxt->src.val);
2198 	if (rc != X86EMUL_CONTINUE)
2199 		return rc;
2200 	ctxt->src.val = old_eip;
2201 	rc = em_push(ctxt);
2202 	return rc;
2203 }
2204 
2205 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2206 {
2207 	u64 old = ctxt->dst.orig_val64;
2208 
2209 	if (ctxt->dst.bytes == 16)
2210 		return X86EMUL_UNHANDLEABLE;
2211 
2212 	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2213 	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2214 		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2215 		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2216 		ctxt->eflags &= ~X86_EFLAGS_ZF;
2217 	} else {
2218 		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2219 			(u32) reg_read(ctxt, VCPU_REGS_RBX);
2220 
2221 		ctxt->eflags |= X86_EFLAGS_ZF;
2222 	}
2223 	return X86EMUL_CONTINUE;
2224 }
2225 
2226 static int em_ret(struct x86_emulate_ctxt *ctxt)
2227 {
2228 	int rc;
2229 	unsigned long eip = 0;
2230 
2231 	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2232 	if (rc != X86EMUL_CONTINUE)
2233 		return rc;
2234 
2235 	return assign_eip_near(ctxt, eip);
2236 }
2237 
2238 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2239 {
2240 	int rc;
2241 	unsigned long eip = 0;
2242 	unsigned long cs = 0;
2243 	int cpl = ctxt->ops->cpl(ctxt);
2244 	struct desc_struct new_desc;
2245 
2246 	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2247 	if (rc != X86EMUL_CONTINUE)
2248 		return rc;
2249 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2250 	if (rc != X86EMUL_CONTINUE)
2251 		return rc;
2252 	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2253 				       X86_TRANSFER_RET,
2254 				       &new_desc);
2255 	if (rc != X86EMUL_CONTINUE)
2256 		return rc;
2257 	rc = assign_eip_far(ctxt, eip);
2258 	/* Error handling is not implemented. */
2259 	if (rc != X86EMUL_CONTINUE)
2260 		return X86EMUL_UNHANDLEABLE;
2261 
2262 	return rc;
2263 }
2264 
2265 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2266 {
2267         int rc;
2268 
2269         rc = em_ret_far(ctxt);
2270         if (rc != X86EMUL_CONTINUE)
2271                 return rc;
2272         rsp_increment(ctxt, ctxt->src.val);
2273         return X86EMUL_CONTINUE;
2274 }
2275 
/*
 * CMPXCHG: compare the accumulator with the destination operand using
 * em_cmp() and act on the resulting ZF:
 *  - ZF set:   the source value is stored into the destination;
 *  - ZF clear: the destination value is routed to the accumulator via the
 *              source operand, and the destination is rewritten with its
 *              own value.
 *
 * Always returns X86EMUL_CONTINUE.
 */
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
	ctxt->src.orig_val = ctxt->src.val;
	ctxt->src.val = ctxt->dst.orig_val;
	em_cmp(ctxt);

	if (ctxt->eflags & X86_EFLAGS_ZF) {
		/* Success: write back to memory; no update of EAX */
		ctxt->src.type = OP_NONE;
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
		ctxt->src.type = OP_REG;
		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		ctxt->src.val = ctxt->dst.orig_val;
		/* Create write-cycle to dest by writing the same value */
		ctxt->dst.val = ctxt->dst.orig_val;
	}
	return X86EMUL_CONTINUE;
}
2299 
2300 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2301 {
2302 	int seg = ctxt->src2.val;
2303 	unsigned short sel;
2304 	int rc;
2305 
2306 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2307 
2308 	rc = load_segment_descriptor(ctxt, sel, seg);
2309 	if (rc != X86EMUL_CONTINUE)
2310 		return rc;
2311 
2312 	ctxt->dst.val = ctxt->src.val;
2313 	return rc;
2314 }
2315 
2316 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2317 {
2318 	if (!ctxt->ops->is_smm(ctxt))
2319 		return emulate_ud(ctxt);
2320 
2321 	if (ctxt->ops->leave_smm(ctxt))
2322 		ctxt->ops->triple_fault(ctxt);
2323 
2324 	return emulator_recalc_and_set_mode(ctxt);
2325 }
2326 
2327 static void
2328 setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2329 {
2330 	cs->l = 0;		/* will be adjusted later */
2331 	set_desc_base(cs, 0);	/* flat segment */
2332 	cs->g = 1;		/* 4kb granularity */
2333 	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
2334 	cs->type = 0x0b;	/* Read, Execute, Accessed */
2335 	cs->s = 1;
2336 	cs->dpl = 0;		/* will be adjusted later */
2337 	cs->p = 1;
2338 	cs->d = 1;
2339 	cs->avl = 0;
2340 
2341 	set_desc_base(ss, 0);	/* flat segment */
2342 	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
2343 	ss->g = 1;		/* 4kb granularity */
2344 	ss->s = 1;
2345 	ss->type = 0x03;	/* Read/Write, Accessed */
2346 	ss->d = 1;		/* 32bit stack segment */
2347 	ss->dpl = 0;
2348 	ss->p = 1;
2349 	ss->l = 0;
2350 	ss->avl = 0;
2351 }
2352 
/*
 * Emulate SYSCALL.
 *
 * Raises #UD in real and VM86 mode, outside 64-bit mode when the guest CPUID
 * is Intel compatible, and when EFER.SCE is clear.  Otherwise CS and SS are
 * loaded with flat segments whose selectors are derived from MSR_STAR[63:32],
 * the current IP is saved in RCX and the new IP is taken from
 * MSR_LSTAR/MSR_CSTAR (EFER.LMA set, CONFIG_X86_64 builds) or from the low
 * 32 bits of MSR_STAR (legacy mode).
 */
static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

	/*
	 * Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
	 * AMD allows SYSCALL in any flavor of protected mode.  Note, it's
	 * infeasible to emulate Intel behavior when running on AMD hardware,
	 * as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
	 * for KVM to trap-and-emulate, unlike emulating AMD on Intel.
	 */
	if (ctxt->mode != X86EMUL_MODE_PROT64 &&
	    ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

	setup_syscalls_segments(&cs, &ss);
	/* Upper half of STAR: CS selector (RPL bits cleared); SS is that + 8. */
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (efer & EFER_LMA) {
		/* Long mode active: make the target CS a 64-bit code segment. */
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	/* RCX receives the current (return) instruction pointer. */
	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
	if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
		/* R11 receives the flags as they were before masking. */
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;

		ops->get_msr(ctxt,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		ctxt->_eip = msr_data;

		/* Clear every flag selected by MSR_SYSCALL_MASK. */
		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~msr_data;
		ctxt->eflags |= X86_EFLAGS_FIXED;
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
		ctxt->_eip = (u32)msr_data;

		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	}

	/* Re-sample TF from the updated flags. */
	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
	return X86EMUL_CONTINUE;
}
2419 
/*
 * Emulate SYSENTER.
 *
 * Raises #GP(0) in real mode or when MSR_IA32_SYSENTER_CS holds a null
 * selector, #UD in compatibility mode on guests that are not Intel
 * compatible, and is unhandleable in 64-bit mode.  Otherwise VM and IF are
 * cleared, flat CS/SS are loaded from SYSENTER_CS (SS = CS + 8) and IP/SP
 * come from the SYSENTER_EIP/ESP MSRs (truncated to 32 bits unless
 * EFER.LMA is set).
 */
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	ops->get_msr(ctxt, MSR_EFER, &efer);
	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);

	/*
	 * Intel's architecture allows SYSENTER in compatibility mode, but AMD
	 * does not.  Note, AMD does allow SYSENTER in legacy protected mode.
	 */
	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
	    !ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
		return emulate_ud(ctxt);

	/* sysenter/sysexit have not been tested in 64bit mode. */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	/* A null selector (ignoring the RPL bits) in SYSENTER_CS faults. */
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	if ((msr_data & 0xfffc) == 0x0)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(&cs, &ss);
	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
	ss_sel = cs_sel + 8;
	if (efer & EFER_LMA) {
		/* Long mode active: make the target CS a 64-bit code segment. */
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
							      (u32)msr_data;
	/* With EFER.LMA set, emulation continues in 64-bit mode. */
	if (efer & EFER_LMA)
		ctxt->mode = X86EMUL_MODE_PROT64;

	return X86EMUL_CONTINUE;
}
2472 
/*
 * Emulate SYSEXIT.
 *
 * Raises #GP(0) in real/VM86 mode, when MSR_IA32_SYSENTER_CS is null, or
 * (64-bit return) when RCX or RDX is non-canonical.  The return mode is
 * 64-bit if REX.W is present, 32-bit otherwise.  CS and SS become flat DPL-3
 * segments at fixed offsets from SYSENTER_CS, IP is taken from RDX and SP
 * from RCX.
 */
static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data, rcx, rdx;
	int usermode;
	u16 cs_sel = 0, ss_sel = 0;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(&cs, &ss);

	/* REX.W selects a return to 64-bit mode. */
	if (ctxt->rex_bits & REX_W)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	rcx = reg_read(ctxt, VCPU_REGS_RCX);
	rdx = reg_read(ctxt, VCPU_REGS_RDX);

	cs.dpl = 3;
	ss.dpl = 3;
	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		/* 32-bit return: CS = SYSENTER_CS + 16, SS = SYSENTER_CS + 24. */
		cs_sel = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
		ss_sel = (u16)(msr_data + 24);
		rcx = (u32)rcx;
		rdx = (u32)rdx;
		break;
	case X86EMUL_MODE_PROT64:
		/* 64-bit return: CS = SYSENTER_CS + 32, SS = CS + 8. */
		cs_sel = (u16)(msr_data + 32);
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
		ss_sel = cs_sel + 8;
		cs.d = 0;
		cs.l = 1;
		if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
		    emul_is_noncanonical_address(rdx, ctxt, 0))
			return emulate_gp(ctxt, 0);
		break;
	}
	/* Force the RPL bits of both selectors to 3. */
	cs_sel |= SEGMENT_RPL_MASK;
	ss_sel |= SEGMENT_RPL_MASK;

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ctxt->_eip = rdx;
	ctxt->mode = usermode;
	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;

	return X86EMUL_CONTINUE;
}
2532 
2533 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2534 {
2535 	int iopl;
2536 	if (ctxt->mode == X86EMUL_MODE_REAL)
2537 		return false;
2538 	if (ctxt->mode == X86EMUL_MODE_VM86)
2539 		return true;
2540 	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2541 	return ctxt->ops->cpl(ctxt) > iopl;
2542 }
2543 
/*
 * I/O ports that emulator_io_port_access_allowed() permits unconditionally
 * when enable_vmware_backdoor is set.
 */
#define VMWARE_PORT_VMPORT	(0x5658)
#define VMWARE_PORT_VMRPC	(0x5659)
2546 
/*
 * Consult the I/O permission bitmap of the current TSS for an access of
 * @len bytes starting at @port.
 *
 * Returns true if the access is allowed.  Returns false when TR is not
 * present, the TSS limit is below 103, the bitmap lies beyond the TSS
 * limit, a read of the TSS fails, or any of the @len bits covering the
 * access is set.  The VMware backdoor ports bypass the check when
 * enable_vmware_backdoor is set.
 */
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    u16 port, u16 len)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct tr_seg;
	u32 base3;
	int r;
	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
	/* One bit per accessed port: @len consecutive low bits. */
	unsigned mask = (1 << len) - 1;
	unsigned long base;

	/*
	 * VMware allows access to these ports even if denied
	 * by TSS I/O permission bitmap. Mimic behavior.
	 */
	if (enable_vmware_backdoor &&
	    ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
		return true;

	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
	if (!tr_seg.p)
		return false;
	if (desc_limit_scaled(&tr_seg) < 103)
		return false;
	base = get_desc_base(&tr_seg);
#ifdef CONFIG_X86_64
	base |= ((u64)base3) << 32;
#endif
	/* The 16-bit word at offset 102 of the TSS locates the bitmap. */
	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
		return false;
	/* Fetch two bytes so that a run of bits crossing a byte is covered. */
	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
	if (r != X86EMUL_CONTINUE)
		return false;
	/* Any set bit within the accessed range denies the access. */
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}
2587 
2588 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2589 				  u16 port, u16 len)
2590 {
2591 	if (ctxt->perm_ok)
2592 		return true;
2593 
2594 	if (emulator_bad_iopl(ctxt))
2595 		if (!emulator_io_port_access_allowed(ctxt, port, len))
2596 			return false;
2597 
2598 	ctxt->perm_ok = true;
2599 
2600 	return true;
2601 }
2602 
/*
 * Apply the Intel REP-string register quirk.  Only acts on CONFIG_X86_64
 * builds, with a 4-byte address size, and when the guest vendor is Intel:
 * RCX is zeroed, and RSI/RDI are truncated to 32 bits for MOVS (both) and
 * STOS (RDI only).
 */
static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * Intel CPUs mask the counter and pointers in quite strange
	 * manner when ECX is zero due to REP-string optimizations.
	 */
#ifdef CONFIG_X86_64
	u32 eax = 0, ebx, ecx = 0, edx;
	bool is_movs, is_stos;

	if (ctxt->ad_bytes != 4)
		return;

	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	if (!is_guest_vendor_intel(ebx, ecx, edx))
		return;

	*reg_write(ctxt, VCPU_REGS_RCX) = 0;

	/* 0xa4/0xa5: movsb, movsd/w; 0xaa/0xab: stosb, stosd/w */
	is_movs = ctxt->b == 0xa4 || ctxt->b == 0xa5;
	is_stos = ctxt->b == 0xaa || ctxt->b == 0xab;

	if (is_movs)
		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
	if (is_movs || is_stos)
		*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
#endif
}
2633 
/*
 * Copy the emulator's current IP, flags, the eight general purpose
 * registers and the ES/CS/SS/DS/LDTR selectors into the 16-bit TSS image
 * @tss.  Only the in-memory structure is updated; nothing is written to the
 * guest here.
 */
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_16 *tss)
{
	tss->ip = ctxt->_eip;
	tss->flag = ctxt->eflags;
	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
	tss->di = reg_read(ctxt, VCPU_REGS_RDI);

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
}
2654 
2655 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2656 				 struct tss_segment_16 *tss)
2657 {
2658 	int ret;
2659 	u8 cpl;
2660 
2661 	ctxt->_eip = tss->ip;
2662 	ctxt->eflags = tss->flag | 2;
2663 	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2664 	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2665 	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2666 	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2667 	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2668 	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2669 	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2670 	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2671 
2672 	/*
2673 	 * SDM says that segment selectors are loaded before segment
2674 	 * descriptors
2675 	 */
2676 	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2677 	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2678 	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2679 	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2680 	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2681 
2682 	cpl = tss->cs & 3;
2683 
2684 	/*
2685 	 * Now load segment descriptors. If fault happens at this stage
2686 	 * it is handled in a context of new task
2687 	 */
2688 	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2689 					X86_TRANSFER_TASK_SWITCH, NULL);
2690 	if (ret != X86EMUL_CONTINUE)
2691 		return ret;
2692 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2693 					X86_TRANSFER_TASK_SWITCH, NULL);
2694 	if (ret != X86EMUL_CONTINUE)
2695 		return ret;
2696 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2697 					X86_TRANSFER_TASK_SWITCH, NULL);
2698 	if (ret != X86EMUL_CONTINUE)
2699 		return ret;
2700 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2701 					X86_TRANSFER_TASK_SWITCH, NULL);
2702 	if (ret != X86EMUL_CONTINUE)
2703 		return ret;
2704 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2705 					X86_TRANSFER_TASK_SWITCH, NULL);
2706 	if (ret != X86EMUL_CONTINUE)
2707 		return ret;
2708 
2709 	return X86EMUL_CONTINUE;
2710 }
2711 
2712 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2713 			  ulong old_tss_base, struct desc_struct *new_desc)
2714 {
2715 	struct tss_segment_16 tss_seg;
2716 	int ret;
2717 	u32 new_tss_base = get_desc_base(new_desc);
2718 
2719 	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2720 	if (ret != X86EMUL_CONTINUE)
2721 		return ret;
2722 
2723 	save_state_to_tss16(ctxt, &tss_seg);
2724 
2725 	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2726 	if (ret != X86EMUL_CONTINUE)
2727 		return ret;
2728 
2729 	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2730 	if (ret != X86EMUL_CONTINUE)
2731 		return ret;
2732 
2733 	if (old_tss_sel != 0xffff) {
2734 		tss_seg.prev_task_link = old_tss_sel;
2735 
2736 		ret = linear_write_system(ctxt, new_tss_base,
2737 					  &tss_seg.prev_task_link,
2738 					  sizeof(tss_seg.prev_task_link));
2739 		if (ret != X86EMUL_CONTINUE)
2740 			return ret;
2741 	}
2742 
2743 	return load_state_from_tss16(ctxt, &tss_seg);
2744 }
2745 
/*
 * Copy the emulator's current EIP, EFLAGS, the eight general purpose
 * registers and the ES/CS/SS/DS/FS/GS selectors into the 32-bit TSS image
 * @tss.  Only the in-memory structure is updated; nothing is written to the
 * guest here.
 */
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct tss_segment_32 *tss)
{
	/* CR3 and ldt selector are not saved intentionally */
	tss->eip = ctxt->_eip;
	tss->eflags = ctxt->eflags;
	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);

	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
}
2768 
2769 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2770 				 struct tss_segment_32 *tss)
2771 {
2772 	int ret;
2773 	u8 cpl;
2774 
2775 	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2776 		return emulate_gp(ctxt, 0);
2777 	ctxt->_eip = tss->eip;
2778 	ctxt->eflags = tss->eflags | 2;
2779 
2780 	/* General purpose registers */
2781 	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2782 	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2783 	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2784 	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2785 	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2786 	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2787 	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2788 	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2789 
2790 	/*
2791 	 * SDM says that segment selectors are loaded before segment
2792 	 * descriptors.  This is important because CPL checks will
2793 	 * use CS.RPL.
2794 	 */
2795 	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2796 	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2797 	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2798 	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2799 	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2800 	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2801 	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2802 
2803 	/*
2804 	 * If we're switching between Protected Mode and VM86, we need to make
2805 	 * sure to update the mode before loading the segment descriptors so
2806 	 * that the selectors are interpreted correctly.
2807 	 */
2808 	if (ctxt->eflags & X86_EFLAGS_VM) {
2809 		ctxt->mode = X86EMUL_MODE_VM86;
2810 		cpl = 3;
2811 	} else {
2812 		ctxt->mode = X86EMUL_MODE_PROT32;
2813 		cpl = tss->cs & 3;
2814 	}
2815 
2816 	/*
2817 	 * Now load segment descriptors. If fault happens at this stage
2818 	 * it is handled in a context of new task
2819 	 */
2820 	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2821 					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2822 	if (ret != X86EMUL_CONTINUE)
2823 		return ret;
2824 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2825 					X86_TRANSFER_TASK_SWITCH, NULL);
2826 	if (ret != X86EMUL_CONTINUE)
2827 		return ret;
2828 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2829 					X86_TRANSFER_TASK_SWITCH, NULL);
2830 	if (ret != X86EMUL_CONTINUE)
2831 		return ret;
2832 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2833 					X86_TRANSFER_TASK_SWITCH, NULL);
2834 	if (ret != X86EMUL_CONTINUE)
2835 		return ret;
2836 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2837 					X86_TRANSFER_TASK_SWITCH, NULL);
2838 	if (ret != X86EMUL_CONTINUE)
2839 		return ret;
2840 	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2841 					X86_TRANSFER_TASK_SWITCH, NULL);
2842 	if (ret != X86EMUL_CONTINUE)
2843 		return ret;
2844 	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2845 					X86_TRANSFER_TASK_SWITCH, NULL);
2846 
2847 	return ret;
2848 }
2849 
2850 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2851 			  ulong old_tss_base, struct desc_struct *new_desc)
2852 {
2853 	struct tss_segment_32 tss_seg;
2854 	int ret;
2855 	u32 new_tss_base = get_desc_base(new_desc);
2856 	u32 eip_offset = offsetof(struct tss_segment_32, eip);
2857 	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2858 
2859 	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2860 	if (ret != X86EMUL_CONTINUE)
2861 		return ret;
2862 
2863 	save_state_to_tss32(ctxt, &tss_seg);
2864 
2865 	/* Only GP registers and segment selectors are saved */
2866 	ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2867 				  ldt_sel_offset - eip_offset);
2868 	if (ret != X86EMUL_CONTINUE)
2869 		return ret;
2870 
2871 	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2872 	if (ret != X86EMUL_CONTINUE)
2873 		return ret;
2874 
2875 	if (old_tss_sel != 0xffff) {
2876 		tss_seg.prev_task_link = old_tss_sel;
2877 
2878 		ret = linear_write_system(ctxt, new_tss_base,
2879 					  &tss_seg.prev_task_link,
2880 					  sizeof(tss_seg.prev_task_link));
2881 		if (ret != X86EMUL_CONTINUE)
2882 			return ret;
2883 	}
2884 
2885 	return load_state_from_tss32(ctxt, &tss_seg);
2886 }
2887 
/*
 * Core of the hardware task switch emulation.
 *
 * @tss_selector:   selector of the TSS to switch to
 * @idt_index:      IDT vector for task-gate switches caused by a software
 *                  interrupt, -1 otherwise
 * @reason:         one of the TASK_SWITCH_* values
 * @has_error_code: push @error_code on the new task's stack when true
 *
 * Validates the target TSS descriptor, updates the busy bits and EFLAGS.NT
 * according to @reason, switches state through task_switch_16() or
 * task_switch_32() depending on the descriptor type, sets CR0.TS, loads TR
 * and clears the local enable bits of DR7.
 *
 * Returns X86EMUL_CONTINUE or the first error reported along the way.
 */
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   u16 tss_selector, int idt_index, int reason,
				   bool has_error_code, u32 error_code)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
	ulong old_tss_base =
		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
	u32 desc_limit;
	ulong desc_addr, dr7;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	/*
	 * Check privileges. The three cases are task switch caused by...
	 *
	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
	 * 2. Exception/IRQ/iret: No check is performed
	 * 3. jmp/call to TSS/task-gate: No check is performed since the
	 *    hardware checks it before exiting.
	 */
	if (reason == TASK_SWITCH_GATE) {
		if (idt_index != -1) {
			/* Software interrupts */
			struct desc_struct task_gate_desc;
			int dpl;

			ret = read_interrupt_descriptor(ctxt, idt_index,
							&task_gate_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;

			dpl = task_gate_desc.dpl;
			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
		}
	}

	/*
	 * The target TSS must be present and large enough: a limit of at
	 * least 0x67 when type bit 3 is set (the task_switch_32() path),
	 * and at least 0x2b in any case.
	 */
	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		return emulate_ts(ctxt, tss_selector & 0xfffc);
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/*
	 * Set the back link to the previous task only for CALL and gate
	 * switches.  For the other reasons 0xffff is passed down, which
	 * tells task_switch_16()/task_switch_32() to skip the back link.
	 */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
	}

	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);

	if (has_error_code) {
		/* Operand/address size of the push follows the new TSS type. */
		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		ctxt->lock_prefix = 0;
		ctxt->src.val = (unsigned long) error_code;
		ret = em_push(ctxt);
	}

	/* Clear the local breakpoint enables in DR7. */
	dr7 = ops->get_dr(ctxt, 7);
	ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));

	return ret;
}
2988 
2989 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2990 			 u16 tss_selector, int idt_index, int reason,
2991 			 bool has_error_code, u32 error_code)
2992 {
2993 	int rc;
2994 
2995 	invalidate_registers(ctxt);
2996 	ctxt->_eip = ctxt->eip;
2997 	ctxt->dst.type = OP_NONE;
2998 
2999 	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3000 				     has_error_code, error_code);
3001 
3002 	if (rc == X86EMUL_CONTINUE) {
3003 		ctxt->eip = ctxt->_eip;
3004 		writeback_registers(ctxt);
3005 	}
3006 
3007 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3008 }
3009 
3010 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3011 		struct operand *op)
3012 {
3013 	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3014 
3015 	register_address_increment(ctxt, reg, df * op->bytes);
3016 	op->addr.mem.ea = register_address(ctxt, reg);
3017 }
3018 
3019 static int em_das(struct x86_emulate_ctxt *ctxt)
3020 {
3021 	u8 al, old_al;
3022 	bool af, cf, old_cf;
3023 
3024 	cf = ctxt->eflags & X86_EFLAGS_CF;
3025 	al = ctxt->dst.val;
3026 
3027 	old_al = al;
3028 	old_cf = cf;
3029 	cf = false;
3030 	af = ctxt->eflags & X86_EFLAGS_AF;
3031 	if ((al & 0x0f) > 9 || af) {
3032 		al -= 6;
3033 		cf = old_cf | (al >= 250);
3034 		af = true;
3035 	} else {
3036 		af = false;
3037 	}
3038 	if (old_al > 0x99 || old_cf) {
3039 		al -= 0x60;
3040 		cf = true;
3041 	}
3042 
3043 	ctxt->dst.val = al;
3044 	/* Set PF, ZF, SF */
3045 	ctxt->src.type = OP_IMM;
3046 	ctxt->src.val = 0;
3047 	ctxt->src.bytes = 1;
3048 	em_or(ctxt);
3049 	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3050 	if (cf)
3051 		ctxt->eflags |= X86_EFLAGS_CF;
3052 	if (af)
3053 		ctxt->eflags |= X86_EFLAGS_AF;
3054 	return X86EMUL_CONTINUE;
3055 }
3056 
3057 static int em_aam(struct x86_emulate_ctxt *ctxt)
3058 {
3059 	u8 al, ah;
3060 
3061 	if (ctxt->src.val == 0)
3062 		return emulate_de(ctxt);
3063 
3064 	al = ctxt->dst.val & 0xff;
3065 	ah = al / ctxt->src.val;
3066 	al %= ctxt->src.val;
3067 
3068 	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3069 
3070 	/* Set PF, ZF, SF */
3071 	ctxt->src.type = OP_IMM;
3072 	ctxt->src.val = 0;
3073 	ctxt->src.bytes = 1;
3074 	em_or(ctxt);
3075 
3076 	return X86EMUL_CONTINUE;
3077 }
3078 
3079 static int em_aad(struct x86_emulate_ctxt *ctxt)
3080 {
3081 	u8 al = ctxt->dst.val & 0xff;
3082 	u8 ah = (ctxt->dst.val >> 8) & 0xff;
3083 
3084 	al = (al + (ah * ctxt->src.val)) & 0xff;
3085 
3086 	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3087 
3088 	/* Set PF, ZF, SF */
3089 	ctxt->src.type = OP_IMM;
3090 	ctxt->src.val = 0;
3091 	ctxt->src.bytes = 1;
3092 	em_or(ctxt);
3093 
3094 	return X86EMUL_CONTINUE;
3095 }
3096 
3097 static int em_call(struct x86_emulate_ctxt *ctxt)
3098 {
3099 	int rc;
3100 	long rel = ctxt->src.val;
3101 
3102 	ctxt->src.val = (unsigned long)ctxt->_eip;
3103 	rc = jmp_rel(ctxt, rel);
3104 	if (rc != X86EMUL_CONTINUE)
3105 		return rc;
3106 	return em_push(ctxt);
3107 }
3108 
3109 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3110 {
3111 	u16 sel, old_cs;
3112 	ulong old_eip;
3113 	int rc;
3114 	struct desc_struct old_desc, new_desc;
3115 	const struct x86_emulate_ops *ops = ctxt->ops;
3116 	int cpl = ctxt->ops->cpl(ctxt);
3117 	enum x86emul_mode prev_mode = ctxt->mode;
3118 
3119 	old_eip = ctxt->_eip;
3120 	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3121 
3122 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3123 	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3124 				       X86_TRANSFER_CALL_JMP, &new_desc);
3125 	if (rc != X86EMUL_CONTINUE)
3126 		return rc;
3127 
3128 	rc = assign_eip_far(ctxt, ctxt->src.val);
3129 	if (rc != X86EMUL_CONTINUE)
3130 		goto fail;
3131 
3132 	ctxt->src.val = old_cs;
3133 	rc = em_push(ctxt);
3134 	if (rc != X86EMUL_CONTINUE)
3135 		goto fail;
3136 
3137 	ctxt->src.val = old_eip;
3138 	rc = em_push(ctxt);
3139 	/* If we failed, we tainted the memory, but the very least we should
3140 	   restore cs */
3141 	if (rc != X86EMUL_CONTINUE) {
3142 		pr_warn_once("faulting far call emulation tainted memory\n");
3143 		goto fail;
3144 	}
3145 	return rc;
3146 fail:
3147 	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3148 	ctxt->mode = prev_mode;
3149 	return rc;
3150 
3151 }
3152 
3153 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3154 {
3155 	int rc;
3156 	unsigned long eip = 0;
3157 
3158 	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3159 	if (rc != X86EMUL_CONTINUE)
3160 		return rc;
3161 	rc = assign_eip_near(ctxt, eip);
3162 	if (rc != X86EMUL_CONTINUE)
3163 		return rc;
3164 	rsp_increment(ctxt, ctxt->src.val);
3165 	return X86EMUL_CONTINUE;
3166 }
3167 
3168 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3169 {
3170 	/* Write back the register source. */
3171 	ctxt->src.val = ctxt->dst.val;
3172 	write_register_operand(&ctxt->src);
3173 
3174 	/* Write back the memory destination with implicit LOCK prefix. */
3175 	ctxt->dst.val = ctxt->src.orig_val;
3176 	ctxt->lock_prefix = 1;
3177 	return X86EMUL_CONTINUE;
3178 }
3179 
3180 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3181 {
3182 	ctxt->dst.val = ctxt->src2.val;
3183 	return em_imul(ctxt);
3184 }
3185 
3186 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3187 {
3188 	ctxt->dst.type = OP_REG;
3189 	ctxt->dst.bytes = ctxt->src.bytes;
3190 	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3191 	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3192 
3193 	return X86EMUL_CONTINUE;
3194 }
3195 
3196 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3197 {
3198 	u64 tsc_aux = 0;
3199 
3200 	if (!ctxt->ops->guest_has_rdpid(ctxt))
3201 		return emulate_ud(ctxt);
3202 
3203 	ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3204 	ctxt->dst.val = tsc_aux;
3205 	return X86EMUL_CONTINUE;
3206 }
3207 
3208 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3209 {
3210 	u64 tsc = 0;
3211 
3212 	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3213 	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3214 	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3215 	return X86EMUL_CONTINUE;
3216 }
3217 
3218 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3219 {
3220 	u64 pmc;
3221 
3222 	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3223 		return emulate_gp(ctxt, 0);
3224 	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3225 	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3226 	return X86EMUL_CONTINUE;
3227 }
3228 
3229 static int em_mov(struct x86_emulate_ctxt *ctxt)
3230 {
3231 	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3232 	return X86EMUL_CONTINUE;
3233 }
3234 
3235 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3236 {
3237 	u16 tmp;
3238 
3239 	if (!ctxt->ops->guest_has_movbe(ctxt))
3240 		return emulate_ud(ctxt);
3241 
3242 	switch (ctxt->op_bytes) {
3243 	case 2:
3244 		/*
3245 		 * From MOVBE definition: "...When the operand size is 16 bits,
3246 		 * the upper word of the destination register remains unchanged
3247 		 * ..."
3248 		 *
3249 		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3250 		 * rules so we have to do the operation almost per hand.
3251 		 */
3252 		tmp = (u16)ctxt->src.val;
3253 		ctxt->dst.val &= ~0xffffUL;
3254 		ctxt->dst.val |= (unsigned long)swab16(tmp);
3255 		break;
3256 	case 4:
3257 		ctxt->dst.val = swab32((u32)ctxt->src.val);
3258 		break;
3259 	case 8:
3260 		ctxt->dst.val = swab64(ctxt->src.val);
3261 		break;
3262 	default:
3263 		BUG();
3264 	}
3265 	return X86EMUL_CONTINUE;
3266 }
3267 
3268 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3269 {
3270 	int cr_num = ctxt->modrm_reg;
3271 	int r;
3272 
3273 	if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3274 		return emulate_gp(ctxt, 0);
3275 
3276 	/* Disable writeback. */
3277 	ctxt->dst.type = OP_NONE;
3278 
3279 	if (cr_num == 0) {
3280 		/*
3281 		 * CR0 write might have updated CR0.PE and/or CR0.PG
3282 		 * which can affect the cpu's execution mode.
3283 		 */
3284 		r = emulator_recalc_and_set_mode(ctxt);
3285 		if (r != X86EMUL_CONTINUE)
3286 			return r;
3287 	}
3288 
3289 	return X86EMUL_CONTINUE;
3290 }
3291 
3292 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3293 {
3294 	unsigned long val;
3295 
3296 	if (ctxt->mode == X86EMUL_MODE_PROT64)
3297 		val = ctxt->src.val & ~0ULL;
3298 	else
3299 		val = ctxt->src.val & ~0U;
3300 
3301 	/* #UD condition is already handled. */
3302 	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3303 		return emulate_gp(ctxt, 0);
3304 
3305 	/* Disable writeback. */
3306 	ctxt->dst.type = OP_NONE;
3307 	return X86EMUL_CONTINUE;
3308 }
3309 
3310 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3311 {
3312 	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3313 	u64 msr_data;
3314 	int r;
3315 
3316 	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3317 		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3318 	r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3319 
3320 	if (r == X86EMUL_PROPAGATE_FAULT)
3321 		return emulate_gp(ctxt, 0);
3322 
3323 	return r;
3324 }
3325 
3326 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3327 {
3328 	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3329 	u64 msr_data;
3330 	int r;
3331 
3332 	r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3333 
3334 	if (r == X86EMUL_PROPAGATE_FAULT)
3335 		return emulate_gp(ctxt, 0);
3336 
3337 	if (r == X86EMUL_CONTINUE) {
3338 		*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3339 		*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3340 	}
3341 	return r;
3342 }
3343 
3344 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3345 {
3346 	if (segment > VCPU_SREG_GS &&
3347 	    (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3348 	    ctxt->ops->cpl(ctxt) > 0)
3349 		return emulate_gp(ctxt, 0);
3350 
3351 	ctxt->dst.val = get_segment_selector(ctxt, segment);
3352 	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3353 		ctxt->dst.bytes = 2;
3354 	return X86EMUL_CONTINUE;
3355 }
3356 
3357 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3358 {
3359 	if (ctxt->modrm_reg > VCPU_SREG_GS)
3360 		return emulate_ud(ctxt);
3361 
3362 	return em_store_sreg(ctxt, ctxt->modrm_reg);
3363 }
3364 
3365 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3366 {
3367 	u16 sel = ctxt->src.val;
3368 
3369 	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3370 		return emulate_ud(ctxt);
3371 
3372 	if (ctxt->modrm_reg == VCPU_SREG_SS)
3373 		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3374 
3375 	/* Disable writeback. */
3376 	ctxt->dst.type = OP_NONE;
3377 	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3378 }
3379 
3380 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3381 {
3382 	return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3383 }
3384 
3385 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3386 {
3387 	u16 sel = ctxt->src.val;
3388 
3389 	/* Disable writeback. */
3390 	ctxt->dst.type = OP_NONE;
3391 	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3392 }
3393 
3394 static int em_str(struct x86_emulate_ctxt *ctxt)
3395 {
3396 	return em_store_sreg(ctxt, VCPU_SREG_TR);
3397 }
3398 
3399 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3400 {
3401 	u16 sel = ctxt->src.val;
3402 
3403 	/* Disable writeback. */
3404 	ctxt->dst.type = OP_NONE;
3405 	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3406 }
3407 
3408 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3409 {
3410 	int rc;
3411 	ulong linear;
3412 	unsigned int max_size;
3413 
3414 	rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3415 			 &linear, X86EMUL_F_INVLPG);
3416 	if (rc == X86EMUL_CONTINUE)
3417 		ctxt->ops->invlpg(ctxt, linear);
3418 	/* Disable writeback. */
3419 	ctxt->dst.type = OP_NONE;
3420 	return X86EMUL_CONTINUE;
3421 }
3422 
3423 static int em_clts(struct x86_emulate_ctxt *ctxt)
3424 {
3425 	ulong cr0;
3426 
3427 	cr0 = ctxt->ops->get_cr(ctxt, 0);
3428 	cr0 &= ~X86_CR0_TS;
3429 	ctxt->ops->set_cr(ctxt, 0, cr0);
3430 	return X86EMUL_CONTINUE;
3431 }
3432 
3433 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3434 {
3435 	int rc = ctxt->ops->fix_hypercall(ctxt);
3436 
3437 	if (rc != X86EMUL_CONTINUE)
3438 		return rc;
3439 
3440 	/* Let the processor re-execute the fixed hypercall */
3441 	ctxt->_eip = ctxt->eip;
3442 	/* Disable writeback. */
3443 	ctxt->dst.type = OP_NONE;
3444 	return X86EMUL_CONTINUE;
3445 }
3446 
3447 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3448 				  void (*get)(struct x86_emulate_ctxt *ctxt,
3449 					      struct desc_ptr *ptr))
3450 {
3451 	struct desc_ptr desc_ptr;
3452 
3453 	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3454 	    ctxt->ops->cpl(ctxt) > 0)
3455 		return emulate_gp(ctxt, 0);
3456 
3457 	if (ctxt->mode == X86EMUL_MODE_PROT64)
3458 		ctxt->op_bytes = 8;
3459 	get(ctxt, &desc_ptr);
3460 	if (ctxt->op_bytes == 2) {
3461 		ctxt->op_bytes = 4;
3462 		desc_ptr.address &= 0x00ffffff;
3463 	}
3464 	/* Disable writeback. */
3465 	ctxt->dst.type = OP_NONE;
3466 	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3467 				   &desc_ptr, 2 + ctxt->op_bytes);
3468 }
3469 
3470 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3471 {
3472 	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3473 }
3474 
3475 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3476 {
3477 	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3478 }
3479 
3480 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3481 {
3482 	struct desc_ptr desc_ptr;
3483 	int rc;
3484 
3485 	if (ctxt->mode == X86EMUL_MODE_PROT64)
3486 		ctxt->op_bytes = 8;
3487 	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3488 			     &desc_ptr.size, &desc_ptr.address,
3489 			     ctxt->op_bytes);
3490 	if (rc != X86EMUL_CONTINUE)
3491 		return rc;
3492 	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3493 	    emul_is_noncanonical_address(desc_ptr.address, ctxt,
3494 					 X86EMUL_F_DT_LOAD))
3495 		return emulate_gp(ctxt, 0);
3496 	if (lgdt)
3497 		ctxt->ops->set_gdt(ctxt, &desc_ptr);
3498 	else
3499 		ctxt->ops->set_idt(ctxt, &desc_ptr);
3500 	/* Disable writeback. */
3501 	ctxt->dst.type = OP_NONE;
3502 	return X86EMUL_CONTINUE;
3503 }
3504 
3505 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3506 {
3507 	return em_lgdt_lidt(ctxt, true);
3508 }
3509 
3510 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3511 {
3512 	return em_lgdt_lidt(ctxt, false);
3513 }
3514 
3515 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3516 {
3517 	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3518 	    ctxt->ops->cpl(ctxt) > 0)
3519 		return emulate_gp(ctxt, 0);
3520 
3521 	if (ctxt->dst.type == OP_MEM)
3522 		ctxt->dst.bytes = 2;
3523 	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3524 	return X86EMUL_CONTINUE;
3525 }
3526 
3527 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3528 {
3529 	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3530 			  | (ctxt->src.val & 0x0f));
3531 	ctxt->dst.type = OP_NONE;
3532 	return X86EMUL_CONTINUE;
3533 }
3534 
3535 static int em_loop(struct x86_emulate_ctxt *ctxt)
3536 {
3537 	int rc = X86EMUL_CONTINUE;
3538 
3539 	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3540 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3541 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3542 		rc = jmp_rel(ctxt, ctxt->src.val);
3543 
3544 	return rc;
3545 }
3546 
3547 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3548 {
3549 	int rc = X86EMUL_CONTINUE;
3550 
3551 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3552 		rc = jmp_rel(ctxt, ctxt->src.val);
3553 
3554 	return rc;
3555 }
3556 
3557 static int em_in(struct x86_emulate_ctxt *ctxt)
3558 {
3559 	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3560 			     &ctxt->dst.val))
3561 		return X86EMUL_IO_NEEDED;
3562 
3563 	return X86EMUL_CONTINUE;
3564 }
3565 
3566 static int em_out(struct x86_emulate_ctxt *ctxt)
3567 {
3568 	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3569 				    &ctxt->src.val, 1);
3570 	/* Disable writeback. */
3571 	ctxt->dst.type = OP_NONE;
3572 	return X86EMUL_CONTINUE;
3573 }
3574 
3575 static int em_cli(struct x86_emulate_ctxt *ctxt)
3576 {
3577 	if (emulator_bad_iopl(ctxt))
3578 		return emulate_gp(ctxt, 0);
3579 
3580 	ctxt->eflags &= ~X86_EFLAGS_IF;
3581 	return X86EMUL_CONTINUE;
3582 }
3583 
3584 static int em_sti(struct x86_emulate_ctxt *ctxt)
3585 {
3586 	if (emulator_bad_iopl(ctxt))
3587 		return emulate_gp(ctxt, 0);
3588 
3589 	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3590 	ctxt->eflags |= X86_EFLAGS_IF;
3591 	return X86EMUL_CONTINUE;
3592 }
3593 
3594 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3595 {
3596 	u32 eax, ebx, ecx, edx;
3597 
3598 	if (!ctxt->ops->is_cpuid_allowed(ctxt))
3599 		return emulate_gp(ctxt, 0);
3600 
3601 	eax = reg_read(ctxt, VCPU_REGS_RAX);
3602 	ecx = reg_read(ctxt, VCPU_REGS_RCX);
3603 	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3604 	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
3605 	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3606 	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3607 	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
3608 	return X86EMUL_CONTINUE;
3609 }
3610 
3611 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3612 {
3613 	u32 flags;
3614 
3615 	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3616 		X86_EFLAGS_SF;
3617 	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3618 
3619 	ctxt->eflags &= ~0xffUL;
3620 	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3621 	return X86EMUL_CONTINUE;
3622 }
3623 
3624 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3625 {
3626 	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3627 	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3628 	return X86EMUL_CONTINUE;
3629 }
3630 
3631 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3632 {
3633 	switch (ctxt->op_bytes) {
3634 #ifdef CONFIG_X86_64
3635 	case 8:
3636 		asm("bswap %0" : "+r"(ctxt->dst.val));
3637 		break;
3638 #endif
3639 	default:
3640 		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3641 		break;
3642 	}
3643 	return X86EMUL_CONTINUE;
3644 }
3645 
3646 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3647 {
3648 	/* emulating clflush regardless of cpuid */
3649 	return X86EMUL_CONTINUE;
3650 }
3651 
3652 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3653 {
3654 	/* emulating clflushopt regardless of cpuid */
3655 	return X86EMUL_CONTINUE;
3656 }
3657 
3658 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3659 {
3660 	ctxt->dst.val = (s32) ctxt->src.val;
3661 	return X86EMUL_CONTINUE;
3662 }
3663 
3664 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3665 {
3666 	if (!ctxt->ops->guest_has_fxsr(ctxt))
3667 		return emulate_ud(ctxt);
3668 
3669 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3670 		return emulate_nm(ctxt);
3671 
3672 	/*
3673 	 * Don't emulate a case that should never be hit, instead of working
3674 	 * around a lack of fxsave64/fxrstor64 on old compilers.
3675 	 */
3676 	if (ctxt->mode >= X86EMUL_MODE_PROT64)
3677 		return X86EMUL_UNHANDLEABLE;
3678 
3679 	return X86EMUL_CONTINUE;
3680 }
3681 
3682 /*
3683  * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3684  * and restore MXCSR.
3685  */
3686 static size_t __fxstate_size(int nregs)
3687 {
3688 	return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3689 }
3690 
3691 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3692 {
3693 	bool cr4_osfxsr;
3694 	if (ctxt->mode == X86EMUL_MODE_PROT64)
3695 		return __fxstate_size(16);
3696 
3697 	cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3698 	return __fxstate_size(cr4_osfxsr ? 8 : 0);
3699 }
3700 
3701 /*
3702  * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3703  *  1) 16 bit mode
3704  *  2) 32 bit mode
3705  *     - like (1), but FIP and FDP (foo) are only 16 bit.  At least Intel CPUs
3706  *       preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3707  *       save and restore
3708  *  3) 64-bit mode with REX.W prefix
3709  *     - like (2), but XMM 8-15 are being saved and restored
3710  *  4) 64-bit mode without REX.W prefix
3711  *     - like (3), but FIP and FDP are 64 bit
3712  *
3713  * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3714  * desired result.  (4) is not emulated.
3715  *
3716  * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3717  * and FPU DS) should match.
3718  */
3719 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3720 {
3721 	struct fxregs_state fx_state = {};
3722 	int rc;
3723 
3724 	rc = check_fxsr(ctxt);
3725 	if (rc != X86EMUL_CONTINUE)
3726 		return rc;
3727 
3728 	kvm_fpu_get();
3729 
3730 	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3731 
3732 	kvm_fpu_put();
3733 
3734 	if (rc != X86EMUL_CONTINUE)
3735 		return rc;
3736 
3737 	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3738 		                   fxstate_size(ctxt));
3739 }
3740 
3741 /*
3742  * FXRSTOR might restore XMM registers not provided by the guest. Fill
3743  * in the host registers (via FXSAVE) instead, so they won't be modified.
3744  * (preemption has to stay disabled until FXRSTOR).
3745  *
3746  * Use noinline to keep the stack for other functions called by callers small.
3747  */
3748 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3749 				 const size_t used_size)
3750 {
3751 	struct fxregs_state fx_tmp = {};
3752 	int rc;
3753 
3754 	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3755 	memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3756 	       __fxstate_size(16) - used_size);
3757 
3758 	return rc;
3759 }
3760 
3761 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3762 {
3763 	struct fxregs_state fx_state;
3764 	int rc;
3765 	size_t size;
3766 
3767 	rc = check_fxsr(ctxt);
3768 	if (rc != X86EMUL_CONTINUE)
3769 		return rc;
3770 
3771 	size = fxstate_size(ctxt);
3772 	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3773 	if (rc != X86EMUL_CONTINUE)
3774 		return rc;
3775 
3776 	kvm_fpu_get();
3777 
3778 	if (size < __fxstate_size(16)) {
3779 		rc = fxregs_fixup(&fx_state, size);
3780 		if (rc != X86EMUL_CONTINUE)
3781 			goto out;
3782 	}
3783 
3784 	if (fx_state.mxcsr >> 16) {
3785 		rc = emulate_gp(ctxt, 0);
3786 		goto out;
3787 	}
3788 
3789 	if (rc == X86EMUL_CONTINUE)
3790 		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3791 
3792 out:
3793 	kvm_fpu_put();
3794 
3795 	return rc;
3796 }
3797 
3798 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3799 {
3800 	u32 eax, ecx, edx;
3801 
3802 	if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3803 		return emulate_ud(ctxt);
3804 
3805 	eax = reg_read(ctxt, VCPU_REGS_RAX);
3806 	edx = reg_read(ctxt, VCPU_REGS_RDX);
3807 	ecx = reg_read(ctxt, VCPU_REGS_RCX);
3808 
3809 	if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3810 		return emulate_gp(ctxt, 0);
3811 
3812 	return X86EMUL_CONTINUE;
3813 }
3814 
/* Returns true for the control register numbers accepted here: 0, 2-4, 8. */
static bool valid_cr(int nr)
{
	return nr == 0 || (nr >= 2 && nr <= 4) || nr == 8;
}
3826 
3827 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3828 {
3829 	if (!valid_cr(ctxt->modrm_reg))
3830 		return emulate_ud(ctxt);
3831 
3832 	return X86EMUL_CONTINUE;
3833 }
3834 
3835 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3836 {
3837 	int dr = ctxt->modrm_reg;
3838 	u64 cr4;
3839 
3840 	if (dr > 7)
3841 		return emulate_ud(ctxt);
3842 
3843 	cr4 = ctxt->ops->get_cr(ctxt, 4);
3844 	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3845 		return emulate_ud(ctxt);
3846 
3847 	if (ctxt->ops->get_effective_dr7(ctxt) & DR7_GD)
3848 		return emulate_db(ctxt, DR6_BD);
3849 
3850 	return X86EMUL_CONTINUE;
3851 }
3852 
3853 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3854 {
3855 	u64 new_val = ctxt->src.val64;
3856 	int dr = ctxt->modrm_reg;
3857 
3858 	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3859 		return emulate_gp(ctxt, 0);
3860 
3861 	return check_dr_read(ctxt);
3862 }
3863 
3864 static int check_svme(struct x86_emulate_ctxt *ctxt)
3865 {
3866 	u64 efer = 0;
3867 
3868 	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3869 
3870 	if (!(efer & EFER_SVME))
3871 		return emulate_ud(ctxt);
3872 
3873 	return X86EMUL_CONTINUE;
3874 }
3875 
3876 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3877 {
3878 	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3879 
3880 	if (!ctxt->ops->page_address_valid(ctxt, rax))
3881 		return emulate_gp(ctxt, 0);
3882 
3883 	return check_svme(ctxt);
3884 }
3885 
3886 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3887 {
3888 	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3889 
3890 	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3891 		return emulate_gp(ctxt, 0);
3892 
3893 	return X86EMUL_CONTINUE;
3894 }
3895 
3896 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3897 {
3898 	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3899 	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3900 
3901 	/*
3902 	 * VMware allows access to these Pseduo-PMCs even when read via RDPMC
3903 	 * in Ring3 when CR4.PCE=0.
3904 	 */
3905 	if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3906 		return X86EMUL_CONTINUE;
3907 
3908 	/*
3909 	 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0.  The CR0.PE
3910 	 * check however is unnecessary because CPL is always 0 outside
3911 	 * protected mode.
3912 	 */
3913 	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3914 	    ctxt->ops->check_rdpmc_early(ctxt, rcx))
3915 		return emulate_gp(ctxt, 0);
3916 
3917 	return X86EMUL_CONTINUE;
3918 }
3919 
3920 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3921 {
3922 	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3923 	if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3924 		return emulate_gp(ctxt, 0);
3925 
3926 	return X86EMUL_CONTINUE;
3927 }
3928 
3929 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3930 {
3931 	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3932 	if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3933 		return emulate_gp(ctxt, 0);
3934 
3935 	return X86EMUL_CONTINUE;
3936 }
3937 
/*
 * Shorthand initializers for the opcode tables that follow.  Each macro
 * expands to one brace-enclosed designated initializer unless noted.
 */
/* Entry with decode flags only. */
#define D(_y) { .flags = (_y) }
/* Flags-only entry, additionally marked Intercept with intercept id _i. */
#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
/* Like DI(), plus CheckPerm and the permission callback _p. */
#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
/* Placeholder entry flagged NotImpl. */
#define N    D(NotImpl)
/* Entries that point at a nested table; each adds its own selector flag. */
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
/* Entry with an execute callback _e. */
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
/* Like I(), additionally marked Intercept with intercept id _i. */
#define II(_f, _e, _i) \
	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
/* Like II(), plus CheckPerm and the permission callback _p. */
#define IIP(_f, _e, _i, _p) \
	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
/* Entry that points at a prefix-selected table. */
#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }

/*
 * The "2bv" forms expand to TWO consecutive entries: the first with ByteOp
 * added to the flags, the second without.  F2bv() relies on F(), which is
 * not defined in this part of the file.
 */
#define D2bv(_f)      D((_f) | ByteOp), D(_f)
#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
#define I2bvIP(_f, _e, _i, _p) \
	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)

/*
 * Expands to SIX entries (three I2bv pairs): mem <- reg, reg <- mem and
 * accumulator <- immediate.  Lock is stripped from the last two pairs.
 */
#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3967 
/* Stand-alone entry whose execute callback is emulate_ud(). */
static const struct opcode ud = I(SrcNone, emulate_ud);
3969 
/*
 * Sub-table referenced from the second half of group7 via EXT().  Only slot
 * 1 is implemented (em_hypercall).
 * NOTE(review): the slot index is presumably ModRM.rm - confirm against the
 * decoder.
 */
static const struct opcode group7_rm0[] = {
	N,
	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
	N, N, N, N, N, N,
};
3975 
/*
 * Sub-table referenced from the second half of group7 via EXT().  Slots 0
 * and 1 are decode-only entries carrying the monitor and mwait intercepts.
 */
static const struct opcode group7_rm1[] = {
	DI(SrcNone | Priv, monitor),
	DI(SrcNone | Priv, mwait),
	N, N, N, N, N, N,
};
3981 
/*
 * Sub-table referenced from the second half of group7 via EXT().  Only slot
 * 1 is implemented (em_xsetbv, with the xsetbv intercept).
 */
static const struct opcode group7_rm2[] = {
	N,
	II(ImplicitOps | Priv,			em_xsetbv,	xsetbv),
	N, N, N, N, N, N,
};
3987 
/*
 * Sub-table referenced from the second half of group7 via EXT().  All eight
 * slots carry an intercept; every slot except the hypercall one is a
 * decode-only entry guarded by check_svme() or check_svme_pa().
 */
static const struct opcode group7_rm3[] = {
	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		clgi,		check_svme),
	DIP(SrcNone | Prot | Priv,		skinit,		check_svme),
	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
};
3998 
/*
 * Sub-table referenced from the second half of group7 via EXT().  Only slot
 * 1 is populated: a decode-only rdtscp entry guarded by check_rdtsc().
 */
static const struct opcode group7_rm7[] = {
	N,
	DIP(SrcNone, rdtscp, check_rdtsc),
	N, N, N, N, N, N,
};
4004 
/*
 * Eight ALU handlers: add, or, adc, sbb, and, sub, xor, cmp.  Operand
 * decode flags are not set here.  The cmp entry is NoWrite; all others
 * allow Lock.
 * NOTE(review): presumably indexed by ModRM.reg, with operand flags supplied
 * by the referencing table entry - confirm.
 */
static const struct opcode group1[] = {
	I(Lock, em_add),
	I(Lock | PageTable, em_or),
	I(Lock, em_adc),
	I(Lock, em_sbb),
	I(Lock | PageTable, em_and),
	I(Lock, em_sub),
	I(Lock, em_xor),
	I(NoWrite, em_cmp),
};
4015 
/* Only slot 0 is implemented: pop into a memory destination. */
static const struct opcode group1A[] = {
	I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
};
4019 
/*
 * Rotate/shift handlers: rol, ror, rcl, rcr, shl, shr, shl, sar.  Slots 4
 * and 6 both use em_shl.
 */
static const struct opcode group2[] = {
	I(DstMem | ModRM, em_rol),
	I(DstMem | ModRM, em_ror),
	I(DstMem | ModRM, em_rcl),
	I(DstMem | ModRM, em_rcr),
	I(DstMem | ModRM, em_shl),
	I(DstMem | ModRM, em_shr),
	I(DstMem | ModRM, em_shl),
	I(DstMem | ModRM, em_sar),
};
4030 
/*
 * test (slots 0 and 1, both NoWrite), not, neg, then the mul/imul/div/idiv
 * "_ex" handlers, which use the DstXacc destination with a Src2Mem operand.
 */
static const struct opcode group3[] = {
	I(DstMem | SrcImm | NoWrite, em_test),
	I(DstMem | SrcImm | NoWrite, em_test),
	I(DstMem | SrcNone | Lock, em_not),
	I(DstMem | SrcNone | Lock, em_neg),
	I(DstXacc | Src2Mem, em_mul_ex),
	I(DstXacc | Src2Mem, em_imul_ex),
	I(DstXacc | Src2Mem, em_div_ex),
	I(DstXacc | Src2Mem, em_idiv_ex),
};
4041 
/* Byte-sized inc and dec; the remaining six slots are not implemented. */
static const struct opcode group4[] = {
	I(ByteOp | DstMem | SrcNone | Lock, em_inc),
	I(ByteOp | DstMem | SrcNone | Lock, em_dec),
	N, N, N, N, N, N,
};
4047 
/*
 * inc, dec, near/far indirect call, near/far indirect jmp, push; the last
 * slot is flagged Undefined.
 */
static const struct opcode group5[] = {
	I(DstMem | SrcNone | Lock,		em_inc),
	I(DstMem | SrcNone | Lock,		em_dec),
	I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs),
	I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far),
	I(SrcMem | NearBranch | IsBranch,       em_jmp_abs),
	I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
};
4057 
/*
 * sldt, str, lldt, ltr; all flagged Prot.  The two loads are additionally
 * Priv and take a 16-bit memory source.  Slots 4-7 are not implemented.
 */
static const struct opcode group6[] = {
	II(Prot | DstMem,	   em_sldt, sldt),
	II(Prot | DstMem,	   em_str, str),
	II(Prot | Priv | SrcMem16, em_lldt, lldt),
	II(Prot | Priv | SrcMem16, em_ltr, ltr),
	N, N, N, N,
};
4065 
/*
 * Two eight-entry halves.  The first holds sgdt/sidt/lgdt/lidt, smsw, lmsw
 * and invlpg; the second replaces slots 0-3 and 7 with EXT() references to
 * the group7_rm* sub-tables and keeps smsw and lmsw.
 * NOTE(review): presumably the first half is used for memory operands and
 * the second for register operands - confirm against the decoder.
 */
static const struct group_dual group7 = { {
	II(Mov | DstMem,			em_sgdt, sgdt),
	II(Mov | DstMem,			em_sidt, sidt),
	II(SrcMem | Priv,			em_lgdt, lgdt),
	II(SrcMem | Priv,			em_lidt, lidt),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
}, {
	EXT(0, group7_rm0),
	EXT(0, group7_rm1),
	EXT(0, group7_rm2),
	EXT(0, group7_rm3),
	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
	EXT(0, group7_rm7),
} };
4083 
/*
 * Bit-test handlers with an immediate byte source in slots 4-7: bt
 * (NoWrite), bts, btr, btc.  Slots 0-3 are not implemented.
 */
static const struct opcode group8[] = {
	N, N, N, N,
	I(DstMem | SrcImmByte | NoWrite,		em_bt),
	I(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
	I(DstMem | SrcImmByte | Lock,			em_btr),
	I(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
};
4091 
/*
 * The "memory" destination is actually always a register, since we come
 * from the register case of group9.
 *
 * Only the last of the four prefix slots is populated (em_rdpid).
 */
static const struct gprefix pfx_0f_c7_7 = {
	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
};
4099 
4100 
/*
 * First half: only slot 1 is implemented (em_cmpxchg8b on a 64-bit memory
 * destination).  Second half: only slot 7 is populated, pointing at the
 * prefix table pfx_0f_c7_7.
 */
static const struct group_dual group9 = { {
	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
	N, N, N, N, N, N, N,
	GP(0, &pfx_0f_c7_7),
} };
4107 
/*
 * Slot 0 is mov of an immediate to memory; the rest are flagged Undefined.
 * NOTE(review): X7() is defined elsewhere and presumably repeats its
 * argument seven times - confirm.
 */
static const struct opcode group11[] = {
	I(DstMem | SrcImm | Mov | PageTable, em_mov),
	X7(D(Undefined)),
};
4112 
/*
 * Prefix table referenced from slot 7 of group15's first half: the first
 * slot is em_clflush, the second em_clflushopt, the rest not implemented.
 */
static const struct gprefix pfx_0f_ae_7 = {
	I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
};
4116 
/*
 * First half: fxsave and fxrstor (both Aligned16) in slots 0 and 1, and the
 * pfx_0f_ae_7 prefix table in slot 7.  The second half is entirely not
 * implemented.
 */
static const struct group_dual group15 = { {
	I(ModRM | Aligned16, em_fxsave),
	I(ModRM | Aligned16, em_fxrstor),
	N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
	N, N, N, N, N, N, N, N,
} };
4124 
/*
 * Prefix-selected moves, all handled by em_mov: an Mmx form, an aligned
 * Sse/Avx form, an unimplemented slot, and an unaligned Sse/Avx form.
 */
static const struct gprefix pfx_0f_6f_0f_7f = {
	I(Mmx, em_mov), I(Sse | Avx | Aligned, em_mov), N, I(Sse | Avx | Unaligned, em_mov),
};
4128 
/* Dual entry: em_mov for the first variant, not implemented for the second. */
static const struct instr_dual instr_dual_0f_2b = {
	I(0, em_mov), N
};
4132 
/* The first two prefix slots both defer to instr_dual_0f_2b. */
static const struct gprefix pfx_0f_2b = {
	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
};
4136 
/* Unaligned moves via em_mov in the first two prefix slots. */
static const struct gprefix pfx_0f_10_0f_11 = {
	I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
};
4140 
/* Aligned moves via em_mov in the first two prefix slots. */
static const struct gprefix pfx_0f_28_0f_29 = {
	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};
4144 
/*
 * Prefix table shared by 0F E7 (twobyte_table) and 0F 38 2A
 * (opcode_map_0f_38).  Only the second slot is populated: em_mov flagged
 * Sse | Avx.  The referencing table entries supply the operand direction
 * and, for 0F 38 2A, the Aligned flag.
 */
static const struct gprefix pfx_0f_e7_0f_38_2a = {
	N, I(Sse | Avx, em_mov), N, N,
};
4148 
/*
 * Escape table for opcode 0xD9 (see the 0xD8 - 0xDF row of opcode_table).
 * x86_decode_insn() uses ->high[modrm - 0xc0] when the ModRM byte is above
 * 0xbf, and ->op[(modrm >> 3) & 7] otherwise.  The only decoded form here
 * is slot 7 of the eight-entry array: em_fnstcw with a 16-bit memory
 * destination.
 */
static const struct escape escape_d9 = { {
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };
4169 
/*
 * Escape table for opcode 0xDB (see the 0xD8 - 0xDF row of opcode_table).
 * x86_decode_insn() uses ->high[modrm - 0xc0] when the ModRM byte is above
 * 0xbf, and ->op[(modrm >> 3) & 7] otherwise.  The only decoded form here
 * is ModRM byte 0xE3 in the 64-entry array: em_fninit.
 */
static const struct escape escape_db = { {
	N, N, N, N, N, N, N, N,
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };
4190 
/*
 * Escape table for opcode 0xDD (see the 0xD8 - 0xDF row of opcode_table).
 * x86_decode_insn() uses ->high[modrm - 0xc0] when the ModRM byte is above
 * 0xbf, and ->op[(modrm >> 3) & 7] otherwise.  The only decoded form here
 * is slot 7 of the eight-entry array: em_fnstsw with a 16-bit memory
 * destination.
 */
static const struct escape escape_dd = { {
	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
}, {
	/* 0xC0 - 0xC7 */
	N, N, N, N, N, N, N, N,
	/* 0xC8 - 0xCF */
	N, N, N, N, N, N, N, N,
	/* 0xD0 - 0xD7 */
	N, N, N, N, N, N, N, N,
	/* 0xD8 - 0xDF */
	N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xE7 */
	N, N, N, N, N, N, N, N,
	/* 0xE8 - 0xEF */
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xF7 */
	N, N, N, N, N, N, N, N,
	/* 0xF8 - 0xFF */
	N, N, N, N, N, N, N, N,
} };
4211 
/*
 * InstrDual entry for 0F C3 (referenced from the 0xC0 - 0xC7 row of
 * twobyte_table): one variant is a register-to-ModRM-destination em_mov
 * flagged No16, the other is N.
 * NOTE(review): the populated variant is presumably the ModRM.mod != 3
 * (memory) one - confirm against struct instr_dual.
 */
static const struct instr_dual instr_dual_0f_c3 = {
	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
};
4215 
/*
 * ModeDual entry for opcode 0x63 (see the 0x60 - 0x67 row of opcode_table).
 * x86_decode_insn() picks ->mode64 in X86EMUL_MODE_PROT64 and ->mode32 in
 * every other mode.  One variant is N, the other is em_movsxd with a 32-bit
 * memory source.
 * NOTE(review): the em_movsxd variant is presumably ->mode64 - confirm
 * against struct mode_dual.
 */
static const struct mode_dual mode_dual_63 = {
	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
};
4219 
/*
 * InstrDual entry for opcode 0x8D (see the 0x88 - 0x8F row of opcode_table).
 * One variant decodes a register destination and a ModRM source flagged
 * NoAccess, with no execute callback (D() entry); the other is N.
 * NOTE(review): the populated variant is presumably the ModRM.mod != 3
 * (memory) one - confirm against struct instr_dual.
 */
static const struct instr_dual instr_dual_8d = {
	D(DstReg | SrcMem | ModRM | NoAccess), N
};
4223 
/*
 * One-byte opcode map, indexed by the opcode byte itself
 * (x86_decode_insn(): opcode = opcode_table[ctxt->b]).  Each row comment
 * gives the range of opcode bytes the following entries cover.
 *
 * Entries that reference another table (group, escape, instr_dual,
 * mode_dual) are resolved further by the GroupMask loop in
 * x86_decode_insn().
 *
 * Note that bytes 0xC4 and 0xC5 are only looked up here when
 * x86_decode_insn() decides they are not VEX prefixes, i.e. outside 64-bit
 * mode and with the top two bits of the following byte not both set.
 */
static const struct opcode opcode_table[256] = {
	/* 0x00 - 0x07 */
	I6ALU(Lock, em_add),
	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
	/* 0x08 - 0x0F */
	I6ALU(Lock | PageTable, em_or),
	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
	N,
	/* 0x10 - 0x17 */
	I6ALU(Lock, em_adc),
	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
	/* 0x18 - 0x1F */
	I6ALU(Lock, em_sbb),
	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
	/* 0x20 - 0x27 */
	I6ALU(Lock | PageTable, em_and), N, N,
	/* 0x28 - 0x2F */
	I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
	/* 0x30 - 0x37 */
	I6ALU(Lock, em_xor), N, N,
	/* 0x38 - 0x3F */
	I6ALU(NoWrite, em_cmp), N, N,
	/* 0x40 - 0x4F */
	X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)),
	/* 0x50 - 0x57 */
	X8(I(SrcReg | Stack, em_push)),
	/* 0x58 - 0x5F */
	X8(I(DstReg | Stack, em_pop)),
	/* 0x60 - 0x67 */
	I(ImplicitOps | Stack | No64, em_pusha),
	I(ImplicitOps | Stack | No64, em_popa),
	N, MD(ModRM, &mode_dual_63),
	N, N, N, N,
	/* 0x68 - 0x6F */
	I(SrcImm | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
	I(SrcImmByte | Mov | Stack, em_push),
	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
	/* 0x70 - 0x7F */
	X16(D(SrcImmByte | NearBranch | IsBranch)),
	/* 0x80 - 0x87 */
	G(ByteOp | DstMem | SrcImm, group1),
	G(DstMem | SrcImm, group1),
	G(ByteOp | DstMem | SrcImm | No64, group1),
	G(DstMem | SrcImmByte, group1),
	I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
	/* 0x88 - 0x8F */
	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
	ID(0, &instr_dual_8d),
	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
	G(0, group1A),
	/* 0x90 - 0x97 */
	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
	/* 0x98 - 0x9F */
	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
	I(SrcImmFAddr | No64 | IsBranch | ShadowStack, em_call_far), N,
	II(ImplicitOps | Stack, em_pushf, pushf),
	II(ImplicitOps | Stack, em_popf, popf),
	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
	/* 0xA0 - 0xA7 */
	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
	I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
	I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
	/* 0xA8 - 0xAF */
	I2bv(DstAcc | SrcImm | NoWrite, em_test),
	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
	I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
	/* 0xB0 - 0xB7 */
	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
	/* 0xB8 - 0xBF */
	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
	/* 0xC0 - 0xC7 */
	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
	I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch | ShadowStack, em_ret_near_imm),
	I(ImplicitOps | NearBranch | IsBranch | ShadowStack, em_ret),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
	G(ByteOp, group11), G(0, group11),
	/* 0xC8 - 0xCF */
	I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
	I(Stack, em_leave),
	I(ImplicitOps | SrcImmU16 | IsBranch | ShadowStack, em_ret_far_imm),
	I(ImplicitOps | IsBranch | ShadowStack, em_ret_far),
	D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch | ShadowStack, intn),
	D(ImplicitOps | No64 | IsBranch),
	II(ImplicitOps | IsBranch | ShadowStack, em_iret, iret),
	/* 0xD0 - 0xD7 */
	G(Src2One | ByteOp, group2), G(Src2One, group2),
	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
	I(DstAcc | SrcImmUByte | No64, em_aam),
	I(DstAcc | SrcImmUByte | No64, em_aad),
	I(DstAcc | ByteOp | No64, em_salc),
	I(DstAcc | SrcXLat | ByteOp, em_mov),
	/* 0xD8 - 0xDF */
	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
	/* 0xE0 - 0xE7 */
	X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
	I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
	/* 0xE8 - 0xEF */
	I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
	D(SrcImm | ImplicitOps | NearBranch | IsBranch),
	I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
	D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
	/* 0xF0 - 0xF7 */
	N, DI(ImplicitOps, icebp), N, N,
	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
	G(ByteOp, group3), G(0, group3),
	/* 0xF8 - 0xFF */
	D(ImplicitOps), D(ImplicitOps),
	I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};
4350 
/*
 * Two-byte opcode map, indexed by the byte that follows 0x0F
 * (x86_decode_insn(): opcode = twobyte_table[ctxt->b]).  The same table is
 * used by x86_decode_avx() for VEX-encoded instructions whose map field
 * is 1.  Each row comment gives the range of second opcode bytes the
 * following entries cover.
 *
 * When the second byte is 0x38, x86_decode_insn() fetches a third byte and
 * replaces the lookup with opcode_map_0f_38[], so the 0x38 slot here is not
 * what ends up being decoded.
 */
static const struct opcode twobyte_table[256] = {
	/* 0x00 - 0x0F */
	G(0, group6), GD(0, &group7), N, N,
	N, I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_syscall),
	II(ImplicitOps | Priv, em_clts, clts), N,
	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
	/* 0x10 - 0x1F */
	GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_10_0f_11),
	GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_10_0f_11),
	N, N, N, N, N, N,
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
	/* 0x20 - 0x2F */
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
						check_cr_access),
	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
						check_dr_write),
	N, N, N, N,
	GP(ModRM | DstReg | SrcMem | Mov | Sse | Avx, &pfx_0f_28_0f_29),
	GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_28_0f_29),
	N, GP(ModRM | DstMem | SrcReg | Mov | Sse | Avx, &pfx_0f_2b),
	N, N, N, N,
	/* 0x30 - 0x3F */
	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
	I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
	I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
	N, N,
	N, N, N, N, N, N, N, N,
	/* 0x40 - 0x4F */
	X16(D(DstReg | SrcMem | ModRM)),
	/* 0x50 - 0x5F */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0x60 - 0x6F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x70 - 0x7F */
	N, N, N, N,
	N, N, N, N,
	N, N, N, N,
	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
	/* 0x80 - 0x8F */
	X16(D(SrcImm | NearBranch | IsBranch)),
	/* 0x90 - 0x9F */
	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
	/* 0xA0 - 0xA7 */
	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
	II(ImplicitOps, em_cpuid, cpuid),
	I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
	I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
	I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
	/* 0xA8 - 0xAF */
	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
	I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
	I(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
	GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul),
	/* 0xB0 - 0xB7 */
	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
	I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xB8 - 0xBF */
	N, N,
	G(BitOp, group8),
	I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
	I(DstReg | SrcMem | ModRM, em_bsf_c),
	I(DstReg | SrcMem | ModRM, em_bsr_c),
	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
	/* 0xC0 - 0xC7 */
	I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
	N, ID(0, &instr_dual_0f_c3),
	N, N, N, GD(0, &group9),
	/* 0xC8 - 0xCF */
	X8(I(DstReg, em_bswap)),
	/* 0xD0 - 0xDF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
	/* 0xE0 - 0xEF */
	N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7_0f_38_2a),
	N, N, N, N, N, N, N, N,
	/* 0xF0 - 0xFF */
	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};
4448 
/*
 * InstrDual entry used by three_byte_0f_38_f0: one variant is em_movbe
 * loading a register from the ModRM source, the other is N.
 * NOTE(review): the populated variant is presumably the ModRM.mod != 3
 * (memory) one - confirm against struct instr_dual.
 */
static const struct instr_dual instr_dual_0f_38_f0 = {
	I(DstReg | SrcMem | Mov, em_movbe), N
};
4452 
/*
 * InstrDual entry used by three_byte_0f_38_f1: one variant is em_movbe
 * storing a register to the ModRM destination, the other is N.
 * NOTE(review): the populated variant is presumably the ModRM.mod != 3
 * (memory) one - confirm against struct instr_dual.
 */
static const struct instr_dual instr_dual_0f_38_f1 = {
	I(DstMem | SrcReg | Mov, em_movbe), N
};
4456 
/*
 * Prefix table for 0F 38 F0 (see opcode_map_0f_38).  The first two slots
 * both defer to instr_dual_0f_38_f0; the last two are N.
 */
static const struct gprefix three_byte_0f_38_f0 = {
	ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
};
4460 
/*
 * Prefix table for 0F 38 F1 (see opcode_map_0f_38).  The first two slots
 * both defer to instr_dual_0f_38_f1; the last two are N.
 */
static const struct gprefix three_byte_0f_38_f1 = {
	ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
};
4464 
/*
 * Three-byte opcode map for the 0F 38 escape, indexed by the third opcode
 * byte.  Every populated entry is a GP() entry, i.e. the instruction is
 * further selected by its prefix.  The table is used by x86_decode_insn()
 * for legacy 0F 38 encodings and by x86_decode_avx() for VEX map 2.
 */
static const struct opcode opcode_map_0f_38[256] = {
	/* 0x00 - 0x1f */
	X16(N), X16(N),
	/* 0x20 - 0x2f */
	X8(N),
	X2(N), GP(SrcMem | DstReg | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
	/* 0x30 - 0x7f */
	X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0x80 - 0xef */
	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
	/* 0xf0 - 0xf1 */
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
	/* 0xf2 - 0xff */
	N, N, X4(N), X8(N)
};
4485 
/*
 * The decode tables above are complete; drop the short table-construction
 * macros listed here so their names are not visible to the code below.
 */
#undef D
#undef N
#undef G
#undef GD
#undef I
#undef GP
#undef EXT
#undef MD
#undef ID

#undef D2bv
#undef D2bvIP
#undef I2bv
#undef I2bvIP
#undef I6ALU
4501 
4502 static bool is_shstk_instruction(struct x86_emulate_ctxt *ctxt)
4503 {
4504 	return ctxt->d & ShadowStack;
4505 }
4506 
4507 static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
4508 {
4509 	u64 flags = ctxt->d;
4510 
4511 	if (!(flags & IsBranch))
4512 		return false;
4513 
4514 	/*
4515 	 * All far JMPs and CALLs (including SYSCALL, SYSENTER, and INTn) are
4516 	 * indirect and thus affect IBT state.  All far RETs (including SYSEXIT
4517 	 * and IRET) are protected via Shadow Stacks and thus don't affect IBT
4518 	 * state.  IRET #GPs when returning to virtual-8086 and IBT or SHSTK is
4519 	 * enabled, but that should be handled by IRET emulation (in the very
4520 	 * unlikely scenario that KVM adds support for fully emulating IRET).
4521 	 */
4522 	if (!(flags & NearBranch))
4523 		return ctxt->execute != em_iret &&
4524 		       ctxt->execute != em_ret_far &&
4525 		       ctxt->execute != em_ret_far_imm &&
4526 		       ctxt->execute != em_sysexit;
4527 
4528 	switch (flags & SrcMask) {
4529 	case SrcReg:
4530 	case SrcMem:
4531 	case SrcMem16:
4532 	case SrcMem32:
4533 		return true;
4534 	case SrcMemFAddr:
4535 	case SrcImmFAddr:
4536 		/* Far branches should be handled above. */
4537 		WARN_ON_ONCE(1);
4538 		return true;
4539 	case SrcNone:
4540 	case SrcImm:
4541 	case SrcImmByte:
4542 	/*
4543 	 * Note, ImmU16 is used only for the stack adjustment operand on ENTER
4544 	 * and RET instructions.  ENTER isn't a branch and RET FAR is handled
4545 	 * by the NearBranch check above.  RET itself isn't an indirect branch.
4546 	 */
4547 	case SrcImmU16:
4548 		return false;
4549 	default:
4550 		WARN_ONCE(1, "Unexpected Src operand '%llx' on branch",
4551 			  flags & SrcMask);
4552 		return false;
4553 	}
4554 }
4555 
4556 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4557 {
4558 	unsigned size;
4559 
4560 	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4561 	if (size == 8)
4562 		size = 4;
4563 	return size;
4564 }
4565 
4566 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4567 		      unsigned size, bool sign_extension)
4568 {
4569 	int rc = X86EMUL_CONTINUE;
4570 
4571 	op->type = OP_IMM;
4572 	op->bytes = size;
4573 	op->addr.mem.ea = ctxt->_eip;
4574 	/* NB. Immediates are sign-extended as necessary. */
4575 	switch (op->bytes) {
4576 	case 1:
4577 		op->val = insn_fetch(s8, ctxt);
4578 		break;
4579 	case 2:
4580 		op->val = insn_fetch(s16, ctxt);
4581 		break;
4582 	case 4:
4583 		op->val = insn_fetch(s32, ctxt);
4584 		break;
4585 	case 8:
4586 		op->val = insn_fetch(s64, ctxt);
4587 		break;
4588 	}
4589 	if (!sign_extension) {
4590 		switch (op->bytes) {
4591 		case 1:
4592 			op->val &= 0xff;
4593 			break;
4594 		case 2:
4595 			op->val &= 0xffff;
4596 			break;
4597 		case 4:
4598 			op->val &= 0xffffffff;
4599 			break;
4600 		}
4601 	}
4602 done:
4603 	return rc;
4604 }
4605 
4606 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4607 			  unsigned d)
4608 {
4609 	int rc = X86EMUL_CONTINUE;
4610 
4611 	switch (d) {
4612 	case OpReg:
4613 		decode_register_operand(ctxt, op);
4614 		break;
4615 	case OpImmUByte:
4616 		rc = decode_imm(ctxt, op, 1, false);
4617 		break;
4618 	case OpMem:
4619 		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4620 	mem_common:
4621 		*op = ctxt->memop;
4622 		ctxt->memopp = op;
4623 		if (ctxt->d & BitOp)
4624 			fetch_bit_operand(ctxt);
4625 		op->orig_val = op->val;
4626 		break;
4627 	case OpMem64:
4628 		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4629 		goto mem_common;
4630 	case OpAcc:
4631 		op->type = OP_REG;
4632 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4633 		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4634 		fetch_register_operand(op);
4635 		break;
4636 	case OpAccLo:
4637 		op->type = OP_REG;
4638 		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4639 		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4640 		fetch_register_operand(op);
4641 		break;
4642 	case OpAccHi:
4643 		if (ctxt->d & ByteOp) {
4644 			op->type = OP_NONE;
4645 			break;
4646 		}
4647 		op->type = OP_REG;
4648 		op->bytes = ctxt->op_bytes;
4649 		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4650 		fetch_register_operand(op);
4651 		break;
4652 	case OpDI:
4653 		op->type = OP_MEM;
4654 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4655 		op->addr.mem.ea =
4656 			register_address(ctxt, VCPU_REGS_RDI);
4657 		op->addr.mem.seg = VCPU_SREG_ES;
4658 		op->val = 0;
4659 		op->count = 1;
4660 		break;
4661 	case OpDX:
4662 		op->type = OP_REG;
4663 		op->bytes = 2;
4664 		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4665 		fetch_register_operand(op);
4666 		break;
4667 	case OpCL:
4668 		op->type = OP_IMM;
4669 		op->bytes = 1;
4670 		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4671 		break;
4672 	case OpImmByte:
4673 		rc = decode_imm(ctxt, op, 1, true);
4674 		break;
4675 	case OpOne:
4676 		op->type = OP_IMM;
4677 		op->bytes = 1;
4678 		op->val = 1;
4679 		break;
4680 	case OpImm:
4681 		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4682 		break;
4683 	case OpImm64:
4684 		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4685 		break;
4686 	case OpMem8:
4687 		ctxt->memop.bytes = 1;
4688 		if (ctxt->memop.type == OP_REG) {
4689 			ctxt->memop.addr.reg = decode_register(ctxt,
4690 					ctxt->modrm_rm, true);
4691 			fetch_register_operand(&ctxt->memop);
4692 		}
4693 		goto mem_common;
4694 	case OpMem16:
4695 		ctxt->memop.bytes = 2;
4696 		goto mem_common;
4697 	case OpMem32:
4698 		ctxt->memop.bytes = 4;
4699 		goto mem_common;
4700 	case OpImmU16:
4701 		rc = decode_imm(ctxt, op, 2, false);
4702 		break;
4703 	case OpImmU:
4704 		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4705 		break;
4706 	case OpSI:
4707 		op->type = OP_MEM;
4708 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4709 		op->addr.mem.ea =
4710 			register_address(ctxt, VCPU_REGS_RSI);
4711 		op->addr.mem.seg = ctxt->seg_override;
4712 		op->val = 0;
4713 		op->count = 1;
4714 		break;
4715 	case OpXLat:
4716 		op->type = OP_MEM;
4717 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4718 		op->addr.mem.ea =
4719 			address_mask(ctxt,
4720 				reg_read(ctxt, VCPU_REGS_RBX) +
4721 				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4722 		op->addr.mem.seg = ctxt->seg_override;
4723 		op->val = 0;
4724 		break;
4725 	case OpImmFAddr:
4726 		op->type = OP_IMM;
4727 		op->addr.mem.ea = ctxt->_eip;
4728 		op->bytes = ctxt->op_bytes + 2;
4729 		insn_fetch_arr(op->valptr, op->bytes, ctxt);
4730 		break;
4731 	case OpMemFAddr:
4732 		ctxt->memop.bytes = ctxt->op_bytes + 2;
4733 		goto mem_common;
4734 	case OpES:
4735 		op->type = OP_IMM;
4736 		op->val = VCPU_SREG_ES;
4737 		break;
4738 	case OpCS:
4739 		op->type = OP_IMM;
4740 		op->val = VCPU_SREG_CS;
4741 		break;
4742 	case OpSS:
4743 		op->type = OP_IMM;
4744 		op->val = VCPU_SREG_SS;
4745 		break;
4746 	case OpDS:
4747 		op->type = OP_IMM;
4748 		op->val = VCPU_SREG_DS;
4749 		break;
4750 	case OpFS:
4751 		op->type = OP_IMM;
4752 		op->val = VCPU_SREG_FS;
4753 		break;
4754 	case OpGS:
4755 		op->type = OP_IMM;
4756 		op->val = VCPU_SREG_GS;
4757 		break;
4758 	case OpImplicit:
4759 		/* Special instructions do their own operand decoding. */
4760 	default:
4761 		op->type = OP_NONE; /* Disable writeback. */
4762 		break;
4763 	}
4764 
4765 done:
4766 	return rc;
4767 }
4768 
4769 static int x86_decode_avx(struct x86_emulate_ctxt *ctxt,
4770 			  u8 vex_1st, u8 vex_2nd, struct opcode *opcode)
4771 {
4772 	u8 vex_3rd, map, pp, l, v;
4773 	int rc = X86EMUL_CONTINUE;
4774 
4775 	if (ctxt->rep_prefix || ctxt->op_prefix || ctxt->rex_prefix)
4776 		goto ud;
4777 
4778 	if (vex_1st == 0xc5) {
4779 		/* Expand RVVVVlpp to VEX3 format */
4780 		vex_3rd = vex_2nd & ~0x80;         /* VVVVlpp from VEX2, w=0 */
4781 		vex_2nd = (vex_2nd & 0x80) | 0x61; /* R from VEX2, X=1 B=1 mmmmm=00001 */
4782 	} else {
4783 		vex_3rd = insn_fetch(u8, ctxt);
4784 	}
4785 
4786 	/* vex_2nd = RXBmmmmm, vex_3rd = wVVVVlpp.  Fix polarity */
4787 	vex_2nd ^= 0xE0; /* binary 11100000 */
4788 	vex_3rd ^= 0x78; /* binary 01111000 */
4789 
4790 	ctxt->rex_prefix = REX_PREFIX;
4791 	ctxt->rex_bits = (vex_2nd & 0xE0) >> 5; /* RXB */
4792 	ctxt->rex_bits |= (vex_3rd & 0x80) >> 4; /* w */
4793 	if (ctxt->rex_bits && ctxt->mode != X86EMUL_MODE_PROT64)
4794 		goto ud;
4795 
4796 	map = vex_2nd & 0x1f;
4797 	v = (vex_3rd >> 3) & 0xf;
4798 	l = vex_3rd & 0x4;
4799 	pp = vex_3rd & 0x3;
4800 
4801 	ctxt->b = insn_fetch(u8, ctxt);
4802 	switch (map) {
4803 	case 1:
4804 		ctxt->opcode_len = 2;
4805 		*opcode = twobyte_table[ctxt->b];
4806 		break;
4807 	case 2:
4808 		ctxt->opcode_len = 3;
4809 		*opcode = opcode_map_0f_38[ctxt->b];
4810 		break;
4811 	case 3:
4812 		/* no 0f 3a instructions are supported yet */
4813 		return X86EMUL_UNHANDLEABLE;
4814 	default:
4815 		goto ud;
4816 	}
4817 
4818 	/*
4819 	 * No three operand instructions are supported yet; those that
4820 	 * *are* marked with the Avx flag reserve the VVVV flag.
4821 	 */
4822 	if (v)
4823 		goto ud;
4824 
4825 	if (l)
4826 		ctxt->op_bytes = 32;
4827 	else
4828 		ctxt->op_bytes = 16;
4829 
4830 	switch (pp) {
4831 	case 0: break;
4832 	case 1: ctxt->op_prefix = true; break;
4833 	case 2: ctxt->rep_prefix = 0xf3; break;
4834 	case 3: ctxt->rep_prefix = 0xf2; break;
4835 	}
4836 
4837 done:
4838 	return rc;
4839 ud:
4840 	*opcode = ud;
4841 	return rc;
4842 }
4843 
4844 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
4845 {
4846 	int rc = X86EMUL_CONTINUE;
4847 	int mode = ctxt->mode;
4848 	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4849 	bool vex_prefix = false;
4850 	bool has_seg_override = false;
4851 	struct opcode opcode;
4852 	u16 dummy;
4853 	struct desc_struct desc;
4854 
4855 	ctxt->memop.type = OP_NONE;
4856 	ctxt->memopp = NULL;
4857 	ctxt->_eip = ctxt->eip;
4858 	ctxt->fetch.ptr = ctxt->fetch.data;
4859 	ctxt->fetch.end = ctxt->fetch.data + insn_len;
4860 	ctxt->opcode_len = 1;
4861 	ctxt->intercept = x86_intercept_none;
4862 	if (insn_len > 0)
4863 		memcpy(ctxt->fetch.data, insn, insn_len);
4864 	else {
4865 		rc = __do_insn_fetch_bytes(ctxt, 1);
4866 		if (rc != X86EMUL_CONTINUE)
4867 			goto done;
4868 	}
4869 
4870 	switch (mode) {
4871 	case X86EMUL_MODE_REAL:
4872 	case X86EMUL_MODE_VM86:
4873 		def_op_bytes = def_ad_bytes = 2;
4874 		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
4875 		if (desc.d)
4876 			def_op_bytes = def_ad_bytes = 4;
4877 		break;
4878 	case X86EMUL_MODE_PROT16:
4879 		def_op_bytes = def_ad_bytes = 2;
4880 		break;
4881 	case X86EMUL_MODE_PROT32:
4882 		def_op_bytes = def_ad_bytes = 4;
4883 		break;
4884 #ifdef CONFIG_X86_64
4885 	case X86EMUL_MODE_PROT64:
4886 		def_op_bytes = 4;
4887 		def_ad_bytes = 8;
4888 		break;
4889 #endif
4890 	default:
4891 		return EMULATION_FAILED;
4892 	}
4893 
4894 	ctxt->op_bytes = def_op_bytes;
4895 	ctxt->ad_bytes = def_ad_bytes;
4896 
4897 	/* Legacy prefixes. */
4898 	for (;;) {
4899 		switch (ctxt->b = insn_fetch(u8, ctxt)) {
4900 		case 0x66:	/* operand-size override */
4901 			ctxt->op_prefix = true;
4902 			/* switch between 2/4 bytes */
4903 			ctxt->op_bytes = def_op_bytes ^ 6;
4904 			break;
4905 		case 0x67:	/* address-size override */
4906 			if (mode == X86EMUL_MODE_PROT64)
4907 				/* switch between 4/8 bytes */
4908 				ctxt->ad_bytes = def_ad_bytes ^ 12;
4909 			else
4910 				/* switch between 2/4 bytes */
4911 				ctxt->ad_bytes = def_ad_bytes ^ 6;
4912 			break;
4913 		case 0x26:	/* ES override */
4914 			has_seg_override = true;
4915 			ctxt->seg_override = VCPU_SREG_ES;
4916 			break;
4917 		case 0x2e:	/* CS override */
4918 			has_seg_override = true;
4919 			ctxt->seg_override = VCPU_SREG_CS;
4920 			break;
4921 		case 0x36:	/* SS override */
4922 			has_seg_override = true;
4923 			ctxt->seg_override = VCPU_SREG_SS;
4924 			break;
4925 		case 0x3e:	/* DS override */
4926 			has_seg_override = true;
4927 			ctxt->seg_override = VCPU_SREG_DS;
4928 			break;
4929 		case 0x64:	/* FS override */
4930 			has_seg_override = true;
4931 			ctxt->seg_override = VCPU_SREG_FS;
4932 			break;
4933 		case 0x65:	/* GS override */
4934 			has_seg_override = true;
4935 			ctxt->seg_override = VCPU_SREG_GS;
4936 			break;
4937 		case 0x40 ... 0x4f: /* REX */
4938 			if (mode != X86EMUL_MODE_PROT64)
4939 				goto done_prefixes;
4940 			ctxt->rex_prefix = REX_PREFIX;
4941 			ctxt->rex_bits   = ctxt->b & 0xf;
4942 			continue;
4943 		case 0xf0:	/* LOCK */
4944 			ctxt->lock_prefix = 1;
4945 			break;
4946 		case 0xf2:	/* REPNE/REPNZ */
4947 		case 0xf3:	/* REP/REPE/REPZ */
4948 			ctxt->rep_prefix = ctxt->b;
4949 			break;
4950 		default:
4951 			goto done_prefixes;
4952 		}
4953 
4954 		/* Any legacy prefix after a REX prefix nullifies its effect. */
4955 		ctxt->rex_prefix = REX_NONE;
4956 		ctxt->rex_bits = 0;
4957 	}
4958 
4959 done_prefixes:
4960 
4961 	/* REX prefix. */
4962 	if (ctxt->rex_bits & REX_W)
4963 		ctxt->op_bytes = 8;
4964 
4965 	/* Opcode byte(s). */
4966 	if (ctxt->b == 0xc4 || ctxt->b == 0xc5) {
4967 		/* VEX or LDS/LES */
4968 		u8 vex_2nd = insn_fetch(u8, ctxt);
4969 		if (mode != X86EMUL_MODE_PROT64 && (vex_2nd & 0xc0) != 0xc0) {
4970 			opcode = opcode_table[ctxt->b];
4971 			ctxt->modrm = vex_2nd;
4972 			/* the Mod/RM byte has been fetched already!  */
4973 			goto done_modrm;
4974 		}
4975 
4976 		vex_prefix = true;
4977 		rc = x86_decode_avx(ctxt, ctxt->b, vex_2nd, &opcode);
4978 		if (rc != X86EMUL_CONTINUE)
4979 			goto done;
4980 	} else if (ctxt->b == 0x0f) {
4981 		/* Two- or three-byte opcode */
4982 		ctxt->opcode_len = 2;
4983 		ctxt->b = insn_fetch(u8, ctxt);
4984 		opcode = twobyte_table[ctxt->b];
4985 
4986 		/* 0F_38 opcode map */
4987 		if (ctxt->b == 0x38) {
4988 			ctxt->opcode_len = 3;
4989 			ctxt->b = insn_fetch(u8, ctxt);
4990 			opcode = opcode_map_0f_38[ctxt->b];
4991 		}
4992 	} else {
4993 		/* Opcode byte(s). */
4994 		opcode = opcode_table[ctxt->b];
4995 	}
4996 
4997 	if (opcode.flags & ModRM)
4998 		ctxt->modrm = insn_fetch(u8, ctxt);
4999 
5000 done_modrm:
5001 	ctxt->d = opcode.flags;
5002 	while (ctxt->d & GroupMask) {
5003 		switch (ctxt->d & GroupMask) {
5004 		case Group:
5005 			goffset = (ctxt->modrm >> 3) & 7;
5006 			opcode = opcode.u.group[goffset];
5007 			break;
5008 		case GroupDual:
5009 			goffset = (ctxt->modrm >> 3) & 7;
5010 			if ((ctxt->modrm >> 6) == 3)
5011 				opcode = opcode.u.gdual->mod3[goffset];
5012 			else
5013 				opcode = opcode.u.gdual->mod012[goffset];
5014 			break;
5015 		case RMExt:
5016 			goffset = ctxt->modrm & 7;
5017 			opcode = opcode.u.group[goffset];
5018 			break;
5019 		case Prefix:
5020 			if (ctxt->rep_prefix && ctxt->op_prefix)
5021 				return EMULATION_FAILED;
5022 			simd_prefix = ctxt->op_prefix ? 0x66 : ctxt->rep_prefix;
5023 			switch (simd_prefix) {
5024 			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5025 			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5026 			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5027 			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5028 			}
5029 			break;
5030 		case Escape:
5031 			if (ctxt->modrm > 0xbf) {
5032 				size_t size = ARRAY_SIZE(opcode.u.esc->high);
5033 				u32 index = array_index_nospec(
5034 					ctxt->modrm - 0xc0, size);
5035 
5036 				opcode = opcode.u.esc->high[index];
5037 			} else {
5038 				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5039 			}
5040 			break;
5041 		case InstrDual:
5042 			if ((ctxt->modrm >> 6) == 3)
5043 				opcode = opcode.u.idual->mod3;
5044 			else
5045 				opcode = opcode.u.idual->mod012;
5046 			break;
5047 		case ModeDual:
5048 			if (ctxt->mode == X86EMUL_MODE_PROT64)
5049 				opcode = opcode.u.mdual->mode64;
5050 			else
5051 				opcode = opcode.u.mdual->mode32;
5052 			break;
5053 		default:
5054 			return EMULATION_FAILED;
5055 		}
5056 
5057 		ctxt->d &= ~(u64)GroupMask;
5058 		ctxt->d |= opcode.flags;
5059 	}
5060 
5061 	ctxt->is_branch = opcode.flags & IsBranch;
5062 
5063 	/* Unrecognised? */
5064 	if (ctxt->d == 0)
5065 		return EMULATION_FAILED;
5066 
5067 	if (unlikely(vex_prefix)) {
5068 		/*
5069 		 * Only specifically marked instructions support VEX.  Since many
5070 		 * instructions support it but are not annotated, return not implemented
5071 		 * rather than #UD.
5072 		 */
5073 		if (!(ctxt->d & Avx))
5074 			return EMULATION_FAILED;
5075 
5076 		if (!(ctxt->d & AlignMask))
5077 			ctxt->d |= Unaligned;
5078 	}
5079 
5080 	ctxt->execute = opcode.u.execute;
5081 
5082 	/*
5083 	 * Reject emulation if KVM might need to emulate shadow stack updates
5084 	 * and/or indirect branch tracking enforcement, which the emulator
5085 	 * doesn't support.
5086 	 */
5087 	if ((is_ibt_instruction(ctxt) || is_shstk_instruction(ctxt)) &&
5088 	    ctxt->ops->get_cr(ctxt, 4) & X86_CR4_CET) {
5089 		u64 u_cet = 0, s_cet = 0;
5090 
5091 		/*
5092 		 * Check both User and Supervisor on far transfers as inter-
5093 		 * privilege level transfers are impacted by CET at the target
5094 		 * privilege level, and that is not known at this time.  The
5095 		 * expectation is that the guest will not require emulation of
5096 		 * any CET-affected instructions at any privilege level.
5097 		 */
5098 		if (!(ctxt->d & NearBranch))
5099 			u_cet = s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5100 		else if (ctxt->ops->cpl(ctxt) == 3)
5101 			u_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5102 		else
5103 			s_cet = CET_SHSTK_EN | CET_ENDBR_EN;
5104 
5105 		if ((u_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_U_CET, &u_cet)) ||
5106 		    (s_cet && ctxt->ops->get_msr(ctxt, MSR_IA32_S_CET, &s_cet)))
5107 			return EMULATION_FAILED;
5108 
5109 		if ((u_cet | s_cet) & CET_SHSTK_EN && is_shstk_instruction(ctxt))
5110 			return EMULATION_FAILED;
5111 
5112 		if ((u_cet | s_cet) & CET_ENDBR_EN && is_ibt_instruction(ctxt))
5113 			return EMULATION_FAILED;
5114 	}
5115 
5116 	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5117 	    likely(!(ctxt->d & EmulateOnUD)))
5118 		return EMULATION_FAILED;
5119 
5120 	if (unlikely(ctxt->d &
5121 	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5122 	     No16))) {
5123 		/*
5124 		 * These are copied unconditionally here, and checked unconditionally
5125 		 * in x86_emulate_insn.
5126 		 */
5127 		ctxt->check_perm = opcode.check_perm;
5128 		ctxt->intercept = opcode.intercept;
5129 
5130 		if (ctxt->d & NotImpl)
5131 			return EMULATION_FAILED;
5132 
5133 		if (mode == X86EMUL_MODE_PROT64) {
5134 			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5135 				ctxt->op_bytes = 8;
5136 			else if (ctxt->d & NearBranch)
5137 				ctxt->op_bytes = 8;
5138 		}
5139 
5140 		if (ctxt->d & Op3264) {
5141 			if (mode == X86EMUL_MODE_PROT64)
5142 				ctxt->op_bytes = 8;
5143 			else
5144 				ctxt->op_bytes = 4;
5145 		}
5146 
5147 		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5148 			ctxt->op_bytes = 4;
5149 
5150 		if (vex_prefix)
5151 			;
5152 		else if (ctxt->d & Sse)
5153 			ctxt->op_bytes = 16, ctxt->d &= ~Avx;
5154 		else if (ctxt->d & Mmx)
5155 			ctxt->op_bytes = 8;
5156 	}
5157 
5158 	/* ModRM and SIB bytes. */
5159 	if (ctxt->d & ModRM) {
5160 		rc = decode_modrm(ctxt, &ctxt->memop);
5161 		if (!has_seg_override) {
5162 			has_seg_override = true;
5163 			ctxt->seg_override = ctxt->modrm_seg;
5164 		}
5165 	} else if (ctxt->d & MemAbs)
5166 		rc = decode_abs(ctxt, &ctxt->memop);
5167 	if (rc != X86EMUL_CONTINUE)
5168 		goto done;
5169 
5170 	if (!has_seg_override)
5171 		ctxt->seg_override = VCPU_SREG_DS;
5172 
5173 	ctxt->memop.addr.mem.seg = ctxt->seg_override;
5174 
5175 	/*
5176 	 * Decode and fetch the source operand: register, memory
5177 	 * or immediate.
5178 	 */
5179 	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5180 	if (rc != X86EMUL_CONTINUE)
5181 		goto done;
5182 
5183 	/*
5184 	 * Decode and fetch the second source operand: register, memory
5185 	 * or immediate.
5186 	 */
5187 	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5188 	if (rc != X86EMUL_CONTINUE)
5189 		goto done;
5190 
5191 	/* Decode and fetch the destination operand: register or memory. */
5192 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5193 
5194 	if (ctxt->rip_relative && likely(ctxt->memopp))
5195 		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5196 					ctxt->memopp->addr.mem.ea + ctxt->_eip);
5197 
5198 done:
5199 	if (rc == X86EMUL_PROPAGATE_FAULT)
5200 		ctxt->have_exception = true;
5201 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5202 }
5203 
5204 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5205 {
5206 	return ctxt->d & PageTable;
5207 }
5208 
5209 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5210 {
5211 	/* The second termination condition only applies for REPE
5212 	 * and REPNE. Test if the repeat string operation prefix is
5213 	 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
5214 	 * corresponding termination condition according to:
5215 	 * 	- if REPE/REPZ and ZF = 0 then done
5216 	 * 	- if REPNE/REPNZ and ZF = 1 then done
5217 	 */
5218 	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5219 	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5220 	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
5221 		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5222 		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
5223 		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5224 		return true;
5225 
5226 	return false;
5227 }
5228 
5229 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5230 {
5231 	int rc;
5232 
5233 	kvm_fpu_get();
5234 	rc = asm_safe("fwait");
5235 	kvm_fpu_put();
5236 
5237 	if (unlikely(rc != X86EMUL_CONTINUE))
5238 		return emulate_exception(ctxt, MF_VECTOR, 0, false);
5239 
5240 	return X86EMUL_CONTINUE;
5241 }
5242 
5243 static void fetch_possible_mmx_operand(struct operand *op)
5244 {
5245 	if (op->type == OP_MM)
5246 		kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5247 }
5248 
5249 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5250 {
5251 	/* Clear fields that are set conditionally but read without a guard. */
5252 	ctxt->rip_relative = false;
5253 	ctxt->rex_prefix = REX_NONE;
5254 	ctxt->rex_bits = 0;
5255 	ctxt->lock_prefix = 0;
5256 	ctxt->op_prefix = false;
5257 	ctxt->rep_prefix = 0;
5258 	ctxt->regs_valid = 0;
5259 	ctxt->regs_dirty = 0;
5260 
5261 	ctxt->io_read.pos = 0;
5262 	ctxt->io_read.end = 0;
5263 	ctxt->mem_read.end = 0;
5264 }
5265 
/*
 * x86_emulate_insn - execute the instruction described by @ctxt.
 *
 * @ctxt:             emulation context.  The fields read here (d, b,
 *                    opcode_len, src/src2/dst, execute, check_perm,
 *                    intercept, prefixes, ...) are consumed as-is; nothing is
 *                    decoded in this function.
 * @check_intercepts: when true, emulator_check_intercept() is consulted at
 *                    the X86_ICPT_PRE_EXCEPT, X86_ICPT_POST_EXCEPT and
 *                    X86_ICPT_POST_MEMACCESS stages.
 *
 * Sequence: prefix/operand sanity checks, mode/feature/privilege checks,
 * memory operand reads, execution (via ctxt->execute or the inline opcode
 * switches), operand writeback, string/REP bookkeeping, and finally the
 * eip update.
 *
 * Return:
 *   EMULATION_OK          - rc ended as anything other than
 *                           X86EMUL_UNHANDLEABLE or X86EMUL_INTERCEPTED; on
 *                           X86EMUL_PROPAGATE_FAULT ctxt->have_exception is
 *                           set, on X86EMUL_CONTINUE registers are written
 *                           back.
 *   EMULATION_FAILED      - rc was X86EMUL_UNHANDLEABLE, the opcode is not
 *                           handled, or a fault with vector > 0x1f tripped
 *                           KVM_EMULATOR_BUG_ON().
 *   EMULATION_INTERCEPTED - rc was X86EMUL_INTERCEPTED.
 *   EMULATION_RESTART     - a REP string instruction has iterations left
 *                           and should be run again.
 */
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	/* Handlers may set dst.type to OP_NONE; kept so it can be restored. */
	int saved_dst_type = ctxt->dst.type;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	/* A SrcMemFAddr source must really be a memory operand. */
	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	/* Uncommon checks, entered only when one of these flags is set. */
	if (unlikely(ctxt->d &
		     (No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		/* No64 in 64-bit mode, or Undefined in any mode. */
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* AVX/SSE/MMX instructions are rejected while CR0.EM is set. */
		if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/*
		 * AVX needs CR4.OSXSAVE, a readable XCR0 and XCR0's YMM bit;
		 * SSE (when not AVX) needs CR4.OSFXSR.
		 */
		if (ctxt->d & Avx) {
			u64 xcr = 0;
			if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)
			    || ops->get_xcr(ctxt, 0, &xcr)
			    || !(xcr & XFEATURE_MASK_YMM)) {
				rc = emulate_ud(ctxt);
				goto done;
			}
		} else if (ctxt->d & Sse) {
			if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) {
				rc = emulate_ud(ctxt);
				goto done;
			}
		}

		/* CR0.TS set: AVX/SSE/MMX instructions go through emulate_nm(). */
		if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			/* The destination is not fetched for Mov instructions. */
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}

		/* First intercept stage; keyed on ctxt->intercept being non-zero. */
		if (unlikely(check_intercepts) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			/* PrivUD selects emulate_ud() over emulate_gp(). */
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Second intercept stage, after the checks above have passed. */
		if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				/*
				 * Masked RCX is already zero: nothing is executed,
				 * eip moves past the instruction and RF is cleared.
				 * rc is still X86EMUL_CONTINUE here.
				 */
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

	/* Read a memory source operand unless the instruction is NoAccess. */
	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	/* Read a memory second-source operand. */
	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/* ImplicitOps destinations skip the destination read and orig_val64 copy. */
	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;


	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			/*
			 * Unless the instruction is NoWrite, a page fault on
			 * this read is reported with the write bit set in its
			 * error code.
			 */
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B.  */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	/* Third intercept stage, after the operand reads above. */
	if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * RF is set for a REP-prefixed string instruction and cleared for
	 * everything else; it is cleared again below once the string
	 * instruction is complete.
	 */
	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	/*
	 * Opcodes with an execute callback are handled entirely by it; the
	 * switch statements below only cover opcodes without one.
	 */
	if (ctxt->execute) {
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	/* One-byte opcodes handled inline. */
	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		/* Destination is RAX itself: nothing to exchange or write back. */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		/* Sign-extend from half of the operand size. */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd:		/* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce:		/* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	/* SrcWrite: the source operand is written back too (never memory). */
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * Restore dst type in case the decoding will be reused
	 * (happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	/* Step RSI/RDI for SrcSI/DstDI operands. */
	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		/* The operand's count field is subtracted from RCX. */
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after each 1024 iteration.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		/* String instruction finished via its flag condition: drop RF. */
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	/* Commit the new instruction pointer; truncated to 32 bits outside 64-bit mode. */
	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

done:
	/* Common exit: translate the internal rc into an EMULATION_* result. */
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
			return EMULATION_FAILED;
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	/* Registers are only written back when emulation succeeded. */
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	/* Two-byte opcodes handled inline. */
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
	case 0x1f:		/* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
		break;
	case 0x40 ... 0x4f:	/* cmov */
		/*
		 * Condition false: writeback is suppressed except for 4-byte
		 * operands (presumably so a 32-bit destination is still
		 * zero-extended by writeback - confirm against writeback()).
		 */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:     /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

	/*
	 * No three-byte opcode is handled inline; this label doubles as the
	 * common tail of the two-byte switch above.
	 */
threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

	/* Opcode not handled here: fail without going through "done". */
cannot_emulate:
	return EMULATION_FAILED;
}
5627 
/* Non-static entry point that forwards @ctxt to invalidate_registers(). */
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}
5632 
/* Non-static entry point that forwards @ctxt to writeback_registers(). */
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}
5637 
5638 bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5639 {
5640 	if (ctxt->rep_prefix && (ctxt->d & String))
5641 		return false;
5642 
5643 	if (ctxt->d & TwoMemOp)
5644 		return false;
5645 
5646 	return true;
5647 }
5648