1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2012 Sandvine, Inc.
5 * Copyright (c) 2012 NetApp, Inc.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 /*
30 * This file and its contents are supplied under the terms of the
31 * Common Development and Distribution License ("CDDL"), version 1.0.
32 * You may only use this file in accordance with the terms of version
33 * 1.0 of the CDDL.
34 *
35 * A full copy of the text of the CDDL should have accompanied this
36 * source. A copy of the CDDL is also available via the Internet at
37 * http://www.illumos.org/license/CDDL.
38 *
39 * Copyright 2015 Pluribus Networks Inc.
40 * Copyright 2018 Joyent, Inc.
41 * Copyright 2021 Oxide Computer Company
42 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
43 */
44
45 #include <sys/cdefs.h>
46
47 #include <sys/param.h>
48 #include <sys/pcpu.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51
52 #include <machine/vmparam.h>
53 #include <machine/vmm.h>
54 #include <sys/vmm_kernel.h>
55 #include <sys/vmm_vm.h>
56
57 #include <sys/vmm_instruction_emul.h>
58 #include <x86/psl.h>
59 #include <x86/specialreg.h>
60
61 #include "vmm_ioport.h"
62
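/*
 * Status flags describing the progress of an emulated instruction: the kind
 * of access that triggered it (MMIO, in/out, other), how far fetch and decode
 * have gotten, and whether completion of the access is still pending.
 */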
63 enum vie_status {
64 VIES_INIT = (1U << 0),
65 VIES_MMIO = (1U << 1),
66 VIES_INOUT = (1U << 2),
67 VIES_OTHER = (1U << 3),
68 VIES_INST_FETCH = (1U << 4),
69 VIES_INST_DECODE = (1U << 5),
70 VIES_PENDING_MMIO = (1U << 6),
71 VIES_PENDING_INOUT = (1U << 7),
72 VIES_REPEAT = (1U << 8),
73 VIES_USER_FALLBACK = (1U << 9),
74 VIES_COMPLETE = (1U << 10),
75 };
76
77 /* State of request to perform emulated access (inout or MMIO) */
78 enum vie_req {
79 VR_NONE,
80 VR_PENDING,
81 VR_DONE,
82 };
83
84 struct vie_mmio {
85 uint64_t data;
86 uint64_t gpa;
87 uint8_t bytes;
88 enum vie_req state;
89 };
90
91 struct vie_op {
92 uint8_t op_byte; /* actual opcode byte */
93 uint8_t op_type; /* type of operation (e.g. MOV) */
94 uint16_t op_flags;
95 };
96
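/* An x86 instruction is at most 15 bytes long. */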
97 #define VIE_INST_SIZE 15
98 struct vie {
99 uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
100 uint8_t num_valid; /* size of the instruction */
101 uint8_t num_processed;
102
103 uint8_t addrsize:4, opsize:4; /* address and operand sizes */
104 uint8_t rex_w:1, /* REX prefix */
105 rex_r:1,
106 rex_x:1,
107 rex_b:1,
108 rex_present:1,
109 repz_present:1, /* REP/REPE/REPZ prefix */
110 repnz_present:1, /* REPNE/REPNZ prefix */
111 opsize_override:1, /* Operand size override */
112 addrsize_override:1, /* Address size override */
113 segment_override:1; /* Segment override */
114
115 uint8_t mod:2, /* ModRM byte */
116 reg:4,
117 rm:4;
118
119 uint8_t ss:2, /* SIB byte */
120 vex_present:1, /* VEX prefixed */
121 vex_l:1, /* L bit */
122 index:4, /* SIB byte */
123 base:4; /* SIB byte */
124
125 uint8_t disp_bytes;
126 uint8_t imm_bytes;
127
128 uint8_t scale;
129
130 uint8_t vex_reg:4, /* vvvv: first source reg specifier */
131 vex_pp:2, /* pp */
132 _sparebits:2;
133
134 uint8_t _sparebytes[2];
135
136 int base_register; /* VM_REG_GUEST_xyz */
137 int index_register; /* VM_REG_GUEST_xyz */
138 int segment_register; /* VM_REG_GUEST_xyz */
139
140 int64_t displacement; /* optional addr displacement */
141 int64_t immediate; /* optional immediate operand */
142
143 struct vie_op op; /* opcode description */
144
145 enum vie_status status;
146
147 struct vm_guest_paging paging; /* guest paging state */
148
149 uint64_t mmio_gpa; /* faulting GPA */
150 struct vie_mmio mmio_req_read;
151 struct vie_mmio mmio_req_write;
152
153 struct vm_inout inout; /* active in/out op */
154 enum vie_req inout_req_state;
155 uint32_t inout_req_val; /* value from userspace */
156 };
157
158
159 /* struct vie_op.op_type */
160 enum {
161 VIE_OP_TYPE_NONE = 0,
162 VIE_OP_TYPE_MOV,
163 VIE_OP_TYPE_MOVSX,
164 VIE_OP_TYPE_MOVZX,
165 VIE_OP_TYPE_MOV_CR,
166 VIE_OP_TYPE_AND,
167 VIE_OP_TYPE_OR,
168 VIE_OP_TYPE_SUB,
169 VIE_OP_TYPE_TWO_BYTE,
170 VIE_OP_TYPE_PUSH,
171 VIE_OP_TYPE_CMP,
172 VIE_OP_TYPE_POP,
173 VIE_OP_TYPE_MOVS,
174 VIE_OP_TYPE_GROUP1,
175 VIE_OP_TYPE_STOS,
176 VIE_OP_TYPE_BITTEST,
177 VIE_OP_TYPE_TWOB_GRP15,
178 VIE_OP_TYPE_ADD,
179 VIE_OP_TYPE_TEST,
180 VIE_OP_TYPE_BEXTR,
181 VIE_OP_TYPE_CLTS,
182 VIE_OP_TYPE_MUL,
183 VIE_OP_TYPE_LAST
184 };
185
186 /* struct vie_op.op_flags */
187 #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */
188 #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
189 #define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */
190 #define VIE_OP_F_NO_MODRM (1 << 3)
191 #define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)
192 #define VIE_OP_F_REG_REG (1 << 5) /* special-case for mov-cr */
193
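/*
 * Opcode dispatch tables, indexed by opcode byte.  Entries left zeroed
 * (op_type == VIE_OP_TYPE_NONE) are not handled by the emulation.
 */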
194 static const struct vie_op three_byte_opcodes_0f38[256] = {
195 [0xF7] = {
196 .op_byte = 0xF7,
197 .op_type = VIE_OP_TYPE_BEXTR,
198 },
199 };
200
201 static const struct vie_op two_byte_opcodes[256] = {
202 [0x06] = {
203 .op_byte = 0x06,
204 .op_type = VIE_OP_TYPE_CLTS,
205 .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
206 },
207 [0x20] = {
208 .op_byte = 0x20,
209 .op_type = VIE_OP_TYPE_MOV_CR,
210 .op_flags = VIE_OP_F_REG_REG | VIE_OP_F_NO_GLA_VERIFICATION
211 },
212 [0x22] = {
213 .op_byte = 0x22,
214 .op_type = VIE_OP_TYPE_MOV_CR,
215 .op_flags = VIE_OP_F_REG_REG | VIE_OP_F_NO_GLA_VERIFICATION
216 },
217 [0xAE] = {
218 .op_byte = 0xAE,
219 .op_type = VIE_OP_TYPE_TWOB_GRP15,
220 },
221 [0xAF] = {
222 .op_byte = 0xAF,
223 .op_type = VIE_OP_TYPE_MUL,
224 },
225 [0xB6] = {
226 .op_byte = 0xB6,
227 .op_type = VIE_OP_TYPE_MOVZX,
228 },
229 [0xB7] = {
230 .op_byte = 0xB7,
231 .op_type = VIE_OP_TYPE_MOVZX,
232 },
233 [0xBA] = {
234 .op_byte = 0xBA,
235 .op_type = VIE_OP_TYPE_BITTEST,
236 .op_flags = VIE_OP_F_IMM8,
237 },
238 [0xBE] = {
239 .op_byte = 0xBE,
240 .op_type = VIE_OP_TYPE_MOVSX,
241 },
242 };
243
244 static const struct vie_op one_byte_opcodes[256] = {
245 [0x03] = {
246 .op_byte = 0x03,
247 .op_type = VIE_OP_TYPE_ADD,
248 },
249 [0x0F] = {
250 .op_byte = 0x0F,
251 .op_type = VIE_OP_TYPE_TWO_BYTE
252 },
253 [0x0B] = {
254 .op_byte = 0x0B,
255 .op_type = VIE_OP_TYPE_OR,
256 },
257 [0x2B] = {
258 .op_byte = 0x2B,
259 .op_type = VIE_OP_TYPE_SUB,
260 },
261 [0x39] = {
262 .op_byte = 0x39,
263 .op_type = VIE_OP_TYPE_CMP,
264 },
265 [0x3B] = {
266 .op_byte = 0x3B,
267 .op_type = VIE_OP_TYPE_CMP,
268 },
269 [0x88] = {
270 .op_byte = 0x88,
271 .op_type = VIE_OP_TYPE_MOV,
272 },
273 [0x89] = {
274 .op_byte = 0x89,
275 .op_type = VIE_OP_TYPE_MOV,
276 },
277 [0x8A] = {
278 .op_byte = 0x8A,
279 .op_type = VIE_OP_TYPE_MOV,
280 },
281 [0x8B] = {
282 .op_byte = 0x8B,
283 .op_type = VIE_OP_TYPE_MOV,
284 },
285 [0xA1] = {
286 .op_byte = 0xA1,
287 .op_type = VIE_OP_TYPE_MOV,
288 .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
289 },
290 [0xA3] = {
291 .op_byte = 0xA3,
292 .op_type = VIE_OP_TYPE_MOV,
293 .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
294 },
295 [0xA4] = {
296 .op_byte = 0xA4,
297 .op_type = VIE_OP_TYPE_MOVS,
298 .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
299 },
300 [0xA5] = {
301 .op_byte = 0xA5,
302 .op_type = VIE_OP_TYPE_MOVS,
303 .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
304 },
305 [0xAA] = {
306 .op_byte = 0xAA,
307 .op_type = VIE_OP_TYPE_STOS,
308 .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
309 },
310 [0xAB] = {
311 .op_byte = 0xAB,
312 .op_type = VIE_OP_TYPE_STOS,
313 .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
314 },
315 [0xC6] = {
316 /* XXX Group 11 extended opcode - not just MOV */
317 .op_byte = 0xC6,
318 .op_type = VIE_OP_TYPE_MOV,
319 .op_flags = VIE_OP_F_IMM8,
320 },
321 [0xC7] = {
322 .op_byte = 0xC7,
323 .op_type = VIE_OP_TYPE_MOV,
324 .op_flags = VIE_OP_F_IMM,
325 },
326 [0x23] = {
327 .op_byte = 0x23,
328 .op_type = VIE_OP_TYPE_AND,
329 },
330 [0x80] = {
331 /* Group 1 extended opcode */
332 .op_byte = 0x80,
333 .op_type = VIE_OP_TYPE_GROUP1,
334 .op_flags = VIE_OP_F_IMM8,
335 },
336 [0x81] = {
337 /* Group 1 extended opcode */
338 .op_byte = 0x81,
339 .op_type = VIE_OP_TYPE_GROUP1,
340 .op_flags = VIE_OP_F_IMM,
341 },
342 [0x83] = {
343 /* Group 1 extended opcode */
344 .op_byte = 0x83,
345 .op_type = VIE_OP_TYPE_GROUP1,
346 .op_flags = VIE_OP_F_IMM8,
347 },
348 [0x8F] = {
349 /* XXX Group 1A extended opcode - not just POP */
350 .op_byte = 0x8F,
351 .op_type = VIE_OP_TYPE_POP,
352 },
353 [0xF6] = {
354 /* XXX Group 3 extended opcode - not just TEST */
355 .op_byte = 0xF6,
356 .op_type = VIE_OP_TYPE_TEST,
357 .op_flags = VIE_OP_F_IMM8,
358 },
359 [0xF7] = {
360 /* XXX Group 3 extended opcode - not just TEST */
361 .op_byte = 0xF7,
362 .op_type = VIE_OP_TYPE_TEST,
363 .op_flags = VIE_OP_F_IMM,
364 },
365 [0xFF] = {
366 /* XXX Group 5 extended opcode - not just PUSH */
367 .op_byte = 0xFF,
368 .op_type = VIE_OP_TYPE_PUSH,
369 }
370 };
371
372 /* struct vie.mod */
373 #define VIE_MOD_INDIRECT 0
374 #define VIE_MOD_INDIRECT_DISP8 1
375 #define VIE_MOD_INDIRECT_DISP32 2
376 #define VIE_MOD_DIRECT 3
377
378 /* struct vie.rm */
379 #define VIE_RM_SIB 4
380 #define VIE_RM_DISP32 5
381
382 #define GB (1024 * 1024 * 1024)
383
384
385 /*
386 * Paging defines, previously pulled in from machine/pmap.h
387 */
388 #define PG_V (1 << 0) /* Present */
389 #define PG_RW (1 << 1) /* Read/Write */
390 #define PG_U (1 << 2) /* User/Supervisor */
391 #define PG_A (1 << 5) /* Accessed */
392 #define PG_M (1 << 6) /* Dirty */
393 #define PG_PS (1 << 7) /* Largepage */
394
395 /*
396 * Paging exception defines, previously pulled in from machine/pmap.h
397 */
398 #define PGEX_P (1 << 0) /* Non-present/Protection */
399 #define PGEX_W (1 << 1) /* Read/Write */
400 #define PGEX_U (1 << 2) /* User/Supervisor */
401 #define PGEX_RSV (1 << 3) /* (Non-)Reserved */
402 #define PGEX_I (1 << 4) /* Instruction */
403
404
405 static enum vm_reg_name gpr_map[16] = {
406 VM_REG_GUEST_RAX,
407 VM_REG_GUEST_RCX,
408 VM_REG_GUEST_RDX,
409 VM_REG_GUEST_RBX,
410 VM_REG_GUEST_RSP,
411 VM_REG_GUEST_RBP,
412 VM_REG_GUEST_RSI,
413 VM_REG_GUEST_RDI,
414 VM_REG_GUEST_R8,
415 VM_REG_GUEST_R9,
416 VM_REG_GUEST_R10,
417 VM_REG_GUEST_R11,
418 VM_REG_GUEST_R12,
419 VM_REG_GUEST_R13,
420 VM_REG_GUEST_R14,
421 VM_REG_GUEST_R15
422 };
423
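/*
 * Register names indexed by operand size in bytes.  For byte-sized operands,
 * encodings 0-3 can name either the low byte or the legacy high byte register
 * depending on REX prefix presence, hence the "a[hl]"-style names.
 */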
424 static const char *gpr_name_map[][16] = {
425 [1] = {
426 "a[hl]", "c[hl]", "d[hl]", "b[hl]", "spl", "bpl", "sil", "dil",
427 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
428 },
429 [2] = {
430 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
431 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w",
432 },
433 [4] = {
434 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
435 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
436 },
437 [8] = {
438 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
439 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
440 },
441 };
442
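/*
 * Control register encodings; entries mapped to VM_REG_LAST correspond to
 * non-existent CRs, for which the register read fails and a #UD is injected.
 */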
443 static enum vm_reg_name cr_map[16] = {
444 VM_REG_GUEST_CR0,
445 VM_REG_LAST,
446 VM_REG_GUEST_CR2,
447 VM_REG_GUEST_CR3,
448 VM_REG_GUEST_CR4,
449 VM_REG_LAST,
450 VM_REG_LAST,
451 VM_REG_LAST,
452 VM_REG_LAST,
453 VM_REG_LAST,
454 VM_REG_LAST,
455 VM_REG_LAST,
456 VM_REG_LAST,
457 VM_REG_LAST,
458 VM_REG_LAST,
459 VM_REG_LAST
460 };
461
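/* Masks used to truncate values to 1, 2, 4, or 8-byte operand sizes. */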
462 static uint64_t size2mask[] = {
463 [1] = 0xff,
464 [2] = 0xffff,
465 [4] = 0xffffffff,
466 [8] = 0xffffffffffffffff,
467 };
468
469
470 static int vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid,
471 uint64_t gpa, uint64_t *rval, int bytes);
472 static int vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid,
473 uint64_t gpa, uint64_t wval, int bytes);
474 static int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
475 struct seg_desc *desc, uint64_t offset, int length, int addrsize,
476 int prot, uint64_t *gla);
477 static int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
478 static int vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf,
479 uint64_t gla);
480 static uint64_t vie_size2mask(int size);
481
482 struct vie *
483 vie_alloc()
484 {
485 return (kmem_zalloc(sizeof (struct vie), KM_SLEEP));
486 }
487
488 void
489 vie_free(struct vie *vie)
490 {
491 kmem_free(vie, sizeof (struct vie));
492 }
493
494 enum vm_reg_name
495 vie_regnum_map(uint8_t regnum)
496 {
497 VERIFY3U(regnum, <, 16);
498 return (gpr_map[regnum]);
499 }
500
501 const char *
502 vie_regnum_name(uint8_t regnum, uint8_t size)
503 {
504 VERIFY3U(regnum, <, 16);
505 VERIFY(size == 1 || size == 2 || size == 4 || size == 8);
506 return (gpr_name_map[size][regnum]);
507 }
508
509 static void
510 vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
511 {
512 *lhbr = 0;
513 *reg = gpr_map[vie->reg];
514
515 /*
516 * 64-bit mode imposes limitations on accessing legacy high byte
517 * registers (lhbr).
518 *
519 * The legacy high-byte registers cannot be addressed if the REX
520 * prefix is present. In this case the values 4, 5, 6 and 7 of the
521 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
522 *
523 * If the REX prefix is not present then the values 4, 5, 6 and 7
524 * of the 'ModRM:reg' field address the legacy high-byte registers,
525 * %ah, %ch, %dh and %bh respectively.
526 */
527 if (!vie->rex_present) {
528 if (vie->reg & 0x4) {
529 *lhbr = 1;
530 *reg = gpr_map[vie->reg & 0x3];
531 }
532 }
533 }
534
535 static int
536 vie_read_bytereg(struct vie *vie, struct vm *vm, int vcpuid, uint8_t *rval)
537 {
538 uint64_t val;
539 int error, lhbr;
540 enum vm_reg_name reg;
541
542 vie_calc_bytereg(vie, &reg, &lhbr);
543 error = vm_get_register(vm, vcpuid, reg, &val);
544
545 /*
546 * To obtain the value of a legacy high byte register shift the
547 * base register right by 8 bits (%ah = %rax >> 8).
548 */
549 if (lhbr)
550 *rval = val >> 8;
551 else
552 *rval = val;
553 return (error);
554 }
555
556 static int
557 vie_write_bytereg(struct vie *vie, struct vm *vm, int vcpuid, uint8_t byte)
558 {
559 uint64_t origval, val, mask;
560 int error, lhbr;
561 enum vm_reg_name reg;
562
563 vie_calc_bytereg(vie, &reg, &lhbr);
564 error = vm_get_register(vm, vcpuid, reg, &origval);
565 if (error == 0) {
566 val = byte;
567 mask = 0xff;
568 if (lhbr) {
569 /*
570 * Shift left by 8 to store 'byte' in a legacy high
571 * byte register.
572 */
573 val <<= 8;
574 mask <<= 8;
575 }
576 val |= origval & ~mask;
577 error = vm_set_register(vm, vcpuid, reg, val);
578 }
579 return (error);
580 }
581
582 static int
583 vie_update_register(struct vm *vm, int vcpuid, enum vm_reg_name reg,
584 uint64_t val, int size)
585 {
586 int error;
587 uint64_t origval;
588
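/*
 * 1- and 2-byte writes preserve the untouched upper bits of the destination
 * register, while 4-byte writes zero-extend to 64 bits, matching x86-64
 * register update semantics.
 */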
589 switch (size) {
590 case 1:
591 case 2:
592 error = vm_get_register(vm, vcpuid, reg, &origval);
593 if (error)
594 return (error);
595 val &= size2mask[size];
596 val |= origval & ~size2mask[size];
597 break;
598 case 4:
599 val &= 0xffffffffUL;
600 break;
601 case 8:
602 break;
603 default:
604 return (EINVAL);
605 }
606
607 error = vm_set_register(vm, vcpuid, reg, val);
608 return (error);
609 }
610
611 static int
612 vie_repeat(struct vie *vie)
613 {
614 vie->status |= VIES_REPEAT;
615
616 /*
617 * Clear out any cached operation values so the repeated instruction can
618 * begin without using that stale state. Other state, such as the
619 * decoding results, is kept around as it will not vary between
620 * iterations of a rep-prefixed instruction.
621 */
622 if ((vie->status & VIES_MMIO) != 0) {
623 vie->mmio_req_read.state = VR_NONE;
624 vie->mmio_req_write.state = VR_NONE;
625 } else if ((vie->status & VIES_INOUT) != 0) {
626 vie->inout_req_state = VR_NONE;
627 } else {
628 panic("unexpected emulation state");
629 }
630
631 return (EAGAIN);
632 }
633
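/* Arithmetic status flags (CF, PF, AF, ZF, SF, OF) updated by emulated ALU ops. */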
634 #define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
635
636 /*
637 * Return the status flags that would result from doing (x - y).
638 */
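/*
 * The flags are computed by performing the actual subtraction on the host CPU
 * and capturing the resulting %rflags via pushfq/popq.
 */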
639 /* BEGIN CSTYLED */
640 #define GETCC(sz) \
641 static ulong_t \
642 getcc##sz(uint##sz##_t x, uint##sz##_t y) \
643 { \
644 ulong_t rflags; \
645 \
646 __asm __volatile("sub %2,%1; pushfq; popq %0" : \
647 "=r" (rflags), "+r" (x) : "m" (y)); \
648 return (rflags); \
649 } struct __hack
650 /* END CSTYLED */
651
652 GETCC(8);
653 GETCC(16);
654 GETCC(32);
655 GETCC(64);
656
657 static ulong_t
658 getcc(int opsize, uint64_t x, uint64_t y)
659 {
660 KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
661 ("getcc: invalid operand size %d", opsize));
662
663 if (opsize == 1)
664 return (getcc8(x, y));
665 else if (opsize == 2)
666 return (getcc16(x, y));
667 else if (opsize == 4)
668 return (getcc32(x, y));
669 else
670 return (getcc64(x, y));
671 }
672
673 /*
674 * Macro creation of functions getaddflags{8,16,32,64}
675 */
676 /* BEGIN CSTYLED */
677 #define GETADDFLAGS(sz) \
678 static ulong_t \
679 getaddflags##sz(uint##sz##_t x, uint##sz##_t y) \
680 { \
681 ulong_t rflags; \
682 \
683 __asm __volatile("add %2,%1; pushfq; popq %0" : \
684 "=r" (rflags), "+r" (x) : "m" (y)); \
685 return (rflags); \
686 } struct __hack
687 /* END CSTYLED */
688
689 GETADDFLAGS(8);
690 GETADDFLAGS(16);
691 GETADDFLAGS(32);
692 GETADDFLAGS(64);
693
694 static ulong_t
695 getaddflags(int opsize, uint64_t x, uint64_t y)
696 {
697 KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
698 ("getaddflags: invalid operand size %d", opsize));
699
700 if (opsize == 1)
701 return (getaddflags8(x, y));
702 else if (opsize == 2)
703 return (getaddflags16(x, y));
704 else if (opsize == 4)
705 return (getaddflags32(x, y));
706 else
707 return (getaddflags64(x, y));
708 }
709
710 /*
711 * Macro creation of functions getimulflags{16,32,64}
712 */
713 /* BEGIN CSTYLED */
714 #define GETIMULFLAGS(sz) \
715 static ulong_t \
716 getimulflags##sz(uint##sz##_t x, uint##sz##_t y) \
717 { \
718 ulong_t rflags; \
719 \
720 __asm __volatile("imul %2,%1; pushfq; popq %0" : \
721 "=r" (rflags), "+r" (x) : "m" (y)); \
722 return (rflags); \
723 } struct __hack
724 /* END CSTYLED */
725
726 GETIMULFLAGS(16);
727 GETIMULFLAGS(32);
728 GETIMULFLAGS(64);
729
730 static ulong_t
731 getimulflags(int opsize, uint64_t x, uint64_t y)
732 {
733 KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
734 ("getimulflags: invalid operand size %d", opsize));
735
736 if (opsize == 2)
737 return (getimulflags16(x, y));
738 else if (opsize == 4)
739 return (getimulflags32(x, y));
740 else
741 return (getimulflags64(x, y));
742 }
743
744 /*
745 * Return the status flags that would result from doing (x & y).
746 */
747 /* BEGIN CSTYLED */
748 #define GETANDFLAGS(sz) \
749 static ulong_t \
750 getandflags##sz(uint##sz##_t x, uint##sz##_t y) \
751 { \
752 ulong_t rflags; \
753 \
754 __asm __volatile("and %2,%1; pushfq; popq %0" : \
755 "=r" (rflags), "+r" (x) : "m" (y)); \
756 return (rflags); \
757 } struct __hack
758 /* END CSTYLED */
759
760 GETANDFLAGS(8);
761 GETANDFLAGS(16);
762 GETANDFLAGS(32);
763 GETANDFLAGS(64);
764
765 static ulong_t
766 getandflags(int opsize, uint64_t x, uint64_t y)
767 {
768 KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
769 ("getandflags: invalid operand size %d", opsize));
770
771 if (opsize == 1)
772 return (getandflags8(x, y));
773 else if (opsize == 2)
774 return (getandflags16(x, y));
775 else if (opsize == 4)
776 return (getandflags32(x, y));
777 else
778 return (getandflags64(x, y));
779 }
780
781 static int
782 vie_emulate_mov_cr(struct vie *vie, struct vm *vm, int vcpuid)
783 {
784 uint64_t val;
785 int err;
786 enum vm_reg_name gpr = gpr_map[vie->rm];
787 enum vm_reg_name cr = cr_map[vie->reg];
788
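/*
 * MOV to/from a control register always operates on the full register width:
 * 64 bits in long mode, 32 bits otherwise, regardless of prefixes.
 */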
789 uint_t size = 4;
790 if (vie->paging.cpu_mode == CPU_MODE_64BIT) {
791 size = 8;
792 }
793
794 switch (vie->op.op_byte) {
795 case 0x20:
796 /*
797 * MOV control register (ModRM:reg) to reg (ModRM:r/m)
798 * 20/r: mov r32, CR0-CR7
799 * 20/r: mov r64, CR0-CR7
800 * REX.R + 20/0: mov r64, CR8
801 */
802 if (vie->paging.cpl != 0) {
803 vm_inject_gp(vm, vcpuid);
804 vie->num_processed = 0;
805 return (0);
806 }
807 err = vm_get_register(vm, vcpuid, cr, &val);
808 if (err != 0) {
809 /* #UD for access to non-existent CRs */
810 vm_inject_ud(vm, vcpuid);
811 vie->num_processed = 0;
812 return (0);
813 }
814 err = vie_update_register(vm, vcpuid, gpr, val, size);
815 break;
816 case 0x22: {
817 /*
818 * MOV reg (ModRM:r/m) to control register (ModRM:reg)
819 * 22/r: mov CR0-CR7, r32
820 * 22/r: mov CR0-CR7, r64
821 * REX.R + 22/0: mov CR8, r64
822 */
823 uint64_t old, diff;
824
825 if (vie->paging.cpl != 0) {
826 vm_inject_gp(vm, vcpuid);
827 vie->num_processed = 0;
828 return (0);
829 }
830 err = vm_get_register(vm, vcpuid, cr, &old);
831 if (err != 0) {
832 /* #UD for access to non-existent CRs */
833 vm_inject_ud(vm, vcpuid);
834 vie->num_processed = 0;
835 return (0);
836 }
837 err = vm_get_register(vm, vcpuid, gpr, &val);
838 VERIFY0(err);
839 val &= size2mask[size];
840 diff = old ^ val;
841
842 switch (cr) {
843 case VM_REG_GUEST_CR0:
844 if ((diff & CR0_PG) != 0) {
845 uint64_t efer;
846
847 err = vm_get_register(vm, vcpuid,
848 VM_REG_GUEST_EFER, &efer);
849 VERIFY0(err);
850
851 /* Keep the long-mode state in EFER in sync */
852 if ((val & CR0_PG) != 0 &&
853 (efer & EFER_LME) != 0) {
854 efer |= EFER_LMA;
855 }
856 if ((val & CR0_PG) == 0 &&
857 (efer & EFER_LME) != 0) {
858 efer &= ~EFER_LMA;
859 }
860
861 err = vm_set_register(vm, vcpuid,
862 VM_REG_GUEST_EFER, efer);
863 VERIFY0(err);
864 }
865 /* TODO: enforce more of the #GP checks */
866 err = vm_set_register(vm, vcpuid, cr, val);
867 VERIFY0(err);
868 break;
869 case VM_REG_GUEST_CR2:
870 case VM_REG_GUEST_CR3:
871 case VM_REG_GUEST_CR4:
872 /* TODO: enforce more of the #GP checks */
873 err = vm_set_register(vm, vcpuid, cr, val);
874 break;
875 default:
876 /* The cr_map mapping should prevent this */
877 panic("invalid cr %d", cr);
878 }
879 break;
880 }
881 default:
882 return (EINVAL);
883 }
884 return (err);
885 }
886
887 static int
888 vie_emulate_mov(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
889 {
890 int error, size;
891 enum vm_reg_name reg;
892 uint8_t byte;
893 uint64_t val;
894
895 size = vie->opsize;
896 error = EINVAL;
897
898 switch (vie->op.op_byte) {
899 case 0x88:
900 /*
901 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
902 * 88/r: mov r/m8, r8
903 * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
904 */
905 size = 1; /* override for byte operation */
906 error = vie_read_bytereg(vie, vm, vcpuid, &byte);
907 if (error == 0) {
908 error = vie_mmio_write(vie, vm, vcpuid, gpa, byte,
909 size);
910 }
911 break;
912 case 0x89:
913 /*
914 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
915 * 89/r: mov r/m16, r16
916 * 89/r: mov r/m32, r32
917 * REX.W + 89/r mov r/m64, r64
918 */
919 reg = gpr_map[vie->reg];
920 error = vm_get_register(vm, vcpuid, reg, &val);
921 if (error == 0) {
922 val &= size2mask[size];
923 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
924 }
925 break;
926 case 0x8A:
927 /*
928 * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
929 * 8A/r: mov r8, r/m8
930 * REX + 8A/r: mov r8, r/m8
931 */
932 size = 1; /* override for byte operation */
933 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
934 if (error == 0)
935 error = vie_write_bytereg(vie, vm, vcpuid, val);
936 break;
937 case 0x8B:
938 /*
939 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
940 * 8B/r: mov r16, r/m16
941 * 8B/r: mov r32, r/m32
942 * REX.W 8B/r: mov r64, r/m64
943 */
944 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
945 if (error == 0) {
946 reg = gpr_map[vie->reg];
947 error = vie_update_register(vm, vcpuid, reg, val, size);
948 }
949 break;
950 case 0xA1:
951 /*
952 * MOV from seg:moffset to AX/EAX/RAX
953 * A1: mov AX, moffs16
954 * A1: mov EAX, moffs32
955 * REX.W + A1: mov RAX, moffs64
956 */
957 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
958 if (error == 0) {
959 reg = VM_REG_GUEST_RAX;
960 error = vie_update_register(vm, vcpuid, reg, val, size);
961 }
962 break;
963 case 0xA3:
964 /*
965 * MOV from AX/EAX/RAX to seg:moffset
966 * A3: mov moffs16, AX
967 * A3: mov moffs32, EAX
968 * REX.W + A3: mov moffs64, RAX
969 */
970 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
971 if (error == 0) {
972 val &= size2mask[size];
973 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
974 }
975 break;
976 case 0xC6:
977 /*
978 * MOV from imm8 to mem (ModRM:r/m)
979 * C6/0 mov r/m8, imm8
980 * REX + C6/0 mov r/m8, imm8
981 */
982 size = 1; /* override for byte operation */
983 val = vie->immediate;
984 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
985 break;
986 case 0xC7:
987 /*
988 * MOV from imm16/imm32 to mem (ModRM:r/m)
989 * C7/0 mov r/m16, imm16
990 * C7/0 mov r/m32, imm32
991 * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
992 */
993 val = vie->immediate & size2mask[size];
994 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
995 break;
996 default:
997 break;
998 }
999
1000 return (error);
1001 }
1002
1003 static int
1004 vie_emulate_movx(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1005 {
1006 int error, size;
1007 enum vm_reg_name reg;
1008 uint64_t val;
1009
1010 size = vie->opsize;
1011 error = EINVAL;
1012
1013 switch (vie->op.op_byte) {
1014 case 0xB6:
1015 /*
1016 * MOV and zero extend byte from mem (ModRM:r/m) to
1017 * reg (ModRM:reg).
1018 *
1019 * 0F B6/r movzx r16, r/m8
1020 * 0F B6/r movzx r32, r/m8
1021 * REX.W + 0F B6/r movzx r64, r/m8
1022 */
1023
1024 /* get the first operand */
1025 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1);
1026 if (error)
1027 break;
1028
1029 /* get the second operand */
1030 reg = gpr_map[vie->reg];
1031
1032 /* zero-extend byte */
1033 val = (uint8_t)val;
1034
1035 /* write the result */
1036 error = vie_update_register(vm, vcpuid, reg, val, size);
1037 break;
1038 case 0xB7:
1039 /*
1040 * MOV and zero extend word from mem (ModRM:r/m) to
1041 * reg (ModRM:reg).
1042 *
1043 * 0F B7/r movzx r32, r/m16
1044 * REX.W + 0F B7/r movzx r64, r/m16
1045 */
1046 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 2);
1047 if (error)
1048 return (error);
1049
1050 reg = gpr_map[vie->reg];
1051
1052 /* zero-extend word */
1053 val = (uint16_t)val;
1054
1055 error = vie_update_register(vm, vcpuid, reg, val, size);
1056 break;
1057 case 0xBE:
1058 /*
1059 * MOV and sign extend byte from mem (ModRM:r/m) to
1060 * reg (ModRM:reg).
1061 *
1062 * 0F BE/r movsx r16, r/m8
1063 * 0F BE/r movsx r32, r/m8
1064 * REX.W + 0F BE/r movsx r64, r/m8
1065 */
1066
1067 /* get the first operand */
1068 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1);
1069 if (error)
1070 break;
1071
1072 /* get the second operand */
1073 reg = gpr_map[vie->reg];
1074
1075 /* sign extend byte */
1076 val = (int8_t)val;
1077
1078 /* write the result */
1079 error = vie_update_register(vm, vcpuid, reg, val, size);
1080 break;
1081 default:
1082 break;
1083 }
1084 return (error);
1085 }
1086
1087 /*
1088 * Helper function to calculate and validate a linear address.
1089 */
1090 static int
1091 vie_get_gla(struct vie *vie, struct vm *vm, int vcpuid, int opsize,
1092 int addrsize, int prot, enum vm_reg_name seg, enum vm_reg_name gpr,
1093 uint64_t *gla)
1094 {
1095 struct seg_desc desc;
1096 uint64_t cr0, val, rflags;
1097 int error;
1098 struct vm_guest_paging *paging;
1099
1100 paging = &vie->paging;
1101
1102 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
1103 KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
1104
1105 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1106 KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
1107
1108 error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
1109 KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
1110 __func__, error, seg));
1111
1112 error = vm_get_register(vm, vcpuid, gpr, &val);
1113 KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
1114 error, gpr));
1115
1116 if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
1117 addrsize, prot, gla)) {
1118 if (seg == VM_REG_GUEST_SS)
1119 vm_inject_ss(vm, vcpuid, 0);
1120 else
1121 vm_inject_gp(vm, vcpuid);
1122 return (-1);
1123 }
1124
1125 if (vie_canonical_check(paging->cpu_mode, *gla)) {
1126 if (seg == VM_REG_GUEST_SS)
1127 vm_inject_ss(vm, vcpuid, 0);
1128 else
1129 vm_inject_gp(vm, vcpuid);
1130 return (-1);
1131 }
1132
1133 if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
1134 vm_inject_ac(vm, vcpuid, 0);
1135 return (-1);
1136 }
1137
1138 return (0);
1139 }
1140
1141 static int
1142 vie_emulate_movs(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1143 {
1144 struct vm_copyinfo copyinfo[2];
1145 uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
1146 uint64_t rcx, rdi, rsi, rflags;
1147 int error, fault, opsize, seg, repeat;
1148 struct vm_guest_paging *paging;
1149
1150 opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
1151 val = 0;
1152 error = 0;
1153 paging = &vie->paging;
1154
1155 /*
1156 * XXX although the MOVS instruction is only supposed to be used with
1157 * the "rep" prefix some guests like FreeBSD will use "repnz" instead.
1158 *
1159 * Empirically the "repnz" prefix has identical behavior to "rep"
1160 * and the zero flag does not make a difference.
1161 */
1162 repeat = vie->repz_present | vie->repnz_present;
1163
1164 if (repeat) {
1165 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
1166 KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
1167
1168 /*
1169 * The count register is %rcx, %ecx or %cx depending on the
1170 * address size of the instruction.
1171 */
1172 if ((rcx & vie_size2mask(vie->addrsize)) == 0) {
1173 error = 0;
1174 goto done;
1175 }
1176 }
1177
1178 /*
1179 * Source Destination Comments
1180 * --------------------------------------------
1181 * (1) memory memory n/a
1182 * (2) memory mmio emulated
1183 * (3) mmio memory emulated
1184 * (4) mmio mmio emulated
1185 *
1186 * At this point we don't have sufficient information to distinguish
1187 * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
1188 * out because it will succeed only when operating on regular memory.
1189 *
1190 * XXX the emulation doesn't properly handle the case where 'gpa'
1191 * is straddling the boundary between the normal memory and MMIO.
1192 */
1193
1194 seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
1195 if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize, PROT_READ, seg,
1196 VM_REG_GUEST_RSI, &srcaddr) != 0) {
1197 goto done;
1198 }
1199
1200 error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
1201 copyinfo, nitems(copyinfo), &fault);
1202 if (error == 0) {
1203 if (fault)
1204 goto done; /* Resume guest to handle fault */
1205
1206 /*
1207 * case (2): read from system memory and write to mmio.
1208 */
1209 vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
1210 vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
1211 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize);
1212 if (error)
1213 goto done;
1214 } else {
1215 /*
1216 * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
1217 * if 'srcaddr' is in the mmio space.
1218 */
1219
1220 if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize,
1221 PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI,
1222 &dstaddr) != 0) {
1223 goto done;
1224 }
1225
1226 error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
1227 PROT_WRITE, copyinfo, nitems(copyinfo), &fault);
1228 if (error == 0) {
1229 if (fault)
1230 goto done; /* Resume guest to handle fault */
1231
1232 /*
1233 * case (3): read from MMIO and write to system memory.
1234 *
1235 * A MMIO read can have side-effects so we
1236 * commit to it only after vm_copy_setup() is
1237 * successful. If a page-fault needs to be
1238 * injected into the guest then it will happen
1239 * before the MMIO read is attempted.
1240 */
1241 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val,
1242 opsize);
1243
1244 if (error == 0) {
1245 vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
1246 }
1247 /*
1248 * Regardless of whether the MMIO read was successful or
1249 * not, the copy resources must be cleaned up.
1250 */
1251 vm_copy_teardown(vm, vcpuid, copyinfo,
1252 nitems(copyinfo));
1253 if (error != 0) {
1254 goto done;
1255 }
1256 } else {
1257 /*
1258 * Case (4): read from and write to mmio.
1259 *
1260 * Commit to the MMIO read/write (with potential
1261 * side-effects) only after we are sure that the
1262 * instruction is not going to be restarted due
1263 * to address translation faults.
1264 */
1265 error = vm_gla2gpa(vm, vcpuid, paging, srcaddr,
1266 PROT_READ, &srcgpa, &fault);
1267 if (error || fault)
1268 goto done;
1269
1270 error = vm_gla2gpa(vm, vcpuid, paging, dstaddr,
1271 PROT_WRITE, &dstgpa, &fault);
1272 if (error || fault)
1273 goto done;
1274
1275 error = vie_mmio_read(vie, vm, vcpuid, srcgpa, &val,
1276 opsize);
1277 if (error)
1278 goto done;
1279
1280 error = vie_mmio_write(vie, vm, vcpuid, dstgpa, val,
1281 opsize);
1282 if (error)
1283 goto done;
1284 }
1285 }
1286
1287 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
1288 KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
1289
1290 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
1291 KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
1292
1293 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1294 KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
1295
1296 if (rflags & PSL_D) {
1297 rsi -= opsize;
1298 rdi -= opsize;
1299 } else {
1300 rsi += opsize;
1301 rdi += opsize;
1302 }
1303
1304 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
1305 vie->addrsize);
1306 KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
1307
1308 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
1309 vie->addrsize);
1310 KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
1311
1312 if (repeat) {
1313 rcx = rcx - 1;
1314 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
1315 rcx, vie->addrsize);
1316 KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
1317
1318 /*
1319 * Repeat the instruction if the count register is not zero.
1320 */
1321 if ((rcx & vie_size2mask(vie->addrsize)) != 0)
1322 return (vie_repeat(vie));
1323 }
1324 done:
1325 return (error);
1326 }
1327
1328 static int
1329 vie_emulate_stos(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1330 {
1331 int error, opsize, repeat;
1332 uint64_t val;
1333 uint64_t rcx, rdi, rflags;
1334
1335 opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
1336 repeat = vie->repz_present | vie->repnz_present;
1337
1338 if (repeat) {
1339 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
1340 KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
1341
1342 /*
1343 * The count register is %rcx, %ecx or %cx depending on the
1344 * address size of the instruction.
1345 */
1346 if ((rcx & vie_size2mask(vie->addrsize)) == 0)
1347 return (0);
1348 }
1349
1350 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
1351 KASSERT(!error, ("%s: error %d getting rax", __func__, error));
1352
1353 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize);
1354 if (error)
1355 return (error);
1356
1357 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
1358 KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
1359
1360 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1361 KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
1362
1363 if (rflags & PSL_D)
1364 rdi -= opsize;
1365 else
1366 rdi += opsize;
1367
1368 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
1369 vie->addrsize);
1370 KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
1371
1372 if (repeat) {
1373 rcx = rcx - 1;
1374 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
1375 rcx, vie->addrsize);
1376 KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
1377
1378 /*
1379 * Repeat the instruction if the count register is not zero.
1380 */
1381 if ((rcx & vie_size2mask(vie->addrsize)) != 0)
1382 return (vie_repeat(vie));
1383 }
1384
1385 return (0);
1386 }
1387
1388 static int
1389 vie_emulate_and(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1390 {
1391 int error, size;
1392 enum vm_reg_name reg;
1393 uint64_t result, rflags, rflags2, val1, val2;
1394
1395 size = vie->opsize;
1396 error = EINVAL;
1397
1398 switch (vie->op.op_byte) {
1399 case 0x23:
1400 /*
1401 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
1402 * result in reg.
1403 *
1404 * 23/r and r16, r/m16
1405 * 23/r and r32, r/m32
1406 * REX.W + 23/r and r64, r/m64
1407 */
1408
1409 /* get the first operand */
1410 reg = gpr_map[vie->reg];
1411 error = vm_get_register(vm, vcpuid, reg, &val1);
1412 if (error)
1413 break;
1414
1415 /* get the second operand */
1416 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
1417 if (error)
1418 break;
1419
1420 /* perform the operation and write the result */
1421 result = val1 & val2;
1422 error = vie_update_register(vm, vcpuid, reg, result, size);
1423 break;
1424 case 0x81:
1425 case 0x83:
1426 /*
1427 * AND mem (ModRM:r/m) with immediate and store the
1428 * result in mem.
1429 *
1430 * 81 /4 and r/m16, imm16
1431 * 81 /4 and r/m32, imm32
1432 * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64
1433 *
1434 * 83 /4 and r/m16, imm8 sign-extended to 16
1435 * 83 /4 and r/m32, imm8 sign-extended to 32
1436 * REX.W + 83/4 and r/m64, imm8 sign-extended to 64
1437 */
1438
1439 /* get the first operand */
1440 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size);
1441 if (error)
1442 break;
1443
1444 /*
1445 * perform the operation with the pre-fetched immediate
1446 * operand and write the result
1447 */
1448 result = val1 & vie->immediate;
1449 error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size);
1450 break;
1451 default:
1452 break;
1453 }
1454 if (error)
1455 return (error);
1456
1457 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1458 if (error)
1459 return (error);
1460
1461 /*
1462 * OF and CF are cleared; the SF, ZF and PF flags are set according
1463 * to the result; AF is undefined.
1464 *
1465 * The updated status flags are obtained by subtracting 0 from 'result'.
1466 */
1467 rflags2 = getcc(size, result, 0);
1468 rflags &= ~RFLAGS_STATUS_BITS;
1469 rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1470
1471 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
1472 return (error);
1473 }
1474
1475 static int
1476 vie_emulate_or(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1477 {
1478 int error, size;
1479 enum vm_reg_name reg;
1480 uint64_t result, rflags, rflags2, val1, val2;
1481
1482 size = vie->opsize;
1483 error = EINVAL;
1484
1485 switch (vie->op.op_byte) {
1486 case 0x0B:
1487 /*
1488 * OR reg (ModRM:reg) and mem (ModRM:r/m) and store the
1489 * result in reg.
1490 *
1491 * 0b/r or r16, r/m16
1492 * 0b/r or r32, r/m32
1493 * REX.W + 0b/r or r64, r/m64
1494 */
1495
1496 /* get the first operand */
1497 reg = gpr_map[vie->reg];
1498 error = vm_get_register(vm, vcpuid, reg, &val1);
1499 if (error)
1500 break;
1501
1502 /* get the second operand */
1503 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
1504 if (error)
1505 break;
1506
1507 /* perform the operation and write the result */
1508 result = val1 | val2;
1509 error = vie_update_register(vm, vcpuid, reg, result, size);
1510 break;
1511 case 0x81:
1512 case 0x83:
1513 /*
1514 * OR mem (ModRM:r/m) with immediate and store the
1515 * result in mem.
1516 *
1517 * 81 /1 or r/m16, imm16
1518 * 81 /1 or r/m32, imm32
1519 * REX.W + 81 /1 or r/m64, imm32 sign-extended to 64
1520 *
1521 * 83 /1 or r/m16, imm8 sign-extended to 16
1522 * 83 /1 or r/m32, imm8 sign-extended to 32
1523 * REX.W + 83/1 or r/m64, imm8 sign-extended to 64
1524 */
1525
1526 /* get the first operand */
1527 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size);
1528 if (error)
1529 break;
1530
1531 /*
1532 * perform the operation with the pre-fetched immediate
1533 * operand and write the result
1534 */
1535 result = val1 | vie->immediate;
1536 error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size);
1537 break;
1538 default:
1539 break;
1540 }
1541 if (error)
1542 return (error);
1543
1544 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1545 if (error)
1546 return (error);
1547
1548 /*
1549 * OF and CF are cleared; the SF, ZF and PF flags are set according
1550 * to the result; AF is undefined.
1551 *
1552 * The updated status flags are obtained by subtracting 0 from 'result'.
1553 */
1554 rflags2 = getcc(size, result, 0);
1555 rflags &= ~RFLAGS_STATUS_BITS;
1556 rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1557
1558 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
1559 return (error);
1560 }
1561
1562 static int
1563 vie_emulate_cmp(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1564 {
1565 int error, size;
1566 uint64_t regop, memop, op1, op2, rflags, rflags2;
1567 enum vm_reg_name reg;
1568
1569 size = vie->opsize;
1570 switch (vie->op.op_byte) {
1571 case 0x39:
1572 case 0x3B:
1573 /*
1574 * 39/r CMP r/m16, r16
1575 * 39/r CMP r/m32, r32
1576 * REX.W 39/r CMP r/m64, r64
1577 *
1578 * 3B/r CMP r16, r/m16
1579 * 3B/r CMP r32, r/m32
1580 * REX.W + 3B/r CMP r64, r/m64
1581 *
1582 * Compare the first operand with the second operand and
1583 * set status flags in EFLAGS register. The comparison is
1584 * performed by subtracting the second operand from the first
1585 * operand and then setting the status flags.
1586 */
1587
1588 /* Get the register operand */
1589 reg = gpr_map[vie->reg];
1590 error = vm_get_register(vm, vcpuid, reg, &regop);
1591 if (error)
1592 return (error);
1593
1594 /* Get the memory operand */
1595 error = vie_mmio_read(vie, vm, vcpuid, gpa, &memop, size);
1596 if (error)
1597 return (error);
1598
1599 if (vie->op.op_byte == 0x3B) {
1600 op1 = regop;
1601 op2 = memop;
1602 } else {
1603 op1 = memop;
1604 op2 = regop;
1605 }
1606 rflags2 = getcc(size, op1, op2);
1607 break;
1608 case 0x80:
1609 case 0x81:
1610 case 0x83:
1611 /*
1612 * 80 /7 cmp r/m8, imm8
1613 * REX + 80 /7 cmp r/m8, imm8
1614 *
1615 * 81 /7 cmp r/m16, imm16
1616 * 81 /7 cmp r/m32, imm32
1617 * REX.W + 81 /7 cmp r/m64, imm32 sign-extended to 64
1618 *
1619 * 83 /7 cmp r/m16, imm8 sign-extended to 16
1620 * 83 /7 cmp r/m32, imm8 sign-extended to 32
1621 * REX.W + 83 /7 cmp r/m64, imm8 sign-extended to 64
1622 *
1623 * Compare mem (ModRM:r/m) with immediate and set
1624 * status flags according to the results. The
1625 * comparison is performed by subtracting the
1626 * immediate from the first operand and then setting
1627 * the status flags.
1628 *
1629 */
1630 if (vie->op.op_byte == 0x80)
1631 size = 1;
1632
1633 /* get the first operand */
1634 error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
1635 if (error)
1636 return (error);
1637
1638 rflags2 = getcc(size, op1, vie->immediate);
1639 break;
1640 default:
1641 return (EINVAL);
1642 }
1643 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1644 if (error)
1645 return (error);
1646 rflags &= ~RFLAGS_STATUS_BITS;
1647 rflags |= rflags2 & RFLAGS_STATUS_BITS;
1648
1649 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
1650 return (error);
1651 }
1652
1653 static int
1654 vie_emulate_test(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1655 {
1656 int error, size;
1657 uint64_t op1, rflags, rflags2;
1658
1659 size = vie->opsize;
1660 error = EINVAL;
1661
1662 switch (vie->op.op_byte) {
1663 case 0xF6:
1664 /*
1665 * F6 /0 test r/m8, imm8
1666 *
1667 * Test mem (ModRM:r/m) with immediate and set status
1668 * flags according to the results. The comparison is
1669 * performed by ANDing the immediate with the first
1670 * operand and then setting the status flags.
1671 */
1672 if ((vie->reg & 7) != 0)
1673 return (EINVAL);
1674
1675 size = 1; /* override for byte operation */
1676
1677 error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
1678 if (error)
1679 return (error);
1680
1681 rflags2 = getandflags(size, op1, vie->immediate);
1682 break;
1683 case 0xF7:
1684 /*
1685 * F7 /0 test r/m16, imm16
1686 * F7 /0 test r/m32, imm32
1687 * REX.W + F7 /0 test r/m64, imm32 sign-extended to 64
1688 *
1689 * Test mem (ModRM:r/m) with immediate and set status
1690 * flags according to the results. The comparison is
1691 * performed by ANDing the immediate with the first
1692 * operand and then setting the status flags.
1693 */
1694 if ((vie->reg & 7) != 0)
1695 return (EINVAL);
1696
1697 error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
1698 if (error)
1699 return (error);
1700
1701 rflags2 = getandflags(size, op1, vie->immediate);
1702 break;
1703 default:
1704 return (EINVAL);
1705 }
1706 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1707 if (error)
1708 return (error);
1709
1710 /*
1711 * OF and CF are cleared; the SF, ZF and PF flags are set according
1712 * to the result; AF is undefined.
1713 */
1714 rflags &= ~RFLAGS_STATUS_BITS;
1715 rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1716
1717 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
1718 return (error);
1719 }
1720
1721 static int
1722 vie_emulate_bextr(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1723 {
1724 uint64_t src1, src2, dst, rflags;
1725 unsigned start, len, size;
1726 int error;
1727 struct vm_guest_paging *paging;
1728
1729 size = vie->opsize;
1730 error = EINVAL;
1731 paging = &vie->paging;
1732
1733 /*
1734 * VEX.LZ.0F38.W0 F7 /r BEXTR r32a, r/m32, r32b
1735 * VEX.LZ.0F38.W1 F7 /r BEXTR r64a, r/m64, r64b
1736 *
1737 * Destination operand is ModRM:reg. Source operands are ModRM:r/m and
1738 * Vex.vvvv.
1739 *
1740 * Operand size is always 32-bit if not in 64-bit mode (W1 is ignored).
1741 */
1742 if (size != 4 && paging->cpu_mode != CPU_MODE_64BIT)
1743 size = 4;
1744
1745 /*
1746 * Extracts contiguous bits from the first /source/ operand (second
1747 * operand) using an index and length specified in the second /source/
1748 * operand (third operand).
1749 */
1750 error = vie_mmio_read(vie, vm, vcpuid, gpa, &src1, size);
1751 if (error)
1752 return (error);
1753 error = vm_get_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2);
1754 if (error)
1755 return (error);
1756 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1757 if (error)
1758 return (error);
1759
1760 start = (src2 & 0xff);
1761 len = (src2 & 0xff00) >> 8;
1762
1763 /* If no bits are extracted, the destination register is cleared. */
1764 dst = 0;
1765
1766 /* If START exceeds the operand size, no bits are extracted. */
1767 if (start > size * 8)
1768 goto done;
1769 /* Length is bounded by both the destination size and start offset. */
1770 if (start + len > size * 8)
1771 len = (size * 8) - start;
1772 if (len == 0)
1773 goto done;
1774
1775 if (start > 0)
1776 src1 = (src1 >> start);
1777 if (len < 64)
1778 src1 = src1 & ((1ull << len) - 1);
1779 dst = src1;
1780
1781 done:
1782 error = vie_update_register(vm, vcpuid, gpr_map[vie->reg], dst, size);
1783 if (error)
1784 return (error);
1785
1786 /*
1787 * AMD: OF, CF cleared; SF/AF/PF undefined; ZF set by result.
1788 * Intel: ZF is set by result; AF/SF/PF undefined; all others cleared.
1789 */
1790 rflags &= ~RFLAGS_STATUS_BITS;
1791 if (dst == 0)
1792 rflags |= PSL_Z;
1793 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags,
1794 8);
1795 return (error);
1796 }
1797
1798 static int
1799 vie_emulate_add(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1800 {
1801 int error, size;
1802 uint64_t nval, rflags, rflags2, val1, val2;
1803 enum vm_reg_name reg;
1804
1805 size = vie->opsize;
1806 error = EINVAL;
1807
1808 switch (vie->op.op_byte) {
1809 case 0x03:
1810 /*
1811 * ADD r/m to r and store the result in r
1812 *
1813 * 03/r ADD r16, r/m16
1814 * 03/r ADD r32, r/m32
1815 * REX.W + 03/r ADD r64, r/m64
1816 */
1817
1818 /* get the first operand */
1819 reg = gpr_map[vie->reg];
1820 error = vm_get_register(vm, vcpuid, reg, &val1);
1821 if (error)
1822 break;
1823
1824 /* get the second operand */
1825 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
1826 if (error)
1827 break;
1828
1829 /* perform the operation and write the result */
1830 nval = val1 + val2;
1831 error = vie_update_register(vm, vcpuid, reg, nval, size);
1832 break;
1833 default:
1834 break;
1835 }
1836
1837 if (!error) {
1838 rflags2 = getaddflags(size, val1, val2);
1839 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1840 &rflags);
1841 if (error)
1842 return (error);
1843
1844 rflags &= ~RFLAGS_STATUS_BITS;
1845 rflags |= rflags2 & RFLAGS_STATUS_BITS;
1846 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1847 rflags, 8);
1848 }
1849
1850 return (error);
1851 }
1852
1853 static int
1854 vie_emulate_sub(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1855 {
1856 int error, size;
1857 uint64_t nval, rflags, rflags2, val1, val2;
1858 enum vm_reg_name reg;
1859
1860 size = vie->opsize;
1861 error = EINVAL;
1862
1863 switch (vie->op.op_byte) {
1864 case 0x2B:
1865 /*
1866 * SUB r/m from r and store the result in r
1867 *
1868 * 2B/r SUB r16, r/m16
1869 * 2B/r SUB r32, r/m32
1870 * REX.W + 2B/r SUB r64, r/m64
1871 */
1872
1873 /* get the first operand */
1874 reg = gpr_map[vie->reg];
1875 error = vm_get_register(vm, vcpuid, reg, &val1);
1876 if (error)
1877 break;
1878
1879 /* get the second operand */
1880 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
1881 if (error)
1882 break;
1883
1884 /* perform the operation and write the result */
1885 nval = val1 - val2;
1886 error = vie_update_register(vm, vcpuid, reg, nval, size);
1887 break;
1888 default:
1889 break;
1890 }
1891
1892 if (!error) {
1893 rflags2 = getcc(size, val1, val2);
1894 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1895 &rflags);
1896 if (error)
1897 return (error);
1898
1899 rflags &= ~RFLAGS_STATUS_BITS;
1900 rflags |= rflags2 & RFLAGS_STATUS_BITS;
1901 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1902 rflags, 8);
1903 }
1904
1905 return (error);
1906 }
1907
1908 static int
1909 vie_emulate_mul(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1910 {
1911 int error, size;
1912 uint64_t rflags, rflags2, val1, val2;
1913 __int128_t nval;
1914 enum vm_reg_name reg;
1915 ulong_t (*getflags)(int, uint64_t, uint64_t) = NULL;
1916
1917 size = vie->opsize;
1918 error = EINVAL;
1919
1920 switch (vie->op.op_byte) {
1921 case 0xAF:
1922 /*
1923 * Multiply the contents of a destination register by
1924 * the contents of a register or memory operand and
1925 * put the signed result in the destination register.
1926 *
1927 * AF/r IMUL r16, r/m16
1928 * AF/r IMUL r32, r/m32
1929 * REX.W + AF/r IMUL r64, r/m64
1930 */
1931
1932 getflags = getimulflags;
1933
1934 /* get the first operand */
1935 reg = gpr_map[vie->reg];
1936 error = vm_get_register(vm, vcpuid, reg, &val1);
1937 if (error != 0)
1938 break;
1939
1940 /* get the second operand */
1941 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
1942 if (error != 0)
1943 break;
1944
1945 /* perform the operation and write the result */
1946 nval = (int64_t)val1 * (int64_t)val2;
1947
1948 error = vie_update_register(vm, vcpuid, reg, nval, size);
1949
1950 DTRACE_PROBE4(vie__imul,
1951 const char *, vie_regnum_name(vie->reg, size),
1952 uint64_t, val1, uint64_t, val2, __uint128_t, nval);
1953
1954 break;
1955 default:
1956 break;
1957 }
1958
1959 if (error == 0) {
1960 rflags2 = getflags(size, val1, val2);
1961 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1962 &rflags);
1963 if (error)
1964 return (error);
1965
1966 rflags &= ~RFLAGS_STATUS_BITS;
1967 rflags |= rflags2 & RFLAGS_STATUS_BITS;
1968 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1969 rflags, 8);
1970
1971 DTRACE_PROBE2(vie__imul__rflags,
1972 uint64_t, rflags, uint64_t, rflags2);
1973 }
1974
1975 return (error);
1976 }
1977
1978 static int
1979 vie_emulate_stack_op(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
1980 {
1981 struct vm_copyinfo copyinfo[2];
1982 struct seg_desc ss_desc;
1983 uint64_t cr0, rflags, rsp, stack_gla, val;
1984 int error, fault, size, stackaddrsize, pushop;
1985 struct vm_guest_paging *paging;
1986
1987 val = 0;
1988 size = vie->opsize;
1989 pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
1990 paging = &vie->paging;
1991
1992 /*
1993 * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
1994 */
1995 if (paging->cpu_mode == CPU_MODE_REAL) {
1996 stackaddrsize = 2;
1997 } else if (paging->cpu_mode == CPU_MODE_64BIT) {
1998 /*
1999 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
2000 * - Stack pointer size is always 64-bits.
2001 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
2002 * - 16-bit PUSH/POP is supported by using the operand size
2003 * override prefix (66H).
2004 */
2005 stackaddrsize = 8;
2006 size = vie->opsize_override ? 2 : 8;
2007 } else {
2008 /*
2009 * In protected or compatibility mode the 'B' flag in the
2010 * stack-segment descriptor determines the size of the
2011 * stack pointer.
2012 */
2013 error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
2014 KASSERT(error == 0, ("%s: error %d getting SS descriptor",
2015 __func__, error));
2016 if (SEG_DESC_DEF32(ss_desc.access))
2017 stackaddrsize = 4;
2018 else
2019 stackaddrsize = 2;
2020 }
2021
2022 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
2023 KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
2024
2025 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
2026 KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
2027
2028 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
2029 KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
2030 if (pushop) {
2031 rsp -= size;
2032 }
2033
2034 if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
2035 rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ,
2036 &stack_gla)) {
2037 vm_inject_ss(vm, vcpuid, 0);
2038 return (0);
2039 }
2040
2041 if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
2042 vm_inject_ss(vm, vcpuid, 0);
2043 return (0);
2044 }
2045
2046 if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
2047 vm_inject_ac(vm, vcpuid, 0);
2048 return (0);
2049 }
2050
2051 error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size,
2052 pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo),
2053 &fault);
2054 if (error || fault)
2055 return (error);
2056
2057 if (pushop) {
2058 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
2059 if (error == 0)
2060 vm_copyout(vm, vcpuid, &val, copyinfo, size);
2061 } else {
2062 vm_copyin(vm, vcpuid, copyinfo, &val, size);
2063 error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
2064 rsp += size;
2065 }
2066 vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
2067
2068 if (error == 0) {
2069 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
2070 stackaddrsize);
2071 KASSERT(error == 0, ("error %d updating rsp", error));
2072 }
2073 return (error);
2074 }
2075
2076 static int
2077 vie_emulate_push(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
2078 {
2079 int error;
2080
2081 /*
2082 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
2083 *
2084 * PUSH is part of the group 5 extended opcodes and is identified
2085 * by ModRM:reg = b110.
2086 */
2087 if ((vie->reg & 7) != 6)
2088 return (EINVAL);
2089
2090 error = vie_emulate_stack_op(vie, vm, vcpuid, gpa);
2091 return (error);
2092 }
2093
2094 static int
2095 vie_emulate_pop(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
2096 {
2097 int error;
2098
2099 /*
2100 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
2101 *
2102 * POP is part of the group 1A extended opcodes and is identified
2103 * by ModRM:reg = b000.
2104 */
2105 if ((vie->reg & 7) != 0)
2106 return (EINVAL);
2107
2108 error = vie_emulate_stack_op(vie, vm, vcpuid, gpa);
2109 return (error);
2110 }
2111
2112 static int
2113 vie_emulate_group1(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
2114 {
2115 int error;
2116
2117 switch (vie->reg & 7) {
2118 case 0x1: /* OR */
2119 error = vie_emulate_or(vie, vm, vcpuid, gpa);
2120 break;
2121 case 0x4: /* AND */
2122 error = vie_emulate_and(vie, vm, vcpuid, gpa);
2123 break;
2124 case 0x7: /* CMP */
2125 error = vie_emulate_cmp(vie, vm, vcpuid, gpa);
2126 break;
2127 default:
2128 error = EINVAL;
2129 break;
2130 }
2131
2132 return (error);
2133 }
2134
2135 static int
2136 vie_emulate_bittest(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa)
2137 {
2138 uint64_t val, rflags;
2139 int error, bitmask, bitoff;
2140
2141 /*
2142 * 0F BA is a Group 8 extended opcode.
2143 *
2144 * Currently we only emulate the 'Bit Test' instruction which is
2145 * identified by a ModR/M:reg encoding of 100b.
2146 */
2147 if ((vie->reg & 7) != 4)
2148 return (EINVAL);
2149
2150 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
2151 KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
2152
2153 error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, vie->opsize);
2154 if (error)
2155 return (error);
2156
2157 /*
2158 * Intel SDM, Vol 2, Table 3-2:
2159 * "Range of Bit Positions Specified by Bit Offset Operands"
2160 */
2161 bitmask = vie->opsize * 8 - 1;
2162 bitoff = vie->immediate & bitmask;
2163
2164 /* Copy the bit into the Carry flag in %rflags */
2165 if (val & (1UL << bitoff))
2166 rflags |= PSL_C;
2167 else
2168 rflags &= ~PSL_C;
2169
2170 error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
2171 KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error));
2172
2173 return (0);
2174 }
2175
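/*
 * Illustrative sketch, not part of the original file: the bit offset of a BT
 * immediate is reduced modulo the operand width before the selected bit is
 * copied into CF.  The hypothetical example_bt_carry() helper below mirrors
 * the masking done in vie_emulate_bittest() above.
 */
static inline uint64_t
example_bt_carry(uint64_t val, uint64_t imm, int opsize)
{
	int bitmask = opsize * 8 - 1;	/* 15, 31 or 63 */
	int bitoff = imm & bitmask;

	/* This is the value that vie_emulate_bittest() copies into PSL_C. */
	return ((val >> bitoff) & 1);
}
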
2176 static int
2177 vie_emulate_twob_group15(struct vie *vie, struct vm *vm, int vcpuid,
2178 uint64_t gpa)
2179 {
2180 int error;
2181 uint64_t buf;
2182
2183 switch (vie->reg & 7) {
2184 case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */
2185 if (vie->mod == 0x3) {
2186 /*
2187 * SFENCE. Ignore it, VM exit provides enough
2188 * barriers on its own.
2189 */
2190 error = 0;
2191 } else {
2192 /*
2193 * CLFLUSH, CLFLUSHOPT. Only check for access
2194 * rights.
2195 */
2196 error = vie_mmio_read(vie, vm, vcpuid, gpa, &buf, 1);
2197 }
2198 break;
2199 default:
2200 error = EINVAL;
2201 break;
2202 }
2203
2204 return (error);
2205 }
2206
2207 static int
2208 vie_emulate_clts(struct vie *vie, struct vm *vm, int vcpuid)
2209 {
2210 uint64_t val;
2211 int error __maybe_unused;
2212
2213 if (vie->paging.cpl != 0) {
2214 vm_inject_gp(vm, vcpuid);
2215 vie->num_processed = 0;
2216 return (0);
2217 }
2218
2219 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &val);
2220 ASSERT(error == 0);
2221
2222 /* Clear %cr0.TS */
2223 val &= ~CR0_TS;
2224
2225 error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR0, val);
2226 ASSERT(error == 0);
2227
2228 return (0);
2229 }
2230
2231 static int
2232 vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
2233 uint64_t *rval, int bytes)
2234 {
2235 int err;
2236
2237 if (vie->mmio_req_read.state == VR_DONE) {
2238 ASSERT(vie->mmio_req_read.bytes == bytes);
2239 ASSERT(vie->mmio_req_read.gpa == gpa);
2240
2241 *rval = vie->mmio_req_read.data;
2242 return (0);
2243 }
2244
2245 err = vm_service_mmio_read(vm, cpuid, gpa, rval, bytes);
2246 if (err == 0) {
2247 /*
2248 * A successful read from an in-kernel-emulated device may come
2249 * with side effects, so stash the result in case it's used for
2250 * an instruction which subsequently needs to issue an MMIO
2251 * write to userspace.
2252 */
2253 ASSERT(vie->mmio_req_read.state == VR_NONE);
2254
2255 vie->mmio_req_read.bytes = bytes;
2256 vie->mmio_req_read.gpa = gpa;
2257 vie->mmio_req_read.data = *rval;
2258 vie->mmio_req_read.state = VR_DONE;
2259
2260 } else if (err == ESRCH) {
2261 /* Hope that userspace emulation can fulfill this read */
2262 vie->mmio_req_read.bytes = bytes;
2263 vie->mmio_req_read.gpa = gpa;
2264 vie->mmio_req_read.state = VR_PENDING;
2265 vie->status |= VIES_PENDING_MMIO;
2266 } else if (err < 0) {
2267 /*
2268 * The MMIO read failed in such a way that fallback to handling
2269 * in userspace is required.
2270 */
2271 vie->status |= VIES_USER_FALLBACK;
2272 }
2273 return (err);
2274 }
2275
2276 static int
2277 vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
2278 uint64_t wval, int bytes)
2279 {
2280 int err;
2281
2282 if (vie->mmio_req_write.state == VR_DONE) {
2283 ASSERT(vie->mmio_req_write.bytes == bytes);
2284 ASSERT(vie->mmio_req_write.gpa == gpa);
2285
2286 return (0);
2287 }
2288
2289 err = vm_service_mmio_write(vm, cpuid, gpa, wval, bytes);
2290 if (err == 0) {
2291 /*
2292 * A successful write to an in-kernel-emulated device probably
2293 * results in side effects, so stash the fact that such a write
2294 * succeeded in case the operation requires other work.
2295 */
2296 vie->mmio_req_write.bytes = bytes;
2297 vie->mmio_req_write.gpa = gpa;
2298 vie->mmio_req_write.data = wval;
2299 vie->mmio_req_write.state = VR_DONE;
2300 } else if (err == ESRCH) {
2301 /* Hope that userspace emulation can fulfill this write */
2302 vie->mmio_req_write.bytes = bytes;
2303 vie->mmio_req_write.gpa = gpa;
2304 vie->mmio_req_write.data = wval;
2305 vie->mmio_req_write.state = VR_PENDING;
2306 vie->status |= VIES_PENDING_MMIO;
2307 } else if (err < 0) {
2308 /*
2309 * The MMIO write failed in such a way that fallback to handling
2310 * in userspace is required.
2311 */
2312 vie->status |= VIES_USER_FALLBACK;
2313 }
2314 return (err);
2315 }
2316
2317 int
2318 vie_emulate_mmio(struct vie *vie, struct vm *vm, int vcpuid)
2319 {
2320 int error;
2321 uint64_t gpa;
2322
2323 if ((vie->status & (VIES_INST_DECODE | VIES_MMIO)) !=
2324 (VIES_INST_DECODE | VIES_MMIO)) {
2325 return (EINVAL);
2326 }
2327
2328 gpa = vie->mmio_gpa;
2329
2330 switch (vie->op.op_type) {
2331 case VIE_OP_TYPE_GROUP1:
2332 error = vie_emulate_group1(vie, vm, vcpuid, gpa);
2333 break;
2334 case VIE_OP_TYPE_POP:
2335 error = vie_emulate_pop(vie, vm, vcpuid, gpa);
2336 break;
2337 case VIE_OP_TYPE_PUSH:
2338 error = vie_emulate_push(vie, vm, vcpuid, gpa);
2339 break;
2340 case VIE_OP_TYPE_CMP:
2341 error = vie_emulate_cmp(vie, vm, vcpuid, gpa);
2342 break;
2343 case VIE_OP_TYPE_MOV:
2344 error = vie_emulate_mov(vie, vm, vcpuid, gpa);
2345 break;
2346 case VIE_OP_TYPE_MOVSX:
2347 case VIE_OP_TYPE_MOVZX:
2348 error = vie_emulate_movx(vie, vm, vcpuid, gpa);
2349 break;
2350 case VIE_OP_TYPE_MOVS:
2351 error = vie_emulate_movs(vie, vm, vcpuid, gpa);
2352 break;
2353 case VIE_OP_TYPE_STOS:
2354 error = vie_emulate_stos(vie, vm, vcpuid, gpa);
2355 break;
2356 case VIE_OP_TYPE_AND:
2357 error = vie_emulate_and(vie, vm, vcpuid, gpa);
2358 break;
2359 case VIE_OP_TYPE_OR:
2360 error = vie_emulate_or(vie, vm, vcpuid, gpa);
2361 break;
2362 case VIE_OP_TYPE_SUB:
2363 error = vie_emulate_sub(vie, vm, vcpuid, gpa);
2364 break;
2365 case VIE_OP_TYPE_BITTEST:
2366 error = vie_emulate_bittest(vie, vm, vcpuid, gpa);
2367 break;
2368 case VIE_OP_TYPE_TWOB_GRP15:
2369 error = vie_emulate_twob_group15(vie, vm, vcpuid, gpa);
2370 break;
2371 case VIE_OP_TYPE_ADD:
2372 error = vie_emulate_add(vie, vm, vcpuid, gpa);
2373 break;
2374 case VIE_OP_TYPE_TEST:
2375 error = vie_emulate_test(vie, vm, vcpuid, gpa);
2376 break;
2377 case VIE_OP_TYPE_BEXTR:
2378 error = vie_emulate_bextr(vie, vm, vcpuid, gpa);
2379 break;
2380 case VIE_OP_TYPE_MUL:
2381 error = vie_emulate_mul(vie, vm, vcpuid, gpa);
2382 break;
2383 default:
2384 error = EINVAL;
2385 break;
2386 }
2387
2388 if (error == ESRCH) {
2389 /* Return to userspace with the mmio request */
2390 return (-1);
2391 }
2392
2393 return (error);
2394 }
2395
2396 static int
2397 vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid,
2398 uint32_t *eax)
2399 {
2400 uint32_t mask, val;
2401 bool in;
2402 int err;
2403
2404 mask = vie_size2mask(vie->inout.bytes);
2405 in = (vie->inout.flags & INOUT_IN) != 0;
2406
2407 if (!in) {
2408 val = *eax & mask;
2409 }
2410
2411 if (vie->inout_req_state != VR_DONE) {
2412 err = vm_ioport_access(vm, vcpuid, in, vie->inout.port,
2413 vie->inout.bytes, &val);
2414 val &= mask;
2415 } else {
2416 /*
2417 		 * This port access was already handled in userspace and the
2418 		 * result was injected back to be consumed now.
2419 */
2420 val = vie->inout_req_val & mask;
2421 vie->inout_req_state = VR_NONE;
2422 err = 0;
2423 }
2424
2425 if (err == ESRCH) {
2426 vie->status |= VIES_PENDING_INOUT;
2427 vie->inout_req_state = VR_PENDING;
2428 return (err);
2429 } else if (err != 0) {
2430 return (err);
2431 }
2432
2433 if (in) {
2434 *eax = (*eax & ~mask) | val;
2435 }
2436 return (0);
2437 }
2438
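/*
 * Illustrative sketch, not part of the original file: a 1- or 2-byte IN only
 * replaces the low bytes of %eax, which is why the code above merges the port
 * data under a size mask instead of overwriting the register.  The
 * example_in_merge() helper below is hypothetical.
 */
static inline uint32_t
example_in_merge(uint32_t eax, uint32_t portval, uint32_t mask)
{
	/* e.g. mask == 0xff for a 1-byte access, 0xffff for a 2-byte one */
	return ((eax & ~mask) | (portval & mask));
}
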
2439 static enum vm_reg_name
2440 vie_inout_segname(const struct vie *vie)
2441 {
2442 uint8_t segidx = vie->inout.segment;
2443 const enum vm_reg_name segmap[] = {
2444 VM_REG_GUEST_ES,
2445 VM_REG_GUEST_CS,
2446 VM_REG_GUEST_SS,
2447 VM_REG_GUEST_DS,
2448 VM_REG_GUEST_FS,
2449 VM_REG_GUEST_GS,
2450 };
2451 const uint8_t maxidx = (sizeof (segmap) / sizeof (segmap[0]));
2452
2453 if (segidx >= maxidx) {
2454 panic("unexpected segment index %u", segidx);
2455 }
2456 return (segmap[segidx]);
2457 }
2458
2459 static int
2460 vie_emulate_inout_str(struct vie *vie, struct vm *vm, int vcpuid)
2461 {
2462 uint8_t bytes, addrsize;
2463 uint64_t index, count = 0, gla, rflags;
2464 int prot, err, fault;
2465 bool in, repeat;
2466 enum vm_reg_name seg_reg, idx_reg;
2467 struct vm_copyinfo copyinfo[2];
2468
2469 in = (vie->inout.flags & INOUT_IN) != 0;
2470 bytes = vie->inout.bytes;
2471 addrsize = vie->inout.addrsize;
2472 prot = in ? PROT_WRITE : PROT_READ;
2473
2474 ASSERT(bytes == 1 || bytes == 2 || bytes == 4);
2475 ASSERT(addrsize == 2 || addrsize == 4 || addrsize == 8);
2476
2477 idx_reg = (in) ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
2478 seg_reg = vie_inout_segname(vie);
2479 err = vm_get_register(vm, vcpuid, idx_reg, &index);
2480 ASSERT(err == 0);
2481 index = index & vie_size2mask(addrsize);
2482
2483 repeat = (vie->inout.flags & INOUT_REP) != 0;
2484
2485 /* Count register */
2486 if (repeat) {
2487 err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &count);
2488 count &= vie_size2mask(addrsize);
2489
2490 if (count == 0) {
2491 /*
2492 * If we were asked to emulate a REP INS/OUTS when the
2493 * count register is zero, no further work is required.
2494 */
2495 return (0);
2496 }
2497 } else {
2498 count = 1;
2499 }
2500
2501 gla = 0;
2502 if (vie_get_gla(vie, vm, vcpuid, bytes, addrsize, prot, seg_reg,
2503 idx_reg, &gla) != 0) {
2504 /* vie_get_gla() already injected the appropriate fault */
2505 return (0);
2506 }
2507
2508 /*
2509 	 * The INS/OUTS emulation currently assumes that the memory target resides
2510 * within the guest system memory, rather than a device MMIO region. If
2511 * such a case becomes a necessity, that additional handling could be
2512 * put in place.
2513 */
2514 err = vm_copy_setup(vm, vcpuid, &vie->paging, gla, bytes, prot,
2515 copyinfo, nitems(copyinfo), &fault);
2516
2517 if (err) {
2518 /* Unrecoverable error */
2519 return (err);
2520 } else if (fault) {
2521 /* Resume guest to handle fault */
2522 return (0);
2523 }
2524
2525 if (!in) {
2526 vm_copyin(vm, vcpuid, copyinfo, &vie->inout.eax, bytes);
2527 }
2528
2529 err = vie_emulate_inout_port(vie, vm, vcpuid, &vie->inout.eax);
2530
2531 if (err == 0 && in) {
2532 vm_copyout(vm, vcpuid, &vie->inout.eax, copyinfo, bytes);
2533 }
2534
2535 vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
2536
2537 if (err == 0) {
2538 err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
2539 &rflags);
2540 ASSERT(err == 0);
2541
2542 /* Update index */
2543 if (rflags & PSL_D) {
2544 index -= bytes;
2545 } else {
2546 index += bytes;
2547 }
2548
2549 /* Update index register */
2550 err = vie_update_register(vm, vcpuid, idx_reg, index, addrsize);
2551 ASSERT(err == 0);
2552
2553 /*
2554 * Update count register only if the instruction had a repeat
2555 * prefix.
2556 */
2557 if ((vie->inout.flags & INOUT_REP) != 0) {
2558 count--;
2559 err = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
2560 count, addrsize);
2561 ASSERT(err == 0);
2562
2563 if (count != 0) {
2564 return (vie_repeat(vie));
2565 }
2566 }
2567 }
2568
2569 return (err);
2570 }
2571
2572 int
2573 vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid)
2574 {
2575 int err = 0;
2576
2577 if ((vie->status & VIES_INOUT) == 0) {
2578 return (EINVAL);
2579 }
2580
2581 if ((vie->inout.flags & INOUT_STR) == 0) {
2582 /*
2583 * For now, using the 'rep' prefixes with plain (non-string)
2584 * in/out is not supported.
2585 */
2586 if ((vie->inout.flags & INOUT_REP) != 0) {
2587 return (EINVAL);
2588 }
2589
2590 err = vie_emulate_inout_port(vie, vm, vcpuid, &vie->inout.eax);
2591 if (err == 0 && (vie->inout.flags & INOUT_IN) != 0) {
2592 /*
2593 * With the inX access now a success, the result needs
2594 * to be stored in the guest %rax.
2595 */
2596 err = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX,
2597 vie->inout.eax);
2598 VERIFY0(err);
2599 }
2600 } else {
2601 vie->status &= ~VIES_REPEAT;
2602 err = vie_emulate_inout_str(vie, vm, vcpuid);
2603
2604 }
2605 if (err < 0) {
2606 /*
2607 * Access to an I/O port failed in such a way that fallback to
2608 * handling in userspace is required.
2609 */
2610 vie->status |= VIES_USER_FALLBACK;
2611 } else if (err == ESRCH) {
2612 ASSERT(vie->status & VIES_PENDING_INOUT);
2613 /* Return to userspace with the in/out request */
2614 err = -1;
2615 }
2616
2617 return (err);
2618 }
2619
2620 int
2621 vie_emulate_other(struct vie *vie, struct vm *vm, int vcpuid)
2622 {
2623 int error;
2624
2625 if ((vie->status & (VIES_INST_DECODE | VIES_OTHER)) !=
2626 (VIES_INST_DECODE | VIES_OTHER)) {
2627 return (EINVAL);
2628 }
2629
2630 switch (vie->op.op_type) {
2631 case VIE_OP_TYPE_CLTS:
2632 error = vie_emulate_clts(vie, vm, vcpuid);
2633 break;
2634 case VIE_OP_TYPE_MOV_CR:
2635 error = vie_emulate_mov_cr(vie, vm, vcpuid);
2636 break;
2637 default:
2638 error = EINVAL;
2639 break;
2640 }
2641
2642 return (error);
2643 }
2644
2645 void
2646 vie_reset(struct vie *vie)
2647 {
2648 vie->status = 0;
2649 vie->num_processed = vie->num_valid = 0;
2650 }
2651
2652 void
2653 vie_advance_pc(struct vie *vie, uint64_t *nextrip)
2654 {
2655 VERIFY((vie->status & VIES_REPEAT) == 0);
2656
2657 *nextrip += vie->num_processed;
2658 vie_reset(vie);
2659 }
2660
2661 void
2662 vie_exitinfo(const struct vie *vie, struct vm_exit *vme)
2663 {
2664 if (vie->status & VIES_USER_FALLBACK) {
2665 /*
2666 * Despite the fact that the instruction was successfully
2667 * decoded, some aspect of the emulation failed in such a way
2668 * that it is left up to userspace to complete the operation.
2669 */
2670 vie_fallback_exitinfo(vie, vme);
2671 } else if (vie->status & VIES_MMIO) {
2672 vme->exitcode = VM_EXITCODE_MMIO;
2673 if (vie->mmio_req_read.state == VR_PENDING) {
2674 vme->u.mmio.gpa = vie->mmio_req_read.gpa;
2675 vme->u.mmio.data = 0;
2676 vme->u.mmio.bytes = vie->mmio_req_read.bytes;
2677 vme->u.mmio.read = 1;
2678 } else if (vie->mmio_req_write.state == VR_PENDING) {
2679 vme->u.mmio.gpa = vie->mmio_req_write.gpa;
2680 vme->u.mmio.data = vie->mmio_req_write.data &
2681 vie_size2mask(vie->mmio_req_write.bytes);
2682 vme->u.mmio.bytes = vie->mmio_req_write.bytes;
2683 vme->u.mmio.read = 0;
2684 } else {
2685 panic("bad pending MMIO state");
2686 }
2687 } else if (vie->status & VIES_INOUT) {
2688 vme->exitcode = VM_EXITCODE_INOUT;
2689 vme->u.inout.port = vie->inout.port;
2690 vme->u.inout.bytes = vie->inout.bytes;
2691 if ((vie->inout.flags & INOUT_IN) != 0) {
2692 vme->u.inout.flags = INOUT_IN;
2693 vme->u.inout.eax = 0;
2694 } else {
2695 vme->u.inout.flags = 0;
2696 vme->u.inout.eax = vie->inout.eax &
2697 vie_size2mask(vie->inout.bytes);
2698 }
2699 } else {
2700 panic("no pending operation");
2701 }
2702 }
2703
2704 /*
2705 * In the case of a decoding or verification failure, bailing out to userspace
2706 * to do the instruction emulation is our only option for now.
2707 */
2708 void
2709 vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme)
2710 {
2711 if ((vie->status & VIES_INST_FETCH) == 0) {
2712 bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul));
2713 } else {
2714 ASSERT(sizeof (vie->inst) == sizeof (vme->u.inst_emul.inst));
2715
2716 bcopy(vie->inst, vme->u.inst_emul.inst, sizeof (vie->inst));
2717 vme->u.inst_emul.num_valid = vie->num_valid;
2718 }
2719 vme->exitcode = VM_EXITCODE_INST_EMUL;
2720 }
2721
2722 void
2723 vie_cs_info(const struct vie *vie, struct vm *vm, int vcpuid, uint64_t *cs_base,
2724 int *cs_d)
2725 {
2726 struct seg_desc cs_desc;
2727 int error __maybe_unused;
2728
2729 error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &cs_desc);
2730 ASSERT(error == 0);
2731
2732 /* Initialization required for the paging info to be populated */
2733 VERIFY(vie->status & VIES_INIT);
2734 switch (vie->paging.cpu_mode) {
2735 case CPU_MODE_REAL:
2736 *cs_base = cs_desc.base;
2737 *cs_d = 0;
2738 break;
2739 case CPU_MODE_PROTECTED:
2740 case CPU_MODE_COMPATIBILITY:
2741 *cs_base = cs_desc.base;
2742 *cs_d = SEG_DESC_DEF32(cs_desc.access) ? 1 : 0;
2743 break;
2744 default:
2745 *cs_base = 0;
2746 *cs_d = 0;
2747 break;
2748 }
2749 }
2750
2751 bool
2752 vie_pending(const struct vie *vie)
2753 {
2754 /*
2755 * These VIE status bits indicate conditions which must be addressed
2756 * through either device IO fulfillment (with corresponding
2757 * vie_fulfill_*()) or complete userspace emulation (followed by a
2758 * vie_reset()).
2759 */
2760 const enum vie_status of_interest =
2761 VIES_PENDING_MMIO | VIES_PENDING_INOUT | VIES_USER_FALLBACK;
2762
2763 return ((vie->status & of_interest) != 0);
2764 }
2765
2766 bool
2767 vie_needs_fetch(const struct vie *vie)
2768 {
2769 if (vie->status & VIES_INST_FETCH) {
2770 ASSERT(vie->num_valid != 0);
2771 return (false);
2772 }
2773 return (true);
2774 }
2775
2776 static int
2777 vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
2778 {
2779 KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
2780 ("%s: invalid size %d", __func__, size));
2781 KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
2782
2783 if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
2784 return (0);
2785
2786 return ((gla & (size - 1)) ? 1 : 0);
2787 }
2788
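/*
 * Illustrative sketch, not part of the original file: the alignment check
 * above only fires for CPL 3 with both CR0.AM and RFLAGS.AC set, and only
 * when the address is not naturally aligned for the access size.  The
 * hypothetical example_misaligned() helper shows the core test; for
 * instance, a 4-byte access at 0x1002 is misaligned because (0x1002 & 3) != 0.
 */
static inline int
example_misaligned(uint64_t gla, int size)
{
	/* 'size' is a power of two: 1, 2, 4 or 8 */
	return ((gla & (size - 1)) != 0);
}
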
2789 static int
2790 vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
2791 {
2792 uint64_t mask;
2793
2794 if (cpu_mode != CPU_MODE_64BIT)
2795 return (0);
2796
2797 /*
2798 	 * The value of bit 47 in the 'gla' should be replicated in the
2799 * most significant 16 bits.
2800 */
2801 mask = ~((1UL << 48) - 1);
2802 if (gla & (1UL << 47))
2803 return ((gla & mask) != mask);
2804 else
2805 return ((gla & mask) != 0);
2806 }
2807
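/*
 * Illustrative sketch, not part of the original file: with 48-bit linear
 * addresses, bits 63:48 must replicate bit 47.  0x00007fffffffffff and
 * 0xffff800000000000 are canonical; 0x0000800000000000 is not.  The
 * hypothetical example_noncanonical() helper expresses the same test as
 * vie_canonical_check() via sign extension from bit 47.
 */
static inline int
example_noncanonical(uint64_t gla)
{
	/* sign-extend from bit 47 and compare against the original value */
	uint64_t sext = (uint64_t)((int64_t)(gla << 16) >> 16);

	return (sext != gla);
}
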
2808 static uint64_t
2809 vie_size2mask(int size)
2810 {
2811 KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
2812 ("vie_size2mask: invalid size %d", size));
2813 return (size2mask[size]);
2814 }
2815
2816 static int
2817 vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
2818 struct seg_desc *desc, uint64_t offset, int length, int addrsize,
2819 int prot, uint64_t *gla)
2820 {
2821 uint64_t firstoff, low_limit, high_limit, segbase;
2822 int glasize, type;
2823
2824 KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
2825 ("%s: invalid segment %d", __func__, seg));
2826 KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
2827 ("%s: invalid operand size %d", __func__, length));
2828 KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
2829 ("%s: invalid prot %x", __func__, prot));
2830
2831 firstoff = offset;
2832 if (cpu_mode == CPU_MODE_64BIT) {
2833 KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
2834 "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
2835 glasize = 8;
2836 } else {
2837 KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
2838 "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
2839 glasize = 4;
2840 /*
2841 * If the segment selector is loaded with a NULL selector
2842 * then the descriptor is unusable and attempting to use
2843 * it results in a #GP(0).
2844 */
2845 if (SEG_DESC_UNUSABLE(desc->access))
2846 return (-1);
2847
2848 /*
2849 * The processor generates a #NP exception when a segment
2850 * register is loaded with a selector that points to a
2851 * descriptor that is not present. If this was the case then
2852 * it would have been checked before the VM-exit.
2853 */
2854 KASSERT(SEG_DESC_PRESENT(desc->access),
2855 ("segment %d not present: %x", seg, desc->access));
2856
2857 /*
2858 * The descriptor type must indicate a code/data segment.
2859 */
2860 type = SEG_DESC_TYPE(desc->access);
2861 KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
2862 "descriptor type %x", seg, type));
2863
2864 if (prot & PROT_READ) {
2865 			/* #GP on a read access to an exec-only code segment */
2866 if ((type & 0xA) == 0x8)
2867 return (-1);
2868 }
2869
2870 if (prot & PROT_WRITE) {
2871 /*
2872 * #GP on a write access to a code segment or a
2873 * read-only data segment.
2874 */
2875 if (type & 0x8) /* code segment */
2876 return (-1);
2877
2878 if ((type & 0xA) == 0) /* read-only data seg */
2879 return (-1);
2880 }
2881
2882 /*
2883 * 'desc->limit' is fully expanded taking granularity into
2884 * account.
2885 */
2886 if ((type & 0xC) == 0x4) {
2887 /* expand-down data segment */
2888 low_limit = desc->limit + 1;
2889 high_limit = SEG_DESC_DEF32(desc->access) ?
2890 0xffffffff : 0xffff;
2891 } else {
2892 /* code segment or expand-up data segment */
2893 low_limit = 0;
2894 high_limit = desc->limit;
2895 }
2896
2897 while (length > 0) {
2898 offset &= vie_size2mask(addrsize);
2899 if (offset < low_limit || offset > high_limit)
2900 return (-1);
2901 offset++;
2902 length--;
2903 }
2904 }
2905
2906 /*
2907 * In 64-bit mode all segments except %fs and %gs have a segment
2908 * base address of 0.
2909 */
2910 if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
2911 seg != VM_REG_GUEST_GS) {
2912 segbase = 0;
2913 } else {
2914 segbase = desc->base;
2915 }
2916
2917 /*
2918 * Truncate 'firstoff' to the effective address size before adding
2919 * it to the segment base.
2920 */
2921 firstoff &= vie_size2mask(addrsize);
2922 *gla = (segbase + firstoff) & vie_size2mask(glasize);
2923 return (0);
2924 }
2925
2926 void
2927 vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length,
2928 const struct vm_guest_paging *paging, uint64_t gpa)
2929 {
2930 KASSERT(inst_length <= VIE_INST_SIZE,
2931 ("%s: invalid instruction length (%d)", __func__, inst_length));
2932
2933 bzero(vie, sizeof (struct vie));
2934
2935 vie->base_register = VM_REG_LAST;
2936 vie->index_register = VM_REG_LAST;
2937 vie->segment_register = VM_REG_LAST;
2938 vie->status = VIES_INIT | VIES_MMIO;
2939
2940 if (inst_length != 0) {
2941 bcopy(inst_bytes, vie->inst, inst_length);
2942 vie->num_valid = inst_length;
2943 vie->status |= VIES_INST_FETCH;
2944 }
2945
2946 vie->paging = *paging;
2947 vie->mmio_gpa = gpa;
2948 }
2949
2950 void
2951 vie_init_inout(struct vie *vie, const struct vm_inout *inout, uint8_t inst_len,
2952 const struct vm_guest_paging *paging)
2953 {
2954 bzero(vie, sizeof (struct vie));
2955
2956 vie->status = VIES_INIT | VIES_INOUT;
2957
2958 vie->inout = *inout;
2959 vie->paging = *paging;
2960
2961 /*
2962 * Since VMX/SVM assists already decoded the nature of the in/out
2963 * instruction, let the status reflect that.
2964 */
2965 vie->status |= VIES_INST_FETCH | VIES_INST_DECODE;
2966 vie->num_processed = inst_len;
2967 }
2968
2969 void
2970 vie_init_other(struct vie *vie, const struct vm_guest_paging *paging)
2971 {
2972 bzero(vie, sizeof (struct vie));
2973
2974 vie->base_register = VM_REG_LAST;
2975 vie->index_register = VM_REG_LAST;
2976 vie->segment_register = VM_REG_LAST;
2977 vie->status = VIES_INIT | VIES_OTHER;
2978
2979 vie->paging = *paging;
2980 }
2981
2982 int
2983 vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *result)
2984 {
2985 struct vie_mmio *pending;
2986
2987 if ((vie->status & VIES_MMIO) == 0 ||
2988 (vie->status & VIES_PENDING_MMIO) == 0) {
2989 return (EINVAL);
2990 }
2991
2992 if (result->read) {
2993 pending = &vie->mmio_req_read;
2994 } else {
2995 pending = &vie->mmio_req_write;
2996 }
2997
2998 if (pending->state != VR_PENDING ||
2999 pending->bytes != result->bytes || pending->gpa != result->gpa) {
3000 return (EINVAL);
3001 }
3002
3003 if (result->read) {
3004 pending->data = result->data & vie_size2mask(pending->bytes);
3005 }
3006 pending->state = VR_DONE;
3007 vie->status &= ~VIES_PENDING_MMIO;
3008
3009 return (0);
3010 }
3011
3012 int
3013 vie_fulfill_inout(struct vie *vie, const struct vm_inout *result)
3014 {
3015 if ((vie->status & VIES_INOUT) == 0 ||
3016 (vie->status & VIES_PENDING_INOUT) == 0) {
3017 return (EINVAL);
3018 }
3019 if ((vie->inout.flags & INOUT_IN) != (result->flags & INOUT_IN) ||
3020 vie->inout.bytes != result->bytes ||
3021 vie->inout.port != result->port) {
3022 return (EINVAL);
3023 }
3024
3025 if (result->flags & INOUT_IN) {
3026 vie->inout_req_val = result->eax &
3027 vie_size2mask(vie->inout.bytes);
3028 }
3029 vie->inout_req_state = VR_DONE;
3030 vie->status &= ~(VIES_PENDING_INOUT);
3031
3032 return (0);
3033 }
3034
3035 uint64_t
3036 vie_mmio_gpa(const struct vie *vie)
3037 {
3038 return (vie->mmio_gpa);
3039 }
3040
3041 static int
3042 pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
3043 {
3044 int error_code = 0;
3045
3046 if (pte & PG_V)
3047 error_code |= PGEX_P;
3048 if (prot & PROT_WRITE)
3049 error_code |= PGEX_W;
3050 if (usermode)
3051 error_code |= PGEX_U;
3052 if (rsvd)
3053 error_code |= PGEX_RSV;
3054 if (prot & PROT_EXEC)
3055 error_code |= PGEX_I;
3056
3057 return (error_code);
3058 }
3059
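/*
 * Illustrative sketch, not part of the original file: a user-mode write to a
 * present but read-only page reaches pf_error_code(1, PROT_WRITE, 0, pte)
 * with PG_V set in 'pte', yielding PGEX_P | PGEX_W | PGEX_U (0x7).  The
 * example_user_write_errcode() helper below is hypothetical.
 */
static inline int
example_user_write_errcode(void)
{
	/* present (P) | write (W) | user (U) */
	return (PGEX_P | PGEX_W | PGEX_U);
}
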
3060 static void
3061 ptp_release(vm_page_t **vmp)
3062 {
3063 if (*vmp != NULL) {
3064 (void) vmp_release(*vmp);
3065 *vmp = NULL;
3066 }
3067 }
3068
3069 static void *
3070 ptp_hold(struct vm *vm, int vcpu, uintptr_t gpa, size_t len, vm_page_t **vmp)
3071 {
3072 vm_client_t *vmc = vm_get_vmclient(vm, vcpu);
3073 const uintptr_t hold_gpa = gpa & PAGEMASK;
3074
3075 /* Hold must not cross a page boundary */
3076 VERIFY3U(gpa + len, <=, hold_gpa + PAGESIZE);
3077
3078 if (*vmp != NULL) {
3079 (void) vmp_release(*vmp);
3080 }
3081
3082 *vmp = vmc_hold(vmc, hold_gpa, PROT_READ | PROT_WRITE);
3083 if (*vmp == NULL) {
3084 return (NULL);
3085 }
3086
3087 return ((caddr_t)vmp_get_writable(*vmp) + (gpa - hold_gpa));
3088 }
3089
3090 static int
3091 _vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
3092 uint64_t gla, int prot, uint64_t *gpa, int *guest_fault, bool check_only)
3093 {
3094 int nlevels, pfcode;
3095 int ptpshift = 0, ptpindex = 0;
3096 uint64_t ptpphys;
3097 uint64_t *ptpbase = NULL, pte = 0, pgsize = 0;
3098 vm_page_t *cookie = NULL;
3099 const bool usermode = paging->cpl == 3;
3100 const bool writable = (prot & PROT_WRITE) != 0;
3101
3102 *guest_fault = 0;
3103 restart:
3104 ptpphys = paging->cr3; /* root of the page tables */
3105 ptp_release(&cookie);
3106
3107 if (vie_canonical_check(paging->cpu_mode, gla)) {
3108 /*
3109 		 * XXX assuming a non-stack reference; otherwise a stack fault
3110 * should be generated.
3111 */
3112 if (!check_only)
3113 vm_inject_gp(vm, vcpuid);
3114 *guest_fault = 1;
3115 return (0);
3116 }
3117
3118 if (paging->paging_mode == PAGING_MODE_FLAT) {
3119 *gpa = gla;
3120 return (0);
3121 }
3122
3123 if (paging->paging_mode == PAGING_MODE_32) {
3124 uint32_t *ptpbase32, pte32;
3125
3126 nlevels = 2;
3127 while (--nlevels >= 0) {
3128 /* Zero out the lower 12 bits. */
3129 ptpphys &= ~0xfff;
3130
3131 ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE,
3132 &cookie);
3133
3134 if (ptpbase32 == NULL) {
3135 return (EFAULT);
3136 }
3137
3138 ptpshift = PAGE_SHIFT + nlevels * 10;
3139 ptpindex = (gla >> ptpshift) & 0x3FF;
3140 pgsize = 1UL << ptpshift;
3141
3142 pte32 = ptpbase32[ptpindex];
3143
3144 if ((pte32 & PG_V) == 0 ||
3145 (usermode && (pte32 & PG_U) == 0) ||
3146 (writable && (pte32 & PG_RW) == 0)) {
3147 if (!check_only) {
3148 pfcode = pf_error_code(usermode, prot,
3149 0, pte32);
3150 vm_inject_pf(vm, vcpuid, pfcode, gla);
3151 }
3152
3153 ptp_release(&cookie);
3154 *guest_fault = 1;
3155 return (0);
3156 }
3157
3158 /*
3159 * Emulate the x86 MMU's management of the accessed
3160 * and dirty flags. While the accessed flag is set
3161 * at every level of the page table, the dirty flag
3162 * is only set at the last level providing the guest
3163 * physical address.
3164 */
3165 if (!check_only && (pte32 & PG_A) == 0) {
3166 if (atomic_cmpset_32(&ptpbase32[ptpindex],
3167 pte32, pte32 | PG_A) == 0) {
3168 goto restart;
3169 }
3170 }
3171
3172 /* XXX must be ignored if CR4.PSE=0 */
3173 if (nlevels > 0 && (pte32 & PG_PS) != 0)
3174 break;
3175
3176 ptpphys = pte32;
3177 }
3178
3179 /* Set the dirty bit in the page table entry if necessary */
3180 if (!check_only && writable && (pte32 & PG_M) == 0) {
3181 if (atomic_cmpset_32(&ptpbase32[ptpindex],
3182 pte32, pte32 | PG_M) == 0) {
3183 goto restart;
3184 }
3185 }
3186
3187 /* Zero out the lower 'ptpshift' bits */
3188 pte32 >>= ptpshift; pte32 <<= ptpshift;
3189 *gpa = pte32 | (gla & (pgsize - 1));
3190 ptp_release(&cookie);
3191 return (0);
3192 }
3193
3194 if (paging->paging_mode == PAGING_MODE_PAE) {
3195 /* Zero out the lower 5 bits and the upper 32 bits */
3196 ptpphys &= 0xffffffe0UL;
3197
3198 ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof (*ptpbase) * 4,
3199 &cookie);
3200 if (ptpbase == NULL) {
3201 return (EFAULT);
3202 }
3203
3204 ptpindex = (gla >> 30) & 0x3;
3205
3206 pte = ptpbase[ptpindex];
3207
3208 if ((pte & PG_V) == 0) {
3209 if (!check_only) {
3210 pfcode = pf_error_code(usermode, prot, 0, pte);
3211 vm_inject_pf(vm, vcpuid, pfcode, gla);
3212 }
3213
3214 ptp_release(&cookie);
3215 *guest_fault = 1;
3216 return (0);
3217 }
3218
3219 ptpphys = pte;
3220
3221 nlevels = 2;
3222 } else {
3223 nlevels = 4;
3224 }
3225
3226 while (--nlevels >= 0) {
3227 /* Zero out the lower 12 bits and the upper 12 bits */
3228 ptpphys &= 0x000ffffffffff000UL;
3229
3230 ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie);
3231 if (ptpbase == NULL) {
3232 return (EFAULT);
3233 }
3234
3235 ptpshift = PAGE_SHIFT + nlevels * 9;
3236 ptpindex = (gla >> ptpshift) & 0x1FF;
3237 pgsize = 1UL << ptpshift;
3238
3239 pte = ptpbase[ptpindex];
3240
3241 if ((pte & PG_V) == 0 ||
3242 (usermode && (pte & PG_U) == 0) ||
3243 (writable && (pte & PG_RW) == 0)) {
3244 if (!check_only) {
3245 pfcode = pf_error_code(usermode, prot, 0, pte);
3246 vm_inject_pf(vm, vcpuid, pfcode, gla);
3247 }
3248
3249 ptp_release(&cookie);
3250 *guest_fault = 1;
3251 return (0);
3252 }
3253
3254 /* Set the accessed bit in the page table entry */
3255 if (!check_only && (pte & PG_A) == 0) {
3256 if (atomic_cmpset_64(&ptpbase[ptpindex],
3257 pte, pte | PG_A) == 0) {
3258 goto restart;
3259 }
3260 }
3261
3262 if (nlevels > 0 && (pte & PG_PS) != 0) {
3263 if (pgsize > 1 * GB) {
3264 if (!check_only) {
3265 pfcode = pf_error_code(usermode, prot,
3266 1, pte);
3267 vm_inject_pf(vm, vcpuid, pfcode, gla);
3268 }
3269
3270 ptp_release(&cookie);
3271 *guest_fault = 1;
3272 return (0);
3273 }
3274 break;
3275 }
3276
3277 ptpphys = pte;
3278 }
3279
3280 /* Set the dirty bit in the page table entry if necessary */
3281 if (!check_only && writable && (pte & PG_M) == 0) {
3282 if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
3283 goto restart;
3284 }
3285 ptp_release(&cookie);
3286
3287 /* Zero out the lower 'ptpshift' bits and the upper 12 bits */
3288 pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
3289 *gpa = pte | (gla & (pgsize - 1));
3290 return (0);
3291 }
3292
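/*
 * Illustrative sketch, not part of the original file: with 4-level paging
 * each level consumes 9 bits of the linear address above the 12-bit page
 * offset, which is where the ptpshift/ptpindex arithmetic in _vm_gla2gpa()
 * comes from.  The hypothetical example_pt_index() helper makes the per-level
 * index explicit (level 3 = PML4 down to level 0 = PT).
 */
static inline int
example_pt_index(uint64_t gla, int level)
{
	int shift = 12 + level * 9;

	return ((int)((gla >> shift) & 0x1FF));
}
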
3293 int
3294 vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
3295 uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
3296 {
3297
3298 return (_vm_gla2gpa(vm, vcpuid, paging, gla, prot, gpa, guest_fault,
3299 false));
3300 }
3301
3302 int
3303 vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
3304 uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
3305 {
3306
3307 return (_vm_gla2gpa(vm, vcpuid, paging, gla, prot, gpa, guest_fault,
3308 true));
3309 }
3310
3311 int
3312 vie_fetch_instruction(struct vie *vie, struct vm *vm, int vcpuid, uint64_t rip,
3313 int *faultptr)
3314 {
3315 struct vm_copyinfo copyinfo[2];
3316 int error, prot;
3317
3318 if ((vie->status & VIES_INIT) == 0) {
3319 return (EINVAL);
3320 }
3321
3322 prot = PROT_READ | PROT_EXEC;
3323 error = vm_copy_setup(vm, vcpuid, &vie->paging, rip, VIE_INST_SIZE,
3324 prot, copyinfo, nitems(copyinfo), faultptr);
3325 if (error || *faultptr)
3326 return (error);
3327
3328 vm_copyin(vm, vcpuid, copyinfo, vie->inst, VIE_INST_SIZE);
3329 vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
3330 vie->num_valid = VIE_INST_SIZE;
3331 vie->status |= VIES_INST_FETCH;
3332 return (0);
3333 }
3334
3335 static int
3336 vie_peek(struct vie *vie, uint8_t *x)
3337 {
3338
3339 if (vie->num_processed < vie->num_valid) {
3340 *x = vie->inst[vie->num_processed];
3341 return (0);
3342 } else
3343 return (-1);
3344 }
3345
3346 static void
3347 vie_advance(struct vie *vie)
3348 {
3349
3350 vie->num_processed++;
3351 }
3352
3353 static bool
3354 segment_override(uint8_t x, int *seg)
3355 {
3356
3357 switch (x) {
3358 case 0x2E:
3359 *seg = VM_REG_GUEST_CS;
3360 break;
3361 case 0x36:
3362 *seg = VM_REG_GUEST_SS;
3363 break;
3364 case 0x3E:
3365 *seg = VM_REG_GUEST_DS;
3366 break;
3367 case 0x26:
3368 *seg = VM_REG_GUEST_ES;
3369 break;
3370 case 0x64:
3371 *seg = VM_REG_GUEST_FS;
3372 break;
3373 case 0x65:
3374 *seg = VM_REG_GUEST_GS;
3375 break;
3376 default:
3377 return (false);
3378 }
3379 return (true);
3380 }
3381
3382 static int
3383 decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
3384 {
3385 uint8_t x;
3386
3387 while (1) {
3388 if (vie_peek(vie, &x))
3389 return (-1);
3390
3391 if (x == 0x66)
3392 vie->opsize_override = 1;
3393 else if (x == 0x67)
3394 vie->addrsize_override = 1;
3395 else if (x == 0xF3)
3396 vie->repz_present = 1;
3397 else if (x == 0xF2)
3398 vie->repnz_present = 1;
3399 else if (segment_override(x, &vie->segment_register))
3400 vie->segment_override = 1;
3401 else
3402 break;
3403
3404 vie_advance(vie);
3405 }
3406
3407 /*
3408 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
3409 * - Only one REX prefix is allowed per instruction.
3410 * - The REX prefix must immediately precede the opcode byte or the
3411 * escape opcode byte.
3412 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
3413 * the mandatory prefix must come before the REX prefix.
3414 */
3415 if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
3416 vie->rex_present = 1;
3417 vie->rex_w = x & 0x8 ? 1 : 0;
3418 vie->rex_r = x & 0x4 ? 1 : 0;
3419 vie->rex_x = x & 0x2 ? 1 : 0;
3420 vie->rex_b = x & 0x1 ? 1 : 0;
3421 vie_advance(vie);
3422 }
3423
3424 /*
3425 * § 2.3.5, "The VEX Prefix", SDM Vol 2.
3426 */
3427 if ((cpu_mode == CPU_MODE_64BIT ||
3428 cpu_mode == CPU_MODE_COMPATIBILITY) && x == 0xC4) {
3429 const struct vie_op *optab;
3430
3431 /* 3-byte VEX prefix. */
3432 vie->vex_present = 1;
3433
3434 vie_advance(vie);
3435 if (vie_peek(vie, &x))
3436 return (-1);
3437
3438 /*
3439 * 2nd byte: [R', X', B', mmmmm[4:0]]. Bits are inverted
3440 * relative to REX encoding.
3441 */
3442 vie->rex_r = x & 0x80 ? 0 : 1;
3443 vie->rex_x = x & 0x40 ? 0 : 1;
3444 vie->rex_b = x & 0x20 ? 0 : 1;
3445
3446 switch (x & 0x1F) {
3447 case 0x2:
3448 /* 0F 38. */
3449 optab = three_byte_opcodes_0f38;
3450 break;
3451 case 0x1:
3452 /* 0F class - nothing handled here yet. */
3453 /* FALLTHROUGH */
3454 case 0x3:
3455 /* 0F 3A class - nothing handled here yet. */
3456 /* FALLTHROUGH */
3457 default:
3458 /* Reserved (#UD). */
3459 return (-1);
3460 }
3461
3462 vie_advance(vie);
3463 if (vie_peek(vie, &x))
3464 return (-1);
3465
3466 /* 3rd byte: [W, vvvv[6:3], L, pp[1:0]]. */
3467 vie->rex_w = x & 0x80 ? 1 : 0;
3468
3469 vie->vex_reg = ((~(unsigned)x & 0x78u) >> 3);
3470 vie->vex_l = !!(x & 0x4);
3471 vie->vex_pp = (x & 0x3);
3472
3473 /* PP: 1=66 2=F3 3=F2 prefixes. */
3474 switch (vie->vex_pp) {
3475 case 0x1:
3476 vie->opsize_override = 1;
3477 break;
3478 case 0x2:
3479 vie->repz_present = 1;
3480 break;
3481 case 0x3:
3482 vie->repnz_present = 1;
3483 break;
3484 }
3485
3486 vie_advance(vie);
3487
3488 		/* Opcode byte, sans the escape bytes implied by the VEX prefix. */
3489 if (vie_peek(vie, &x))
3490 return (-1);
3491
3492 vie->op = optab[x];
3493 if (vie->op.op_type == VIE_OP_TYPE_NONE)
3494 return (-1);
3495
3496 vie_advance(vie);
3497 }
3498
3499 /*
3500 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
3501 */
3502 if (cpu_mode == CPU_MODE_64BIT) {
3503 /*
3504 * Default address size is 64-bits and default operand size
3505 * is 32-bits.
3506 */
3507 vie->addrsize = vie->addrsize_override ? 4 : 8;
3508 if (vie->rex_w)
3509 vie->opsize = 8;
3510 else if (vie->opsize_override)
3511 vie->opsize = 2;
3512 else
3513 vie->opsize = 4;
3514 } else if (cs_d) {
3515 /* Default address and operand sizes are 32-bits */
3516 vie->addrsize = vie->addrsize_override ? 2 : 4;
3517 vie->opsize = vie->opsize_override ? 2 : 4;
3518 } else {
3519 /* Default address and operand sizes are 16-bits */
3520 vie->addrsize = vie->addrsize_override ? 4 : 2;
3521 vie->opsize = vie->opsize_override ? 4 : 2;
3522 }
3523 return (0);
3524 }
3525
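/*
 * Illustrative sketch, not part of the original file: the effective operand
 * size chosen in 64-bit mode above, as a function of REX.W and the 66h
 * override (REX.W takes precedence).  The example_opsize_64bit() helper is
 * hypothetical.
 */
static inline int
example_opsize_64bit(int rex_w, int opsize_override)
{
	if (rex_w)
		return (8);
	return (opsize_override ? 2 : 4);
}
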
3526 static int
3527 decode_two_byte_opcode(struct vie *vie)
3528 {
3529 uint8_t x;
3530
3531 if (vie_peek(vie, &x))
3532 return (-1);
3533
3534 vie->op = two_byte_opcodes[x];
3535
3536 if (vie->op.op_type == VIE_OP_TYPE_NONE)
3537 return (-1);
3538
3539 vie_advance(vie);
3540 return (0);
3541 }
3542
3543 static int
3544 decode_opcode(struct vie *vie)
3545 {
3546 uint8_t x;
3547
3548 if (vie_peek(vie, &x))
3549 return (-1);
3550
3551 /* Already did this via VEX prefix. */
3552 if (vie->op.op_type != VIE_OP_TYPE_NONE)
3553 return (0);
3554
3555 vie->op = one_byte_opcodes[x];
3556
3557 if (vie->op.op_type == VIE_OP_TYPE_NONE)
3558 return (-1);
3559
3560 vie_advance(vie);
3561
3562 if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
3563 return (decode_two_byte_opcode(vie));
3564
3565 return (0);
3566 }
3567
3568 static int
3569 decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
3570 {
3571 uint8_t x;
3572 /*
3573 * Handling mov-to/from-cr is special since it is not issuing
3574 * mmio/pio requests and can be done in real mode. We must bypass some
3575 * of the other existing decoding restrictions for it.
3576 */
3577 const bool is_movcr = ((vie->op.op_flags & VIE_OP_F_REG_REG) != 0);
3578
3579 if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
3580 return (0);
3581
3582 if (cpu_mode == CPU_MODE_REAL && !is_movcr)
3583 return (-1);
3584
3585 if (vie_peek(vie, &x))
3586 return (-1);
3587
3588 vie->mod = (x >> 6) & 0x3;
3589 vie->rm = (x >> 0) & 0x7;
3590 vie->reg = (x >> 3) & 0x7;
3591
3592 /*
3593 * A direct addressing mode makes no sense in the context of an EPT
3594 * fault. There has to be a memory access involved to cause the
3595 * EPT fault.
3596 */
3597 if (vie->mod == VIE_MOD_DIRECT && !is_movcr)
3598 return (-1);
3599
3600 if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
3601 (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
3602 /*
3603 * Table 2-5: Special Cases of REX Encodings
3604 *
3605 * mod=0, r/m=5 is used in the compatibility mode to
3606 * indicate a disp32 without a base register.
3607 *
3608 * mod!=3, r/m=4 is used in the compatibility mode to
3609 * indicate that the SIB byte is present.
3610 *
3611 * The 'b' bit in the REX prefix is don't care in
3612 * this case.
3613 */
3614 } else {
3615 vie->rm |= (vie->rex_b << 3);
3616 }
3617
3618 vie->reg |= (vie->rex_r << 3);
3619
3620 /* SIB */
3621 if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
3622 goto done;
3623
3624 vie->base_register = gpr_map[vie->rm];
3625
3626 switch (vie->mod) {
3627 case VIE_MOD_INDIRECT_DISP8:
3628 vie->disp_bytes = 1;
3629 break;
3630 case VIE_MOD_INDIRECT_DISP32:
3631 vie->disp_bytes = 4;
3632 break;
3633 case VIE_MOD_INDIRECT:
3634 if (vie->rm == VIE_RM_DISP32) {
3635 vie->disp_bytes = 4;
3636 /*
3637 * Table 2-7. RIP-Relative Addressing
3638 *
3639 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
3640 * whereas in compatibility mode it just implies disp32.
3641 */
3642
3643 if (cpu_mode == CPU_MODE_64BIT)
3644 vie->base_register = VM_REG_GUEST_RIP;
3645 else
3646 vie->base_register = VM_REG_LAST;
3647 }
3648 break;
3649 }
3650
3651 done:
3652 vie_advance(vie);
3653
3654 return (0);
3655 }
3656
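/*
 * Illustrative sketch, not part of the original file: the mod/reg/rm fields
 * are plain bitfields of the ModRM byte, with REX.R and REX.B extending reg
 * and rm to 4 bits (outside the special cases noted in decode_modrm()).
 * The example_split_modrm() helper below is hypothetical.
 */
static inline void
example_split_modrm(uint8_t modrm, uint8_t rex_r, uint8_t rex_b,
    uint8_t *mod, uint8_t *reg, uint8_t *rm)
{
	*mod = (modrm >> 6) & 0x3;
	*reg = ((modrm >> 3) & 0x7) | (rex_r << 3);
	*rm = (modrm & 0x7) | (rex_b << 3);
}
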
3657 static int
3658 decode_sib(struct vie *vie)
3659 {
3660 uint8_t x;
3661
3662 /* Proceed only if SIB byte is present */
3663 if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
3664 return (0);
3665
3666 if (vie_peek(vie, &x))
3667 return (-1);
3668
3669 /* De-construct the SIB byte */
3670 vie->ss = (x >> 6) & 0x3;
3671 vie->index = (x >> 3) & 0x7;
3672 vie->base = (x >> 0) & 0x7;
3673
3674 /* Apply the REX prefix modifiers */
3675 vie->index |= vie->rex_x << 3;
3676 vie->base |= vie->rex_b << 3;
3677
3678 switch (vie->mod) {
3679 case VIE_MOD_INDIRECT_DISP8:
3680 vie->disp_bytes = 1;
3681 break;
3682 case VIE_MOD_INDIRECT_DISP32:
3683 vie->disp_bytes = 4;
3684 break;
3685 }
3686
3687 if (vie->mod == VIE_MOD_INDIRECT &&
3688 (vie->base == 5 || vie->base == 13)) {
3689 /*
3690 * Special case when base register is unused if mod = 0
3691 * and base = %rbp or %r13.
3692 *
3693 * Documented in:
3694 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
3695 * Table 2-5: Special Cases of REX Encodings
3696 */
3697 vie->disp_bytes = 4;
3698 } else {
3699 vie->base_register = gpr_map[vie->base];
3700 }
3701
3702 /*
3703 * All encodings of 'index' are valid except for %rsp (4).
3704 *
3705 * Documented in:
3706 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
3707 * Table 2-5: Special Cases of REX Encodings
3708 */
3709 if (vie->index != 4)
3710 vie->index_register = gpr_map[vie->index];
3711
3712 /* 'scale' makes sense only in the context of an index register */
3713 if (vie->index_register < VM_REG_LAST)
3714 vie->scale = 1 << vie->ss;
3715
3716 vie_advance(vie);
3717
3718 return (0);
3719 }
3720
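/*
 * Illustrative sketch, not part of the original file: the scale/index/base
 * fields of the SIB byte with the REX.X and REX.B extensions applied, as in
 * decode_sib() above.  The example_split_sib() helper is hypothetical.
 */
static inline void
example_split_sib(uint8_t sib, uint8_t rex_x, uint8_t rex_b,
    uint8_t *scale, uint8_t *index, uint8_t *base)
{
	*scale = 1 << ((sib >> 6) & 0x3);
	*index = ((sib >> 3) & 0x7) | (rex_x << 3);
	*base = (sib & 0x7) | (rex_b << 3);
}
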
3721 static int
3722 decode_displacement(struct vie *vie)
3723 {
3724 int n, i;
3725 uint8_t x;
3726
3727 union {
3728 char buf[4];
3729 int8_t signed8;
3730 int32_t signed32;
3731 } u;
3732
3733 if ((n = vie->disp_bytes) == 0)
3734 return (0);
3735
3736 if (n != 1 && n != 4)
3737 panic("decode_displacement: invalid disp_bytes %d", n);
3738
3739 for (i = 0; i < n; i++) {
3740 if (vie_peek(vie, &x))
3741 return (-1);
3742
3743 u.buf[i] = x;
3744 vie_advance(vie);
3745 }
3746
3747 if (n == 1)
3748 vie->displacement = u.signed8; /* sign-extended */
3749 else
3750 vie->displacement = u.signed32; /* sign-extended */
3751
3752 return (0);
3753 }
3754
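/*
 * Illustrative sketch, not part of the original file: an 8-bit displacement
 * is sign-extended before it is folded into the effective address, so a raw
 * byte of 0xF8 contributes -8 rather than 248.  The example_sext_disp8()
 * helper is hypothetical.
 */
static inline int64_t
example_sext_disp8(uint8_t byte)
{
	return ((int64_t)(int8_t)byte);
}
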
3755 static int
3756 decode_immediate(struct vie *vie)
3757 {
3758 int i, n;
3759 uint8_t x;
3760 union {
3761 char buf[4];
3762 int8_t signed8;
3763 int16_t signed16;
3764 int32_t signed32;
3765 } u;
3766
3767 /* Figure out immediate operand size (if any) */
3768 if (vie->op.op_flags & VIE_OP_F_IMM) {
3769 /*
3770 * Section 2.2.1.5 "Immediates", Intel SDM:
3771 * In 64-bit mode the typical size of immediate operands
3772 		 * remains 32-bits. When the operand size is 64-bits, the
3773 * processor sign-extends all immediates to 64-bits prior
3774 * to their use.
3775 */
3776 if (vie->opsize == 4 || vie->opsize == 8)
3777 vie->imm_bytes = 4;
3778 else
3779 vie->imm_bytes = 2;
3780 } else if (vie->op.op_flags & VIE_OP_F_IMM8) {
3781 vie->imm_bytes = 1;
3782 }
3783
3784 if ((n = vie->imm_bytes) == 0)
3785 return (0);
3786
3787 KASSERT(n == 1 || n == 2 || n == 4,
3788 ("%s: invalid number of immediate bytes: %d", __func__, n));
3789
3790 for (i = 0; i < n; i++) {
3791 if (vie_peek(vie, &x))
3792 return (-1);
3793
3794 u.buf[i] = x;
3795 vie_advance(vie);
3796 }
3797
3798 /* sign-extend the immediate value before use */
3799 if (n == 1)
3800 vie->immediate = u.signed8;
3801 else if (n == 2)
3802 vie->immediate = u.signed16;
3803 else
3804 vie->immediate = u.signed32;
3805
3806 return (0);
3807 }
3808
3809 static int
3810 decode_moffset(struct vie *vie)
3811 {
3812 int i, n;
3813 uint8_t x;
3814 union {
3815 char buf[8];
3816 uint64_t u64;
3817 } u;
3818
3819 if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
3820 return (0);
3821
3822 /*
3823 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
3824 * The memory offset size follows the address-size of the instruction.
3825 */
3826 n = vie->addrsize;
3827 KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
3828
3829 u.u64 = 0;
3830 for (i = 0; i < n; i++) {
3831 if (vie_peek(vie, &x))
3832 return (-1);
3833
3834 u.buf[i] = x;
3835 vie_advance(vie);
3836 }
3837 vie->displacement = u.u64;
3838 return (0);
3839 }
3840
3841 /*
3842 * Verify that the 'guest linear address' provided as collateral of the nested
3843 * page table fault matches with our instruction decoding.
3844 */
3845 int
3846 vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla)
3847 {
3848 int error;
3849 uint64_t base, segbase, idx, gla2;
3850 enum vm_reg_name seg;
3851 struct seg_desc desc;
3852
3853 ASSERT((vie->status & VIES_INST_DECODE) != 0);
3854
3855 /*
3856 * If there was no valid GLA context with the exit, or the decoded
3857 	 * instruction acts on more than one address, there is nothing to verify.
3858 */
3859 if (gla == VIE_INVALID_GLA ||
3860 (vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) != 0) {
3861 return (0);
3862 }
3863
3864 base = 0;
3865 if (vie->base_register != VM_REG_LAST) {
3866 error = vm_get_register(vm, cpuid, vie->base_register, &base);
3867 if (error) {
3868 printf("verify_gla: error %d getting base reg %d\n",
3869 error, vie->base_register);
3870 return (-1);
3871 }
3872
3873 /*
3874 * RIP-relative addressing starts from the following
3875 * instruction
3876 */
3877 if (vie->base_register == VM_REG_GUEST_RIP)
3878 base += vie->num_processed;
3879 }
3880
3881 idx = 0;
3882 if (vie->index_register != VM_REG_LAST) {
3883 error = vm_get_register(vm, cpuid, vie->index_register, &idx);
3884 if (error) {
3885 printf("verify_gla: error %d getting index reg %d\n",
3886 error, vie->index_register);
3887 return (-1);
3888 }
3889 }
3890
3891 /*
3892 * From "Specifying a Segment Selector", Intel SDM, Vol 1
3893 *
3894 * In 64-bit mode, segmentation is generally (but not
3895 * completely) disabled. The exceptions are the FS and GS
3896 * segments.
3897 *
3898 * In legacy IA-32 mode, when the ESP or EBP register is used
3899 * as the base, the SS segment is the default segment. For
3900 	 * other data references, except those relative to the stack or
3901 	 * a string destination, the DS segment is the default. These
3902 * can be overridden to allow other segments to be accessed.
3903 */
3904 if (vie->segment_override) {
3905 seg = vie->segment_register;
3906 } else if (vie->base_register == VM_REG_GUEST_RSP ||
3907 vie->base_register == VM_REG_GUEST_RBP) {
3908 seg = VM_REG_GUEST_SS;
3909 } else {
3910 seg = VM_REG_GUEST_DS;
3911 }
3912 if (vie->paging.cpu_mode == CPU_MODE_64BIT &&
3913 seg != VM_REG_GUEST_FS && seg != VM_REG_GUEST_GS) {
3914 segbase = 0;
3915 } else {
3916 error = vm_get_seg_desc(vm, cpuid, seg, &desc);
3917 if (error) {
3918 printf("verify_gla: error %d getting segment"
3919 " descriptor %d", error, vie->segment_register);
3920 return (-1);
3921 }
3922 segbase = desc.base;
3923 }
3924
3925 gla2 = segbase + base + vie->scale * idx + vie->displacement;
3926 gla2 &= size2mask[vie->addrsize];
3927 if (gla != gla2) {
3928 printf("verify_gla mismatch: segbase(0x%0lx)"
3929 "base(0x%0lx), scale(%d), index(0x%0lx), "
3930 "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
3931 segbase, base, vie->scale, idx, vie->displacement,
3932 gla, gla2);
3933 return (-1);
3934 }
3935
3936 return (0);
3937 }
3938
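/*
 * Illustrative sketch, not part of the original file: the address recomputed
 * by vie_verify_gla() is the standard x86 effective-address sum, truncated to
 * the instruction's address size.  The example_effective_address() helper
 * below is hypothetical.
 */
static inline uint64_t
example_effective_address(uint64_t segbase, uint64_t base, int scale,
    uint64_t idx, int64_t disp, uint64_t addrmask)
{
	return ((segbase + base + (uint64_t)scale * idx + (uint64_t)disp) &
	    addrmask);
}
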
3939 int
3940 vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int cs_d)
3941 {
3942 enum vm_cpu_mode cpu_mode;
3943
3944 if ((vie->status & VIES_INST_FETCH) == 0) {
3945 return (EINVAL);
3946 }
3947
3948 cpu_mode = vie->paging.cpu_mode;
3949
3950 if (decode_prefixes(vie, cpu_mode, cs_d))
3951 return (-1);
3952
3953 if (decode_opcode(vie))
3954 return (-1);
3955
3956 if (decode_modrm(vie, cpu_mode))
3957 return (-1);
3958
3959 if (decode_sib(vie))
3960 return (-1);
3961
3962 if (decode_displacement(vie))
3963 return (-1);
3964
3965 if (decode_immediate(vie))
3966 return (-1);
3967
3968 if (decode_moffset(vie))
3969 return (-1);
3970
3971 vie->status |= VIES_INST_DECODE;
3972
3973 return (0);
3974 }
3975