// SPDX-License-Identifier: GPL-2.0

static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
					    unsigned long exit_code)
{
	unsigned int opcode = (unsigned int)ctxt->insn.opcode.value;
	u8 modrm = ctxt->insn.modrm.value;

	switch (exit_code) {

	case SVM_EXIT_IOIO:
	case SVM_EXIT_NPF:
		/* handled separately */
		return ES_OK;

	case SVM_EXIT_CPUID:
		if (opcode == 0xa20f)
			return ES_OK;
		break;

	case SVM_EXIT_INVD:
		if (opcode == 0x080f)
			return ES_OK;
		break;

	case SVM_EXIT_MONITOR:
		/* MONITOR and MONITORX instructions generate the same error code */
		if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa))
			return ES_OK;
		break;

	case SVM_EXIT_MWAIT:
		/* MWAIT and MWAITX instructions generate the same error code */
		if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb))
			return ES_OK;
		break;

	case SVM_EXIT_MSR:
		/* RDMSR */
		if (opcode == 0x320f ||
		    /* WRMSR */
		    opcode == 0x300f)
			return ES_OK;
		break;

	case SVM_EXIT_RDPMC:
		if (opcode == 0x330f)
			return ES_OK;
		break;

	case SVM_EXIT_RDTSC:
		if (opcode == 0x310f)
			return ES_OK;
		break;

	case SVM_EXIT_RDTSCP:
		if (opcode == 0x010f && modrm == 0xf9)
			return ES_OK;
		break;

	case SVM_EXIT_READ_DR7:
		if (opcode == 0x210f &&
		    X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
			return ES_OK;
		break;

	case SVM_EXIT_VMMCALL:
		if (opcode == 0x010f && modrm == 0xd9)
			return ES_OK;

		break;

	case SVM_EXIT_WRITE_DR7:
		if (opcode == 0x230f &&
		    X86_MODRM_REG(ctxt->insn.modrm.value) == 7)
			return ES_OK;
		break;

	case SVM_EXIT_WBINVD:
		if (opcode == 0x90f)
			return ES_OK;
		break;

	default:
		break;
	}

	sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n",
		   opcode, exit_code, ctxt->regs->ip);

	return ES_UNSUPPORTED;
}

static bool vc_decoding_needed(unsigned long exit_code)
{
	/* Exceptions don't require decoding of the instruction */
	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
		 exit_code <= SVM_EXIT_LAST_EXCP);
}

static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
				      struct pt_regs *regs,
				      unsigned long exit_code)
{
	enum es_result ret = ES_OK;

	memset(ctxt, 0, sizeof(*ctxt));
	ctxt->regs = regs;

	if (vc_decoding_needed(exit_code))
		ret = vc_decode_insn(ctxt);

	return ret;
}

static void vc_finish_insn(struct es_em_ctxt *ctxt)
{
	ctxt->regs->ip += ctxt->insn.length;
}

static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
					   unsigned long address,
					   bool write)
{
	if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_USER;
		ctxt->fi.cr2 = address;
		if (write)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return ES_EXCEPTION;
	}

	return ES_OK;
}

static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
					  void *src, char *buf,
					  unsigned int data_size,
					  unsigned int count,
					  bool backwards)
{
	int i, b = backwards ? -1 : 1;
	unsigned long address = (unsigned long)src;
	enum es_result ret;

	ret = vc_insn_string_check(ctxt, address, false);
	if (ret != ES_OK)
		return ret;

	for (i = 0; i < count; i++) {
		void *s = src + (i * data_size * b);
		char *d = buf + (i * data_size);

		ret = vc_read_mem(ctxt, s, d, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}

static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
					   void *dst, char *buf,
					   unsigned int data_size,
					   unsigned int count,
					   bool backwards)
{
	int i, s = backwards ? -1 : 1;
	unsigned long address = (unsigned long)dst;
	enum es_result ret;

	ret = vc_insn_string_check(ctxt, address, true);
	if (ret != ES_OK)
		return ret;

	for (i = 0; i < count; i++) {
		void *d = dst + (i * data_size * s);
		char *b = buf + (i * data_size);

		ret = vc_write_mem(ctxt, d, b, data_size);
		if (ret != ES_OK)
			break;
	}

	return ret;
}
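/*
 * Bit layout used below to build the IOIO exit information (EXITINFO1)
 * passed to the hypervisor for SVM_EXIT_IOIO events: bit 0 selects the
 * direction (IN), bit 2 marks a string instruction, bit 3 a REP prefix,
 * bits 4-6 the operand size, bits 7-9 the address size, bits 10-12 the
 * effective segment, and bits 16-31 carry the port number.
 */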
#define IOIO_TYPE_STR	BIT(2)
#define IOIO_TYPE_IN	1
#define IOIO_TYPE_INS	(IOIO_TYPE_IN | IOIO_TYPE_STR)
#define IOIO_TYPE_OUT	0
#define IOIO_TYPE_OUTS	(IOIO_TYPE_OUT | IOIO_TYPE_STR)

#define IOIO_REP	BIT(3)

#define IOIO_ADDR_64	BIT(9)
#define IOIO_ADDR_32	BIT(8)
#define IOIO_ADDR_16	BIT(7)

#define IOIO_DATA_32	BIT(6)
#define IOIO_DATA_16	BIT(5)
#define IOIO_DATA_8	BIT(4)

#define IOIO_SEG_ES	(0 << 10)
#define IOIO_SEG_DS	(3 << 10)

static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
	struct insn *insn = &ctxt->insn;
	size_t size;
	u64 port;

	*exitinfo = 0;

	switch (insn->opcode.bytes[0]) {
	/* INS opcodes */
	case 0x6c:
	case 0x6d:
		*exitinfo |= IOIO_TYPE_INS;
		*exitinfo |= IOIO_SEG_ES;
		port = ctxt->regs->dx & 0xffff;
		break;

	/* OUTS opcodes */
	case 0x6e:
	case 0x6f:
		*exitinfo |= IOIO_TYPE_OUTS;
		*exitinfo |= IOIO_SEG_DS;
		port = ctxt->regs->dx & 0xffff;
		break;

	/* IN immediate opcodes */
	case 0xe4:
	case 0xe5:
		*exitinfo |= IOIO_TYPE_IN;
		port = (u8)insn->immediate.value & 0xffff;
		break;

	/* OUT immediate opcodes */
	case 0xe6:
	case 0xe7:
		*exitinfo |= IOIO_TYPE_OUT;
		port = (u8)insn->immediate.value & 0xffff;
		break;

	/* IN register opcodes */
	case 0xec:
	case 0xed:
		*exitinfo |= IOIO_TYPE_IN;
		port = ctxt->regs->dx & 0xffff;
		break;

	/* OUT register opcodes */
	case 0xee:
	case 0xef:
		*exitinfo |= IOIO_TYPE_OUT;
		port = ctxt->regs->dx & 0xffff;
		break;

	default:
		return ES_DECODE_FAILED;
	}

	*exitinfo |= port << 16;

	switch (insn->opcode.bytes[0]) {
	case 0x6c:
	case 0x6e:
	case 0xe4:
	case 0xe6:
	case 0xec:
	case 0xee:
		/* Single byte opcodes */
		*exitinfo |= IOIO_DATA_8;
		size = 1;
		break;
	default:
		/* Length determined by instruction parsing */
		*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
						     : IOIO_DATA_32;
		size = (insn->opnd_bytes == 2) ? 2 : 4;
	}

	switch (insn->addr_bytes) {
	case 2:
		*exitinfo |= IOIO_ADDR_16;
		break;
	case 4:
		*exitinfo |= IOIO_ADDR_32;
		break;
	case 8:
		*exitinfo |= IOIO_ADDR_64;
		break;
	}

	if (insn_has_rep_prefix(insn))
		*exitinfo |= IOIO_REP;

	return vc_ioio_check(ctxt, (u16)port, size);
}
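/*
 * Handle #VC exceptions caused by port I/O. Plain IN/OUT is exchanged with
 * the hypervisor through rAX in the GHCB, while (REP) INS/OUTS bounce the
 * data through the GHCB shared buffer in chunks, returning ES_RETRY until
 * the whole REP count has been consumed.
 */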
static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u64 exit_info_1, exit_info_2;
	enum es_result ret;

	ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
	if (ret != ES_OK)
		return ret;

	if (exit_info_1 & IOIO_TYPE_STR) {

		/* (REP) INS/OUTS */

		bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
		unsigned int io_bytes, exit_bytes;
		unsigned int ghcb_count, op_count;
		unsigned long es_base;
		u64 sw_scratch;

		/*
		 * For the string variants with rep prefix the number of in/out
		 * operations per #VC exception is limited so that the kernel
		 * has a chance to take interrupts and re-schedule while the
		 * instruction is emulated.
		 */
		io_bytes = (exit_info_1 >> 4) & 0x7;
		ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;

		op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
		exit_info_2 = min(op_count, ghcb_count);
		exit_bytes = exit_info_2 * io_bytes;

		es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

		/* Read bytes of OUTS into the shared buffer */
		if (!(exit_info_1 & IOIO_TYPE_IN)) {
			ret = vc_insn_string_read(ctxt,
					       (void *)(es_base + regs->si),
					       ghcb->shared_buffer, io_bytes,
					       exit_info_2, df);
			if (ret)
				return ret;
		}

		/*
		 * Issue a VMGEXIT to the HV to consume the bytes from the
		 * shared buffer or to have it write them into the shared buffer
		 * depending on the instruction: OUTS or INS.
		 */
		sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
		ghcb_set_sw_scratch(ghcb, sw_scratch);
		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
					  exit_info_1, exit_info_2);
		if (ret != ES_OK)
			return ret;

		/* Read bytes from shared buffer into the guest's destination. */
		if (exit_info_1 & IOIO_TYPE_IN) {
			ret = vc_insn_string_write(ctxt,
						   (void *)(es_base + regs->di),
						   ghcb->shared_buffer, io_bytes,
						   exit_info_2, df);
			if (ret)
				return ret;

			if (df)
				regs->di -= exit_bytes;
			else
				regs->di += exit_bytes;
		} else {
			if (df)
				regs->si -= exit_bytes;
			else
				regs->si += exit_bytes;
		}

		if (exit_info_1 & IOIO_REP)
			regs->cx -= exit_info_2;

		ret = regs->cx ? ES_RETRY : ES_OK;

	} else {

		/* IN/OUT into/from rAX */

		int bits = (exit_info_1 & 0x70) >> 1;
		u64 rax = 0;

		if (!(exit_info_1 & IOIO_TYPE_IN))
			rax = lower_bits(regs->ax, bits);

		ghcb_set_rax(ghcb, rax);

		ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
		if (ret != ES_OK)
			return ret;

		if (exit_info_1 & IOIO_TYPE_IN) {
			if (!ghcb_rax_is_valid(ghcb))
				return ES_VMM_ERROR;
			regs->ax = lower_bits(ghcb->save.rax, bits);
		}
	}

	return ret;
}
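/*
 * Check the result of a VMGEXIT: sw_exit_info_1 == 0 means success,
 * sw_exit_info_1 == 1 means the hypervisor requests that an exception
 * (#GP or #UD, described in sw_exit_info_2) be reflected into the guest,
 * and anything else is treated as a hypervisor error.
 */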
enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
				   struct es_em_ctxt *ctxt,
				   u64 exit_code, u64 exit_info_1,
				   u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	u32 cr4 = native_read_cr4();
	int ret;

	ghcb_set_rax(ghcb, leaf->fn);
	ghcb_set_rcx(ghcb, leaf->subfn);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	leaf->eax = ghcb->save.rax;
	leaf->ebx = ghcb->save.rbx;
	leaf->ecx = ghcb->save.rcx;
	leaf->edx = ghcb->save.rdx;

	return ES_OK;
}

struct cpuid_ctx {
	struct ghcb *ghcb;
	struct es_em_ctxt *ctxt;
};

static void snp_cpuid_hv_ghcb(void *p, struct cpuid_leaf *leaf)
{
	struct cpuid_ctx *ctx = p;

	if (__sev_cpuid_hv_ghcb(ctx->ghcb, ctx->ctxt, leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct cpuid_ctx ctx = { ghcb, ctxt };
	struct pt_regs *regs = ctxt->regs;
	struct cpuid_leaf leaf;
	int ret;

	leaf.fn = regs->ax;
	leaf.subfn = regs->cx;
	ret = snp_cpuid(snp_cpuid_hv_ghcb, &ctx, &leaf);
	if (!ret) {
		regs->ax = leaf.eax;
		regs->bx = leaf.ebx;
		regs->cx = leaf.ecx;
		regs->dx = leaf.edx;
	}

	return ret;
}
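/*
 * Handle #VC exceptions for CPUID: prefer the SNP CPUID table via
 * vc_handle_cpuid_snp() and only fall back to asking the hypervisor
 * through the GHCB when no CPUID table is available (-EOPNOTSUPP).
 */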
static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	u32 cr4 = native_read_cr4();
	enum es_result ret;
	int snp_cpuid_ret;

	snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
	if (!snp_cpuid_ret)
		return ES_OK;
	if (snp_cpuid_ret != -EOPNOTSUPP)
		return ES_VMM_ERROR;

	ghcb_set_rax(ghcb, regs->ax);
	ghcb_set_rcx(ghcb, regs->cx);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	regs->ax = ghcb->save.rax;
	regs->bx = ghcb->save.rbx;
	regs->cx = ghcb->save.rcx;
	regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt,
				      unsigned long exit_code)
{
	bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
	enum es_result ret;

	/*
	 * The hypervisor should not be intercepting RDTSC/RDTSCP when Secure
	 * TSC is enabled. A #VC exception will be generated if the RDTSC/RDTSCP
	 * instructions are being intercepted. If this should occur and Secure
	 * TSC is enabled, guest execution should be terminated as the guest
	 * cannot rely on the TSC value provided by the hypervisor.
	 */
	if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
		return ES_VMM_ERROR;

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
	      (!rdtscp || ghcb_rcx_is_valid(ghcb))))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;
	if (rdtscp)
		ctxt->regs->cx = ghcb->save.rcx;

	return ES_OK;
}

void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}
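#if 0
/*
 * Illustrative sketch only, not part of this file: how a #VC exception
 * handler might chain the helpers above. The dispatcher name
 * vc_handle_exitcode(), the vc_example() wrapper and the way the GHCB and
 * pt_regs are obtained are assumptions made for the example; the real
 * handler, GHCB management and full exit-code dispatch live elsewhere.
 */
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	switch (exit_code) {
	case SVM_EXIT_IOIO:
		return vc_handle_ioio(ghcb, ctxt);
	case SVM_EXIT_CPUID:
		return vc_handle_cpuid(ghcb, ctxt);
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		return vc_handle_rdtsc(ghcb, ctxt, exit_code);
	default:
		return ES_UNSUPPORTED;
	}
}

static void vc_example(struct pt_regs *regs, struct ghcb *ghcb,
		       unsigned long exit_code)
{
	struct es_em_ctxt ctxt;

	/* Decode the faulting instruction if the exit code requires it. */
	if (vc_init_em_ctxt(&ctxt, regs, exit_code) != ES_OK)
		return;

	/* Emulate it, then advance rIP past the emulated instruction. */
	if (vc_handle_exitcode(&ctxt, ghcb, exit_code) == ES_OK)
		vc_finish_insn(&ctxt);
}
#endif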