1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * AMD Memory Encryption Support
4 *
5 * Copyright (C) 2019 SUSE
6 *
7 * Author: Joerg Roedel <jroedel@suse.de>
8 */
9
10 #define pr_fmt(fmt) "SEV: " fmt
11
12 #include <linux/sched/debug.h> /* For show_regs() */
13 #include <linux/percpu-defs.h>
14 #include <linux/cc_platform.h>
15 #include <linux/printk.h>
16 #include <linux/mm_types.h>
17 #include <linux/set_memory.h>
18 #include <linux/memblock.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/cpumask.h>
22 #include <linux/efi.h>
23 #include <linux/platform_device.h>
24 #include <linux/io.h>
25 #include <linux/psp-sev.h>
26 #include <linux/dmi.h>
27 #include <uapi/linux/sev-guest.h>
28 #include <crypto/gcm.h>
29
30 #include <asm/init.h>
31 #include <asm/cpu_entry_area.h>
32 #include <asm/stacktrace.h>
33 #include <asm/sev.h>
34 #include <asm/insn-eval.h>
35 #include <asm/fpu/xcr.h>
36 #include <asm/processor.h>
37 #include <asm/realmode.h>
38 #include <asm/setup.h>
39 #include <asm/traps.h>
40 #include <asm/svm.h>
41 #include <asm/smp.h>
42 #include <asm/cpu.h>
43 #include <asm/apic.h>
44 #include <asm/cpuid/api.h>
45 #include <asm/cmdline.h>
46 #include <asm/msr.h>
47
48 #include "internal.h"
49
/* Bitmap of SEV features supported by the hypervisor */
u64 sev_hv_features __ro_after_init;
SYM_PIC_ALIAS(sev_hv_features);

/* Secrets page physical address from the CC blob */
u64 sev_secrets_pa __ro_after_init;
SYM_PIC_ALIAS(sev_secrets_pa);

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT		0x5555
#define AP_INIT_X87_FCW_DEFAULT		0x0040
#define AP_INIT_CR0_DEFAULT		0x60000010
#define AP_INIT_MXCSR_DEFAULT		0x1f80

/* Human-readable names for SEV_STATUS MSR feature bits, indexed by bit number */
static const char * const sev_status_feat_names[] = {
	[MSR_AMD64_SEV_ENABLED_BIT]		= "SEV",
	[MSR_AMD64_SEV_ES_ENABLED_BIT]		= "SEV-ES",
	[MSR_AMD64_SEV_SNP_ENABLED_BIT]		= "SEV-SNP",
	[MSR_AMD64_SNP_VTOM_BIT]		= "vTom",
	[MSR_AMD64_SNP_REFLECT_VC_BIT]		= "ReflectVC",
	[MSR_AMD64_SNP_RESTRICTED_INJ_BIT]	= "RI",
	[MSR_AMD64_SNP_ALT_INJ_BIT]		= "AI",
	[MSR_AMD64_SNP_DEBUG_SWAP_BIT]		= "DebugSwap",
	[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT]	= "NoHostIBS",
	[MSR_AMD64_SNP_BTB_ISOLATION_BIT]	= "BTBIsol",
	[MSR_AMD64_SNP_VMPL_SSS_BIT]		= "VmplSSS",
	[MSR_AMD64_SNP_SECURE_TSC_BIT]		= "SecureTSC",
	[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT]	= "VMGExitParam",
	[MSR_AMD64_SNP_IBS_VIRT_BIT]		= "IBSVirt",
	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
	[MSR_AMD64_SNP_SECURE_AVIC_BIT]		= "SecureAVIC",
	[MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT]	= "IBPBOnEntry",
};

/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static unsigned long snp_tsc_freq_khz __ro_after_init;

/* Per-CPU runtime state; holds the per-CPU GHCB page used by the #VC handler */
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
/* Per-CPU pointer to the guest-allocated VMSA page used for AP bringup */
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);

/*
 * SVSM related information:
 * When running under an SVSM, the VMPL that Linux is executing at must be
 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
 */
u8 snp_vmpl __ro_after_init;
EXPORT_SYMBOL_GPL(snp_vmpl);
SYM_PIC_ALIAS(snp_vmpl);

/*
 * GHCB protocol version negotiated with the hypervisor. Feature negotiation
 * happens early in boot, before the runtime #VC handler is installed.
 *
 * NOTE(review): an earlier comment claimed this must live in .data to survive
 * the .bss clear, but the variable is __ro_after_init — confirm that the
 * early assignment and section placement are consistent.
 */
u16 ghcb_version __ro_after_init;
SYM_PIC_ALIAS(ghcb_version);

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
struct ghcb *boot_ghcb __section(".data");
134
get_snp_jump_table_addr(void)135 static u64 __init get_snp_jump_table_addr(void)
136 {
137 struct snp_secrets_page *secrets;
138 void __iomem *mem;
139 u64 addr;
140
141 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
142 if (!mem) {
143 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
144 return 0;
145 }
146
147 secrets = (__force struct snp_secrets_page *)mem;
148
149 addr = secrets->os_area.ap_jump_table_pa;
150 iounmap(mem);
151
152 return addr;
153 }
154
/*
 * Retrieve the AP jump table address.
 *
 * On SEV-SNP it is read from the secrets page; otherwise it is requested
 * from the hypervisor via the SVM_VMGEXIT_AP_JUMP_TABLE GHCB event.
 * Returns 0 when no jump table is available.
 */
static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	/* The GHCB is a shared per-CPU resource: use it with IRQs disabled */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	/* Build the "get AP jump table" request in the GHCB */
	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* On success the hypervisor returns the address in sw_exit_info_2 */
	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
187
/*
 * PVALIDATE all entries of a Page State Change descriptor.
 *
 * A 2M entry that fails with PVALIDATE_FAIL_SIZEMISMATCH (the range is not
 * backed by a 2M RMP entry) is retried as individual 4K pages. Any other
 * failure is fatal and terminates the guest via __pval_terminate().
 */
static void pval_pages(struct snp_psc_desc *desc)
{
	struct psc_entry *e;
	unsigned long vaddr;
	unsigned int size;
	unsigned int i;
	bool validate;
	u64 pfn;
	int rc;

	for (i = 0; i <= desc->hdr.end_entry; i++) {
		e = &desc->entries[i];

		pfn = e->gfn;
		vaddr = (unsigned long)pfn_to_kaddr(pfn);
		size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
		/* PRIVATE => validate, SHARED => rescind validation */
		validate = e->operation == SNP_PAGE_STATE_PRIVATE;

		rc = pvalidate(vaddr, size, validate);
		if (!rc)
			continue;

		if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
			unsigned long vaddr_end = vaddr + PMD_SIZE;

			/* Fall back to 4K-granular validation of the 2M range */
			for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
				rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
				if (rc)
					__pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
			}
		} else {
			__pval_terminate(pfn, validate, size, rc, 0);
		}
	}
}
223
pvalidate_pages(struct snp_psc_desc * desc)224 static void pvalidate_pages(struct snp_psc_desc *desc)
225 {
226 struct psc_entry *e;
227 unsigned int i;
228
229 if (snp_vmpl)
230 svsm_pval_pages(desc);
231 else
232 pval_pages(desc);
233
234 /*
235 * If not affected by the cache-coherency vulnerability there is no need
236 * to perform the cache eviction mitigation.
237 */
238 if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO))
239 return;
240
241 for (i = 0; i <= desc->hdr.end_entry; i++) {
242 e = &desc->entries[i];
243
244 /*
245 * If validating memory (making it private) perform the cache
246 * eviction mitigation.
247 */
248 if (e->operation == SNP_PAGE_STATE_PRIVATE)
249 sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1);
250 }
251 }
252
/*
 * Submit a Page State Change descriptor to the hypervisor via the PSC
 * VMGEXIT, retrying until all entries are processed.
 *
 * Returns 0 on success, 1 on any hypervisor error or protocol violation.
 */
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{
	int cur_entry, end_entry, ret = 0;
	struct snp_psc_desc *data;
	struct es_em_ctxt ctxt;

	vc_ghcb_invalidate(ghcb);

	/* Copy the input desc into GHCB shared buffer */
	data = (struct snp_psc_desc *)ghcb->shared_buffer;
	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));

	/*
	 * As per the GHCB specification, the hypervisor can resume the guest
	 * before processing all the entries. Check whether all the entries
	 * are processed. If not, then keep retrying. Note, the hypervisor
	 * will update the data memory directly to indicate the status, so
	 * reference the data->hdr everywhere.
	 *
	 * The strategy here is to wait for the hypervisor to change the page
	 * state in the RMP table before guest accesses the memory pages. If the
	 * page state change was not successful, then later memory access will
	 * result in a crash.
	 */
	cur_entry = data->hdr.cur_entry;
	end_entry = data->hdr.end_entry;

	while (data->hdr.cur_entry <= data->hdr.end_entry) {
		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

		/* This will advance the shared buffer data points to. */
		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

		/*
		 * Page State Change VMGEXIT can pass error code through
		 * exit_info_2.
		 */
		if (WARN(ret || ghcb->save.sw_exit_info_2,
			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
			 ret, ghcb->save.sw_exit_info_2)) {
			ret = 1;
			goto out;
		}

		/* Verify that reserved bit is not set */
		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
			ret = 1;
			goto out;
		}

		/*
		 * Sanity check that entry processing is not going backwards.
		 * This will happen only if hypervisor is tricking us.
		 */
		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
			ret = 1;
			goto out;
		}
	}

out:
	return ret;
}
318
/*
 * Fill @data with PSC entries covering as much of [vaddr, vaddr_end) as
 * fits, submit the batch to the hypervisor, and perform the required
 * PVALIDATE transitions around the state change.
 *
 * Returns the first address not covered by this batch so the caller can
 * loop until the whole range has been converted.
 */
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
{
	struct ghcb_state state;
	bool use_large_entry;
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long flags;
	unsigned long pfn;
	struct ghcb *ghcb;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
		hdr->end_entry = i;

		if (is_vmalloc_addr((void *)vaddr)) {
			/* vmalloc pages are not physically contiguous: 4K entries only */
			pfn = vmalloc_to_pfn((void *)vaddr);
			use_large_entry = false;
		} else {
			pfn = __pa(vaddr) >> PAGE_SHIFT;
			use_large_entry = true;
		}

		e->gfn = pfn;
		e->operation = op;

		/* Use a 2M entry when both alignment and remaining size allow it */
		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
		    (vaddr_end - vaddr) >= PMD_SIZE) {
			e->pagesize = RMP_PG_SIZE_2M;
			vaddr += PMD_SIZE;
		} else {
			e->pagesize = RMP_PG_SIZE_4K;
			vaddr += PAGE_SIZE;
		}

		e++;
		i++;
	}

	/* Page validation must be rescinded before changing to shared */
	if (op == SNP_PAGE_STATE_SHARED)
		pvalidate_pages(data);

	local_irq_save(flags);

	/* Use the per-CPU GHCB once initialized, else the early boot GHCB */
	if (sev_cfg.ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else
		ghcb = boot_ghcb;

	/* Invoke the hypervisor to perform the page state changes */
	if (!ghcb || vmgexit_psc(ghcb, data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	if (sev_cfg.ghcbs_initialized)
		__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Page validation must be performed after changing to private */
	if (op == SNP_PAGE_STATE_PRIVATE)
		pvalidate_pages(data);

	return vaddr;
}
390
/*
 * Convert @npages starting at @vaddr to page state @op, batching the work
 * through an on-stack PSC descriptor.
 */
static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
{
	struct snp_psc_desc desc;
	unsigned long vaddr_end;

	/* Use the MSR protocol when a GHCB is not available. */
	if (!boot_ghcb) {
		struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() };

		return early_set_pages_state(vaddr, __pa(vaddr), npages, &d);
	}

	vaddr = vaddr & PAGE_MASK;
	vaddr_end = vaddr + (npages << PAGE_SHIFT);

	/* Each iteration converts as many entries as fit in one descriptor */
	while (vaddr < vaddr_end)
		vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
}
409
snp_set_memory_shared(unsigned long vaddr,unsigned long npages)410 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
411 {
412 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
413 return;
414
415 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
416 }
417
snp_set_memory_private(unsigned long vaddr,unsigned long npages)418 void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
419 {
420 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
421 return;
422
423 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
424 }
425
/* Accept (make private) the physical range [start, end) in an SNP guest. */
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state((unsigned long)__va(start),
			(end - start) >> PAGE_SHIFT,
			SNP_PAGE_STATE_PRIVATE);
}
438
/*
 * Issue an SVM_VMGEXIT_AP_CREATION event (@event = AP create/destroy) for
 * the AP identified by @apic_id using @vmsa.
 *
 * Returns 0 on success, -EINVAL when the hypervisor reports failure.
 */
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
	bool create = event != SVM_VMGEXIT_AP_DESTROY;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret = 0;

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);

	/* For creation, RAX carries the SEV features for the new VMSA */
	if (create)
		ghcb_set_rax(ghcb, vmsa->sev_features);

	/* exit_info_1 packs: APIC ID [63:32] | VMPL [23:16] | event [15:0] */
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				event);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* A non-zero low dword of exit_info_1 indicates hypervisor failure */
	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
478
/*
 * Set or clear the VMSA attribute of the page at @va.
 *
 * Under an SVSM this is done via the Core protocol Create/Delete vCPU
 * calls (@caa and @apic_id are only used for creation); at VMPL0 it is
 * done directly with RMPADJUST. Returns 0 on success.
 */
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8  = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	return ret;
}
523
snp_cleanup_vmsa(struct sev_es_save_area * vmsa,int apic_id)524 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
525 {
526 int err;
527
528 err = snp_set_vmsa(vmsa, NULL, apic_id, false);
529 if (err)
530 pr_err("clear VMSA page failed (%u), leaking page\n", err);
531 else
532 free_page((unsigned long)vmsa);
533 }
534
/* Rewrite @kpte in place so the mapping of @va becomes encrypted. */
static void set_pte_enc(pte_t *kpte, int level, void *va)
{
	struct pte_enc_desc desc = {
		.kpte		= kpte,
		.pte_level	= level,
		.va		= va,
		.encrypt	= true
	};

	prepare_pte_enc(&desc);
	set_pte_enc_mask(kpte, desc.pfn, desc.new_pgprot);
}
547
/*
 * Convert all shared (decrypted) mappings back to private ahead of kexec:
 * first the direct map (skipping the per-CPU GHCB pages, which must stay
 * shared until the switch to the MSR protocol), then the .bss..decrypted
 * region.
 */
static void unshare_all_memory(void)
{
	unsigned long addr, end, size, ghcb;
	struct sev_es_runtime_data *data;
	unsigned int npages, level;
	bool skipped_addr;
	pte_t *pte;
	int cpu;

	/* Unshare the direct mapping. */
	addr = PAGE_OFFSET;
	end  = PAGE_OFFSET + get_max_mapped();

	while (addr < end) {
		pte = lookup_address(addr, &level);
		size = page_level_size(level);
		npages = size / PAGE_SIZE;
		skipped_addr = false;

		/* Only mappings that are currently decrypted need conversion */
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
			addr += size;
			continue;
		}

		/*
		 * Ensure that all the per-CPU GHCBs are made private at the
		 * end of the unsharing loop so that the switch to the slower
		 * MSR protocol happens last.
		 */
		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			/* Handle the case of a huge page containing the GHCB page */
			if (addr <= ghcb && ghcb < addr + size) {
				skipped_addr = true;
				break;
			}
		}

		if (!skipped_addr) {
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, npages);
		}
		addr += size;
	}

	/* Unshare all bss decrypted memory. */
	addr = (unsigned long)__start_bss_decrypted;
	end  = (unsigned long)__start_bss_decrypted_unused;
	npages = (end - addr) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = lookup_address(addr, &level);
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
			continue;

		set_pte_enc(pte, level, (void *)addr);
	}
	/* One PSC call covers the whole region after the PTEs are rewritten */
	addr = (unsigned long)__start_bss_decrypted;
	snp_set_memory_private(addr, npages);

	__flush_tlb_all();
}
612
613 /* Stop new private<->shared conversions */
snp_kexec_begin(void)614 void snp_kexec_begin(void)
615 {
616 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
617 return;
618
619 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
620 return;
621
622 /*
623 * Crash kernel ends up here with interrupts disabled: can't wait for
624 * conversions to finish.
625 *
626 * If race happened, just report and proceed.
627 */
628 if (!set_memory_enc_stop_conversion())
629 pr_warn("Failed to stop shared<->private conversions\n");
630 }
631
632 /*
633 * Shutdown all APs except the one handling kexec/kdump and clearing
634 * the VMSA tag on AP's VMSA pages as they are not being used as
635 * VMSA page anymore.
636 */
static void shutdown_all_aps(void)
{
	struct sev_es_save_area *vmsa;
	int apic_id, this_cpu, cpu;

	/* get_cpu() pins this task so "this_cpu" stays valid until put_cpu() */
	this_cpu = get_cpu();

	/*
	 * APs are already in HLT loop when enc_kexec_finish() callback
	 * is invoked.
	 */
	for_each_present_cpu(cpu) {
		vmsa = per_cpu(sev_vmsa, cpu);

		/*
		 * The BSP or offlined APs do not have guest allocated VMSA
		 * and there is no need to clear the VMSA tag for this page.
		 */
		if (!vmsa)
			continue;

		/*
		 * Cannot clear the VMSA tag for the currently running vCPU.
		 */
		if (this_cpu == cpu) {
			unsigned long pa;
			struct page *p;

			pa = __pa(vmsa);
			/*
			 * Mark the VMSA page of the running vCPU as offline
			 * so that is excluded and not touched by makedumpfile
			 * while generating vmcore during kdump.
			 */
			p = pfn_to_online_page(pa >> PAGE_SHIFT);
			if (p)
				__SetPageOffline(p);
			continue;
		}

		apic_id = cpuid_to_apicid[cpu];

		/*
		 * Issue AP destroy to ensure AP gets kicked out of guest mode
		 * to allow using RMPADJUST to remove the VMSA tag on it's
		 * VMSA page.
		 */
		vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
		snp_cleanup_vmsa(vmsa, apic_id);
	}

	put_cpu();
}
690
/*
 * Final SNP teardown before kexec: destroy all APs, convert every shared
 * mapping back to private, and finish with the per-CPU GHCB pages (which
 * forces the transition to the MSR protocol).
 */
void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned long size, addr;
	unsigned int level, cpu;
	struct ghcb *ghcb;
	pte_t *pte;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	shutdown_all_aps();

	unshare_all_memory();

	/*
	 * Switch to using the MSR protocol to change per-CPU GHCBs to
	 * private. All the per-CPU GHCBs have been switched back to private,
	 * so can't do any more GHCB calls to the hypervisor beyond this point
	 * until the kexec'ed kernel starts running.
	 */
	boot_ghcb = NULL;
	sev_cfg.ghcbs_initialized = false;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		size = page_level_size(level);
		/* Handle the case of a huge page containing the GHCB page */
		addr = (unsigned long)ghcb & page_level_mask(level);
		set_pte_enc(pte, level, (void *)addr);
		snp_set_memory_private(addr, (size / PAGE_SIZE));
	}
}
729
/* Segment attribute encodings used to seed the AP INIT VMSA segments */
#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

/* System segments: present, with descriptor type field 2 (LDT) / 3 (TSS) */
#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)
736
snp_alloc_vmsa_page(int cpu)737 static void *snp_alloc_vmsa_page(int cpu)
738 {
739 struct page *p;
740
741 /*
742 * Allocate VMSA page to work around the SNP erratum where the CPU will
743 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
744 * collides with the RMP entry of VMSA page. The recommended workaround
745 * is to not use a large page.
746 *
747 * Allocate an 8k page which is also 8k-aligned.
748 */
749 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
750 if (!p)
751 return NULL;
752
753 split_page(p, 1);
754
755 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
756 __free_page(p);
757
758 return page_address(p + 1);
759 }
760
/*
 * wakeup_secondary_cpu callback for SNP guests: start an AP by building a
 * fresh VMSA reflecting the INIT/SIPI state and asking the hypervisor to
 * run it via the AP Creation NAE event.
 *
 * Returns 0 on success or a negative error code.
 */
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct svsm_ca *caa;
	u8 sipi_vector;
	int ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;
	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernels or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
	if (!vmsa)
		return -ENOMEM;

	/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
	caa = per_cpu(svsm_caa, cpu);

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector		= (start_ip >> 12);
	vmsa->cs.base		= sipi_vector << 12;
	vmsa->cs.limit		= AP_INIT_CS_LIMIT;
	vmsa->cs.attrib		= INIT_CS_ATTRIBS;
	vmsa->cs.selector	= sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip		= start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit		= AP_INIT_DS_LIMIT;
	vmsa->ds.attrib		= INIT_DS_ATTRIBS;
	vmsa->es		= vmsa->ds;
	vmsa->fs		= vmsa->ds;
	vmsa->gs		= vmsa->ds;
	vmsa->ss		= vmsa->ds;

	vmsa->gdtr.limit	= AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit	= AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib	= INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit	= AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit		= AP_INIT_TR_LIMIT;
	vmsa->tr.attrib		= INIT_TR_ATTRIBS;

	vmsa->cr4		= cr4;
	vmsa->cr0		= AP_INIT_CR0_DEFAULT;
	vmsa->dr7		= DR7_RESET_VALUE;
	vmsa->dr6		= AP_INIT_DR6_DEFAULT;
	vmsa->rflags		= AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat		= AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0		= AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr		= AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw		= AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw		= AP_INIT_X87_FCW_DEFAULT;

	/* Secure AVIC requires virtual GIF and virtual NMI to be enabled */
	if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
		vmsa->vintr_ctrl	|= V_GIF_MASK | V_NMI_ENABLE_MASK;

	/* SVME must be set. */
	vmsa->efer		= EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl		= snp_vmpl;
	vmsa->sev_features	= sev_status >> 2;

	/* Populate AP's TSC scale/offset to get accurate TSC values. */
	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
		vmsa->tsc_scale = snp_tsc_scale;
		vmsa->tsc_offset = snp_tsc_offset;
	}

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, caa, apic_id, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
	if (ret) {
		snp_cleanup_vmsa(vmsa, apic_id);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa, apic_id);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}
887
snp_set_wakeup_secondary_cpu(void)888 void __init snp_set_wakeup_secondary_cpu(void)
889 {
890 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
891 return;
892
893 /*
894 * Always set this override if SNP is enabled. This makes it the
895 * required method to start APs under SNP. If the hypervisor does
896 * not support AP creation, then no APs will be started.
897 */
898 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
899 }
900
sev_es_setup_ap_jump_table(struct real_mode_header * rmh)901 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
902 {
903 u16 startup_cs, startup_ip;
904 phys_addr_t jump_table_pa;
905 u64 jump_table_addr;
906 u16 __iomem *jump_table;
907
908 jump_table_addr = get_jump_table_addr();
909
910 /* On UP guests there is no jump table so this is not a failure */
911 if (!jump_table_addr)
912 return 0;
913
914 /* Check if AP Jump Table is page-aligned */
915 if (jump_table_addr & ~PAGE_MASK)
916 return -EINVAL;
917
918 jump_table_pa = jump_table_addr & PAGE_MASK;
919
920 startup_cs = (u16)(rmh->trampoline_start >> 4);
921 startup_ip = (u16)(rmh->sev_es_trampoline_start -
922 rmh->trampoline_start);
923
924 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
925 if (!jump_table)
926 return -EIO;
927
928 writew(startup_ip, &jump_table[0]);
929 writew(startup_cs, &jump_table[1]);
930
931 iounmap(jump_table);
932
933 return 0;
934 }
935
936 /*
937 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
938 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
939 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
940 *
941 * When running under SVSM the CA page is needed too, so map it as well.
942 */
/*
 * Map each CPU's GHCB page (shared/unencrypted) and, under an SVSM, its CA
 * page (encrypted) into @pgd for use by the EFI runtime.
 *
 * Returns 0 on success, 1 on any mapping failure.
 */
int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
{
	unsigned long address, pflags, pflags_enc;
	struct sev_es_runtime_data *data;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	/* GHCBs are mapped shared; CA pages get the encryption bit added */
	pflags = _PAGE_NX | _PAGE_RW;
	pflags_enc = cc_mkenc(pflags);

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;

		/* When running under an SVSM, also map the per-CPU CA page */
		if (snp_vmpl) {
			address = per_cpu(svsm_caa_pa, cpu);
			if (!address)
				return 1;

			pfn = address >> PAGE_SHIFT;
			if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
				return 1;
		}
	}

	return 0;
}
978
savic_ghcb_msr_read(u32 reg)979 u64 savic_ghcb_msr_read(u32 reg)
980 {
981 u64 msr = APIC_BASE_MSR + (reg >> 4);
982 struct pt_regs regs = { .cx = msr };
983 struct es_em_ctxt ctxt = { .regs = ®s };
984 struct ghcb_state state;
985 enum es_result res;
986 struct ghcb *ghcb;
987
988 guard(irqsave)();
989
990 ghcb = __sev_get_ghcb(&state);
991 vc_ghcb_invalidate(ghcb);
992
993 res = __vc_handle_msr(ghcb, &ctxt, false);
994 if (res != ES_OK) {
995 pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
996 /* MSR read failures are treated as fatal errors */
997 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
998 }
999
1000 __sev_put_ghcb(&state);
1001
1002 return regs.ax | regs.dx << 32;
1003 }
1004
/*
 * Write an APIC register via the GHCB MSR protocol for Secure AVIC guests.
 * @reg: APIC register offset; converted to the corresponding x2APIC MSR.
 * @value: 64-bit value, split across EAX (low) and EDX (high).
 *
 * A failed write is treated as fatal and terminates the guest.
 *
 * Fix: "&regs" had been corrupted to the mojibake "(R)s" (HTML &reg;
 * entity damage), which does not compile.
 */
void savic_ghcb_msr_write(u32 reg, u64 value)
{
	u64 msr = APIC_BASE_MSR + (reg >> 4);
	struct pt_regs regs = {
		.cx = msr,
		.ax = lower_32_bits(value),
		.dx = upper_32_bits(value)
	};
	struct es_em_ctxt ctxt = { .regs = &regs };
	struct ghcb_state state;
	enum es_result res;
	struct ghcb *ghcb;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	res = __vc_handle_msr(ghcb, &ctxt, true);
	if (res != ES_OK) {
		pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
		/* MSR writes should never fail. Any failure is fatal error for SNP guest */
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
	}

	__sev_put_ghcb(&state);
}
1032
/*
 * Register @gpa as this vCPU's Secure AVIC backing page with the
 * hypervisor via the SAVIC NAE event.
 */
enum es_result savic_register_gpa(u64 gpa)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	struct ghcb *ghcb;
	enum es_result result;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	/* RAX selects "self GPA" mode, RBX carries the page address */
	ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
	ghcb_set_rbx(ghcb, gpa);

	result = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
				     SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0);

	__sev_put_ghcb(&state);

	return result;
}
1054
/*
 * Unregister this vCPU's Secure AVIC backing page. On success, the
 * previously registered GPA is stored in *@gpa when @gpa is non-NULL.
 */
enum es_result savic_unregister_gpa(u64 *gpa)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	struct ghcb *ghcb;
	enum es_result result;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);

	result = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
				     SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0);

	/* The hypervisor returns the unregistered GPA in RBX */
	if (result == ES_OK && gpa)
		*gpa = ghcb->save.rbx;

	__sev_put_ghcb(&state);

	return result;
}
1077
snp_register_per_cpu_ghcb(void)1078 static void snp_register_per_cpu_ghcb(void)
1079 {
1080 struct sev_es_runtime_data *data;
1081 struct ghcb *ghcb;
1082
1083 data = this_cpu_read(runtime_data);
1084 ghcb = &data->ghcb_page;
1085
1086 snp_register_ghcb_early(__pa(ghcb));
1087 }
1088
/*
 * Set up GHCB communication for the current boot phase: negotiate the
 * protocol and publish the boot GHCB on the BSP's first call, or register
 * the per-CPU GHCB once the runtime #VC handler is active.
 */
void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		sev_cfg.ghcbs_initialized = true;

		return;
	}

	/*
	 * Make sure the hypervisor talks a supported protocol.
	 * This gets called only in the BSP boot phase.
	 */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
1130
1131 #ifdef CONFIG_HOTPLUG_CPU
/*
 * Park the CPU in an AP HLT loop in the hypervisor. Loops issuing the
 * SVM_VMGEXIT_AP_HLT_LOOP VMGEXIT until the hypervisor signals a wakeup
 * by returning a non-zero, valid sw_exit_info_2.
 */
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		/* Hand the GHCB GPA to the hypervisor and exit to it */
		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}
1156
/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	/* Halt in the hypervisor until a wakeup signal arrives */
	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
1177 #else /* CONFIG_HOTPLUG_CPU */
1178 #define sev_es_play_dead native_play_dead
1179 #endif /* CONFIG_HOTPLUG_CPU */
1180
1181 #ifdef CONFIG_SMP
/* Install the SEV-ES aware play_dead handler for CPU offlining. */
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
1186 #else
sev_es_setup_play_dead(void)1187 static inline void sev_es_setup_play_dead(void) { }
1188 #endif
1189
/*
 * Allocate @cpu's SEV-ES runtime data (which contains its GHCB page)
 * from node-local memblock memory, plus an SVSM Calling Area when
 * running under an SVSM. Panics on allocation failure.
 */
static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;

	if (snp_vmpl) {
		struct svsm_ca *caa;

		/*
		 * Allocate the SVSM CA page if an SVSM is present.
		 * CPU 0 reuses the boot CA page.
		 */
		caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
			  : &boot_svsm_ca_page;

		per_cpu(svsm_caa, cpu) = caa;
		per_cpu(svsm_caa_pa, cpu) = __pa(caa);
	}
}
1211
/* Map @cpu's GHCB page decrypted (shared with the HV) and reset its state. */
static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	/* The GHCB is shared with the hypervisor, so it must be decrypted */
	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	/* Start from a clean, all-zero GHCB */
	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}
1229
/*
 * Set up runtime #VC exception handling: verify required CPU/HV
 * features, allocate and initialize the per-CPU GHCB pages, and switch
 * to the runtime #VC handler for subsequently booting CPUs.
 */
void __init sev_es_init_vc_handling(void)
{
	int cpu;

	/* The GHCB page must be page-aligned within the runtime data */
	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	/* An SVSM is present - use the Calling Areas set up in alloc_runtime_data() */
	if (snp_vmpl)
		sev_cfg.use_cas = true;

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
1267
/*
 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
 * ROM region can cause a crash since this region is not pre-validated.
 */
void __init snp_dmi_setup(void)
{
	/* Skip DMI probing entirely rather than touching the legacy ROM region */
	if (efi_enabled(EFI_CONFIG_TABLES))
		dmi_setup();
}
1278
/* Dump the SNP CPUID table - every slot, including unused ones past ->count. */
static void dump_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i = 0;

	pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
		cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);

	for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
			i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
			fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
	}
}
1295
/*
 * It is useful from an auditing/testing perspective to provide an easy way
 * for the guest owner to know that the CPUID table has been initialized as
 * expected, but that initialization happens too early in boot to print any
 * sort of indicator, and there's not really any other good place to do it,
 * so do it here.
 *
 * If running as an SNP guest, report the current VM privilege level (VMPL).
 */
static int __init report_snp_info(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (cpuid_table->count) {
		pr_info("Using SNP CPUID table, %d entries present.\n",
			cpuid_table->count);

		/* Full table dump only when sev_cfg.debug was requested */
		if (sev_cfg.debug)
			dump_cpuid_table();
	}

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		pr_info("SNP running at VMPL%u.\n", snp_vmpl);

	return 0;
}
arch_initcall(report_snp_info);
1323
/*
 * Issue one SNP guest request to the firmware via the GHCB protocol.
 *
 * req->exitinfo2 is primed with SEV_RET_NO_FW_CALL so callers can tell
 * whether the firmware was reached at all; on a completed call it holds
 * the value returned in sw_exit_info_2.
 *
 * Return: 0 on success, -EAGAIN when the host reports BUSY (throttled),
 * -ENOSPC when an extended request's certificate buffer is too small
 * (required page count returned in input->data_npages), -EIO otherwise.
 */
static int snp_issue_guest_request(struct snp_guest_req *req)
{
	struct snp_req_data *input = &req->input;
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	req->exitinfo2 = SEV_RET_NO_FW_CALL;

	/*
	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
	 * a per-CPU GHCB.
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);
	if (!ghcb) {
		ret = -EIO;
		goto e_restore_irq;
	}

	vc_ghcb_invalidate(ghcb);

	/* Extended requests additionally pass the certificate buffer in RAX/RBX */
	if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
		ghcb_set_rax(ghcb, input->data_gpa);
		ghcb_set_rbx(ghcb, input->data_npages);
	}

	ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
	if (ret)
		goto e_put;

	req->exitinfo2 = ghcb->save.sw_exit_info_2;
	switch (req->exitinfo2) {
	case 0:
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
		ret = -EAGAIN;
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
		/* Number of expected pages are returned in RBX */
		if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
			input->data_npages = ghcb_get_rbx(ghcb);
			ret = -ENOSPC;
			break;
		}
		fallthrough;
	default:
		ret = -EIO;
		break;
	}

e_put:
	__sev_put_ghcb(&state);
e_restore_irq:
	local_irq_restore(flags);

	return ret;
}
1387
/* Platform device matched by the "sev-guest" driver */
static struct platform_device sev_guest_device = {
	.name = "sev-guest",
	.id = -1,
};

/* Platform device matched by the "tpm-svsm" (SVSM vTPM) driver */
static struct platform_device tpm_svsm_device = {
	.name = "tpm-svsm",
	.id = -1,
};
1397
/*
 * Register the SNP guest platform devices: the sev-guest device and,
 * when an SVSM vTPM responds to probing, the SVSM vTPM device.
 */
static int __init snp_init_platform_device(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	if (platform_device_register(&sev_guest_device))
		return -ENODEV;

	/* Only register the vTPM device when the SVSM vTPM probe succeeds */
	if (snp_svsm_vtpm_probe() &&
	    platform_device_register(&tpm_svsm_device))
		return -ENODEV;

	pr_info("SNP guest platform devices initialized.\n");
	return 0;
}
device_initcall(snp_init_platform_device);
1414
sev_show_status(void)1415 void sev_show_status(void)
1416 {
1417 int i;
1418
1419 pr_info("Status: ");
1420 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1421 if (sev_status & BIT_ULL(i)) {
1422 if (!sev_status_feat_names[i])
1423 continue;
1424
1425 pr_cont("%s ", sev_status_feat_names[i]);
1426 }
1427 }
1428 pr_cont("\n");
1429 }
1430
1431 #ifdef CONFIG_SYSFS
/* sysfs show handler: report the VMPL this SNP guest is running at. */
static ssize_t vmpl_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", snp_vmpl);
}
1437
/* Read-only "vmpl" attribute, exposed below the cpu subsystem's "sev" kobject */
static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);

static struct attribute *vmpl_attrs[] = {
	&vmpl_attr.attr,
	NULL
};

static struct attribute_group sev_attr_group = {
	.attrs = vmpl_attrs,
};
1448
/*
 * Create the "sev" kobject under the cpu subsystem root and attach the
 * vmpl attribute so userspace can query the VM privilege level.
 */
static int __init sev_sysfs_init(void)
{
	struct kobject *sev_kobj;
	struct device *dev_root;
	int ret;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (!dev_root)
		return -ENODEV;

	sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
	/* The dev_root reference is no longer needed */
	put_device(dev_root);

	if (!sev_kobj)
		return -ENOMEM;

	ret = sysfs_create_group(sev_kobj, &sev_attr_group);
	if (ret)
		kobject_put(sev_kobj);

	return ret;
}
arch_initcall(sev_sysfs_init);
1475 #endif // CONFIG_SYSFS
1476
/*
 * Re-encrypt and free pages previously obtained from alloc_shared_pages().
 * If the encryption mask cannot be restored, the pages are intentionally
 * leaked instead of being handed back to the allocator while still shared.
 */
static void free_shared_pages(void *buf, size_t sz)
{
	unsigned int nr_pages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	int rc;

	if (!buf)
		return;

	rc = set_memory_encrypted((unsigned long)buf, nr_pages);
	if (rc) {
		WARN_ONCE(rc, "failed to restore encryption mask (leak it)\n");
		return;
	}

	__free_pages(virt_to_page(buf), get_order(sz));
}
1493
/*
 * Allocate page-aligned memory and mark it decrypted (shared with the
 * hypervisor). Returns the virtual address of the region, or NULL on
 * allocation or conversion failure.
 */
static void *alloc_shared_pages(size_t sz)
{
	unsigned int nr_pages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	struct page *page;
	int rc;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
	if (!page)
		return NULL;

	rc = set_memory_decrypted((unsigned long)page_address(page), nr_pages);
	if (!rc)
		return page_address(page);

	/* Conversion failed - the pages are still private, free them again */
	pr_err("failed to mark page shared, ret=%d\n", rc);
	__free_pages(page, get_order(sz));

	return NULL;
}
1513
get_vmpck(int id,struct snp_secrets_page * secrets,u32 ** seqno)1514 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1515 {
1516 u8 *key = NULL;
1517
1518 switch (id) {
1519 case 0:
1520 *seqno = &secrets->os_area.msg_seqno_0;
1521 key = secrets->vmpck0;
1522 break;
1523 case 1:
1524 *seqno = &secrets->os_area.msg_seqno_1;
1525 key = secrets->vmpck1;
1526 break;
1527 case 2:
1528 *seqno = &secrets->os_area.msg_seqno_2;
1529 key = secrets->vmpck2;
1530 break;
1531 case 3:
1532 *seqno = &secrets->os_area.msg_seqno_3;
1533 key = secrets->vmpck3;
1534 break;
1535 default:
1536 break;
1537 }
1538
1539 return key;
1540 }
1541
/*
 * Allocate an AES-GCM context and expand @key into it. Returns the
 * context, or NULL on allocation or key-expansion failure (the context
 * is freed in the latter case).
 */
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
{
	struct aesgcm_ctx *ctx;

	ctx = kzalloc_obj(*ctx);
	if (!ctx)
		return NULL;

	if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
		pr_err("Crypto context initialization failed\n");
		kfree(ctx);
		return NULL;
	}

	return ctx;
}
1558
/*
 * Bind @mdesc to a VMPCK: look the key up in the secrets page, reject
 * missing or all-zero keys, and set up the AES-GCM crypto context.
 * A @vmpck_id of -1 selects the key matching the current VMPL.
 *
 * Returns 0 on success, -EINVAL for an invalid or empty key, -ENOMEM
 * if the crypto context cannot be allocated.
 */
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
{
	/* Adjust the default VMPCK key based on the executing VMPL level */
	if (vmpck_id == -1)
		vmpck_id = snp_vmpl;

	mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
	if (!mdesc->vmpck) {
		pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	/* Verify that VMPCK is not zero. */
	if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err("Empty VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	mdesc->vmpck_id = vmpck_id;

	mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
	if (!mdesc->ctx)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(snp_msg_init);
1586
/*
 * Allocate a guest message descriptor: map the secrets page and set up
 * the shared (decrypted) request/response pages used to exchange
 * encrypted messages with the ASP.
 *
 * Returns the descriptor or ERR_PTR(-ENOMEM) on any failure; free with
 * snp_msg_free().
 */
struct snp_msg_desc *snp_msg_alloc(void)
{
	struct snp_msg_desc *mdesc;
	void __iomem *mem;

	/* A whole message must fit into the single shared page */
	BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);

	mdesc = kzalloc_obj(struct snp_msg_desc);
	if (!mdesc)
		return ERR_PTR(-ENOMEM);

	/* The secrets page stays private - map it encrypted */
	mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem)
		goto e_free_mdesc;

	mdesc->secrets = (__force struct snp_secrets_page *)mem;

	/* Allocate the shared page used for the request and response message. */
	mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->request)
		goto e_unmap;

	mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->response)
		goto e_free_request;

	return mdesc;

e_free_request:
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
	iounmap(mem);
e_free_mdesc:
	kfree(mdesc);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);
1625
/*
 * Tear down a descriptor created by snp_msg_alloc() (NULL is a no-op).
 * The descriptor itself is released with kfree_sensitive() since it
 * holds message staging buffers with decrypted content.
 */
void snp_msg_free(struct snp_msg_desc *mdesc)
{
	if (!mdesc)
		return;

	kfree(mdesc->ctx);
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
	iounmap((__force void __iomem *)mdesc->secrets);

	kfree_sensitive(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);
1639
/*
 * Serializes shared request/response buffer access, command handling and
 * the message sequence counter across all guest request callers.
 */
static DEFINE_MUTEX(snp_cmd_mutex);
1642
/*
 * If an error is received from the host or AMD Secure Processor (ASP) there
 * are two options. Either retry the exact same encrypted request or discontinue
 * using the VMPCK.
 *
 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
 * encrypt the requests. The IV for this scheme is the sequence number. GCM
 * cannot tolerate IV reuse.
 *
 * The ASP FW v1.51 only increments the sequence numbers on a successful
 * guest<->ASP back and forth and only accepts messages at its exact sequence
 * number.
 *
 * So if the sequence number were to be reused the encryption scheme is
 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
 * will reject the request.
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);
	/* Wipe the key material and mark the descriptor key-less */
	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}
1667
__snp_get_msg_seqno(struct snp_msg_desc * mdesc)1668 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1669 {
1670 u64 count;
1671
1672 lockdep_assert_held(&snp_cmd_mutex);
1673
1674 /* Read the current message sequence counter from secrets pages */
1675 count = *mdesc->os_area_msg_seqno;
1676
1677 return count + 1;
1678 }
1679
/*
 * Return the sequence number to use for the next message, or zero when
 * the counter would overflow its 32-bit storage (callers must treat a
 * zero return as failure).
 */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value but the version 2 of GHCB specification defines a 32-bit storage
	 * for it. If the counter exceeds the 32-bit value then return zero.
	 * The caller should check the return value, but if the caller happens to
	 * not check the value and use it, then the firmware treats zero as an
	 * invalid number and will fail the message request.
	 */
	if (count >= UINT_MAX) {
		pr_err("request message sequence counter overflow\n");
		return 0;
	}

	return count;
}
1700
/* Advance the stored sequence counter past both sides of the exchange. */
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
	/*
	 * The counter is also incremented by the PSP, so increment it by 2
	 * and save in secrets page.
	 */
	*mdesc->os_area_msg_seqno += 2;
}
1709
/*
 * Copy the response out of shared memory, validate its header against
 * the request and decrypt the payload into req->resp_buf.
 *
 * Returns 0 on success, -EBADMSG when any header check or the
 * authenticated decryption fails.
 */
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	struct snp_guest_msg *resp_msg = &mdesc->secret_response;
	struct snp_guest_msg *req_msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
	struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	pr_debug("response [seqno %lld type %d version %d sz %d]\n",
		 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
		 resp_msg_hdr->msg_sz);

	/*
	 * Copy response from shared memory to encrypted memory so all
	 * subsequent checks operate on a stable, private copy.
	 */
	memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));

	/* Verify that the sequence counter is incremented by 1 */
	if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
		return -EBADMSG;

	/* Verify response message type and version number. */
	if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
	    resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
		return -EBADMSG;

	/*
	 * If the message size is greater than our buffer length then return
	 * an error.
	 */
	if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
		return -EBADMSG;

	/* Decrypt the payload - the response sequence number is the AES-GCM IV */
	memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
	if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
			    &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
		return -EBADMSG;

	return 0;
}
1750
/*
 * Build and encrypt the request message in mdesc->secret_request.
 * The sequence number serves as the AES-GCM IV and the header (from
 * ->algo onward, AAD_LEN bytes) as additional authenticated data.
 *
 * Returns 0 on success, -ENOSR for a zero sequence number, -EBADMSG
 * when the payload would not fit into the message buffer.
 */
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
{
	struct snp_guest_msg *msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *hdr = &msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	memset(msg, 0, sizeof(*msg));

	hdr->algo = SNP_AEAD_AES_256_GCM;
	hdr->hdr_version = MSG_HDR_VER;
	hdr->hdr_sz = sizeof(*hdr);
	hdr->msg_type = req->msg_type;
	hdr->msg_version = req->msg_version;
	hdr->msg_seqno = seqno;
	hdr->msg_vmpck = req->vmpck_id;
	hdr->msg_sz = req->req_sz;

	/* Verify the sequence number is non-zero */
	if (!hdr->msg_seqno)
		return -ENOSR;

	pr_debug("request [seqno %lld type %d version %d sz %d]\n",
		 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);

	/* Payload plus authtag must fit in the message buffer */
	if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
		return -EBADMSG;

	memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
	aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
		       AAD_LEN, iv, hdr->authtag);

	return 0;
}
1785
/*
 * Issue the request to the firmware, handling the two retryable cases
 * (host throttling and a too-small extended-request buffer) so that the
 * message sequence number is consumed exactly once and the AES-GCM IV
 * is never reused.
 */
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	unsigned long req_start = jiffies;
	unsigned int override_npages = 0;
	u64 override_err = 0;
	int rc;

retry_request:
	/*
	 * Call firmware to process the request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small of a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		/* Give up after SNP_REQ_MAX_RETRY_DURATION of throttling */
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		req->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	/* Report the required certificate page count back to the caller */
	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}
1870
snp_send_guest_request(struct snp_msg_desc * mdesc,struct snp_guest_req * req)1871 int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
1872 {
1873 u64 seqno;
1874 int rc;
1875
1876 /*
1877 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
1878 * The offload's DMA SG list of data to encrypt has to be in linear mapping.
1879 */
1880 if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
1881 pr_warn("AES-GSM buffers must be in linear mapping");
1882 return -EINVAL;
1883 }
1884
1885 guard(mutex)(&snp_cmd_mutex);
1886
1887 /* Check if the VMPCK is not empty */
1888 if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
1889 pr_err_ratelimited("VMPCK is disabled\n");
1890 return -ENOTTY;
1891 }
1892
1893 /* Get message sequence and verify that its a non-zero */
1894 seqno = snp_get_msg_seqno(mdesc);
1895 if (!seqno)
1896 return -EIO;
1897
1898 /* Clear shared memory's response for the host to populate. */
1899 memset(mdesc->response, 0, sizeof(struct snp_guest_msg));
1900
1901 /* Encrypt the userspace provided payload in mdesc->secret_request. */
1902 rc = enc_payload(mdesc, seqno, req);
1903 if (rc)
1904 return rc;
1905
1906 /*
1907 * Write the fully encrypted request to the shared unencrypted
1908 * request page.
1909 */
1910 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));
1911
1912 /* Initialize the input address for guest request */
1913 req->input.req_gpa = __pa(mdesc->request);
1914 req->input.resp_gpa = __pa(mdesc->response);
1915 req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
1916
1917 rc = __handle_guest_request(mdesc, req);
1918 if (rc) {
1919 if (rc == -EIO &&
1920 req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
1921 return rc;
1922
1923 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
1924 rc, req->exitinfo2);
1925
1926 snp_disable_vmpck(mdesc);
1927 return rc;
1928 }
1929
1930 rc = verify_and_dec_payload(mdesc, req);
1931 if (rc) {
1932 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
1933 snp_disable_vmpck(mdesc);
1934 return rc;
1935 }
1936
1937 return 0;
1938 }
1939 EXPORT_SYMBOL_GPL(snp_send_guest_request);
1940
/*
 * Query the ASP for the Secure TSC parameters via an SNP_MSG_TSC_INFO_REQ
 * guest message and cache the result in snp_tsc_scale/snp_tsc_offset.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO (or the
 * request's error) otherwise.
 */
static int __init snp_get_tsc_info(void)
{
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req req = {};
	int rc = -ENOMEM;

	tsc_req = kzalloc_obj(*tsc_req);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	/* A throwaway message descriptor is used just for this one request */
	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_tsc_resp;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_TSC_INFO_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = tsc_req;
	req.req_sz = sizeof(*tsc_req);
	req.resp_buf = (void *)tsc_resp;
	req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	/* A non-zero status in the response indicates failure */
	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}
2007
snp_secure_tsc_prepare(void)2008 void __init snp_secure_tsc_prepare(void)
2009 {
2010 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
2011 return;
2012
2013 if (snp_get_tsc_info()) {
2014 pr_alert("Unable to retrieve Secure TSC info from ASP\n");
2015 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
2016 }
2017
2018 pr_debug("SecureTSC enabled");
2019 }
2020
/* Calibration callback for Secure TSC guests: report the fixed frequency. */
static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}
2025
/*
 * Finish Secure TSC setup: read the TSC scaling factor from the secrets
 * page and the guest TSC frequency from MSR_AMD64_GUEST_TSC_FREQ, then
 * install fixed-frequency CPU/TSC calibration callbacks.
 */
void __init snp_secure_tsc_init(void)
{
	struct snp_secrets_page *secrets;
	unsigned long tsc_freq_mhz;
	void *mem;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	/* tsc_factor lives in the (encrypted) secrets page */
	mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	secrets = (__force struct snp_secrets_page *)mem;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

	/* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */
	tsc_freq_mhz &= GENMASK_ULL(17, 0);

	/* Scale the nominal MHz value to kHz using the platform's TSC_FACTOR */
	snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

	early_memunmap(mem, PAGE_SIZE);
}
2056