1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * AMD Memory Encryption Support
4 *
5 * Copyright (C) 2019 SUSE
6 *
7 * Author: Joerg Roedel <jroedel@suse.de>
8 */
9
10 #define pr_fmt(fmt) "SEV: " fmt
11
12 #include <linux/sched/debug.h> /* For show_regs() */
13 #include <linux/percpu-defs.h>
14 #include <linux/cc_platform.h>
15 #include <linux/printk.h>
16 #include <linux/mm_types.h>
17 #include <linux/set_memory.h>
18 #include <linux/memblock.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/cpumask.h>
22 #include <linux/efi.h>
23 #include <linux/platform_device.h>
24 #include <linux/io.h>
25 #include <linux/psp-sev.h>
26 #include <linux/dmi.h>
27 #include <uapi/linux/sev-guest.h>
28 #include <crypto/gcm.h>
29
30 #include <asm/init.h>
31 #include <asm/cpu_entry_area.h>
32 #include <asm/stacktrace.h>
33 #include <asm/sev.h>
34 #include <asm/insn-eval.h>
35 #include <asm/fpu/xcr.h>
36 #include <asm/processor.h>
37 #include <asm/realmode.h>
38 #include <asm/setup.h>
39 #include <asm/traps.h>
40 #include <asm/svm.h>
41 #include <asm/smp.h>
42 #include <asm/cpu.h>
43 #include <asm/apic.h>
44 #include <asm/cpuid/api.h>
45 #include <asm/cmdline.h>
46 #include <asm/msr.h>
47
48 #include "internal.h"
49
/* Bitmap of SEV features supported by the hypervisor */
u64 sev_hv_features __ro_after_init;
SYM_PIC_ALIAS(sev_hv_features);

/* Secrets page physical address from the CC blob */
u64 sev_secrets_pa __ro_after_init;
SYM_PIC_ALIAS(sev_secrets_pa);

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT		0x5555
#define AP_INIT_X87_FCW_DEFAULT		0x0040
#define AP_INIT_CR0_DEFAULT		0x60000010
#define AP_INIT_MXCSR_DEFAULT		0x1f80

/* Human-readable names for the SEV_STATUS MSR feature bits, indexed by bit. */
static const char * const sev_status_feat_names[] = {
	[MSR_AMD64_SEV_ENABLED_BIT]		= "SEV",
	[MSR_AMD64_SEV_ES_ENABLED_BIT]		= "SEV-ES",
	[MSR_AMD64_SEV_SNP_ENABLED_BIT]		= "SEV-SNP",
	[MSR_AMD64_SNP_VTOM_BIT]		= "vTom",
	[MSR_AMD64_SNP_REFLECT_VC_BIT]		= "ReflectVC",
	[MSR_AMD64_SNP_RESTRICTED_INJ_BIT]	= "RI",
	[MSR_AMD64_SNP_ALT_INJ_BIT]		= "AI",
	[MSR_AMD64_SNP_DEBUG_SWAP_BIT]		= "DebugSwap",
	[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT]	= "NoHostIBS",
	[MSR_AMD64_SNP_BTB_ISOLATION_BIT]	= "BTBIsol",
	[MSR_AMD64_SNP_VMPL_SSS_BIT]		= "VmplSSS",
	[MSR_AMD64_SNP_SECURE_TSC_BIT]		= "SecureTSC",
	[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT]	= "VMGExitParam",
	[MSR_AMD64_SNP_IBS_VIRT_BIT]		= "IBSVirt",
	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
	[MSR_AMD64_SNP_SECURE_AVIC_BIT]		= "SecureAVIC",
};

/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static unsigned long snp_tsc_freq_khz __ro_after_init;

/* Per-CPU GHCB runtime state and the currently-installed AP VMSA page. */
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);

/*
 * SVSM related information:
 * When running under an SVSM, the VMPL that Linux is executing at must be
 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
 */
u8 snp_vmpl __ro_after_init;
EXPORT_SYMBOL_GPL(snp_vmpl);
SYM_PIC_ALIAS(snp_vmpl);

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
u16 ghcb_version __ro_after_init;
SYM_PIC_ALIAS(ghcb_version);

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
struct ghcb *boot_ghcb __section(".data");
133
get_snp_jump_table_addr(void)134 static u64 __init get_snp_jump_table_addr(void)
135 {
136 struct snp_secrets_page *secrets;
137 void __iomem *mem;
138 u64 addr;
139
140 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
141 if (!mem) {
142 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
143 return 0;
144 }
145
146 secrets = (__force struct snp_secrets_page *)mem;
147
148 addr = secrets->os_area.ap_jump_table_pa;
149 iounmap(mem);
150
151 return addr;
152 }
153
/*
 * Retrieve the AP jump table address: from the secrets page on SNP, or by
 * asking the hypervisor via the AP_JUMP_TABLE NAE event on SEV-ES.
 * Returns 0 when no jump table exists (e.g. UP guest) or on failure.
 */
static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	/* The GHCB is per-CPU state, so keep IRQs off while it is in use. */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	/* AP_JUMP_TABLE NAE event, sub-operation "get", per the GHCB spec. */
	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* The hypervisor returns the jump table address in sw_exit_info_2. */
	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
186
/*
 * PVALIDATE every entry of a Page State Change descriptor.
 *
 * A 2M entry can fail with PVALIDATE_FAIL_SIZEMISMATCH when the range is
 * not backed by a 2M RMP mapping; in that case fall back to validating the
 * constituent 4K pages individually.  Any other failure is fatal and
 * terminates the guest via __pval_terminate().
 */
static void pval_pages(struct snp_psc_desc *desc)
{
	struct psc_entry *e;
	unsigned long vaddr;
	unsigned int size;
	unsigned int i;
	bool validate;
	u64 pfn;
	int rc;

	for (i = 0; i <= desc->hdr.end_entry; i++) {
		e = &desc->entries[i];

		pfn = e->gfn;
		vaddr = (unsigned long)pfn_to_kaddr(pfn);
		size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
		/* Validate when making private, rescind when making shared. */
		validate = e->operation == SNP_PAGE_STATE_PRIVATE;

		rc = pvalidate(vaddr, size, validate);
		if (!rc)
			continue;

		if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
			unsigned long vaddr_end = vaddr + PMD_SIZE;

			/* Retry the 2M range as individual 4K pages. */
			for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
				rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
				if (rc)
					__pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
			}
		} else {
			__pval_terminate(pfn, validate, size, rc, 0);
		}
	}
}
222
pvalidate_pages(struct snp_psc_desc * desc)223 static void pvalidate_pages(struct snp_psc_desc *desc)
224 {
225 struct psc_entry *e;
226 unsigned int i;
227
228 if (snp_vmpl)
229 svsm_pval_pages(desc);
230 else
231 pval_pages(desc);
232
233 /*
234 * If not affected by the cache-coherency vulnerability there is no need
235 * to perform the cache eviction mitigation.
236 */
237 if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO))
238 return;
239
240 for (i = 0; i <= desc->hdr.end_entry; i++) {
241 e = &desc->entries[i];
242
243 /*
244 * If validating memory (making it private) perform the cache
245 * eviction mitigation.
246 */
247 if (e->operation == SNP_PAGE_STATE_PRIVATE)
248 sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1);
249 }
250 }
251
/*
 * Issue a Page State Change request to the hypervisor through the GHCB
 * shared buffer.  Returns 0 on success, 1 on any failure (hypervisor
 * error, reserved header bit set, or processing going backwards).
 */
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{
	int cur_entry, end_entry, ret = 0;
	struct snp_psc_desc *data;
	struct es_em_ctxt ctxt;

	vc_ghcb_invalidate(ghcb);

	/* Copy the input desc into GHCB shared buffer */
	data = (struct snp_psc_desc *)ghcb->shared_buffer;
	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));

	/*
	 * As per the GHCB specification, the hypervisor can resume the guest
	 * before processing all the entries. Check whether all the entries
	 * are processed. If not, then keep retrying. Note, the hypervisor
	 * will update the data memory directly to indicate the status, so
	 * reference the data->hdr everywhere.
	 *
	 * The strategy here is to wait for the hypervisor to change the page
	 * state in the RMP table before guest accesses the memory pages. If the
	 * page state change was not successful, then later memory access will
	 * result in a crash.
	 */
	cur_entry = data->hdr.cur_entry;
	end_entry = data->hdr.end_entry;

	while (data->hdr.cur_entry <= data->hdr.end_entry) {
		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

		/* This will advance the shared buffer data points to. */
		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

		/*
		 * Page State Change VMGEXIT can pass error code through
		 * exit_info_2.
		 */
		if (WARN(ret || ghcb->save.sw_exit_info_2,
			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
			 ret, ghcb->save.sw_exit_info_2)) {
			ret = 1;
			goto out;
		}

		/* Verify that reserved bit is not set */
		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
			ret = 1;
			goto out;
		}

		/*
		 * Sanity check that entry processing is not going backwards.
		 * This will happen only if hypervisor is tricking us.
		 */
		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
			ret = 1;
			goto out;
		}
	}

out:
	return ret;
}
317
/*
 * Build a PSC descriptor covering as much of [vaddr, vaddr_end) as fits in
 * one descriptor, issue the Page State Change to the hypervisor and run
 * PVALIDATE around it (before the PSC when rescinding to shared, after it
 * when making pages private).
 *
 * Returns the first address not covered by this batch so the caller can
 * iterate until the whole range is processed.
 */
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
{
	struct ghcb_state state;
	bool use_large_entry;
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long flags;
	unsigned long pfn;
	struct ghcb *ghcb;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
		hdr->end_entry = i;

		/* vmalloc mappings may not be physically contiguous: force 4K entries. */
		if (is_vmalloc_addr((void *)vaddr)) {
			pfn = vmalloc_to_pfn((void *)vaddr);
			use_large_entry = false;
		} else {
			pfn = __pa(vaddr) >> PAGE_SHIFT;
			use_large_entry = true;
		}

		e->gfn = pfn;
		e->operation = op;

		/* Use a 2M entry only when aligned and at least 2M remains. */
		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
		    (vaddr_end - vaddr) >= PMD_SIZE) {
			e->pagesize = RMP_PG_SIZE_2M;
			vaddr += PMD_SIZE;
		} else {
			e->pagesize = RMP_PG_SIZE_4K;
			vaddr += PAGE_SIZE;
		}

		e++;
		i++;
	}

	/* Page validation must be rescinded before changing to shared */
	if (op == SNP_PAGE_STATE_SHARED)
		pvalidate_pages(data);

	local_irq_save(flags);

	/* Fall back to the boot GHCB until the per-CPU GHCBs are set up. */
	if (sev_cfg.ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else
		ghcb = boot_ghcb;

	/* Invoke the hypervisor to perform the page state changes */
	if (!ghcb || vmgexit_psc(ghcb, data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	if (sev_cfg.ghcbs_initialized)
		__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Page validation must be performed after changing to private */
	if (op == SNP_PAGE_STATE_PRIVATE)
		pvalidate_pages(data);

	return vaddr;
}
389
set_pages_state(unsigned long vaddr,unsigned long npages,int op)390 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
391 {
392 struct snp_psc_desc desc;
393 unsigned long vaddr_end;
394
395 /* Use the MSR protocol when a GHCB is not available. */
396 if (!boot_ghcb) {
397 struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() };
398
399 return early_set_pages_state(vaddr, __pa(vaddr), npages, &d);
400 }
401
402 vaddr = vaddr & PAGE_MASK;
403 vaddr_end = vaddr + (npages << PAGE_SHIFT);
404
405 while (vaddr < vaddr_end)
406 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
407 }
408
snp_set_memory_shared(unsigned long vaddr,unsigned long npages)409 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
410 {
411 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
412 return;
413
414 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
415 }
416
snp_set_memory_private(unsigned long vaddr,unsigned long npages)417 void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
418 {
419 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
420 return;
421
422 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
423 }
424
/* Accept (make private) the physical range [@start, @end) for an SNP guest. */
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
	unsigned long va;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	va = (unsigned long)__va(start);
	set_pages_state(va, (end - start) >> PAGE_SHIFT, SNP_PAGE_STATE_PRIVATE);
}
437
/*
 * Issue an AP Creation NAE event (@event selects create/destroy) for the
 * vCPU identified by @apic_id, handing the hypervisor the physical address
 * of @vmsa.  Returns 0 on success, -EINVAL if the hypervisor reports an
 * error in the low 32 bits of sw_exit_info_1.
 */
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
	bool create = event != SVM_VMGEXIT_AP_DESTROY;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret = 0;

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);

	/* On create, RAX carries the SEV features the new VMSA runs with. */
	if (create)
		ghcb_set_rax(ghcb, vmsa->sev_features);

	/* sw_exit_info_1 layout per GHCB spec: APIC ID | VMPL | event. */
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				event);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
477
/*
 * Set or clear the VMSA attribute of the page at @va.  When running under
 * an SVSM (snp_vmpl != 0) the guest is not at VMPL0 and must request the
 * change through the SVSM Core protocol; otherwise RMPADJUST is used
 * directly.  @caa and @apic_id are only consumed on the SVSM create path.
 * Returns 0 on success or a non-zero instruction/protocol error.
 */
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8  = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	return ret;
}
522
snp_cleanup_vmsa(struct sev_es_save_area * vmsa,int apic_id)523 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
524 {
525 int err;
526
527 err = snp_set_vmsa(vmsa, NULL, apic_id, false);
528 if (err)
529 pr_err("clear VMSA page failed (%u), leaking page\n", err);
530 else
531 free_page((unsigned long)vmsa);
532 }
533
/* Rewrite the PTE for @va so the mapping is encrypted again. */
static void set_pte_enc(pte_t *kpte, int level, void *va)
{
	struct pte_enc_desc desc = {
		.kpte		= kpte,
		.pte_level	= level,
		.va		= va,
		.encrypt	= true,
	};

	prepare_pte_enc(&desc);
	set_pte_enc_mask(kpte, desc.pfn, desc.new_pgprot);
}
546
/*
 * Convert all shared (decrypted) memory back to private in preparation for
 * kexec: first the direct map — skipping any mapping that contains a
 * per-CPU GHCB page, which must be converted last — then the
 * .bss..decrypted region.
 */
static void unshare_all_memory(void)
{
	unsigned long addr, end, size, ghcb;
	struct sev_es_runtime_data *data;
	unsigned int npages, level;
	bool skipped_addr;
	pte_t *pte;
	int cpu;

	/* Unshare the direct mapping. */
	addr = PAGE_OFFSET;
	end  = PAGE_OFFSET + get_max_mapped();

	while (addr < end) {
		pte = lookup_address(addr, &level);
		size = page_level_size(level);
		npages = size / PAGE_SIZE;
		skipped_addr = false;

		/* Already private (or unmapped): nothing to convert. */
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
			addr += size;
			continue;
		}

		/*
		 * Ensure that all the per-CPU GHCBs are made private at the
		 * end of the unsharing loop so that the switch to the slower
		 * MSR protocol happens last.
		 */
		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			/* Handle the case of a huge page containing the GHCB page */
			if (addr <= ghcb && ghcb < addr + size) {
				skipped_addr = true;
				break;
			}
		}

		if (!skipped_addr) {
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, npages);
		}
		addr += size;
	}

	/* Unshare all bss decrypted memory. */
	addr = (unsigned long)__start_bss_decrypted;
	end  = (unsigned long)__start_bss_decrypted_unused;
	npages = (end - addr) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = lookup_address(addr, &level);
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
			continue;

		set_pte_enc(pte, level, (void *)addr);
	}
	/* One PSC call covers the whole region, including skipped PTEs. */
	addr = (unsigned long)__start_bss_decrypted;
	snp_set_memory_private(addr, npages);

	__flush_tlb_all();
}
611
612 /* Stop new private<->shared conversions */
snp_kexec_begin(void)613 void snp_kexec_begin(void)
614 {
615 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
616 return;
617
618 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
619 return;
620
621 /*
622 * Crash kernel ends up here with interrupts disabled: can't wait for
623 * conversions to finish.
624 *
625 * If race happened, just report and proceed.
626 */
627 if (!set_memory_enc_stop_conversion())
628 pr_warn("Failed to stop shared<->private conversions\n");
629 }
630
631 /*
632 * Shutdown all APs except the one handling kexec/kdump and clearing
633 * the VMSA tag on AP's VMSA pages as they are not being used as
634 * VMSA page anymore.
635 */
shutdown_all_aps(void)636 static void shutdown_all_aps(void)
637 {
638 struct sev_es_save_area *vmsa;
639 int apic_id, this_cpu, cpu;
640
641 this_cpu = get_cpu();
642
643 /*
644 * APs are already in HLT loop when enc_kexec_finish() callback
645 * is invoked.
646 */
647 for_each_present_cpu(cpu) {
648 vmsa = per_cpu(sev_vmsa, cpu);
649
650 /*
651 * The BSP or offlined APs do not have guest allocated VMSA
652 * and there is no need to clear the VMSA tag for this page.
653 */
654 if (!vmsa)
655 continue;
656
657 /*
658 * Cannot clear the VMSA tag for the currently running vCPU.
659 */
660 if (this_cpu == cpu) {
661 unsigned long pa;
662 struct page *p;
663
664 pa = __pa(vmsa);
665 /*
666 * Mark the VMSA page of the running vCPU as offline
667 * so that is excluded and not touched by makedumpfile
668 * while generating vmcore during kdump.
669 */
670 p = pfn_to_online_page(pa >> PAGE_SHIFT);
671 if (p)
672 __SetPageOffline(p);
673 continue;
674 }
675
676 apic_id = cpuid_to_apicid[cpu];
677
678 /*
679 * Issue AP destroy to ensure AP gets kicked out of guest mode
680 * to allow using RMPADJUST to remove the VMSA tag on it's
681 * VMSA page.
682 */
683 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
684 snp_cleanup_vmsa(vmsa, apic_id);
685 }
686
687 put_cpu();
688 }
689
/*
 * Final SNP teardown before kexec: destroy all APs, convert all shared
 * memory back to private, then switch the per-CPU GHCB pages back to
 * private — after which only the MSR protocol remains usable until the
 * kexec'ed kernel takes over.
 */
void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned long size, addr;
	unsigned int level, cpu;
	struct ghcb *ghcb;
	pte_t *pte;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	shutdown_all_aps();

	unshare_all_memory();

	/*
	 * Switch to using the MSR protocol to change per-CPU GHCBs to
	 * private. All the per-CPU GHCBs have been switched back to private,
	 * so can't do any more GHCB calls to the hypervisor beyond this point
	 * until the kexec'ed kernel starts running.
	 */
	boot_ghcb = NULL;
	sev_cfg.ghcbs_initialized = false;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		size = page_level_size(level);
		/* Handle the case of a huge page containing the GHCB page */
		addr = (unsigned long)ghcb & page_level_mask(level);
		set_pte_enc(pte, level, (void *)addr);
		snp_set_memory_private(addr, (size / PAGE_SIZE));
	}
}
728
/* Segment attribute encodings used to build the AP INIT VMSA state. */
#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

/* System-segment attributes: present, with segment type 2 (LDT) and 3 (TSS). */
#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)
735
snp_alloc_vmsa_page(int cpu)736 static void *snp_alloc_vmsa_page(int cpu)
737 {
738 struct page *p;
739
740 /*
741 * Allocate VMSA page to work around the SNP erratum where the CPU will
742 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
743 * collides with the RMP entry of VMSA page. The recommended workaround
744 * is to not use a large page.
745 *
746 * Allocate an 8k page which is also 8k-aligned.
747 */
748 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
749 if (!p)
750 return NULL;
751
752 split_page(p, 1);
753
754 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
755 __free_page(p);
756
757 return page_address(p + 1);
758 }
759
/*
 * Start an AP via the SNP AP Creation NAE event: allocate a fresh VMSA,
 * fill it with the APM-documented INIT state (CS:RIP derived from
 * @start_ip), mark the page as a VMSA and hand it to the hypervisor with
 * SVM_VMGEXIT_AP_CREATE.  Returns 0 on success or a negative errno.
 */
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct svsm_ca *caa;
	u8 sipi_vector;
	int ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;
	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernels or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
	if (!vmsa)
		return -ENOMEM;

	/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
	caa = per_cpu(svsm_caa, cpu);

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector		= (start_ip >> 12);
	vmsa->cs.base		= sipi_vector << 12;
	vmsa->cs.limit		= AP_INIT_CS_LIMIT;
	vmsa->cs.attrib		= INIT_CS_ATTRIBS;
	vmsa->cs.selector	= sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip		= start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit		= AP_INIT_DS_LIMIT;
	vmsa->ds.attrib		= INIT_DS_ATTRIBS;
	vmsa->es		= vmsa->ds;
	vmsa->fs		= vmsa->ds;
	vmsa->gs		= vmsa->ds;
	vmsa->ss		= vmsa->ds;

	vmsa->gdtr.limit	= AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit	= AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib	= INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit	= AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit		= AP_INIT_TR_LIMIT;
	vmsa->tr.attrib		= INIT_TR_ATTRIBS;

	vmsa->cr4		= cr4;
	vmsa->cr0		= AP_INIT_CR0_DEFAULT;
	vmsa->dr7		= DR7_RESET_VALUE;
	vmsa->dr6		= AP_INIT_DR6_DEFAULT;
	vmsa->rflags		= AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat		= AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0		= AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr		= AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw		= AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw		= AP_INIT_X87_FCW_DEFAULT;

	if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
		vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK;

	/* SVME must be set. */
	vmsa->efer		= EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl		= snp_vmpl;
	vmsa->sev_features	= sev_status >> 2;

	/* Populate AP's TSC scale/offset to get accurate TSC values. */
	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
		vmsa->tsc_scale = snp_tsc_scale;
		vmsa->tsc_offset = snp_tsc_offset;
	}

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, caa, apic_id, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
	if (ret) {
		snp_cleanup_vmsa(vmsa, apic_id);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa, apic_id);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}
886
snp_set_wakeup_secondary_cpu(void)887 void __init snp_set_wakeup_secondary_cpu(void)
888 {
889 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
890 return;
891
892 /*
893 * Always set this override if SNP is enabled. This makes it the
894 * required method to start APs under SNP. If the hypervisor does
895 * not support AP creation, then no APs will be started.
896 */
897 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
898 }
899
sev_es_setup_ap_jump_table(struct real_mode_header * rmh)900 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
901 {
902 u16 startup_cs, startup_ip;
903 phys_addr_t jump_table_pa;
904 u64 jump_table_addr;
905 u16 __iomem *jump_table;
906
907 jump_table_addr = get_jump_table_addr();
908
909 /* On UP guests there is no jump table so this is not a failure */
910 if (!jump_table_addr)
911 return 0;
912
913 /* Check if AP Jump Table is page-aligned */
914 if (jump_table_addr & ~PAGE_MASK)
915 return -EINVAL;
916
917 jump_table_pa = jump_table_addr & PAGE_MASK;
918
919 startup_cs = (u16)(rmh->trampoline_start >> 4);
920 startup_ip = (u16)(rmh->sev_es_trampoline_start -
921 rmh->trampoline_start);
922
923 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
924 if (!jump_table)
925 return -EIO;
926
927 writew(startup_ip, &jump_table[0]);
928 writew(startup_cs, &jump_table[1]);
929
930 iounmap(jump_table);
931
932 return 0;
933 }
934
935 /*
936 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
937 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
938 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
939 *
940 * When running under SVSM the CA page is needed too, so map it as well.
941 */
sev_es_efi_map_ghcbs_cas(pgd_t * pgd)942 int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
943 {
944 unsigned long address, pflags, pflags_enc;
945 struct sev_es_runtime_data *data;
946 int cpu;
947 u64 pfn;
948
949 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
950 return 0;
951
952 pflags = _PAGE_NX | _PAGE_RW;
953 pflags_enc = cc_mkenc(pflags);
954
955 for_each_possible_cpu(cpu) {
956 data = per_cpu(runtime_data, cpu);
957
958 address = __pa(&data->ghcb_page);
959 pfn = address >> PAGE_SHIFT;
960
961 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
962 return 1;
963
964 if (snp_vmpl) {
965 address = per_cpu(svsm_caa_pa, cpu);
966 if (!address)
967 return 1;
968
969 pfn = address >> PAGE_SHIFT;
970 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
971 return 1;
972 }
973 }
974
975 return 0;
976 }
977
savic_ghcb_msr_read(u32 reg)978 u64 savic_ghcb_msr_read(u32 reg)
979 {
980 u64 msr = APIC_BASE_MSR + (reg >> 4);
981 struct pt_regs regs = { .cx = msr };
982 struct es_em_ctxt ctxt = { .regs = ®s };
983 struct ghcb_state state;
984 enum es_result res;
985 struct ghcb *ghcb;
986
987 guard(irqsave)();
988
989 ghcb = __sev_get_ghcb(&state);
990 vc_ghcb_invalidate(ghcb);
991
992 res = __vc_handle_msr(ghcb, &ctxt, false);
993 if (res != ES_OK) {
994 pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
995 /* MSR read failures are treated as fatal errors */
996 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
997 }
998
999 __sev_put_ghcb(&state);
1000
1001 return regs.ax | regs.dx << 32;
1002 }
1003
/*
 * Write @value to a Secure AVIC APIC register through the GHCB MSR
 * protocol.  A failed write is fatal for the guest.
 */
void savic_ghcb_msr_write(u32 reg, u64 value)
{
	u64 msr = APIC_BASE_MSR + (reg >> 4);
	struct pt_regs regs = {
		.cx = msr,
		.ax = lower_32_bits(value),
		.dx = upper_32_bits(value)
	};
	struct es_em_ctxt ctxt = { .regs = &regs };
	struct ghcb_state state;
	enum es_result res;
	struct ghcb *ghcb;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	res = __vc_handle_msr(ghcb, &ctxt, true);
	if (res != ES_OK) {
		pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
		/* MSR writes should never fail. Any failure is fatal error for SNP guest */
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
	}

	__sev_put_ghcb(&state);
}
1031
/*
 * Register @gpa as this vCPU's Secure AVIC backing page with the
 * hypervisor via the SAVIC NAE event.  Returns the hypervisor's result.
 */
enum es_result savic_register_gpa(u64 gpa)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result res;
	struct ghcb *ghcb;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	/* RAX selects "self", RBX carries the GPA to register. */
	ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
	ghcb_set_rbx(ghcb, gpa);
	res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
				  SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0);

	__sev_put_ghcb(&state);

	return res;
}
1053
/*
 * Unregister this vCPU's Secure AVIC backing page.  On success, the
 * previously registered GPA is returned through @gpa when non-NULL.
 */
enum es_result savic_unregister_gpa(u64 *gpa)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result res;
	struct ghcb *ghcb;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
	res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
				  SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0);
	/* The hypervisor returns the old GPA in RBX. */
	if (gpa && res == ES_OK)
		*gpa = ghcb->save.rbx;

	__sev_put_ghcb(&state);

	return res;
}
1076
snp_register_per_cpu_ghcb(void)1077 static void snp_register_per_cpu_ghcb(void)
1078 {
1079 struct sev_es_runtime_data *data;
1080 struct ghcb *ghcb;
1081
1082 data = this_cpu_read(runtime_data);
1083 ghcb = &data->ghcb_page;
1084
1085 snp_register_ghcb_early(__pa(ghcb));
1086 }
1087
/*
 * Establish a GHCB for the current stage of boot: register the per-CPU
 * GHCB once the runtime #VC handler is installed, otherwise negotiate the
 * protocol version and publish the early boot GHCB.
 */
void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		sev_cfg.ghcbs_initialized = true;

		return;
	}

	/*
	 * Make sure the hypervisor talks a supported protocol.
	 * This gets called only in the BSP boot phase.
	 */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
1129
1130 #ifdef CONFIG_HOTPLUG_CPU
/*
 * Park the AP in a VMGEXIT-based HLT loop until the hypervisor signals a
 * wakeup via sw_exit_info_2.  The GHCB MSR protocol is used directly here
 * (sev_es_wr_ghcb_msr() + VMGEXIT) rather than sev_es_ghcb_hv_call().
 */
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		/* Re-arm the request on every iteration - the HV consumed it. */
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}
1155
1156 /*
1157 * Play_dead handler when running under SEV-ES. This is needed because
1158 * the hypervisor can't deliver an SIPI request to restart the AP.
1159 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
1160 * hypervisor wakes it up again.
1161 */
/*
 * smp_ops.play_dead replacement for SEV-ES guests; see the comment above.
 * Does not return to the caller - the woken CPU re-enters via
 * soft_restart_cpu().
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
1176 #else /* CONFIG_HOTPLUG_CPU */
1177 #define sev_es_play_dead native_play_dead
1178 #endif /* CONFIG_HOTPLUG_CPU */
1179
1180 #ifdef CONFIG_SMP
/* Install the SEV-ES aware play_dead handler for CPU offlining. */
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
1185 #else
/* No play_dead handler needed without SMP. */
static inline void sev_es_setup_play_dead(void) { }
1187 #endif
1188
/*
 * Allocate the per-CPU SEV-ES runtime data (containing the GHCB page) for
 * @cpu from node-local memblock memory.  Also sets up the per-CPU SVSM
 * Calling Area when running under an SVSM.  Panics on allocation failure
 * since SEV-ES cannot operate without it.
 */
static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;

	if (snp_vmpl) {
		struct svsm_ca *caa;

		/*
		 * Allocate the SVSM CA page if an SVSM is present. CPU 0
		 * reuses the boot CA page which is already registered.
		 */
		caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
			  : &boot_svsm_ca_page;

		per_cpu(svsm_caa, cpu) = caa;
		per_cpu(svsm_caa_pa, cpu) = __pa(caa);
	}
}
1210
/*
 * Make @cpu's GHCB page shared with the hypervisor (decrypted), clear it
 * and mark both the primary and backup GHCB as unused.  Panics on failure
 * since an encrypted GHCB is unusable.
 */
static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}
1228
/*
 * Switch SEV-ES #VC handling from the early boot handler to the runtime
 * handler: verify required CPU/HV features, allocate and share per-CPU
 * GHCB pages for all possible CPUs, install the play_dead hook and
 * finally point initial_vc_handler at the runtime exception handler.
 */
void __init sev_es_init_vc_handling(void)
{
	int cpu;

	/* The GHCB page must be page-aligned within the runtime data. */
	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	/* Under an SVSM, per-CPU Calling Areas were set up - use them. */
	if (snp_vmpl)
		sev_cfg.use_cas = true;

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
1266
1267 /*
1268 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1269 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
1270 * ROM region can cause a crash since this region is not pre-validated.
1271 */
/* See the comment above: skip DMI probing without EFI config tables. */
void __init snp_dmi_setup(void)
{
	if (efi_enabled(EFI_CONFIG_TABLES))
		dmi_setup();
}
1277
dump_cpuid_table(void)1278 static void dump_cpuid_table(void)
1279 {
1280 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1281 int i = 0;
1282
1283 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
1284 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
1285
1286 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
1287 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
1288
1289 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
1290 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
1291 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
1292 }
1293 }
1294
1295 /*
1296 * It is useful from an auditing/testing perspective to provide an easy way
1297 * for the guest owner to know that the CPUID table has been initialized as
1298 * expected, but that initialization happens too early in boot to print any
1299 * sort of indicator, and there's not really any other good place to do it,
1300 * so do it here.
1301 *
1302 * If running as an SNP guest, report the current VM privilege level (VMPL).
1303 */
/* See the comment above - late boot reporting of CPUID table and VMPL. */
static int __init report_snp_info(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (cpuid_table->count) {
		pr_info("Using SNP CPUID table, %d entries present.\n",
			cpuid_table->count);

		if (sev_cfg.debug)
			dump_cpuid_table();
	}

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		pr_info("SNP running at VMPL%u.\n", snp_vmpl);

	return 0;
}
arch_initcall(report_snp_info);
1322
/*
 * Issue a single SNP guest request VMGEXIT to the hypervisor/ASP.
 *
 * The raw firmware/VMM status is stored in req->exitinfo2 (preset to
 * SEV_RET_NO_FW_CALL in case the call never reaches the firmware) and
 * mapped to an errno:
 *   0        - success
 *   -EAGAIN  - VMM reported BUSY (throttled), caller may retry
 *   -ENOSPC  - extended request: certificate buffer too small, required
 *              page count returned in req->input.data_npages
 *   -EIO     - any other failure
 */
static int snp_issue_guest_request(struct snp_guest_req *req)
{
	struct snp_req_data *input = &req->input;
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	req->exitinfo2 = SEV_RET_NO_FW_CALL;

	/*
	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
	 * a per-CPU GHCB.
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);
	if (!ghcb) {
		ret = -EIO;
		goto e_restore_irq;
	}

	vc_ghcb_invalidate(ghcb);

	/* Extended requests additionally pass the certificate buffer. */
	if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
		ghcb_set_rax(ghcb, input->data_gpa);
		ghcb_set_rbx(ghcb, input->data_npages);
	}

	ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
	if (ret)
		goto e_put;

	req->exitinfo2 = ghcb->save.sw_exit_info_2;
	switch (req->exitinfo2) {
	case 0:
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
		ret = -EAGAIN;
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
		/* Number of expected pages are returned in RBX */
		if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
			input->data_npages = ghcb_get_rbx(ghcb);
			ret = -ENOSPC;
			break;
		}
		fallthrough;
	default:
		ret = -EIO;
		break;
	}

e_put:
	__sev_put_ghcb(&state);
e_restore_irq:
	local_irq_restore(flags);

	return ret;
}
1386
/* Platform device probed by the sev-guest driver (attestation interface). */
static struct platform_device sev_guest_device = {
	.name		= "sev-guest",
	.id		= -1,
};

/* Platform device for the SVSM-provided vTPM, registered only if probed. */
static struct platform_device tpm_svsm_device = {
	.name		= "tpm-svsm",
	.id		= -1,
};
1396
/*
 * Register the SNP guest platform devices: the sev-guest attestation
 * device and, if an SVSM vTPM is present, the tpm-svsm device.
 */
static int __init snp_init_platform_device(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	if (platform_device_register(&sev_guest_device))
		return -ENODEV;

	/* Only register the vTPM device when the SVSM advertises one. */
	if (snp_svsm_vtpm_probe() &&
	    platform_device_register(&tpm_svsm_device))
		return -ENODEV;

	pr_info("SNP guest platform devices initialized.\n");
	return 0;
}
device_initcall(snp_init_platform_device);
1413
sev_show_status(void)1414 void sev_show_status(void)
1415 {
1416 int i;
1417
1418 pr_info("Status: ");
1419 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1420 if (sev_status & BIT_ULL(i)) {
1421 if (!sev_status_feat_names[i])
1422 continue;
1423
1424 pr_cont("%s ", sev_status_feat_names[i]);
1425 }
1426 }
1427 pr_cont("\n");
1428 }
1429
1430 #ifdef CONFIG_SYSFS
/* Show the guest's VM privilege level via /sys/.../cpu/sev/vmpl. */
static ssize_t vmpl_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", snp_vmpl);
}

static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);

static struct attribute *vmpl_attrs[] = {
	&vmpl_attr.attr,
	NULL
};

static struct attribute_group sev_attr_group = {
	.attrs = vmpl_attrs,
};
1447
/*
 * Create the "sev" kobject under the cpu subsystem root and attach the
 * VMPL attribute group.  SNP guests only.
 */
static int __init sev_sysfs_init(void)
{
	struct kobject *sev_kobj;
	struct device *dev_root;
	int ret;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (!dev_root)
		return -ENODEV;

	sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
	/* The kobject holds its own reference to the parent. */
	put_device(dev_root);

	if (!sev_kobj)
		return -ENOMEM;

	ret = sysfs_create_group(sev_kobj, &sev_attr_group);
	if (ret)
		kobject_put(sev_kobj);

	return ret;
}
arch_initcall(sev_sysfs_init);
1474 #endif // CONFIG_SYSFS
1475
/*
 * Free pages previously obtained from alloc_shared_pages().  The pages
 * are re-encrypted first; if that fails they are intentionally leaked
 * rather than returned to the allocator in shared state.
 */
static void free_shared_pages(void *buf, size_t sz)
{
	unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

	if (!buf)
		return;

	/* Leak the pages if the encryption mask cannot be restored. */
	if (WARN_ONCE(set_memory_encrypted((unsigned long)buf, npages),
		      "failed to restore encryption mask (leak it)\n"))
		return;

	__free_pages(virt_to_page(buf), get_order(sz));
}
1492
/*
 * Allocate @sz bytes (page-granular) and mark them shared (decrypted)
 * with the hypervisor.  Returns the virtual address or NULL on failure.
 * Free with free_shared_pages().
 */
static void *alloc_shared_pages(size_t sz)
{
	unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	unsigned int order = get_order(sz);
	struct page *page;
	void *addr;
	int ret;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, order);
	if (!page)
		return NULL;

	addr = page_address(page);
	ret = set_memory_decrypted((unsigned long)addr, npages);
	if (ret) {
		pr_err("failed to mark page shared, ret=%d\n", ret);
		__free_pages(page, order);
		return NULL;
	}

	return addr;
}
1512
get_vmpck(int id,struct snp_secrets_page * secrets,u32 ** seqno)1513 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1514 {
1515 u8 *key = NULL;
1516
1517 switch (id) {
1518 case 0:
1519 *seqno = &secrets->os_area.msg_seqno_0;
1520 key = secrets->vmpck0;
1521 break;
1522 case 1:
1523 *seqno = &secrets->os_area.msg_seqno_1;
1524 key = secrets->vmpck1;
1525 break;
1526 case 2:
1527 *seqno = &secrets->os_area.msg_seqno_2;
1528 key = secrets->vmpck2;
1529 break;
1530 case 3:
1531 *seqno = &secrets->os_area.msg_seqno_3;
1532 key = secrets->vmpck3;
1533 break;
1534 default:
1535 break;
1536 }
1537
1538 return key;
1539 }
1540
/*
 * Allocate and key an AES-GCM context for guest message encryption.
 * Returns NULL on allocation or key-expansion failure; caller frees
 * the context with kfree().
 */
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
{
	struct aesgcm_ctx *ctx = kzalloc_obj(*ctx);

	if (!ctx)
		return NULL;

	if (!aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN))
		return ctx;

	pr_err("Crypto context initialization failed\n");
	kfree(ctx);
	return NULL;
}
1557
/*
 * Bind @mdesc to the VMPCK identified by @vmpck_id (-1 selects the key of
 * the current VMPL) and set up the AES-GCM crypto context for it.
 *
 * Returns 0 on success, -EINVAL for an invalid/empty key, -ENOMEM if the
 * crypto context cannot be allocated.
 */
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
{
	/* Adjust the default VMPCK key based on the executing VMPL level */
	if (vmpck_id == -1)
		vmpck_id = snp_vmpl;

	mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
	if (!mdesc->vmpck) {
		pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	/* Verify that VMPCK is not zero. */
	if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err("Empty VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	mdesc->vmpck_id = vmpck_id;

	mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
	if (!mdesc->ctx)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(snp_msg_init);
1585
/*
 * Allocate a guest message descriptor: map the secrets page and allocate
 * the shared request/response message pages.
 *
 * Returns the descriptor or ERR_PTR(-ENOMEM) on any failure; release
 * with snp_msg_free().
 */
struct snp_msg_desc *snp_msg_alloc(void)
{
	struct snp_msg_desc *mdesc;
	void __iomem *mem;

	/* A guest message must fit in one shared page. */
	BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);

	mdesc = kzalloc_obj(struct snp_msg_desc);
	if (!mdesc)
		return ERR_PTR(-ENOMEM);

	/* Map the secrets page encrypted - it holds the VMPCKs. */
	mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem)
		goto e_free_mdesc;

	mdesc->secrets = (__force struct snp_secrets_page *)mem;

	/* Allocate the shared page used for the request and response message. */
	mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->request)
		goto e_unmap;

	mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->response)
		goto e_free_request;

	return mdesc;

e_free_request:
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
	iounmap(mem);
e_free_mdesc:
	kfree(mdesc);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);
1624
/*
 * Tear down a descriptor created by snp_msg_alloc()/snp_msg_init().
 * NULL is a no-op.  The descriptor itself is freed with kfree_sensitive()
 * since it caches decrypted message contents.
 */
void snp_msg_free(struct snp_msg_desc *mdesc)
{
	if (!mdesc)
		return;

	kfree(mdesc->ctx);
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
	iounmap((__force void __iomem *)mdesc->secrets);

	kfree_sensitive(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);
1638
1639 /* Mutex to serialize the shared buffer access and command handling. */
1640 static DEFINE_MUTEX(snp_cmd_mutex);
1641
1642 /*
1643 * If an error is received from the host or AMD Secure Processor (ASP) there
1644 * are two options. Either retry the exact same encrypted request or discontinue
1645 * using the VMPCK.
1646 *
1647 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
1648 * encrypt the requests. The IV for this scheme is the sequence number. GCM
1649 * cannot tolerate IV reuse.
1650 *
1651 * The ASP FW v1.51 only increments the sequence numbers on a successful
1652 * guest<->ASP back and forth and only accepts messages at its exact sequence
1653 * number.
1654 *
1655 * So if the sequence number were to be reused the encryption scheme is
1656 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
1657 * will reject the request.
1658 */
/*
 * Permanently wipe the VMPCK from the secrets page and drop the pointer.
 * Called after any failure that could lead to AES-GCM IV reuse (see the
 * comment above).
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);
	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}
1666
__snp_get_msg_seqno(struct snp_msg_desc * mdesc)1667 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1668 {
1669 u64 count;
1670
1671 lockdep_assert_held(&snp_cmd_mutex);
1672
1673 /* Read the current message sequence counter from secrets pages */
1674 count = *mdesc->os_area_msg_seqno;
1675
1676 return count + 1;
1677 }
1678
1679 /* Return a non-zero on success */
/* Return a non-zero on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value but the version 2 of GHCB specification defines a 32-bit storage
	 * for it. If the counter exceeds the 32-bit value then return zero.
	 * The caller should check the return value, but if the caller happens to
	 * not check the value and use it, then the firmware treats zero as an
	 * invalid number and will fail the message request.
	 */
	if (count >= UINT_MAX) {
		pr_err("request message sequence counter overflow\n");
		return 0;
	}

	return count;
}
1699
/* Advance the stored sequence counter after a request/response exchange. */
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
	/*
	 * The counter is also incremented by the PSP, so increment it by 2
	 * and save in secrets page.
	 */
	*mdesc->os_area_msg_seqno += 2;
}
1708
/*
 * Validate the response header (sequence number, type, version, size)
 * against the original request and decrypt the payload into
 * req->resp_buf.  Returns 0 on success or -EBADMSG on any mismatch or
 * authentication failure.
 */
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	struct snp_guest_msg *resp_msg = &mdesc->secret_response;
	struct snp_guest_msg *req_msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
	struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	pr_debug("response [seqno %lld type %d version %d sz %d]\n",
		 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
		 resp_msg_hdr->msg_sz);

	/* Copy response from shared memory to encrypted memory. */
	memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));

	/* Verify that the sequence counter is incremented by 1 */
	if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
		return -EBADMSG;

	/* Verify response message type and version number. */
	if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
	    resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
		return -EBADMSG;

	/*
	 * If the message size is greater than our buffer length then return
	 * an error.
	 */
	if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
		return -EBADMSG;

	/* Decrypt the payload; the IV is the response sequence number. */
	memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
	if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
			    &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
		return -EBADMSG;

	return 0;
}
1749
/*
 * Build the guest message header and AES-GCM encrypt req->req_buf into
 * mdesc->secret_request using @seqno as the IV.  Returns 0 on success,
 * -ENOSR for a zero sequence number, -EBADMSG if the payload does not fit.
 */
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
{
	struct snp_guest_msg *msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *hdr = &msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	memset(msg, 0, sizeof(*msg));

	hdr->algo = SNP_AEAD_AES_256_GCM;
	hdr->hdr_version = MSG_HDR_VER;
	hdr->hdr_sz = sizeof(*hdr);
	hdr->msg_type = req->msg_type;
	hdr->msg_version = req->msg_version;
	hdr->msg_seqno = seqno;
	hdr->msg_vmpck = req->vmpck_id;
	hdr->msg_sz = req->req_sz;

	/* Verify the sequence number is non-zero */
	if (!hdr->msg_seqno)
		return -ENOSR;

	pr_debug("request [seqno %lld type %d version %d sz %d]\n",
		 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);

	/* Payload plus authtag must fit into the message buffer. */
	if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
		return -EBADMSG;

	/* The header (AAD) is authenticated; the IV is the sequence number. */
	memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
	aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
		       AAD_LEN, iv, hdr->authtag);

	return 0;
}
1784
/*
 * Drive a guest request to completion, retrying on throttling (-EAGAIN,
 * bounded by SNP_REQ_MAX_RETRY_DURATION) and downgrading an extended
 * request to a standard one on -ENOSPC so the sequence number still
 * advances and the IV is never reused.
 */
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	unsigned long req_start = jiffies;
	unsigned int override_npages = 0;
	u64 override_err = 0;
	int rc;

retry_request:
	/*
	 * Call firmware to process the request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small of a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		req->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	/* Report the page count the VMM asked for back to the caller. */
	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}
1869
/*
 * Encrypt, send and decrypt one SNP guest message under snp_cmd_mutex.
 *
 * On an unrecoverable protocol or decryption failure the VMPCK is wiped
 * (snp_disable_vmpck()) to prevent IV reuse; subsequent calls then fail
 * with -ENOTTY.
 */
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	u64 seqno;
	int rc;

	/*
	 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
	 * The offload's DMA SG list of data to encrypt has to be in linear mapping.
	 */
	if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
		pr_warn("AES-GSM buffers must be in linear mapping");
		return -EINVAL;
	}

	guard(mutex)(&snp_cmd_mutex);

	/* Check if the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get message sequence and verify that its a non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear shared memory's response for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/* Encrypt the userspace provided payload in mdesc->secret_request. */
	rc = enc_payload(mdesc, seqno, req);
	if (rc)
		return rc;

	/*
	 * Write the fully encrypted request to the shared unencrypted
	 * request page.
	 */
	memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));

	/* Initialize the input address for guest request */
	req->input.req_gpa = __pa(mdesc->request);
	req->input.resp_gpa = __pa(mdesc->response);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;

	rc = __handle_guest_request(mdesc, req);
	if (rc) {
		/*
		 * A too-small certificate buffer is an expected caller error;
		 * the VMPCK stays usable in that case.
		 */
		if (rc == -EIO &&
		    req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			return rc;

		pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
			 rc, req->exitinfo2);

		snp_disable_vmpck(mdesc);
		return rc;
	}

	rc = verify_and_dec_payload(mdesc, req);
	if (rc) {
		pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
		snp_disable_vmpck(mdesc);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);
1939
/*
 * Query the ASP for the Secure TSC parameters via a TSC_INFO guest
 * message and store tsc_scale/tsc_offset in snp_tsc_scale/snp_tsc_offset.
 *
 * Returns 0 on success or a negative errno; all temporary buffers and
 * the message descriptor are freed before returning.
 */
static int __init snp_get_tsc_info(void)
{
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req req = {};
	int rc = -ENOMEM;

	tsc_req = kzalloc_obj(*tsc_req);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_tsc_resp;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_TSC_INFO_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = tsc_req;
	req.req_sz = sizeof(*tsc_req);
	req.resp_buf = (void *)tsc_resp;
	req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	/* A non-zero status means the ASP rejected the request. */
	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}
2006
snp_secure_tsc_prepare(void)2007 void __init snp_secure_tsc_prepare(void)
2008 {
2009 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
2010 return;
2011
2012 if (snp_get_tsc_info()) {
2013 pr_alert("Unable to retrieve Secure TSC info from ASP\n");
2014 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
2015 }
2016
2017 pr_debug("SecureTSC enabled");
2018 }
2019
/* Calibration callback: report the Secure TSC derived frequency in kHz. */
static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}
2024
/*
 * Compute the guest TSC frequency for Secure TSC guests from the
 * GUEST_TSC_FREQ MSR and the TSC_FACTOR in the secrets page, then
 * install it as the platform's CPU/TSC calibration source.
 */
void __init snp_secure_tsc_init(void)
{
	struct snp_secrets_page *secrets;
	unsigned long tsc_freq_mhz;
	void *mem;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	secrets = (__force struct snp_secrets_page *)mem;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

	/* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */
	tsc_freq_mhz &= GENMASK_ULL(17, 0);

	/* Scale MHz to kHz and apply the secrets-page TSC_FACTOR. */
	snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

	early_memunmap(mem, PAGE_SIZE);
}
2055