1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * AMD Memory Encryption Support
4 *
5 * Copyright (C) 2019 SUSE
6 *
7 * Author: Joerg Roedel <jroedel@suse.de>
8 */
9
10 #define pr_fmt(fmt) "SEV: " fmt
11
12 #include <linux/sched/debug.h> /* For show_regs() */
13 #include <linux/percpu-defs.h>
14 #include <linux/cc_platform.h>
15 #include <linux/printk.h>
16 #include <linux/mm_types.h>
17 #include <linux/set_memory.h>
18 #include <linux/memblock.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/cpumask.h>
22 #include <linux/efi.h>
23 #include <linux/platform_device.h>
24 #include <linux/io.h>
25 #include <linux/psp-sev.h>
26 #include <linux/dmi.h>
27 #include <uapi/linux/sev-guest.h>
28 #include <crypto/gcm.h>
29
30 #include <asm/init.h>
31 #include <asm/cpu_entry_area.h>
32 #include <asm/stacktrace.h>
33 #include <asm/sev.h>
34 #include <asm/sev-internal.h>
35 #include <asm/insn-eval.h>
36 #include <asm/fpu/xcr.h>
37 #include <asm/processor.h>
38 #include <asm/realmode.h>
39 #include <asm/setup.h>
40 #include <asm/traps.h>
41 #include <asm/svm.h>
42 #include <asm/smp.h>
43 #include <asm/cpu.h>
44 #include <asm/apic.h>
45 #include <asm/cpuid/api.h>
46 #include <asm/cmdline.h>
47 #include <asm/msr.h>
48
49 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */
50 #define AP_INIT_CS_LIMIT 0xffff
51 #define AP_INIT_DS_LIMIT 0xffff
52 #define AP_INIT_LDTR_LIMIT 0xffff
53 #define AP_INIT_GDTR_LIMIT 0xffff
54 #define AP_INIT_IDTR_LIMIT 0xffff
55 #define AP_INIT_TR_LIMIT 0xffff
56 #define AP_INIT_RFLAGS_DEFAULT 0x2
57 #define AP_INIT_DR6_DEFAULT 0xffff0ff0
58 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
59 #define AP_INIT_XCR0_DEFAULT 0x1
60 #define AP_INIT_X87_FTW_DEFAULT 0x5555
61 #define AP_INIT_X87_FCW_DEFAULT 0x0040
62 #define AP_INIT_CR0_DEFAULT 0x60000010
63 #define AP_INIT_MXCSR_DEFAULT 0x1f80
64
65 static const char * const sev_status_feat_names[] = {
66 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
67 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
68 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
69 [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
70 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
71 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
72 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
73 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
74 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
75 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
76 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
77 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
78 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
79 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
80 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
81 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
82 };
83
84 /*
85 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
86 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
87 * across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
88 */
89 static u64 snp_tsc_scale __ro_after_init;
90 static u64 snp_tsc_offset __ro_after_init;
91 static unsigned long snp_tsc_freq_khz __ro_after_init;
92
93 DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
94 DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
95
96 /*
97 * SVSM related information:
98 * When running under an SVSM, the VMPL that Linux is executing at must be
99 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
100 */
101 u8 snp_vmpl __ro_after_init;
102 EXPORT_SYMBOL_GPL(snp_vmpl);
103
104 static u64 __init get_snp_jump_table_addr(void)
105 {
106 struct snp_secrets_page *secrets;
107 void __iomem *mem;
108 u64 addr;
109
110 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
111 if (!mem) {
112 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
113 return 0;
114 }
115
116 secrets = (__force struct snp_secrets_page *)mem;
117
118 addr = secrets->os_area.ap_jump_table_pa;
119 iounmap(mem);
120
121 return addr;
122 }
123
124 static u64 __init get_jump_table_addr(void)
125 {
126 struct ghcb_state state;
127 unsigned long flags;
128 struct ghcb *ghcb;
129 u64 ret = 0;
130
131 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
132 return get_snp_jump_table_addr();
133
134 local_irq_save(flags);
135
136 ghcb = __sev_get_ghcb(&state);
137
138 vc_ghcb_invalidate(ghcb);
139 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
140 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
141 ghcb_set_sw_exit_info_2(ghcb, 0);
142
143 sev_es_wr_ghcb_msr(__pa(ghcb));
144 VMGEXIT();
145
146 if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
147 ghcb_sw_exit_info_2_is_valid(ghcb))
148 ret = ghcb->save.sw_exit_info_2;
149
150 __sev_put_ghcb(&state);
151
152 local_irq_restore(flags);
153
154 return ret;
155 }
156
157 static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
158 int ret, u64 svsm_ret)
159 {
160 WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
161 pfn, action, page_size, ret, svsm_ret);
162
163 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
164 }
165
166 static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
167 {
168 unsigned int page_size;
169 bool action;
170 u64 pfn;
171
172 pfn = pc->entry[pc->cur_index].pfn;
173 action = pc->entry[pc->cur_index].action;
174 page_size = pc->entry[pc->cur_index].page_size;
175
176 __pval_terminate(pfn, action, page_size, ret, svsm_ret);
177 }
178
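/*
 * Perform PVALIDATE directly (no SVSM) for each entry of a Page State
 * Change descriptor. A 2M entry that fails with a size mismatch is
 * retried as individual 4K pages; any other failure terminates the guest.
 */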
179 static void pval_pages(struct snp_psc_desc *desc)
180 {
181 struct psc_entry *e;
182 unsigned long vaddr;
183 unsigned int size;
184 unsigned int i;
185 bool validate;
186 u64 pfn;
187 int rc;
188
189 for (i = 0; i <= desc->hdr.end_entry; i++) {
190 e = &desc->entries[i];
191
192 pfn = e->gfn;
193 vaddr = (unsigned long)pfn_to_kaddr(pfn);
194 size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
195 validate = e->operation == SNP_PAGE_STATE_PRIVATE;
196
197 rc = pvalidate(vaddr, size, validate);
198 if (!rc)
199 continue;
200
201 if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
202 unsigned long vaddr_end = vaddr + PMD_SIZE;
203
204 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
205 rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
206 if (rc)
207 __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
208 }
209 } else {
210 __pval_terminate(pfn, validate, size, rc, 0);
211 }
212 }
213 }
214
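/*
 * Fill the SVSM calling area with 4K PVALIDATE entries for a contiguous
 * PFN range, stopping when the CA is full (SVSM_PVALIDATE_MAX_COUNT).
 * Returns the first PFN that was not added to the CA.
 */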
215 static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
216 struct svsm_pvalidate_call *pc)
217 {
218 struct svsm_pvalidate_entry *pe;
219
220 /* Nothing in the CA yet */
221 pc->num_entries = 0;
222 pc->cur_index = 0;
223
224 pe = &pc->entry[0];
225
226 while (pfn < pfn_end) {
227 pe->page_size = RMP_PG_SIZE_4K;
228 pe->action = action;
229 pe->ignore_cf = 0;
230 pe->pfn = pfn;
231
232 pe++;
233 pfn++;
234
235 pc->num_entries++;
236 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
237 break;
238 }
239
240 return pfn;
241 }
242
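/*
 * Fill the SVSM calling area from the Page State Change descriptor,
 * starting at @desc_entry and stopping when either the CA is full or the
 * last descriptor entry has been added. Returns the index of the next
 * unprocessed descriptor entry.
 */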
243 static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
244 struct svsm_pvalidate_call *pc)
245 {
246 struct svsm_pvalidate_entry *pe;
247 struct psc_entry *e;
248
249 /* Nothing in the CA yet */
250 pc->num_entries = 0;
251 pc->cur_index = 0;
252
253 pe = &pc->entry[0];
254 e = &desc->entries[desc_entry];
255
256 while (desc_entry <= desc->hdr.end_entry) {
257 pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
258 pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
259 pe->ignore_cf = 0;
260 pe->pfn = e->gfn;
261
262 pe++;
263 e++;
264
265 desc_entry++;
266 pc->num_entries++;
267 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
268 break;
269 }
270
271 return desc_entry;
272 }
273
274 static void svsm_pval_pages(struct snp_psc_desc *desc)
275 {
276 struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
277 unsigned int i, pv_4k_count = 0;
278 struct svsm_pvalidate_call *pc;
279 struct svsm_call call = {};
280 unsigned long flags;
281 bool action;
282 u64 pc_pa;
283 int ret;
284
285 /*
286 * This can be called very early in the boot, use native functions in
287 * order to avoid paravirt issues.
288 */
289 flags = native_local_irq_save();
290
291 /*
292 * The SVSM calling area (CA) can support processing 510 entries at a
293 * time. Loop through the Page State Change descriptor until the CA is
294 * full or the last entry in the descriptor is reached, at which time
295 * the SVSM is invoked. This repeats until all entries in the descriptor
296 * are processed.
297 */
298 call.caa = svsm_get_caa();
299
300 pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
301 pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
302
303 /* Protocol 0, Call ID 1 */
304 call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
305 call.rcx = pc_pa;
306
307 for (i = 0; i <= desc->hdr.end_entry;) {
308 i = svsm_build_ca_from_psc_desc(desc, i, pc);
309
310 do {
311 ret = svsm_perform_call_protocol(&call);
312 if (!ret)
313 continue;
314
315 /*
316 * Check if the entry failed because of an RMP mismatch (a
317 * PVALIDATE at 2M was requested, but the page is mapped in
318 * the RMP as 4K).
319 */
320
321 if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
322 pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
323 /* Save this entry for post-processing at 4K */
324 pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
325
326 /* Skip to the next one unless at the end of the list */
327 pc->cur_index++;
328 if (pc->cur_index < pc->num_entries)
329 ret = -EAGAIN;
330 else
331 ret = 0;
332 }
333 } while (ret == -EAGAIN);
334
335 if (ret)
336 svsm_pval_terminate(pc, ret, call.rax_out);
337 }
338
339 /* Process any entries that failed to be validated at 2M and validate them at 4K */
340 for (i = 0; i < pv_4k_count; i++) {
341 u64 pfn, pfn_end;
342
343 action = pv_4k[i].action;
344 pfn = pv_4k[i].pfn;
345 pfn_end = pfn + 512;
346
347 while (pfn < pfn_end) {
348 pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
349
350 ret = svsm_perform_call_protocol(&call);
351 if (ret)
352 svsm_pval_terminate(pc, ret, call.rax_out);
353 }
354 }
355
356 native_local_irq_restore(flags);
357 }
358
359 static void pvalidate_pages(struct snp_psc_desc *desc)
360 {
361 struct psc_entry *e;
362 unsigned int i;
363
364 if (snp_vmpl)
365 svsm_pval_pages(desc);
366 else
367 pval_pages(desc);
368
369 /*
370 * If not affected by the cache-coherency vulnerability there is no need
371 * to perform the cache eviction mitigation.
372 */
373 if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO))
374 return;
375
376 for (i = 0; i <= desc->hdr.end_entry; i++) {
377 e = &desc->entries[i];
378
379 /*
380 * If validating memory (making it private) perform the cache
381 * eviction mitigation.
382 */
383 if (e->operation == SNP_PAGE_STATE_PRIVATE)
384 sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1);
385 }
386 }
387
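/*
 * Copy the Page State Change descriptor into the GHCB shared buffer and
 * issue the SVM_VMGEXIT_PSC NAE event, re-issuing the call until the
 * hypervisor reports that all entries have been processed and sanity
 * checking the returned header after each exit.
 */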
388 static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
389 {
390 int cur_entry, end_entry, ret = 0;
391 struct snp_psc_desc *data;
392 struct es_em_ctxt ctxt;
393
394 vc_ghcb_invalidate(ghcb);
395
396 /* Copy the input desc into GHCB shared buffer */
397 data = (struct snp_psc_desc *)ghcb->shared_buffer;
398 memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
399
400 /*
401 * As per the GHCB specification, the hypervisor can resume the guest
402 * before processing all the entries. Check whether all the entries
403 * are processed. If not, then keep retrying. Note, the hypervisor
404 * will update the data memory directly to indicate the status, so
405 * reference the data->hdr everywhere.
406 *
407 * The strategy here is to wait for the hypervisor to change the page
408 * state in the RMP table before guest accesses the memory pages. If the
409 * page state change was not successful, then later memory access will
410 * result in a crash.
411 */
412 cur_entry = data->hdr.cur_entry;
413 end_entry = data->hdr.end_entry;
414
415 while (data->hdr.cur_entry <= data->hdr.end_entry) {
416 ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
417
418 /* The hypervisor advances data->hdr.cur_entry in the shared buffer as it processes entries. */
419 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
420
421 /*
422 * Page State Change VMGEXIT can pass error code through
423 * exit_info_2.
424 */
425 if (WARN(ret || ghcb->save.sw_exit_info_2,
426 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
427 ret, ghcb->save.sw_exit_info_2)) {
428 ret = 1;
429 goto out;
430 }
431
432 /* Verify that reserved bit is not set */
433 if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
434 ret = 1;
435 goto out;
436 }
437
438 /*
439 * Sanity check that entry processing is not going backwards.
440 * This will happen only if the hypervisor is tricking us.
441 */
442 if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
443 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
444 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
445 ret = 1;
446 goto out;
447 }
448 }
449
450 out:
451 return ret;
452 }
453
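/*
 * Build a single Page State Change descriptor for the range starting at
 * @vaddr, using 2M entries for suitably aligned direct-map addresses and
 * 4K entries otherwise, then issue it to the hypervisor. Validation is
 * rescinded before converting to shared and performed after converting
 * to private. Returns the first address not covered by this descriptor.
 */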
454 static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
455 unsigned long vaddr_end, int op)
456 {
457 struct ghcb_state state;
458 bool use_large_entry;
459 struct psc_hdr *hdr;
460 struct psc_entry *e;
461 unsigned long flags;
462 unsigned long pfn;
463 struct ghcb *ghcb;
464 int i;
465
466 hdr = &data->hdr;
467 e = data->entries;
468
469 memset(data, 0, sizeof(*data));
470 i = 0;
471
472 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
473 hdr->end_entry = i;
474
475 if (is_vmalloc_addr((void *)vaddr)) {
476 pfn = vmalloc_to_pfn((void *)vaddr);
477 use_large_entry = false;
478 } else {
479 pfn = __pa(vaddr) >> PAGE_SHIFT;
480 use_large_entry = true;
481 }
482
483 e->gfn = pfn;
484 e->operation = op;
485
486 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
487 (vaddr_end - vaddr) >= PMD_SIZE) {
488 e->pagesize = RMP_PG_SIZE_2M;
489 vaddr += PMD_SIZE;
490 } else {
491 e->pagesize = RMP_PG_SIZE_4K;
492 vaddr += PAGE_SIZE;
493 }
494
495 e++;
496 i++;
497 }
498
499 /* Page validation must be rescinded before changing to shared */
500 if (op == SNP_PAGE_STATE_SHARED)
501 pvalidate_pages(data);
502
503 local_irq_save(flags);
504
505 if (sev_cfg.ghcbs_initialized)
506 ghcb = __sev_get_ghcb(&state);
507 else
508 ghcb = boot_ghcb;
509
510 /* Invoke the hypervisor to perform the page state changes */
511 if (!ghcb || vmgexit_psc(ghcb, data))
512 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
513
514 if (sev_cfg.ghcbs_initialized)
515 __sev_put_ghcb(&state);
516
517 local_irq_restore(flags);
518
519 /* Page validation must be performed after changing to private */
520 if (op == SNP_PAGE_STATE_PRIVATE)
521 pvalidate_pages(data);
522
523 return vaddr;
524 }
525
526 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
527 {
528 struct snp_psc_desc desc;
529 unsigned long vaddr_end;
530
531 /* Use the MSR protocol when a GHCB is not available. */
532 if (!boot_ghcb)
533 return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
534
535 vaddr = vaddr & PAGE_MASK;
536 vaddr_end = vaddr + (npages << PAGE_SHIFT);
537
538 while (vaddr < vaddr_end)
539 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
540 }
541
542 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
543 {
544 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
545 return;
546
547 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
548 }
549
550 void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
551 {
552 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
553 return;
554
555 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
556 }
557
558 void snp_accept_memory(phys_addr_t start, phys_addr_t end)
559 {
560 unsigned long vaddr, npages;
561
562 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
563 return;
564
565 vaddr = (unsigned long)__va(start);
566 npages = (end - start) >> PAGE_SHIFT;
567
568 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
569 }
570
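/*
 * Issue the AP Creation NAE event to create or destroy an AP. The
 * sw_exit_info_1 layout is: bits [63:32] APIC ID, bits [31:16] VMPL,
 * bits [15:0] the requested event (create/destroy).
 */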
571 static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
572 {
573 bool create = event != SVM_VMGEXIT_AP_DESTROY;
574 struct ghcb_state state;
575 unsigned long flags;
576 struct ghcb *ghcb;
577 int ret = 0;
578
579 local_irq_save(flags);
580
581 ghcb = __sev_get_ghcb(&state);
582
583 vc_ghcb_invalidate(ghcb);
584
585 if (create)
586 ghcb_set_rax(ghcb, vmsa->sev_features);
587
588 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
589 ghcb_set_sw_exit_info_1(ghcb,
590 ((u64)apic_id << 32) |
591 ((u64)snp_vmpl << 16) |
592 event);
593 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
594
595 sev_es_wr_ghcb_msr(__pa(ghcb));
596 VMGEXIT();
597
598 if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
599 lower_32_bits(ghcb->save.sw_exit_info_1)) {
600 pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
601 ret = -EINVAL;
602 }
603
604 __sev_put_ghcb(&state);
605
606 local_irq_restore(flags);
607
608 return ret;
609 }
610
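/*
 * Set or clear the VMSA attribute of a page. When running under an SVSM
 * this is done through the SVSM Create/Delete vCPU calls, otherwise
 * RMPADJUST is used directly (targeting VMPL1 permissions).
 */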
611 static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
612 {
613 int ret;
614
615 if (snp_vmpl) {
616 struct svsm_call call = {};
617 unsigned long flags;
618
619 local_irq_save(flags);
620
621 call.caa = this_cpu_read(svsm_caa);
622 call.rcx = __pa(va);
623
624 if (make_vmsa) {
625 /* Protocol 0, Call ID 2 */
626 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
627 call.rdx = __pa(caa);
628 call.r8 = apic_id;
629 } else {
630 /* Protocol 0, Call ID 3 */
631 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
632 }
633
634 ret = svsm_perform_call_protocol(&call);
635
636 local_irq_restore(flags);
637 } else {
638 /*
639 * If the kernel runs at VMPL0, it can change the VMSA
640 * bit for a page using the RMPADJUST instruction.
641 * However, for the instruction to succeed it must
642 * target the permissions of a lesser privileged (higher
643 * numbered) VMPL level, so use VMPL1.
644 */
645 u64 attrs = 1;
646
647 if (make_vmsa)
648 attrs |= RMPADJUST_VMSA_PAGE_BIT;
649
650 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
651 }
652
653 return ret;
654 }
655
656 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
657 {
658 int err;
659
660 err = snp_set_vmsa(vmsa, NULL, apic_id, false);
661 if (err)
662 pr_err("clear VMSA page failed (%u), leaking page\n", err);
663 else
664 free_page((unsigned long)vmsa);
665 }
666
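/* Switch the page-table mapping of @va back to encrypted. */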
667 static void set_pte_enc(pte_t *kpte, int level, void *va)
668 {
669 struct pte_enc_desc d = {
670 .kpte = kpte,
671 .pte_level = level,
672 .va = va,
673 .encrypt = true
674 };
675
676 prepare_pte_enc(&d);
677 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
678 }
679
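/*
 * Convert all shared (decrypted) memory back to private before kexec:
 * the direct mapping and the .bss decrypted section. Mappings containing
 * the per-CPU GHCB pages are skipped here and converted later in
 * snp_kexec_finish() so that GHCB-based page state changes remain
 * available for as long as possible.
 */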
680 static void unshare_all_memory(void)
681 {
682 unsigned long addr, end, size, ghcb;
683 struct sev_es_runtime_data *data;
684 unsigned int npages, level;
685 bool skipped_addr;
686 pte_t *pte;
687 int cpu;
688
689 /* Unshare the direct mapping. */
690 addr = PAGE_OFFSET;
691 end = PAGE_OFFSET + get_max_mapped();
692
693 while (addr < end) {
694 pte = lookup_address(addr, &level);
695 size = page_level_size(level);
696 npages = size / PAGE_SIZE;
697 skipped_addr = false;
698
699 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
700 addr += size;
701 continue;
702 }
703
704 /*
705 * Ensure that all the per-CPU GHCBs are made private at the
706 * end of the unsharing loop so that the switch to the slower
707 * MSR protocol happens last.
708 */
709 for_each_possible_cpu(cpu) {
710 data = per_cpu(runtime_data, cpu);
711 ghcb = (unsigned long)&data->ghcb_page;
712
713 /* Handle the case of a huge page containing the GHCB page */
714 if (addr <= ghcb && ghcb < addr + size) {
715 skipped_addr = true;
716 break;
717 }
718 }
719
720 if (!skipped_addr) {
721 set_pte_enc(pte, level, (void *)addr);
722 snp_set_memory_private(addr, npages);
723 }
724 addr += size;
725 }
726
727 /* Unshare all bss decrypted memory. */
728 addr = (unsigned long)__start_bss_decrypted;
729 end = (unsigned long)__start_bss_decrypted_unused;
730 npages = (end - addr) >> PAGE_SHIFT;
731
732 for (; addr < end; addr += PAGE_SIZE) {
733 pte = lookup_address(addr, &level);
734 if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
735 continue;
736
737 set_pte_enc(pte, level, (void *)addr);
738 }
739 addr = (unsigned long)__start_bss_decrypted;
740 snp_set_memory_private(addr, npages);
741
742 __flush_tlb_all();
743 }
744
745 /* Stop new private<->shared conversions */
746 void snp_kexec_begin(void)
747 {
748 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
749 return;
750
751 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
752 return;
753
754 /*
755 * Crash kernel ends up here with interrupts disabled: can't wait for
756 * conversions to finish.
757 *
758 * If race happened, just report and proceed.
759 */
760 if (!set_memory_enc_stop_conversion())
761 pr_warn("Failed to stop shared<->private conversions\n");
762 }
763
764 /*
765 * Shut down all APs except the one handling kexec/kdump and clear
766 * the VMSA tag on the APs' VMSA pages as they are no longer being
767 * used as VMSA pages.
768 */
769 static void shutdown_all_aps(void)
770 {
771 struct sev_es_save_area *vmsa;
772 int apic_id, this_cpu, cpu;
773
774 this_cpu = get_cpu();
775
776 /*
777 * APs are already in HLT loop when enc_kexec_finish() callback
778 * is invoked.
779 */
780 for_each_present_cpu(cpu) {
781 vmsa = per_cpu(sev_vmsa, cpu);
782
783 /*
784 * The BSP or offlined APs do not have guest allocated VMSA
785 * and there is no need to clear the VMSA tag for this page.
786 */
787 if (!vmsa)
788 continue;
789
790 /*
791 * Cannot clear the VMSA tag for the currently running vCPU.
792 */
793 if (this_cpu == cpu) {
794 unsigned long pa;
795 struct page *p;
796
797 pa = __pa(vmsa);
798 /*
799 * Mark the VMSA page of the running vCPU as offline
800 * so that it is excluded and not touched by makedumpfile
801 * while generating vmcore during kdump.
802 */
803 p = pfn_to_online_page(pa >> PAGE_SHIFT);
804 if (p)
805 __SetPageOffline(p);
806 continue;
807 }
808
809 apic_id = cpuid_to_apicid[cpu];
810
811 /*
812 * Issue AP destroy to ensure AP gets kicked out of guest mode
813 * to allow using RMPADJUST to remove the VMSA tag on its
814 * VMSA page.
815 */
816 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
817 snp_cleanup_vmsa(vmsa, apic_id);
818 }
819
820 put_cpu();
821 }
822
823 void snp_kexec_finish(void)
824 {
825 struct sev_es_runtime_data *data;
826 unsigned long size, addr;
827 unsigned int level, cpu;
828 struct ghcb *ghcb;
829 pte_t *pte;
830
831 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
832 return;
833
834 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
835 return;
836
837 shutdown_all_aps();
838
839 unshare_all_memory();
840
841 /*
842 * Switch to using the MSR protocol to change per-CPU GHCBs to
843 * private. All the per-CPU GHCBs have been switched back to private,
844 * so no more GHCB calls can be made to the hypervisor beyond this point
845 * until the kexec'ed kernel starts running.
846 */
847 boot_ghcb = NULL;
848 sev_cfg.ghcbs_initialized = false;
849
850 for_each_possible_cpu(cpu) {
851 data = per_cpu(runtime_data, cpu);
852 ghcb = &data->ghcb_page;
853 pte = lookup_address((unsigned long)ghcb, &level);
854 size = page_level_size(level);
855 /* Handle the case of a huge page containing the GHCB page */
856 addr = (unsigned long)ghcb & page_level_mask(level);
857 set_pte_enc(pte, level, (void *)addr);
858 snp_set_memory_private(addr, (size / PAGE_SIZE));
859 }
860 }
861
862 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
863 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
864 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
865
866 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
867 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
868
869 static void *snp_alloc_vmsa_page(int cpu)
870 {
871 struct page *p;
872
873 /*
874 * Allocate VMSA page to work around the SNP erratum where the CPU will
875 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
876 * collides with the RMP entry of the VMSA page. The recommended workaround
877 * is to not use a large page.
878 *
879 * Allocate an 8k page which is also 8k-aligned.
880 */
881 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
882 if (!p)
883 return NULL;
884
885 split_page(p, 1);
886
887 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
888 __free_page(p);
889
890 return page_address(p + 1);
891 }
892
893 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
894 {
895 struct sev_es_save_area *cur_vmsa, *vmsa;
896 struct svsm_ca *caa;
897 u8 sipi_vector;
898 int ret;
899 u64 cr4;
900
901 /*
902 * The hypervisor SNP feature support check has happened earlier, just check
903 * the AP_CREATION one here.
904 */
905 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
906 return -EOPNOTSUPP;
907
908 /*
909 * Verify the desired start IP against the known trampoline start IP
910 * to catch any future new trampolines that may be introduced that
911 * would require a new protected guest entry point.
912 */
913 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
914 "Unsupported SNP start_ip: %lx\n", start_ip))
915 return -EINVAL;
916
917 /* Override start_ip with known protected guest start IP */
918 start_ip = real_mode_header->sev_es_trampoline_start;
919 cur_vmsa = per_cpu(sev_vmsa, cpu);
920
921 /*
922 * A new VMSA is created each time because there is no guarantee that
923 * the current VMSA is the kernel's or that the vCPU is not running. If
924 * an attempt were made to use the current VMSA with a running vCPU, a
925 * #VMEXIT of that vCPU would wipe out all of the settings being done
926 * here.
927 */
928 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
929 if (!vmsa)
930 return -ENOMEM;
931
932 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
933 caa = per_cpu(svsm_caa, cpu);
934
935 /* CR4 should maintain the MCE value */
936 cr4 = native_read_cr4() & X86_CR4_MCE;
937
938 /* Set the CS value based on the start_ip converted to a SIPI vector */
939 sipi_vector = (start_ip >> 12);
940 vmsa->cs.base = sipi_vector << 12;
941 vmsa->cs.limit = AP_INIT_CS_LIMIT;
942 vmsa->cs.attrib = INIT_CS_ATTRIBS;
943 vmsa->cs.selector = sipi_vector << 8;
944
945 /* Set the RIP value based on start_ip */
946 vmsa->rip = start_ip & 0xfff;
947
948 /* Set AP INIT defaults as documented in the APM */
949 vmsa->ds.limit = AP_INIT_DS_LIMIT;
950 vmsa->ds.attrib = INIT_DS_ATTRIBS;
951 vmsa->es = vmsa->ds;
952 vmsa->fs = vmsa->ds;
953 vmsa->gs = vmsa->ds;
954 vmsa->ss = vmsa->ds;
955
956 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
957 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
958 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
959 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
960 vmsa->tr.limit = AP_INIT_TR_LIMIT;
961 vmsa->tr.attrib = INIT_TR_ATTRIBS;
962
963 vmsa->cr4 = cr4;
964 vmsa->cr0 = AP_INIT_CR0_DEFAULT;
965 vmsa->dr7 = DR7_RESET_VALUE;
966 vmsa->dr6 = AP_INIT_DR6_DEFAULT;
967 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
968 vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
969 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
970 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
971 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
972 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
973
974 /* SVME must be set. */
975 vmsa->efer = EFER_SVME;
976
977 /*
978 * Set the SNP-specific fields for this VMSA:
979 * VMPL level
980 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
981 */
982 vmsa->vmpl = snp_vmpl;
983 vmsa->sev_features = sev_status >> 2;
984
985 /* Populate AP's TSC scale/offset to get accurate TSC values. */
986 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
987 vmsa->tsc_scale = snp_tsc_scale;
988 vmsa->tsc_offset = snp_tsc_offset;
989 }
990
991 /* Switch the page over to a VMSA page now that it is initialized */
992 ret = snp_set_vmsa(vmsa, caa, apic_id, true);
993 if (ret) {
994 pr_err("set VMSA page failed (%u)\n", ret);
995 free_page((unsigned long)vmsa);
996
997 return -EINVAL;
998 }
999
1000 /* Issue VMGEXIT AP Creation NAE event */
1001 ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
1002 if (ret) {
1003 snp_cleanup_vmsa(vmsa, apic_id);
1004 vmsa = NULL;
1005 }
1006
1007 /* Free up any previous VMSA page */
1008 if (cur_vmsa)
1009 snp_cleanup_vmsa(cur_vmsa, apic_id);
1010
1011 /* Record the current VMSA page */
1012 per_cpu(sev_vmsa, cpu) = vmsa;
1013
1014 return ret;
1015 }
1016
1017 void __init snp_set_wakeup_secondary_cpu(void)
1018 {
1019 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1020 return;
1021
1022 /*
1023 * Always set this override if SNP is enabled. This makes it the
1024 * required method to start APs under SNP. If the hypervisor does
1025 * not support AP creation, then no APs will be started.
1026 */
1027 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
1028 }
1029
1030 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
1031 {
1032 u16 startup_cs, startup_ip;
1033 phys_addr_t jump_table_pa;
1034 u64 jump_table_addr;
1035 u16 __iomem *jump_table;
1036
1037 jump_table_addr = get_jump_table_addr();
1038
1039 /* On UP guests there is no jump table so this is not a failure */
1040 if (!jump_table_addr)
1041 return 0;
1042
1043 /* Check if AP Jump Table is page-aligned */
1044 if (jump_table_addr & ~PAGE_MASK)
1045 return -EINVAL;
1046
1047 jump_table_pa = jump_table_addr & PAGE_MASK;
1048
1049 startup_cs = (u16)(rmh->trampoline_start >> 4);
1050 startup_ip = (u16)(rmh->sev_es_trampoline_start -
1051 rmh->trampoline_start);
1052
1053 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
1054 if (!jump_table)
1055 return -EIO;
1056
1057 writew(startup_ip, &jump_table[0]);
1058 writew(startup_cs, &jump_table[1]);
1059
1060 iounmap(jump_table);
1061
1062 return 0;
1063 }
1064
1065 /*
1066 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
1067 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
1068 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
1069 *
1070 * When running under SVSM the CA page is needed too, so map it as well.
1071 */
1072 int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
1073 {
1074 unsigned long address, pflags, pflags_enc;
1075 struct sev_es_runtime_data *data;
1076 int cpu;
1077 u64 pfn;
1078
1079 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1080 return 0;
1081
1082 pflags = _PAGE_NX | _PAGE_RW;
1083 pflags_enc = cc_mkenc(pflags);
1084
1085 for_each_possible_cpu(cpu) {
1086 data = per_cpu(runtime_data, cpu);
1087
1088 address = __pa(&data->ghcb_page);
1089 pfn = address >> PAGE_SHIFT;
1090
1091 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
1092 return 1;
1093
1094 if (snp_vmpl) {
1095 address = per_cpu(svsm_caa_pa, cpu);
1096 if (!address)
1097 return 1;
1098
1099 pfn = address >> PAGE_SHIFT;
1100 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
1101 return 1;
1102 }
1103 }
1104
1105 return 0;
1106 }
1107
1108 static void snp_register_per_cpu_ghcb(void)
1109 {
1110 struct sev_es_runtime_data *data;
1111 struct ghcb *ghcb;
1112
1113 data = this_cpu_read(runtime_data);
1114 ghcb = &data->ghcb_page;
1115
1116 snp_register_ghcb_early(__pa(ghcb));
1117 }
1118
1119 void setup_ghcb(void)
1120 {
1121 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1122 return;
1123
1124 /*
1125 * Check whether the runtime #VC exception handler is active. It uses
1126 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
1127 *
1128 * If SNP is active, register the per-CPU GHCB page so that the runtime
1129 * exception handler can use it.
1130 */
1131 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
1132 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1133 snp_register_per_cpu_ghcb();
1134
1135 sev_cfg.ghcbs_initialized = true;
1136
1137 return;
1138 }
1139
1140 /*
1141 * Make sure the hypervisor talks a supported protocol.
1142 * This gets called only in the BSP boot phase.
1143 */
1144 if (!sev_es_negotiate_protocol())
1145 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1146
1147 /*
1148 * Clear the boot_ghcb. The first exception comes in before the bss
1149 * section is cleared.
1150 */
1151 memset(&boot_ghcb_page, 0, PAGE_SIZE);
1152
1153 /* Alright - Make the boot-ghcb public */
1154 boot_ghcb = &boot_ghcb_page;
1155
1156 /* An SNP guest requires that the GHCB GPA be registered. */
1157 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1158 snp_register_ghcb_early(__pa(&boot_ghcb_page));
1159 }
1160
1161 #ifdef CONFIG_HOTPLUG_CPU
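/*
 * Park the vCPU in the hypervisor using the AP HLT LOOP NAE event and
 * return once the hypervisor signals a wakeup through sw_exit_info_2.
 */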
1162 static void sev_es_ap_hlt_loop(void)
1163 {
1164 struct ghcb_state state;
1165 struct ghcb *ghcb;
1166
1167 ghcb = __sev_get_ghcb(&state);
1168
1169 while (true) {
1170 vc_ghcb_invalidate(ghcb);
1171 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
1172 ghcb_set_sw_exit_info_1(ghcb, 0);
1173 ghcb_set_sw_exit_info_2(ghcb, 0);
1174
1175 sev_es_wr_ghcb_msr(__pa(ghcb));
1176 VMGEXIT();
1177
1178 /* Wakeup signal? */
1179 if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
1180 ghcb->save.sw_exit_info_2)
1181 break;
1182 }
1183
1184 __sev_put_ghcb(&state);
1185 }
1186
1187 /*
1188 * Play_dead handler when running under SEV-ES. This is needed because
1189 * the hypervisor can't deliver an SIPI request to restart the AP.
1190 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
1191 * hypervisor wakes it up again.
1192 */
1193 static void sev_es_play_dead(void)
1194 {
1195 play_dead_common();
1196
1197 /* IRQs now disabled */
1198
1199 sev_es_ap_hlt_loop();
1200
1201 /*
1202 * If we get here, the VCPU was woken up again. Jump to CPU
1203 * startup code to get it back online.
1204 */
1205 soft_restart_cpu();
1206 }
1207 #else /* CONFIG_HOTPLUG_CPU */
1208 #define sev_es_play_dead native_play_dead
1209 #endif /* CONFIG_HOTPLUG_CPU */
1210
1211 #ifdef CONFIG_SMP
1212 static void __init sev_es_setup_play_dead(void)
1213 {
1214 smp_ops.play_dead = sev_es_play_dead;
1215 }
1216 #else
1217 static inline void sev_es_setup_play_dead(void) { }
1218 #endif
1219
1220 static void __init alloc_runtime_data(int cpu)
1221 {
1222 struct sev_es_runtime_data *data;
1223
1224 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
1225 if (!data)
1226 panic("Can't allocate SEV-ES runtime data");
1227
1228 per_cpu(runtime_data, cpu) = data;
1229
1230 if (snp_vmpl) {
1231 struct svsm_ca *caa;
1232
1233 /* Allocate the SVSM CA page if an SVSM is present */
1234 caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);
1235
1236 per_cpu(svsm_caa, cpu) = caa;
1237 per_cpu(svsm_caa_pa, cpu) = __pa(caa);
1238 }
1239 }
1240
1241 static void __init init_ghcb(int cpu)
1242 {
1243 struct sev_es_runtime_data *data;
1244 int err;
1245
1246 data = per_cpu(runtime_data, cpu);
1247
1248 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
1249 sizeof(data->ghcb_page));
1250 if (err)
1251 panic("Can't map GHCBs unencrypted");
1252
1253 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
1254
1255 data->ghcb_active = false;
1256 data->backup_ghcb_active = false;
1257 }
1258
1259 void __init sev_es_init_vc_handling(void)
1260 {
1261 int cpu;
1262
1263 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
1264
1265 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1266 return;
1267
1268 if (!sev_es_check_cpu_features())
1269 panic("SEV-ES CPU Features missing");
1270
1271 /*
1272 * SNP is supported in v2 of the GHCB spec which mandates support for HV
1273 * features.
1274 */
1275 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
1276 sev_hv_features = get_hv_features();
1277
1278 if (!(sev_hv_features & GHCB_HV_FT_SNP))
1279 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
1280 }
1281
1282 /* Initialize per-cpu GHCB pages */
1283 for_each_possible_cpu(cpu) {
1284 alloc_runtime_data(cpu);
1285 init_ghcb(cpu);
1286 }
1287
1288 /* If running under an SVSM, switch to the per-cpu CA */
1289 if (snp_vmpl) {
1290 struct svsm_call call = {};
1291 unsigned long flags;
1292 int ret;
1293
1294 local_irq_save(flags);
1295
1296 /*
1297 * SVSM_CORE_REMAP_CA call:
1298 * RAX = 0 (Protocol=0, CallID=0)
1299 * RCX = New CA GPA
1300 */
1301 call.caa = svsm_get_caa();
1302 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
1303 call.rcx = this_cpu_read(svsm_caa_pa);
1304 ret = svsm_perform_call_protocol(&call);
1305 if (ret)
1306 panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
1307 ret, call.rax_out);
1308
1309 sev_cfg.use_cas = true;
1310
1311 local_irq_restore(flags);
1312 }
1313
1314 sev_es_setup_play_dead();
1315
1316 /* Secondary CPUs use the runtime #VC handler */
1317 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
1318 }
1319
1320 /*
1321 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1322 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
1323 * ROM region can cause a crash since this region is not pre-validated.
1324 */
1325 void __init snp_dmi_setup(void)
1326 {
1327 if (efi_enabled(EFI_CONFIG_TABLES))
1328 dmi_setup();
1329 }
1330
1331 static void dump_cpuid_table(void)
1332 {
1333 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1334 int i = 0;
1335
1336 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
1337 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
1338
1339 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
1340 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
1341
1342 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
1343 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
1344 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
1345 }
1346 }
1347
1348 /*
1349 * It is useful from an auditing/testing perspective to provide an easy way
1350 * for the guest owner to know that the CPUID table has been initialized as
1351 * expected, but that initialization happens too early in boot to print any
1352 * sort of indicator, and there's not really any other good place to do it,
1353 * so do it here.
1354 *
1355 * If running as an SNP guest, report the current VM privilege level (VMPL).
1356 */
1357 static int __init report_snp_info(void)
1358 {
1359 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1360
1361 if (cpuid_table->count) {
1362 pr_info("Using SNP CPUID table, %d entries present.\n",
1363 cpuid_table->count);
1364
1365 if (sev_cfg.debug)
1366 dump_cpuid_table();
1367 }
1368
1369 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1370 pr_info("SNP running at VMPL%u.\n", snp_vmpl);
1371
1372 return 0;
1373 }
1374 arch_initcall(report_snp_info);
1375
1376 static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
1377 {
1378 /* If (new) lengths have been returned, propagate them up */
1379 if (call->rcx_out != call->rcx)
1380 input->manifest_buf.len = call->rcx_out;
1381
1382 if (call->rdx_out != call->rdx)
1383 input->certificates_buf.len = call->rdx_out;
1384
1385 if (call->r8_out != call->r8)
1386 input->report_buf.len = call->r8_out;
1387 }
1388
1389 int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
1390 struct svsm_attest_call *input)
1391 {
1392 struct svsm_attest_call *ac;
1393 unsigned long flags;
1394 u64 attest_call_pa;
1395 int ret;
1396
1397 if (!snp_vmpl)
1398 return -EINVAL;
1399
1400 local_irq_save(flags);
1401
1402 call->caa = svsm_get_caa();
1403
1404 ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
1405 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
1406
1407 *ac = *input;
1408
1409 /*
1410 * Set input registers for the request and set RDX and R8 to known
1411 * values in order to detect length values being returned in them.
1412 */
1413 call->rax = call_id;
1414 call->rcx = attest_call_pa;
1415 call->rdx = -1;
1416 call->r8 = -1;
1417 ret = svsm_perform_call_protocol(call);
1418 update_attest_input(call, input);
1419
1420 local_irq_restore(flags);
1421
1422 return ret;
1423 }
1424 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
1425
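/*
 * Issue an SNP guest request (or extended guest request) to the SEV
 * firmware through the GHCB. The firmware/VMM status is returned in
 * req->exitinfo2; throttling maps to -EAGAIN, and an undersized
 * certificate buffer for an extended request maps to -ENOSPC with the
 * required number of pages returned in req->input.data_npages.
 */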
1426 static int snp_issue_guest_request(struct snp_guest_req *req)
1427 {
1428 struct snp_req_data *input = &req->input;
1429 struct ghcb_state state;
1430 struct es_em_ctxt ctxt;
1431 unsigned long flags;
1432 struct ghcb *ghcb;
1433 int ret;
1434
1435 req->exitinfo2 = SEV_RET_NO_FW_CALL;
1436
1437 /*
1438 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
1439 * a per-CPU GHCB.
1440 */
1441 local_irq_save(flags);
1442
1443 ghcb = __sev_get_ghcb(&state);
1444 if (!ghcb) {
1445 ret = -EIO;
1446 goto e_restore_irq;
1447 }
1448
1449 vc_ghcb_invalidate(ghcb);
1450
1451 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1452 ghcb_set_rax(ghcb, input->data_gpa);
1453 ghcb_set_rbx(ghcb, input->data_npages);
1454 }
1455
1456 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
1457 if (ret)
1458 goto e_put;
1459
1460 req->exitinfo2 = ghcb->save.sw_exit_info_2;
1461 switch (req->exitinfo2) {
1462 case 0:
1463 break;
1464
1465 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
1466 ret = -EAGAIN;
1467 break;
1468
1469 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
1470 /* Number of expected pages are returned in RBX */
1471 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1472 input->data_npages = ghcb_get_rbx(ghcb);
1473 ret = -ENOSPC;
1474 break;
1475 }
1476 fallthrough;
1477 default:
1478 ret = -EIO;
1479 break;
1480 }
1481
1482 e_put:
1483 __sev_put_ghcb(&state);
1484 e_restore_irq:
1485 local_irq_restore(flags);
1486
1487 return ret;
1488 }
1489
1490 /**
1491 * snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
1492 *
1493 * Check that there is SVSM and that it supports at least TPM_SEND_COMMAND
1494 * which is the only request used so far.
1495 *
1496 * Return: true if the platform provides a vTPM SVSM device, false otherwise.
1497 */
1498 static bool snp_svsm_vtpm_probe(void)
1499 {
1500 struct svsm_call call = {};
1501
1502 /* The vTPM device is available only if an SVSM is present */
1503 if (!snp_vmpl)
1504 return false;
1505
1506 call.caa = svsm_get_caa();
1507 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
1508
1509 if (svsm_perform_call_protocol(&call))
1510 return false;
1511
1512 /* Check that the platform command bitmap contains TPM_SEND_COMMAND - platform command 8 */
1513 return call.rcx_out & BIT_ULL(8);
1514 }
1515
1516 /**
1517 * snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
1518 * @buffer: A buffer used to both send the command and receive the response.
1519 *
1520 * Execute a SVSM_VTPM_CMD call as defined by
1521 * "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
1522 *
1523 * All command request/response buffers have a common structure as specified by
1524 * the following table:
1525 * Byte Size In/Out Description
1526 * Offset (Bytes)
1527 * 0x000 4 In Platform command
1528 * Out Platform command response size
1529 *
1530 * Each command can build upon this common request/response structure to create
1531 * a structure specific to the command. See include/linux/tpm_svsm.h for more
1532 * details.
1533 *
1534 * Return: 0 on success, -errno on failure
1535 */
1536 int snp_svsm_vtpm_send_command(u8 *buffer)
1537 {
1538 struct svsm_call call = {};
1539
1540 call.caa = svsm_get_caa();
1541 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
1542 call.rcx = __pa(buffer);
1543
1544 return svsm_perform_call_protocol(&call);
1545 }
1546 EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
1547
1548 static struct platform_device sev_guest_device = {
1549 .name = "sev-guest",
1550 .id = -1,
1551 };
1552
1553 static struct platform_device tpm_svsm_device = {
1554 .name = "tpm-svsm",
1555 .id = -1,
1556 };
1557
1558 static int __init snp_init_platform_device(void)
1559 {
1560 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1561 return -ENODEV;
1562
1563 if (platform_device_register(&sev_guest_device))
1564 return -ENODEV;
1565
1566 if (snp_svsm_vtpm_probe() &&
1567 platform_device_register(&tpm_svsm_device))
1568 return -ENODEV;
1569
1570 pr_info("SNP guest platform devices initialized.\n");
1571 return 0;
1572 }
1573 device_initcall(snp_init_platform_device);
1574
1575 void sev_show_status(void)
1576 {
1577 int i;
1578
1579 pr_info("Status: ");
1580 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1581 if (sev_status & BIT_ULL(i)) {
1582 if (!sev_status_feat_names[i])
1583 continue;
1584
1585 pr_cont("%s ", sev_status_feat_names[i]);
1586 }
1587 }
1588 pr_cont("\n");
1589 }
1590
1591 void __init snp_update_svsm_ca(void)
1592 {
1593 if (!snp_vmpl)
1594 return;
1595
1596 /* Update the CAA to a proper kernel address */
1597 boot_svsm_caa = &boot_svsm_ca_page;
1598 }
1599
1600 #ifdef CONFIG_SYSFS
1601 static ssize_t vmpl_show(struct kobject *kobj,
1602 struct kobj_attribute *attr, char *buf)
1603 {
1604 return sysfs_emit(buf, "%d\n", snp_vmpl);
1605 }
1606
1607 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
1608
1609 static struct attribute *vmpl_attrs[] = {
1610 &vmpl_attr.attr,
1611 NULL
1612 };
1613
1614 static struct attribute_group sev_attr_group = {
1615 .attrs = vmpl_attrs,
1616 };
1617
1618 static int __init sev_sysfs_init(void)
1619 {
1620 struct kobject *sev_kobj;
1621 struct device *dev_root;
1622 int ret;
1623
1624 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1625 return -ENODEV;
1626
1627 dev_root = bus_get_dev_root(&cpu_subsys);
1628 if (!dev_root)
1629 return -ENODEV;
1630
1631 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
1632 put_device(dev_root);
1633
1634 if (!sev_kobj)
1635 return -ENOMEM;
1636
1637 ret = sysfs_create_group(sev_kobj, &sev_attr_group);
1638 if (ret)
1639 kobject_put(sev_kobj);
1640
1641 return ret;
1642 }
1643 arch_initcall(sev_sysfs_init);
1644 #endif // CONFIG_SYSFS
1645
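/*
 * Helpers for the guest request/response buffers, which must be shared
 * with the hypervisor: allocate pages and mark them decrypted, and on
 * free restore the encryption mask first (leaking the pages if that
 * fails).
 */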
1646 static void free_shared_pages(void *buf, size_t sz)
1647 {
1648 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1649 int ret;
1650
1651 if (!buf)
1652 return;
1653
1654 ret = set_memory_encrypted((unsigned long)buf, npages);
1655 if (ret) {
1656 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
1657 return;
1658 }
1659
1660 __free_pages(virt_to_page(buf), get_order(sz));
1661 }
1662
1663 static void *alloc_shared_pages(size_t sz)
1664 {
1665 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1666 struct page *page;
1667 int ret;
1668
1669 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
1670 if (!page)
1671 return NULL;
1672
1673 ret = set_memory_decrypted((unsigned long)page_address(page), npages);
1674 if (ret) {
1675 pr_err("failed to mark page shared, ret=%d\n", ret);
1676 __free_pages(page, get_order(sz));
1677 return NULL;
1678 }
1679
1680 return page_address(page);
1681 }
1682
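/*
 * Return the VMPCK for the requested ID from the secrets page and point
 * @seqno at the corresponding message sequence counter, or NULL if the
 * ID is not 0-3.
 */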
1683 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1684 {
1685 u8 *key = NULL;
1686
1687 switch (id) {
1688 case 0:
1689 *seqno = &secrets->os_area.msg_seqno_0;
1690 key = secrets->vmpck0;
1691 break;
1692 case 1:
1693 *seqno = &secrets->os_area.msg_seqno_1;
1694 key = secrets->vmpck1;
1695 break;
1696 case 2:
1697 *seqno = &secrets->os_area.msg_seqno_2;
1698 key = secrets->vmpck2;
1699 break;
1700 case 3:
1701 *seqno = &secrets->os_area.msg_seqno_3;
1702 key = secrets->vmpck3;
1703 break;
1704 default:
1705 break;
1706 }
1707
1708 return key;
1709 }
1710
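/* Allocate and key an AES-GCM context used to encrypt guest messages. */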
1711 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
1712 {
1713 struct aesgcm_ctx *ctx;
1714
1715 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1716 if (!ctx)
1717 return NULL;
1718
1719 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
1720 pr_err("Crypto context initialization failed\n");
1721 kfree(ctx);
1722 return NULL;
1723 }
1724
1725 return ctx;
1726 }
1727
1728 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
1729 {
1730 /* Adjust the default VMPCK key based on the executing VMPL level */
1731 if (vmpck_id == -1)
1732 vmpck_id = snp_vmpl;
1733
1734 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
1735 if (!mdesc->vmpck) {
1736 pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
1737 return -EINVAL;
1738 }
1739
1740 /* Verify that VMPCK is not zero. */
1741 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
1742 pr_err("Empty VMPCK%d communication key\n", vmpck_id);
1743 return -EINVAL;
1744 }
1745
1746 mdesc->vmpck_id = vmpck_id;
1747
1748 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
1749 if (!mdesc->ctx)
1750 return -ENOMEM;
1751
1752 return 0;
1753 }
1754 EXPORT_SYMBOL_GPL(snp_msg_init);
1755
1756 struct snp_msg_desc *snp_msg_alloc(void)
1757 {
1758 struct snp_msg_desc *mdesc;
1759 void __iomem *mem;
1760
1761 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);
1762
1763 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
1764 if (!mdesc)
1765 return ERR_PTR(-ENOMEM);
1766
1767 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
1768 if (!mem)
1769 goto e_free_mdesc;
1770
1771 mdesc->secrets = (__force struct snp_secrets_page *)mem;
1772
1773 /* Allocate the shared page used for the request and response message. */
1774 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
1775 if (!mdesc->request)
1776 goto e_unmap;
1777
1778 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
1779 if (!mdesc->response)
1780 goto e_free_request;
1781
1782 return mdesc;
1783
1784 e_free_request:
1785 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1786 e_unmap:
1787 iounmap(mem);
1788 e_free_mdesc:
1789 kfree(mdesc);
1790
1791 return ERR_PTR(-ENOMEM);
1792 }
1793 EXPORT_SYMBOL_GPL(snp_msg_alloc);
1794
1795 void snp_msg_free(struct snp_msg_desc *mdesc)
1796 {
1797 if (!mdesc)
1798 return;
1799
1800 kfree(mdesc->ctx);
1801 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
1802 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1803 iounmap((__force void __iomem *)mdesc->secrets);
1804
1805 memset(mdesc, 0, sizeof(*mdesc));
1806 kfree(mdesc);
1807 }
1808 EXPORT_SYMBOL_GPL(snp_msg_free);
1809
1810 /* Mutex to serialize the shared buffer access and command handling. */
1811 static DEFINE_MUTEX(snp_cmd_mutex);
1812
1813 /*
1814 * If an error is received from the host or AMD Secure Processor (ASP) there
1815 * are two options. Either retry the exact same encrypted request or discontinue
1816 * using the VMPCK.
1817 *
1818 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
1819 * encrypt the requests. The IV for this scheme is the sequence number. GCM
1820 * cannot tolerate IV reuse.
1821 *
1822 * The ASP FW v1.51 only increments the sequence numbers on a successful
1823 * guest<->ASP back and forth and only accepts messages at its exact sequence
1824 * number.
1825 *
1826 * So if the sequence number were to be reused the encryption scheme is
1827 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
1828 * will reject the request.
1829 */
1830 static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
1831 {
1832 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
1833 mdesc->vmpck_id);
1834 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
1835 mdesc->vmpck = NULL;
1836 }
1837
1838 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1839 {
1840 u64 count;
1841
1842 lockdep_assert_held(&snp_cmd_mutex);
1843
1844 /* Read the current message sequence counter from the secrets page */
1845 count = *mdesc->os_area_msg_seqno;
1846
1847 return count + 1;
1848 }
1849
1850 /* Return a non-zero on success */
1851 static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1852 {
1853 u64 count = __snp_get_msg_seqno(mdesc);
1854
1855 /*
1856 * The message sequence counter for the SNP guest request is a 64-bit
1857 * value but the version 2 of GHCB specification defines a 32-bit storage
1858 * for it. If the counter exceeds the 32-bit value then return zero.
1859 * The caller should check the return value, but if the caller happens to
1860 * not check the value and use it, then the firmware treats zero as an
1861 * invalid number and will fail the message request.
1862 */
1863 if (count >= UINT_MAX) {
1864 pr_err("request message sequence counter overflow\n");
1865 return 0;
1866 }
1867
1868 return count;
1869 }
1870
1871 static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
1872 {
1873 /*
1874 * The counter is also incremented by the PSP, so increment it by 2
1875 * and save in secrets page.
1876 */
1877 *mdesc->os_area_msg_seqno += 2;
1878 }
1879
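/*
 * Copy the response out of the shared buffer, then verify that its
 * sequence number, message type/version and size are consistent with the
 * request before decrypting the payload into req->resp_buf.
 */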
1880 static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
1881 {
1882 struct snp_guest_msg *resp_msg = &mdesc->secret_response;
1883 struct snp_guest_msg *req_msg = &mdesc->secret_request;
1884 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
1885 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
1886 struct aesgcm_ctx *ctx = mdesc->ctx;
1887 u8 iv[GCM_AES_IV_SIZE] = {};
1888
1889 pr_debug("response [seqno %lld type %d version %d sz %d]\n",
1890 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
1891 resp_msg_hdr->msg_sz);
1892
1893 /* Copy response from shared memory to encrypted memory. */
1894 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));
1895
1896 /* Verify that the sequence counter is incremented by 1 */
1897 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
1898 return -EBADMSG;
1899
1900 /* Verify response message type and version number. */
1901 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
1902 resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
1903 return -EBADMSG;
1904
1905 /*
1906 * If the message size is greater than our buffer length then return
1907 * an error.
1908 */
1909 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
1910 return -EBADMSG;
1911
1912 /* Decrypt the payload */
1913 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
1914 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
1915 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
1916 return -EBADMSG;
1917
1918 return 0;
1919 }
1920
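/*
 * Build the request message header and AES-GCM encrypt the caller's payload
 * into the encrypted scratch buffer, using the message sequence number as
 * the IV.
 */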
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
{
	struct snp_guest_msg *msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *hdr = &msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	memset(msg, 0, sizeof(*msg));

	hdr->algo = SNP_AEAD_AES_256_GCM;
	hdr->hdr_version = MSG_HDR_VER;
	hdr->hdr_sz = sizeof(*hdr);
	hdr->msg_type = req->msg_type;
	hdr->msg_version = req->msg_version;
	hdr->msg_seqno = seqno;
	hdr->msg_vmpck = req->vmpck_id;
	hdr->msg_sz = req->req_sz;

	/* Verify the sequence number is non-zero */
	if (!hdr->msg_seqno)
		return -ENOSR;

	pr_debug("request [seqno %lld type %d version %d sz %d]\n",
		 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);

	if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
		return -EBADMSG;

	memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
	aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
		       AAD_LEN, iv, hdr->authtag);

	return 0;
}

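/*
 * Issue the prepared request to the ASP and make sure the message sequence
 * number is advanced afterwards. Handles retries for a too-small certificate
 * buffer (-ENOSPC) and for host-side throttling (-EAGAIN).
 */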
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	unsigned long req_start = jiffies;
	unsigned int override_npages = 0;
	u64 override_err = 0;
	int rc;

retry_request:
	/*
	 * Call firmware to process the request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small of a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		req->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}

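/*
 * snp_send_guest_request() - Encrypt a guest request, hand it to the ASP via
 * the shared request/response pages and decrypt the response.
 *
 * Must be called with valid, linearly-mapped request/response buffers in
 * @req. On any failure after the request has reached shared memory, the
 * VMPCK is wiped to prevent IV reuse.
 */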
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	u64 seqno;
	int rc;

	/*
	 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
	 * The offload's DMA SG list of data to encrypt has to be in the linear mapping.
	 */
	if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
		pr_warn("AES-GCM buffers must be in the linear mapping\n");
		return -EINVAL;
	}

	guard(mutex)(&snp_cmd_mutex);

	/* Check that the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get the message sequence counter and verify that it is non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear the shared response page for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/* Encrypt the userspace-provided payload in mdesc->secret_request. */
	rc = enc_payload(mdesc, seqno, req);
	if (rc)
		return rc;

	/*
	 * Write the fully encrypted request to the shared unencrypted
	 * request page.
	 */
	memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));

	/* Initialize the input addresses for the guest request */
	req->input.req_gpa = __pa(mdesc->request);
	req->input.resp_gpa = __pa(mdesc->response);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;

	rc = __handle_guest_request(mdesc, req);
	if (rc) {
		if (rc == -EIO &&
		    req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			return rc;

		pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
			 rc, req->exitinfo2);

		snp_disable_vmpck(mdesc);
		return rc;
	}

	rc = verify_and_dec_payload(mdesc, req);
	if (rc) {
		pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
		snp_disable_vmpck(mdesc);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);

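/*
 * Fetch TSC_INFO from the ASP using an SNP guest message and cache the
 * returned TSC scale and offset.
 */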
static int __init snp_get_tsc_info(void)
{
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req req = {};
	int rc = -ENOMEM;

	tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_tsc_resp;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

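	/* Fill in the TSC_INFO request to be sent at the current VMPL. */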
	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_TSC_INFO_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = tsc_req;
	req.req_sz = sizeof(*tsc_req);
	req.resp_buf = (void *)tsc_resp;
	req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}

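/*
 * Retrieve the Secure TSC parameters from the ASP early during boot. Failure
 * to do so is fatal for a Secure TSC guest, so terminate in that case.
 */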
void __init snp_secure_tsc_prepare(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	if (snp_get_tsc_info()) {
		pr_alert("Unable to retrieve Secure TSC info from ASP\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	pr_debug("SecureTSC enabled\n");
}

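/* TSC/CPU calibration callback: return the previously computed frequency. */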
static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}

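/*
 * Derive the guest TSC frequency from the GUEST_TSC_FREQ MSR and the
 * TSC_FACTOR in the SNP secrets page, and install it as the TSC/CPU
 * calibration source.
 */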
void __init snp_secure_tsc_init(void)
{
	struct snp_secrets_page *secrets;
	unsigned long tsc_freq_mhz;
	void *mem;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	secrets = (__force struct snp_secrets_page *)mem;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

	/* Extract the guest TSC frequency in MHz from bits [17:0]; the rest is reserved. */
	tsc_freq_mhz &= GENMASK_ULL(17, 0);

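	/*
	 * Apply the platform-provided TSC_FACTOR from the secrets page to
	 * derive the effective guest TSC frequency in kHz.
	 */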
	snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

	early_memunmap(mem, PAGE_SIZE);
}
