1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * AMD Memory Encryption Support
4 *
5 * Copyright (C) 2019 SUSE
6 *
7 * Author: Joerg Roedel <jroedel@suse.de>
8 */
9
10 #define pr_fmt(fmt) "SEV: " fmt
11
12 #include <linux/sched/debug.h> /* For show_regs() */
13 #include <linux/percpu-defs.h>
14 #include <linux/cc_platform.h>
15 #include <linux/printk.h>
16 #include <linux/mm_types.h>
17 #include <linux/set_memory.h>
18 #include <linux/memblock.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/cpumask.h>
22 #include <linux/efi.h>
23 #include <linux/platform_device.h>
24 #include <linux/io.h>
25 #include <linux/psp-sev.h>
26 #include <linux/dmi.h>
27 #include <uapi/linux/sev-guest.h>
28 #include <crypto/gcm.h>
29
30 #include <asm/init.h>
31 #include <asm/cpu_entry_area.h>
32 #include <asm/stacktrace.h>
33 #include <asm/sev.h>
34 #include <asm/sev-internal.h>
35 #include <asm/insn-eval.h>
36 #include <asm/fpu/xcr.h>
37 #include <asm/processor.h>
38 #include <asm/realmode.h>
39 #include <asm/setup.h>
40 #include <asm/traps.h>
41 #include <asm/svm.h>
42 #include <asm/smp.h>
43 #include <asm/cpu.h>
44 #include <asm/apic.h>
45 #include <asm/cpuid/api.h>
46 #include <asm/cmdline.h>
47 #include <asm/msr.h>
48
49 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */
50 #define AP_INIT_CS_LIMIT 0xffff
51 #define AP_INIT_DS_LIMIT 0xffff
52 #define AP_INIT_LDTR_LIMIT 0xffff
53 #define AP_INIT_GDTR_LIMIT 0xffff
54 #define AP_INIT_IDTR_LIMIT 0xffff
55 #define AP_INIT_TR_LIMIT 0xffff
56 #define AP_INIT_RFLAGS_DEFAULT 0x2
57 #define AP_INIT_DR6_DEFAULT 0xffff0ff0
58 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
59 #define AP_INIT_XCR0_DEFAULT 0x1
60 #define AP_INIT_X87_FTW_DEFAULT 0x5555
61 #define AP_INIT_X87_FCW_DEFAULT 0x0040
62 #define AP_INIT_CR0_DEFAULT 0x60000010
63 #define AP_INIT_MXCSR_DEFAULT 0x1f80
64
65 static const char * const sev_status_feat_names[] = {
66 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
67 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
68 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
69 [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
70 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
71 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
72 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
73 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
74 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
75 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
76 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
77 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
78 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
79 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
80 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
81 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
82 };
83
/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
89 static u64 snp_tsc_scale __ro_after_init;
90 static u64 snp_tsc_offset __ro_after_init;
91 static unsigned long snp_tsc_freq_khz __ro_after_init;
92
93 DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
94 DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
95
96 /*
97 * SVSM related information:
98 * When running under an SVSM, the VMPL that Linux is executing at must be
99 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
100 */
101 u8 snp_vmpl __ro_after_init;
102 EXPORT_SYMBOL_GPL(snp_vmpl);
103
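/*
 * Fetch the AP jump table address from the os_area of the SNP secrets page.
 * Returns 0 if the secrets page cannot be mapped.
 */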
static u64 __init get_snp_jump_table_addr(void)
105 {
106 struct snp_secrets_page *secrets;
107 void __iomem *mem;
108 u64 addr;
109
110 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
111 if (!mem) {
112 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
113 return 0;
114 }
115
116 secrets = (__force struct snp_secrets_page *)mem;
117
118 addr = secrets->os_area.ap_jump_table_pa;
119 iounmap(mem);
120
121 return addr;
122 }
123
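/*
 * Return the AP jump table address: from the SNP secrets page when SNP is
 * active, otherwise via the GHCB AP_JUMP_TABLE (get) VMGEXIT. Returns 0 on
 * failure.
 */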
static u64 __init get_jump_table_addr(void)
125 {
126 struct ghcb_state state;
127 unsigned long flags;
128 struct ghcb *ghcb;
129 u64 ret = 0;
130
131 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
132 return get_snp_jump_table_addr();
133
134 local_irq_save(flags);
135
136 ghcb = __sev_get_ghcb(&state);
137
138 vc_ghcb_invalidate(ghcb);
139 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
140 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
141 ghcb_set_sw_exit_info_2(ghcb, 0);
142
143 sev_es_wr_ghcb_msr(__pa(ghcb));
144 VMGEXIT();
145
146 if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
147 ghcb_sw_exit_info_2_is_valid(ghcb))
148 ret = ghcb->save.sw_exit_info_2;
149
150 __sev_put_ghcb(&state);
151
152 local_irq_restore(flags);
153
154 return ret;
155 }
156
static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
				    int ret, u64 svsm_ret)
159 {
160 WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
161 pfn, action, page_size, ret, svsm_ret);
162
163 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
164 }
165
static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
167 {
168 unsigned int page_size;
169 bool action;
170 u64 pfn;
171
172 pfn = pc->entry[pc->cur_index].pfn;
173 action = pc->entry[pc->cur_index].action;
174 page_size = pc->entry[pc->cur_index].page_size;
175
176 __pval_terminate(pfn, action, page_size, ret, svsm_ret);
177 }
178
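/*
 * Validate or rescind validation of the pages in the PSC descriptor using the
 * PVALIDATE instruction directly. A 2M size-mismatch failure is retried with
 * 4K pages; any other failure terminates the guest.
 */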
static void pval_pages(struct snp_psc_desc *desc)
180 {
181 struct psc_entry *e;
182 unsigned long vaddr;
183 unsigned int size;
184 unsigned int i;
185 bool validate;
186 u64 pfn;
187 int rc;
188
189 for (i = 0; i <= desc->hdr.end_entry; i++) {
190 e = &desc->entries[i];
191
192 pfn = e->gfn;
193 vaddr = (unsigned long)pfn_to_kaddr(pfn);
194 size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
195 validate = e->operation == SNP_PAGE_STATE_PRIVATE;
196
197 rc = pvalidate(vaddr, size, validate);
198 if (!rc)
199 continue;
200
201 if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
202 unsigned long vaddr_end = vaddr + PMD_SIZE;
203
204 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
205 rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
206 if (rc)
207 __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
208 }
209 } else {
210 __pval_terminate(pfn, validate, size, rc, 0);
211 }
212 }
213 }
214
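/*
 * Fill the SVSM calling area with 4K PVALIDATE entries for the given PFN
 * range, stopping when the CA is full. Returns the first PFN that was not
 * added to the CA.
 */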
static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
					struct svsm_pvalidate_call *pc)
217 {
218 struct svsm_pvalidate_entry *pe;
219
220 /* Nothing in the CA yet */
221 pc->num_entries = 0;
222 pc->cur_index = 0;
223
224 pe = &pc->entry[0];
225
226 while (pfn < pfn_end) {
227 pe->page_size = RMP_PG_SIZE_4K;
228 pe->action = action;
229 pe->ignore_cf = 0;
230 pe->pfn = pfn;
231
232 pe++;
233 pfn++;
234
235 pc->num_entries++;
236 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
237 break;
238 }
239
240 return pfn;
241 }
242
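/*
 * Fill the SVSM calling area with PVALIDATE entries taken from the PSC
 * descriptor, starting at desc_entry and stopping when the CA is full or the
 * descriptor is exhausted. Returns the index of the first unprocessed entry.
 */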
static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
				       struct svsm_pvalidate_call *pc)
245 {
246 struct svsm_pvalidate_entry *pe;
247 struct psc_entry *e;
248
249 /* Nothing in the CA yet */
250 pc->num_entries = 0;
251 pc->cur_index = 0;
252
253 pe = &pc->entry[0];
254 e = &desc->entries[desc_entry];
255
256 while (desc_entry <= desc->hdr.end_entry) {
257 pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
258 pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
259 pe->ignore_cf = 0;
260 pe->pfn = e->gfn;
261
262 pe++;
263 e++;
264
265 desc_entry++;
266 pc->num_entries++;
267 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
268 break;
269 }
270
271 return desc_entry;
272 }
273
static void svsm_pval_pages(struct snp_psc_desc *desc)
275 {
276 struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
277 unsigned int i, pv_4k_count = 0;
278 struct svsm_pvalidate_call *pc;
279 struct svsm_call call = {};
280 unsigned long flags;
281 bool action;
282 u64 pc_pa;
283 int ret;
284
285 /*
286 * This can be called very early in the boot, use native functions in
287 * order to avoid paravirt issues.
288 */
289 flags = native_local_irq_save();
290
291 /*
292 * The SVSM calling area (CA) can support processing 510 entries at a
293 * time. Loop through the Page State Change descriptor until the CA is
294 * full or the last entry in the descriptor is reached, at which time
295 * the SVSM is invoked. This repeats until all entries in the descriptor
296 * are processed.
297 */
298 call.caa = svsm_get_caa();
299
300 pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
301 pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
302
303 /* Protocol 0, Call ID 1 */
304 call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
305 call.rcx = pc_pa;
306
307 for (i = 0; i <= desc->hdr.end_entry;) {
308 i = svsm_build_ca_from_psc_desc(desc, i, pc);
309
310 do {
311 ret = svsm_perform_call_protocol(&call);
312 if (!ret)
313 continue;
314
315 /*
316 * Check if the entry failed because of an RMP mismatch (a
317 * PVALIDATE at 2M was requested, but the page is mapped in
318 * the RMP as 4K).
319 */
320
321 if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
322 pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
323 /* Save this entry for post-processing at 4K */
324 pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
325
326 /* Skip to the next one unless at the end of the list */
327 pc->cur_index++;
328 if (pc->cur_index < pc->num_entries)
329 ret = -EAGAIN;
330 else
331 ret = 0;
332 }
333 } while (ret == -EAGAIN);
334
335 if (ret)
336 svsm_pval_terminate(pc, ret, call.rax_out);
337 }
338
339 /* Process any entries that failed to be validated at 2M and validate them at 4K */
340 for (i = 0; i < pv_4k_count; i++) {
341 u64 pfn, pfn_end;
342
343 action = pv_4k[i].action;
344 pfn = pv_4k[i].pfn;
345 pfn_end = pfn + 512;
346
347 while (pfn < pfn_end) {
348 pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
349
350 ret = svsm_perform_call_protocol(&call);
351 if (ret)
352 svsm_pval_terminate(pc, ret, call.rax_out);
353 }
354 }
355
356 native_local_irq_restore(flags);
357 }
358
static void pvalidate_pages(struct snp_psc_desc *desc)
360 {
361 if (snp_vmpl)
362 svsm_pval_pages(desc);
363 else
364 pval_pages(desc);
365 }
366
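/*
 * Issue the Page State Change VMGEXIT for the given descriptor using the GHCB
 * shared buffer. Returns 0 on success and a non-zero value if the hypervisor
 * reported an error or returned inconsistent progress information.
 */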
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
368 {
369 int cur_entry, end_entry, ret = 0;
370 struct snp_psc_desc *data;
371 struct es_em_ctxt ctxt;
372
373 vc_ghcb_invalidate(ghcb);
374
375 /* Copy the input desc into GHCB shared buffer */
376 data = (struct snp_psc_desc *)ghcb->shared_buffer;
377 memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
378
379 /*
380 * As per the GHCB specification, the hypervisor can resume the guest
381 * before processing all the entries. Check whether all the entries
382 * are processed. If not, then keep retrying. Note, the hypervisor
383 * will update the data memory directly to indicate the status, so
384 * reference the data->hdr everywhere.
385 *
386 * The strategy here is to wait for the hypervisor to change the page
	 * state in the RMP table before the guest accesses the memory pages. If
	 * the page state change was not successful, then later memory access will
389 * result in a crash.
390 */
391 cur_entry = data->hdr.cur_entry;
392 end_entry = data->hdr.end_entry;
393
394 while (data->hdr.cur_entry <= data->hdr.end_entry) {
395 ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
396
		/* This call will advance cur_entry in the shared buffer that 'data' points to. */
398 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
399
400 /*
401 * Page State Change VMGEXIT can pass error code through
402 * exit_info_2.
403 */
404 if (WARN(ret || ghcb->save.sw_exit_info_2,
405 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
406 ret, ghcb->save.sw_exit_info_2)) {
407 ret = 1;
408 goto out;
409 }
410
411 /* Verify that reserved bit is not set */
412 if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
413 ret = 1;
414 goto out;
415 }
416
		/*
		 * Sanity check that entry processing is not going backwards.
		 * This will happen only if the hypervisor is tricking us.
		 */
421 if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
422 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
423 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
424 ret = 1;
425 goto out;
426 }
427 }
428
429 out:
430 return ret;
431 }
432
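/*
 * Build a PSC descriptor covering as much of [vaddr, vaddr_end) as fits,
 * issue the page state change and perform the required PVALIDATE operations
 * (before switching to shared, after switching to private). Returns the first
 * address not covered by this invocation.
 */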
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
435 {
436 struct ghcb_state state;
437 bool use_large_entry;
438 struct psc_hdr *hdr;
439 struct psc_entry *e;
440 unsigned long flags;
441 unsigned long pfn;
442 struct ghcb *ghcb;
443 int i;
444
445 hdr = &data->hdr;
446 e = data->entries;
447
448 memset(data, 0, sizeof(*data));
449 i = 0;
450
451 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
452 hdr->end_entry = i;
453
454 if (is_vmalloc_addr((void *)vaddr)) {
455 pfn = vmalloc_to_pfn((void *)vaddr);
456 use_large_entry = false;
457 } else {
458 pfn = __pa(vaddr) >> PAGE_SHIFT;
459 use_large_entry = true;
460 }
461
462 e->gfn = pfn;
463 e->operation = op;
464
465 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
466 (vaddr_end - vaddr) >= PMD_SIZE) {
467 e->pagesize = RMP_PG_SIZE_2M;
468 vaddr += PMD_SIZE;
469 } else {
470 e->pagesize = RMP_PG_SIZE_4K;
471 vaddr += PAGE_SIZE;
472 }
473
474 e++;
475 i++;
476 }
477
478 /* Page validation must be rescinded before changing to shared */
479 if (op == SNP_PAGE_STATE_SHARED)
480 pvalidate_pages(data);
481
482 local_irq_save(flags);
483
484 if (sev_cfg.ghcbs_initialized)
485 ghcb = __sev_get_ghcb(&state);
486 else
487 ghcb = boot_ghcb;
488
489 /* Invoke the hypervisor to perform the page state changes */
490 if (!ghcb || vmgexit_psc(ghcb, data))
491 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
492
493 if (sev_cfg.ghcbs_initialized)
494 __sev_put_ghcb(&state);
495
496 local_irq_restore(flags);
497
498 /* Page validation must be performed after changing to private */
499 if (op == SNP_PAGE_STATE_PRIVATE)
500 pvalidate_pages(data);
501
502 return vaddr;
503 }
504
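/*
 * Change the state of npages pages starting at vaddr. Falls back to the
 * early/MSR-based path while the boot GHCB is not yet available.
 */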
static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
506 {
507 struct snp_psc_desc desc;
508 unsigned long vaddr_end;
509
510 /* Use the MSR protocol when a GHCB is not available. */
511 if (!boot_ghcb)
512 return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
513
514 vaddr = vaddr & PAGE_MASK;
515 vaddr_end = vaddr + (npages << PAGE_SHIFT);
516
517 while (vaddr < vaddr_end)
518 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
519 }
520
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
522 {
523 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
524 return;
525
526 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
527 }
528
void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
530 {
531 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
532 return;
533
534 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
535 }
536
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
538 {
539 unsigned long vaddr, npages;
540
541 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
542 return;
543
544 vaddr = (unsigned long)__va(start);
545 npages = (end - start) >> PAGE_SHIFT;
546
547 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
548 }
549
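/*
 * Issue the AP Creation NAE event for the vCPU identified by apic_id: 'event'
 * selects between AP creation and destruction, 'vmsa' supplies the VMSA page.
 */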
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
551 {
552 bool create = event != SVM_VMGEXIT_AP_DESTROY;
553 struct ghcb_state state;
554 unsigned long flags;
555 struct ghcb *ghcb;
556 int ret = 0;
557
558 local_irq_save(flags);
559
560 ghcb = __sev_get_ghcb(&state);
561
562 vc_ghcb_invalidate(ghcb);
563
564 if (create)
565 ghcb_set_rax(ghcb, vmsa->sev_features);
566
567 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
568 ghcb_set_sw_exit_info_1(ghcb,
569 ((u64)apic_id << 32) |
570 ((u64)snp_vmpl << 16) |
571 event);
572 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
573
574 sev_es_wr_ghcb_msr(__pa(ghcb));
575 VMGEXIT();
576
577 if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
578 lower_32_bits(ghcb->save.sw_exit_info_1)) {
579 pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
580 ret = -EINVAL;
581 }
582
583 __sev_put_ghcb(&state);
584
585 local_irq_restore(flags);
586
587 return ret;
588 }
589
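/*
 * Mark or unmark a page as a VMSA page: via the SVSM Create/Delete vCPU calls
 * when running under an SVSM, via RMPADJUST otherwise.
 */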
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
591 {
592 int ret;
593
594 if (snp_vmpl) {
595 struct svsm_call call = {};
596 unsigned long flags;
597
598 local_irq_save(flags);
599
600 call.caa = this_cpu_read(svsm_caa);
601 call.rcx = __pa(va);
602
603 if (make_vmsa) {
604 /* Protocol 0, Call ID 2 */
605 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
606 call.rdx = __pa(caa);
607 call.r8 = apic_id;
608 } else {
609 /* Protocol 0, Call ID 3 */
610 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
611 }
612
613 ret = svsm_perform_call_protocol(&call);
614
615 local_irq_restore(flags);
616 } else {
617 /*
618 * If the kernel runs at VMPL0, it can change the VMSA
619 * bit for a page using the RMPADJUST instruction.
620 * However, for the instruction to succeed it must
621 * target the permissions of a lesser privileged (higher
622 * numbered) VMPL level, so use VMPL1.
623 */
624 u64 attrs = 1;
625
626 if (make_vmsa)
627 attrs |= RMPADJUST_VMSA_PAGE_BIT;
628
629 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
630 }
631
632 return ret;
633 }
634
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
636 {
637 int err;
638
639 err = snp_set_vmsa(vmsa, NULL, apic_id, false);
640 if (err)
641 pr_err("clear VMSA page failed (%u), leaking page\n", err);
642 else
643 free_page((unsigned long)vmsa);
644 }
645
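/* Switch the kernel mapping of a page back to encrypted by rewriting its PTE. */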
static void set_pte_enc(pte_t *kpte, int level, void *va)
647 {
648 struct pte_enc_desc d = {
649 .kpte = kpte,
650 .pte_level = level,
651 .va = va,
652 .encrypt = true
653 };
654
655 prepare_pte_enc(&d);
656 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
657 }
658
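/*
 * Convert all shared (decrypted) mappings back to private ahead of kexec: the
 * direct map (excluding regions containing the per-CPU GHCB pages, which are
 * converted later in snp_kexec_finish()) and the bss decrypted region.
 */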
static void unshare_all_memory(void)
660 {
661 unsigned long addr, end, size, ghcb;
662 struct sev_es_runtime_data *data;
663 unsigned int npages, level;
664 bool skipped_addr;
665 pte_t *pte;
666 int cpu;
667
668 /* Unshare the direct mapping. */
669 addr = PAGE_OFFSET;
670 end = PAGE_OFFSET + get_max_mapped();
671
672 while (addr < end) {
673 pte = lookup_address(addr, &level);
674 size = page_level_size(level);
675 npages = size / PAGE_SIZE;
676 skipped_addr = false;
677
678 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
679 addr += size;
680 continue;
681 }
682
683 /*
684 * Ensure that all the per-CPU GHCBs are made private at the
685 * end of the unsharing loop so that the switch to the slower
686 * MSR protocol happens last.
687 */
688 for_each_possible_cpu(cpu) {
689 data = per_cpu(runtime_data, cpu);
690 ghcb = (unsigned long)&data->ghcb_page;
691
692 /* Handle the case of a huge page containing the GHCB page */
693 if (addr <= ghcb && ghcb < addr + size) {
694 skipped_addr = true;
695 break;
696 }
697 }
698
699 if (!skipped_addr) {
700 set_pte_enc(pte, level, (void *)addr);
701 snp_set_memory_private(addr, npages);
702 }
703 addr += size;
704 }
705
706 /* Unshare all bss decrypted memory. */
707 addr = (unsigned long)__start_bss_decrypted;
708 end = (unsigned long)__start_bss_decrypted_unused;
709 npages = (end - addr) >> PAGE_SHIFT;
710
711 for (; addr < end; addr += PAGE_SIZE) {
712 pte = lookup_address(addr, &level);
713 if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
714 continue;
715
716 set_pte_enc(pte, level, (void *)addr);
717 }
718 addr = (unsigned long)__start_bss_decrypted;
719 snp_set_memory_private(addr, npages);
720
721 __flush_tlb_all();
722 }
723
724 /* Stop new private<->shared conversions */
void snp_kexec_begin(void)
726 {
727 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
728 return;
729
730 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
731 return;
732
733 /*
734 * Crash kernel ends up here with interrupts disabled: can't wait for
735 * conversions to finish.
736 *
737 * If race happened, just report and proceed.
738 */
739 if (!set_memory_enc_stop_conversion())
740 pr_warn("Failed to stop shared<->private conversions\n");
741 }
742
/*
 * Shut down all APs except the one handling kexec/kdump and clear the VMSA
 * tag on the APs' VMSA pages, as they are no longer being used as VMSA pages.
 */
static void shutdown_all_aps(void)
749 {
750 struct sev_es_save_area *vmsa;
751 int apic_id, this_cpu, cpu;
752
753 this_cpu = get_cpu();
754
755 /*
756 * APs are already in HLT loop when enc_kexec_finish() callback
757 * is invoked.
758 */
759 for_each_present_cpu(cpu) {
760 vmsa = per_cpu(sev_vmsa, cpu);
761
762 /*
763 * The BSP or offlined APs do not have guest allocated VMSA
764 * and there is no need to clear the VMSA tag for this page.
765 */
766 if (!vmsa)
767 continue;
768
769 /*
770 * Cannot clear the VMSA tag for the currently running vCPU.
771 */
772 if (this_cpu == cpu) {
773 unsigned long pa;
774 struct page *p;
775
776 pa = __pa(vmsa);
			/*
			 * Mark the VMSA page of the running vCPU as offline
			 * so that it is excluded and not touched by
			 * makedumpfile while generating vmcore during kdump.
			 */
782 p = pfn_to_online_page(pa >> PAGE_SHIFT);
783 if (p)
784 __SetPageOffline(p);
785 continue;
786 }
787
788 apic_id = cpuid_to_apicid[cpu];
789
		/*
		 * Issue AP destroy to ensure the AP gets kicked out of guest
		 * mode, to allow using RMPADJUST to remove the VMSA tag on its
		 * VMSA page.
		 */
795 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
796 snp_cleanup_vmsa(vmsa, apic_id);
797 }
798
799 put_cpu();
800 }
801
void snp_kexec_finish(void)
803 {
804 struct sev_es_runtime_data *data;
805 unsigned long size, addr;
806 unsigned int level, cpu;
807 struct ghcb *ghcb;
808 pte_t *pte;
809
810 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
811 return;
812
813 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
814 return;
815
816 shutdown_all_aps();
817
818 unshare_all_memory();
819
820 /*
821 * Switch to using the MSR protocol to change per-CPU GHCBs to
822 * private. All the per-CPU GHCBs have been switched back to private,
823 * so can't do any more GHCB calls to the hypervisor beyond this point
824 * until the kexec'ed kernel starts running.
825 */
826 boot_ghcb = NULL;
827 sev_cfg.ghcbs_initialized = false;
828
829 for_each_possible_cpu(cpu) {
830 data = per_cpu(runtime_data, cpu);
831 ghcb = &data->ghcb_page;
832 pte = lookup_address((unsigned long)ghcb, &level);
833 size = page_level_size(level);
834 /* Handle the case of a huge page containing the GHCB page */
835 addr = (unsigned long)ghcb & page_level_mask(level);
836 set_pte_enc(pte, level, (void *)addr);
837 snp_set_memory_private(addr, (size / PAGE_SIZE));
838 }
839 }
840
841 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
842 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
843 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
844
845 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
846 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
847
static void *snp_alloc_vmsa_page(int cpu)
849 {
850 struct page *p;
851
852 /*
853 * Allocate VMSA page to work around the SNP erratum where the CPU will
854 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
	 * collides with the RMP entry of the VMSA page. The recommended workaround
856 * is to not use a large page.
857 *
858 * Allocate an 8k page which is also 8k-aligned.
859 */
860 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
861 if (!p)
862 return NULL;
863
864 split_page(p, 1);
865
866 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
867 __free_page(p);
868
869 return page_address(p + 1);
870 }
871
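/*
 * Start an AP by allocating and initializing a new VMSA page with the APM AP
 * INIT state, pointing it at the real-mode trampoline and issuing the AP
 * Creation VMGEXIT. Installed as the wakeup_secondary_cpu callback for SNP
 * guests.
 */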
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
873 {
874 struct sev_es_save_area *cur_vmsa, *vmsa;
875 struct svsm_ca *caa;
876 u8 sipi_vector;
877 int ret;
878 u64 cr4;
879
880 /*
881 * The hypervisor SNP feature support check has happened earlier, just check
882 * the AP_CREATION one here.
883 */
884 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
885 return -EOPNOTSUPP;
886
887 /*
888 * Verify the desired start IP against the known trampoline start IP
889 * to catch any future new trampolines that may be introduced that
890 * would require a new protected guest entry point.
891 */
892 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
893 "Unsupported SNP start_ip: %lx\n", start_ip))
894 return -EINVAL;
895
896 /* Override start_ip with known protected guest start IP */
897 start_ip = real_mode_header->sev_es_trampoline_start;
898 cur_vmsa = per_cpu(sev_vmsa, cpu);
899
	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernel's or that the vCPU is not running. If
	 * an attempt was made to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
907 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
908 if (!vmsa)
909 return -ENOMEM;
910
911 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
912 caa = per_cpu(svsm_caa, cpu);
913
914 /* CR4 should maintain the MCE value */
915 cr4 = native_read_cr4() & X86_CR4_MCE;
916
917 /* Set the CS value based on the start_ip converted to a SIPI vector */
918 sipi_vector = (start_ip >> 12);
919 vmsa->cs.base = sipi_vector << 12;
920 vmsa->cs.limit = AP_INIT_CS_LIMIT;
921 vmsa->cs.attrib = INIT_CS_ATTRIBS;
922 vmsa->cs.selector = sipi_vector << 8;
923
924 /* Set the RIP value based on start_ip */
925 vmsa->rip = start_ip & 0xfff;
926
927 /* Set AP INIT defaults as documented in the APM */
928 vmsa->ds.limit = AP_INIT_DS_LIMIT;
929 vmsa->ds.attrib = INIT_DS_ATTRIBS;
930 vmsa->es = vmsa->ds;
931 vmsa->fs = vmsa->ds;
932 vmsa->gs = vmsa->ds;
933 vmsa->ss = vmsa->ds;
934
935 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
936 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
937 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
938 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
939 vmsa->tr.limit = AP_INIT_TR_LIMIT;
940 vmsa->tr.attrib = INIT_TR_ATTRIBS;
941
942 vmsa->cr4 = cr4;
943 vmsa->cr0 = AP_INIT_CR0_DEFAULT;
944 vmsa->dr7 = DR7_RESET_VALUE;
945 vmsa->dr6 = AP_INIT_DR6_DEFAULT;
946 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
947 vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
948 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
949 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
950 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
951 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
952
953 /* SVME must be set. */
954 vmsa->efer = EFER_SVME;
955
956 /*
957 * Set the SNP-specific fields for this VMSA:
958 * VMPL level
959 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
960 */
961 vmsa->vmpl = snp_vmpl;
962 vmsa->sev_features = sev_status >> 2;
963
964 /* Populate AP's TSC scale/offset to get accurate TSC values. */
965 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
966 vmsa->tsc_scale = snp_tsc_scale;
967 vmsa->tsc_offset = snp_tsc_offset;
968 }
969
970 /* Switch the page over to a VMSA page now that it is initialized */
971 ret = snp_set_vmsa(vmsa, caa, apic_id, true);
972 if (ret) {
973 pr_err("set VMSA page failed (%u)\n", ret);
974 free_page((unsigned long)vmsa);
975
976 return -EINVAL;
977 }
978
979 /* Issue VMGEXIT AP Creation NAE event */
980 ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
981 if (ret) {
982 snp_cleanup_vmsa(vmsa, apic_id);
983 vmsa = NULL;
984 }
985
986 /* Free up any previous VMSA page */
987 if (cur_vmsa)
988 snp_cleanup_vmsa(cur_vmsa, apic_id);
989
990 /* Record the current VMSA page */
991 per_cpu(sev_vmsa, cpu) = vmsa;
992
993 return ret;
994 }
995
void __init snp_set_wakeup_secondary_cpu(void)
997 {
998 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
999 return;
1000
1001 /*
1002 * Always set this override if SNP is enabled. This makes it the
1003 * required method to start APs under SNP. If the hypervisor does
1004 * not support AP creation, then no APs will be started.
1005 */
1006 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
1007 }
1008
int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
1010 {
1011 u16 startup_cs, startup_ip;
1012 phys_addr_t jump_table_pa;
1013 u64 jump_table_addr;
1014 u16 __iomem *jump_table;
1015
1016 jump_table_addr = get_jump_table_addr();
1017
1018 /* On UP guests there is no jump table so this is not a failure */
1019 if (!jump_table_addr)
1020 return 0;
1021
1022 /* Check if AP Jump Table is page-aligned */
1023 if (jump_table_addr & ~PAGE_MASK)
1024 return -EINVAL;
1025
1026 jump_table_pa = jump_table_addr & PAGE_MASK;
1027
1028 startup_cs = (u16)(rmh->trampoline_start >> 4);
1029 startup_ip = (u16)(rmh->sev_es_trampoline_start -
1030 rmh->trampoline_start);
1031
1032 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
1033 if (!jump_table)
1034 return -EIO;
1035
1036 writew(startup_ip, &jump_table[0]);
1037 writew(startup_cs, &jump_table[1]);
1038
1039 iounmap(jump_table);
1040
1041 return 0;
1042 }
1043
1044 /*
1045 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
1046 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
1047 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
1048 *
1049 * When running under SVSM the CA page is needed too, so map it as well.
1050 */
int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
1052 {
1053 unsigned long address, pflags, pflags_enc;
1054 struct sev_es_runtime_data *data;
1055 int cpu;
1056 u64 pfn;
1057
1058 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1059 return 0;
1060
1061 pflags = _PAGE_NX | _PAGE_RW;
1062 pflags_enc = cc_mkenc(pflags);
1063
1064 for_each_possible_cpu(cpu) {
1065 data = per_cpu(runtime_data, cpu);
1066
1067 address = __pa(&data->ghcb_page);
1068 pfn = address >> PAGE_SHIFT;
1069
1070 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
1071 return 1;
1072
1073 if (snp_vmpl) {
1074 address = per_cpu(svsm_caa_pa, cpu);
1075 if (!address)
1076 return 1;
1077
1078 pfn = address >> PAGE_SHIFT;
1079 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
1080 return 1;
1081 }
1082 }
1083
1084 return 0;
1085 }
1086
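/* Register this CPU's runtime GHCB page with the hypervisor. */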
static void snp_register_per_cpu_ghcb(void)
1088 {
1089 struct sev_es_runtime_data *data;
1090 struct ghcb *ghcb;
1091
1092 data = this_cpu_read(runtime_data);
1093 ghcb = &data->ghcb_page;
1094
1095 snp_register_ghcb_early(__pa(ghcb));
1096 }
1097
void setup_ghcb(void)
1099 {
1100 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1101 return;
1102
1103 /*
1104 * Check whether the runtime #VC exception handler is active. It uses
1105 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
1106 *
1107 * If SNP is active, register the per-CPU GHCB page so that the runtime
1108 * exception handler can use it.
1109 */
1110 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
1111 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1112 snp_register_per_cpu_ghcb();
1113
1114 sev_cfg.ghcbs_initialized = true;
1115
1116 return;
1117 }
1118
1119 /*
1120 * Make sure the hypervisor talks a supported protocol.
1121 * This gets called only in the BSP boot phase.
1122 */
1123 if (!sev_es_negotiate_protocol())
1124 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1125
1126 /*
1127 * Clear the boot_ghcb. The first exception comes in before the bss
1128 * section is cleared.
1129 */
1130 memset(&boot_ghcb_page, 0, PAGE_SIZE);
1131
1132 /* Alright - Make the boot-ghcb public */
1133 boot_ghcb = &boot_ghcb_page;
1134
1135 /* SNP guest requires that GHCB GPA must be registered. */
1136 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1137 snp_register_ghcb_early(__pa(&boot_ghcb_page));
1138 }
1139
1140 #ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
1142 {
1143 struct ghcb_state state;
1144 struct ghcb *ghcb;
1145
1146 ghcb = __sev_get_ghcb(&state);
1147
1148 while (true) {
1149 vc_ghcb_invalidate(ghcb);
1150 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
1151 ghcb_set_sw_exit_info_1(ghcb, 0);
1152 ghcb_set_sw_exit_info_2(ghcb, 0);
1153
1154 sev_es_wr_ghcb_msr(__pa(ghcb));
1155 VMGEXIT();
1156
1157 /* Wakeup signal? */
1158 if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
1159 ghcb->save.sw_exit_info_2)
1160 break;
1161 }
1162
1163 __sev_put_ghcb(&state);
1164 }
1165
1166 /*
1167 * Play_dead handler when running under SEV-ES. This is needed because
1168 * the hypervisor can't deliver an SIPI request to restart the AP.
1169 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
1170 * hypervisor wakes it up again.
1171 */
static void sev_es_play_dead(void)
1173 {
1174 play_dead_common();
1175
1176 /* IRQs now disabled */
1177
1178 sev_es_ap_hlt_loop();
1179
1180 /*
1181 * If we get here, the VCPU was woken up again. Jump to CPU
1182 * startup code to get it back online.
1183 */
1184 soft_restart_cpu();
1185 }
1186 #else /* CONFIG_HOTPLUG_CPU */
1187 #define sev_es_play_dead native_play_dead
1188 #endif /* CONFIG_HOTPLUG_CPU */
1189
1190 #ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
1192 {
1193 smp_ops.play_dead = sev_es_play_dead;
1194 }
1195 #else
static inline void sev_es_setup_play_dead(void) { }
1197 #endif
1198
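/*
 * Allocate the per-CPU SEV-ES runtime data for this CPU and, when running
 * under an SVSM, the per-CPU calling area page.
 */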
static void __init alloc_runtime_data(int cpu)
1200 {
1201 struct sev_es_runtime_data *data;
1202
1203 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
1204 if (!data)
1205 panic("Can't allocate SEV-ES runtime data");
1206
1207 per_cpu(runtime_data, cpu) = data;
1208
1209 if (snp_vmpl) {
1210 struct svsm_ca *caa;
1211
1212 /* Allocate the SVSM CA page if an SVSM is present */
1213 caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);
1214
1215 per_cpu(svsm_caa, cpu) = caa;
1216 per_cpu(svsm_caa_pa, cpu) = __pa(caa);
1217 }
1218 }
1219
static void __init init_ghcb(int cpu)
1221 {
1222 struct sev_es_runtime_data *data;
1223 int err;
1224
1225 data = per_cpu(runtime_data, cpu);
1226
1227 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
1228 sizeof(data->ghcb_page));
1229 if (err)
1230 panic("Can't map GHCBs unencrypted");
1231
1232 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
1233
1234 data->ghcb_active = false;
1235 data->backup_ghcb_active = false;
1236 }
1237
void __init sev_es_init_vc_handling(void)
1239 {
1240 int cpu;
1241
1242 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
1243
1244 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1245 return;
1246
1247 if (!sev_es_check_cpu_features())
1248 panic("SEV-ES CPU Features missing");
1249
1250 /*
1251 * SNP is supported in v2 of the GHCB spec which mandates support for HV
1252 * features.
1253 */
1254 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
1255 sev_hv_features = get_hv_features();
1256
1257 if (!(sev_hv_features & GHCB_HV_FT_SNP))
1258 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
1259 }
1260
1261 /* Initialize per-cpu GHCB pages */
1262 for_each_possible_cpu(cpu) {
1263 alloc_runtime_data(cpu);
1264 init_ghcb(cpu);
1265 }
1266
1267 /* If running under an SVSM, switch to the per-cpu CA */
1268 if (snp_vmpl) {
1269 struct svsm_call call = {};
1270 unsigned long flags;
1271 int ret;
1272
1273 local_irq_save(flags);
1274
1275 /*
1276 * SVSM_CORE_REMAP_CA call:
1277 * RAX = 0 (Protocol=0, CallID=0)
1278 * RCX = New CA GPA
1279 */
1280 call.caa = svsm_get_caa();
1281 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
1282 call.rcx = this_cpu_read(svsm_caa_pa);
1283 ret = svsm_perform_call_protocol(&call);
1284 if (ret)
1285 panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
1286 ret, call.rax_out);
1287
1288 sev_cfg.use_cas = true;
1289
1290 local_irq_restore(flags);
1291 }
1292
1293 sev_es_setup_play_dead();
1294
1295 /* Secondary CPUs use the runtime #VC handler */
1296 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
1297 }
1298
1299 /*
1300 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1301 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
1302 * ROM region can cause a crash since this region is not pre-validated.
1303 */
void __init snp_dmi_setup(void)
1305 {
1306 if (efi_enabled(EFI_CONFIG_TABLES))
1307 dmi_setup();
1308 }
1309
static void dump_cpuid_table(void)
1311 {
1312 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1313 int i = 0;
1314
1315 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
1316 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
1317
1318 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
1319 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
1320
1321 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
1322 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
1323 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
1324 }
1325 }
1326
1327 /*
1328 * It is useful from an auditing/testing perspective to provide an easy way
1329 * for the guest owner to know that the CPUID table has been initialized as
1330 * expected, but that initialization happens too early in boot to print any
1331 * sort of indicator, and there's not really any other good place to do it,
1332 * so do it here.
1333 *
1334 * If running as an SNP guest, report the current VM privilege level (VMPL).
1335 */
static int __init report_snp_info(void)
1337 {
1338 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1339
1340 if (cpuid_table->count) {
1341 pr_info("Using SNP CPUID table, %d entries present.\n",
1342 cpuid_table->count);
1343
1344 if (sev_cfg.debug)
1345 dump_cpuid_table();
1346 }
1347
1348 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1349 pr_info("SNP running at VMPL%u.\n", snp_vmpl);
1350
1351 return 0;
1352 }
1353 arch_initcall(report_snp_info);
1354
static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
1356 {
1357 /* If (new) lengths have been returned, propagate them up */
1358 if (call->rcx_out != call->rcx)
1359 input->manifest_buf.len = call->rcx_out;
1360
1361 if (call->rdx_out != call->rdx)
1362 input->certificates_buf.len = call->rdx_out;
1363
1364 if (call->r8_out != call->r8)
1365 input->report_buf.len = call->r8_out;
1366 }
1367
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
			      struct svsm_attest_call *input)
1370 {
1371 struct svsm_attest_call *ac;
1372 unsigned long flags;
1373 u64 attest_call_pa;
1374 int ret;
1375
1376 if (!snp_vmpl)
1377 return -EINVAL;
1378
1379 local_irq_save(flags);
1380
1381 call->caa = svsm_get_caa();
1382
1383 ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
1384 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
1385
1386 *ac = *input;
1387
1388 /*
1389 * Set input registers for the request and set RDX and R8 to known
1390 * values in order to detect length values being returned in them.
1391 */
1392 call->rax = call_id;
1393 call->rcx = attest_call_pa;
1394 call->rdx = -1;
1395 call->r8 = -1;
1396 ret = svsm_perform_call_protocol(call);
1397 update_attest_input(call, input);
1398
1399 local_irq_restore(flags);
1400
1401 return ret;
1402 }
1403 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
1404
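/*
 * Issue an SNP guest request (or extended guest request) to the hypervisor
 * through the GHCB and translate the returned exit code into an errno.
 */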
static int snp_issue_guest_request(struct snp_guest_req *req)
1406 {
1407 struct snp_req_data *input = &req->input;
1408 struct ghcb_state state;
1409 struct es_em_ctxt ctxt;
1410 unsigned long flags;
1411 struct ghcb *ghcb;
1412 int ret;
1413
1414 req->exitinfo2 = SEV_RET_NO_FW_CALL;
1415
1416 /*
1417 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
1418 * a per-CPU GHCB.
1419 */
1420 local_irq_save(flags);
1421
1422 ghcb = __sev_get_ghcb(&state);
1423 if (!ghcb) {
1424 ret = -EIO;
1425 goto e_restore_irq;
1426 }
1427
1428 vc_ghcb_invalidate(ghcb);
1429
1430 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1431 ghcb_set_rax(ghcb, input->data_gpa);
1432 ghcb_set_rbx(ghcb, input->data_npages);
1433 }
1434
1435 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
1436 if (ret)
1437 goto e_put;
1438
1439 req->exitinfo2 = ghcb->save.sw_exit_info_2;
1440 switch (req->exitinfo2) {
1441 case 0:
1442 break;
1443
1444 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
1445 ret = -EAGAIN;
1446 break;
1447
1448 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
		/* The number of expected pages is returned in RBX */
1450 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1451 input->data_npages = ghcb_get_rbx(ghcb);
1452 ret = -ENOSPC;
1453 break;
1454 }
1455 fallthrough;
1456 default:
1457 ret = -EIO;
1458 break;
1459 }
1460
1461 e_put:
1462 __sev_put_ghcb(&state);
1463 e_restore_irq:
1464 local_irq_restore(flags);
1465
1466 return ret;
1467 }
1468
1469 /**
1470 * snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
1471 *
 * Check that an SVSM is present and that it supports at least TPM_SEND_COMMAND,
 * which is the only request used so far.
1474 *
1475 * Return: true if the platform provides a vTPM SVSM device, false otherwise.
1476 */
static bool snp_svsm_vtpm_probe(void)
1478 {
1479 struct svsm_call call = {};
1480
1481 /* The vTPM device is available only if a SVSM is present */
1482 if (!snp_vmpl)
1483 return false;
1484
1485 call.caa = svsm_get_caa();
1486 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
1487
1488 if (svsm_perform_call_protocol(&call))
1489 return false;
1490
	/* Check that the platform commands contain TPM_SEND_COMMAND (platform command 8) */
1492 return call.rcx_out & BIT_ULL(8);
1493 }
1494
1495 /**
1496 * snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
1497 * @buffer: A buffer used to both send the command and receive the response.
1498 *
1499 * Execute a SVSM_VTPM_CMD call as defined by
1500 * "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
1501 *
1502 * All command request/response buffers have a common structure as specified by
1503 * the following table:
1504 * Byte Size In/Out Description
1505 * Offset (Bytes)
1506 * 0x000 4 In Platform command
1507 * Out Platform command response size
1508 *
1509 * Each command can build upon this common request/response structure to create
1510 * a structure specific to the command. See include/linux/tpm_svsm.h for more
1511 * details.
1512 *
1513 * Return: 0 on success, -errno on failure
1514 */
int snp_svsm_vtpm_send_command(u8 *buffer)
1516 {
1517 struct svsm_call call = {};
1518
1519 call.caa = svsm_get_caa();
1520 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
1521 call.rcx = __pa(buffer);
1522
1523 return svsm_perform_call_protocol(&call);
1524 }
1525 EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
1526
1527 static struct platform_device sev_guest_device = {
1528 .name = "sev-guest",
1529 .id = -1,
1530 };
1531
1532 static struct platform_device tpm_svsm_device = {
1533 .name = "tpm-svsm",
1534 .id = -1,
1535 };
1536
static int __init snp_init_platform_device(void)
1538 {
1539 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1540 return -ENODEV;
1541
1542 if (platform_device_register(&sev_guest_device))
1543 return -ENODEV;
1544
1545 if (snp_svsm_vtpm_probe() &&
1546 platform_device_register(&tpm_svsm_device))
1547 return -ENODEV;
1548
1549 pr_info("SNP guest platform devices initialized.\n");
1550 return 0;
1551 }
1552 device_initcall(snp_init_platform_device);
1553
void sev_show_status(void)
1555 {
1556 int i;
1557
1558 pr_info("Status: ");
1559 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1560 if (sev_status & BIT_ULL(i)) {
1561 if (!sev_status_feat_names[i])
1562 continue;
1563
1564 pr_cont("%s ", sev_status_feat_names[i]);
1565 }
1566 }
1567 pr_cont("\n");
1568 }
1569
void __init snp_update_svsm_ca(void)
1571 {
1572 if (!snp_vmpl)
1573 return;
1574
1575 /* Update the CAA to a proper kernel address */
1576 boot_svsm_caa = &boot_svsm_ca_page;
1577 }
1578
1579 #ifdef CONFIG_SYSFS
static ssize_t vmpl_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
1582 {
1583 return sysfs_emit(buf, "%d\n", snp_vmpl);
1584 }
1585
1586 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
1587
1588 static struct attribute *vmpl_attrs[] = {
1589 &vmpl_attr.attr,
1590 NULL
1591 };
1592
1593 static struct attribute_group sev_attr_group = {
1594 .attrs = vmpl_attrs,
1595 };
1596
static int __init sev_sysfs_init(void)
1598 {
1599 struct kobject *sev_kobj;
1600 struct device *dev_root;
1601 int ret;
1602
1603 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1604 return -ENODEV;
1605
1606 dev_root = bus_get_dev_root(&cpu_subsys);
1607 if (!dev_root)
1608 return -ENODEV;
1609
1610 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
1611 put_device(dev_root);
1612
1613 if (!sev_kobj)
1614 return -ENOMEM;
1615
1616 ret = sysfs_create_group(sev_kobj, &sev_attr_group);
1617 if (ret)
1618 kobject_put(sev_kobj);
1619
1620 return ret;
1621 }
1622 arch_initcall(sev_sysfs_init);
1623 #endif // CONFIG_SYSFS
1624
static void free_shared_pages(void *buf, size_t sz)
1626 {
1627 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1628 int ret;
1629
1630 if (!buf)
1631 return;
1632
1633 ret = set_memory_encrypted((unsigned long)buf, npages);
1634 if (ret) {
1635 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
1636 return;
1637 }
1638
1639 __free_pages(virt_to_page(buf), get_order(sz));
1640 }
1641
static void *alloc_shared_pages(size_t sz)
1643 {
1644 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1645 struct page *page;
1646 int ret;
1647
1648 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
1649 if (!page)
1650 return NULL;
1651
1652 ret = set_memory_decrypted((unsigned long)page_address(page), npages);
1653 if (ret) {
1654 pr_err("failed to mark page shared, ret=%d\n", ret);
1655 __free_pages(page, get_order(sz));
1656 return NULL;
1657 }
1658
1659 return page_address(page);
1660 }
1661
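/*
 * Return the VMPCK for the requested id (0-3) along with a pointer to its
 * message sequence counter in the secrets page, or NULL for an invalid id.
 */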
static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1663 {
1664 u8 *key = NULL;
1665
1666 switch (id) {
1667 case 0:
1668 *seqno = &secrets->os_area.msg_seqno_0;
1669 key = secrets->vmpck0;
1670 break;
1671 case 1:
1672 *seqno = &secrets->os_area.msg_seqno_1;
1673 key = secrets->vmpck1;
1674 break;
1675 case 2:
1676 *seqno = &secrets->os_area.msg_seqno_2;
1677 key = secrets->vmpck2;
1678 break;
1679 case 3:
1680 *seqno = &secrets->os_area.msg_seqno_3;
1681 key = secrets->vmpck3;
1682 break;
1683 default:
1684 break;
1685 }
1686
1687 return key;
1688 }
1689
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
1691 {
1692 struct aesgcm_ctx *ctx;
1693
1694 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1695 if (!ctx)
1696 return NULL;
1697
1698 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
1699 pr_err("Crypto context initialization failed\n");
1700 kfree(ctx);
1701 return NULL;
1702 }
1703
1704 return ctx;
1705 }
1706
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
1708 {
1709 /* Adjust the default VMPCK key based on the executing VMPL level */
1710 if (vmpck_id == -1)
1711 vmpck_id = snp_vmpl;
1712
1713 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
1714 if (!mdesc->vmpck) {
1715 pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
1716 return -EINVAL;
1717 }
1718
1719 /* Verify that VMPCK is not zero. */
1720 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
1721 pr_err("Empty VMPCK%d communication key\n", vmpck_id);
1722 return -EINVAL;
1723 }
1724
1725 mdesc->vmpck_id = vmpck_id;
1726
1727 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
1728 if (!mdesc->ctx)
1729 return -ENOMEM;
1730
1731 return 0;
1732 }
1733 EXPORT_SYMBOL_GPL(snp_msg_init);
1734
struct snp_msg_desc *snp_msg_alloc(void)
1736 {
1737 struct snp_msg_desc *mdesc;
1738 void __iomem *mem;
1739
1740 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);
1741
1742 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
1743 if (!mdesc)
1744 return ERR_PTR(-ENOMEM);
1745
1746 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
1747 if (!mem)
1748 goto e_free_mdesc;
1749
1750 mdesc->secrets = (__force struct snp_secrets_page *)mem;
1751
1752 /* Allocate the shared page used for the request and response message. */
1753 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
1754 if (!mdesc->request)
1755 goto e_unmap;
1756
1757 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
1758 if (!mdesc->response)
1759 goto e_free_request;
1760
1761 return mdesc;
1762
1763 e_free_request:
1764 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1765 e_unmap:
1766 iounmap(mem);
1767 e_free_mdesc:
1768 kfree(mdesc);
1769
1770 return ERR_PTR(-ENOMEM);
1771 }
1772 EXPORT_SYMBOL_GPL(snp_msg_alloc);
1773
void snp_msg_free(struct snp_msg_desc *mdesc)
1775 {
1776 if (!mdesc)
1777 return;
1778
1779 kfree(mdesc->ctx);
1780 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
1781 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1782 iounmap((__force void __iomem *)mdesc->secrets);
1783
1784 memset(mdesc, 0, sizeof(*mdesc));
1785 kfree(mdesc);
1786 }
1787 EXPORT_SYMBOL_GPL(snp_msg_free);
1788
1789 /* Mutex to serialize the shared buffer access and command handling. */
1790 static DEFINE_MUTEX(snp_cmd_mutex);
1791
1792 /*
1793 * If an error is received from the host or AMD Secure Processor (ASP) there
1794 * are two options. Either retry the exact same encrypted request or discontinue
1795 * using the VMPCK.
1796 *
1797 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
1798 * encrypt the requests. The IV for this scheme is the sequence number. GCM
1799 * cannot tolerate IV reuse.
1800 *
1801 * The ASP FW v1.51 only increments the sequence numbers on a successful
1802 * guest<->ASP back and forth and only accepts messages at its exact sequence
1803 * number.
1804 *
1805 * So if the sequence number were to be reused the encryption scheme is
1806 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
1807 * will reject the request.
1808 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
1810 {
1811 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
1812 mdesc->vmpck_id);
1813 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
1814 mdesc->vmpck = NULL;
1815 }
1816
static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1818 {
1819 u64 count;
1820
1821 lockdep_assert_held(&snp_cmd_mutex);
1822
1823 /* Read the current message sequence counter from secrets pages */
1824 count = *mdesc->os_area_msg_seqno;
1825
1826 return count + 1;
1827 }
1828
/* Return a non-zero message sequence number on success, zero on overflow. */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1831 {
1832 u64 count = __snp_get_msg_seqno(mdesc);
1833
	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value, but version 2 of the GHCB specification defines a 32-bit storage
	 * for it. If the counter exceeds the 32-bit value, return zero. The
	 * caller should check the return value; if zero is used anyway, the
	 * firmware treats it as an invalid sequence number and will fail the
	 * message request.
	 */
1842 if (count >= UINT_MAX) {
1843 pr_err("request message sequence counter overflow\n");
1844 return 0;
1845 }
1846
1847 return count;
1848 }
1849
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
1851 {
1852 /*
1853 * The counter is also incremented by the PSP, so increment it by 2
1854 * and save in secrets page.
1855 */
1856 *mdesc->os_area_msg_seqno += 2;
1857 }
1858
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
1860 {
1861 struct snp_guest_msg *resp_msg = &mdesc->secret_response;
1862 struct snp_guest_msg *req_msg = &mdesc->secret_request;
1863 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
1864 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
1865 struct aesgcm_ctx *ctx = mdesc->ctx;
1866 u8 iv[GCM_AES_IV_SIZE] = {};
1867
1868 pr_debug("response [seqno %lld type %d version %d sz %d]\n",
1869 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
1870 resp_msg_hdr->msg_sz);
1871
1872 /* Copy response from shared memory to encrypted memory. */
1873 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));
1874
1875 /* Verify that the sequence counter is incremented by 1 */
1876 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
1877 return -EBADMSG;
1878
1879 /* Verify response message type and version number. */
1880 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
1881 resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
1882 return -EBADMSG;
1883
1884 /*
1885 * If the message size is greater than our buffer length then return
1886 * an error.
1887 */
1888 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
1889 return -EBADMSG;
1890
1891 /* Decrypt the payload */
1892 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
1893 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
1894 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
1895 return -EBADMSG;
1896
1897 return 0;
1898 }
1899
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
1901 {
1902 struct snp_guest_msg *msg = &mdesc->secret_request;
1903 struct snp_guest_msg_hdr *hdr = &msg->hdr;
1904 struct aesgcm_ctx *ctx = mdesc->ctx;
1905 u8 iv[GCM_AES_IV_SIZE] = {};
1906
1907 memset(msg, 0, sizeof(*msg));
1908
1909 hdr->algo = SNP_AEAD_AES_256_GCM;
1910 hdr->hdr_version = MSG_HDR_VER;
1911 hdr->hdr_sz = sizeof(*hdr);
1912 hdr->msg_type = req->msg_type;
1913 hdr->msg_version = req->msg_version;
1914 hdr->msg_seqno = seqno;
1915 hdr->msg_vmpck = req->vmpck_id;
1916 hdr->msg_sz = req->req_sz;
1917
1918 /* Verify the sequence number is non-zero */
1919 if (!hdr->msg_seqno)
1920 return -ENOSR;
1921
1922 pr_debug("request [seqno %lld type %d version %d sz %d]\n",
1923 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);
1924
1925 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
1926 return -EBADMSG;
1927
1928 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
1929 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
1930 AAD_LEN, iv, hdr->authtag);
1931
1932 return 0;
1933 }
1934
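/*
 * Issue the prepared request to the firmware and handle the cases that need a
 * retry: an extended request whose certificate buffer was too small (retried
 * as a plain guest request) and host-side throttling. Either way the message
 * sequence number is advanced afterwards so that the AES-GCM IV is not reused.
 */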
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	unsigned long req_start = jiffies;
	unsigned int override_npages = 0;
	u64 override_err = 0;
	int rc;

retry_request:
	/*
	 * Call firmware to process the request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails because the supplied
		 * certificate data buffer is too small, retry the same guest
		 * request without the extended data request in order to
		 * increment the sequence number and thus avoid IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		req->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}

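/*
 * Send an encrypted SNP guest message to the PSP and decrypt its reply. On
 * any unexpected firmware or decryption failure the VMPCK is wiped, which
 * disables further guest messaging with that key.
 */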
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	u64 seqno;
	int rc;

	/*
	 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
	 * The offload's DMA SG list of data to encrypt has to be in linear mapping.
	 */
	if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
		pr_warn("AES-GCM buffers must be in linear mapping\n");
		return -EINVAL;
	}

	guard(mutex)(&snp_cmd_mutex);

	/* Check if the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get the message sequence counter and verify that it is non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear shared memory's response for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/* Encrypt the userspace provided payload in mdesc->secret_request. */
	rc = enc_payload(mdesc, seqno, req);
	if (rc)
		return rc;

	/*
	 * Write the fully encrypted request to the shared unencrypted
	 * request page.
	 */
	memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));

	/* Initialize the input address for guest request */
	req->input.req_gpa = __pa(mdesc->request);
	req->input.resp_gpa = __pa(mdesc->response);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;

	rc = __handle_guest_request(mdesc, req);
	if (rc) {
		if (rc == -EIO &&
		    req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			return rc;

		pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
			 rc, req->exitinfo2);

		snp_disable_vmpck(mdesc);
		return rc;
	}

	rc = verify_and_dec_payload(mdesc, req);
	if (rc) {
		pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
		snp_disable_vmpck(mdesc);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);

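/*
 * Fetch the Secure TSC parameters from the PSP using an SNP_MSG_TSC_INFO_REQ
 * guest message and cache the returned TSC scale and offset.
 */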
static int __init snp_get_tsc_info(void)
{
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req req = {};
	int rc = -ENOMEM;

	tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_tsc_resp;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_TSC_INFO_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = tsc_req;
	req.req_sz = sizeof(*tsc_req);
	req.resp_buf = (void *)tsc_resp;
	req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}

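/*
 * Fetch the Secure TSC parameters early in boot. Failure to do so on a
 * Secure TSC guest is fatal, so terminate with a dedicated reason code.
 */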
void __init snp_secure_tsc_prepare(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	if (snp_get_tsc_info()) {
		pr_alert("Unable to retrieve Secure TSC info from ASP\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	pr_debug("SecureTSC enabled\n");
}

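/* Both the CPU and TSC calibration hooks return the precomputed frequency. */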
static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}

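/*
 * Derive the guest TSC frequency from the GUEST_TSC_FREQ MSR and the
 * TSC_FACTOR in the secrets page, then install the calibration hooks so
 * the kernel uses the Secure TSC frequency directly.
 */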
void __init snp_secure_tsc_init(void)
{
	struct snp_secrets_page *secrets;
	unsigned long tsc_freq_mhz;
	void *mem;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	secrets = (__force struct snp_secrets_page *)mem;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

	/* Extract the guest TSC frequency in MHz from bits [17:0]; the rest is reserved. */
	tsc_freq_mhz &= GENMASK_ULL(17, 0);

	snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

	early_memunmap(mem, PAGE_SIZE);
}
