1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * AMD Memory Encryption Support
4 *
5 * Copyright (C) 2019 SUSE
6 *
7 * Author: Joerg Roedel <jroedel@suse.de>
8 */
9
10 #define pr_fmt(fmt) "SEV: " fmt
11
12 #include <linux/sched/debug.h> /* For show_regs() */
13 #include <linux/percpu-defs.h>
14 #include <linux/cc_platform.h>
15 #include <linux/printk.h>
16 #include <linux/mm_types.h>
17 #include <linux/set_memory.h>
18 #include <linux/memblock.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/cpumask.h>
22 #include <linux/efi.h>
23 #include <linux/platform_device.h>
24 #include <linux/io.h>
25 #include <linux/psp-sev.h>
26 #include <linux/dmi.h>
27 #include <uapi/linux/sev-guest.h>
28 #include <crypto/gcm.h>
29
30 #include <asm/init.h>
31 #include <asm/cpu_entry_area.h>
32 #include <asm/stacktrace.h>
33 #include <asm/sev.h>
34 #include <asm/sev-internal.h>
35 #include <asm/insn-eval.h>
36 #include <asm/fpu/xcr.h>
37 #include <asm/processor.h>
38 #include <asm/realmode.h>
39 #include <asm/setup.h>
40 #include <asm/traps.h>
41 #include <asm/svm.h>
42 #include <asm/smp.h>
43 #include <asm/cpu.h>
44 #include <asm/apic.h>
45 #include <asm/cpuid/api.h>
46 #include <asm/cmdline.h>
47 #include <asm/msr.h>
48
49 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */
50 #define AP_INIT_CS_LIMIT 0xffff
51 #define AP_INIT_DS_LIMIT 0xffff
52 #define AP_INIT_LDTR_LIMIT 0xffff
53 #define AP_INIT_GDTR_LIMIT 0xffff
54 #define AP_INIT_IDTR_LIMIT 0xffff
55 #define AP_INIT_TR_LIMIT 0xffff
56 #define AP_INIT_RFLAGS_DEFAULT 0x2
57 #define AP_INIT_DR6_DEFAULT 0xffff0ff0
58 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
59 #define AP_INIT_XCR0_DEFAULT 0x1
60 #define AP_INIT_X87_FTW_DEFAULT 0x5555
61 #define AP_INIT_X87_FCW_DEFAULT 0x0040
62 #define AP_INIT_CR0_DEFAULT 0x60000010
63 #define AP_INIT_MXCSR_DEFAULT 0x1f80
64
65 static const char * const sev_status_feat_names[] = {
66 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
67 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
68 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
69 [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
70 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
71 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
72 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
73 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
74 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
75 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
76 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
77 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
78 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
79 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
80 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
81 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
82 };
83
/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
89 static u64 snp_tsc_scale __ro_after_init;
90 static u64 snp_tsc_offset __ro_after_init;
91 static unsigned long snp_tsc_freq_khz __ro_after_init;
92
93 DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
94 DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
95
96 /*
97 * SVSM related information:
98 * When running under an SVSM, the VMPL that Linux is executing at must be
99 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
100 */
101 u8 snp_vmpl __ro_after_init;
102 EXPORT_SYMBOL_GPL(snp_vmpl);
103
static u64 __init get_snp_jump_table_addr(void)
105 {
106 struct snp_secrets_page *secrets;
107 void __iomem *mem;
108 u64 addr;
109
110 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
111 if (!mem) {
112 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
113 return 0;
114 }
115
116 secrets = (__force struct snp_secrets_page *)mem;
117
118 addr = secrets->os_area.ap_jump_table_pa;
119 iounmap(mem);
120
121 return addr;
122 }
123
static u64 __init get_jump_table_addr(void)
125 {
126 struct ghcb_state state;
127 unsigned long flags;
128 struct ghcb *ghcb;
129 u64 ret = 0;
130
131 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
132 return get_snp_jump_table_addr();
133
134 local_irq_save(flags);
135
136 ghcb = __sev_get_ghcb(&state);
137
138 vc_ghcb_invalidate(ghcb);
139 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
140 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
141 ghcb_set_sw_exit_info_2(ghcb, 0);
142
143 sev_es_wr_ghcb_msr(__pa(ghcb));
144 VMGEXIT();
145
146 if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
147 ghcb_sw_exit_info_2_is_valid(ghcb))
148 ret = ghcb->save.sw_exit_info_2;
149
150 __sev_put_ghcb(&state);
151
152 local_irq_restore(flags);
153
154 return ret;
155 }
156
static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
				    int ret, u64 svsm_ret)
159 {
160 WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
161 pfn, action, page_size, ret, svsm_ret);
162
163 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
164 }
165
static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
167 {
168 unsigned int page_size;
169 bool action;
170 u64 pfn;
171
172 pfn = pc->entry[pc->cur_index].pfn;
173 action = pc->entry[pc->cur_index].action;
174 page_size = pc->entry[pc->cur_index].page_size;
175
176 __pval_terminate(pfn, action, page_size, ret, svsm_ret);
177 }
178
static void pval_pages(struct snp_psc_desc *desc)
180 {
181 struct psc_entry *e;
182 unsigned long vaddr;
183 unsigned int size;
184 unsigned int i;
185 bool validate;
186 u64 pfn;
187 int rc;
188
189 for (i = 0; i <= desc->hdr.end_entry; i++) {
190 e = &desc->entries[i];
191
192 pfn = e->gfn;
193 vaddr = (unsigned long)pfn_to_kaddr(pfn);
194 size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
195 validate = e->operation == SNP_PAGE_STATE_PRIVATE;
196
197 rc = pvalidate(vaddr, size, validate);
198 if (!rc)
199 continue;
200
201 if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
202 unsigned long vaddr_end = vaddr + PMD_SIZE;
203
204 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
205 rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
206 if (rc)
207 __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
208 }
209 } else {
210 __pval_terminate(pfn, validate, size, rc, 0);
211 }
212 }
213 }
214
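/*
 * Fill the SVSM calling area with 4K PVALIDATE entries for the range
 * [pfn, pfn_end), stopping when the calling area is full. Returns the first
 * PFN that was not added so the caller can continue from there.
 */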
static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
					struct svsm_pvalidate_call *pc)
217 {
218 struct svsm_pvalidate_entry *pe;
219
220 /* Nothing in the CA yet */
221 pc->num_entries = 0;
222 pc->cur_index = 0;
223
224 pe = &pc->entry[0];
225
226 while (pfn < pfn_end) {
227 pe->page_size = RMP_PG_SIZE_4K;
228 pe->action = action;
229 pe->ignore_cf = 0;
230 pe->pfn = pfn;
231
232 pe++;
233 pfn++;
234
235 pc->num_entries++;
236 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
237 break;
238 }
239
240 return pfn;
241 }
242
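/*
 * Fill the SVSM calling area with entries taken from the Page State Change
 * descriptor, starting at @desc_entry. Returns the index of the first
 * descriptor entry that did not fit into the calling area.
 */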
static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
				       struct svsm_pvalidate_call *pc)
245 {
246 struct svsm_pvalidate_entry *pe;
247 struct psc_entry *e;
248
249 /* Nothing in the CA yet */
250 pc->num_entries = 0;
251 pc->cur_index = 0;
252
253 pe = &pc->entry[0];
254 e = &desc->entries[desc_entry];
255
256 while (desc_entry <= desc->hdr.end_entry) {
257 pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
258 pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
259 pe->ignore_cf = 0;
260 pe->pfn = e->gfn;
261
262 pe++;
263 e++;
264
265 desc_entry++;
266 pc->num_entries++;
267 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
268 break;
269 }
270
271 return desc_entry;
272 }
273
static void svsm_pval_pages(struct snp_psc_desc *desc)
275 {
276 struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
277 unsigned int i, pv_4k_count = 0;
278 struct svsm_pvalidate_call *pc;
279 struct svsm_call call = {};
280 unsigned long flags;
281 bool action;
282 u64 pc_pa;
283 int ret;
284
285 /*
286 * This can be called very early in the boot, use native functions in
287 * order to avoid paravirt issues.
288 */
289 flags = native_local_irq_save();
290
291 /*
292 * The SVSM calling area (CA) can support processing 510 entries at a
293 * time. Loop through the Page State Change descriptor until the CA is
294 * full or the last entry in the descriptor is reached, at which time
295 * the SVSM is invoked. This repeats until all entries in the descriptor
296 * are processed.
297 */
298 call.caa = svsm_get_caa();
299
300 pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
301 pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
302
303 /* Protocol 0, Call ID 1 */
304 call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
305 call.rcx = pc_pa;
306
307 for (i = 0; i <= desc->hdr.end_entry;) {
308 i = svsm_build_ca_from_psc_desc(desc, i, pc);
309
310 do {
311 ret = svsm_perform_call_protocol(&call);
312 if (!ret)
313 continue;
314
315 /*
316 * Check if the entry failed because of an RMP mismatch (a
317 * PVALIDATE at 2M was requested, but the page is mapped in
318 * the RMP as 4K).
319 */
320
321 if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
322 pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
323 /* Save this entry for post-processing at 4K */
324 pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
325
326 /* Skip to the next one unless at the end of the list */
327 pc->cur_index++;
328 if (pc->cur_index < pc->num_entries)
329 ret = -EAGAIN;
330 else
331 ret = 0;
332 }
333 } while (ret == -EAGAIN);
334
335 if (ret)
336 svsm_pval_terminate(pc, ret, call.rax_out);
337 }
338
339 /* Process any entries that failed to be validated at 2M and validate them at 4K */
340 for (i = 0; i < pv_4k_count; i++) {
341 u64 pfn, pfn_end;
342
343 action = pv_4k[i].action;
344 pfn = pv_4k[i].pfn;
345 pfn_end = pfn + 512;
346
347 while (pfn < pfn_end) {
348 pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
349
350 ret = svsm_perform_call_protocol(&call);
351 if (ret)
352 svsm_pval_terminate(pc, ret, call.rax_out);
353 }
354 }
355
356 native_local_irq_restore(flags);
357 }
358
static void pvalidate_pages(struct snp_psc_desc *desc)
360 {
361 if (snp_vmpl)
362 svsm_pval_pages(desc);
363 else
364 pval_pages(desc);
365 }
366
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
368 {
369 int cur_entry, end_entry, ret = 0;
370 struct snp_psc_desc *data;
371 struct es_em_ctxt ctxt;
372
373 vc_ghcb_invalidate(ghcb);
374
375 /* Copy the input desc into GHCB shared buffer */
376 data = (struct snp_psc_desc *)ghcb->shared_buffer;
377 memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
378
379 /*
380 * As per the GHCB specification, the hypervisor can resume the guest
381 * before processing all the entries. Check whether all the entries
382 * are processed. If not, then keep retrying. Note, the hypervisor
383 * will update the data memory directly to indicate the status, so
384 * reference the data->hdr everywhere.
385 *
386 * The strategy here is to wait for the hypervisor to change the page
 * state in the RMP table before the guest accesses the memory pages. If the
388 * page state change was not successful, then later memory access will
389 * result in a crash.
390 */
391 cur_entry = data->hdr.cur_entry;
392 end_entry = data->hdr.end_entry;
393
394 while (data->hdr.cur_entry <= data->hdr.end_entry) {
395 ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
396
/* This call will advance the entry that the shared buffer data points to. */
398 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
399
400 /*
401 * Page State Change VMGEXIT can pass error code through
402 * exit_info_2.
403 */
404 if (WARN(ret || ghcb->save.sw_exit_info_2,
405 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
406 ret, ghcb->save.sw_exit_info_2)) {
407 ret = 1;
408 goto out;
409 }
410
411 /* Verify that reserved bit is not set */
412 if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
413 ret = 1;
414 goto out;
415 }
416
417 /*
418 * Sanity check that entry processing is not going backwards.
 * This will happen only if the hypervisor is tricking us.
420 */
421 if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
422 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
423 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
424 ret = 1;
425 goto out;
426 }
427 }
428
429 out:
430 return ret;
431 }
432
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
435 {
436 struct ghcb_state state;
437 bool use_large_entry;
438 struct psc_hdr *hdr;
439 struct psc_entry *e;
440 unsigned long flags;
441 unsigned long pfn;
442 struct ghcb *ghcb;
443 int i;
444
445 hdr = &data->hdr;
446 e = data->entries;
447
448 memset(data, 0, sizeof(*data));
449 i = 0;
450
451 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
452 hdr->end_entry = i;
453
454 if (is_vmalloc_addr((void *)vaddr)) {
455 pfn = vmalloc_to_pfn((void *)vaddr);
456 use_large_entry = false;
457 } else {
458 pfn = __pa(vaddr) >> PAGE_SHIFT;
459 use_large_entry = true;
460 }
461
462 e->gfn = pfn;
463 e->operation = op;
464
465 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
466 (vaddr_end - vaddr) >= PMD_SIZE) {
467 e->pagesize = RMP_PG_SIZE_2M;
468 vaddr += PMD_SIZE;
469 } else {
470 e->pagesize = RMP_PG_SIZE_4K;
471 vaddr += PAGE_SIZE;
472 }
473
474 e++;
475 i++;
476 }
477
478 /* Page validation must be rescinded before changing to shared */
479 if (op == SNP_PAGE_STATE_SHARED)
480 pvalidate_pages(data);
481
482 local_irq_save(flags);
483
484 if (sev_cfg.ghcbs_initialized)
485 ghcb = __sev_get_ghcb(&state);
486 else
487 ghcb = boot_ghcb;
488
489 /* Invoke the hypervisor to perform the page state changes */
490 if (!ghcb || vmgexit_psc(ghcb, data))
491 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
492
493 if (sev_cfg.ghcbs_initialized)
494 __sev_put_ghcb(&state);
495
496 local_irq_restore(flags);
497
498 /* Page validation must be performed after changing to private */
499 if (op == SNP_PAGE_STATE_PRIVATE)
500 pvalidate_pages(data);
501
502 return vaddr;
503 }
504
static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
506 {
507 struct snp_psc_desc desc;
508 unsigned long vaddr_end;
509
510 /* Use the MSR protocol when a GHCB is not available. */
511 if (!boot_ghcb)
512 return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
513
514 vaddr = vaddr & PAGE_MASK;
515 vaddr_end = vaddr + (npages << PAGE_SHIFT);
516
517 while (vaddr < vaddr_end)
518 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
519 }
520
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
522 {
523 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
524 return;
525
526 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
527 }
528
void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
530 {
531 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
532 return;
533
534 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
535 }
536
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
538 {
539 unsigned long vaddr, npages;
540
541 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
542 return;
543
544 vaddr = (unsigned long)__va(start);
545 npages = (end - start) >> PAGE_SHIFT;
546
547 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
548 }
549
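/*
 * Issue an AP Creation NAE event (@event) for the vCPU identified by @apic_id
 * via the GHCB protocol, passing the physical address of the VMSA page to the
 * hypervisor.
 */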
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
551 {
552 bool create = event != SVM_VMGEXIT_AP_DESTROY;
553 struct ghcb_state state;
554 unsigned long flags;
555 struct ghcb *ghcb;
556 int ret = 0;
557
558 local_irq_save(flags);
559
560 ghcb = __sev_get_ghcb(&state);
561
562 vc_ghcb_invalidate(ghcb);
563
564 if (create)
565 ghcb_set_rax(ghcb, vmsa->sev_features);
566
567 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
568 ghcb_set_sw_exit_info_1(ghcb,
569 ((u64)apic_id << 32) |
570 ((u64)snp_vmpl << 16) |
571 event);
572 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
573
574 sev_es_wr_ghcb_msr(__pa(ghcb));
575 VMGEXIT();
576
577 if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
578 lower_32_bits(ghcb->save.sw_exit_info_1)) {
579 pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
580 ret = -EINVAL;
581 }
582
583 __sev_put_ghcb(&state);
584
585 local_irq_restore(flags);
586
587 return ret;
588 }
589
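/*
 * Set or clear the VMSA attribute of a page: through the SVSM Core protocol
 * when running under an SVSM, or directly via RMPADJUST when running at VMPL0.
 */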
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
591 {
592 int ret;
593
594 if (snp_vmpl) {
595 struct svsm_call call = {};
596 unsigned long flags;
597
598 local_irq_save(flags);
599
600 call.caa = this_cpu_read(svsm_caa);
601 call.rcx = __pa(va);
602
603 if (make_vmsa) {
604 /* Protocol 0, Call ID 2 */
605 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
606 call.rdx = __pa(caa);
607 call.r8 = apic_id;
608 } else {
609 /* Protocol 0, Call ID 3 */
610 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
611 }
612
613 ret = svsm_perform_call_protocol(&call);
614
615 local_irq_restore(flags);
616 } else {
617 /*
618 * If the kernel runs at VMPL0, it can change the VMSA
619 * bit for a page using the RMPADJUST instruction.
620 * However, for the instruction to succeed it must
621 * target the permissions of a lesser privileged (higher
622 * numbered) VMPL level, so use VMPL1.
623 */
624 u64 attrs = 1;
625
626 if (make_vmsa)
627 attrs |= RMPADJUST_VMSA_PAGE_BIT;
628
629 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
630 }
631
632 return ret;
633 }
634
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
636 {
637 int err;
638
639 err = snp_set_vmsa(vmsa, NULL, apic_id, false);
640 if (err)
641 pr_err("clear VMSA page failed (%u), leaking page\n", err);
642 else
643 free_page((unsigned long)vmsa);
644 }
645
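/* Mark the page at @va as encrypted again in the page table entry @kpte. */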
static void set_pte_enc(pte_t *kpte, int level, void *va)
647 {
648 struct pte_enc_desc d = {
649 .kpte = kpte,
650 .pte_level = level,
651 .va = va,
652 .encrypt = true
653 };
654
655 prepare_pte_enc(&d);
656 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
657 }
658
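/*
 * Convert all memory that was shared with the hypervisor back to private:
 * the direct mapping (except the per-CPU GHCB pages, which are converted
 * later) and the bss decrypted section.
 */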
static void unshare_all_memory(void)
660 {
661 unsigned long addr, end, size, ghcb;
662 struct sev_es_runtime_data *data;
663 unsigned int npages, level;
664 bool skipped_addr;
665 pte_t *pte;
666 int cpu;
667
668 /* Unshare the direct mapping. */
669 addr = PAGE_OFFSET;
670 end = PAGE_OFFSET + get_max_mapped();
671
672 while (addr < end) {
673 pte = lookup_address(addr, &level);
674 size = page_level_size(level);
675 npages = size / PAGE_SIZE;
676 skipped_addr = false;
677
678 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
679 addr += size;
680 continue;
681 }
682
683 /*
684 * Ensure that all the per-CPU GHCBs are made private at the
685 * end of the unsharing loop so that the switch to the slower
686 * MSR protocol happens last.
687 */
688 for_each_possible_cpu(cpu) {
689 data = per_cpu(runtime_data, cpu);
690 ghcb = (unsigned long)&data->ghcb_page;
691
692 /* Handle the case of a huge page containing the GHCB page */
693 if (addr <= ghcb && ghcb < addr + size) {
694 skipped_addr = true;
695 break;
696 }
697 }
698
699 if (!skipped_addr) {
700 set_pte_enc(pte, level, (void *)addr);
701 snp_set_memory_private(addr, npages);
702 }
703 addr += size;
704 }
705
706 /* Unshare all bss decrypted memory. */
707 addr = (unsigned long)__start_bss_decrypted;
708 end = (unsigned long)__start_bss_decrypted_unused;
709 npages = (end - addr) >> PAGE_SHIFT;
710
711 for (; addr < end; addr += PAGE_SIZE) {
712 pte = lookup_address(addr, &level);
713 if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
714 continue;
715
716 set_pte_enc(pte, level, (void *)addr);
717 }
718 addr = (unsigned long)__start_bss_decrypted;
719 snp_set_memory_private(addr, npages);
720
721 __flush_tlb_all();
722 }
723
724 /* Stop new private<->shared conversions */
void snp_kexec_begin(void)
726 {
727 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
728 return;
729
730 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
731 return;
732
733 /*
734 * Crash kernel ends up here with interrupts disabled: can't wait for
735 * conversions to finish.
736 *
737 * If race happened, just report and proceed.
738 */
739 if (!set_memory_enc_stop_conversion())
740 pr_warn("Failed to stop shared<->private conversions\n");
741 }
742
/*
 * Shut down all APs except the one handling kexec/kdump and clear
 * the VMSA tag on the APs' VMSA pages since they are no longer being
 * used as VMSA pages.
 */
static void shutdown_all_aps(void)
749 {
750 struct sev_es_save_area *vmsa;
751 int apic_id, this_cpu, cpu;
752
753 this_cpu = get_cpu();
754
755 /*
756 * APs are already in HLT loop when enc_kexec_finish() callback
757 * is invoked.
758 */
759 for_each_present_cpu(cpu) {
760 vmsa = per_cpu(sev_vmsa, cpu);
761
/*
 * The BSP or offlined APs do not have a guest-allocated VMSA,
 * so there is no need to clear the VMSA tag for those pages.
 */
766 if (!vmsa)
767 continue;
768
769 /*
770 * Cannot clear the VMSA tag for the currently running vCPU.
771 */
772 if (this_cpu == cpu) {
773 unsigned long pa;
774 struct page *p;
775
776 pa = __pa(vmsa);
777 /*
778 * Mark the VMSA page of the running vCPU as offline
 * so that it is excluded and not touched by makedumpfile
780 * while generating vmcore during kdump.
781 */
782 p = pfn_to_online_page(pa >> PAGE_SHIFT);
783 if (p)
784 __SetPageOffline(p);
785 continue;
786 }
787
788 apic_id = cpuid_to_apicid[cpu];
789
790 /*
791 * Issue AP destroy to ensure AP gets kicked out of guest mode
 * to allow using RMPADJUST to remove the VMSA tag on its
793 * VMSA page.
794 */
795 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
796 snp_cleanup_vmsa(vmsa, apic_id);
797 }
798
799 put_cpu();
800 }
801
void snp_kexec_finish(void)
803 {
804 struct sev_es_runtime_data *data;
805 unsigned long size, addr;
806 unsigned int level, cpu;
807 struct ghcb *ghcb;
808 pte_t *pte;
809
810 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
811 return;
812
813 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
814 return;
815
816 shutdown_all_aps();
817
818 unshare_all_memory();
819
820 /*
821 * Switch to using the MSR protocol to change per-CPU GHCBs to
822 * private. All the per-CPU GHCBs have been switched back to private,
 * so no more GHCB calls can be made to the hypervisor beyond this point
824 * until the kexec'ed kernel starts running.
825 */
826 boot_ghcb = NULL;
827 sev_cfg.ghcbs_initialized = false;
828
829 for_each_possible_cpu(cpu) {
830 data = per_cpu(runtime_data, cpu);
831 ghcb = &data->ghcb_page;
832 pte = lookup_address((unsigned long)ghcb, &level);
833 size = page_level_size(level);
834 /* Handle the case of a huge page containing the GHCB page */
835 addr = (unsigned long)ghcb & page_level_mask(level);
836 set_pte_enc(pte, level, (void *)addr);
837 snp_set_memory_private(addr, (size / PAGE_SIZE));
838 }
839 }
840
841 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
842 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
843 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
844
845 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
846 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
847
static void *snp_alloc_vmsa_page(int cpu)
849 {
850 struct page *p;
851
852 /*
853 * Allocate VMSA page to work around the SNP erratum where the CPU will
854 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
 * collides with the RMP entry of the VMSA page. The recommended workaround
856 * is to not use a large page.
857 *
858 * Allocate an 8k page which is also 8k-aligned.
859 */
860 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
861 if (!p)
862 return NULL;
863
864 split_page(p, 1);
865
866 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
867 __free_page(p);
868
869 return page_address(p + 1);
870 }
871
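/*
 * Start an AP by allocating a new VMSA, initializing it to the documented
 * AP INIT state for the trampoline start IP, and issuing an AP Creation
 * NAE event for the target APIC ID.
 */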
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
873 {
874 struct sev_es_save_area *cur_vmsa, *vmsa;
875 struct svsm_ca *caa;
876 u8 sipi_vector;
877 int ret;
878 u64 cr4;
879
880 /*
881 * The hypervisor SNP feature support check has happened earlier, just check
882 * the AP_CREATION one here.
883 */
884 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
885 return -EOPNOTSUPP;
886
887 /*
888 * Verify the desired start IP against the known trampoline start IP
889 * to catch any future new trampolines that may be introduced that
890 * would require a new protected guest entry point.
891 */
892 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
893 "Unsupported SNP start_ip: %lx\n", start_ip))
894 return -EINVAL;
895
896 /* Override start_ip with known protected guest start IP */
897 start_ip = real_mode_header->sev_es_trampoline_start;
898 cur_vmsa = per_cpu(sev_vmsa, cpu);
899
900 /*
 * A new VMSA is created each time because there is no guarantee that
 * the current VMSA is the kernel's or that the vCPU is not running. If
 * an attempt were made to use the current VMSA with a running vCPU, a
 * #VMEXIT of that vCPU would wipe out all of the settings being done
 * here.
906 */
907 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
908 if (!vmsa)
909 return -ENOMEM;
910
911 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
912 caa = per_cpu(svsm_caa, cpu);
913
914 /* CR4 should maintain the MCE value */
915 cr4 = native_read_cr4() & X86_CR4_MCE;
916
917 /* Set the CS value based on the start_ip converted to a SIPI vector */
918 sipi_vector = (start_ip >> 12);
919 vmsa->cs.base = sipi_vector << 12;
920 vmsa->cs.limit = AP_INIT_CS_LIMIT;
921 vmsa->cs.attrib = INIT_CS_ATTRIBS;
922 vmsa->cs.selector = sipi_vector << 8;
923
924 /* Set the RIP value based on start_ip */
925 vmsa->rip = start_ip & 0xfff;
926
927 /* Set AP INIT defaults as documented in the APM */
928 vmsa->ds.limit = AP_INIT_DS_LIMIT;
929 vmsa->ds.attrib = INIT_DS_ATTRIBS;
930 vmsa->es = vmsa->ds;
931 vmsa->fs = vmsa->ds;
932 vmsa->gs = vmsa->ds;
933 vmsa->ss = vmsa->ds;
934
935 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
936 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
937 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
938 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
939 vmsa->tr.limit = AP_INIT_TR_LIMIT;
940 vmsa->tr.attrib = INIT_TR_ATTRIBS;
941
942 vmsa->cr4 = cr4;
943 vmsa->cr0 = AP_INIT_CR0_DEFAULT;
944 vmsa->dr7 = DR7_RESET_VALUE;
945 vmsa->dr6 = AP_INIT_DR6_DEFAULT;
946 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
947 vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
948 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
949 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
950 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
951 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
952
953 /* SVME must be set. */
954 vmsa->efer = EFER_SVME;
955
956 /*
957 * Set the SNP-specific fields for this VMSA:
958 * VMPL level
959 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
960 */
961 vmsa->vmpl = snp_vmpl;
962 vmsa->sev_features = sev_status >> 2;
963
964 /* Populate AP's TSC scale/offset to get accurate TSC values. */
965 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
966 vmsa->tsc_scale = snp_tsc_scale;
967 vmsa->tsc_offset = snp_tsc_offset;
968 }
969
970 /* Switch the page over to a VMSA page now that it is initialized */
971 ret = snp_set_vmsa(vmsa, caa, apic_id, true);
972 if (ret) {
973 pr_err("set VMSA page failed (%u)\n", ret);
974 free_page((unsigned long)vmsa);
975
976 return -EINVAL;
977 }
978
979 /* Issue VMGEXIT AP Creation NAE event */
980 ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
981 if (ret) {
982 snp_cleanup_vmsa(vmsa, apic_id);
983 vmsa = NULL;
984 }
985
986 /* Free up any previous VMSA page */
987 if (cur_vmsa)
988 snp_cleanup_vmsa(cur_vmsa, apic_id);
989
990 /* Record the current VMSA page */
991 per_cpu(sev_vmsa, cpu) = vmsa;
992
993 return ret;
994 }
995
void __init snp_set_wakeup_secondary_cpu(void)
997 {
998 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
999 return;
1000
1001 /*
1002 * Always set this override if SNP is enabled. This makes it the
1003 * required method to start APs under SNP. If the hypervisor does
1004 * not support AP creation, then no APs will be started.
1005 */
1006 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
1007 }
1008
int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
1010 {
1011 u16 startup_cs, startup_ip;
1012 phys_addr_t jump_table_pa;
1013 u64 jump_table_addr;
1014 u16 __iomem *jump_table;
1015
1016 jump_table_addr = get_jump_table_addr();
1017
1018 /* On UP guests there is no jump table so this is not a failure */
1019 if (!jump_table_addr)
1020 return 0;
1021
1022 /* Check if AP Jump Table is page-aligned */
1023 if (jump_table_addr & ~PAGE_MASK)
1024 return -EINVAL;
1025
1026 jump_table_pa = jump_table_addr & PAGE_MASK;
1027
1028 startup_cs = (u16)(rmh->trampoline_start >> 4);
1029 startup_ip = (u16)(rmh->sev_es_trampoline_start -
1030 rmh->trampoline_start);
1031
1032 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
1033 if (!jump_table)
1034 return -EIO;
1035
1036 writew(startup_ip, &jump_table[0]);
1037 writew(startup_cs, &jump_table[1]);
1038
1039 iounmap(jump_table);
1040
1041 return 0;
1042 }
1043
1044 /*
1045 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
1046 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
1047 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
1048 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
1050 {
1051 struct sev_es_runtime_data *data;
1052 unsigned long address, pflags;
1053 int cpu;
1054 u64 pfn;
1055
1056 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1057 return 0;
1058
1059 pflags = _PAGE_NX | _PAGE_RW;
1060
1061 for_each_possible_cpu(cpu) {
1062 data = per_cpu(runtime_data, cpu);
1063
1064 address = __pa(&data->ghcb_page);
1065 pfn = address >> PAGE_SHIFT;
1066
1067 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
1068 return 1;
1069 }
1070
1071 return 0;
1072 }
1073
static void snp_register_per_cpu_ghcb(void)
1075 {
1076 struct sev_es_runtime_data *data;
1077 struct ghcb *ghcb;
1078
1079 data = this_cpu_read(runtime_data);
1080 ghcb = &data->ghcb_page;
1081
1082 snp_register_ghcb_early(__pa(ghcb));
1083 }
1084
void setup_ghcb(void)
1086 {
1087 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1088 return;
1089
1090 /*
1091 * Check whether the runtime #VC exception handler is active. It uses
1092 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
1093 *
1094 * If SNP is active, register the per-CPU GHCB page so that the runtime
1095 * exception handler can use it.
1096 */
1097 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
1098 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1099 snp_register_per_cpu_ghcb();
1100
1101 sev_cfg.ghcbs_initialized = true;
1102
1103 return;
1104 }
1105
1106 /*
1107 * Make sure the hypervisor talks a supported protocol.
1108 * This gets called only in the BSP boot phase.
1109 */
1110 if (!sev_es_negotiate_protocol())
1111 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1112
1113 /*
1114 * Clear the boot_ghcb. The first exception comes in before the bss
1115 * section is cleared.
1116 */
1117 memset(&boot_ghcb_page, 0, PAGE_SIZE);
1118
1119 /* Alright - Make the boot-ghcb public */
1120 boot_ghcb = &boot_ghcb_page;
1121
1122 /* SNP guest requires that GHCB GPA must be registered. */
1123 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1124 snp_register_ghcb_early(__pa(&boot_ghcb_page));
1125 }
1126
1127 #ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
1129 {
1130 struct ghcb_state state;
1131 struct ghcb *ghcb;
1132
1133 ghcb = __sev_get_ghcb(&state);
1134
1135 while (true) {
1136 vc_ghcb_invalidate(ghcb);
1137 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
1138 ghcb_set_sw_exit_info_1(ghcb, 0);
1139 ghcb_set_sw_exit_info_2(ghcb, 0);
1140
1141 sev_es_wr_ghcb_msr(__pa(ghcb));
1142 VMGEXIT();
1143
1144 /* Wakeup signal? */
1145 if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
1146 ghcb->save.sw_exit_info_2)
1147 break;
1148 }
1149
1150 __sev_put_ghcb(&state);
1151 }
1152
1153 /*
1154 * Play_dead handler when running under SEV-ES. This is needed because
1155 * the hypervisor can't deliver an SIPI request to restart the AP.
1156 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
1157 * hypervisor wakes it up again.
1158 */
static void sev_es_play_dead(void)
1160 {
1161 play_dead_common();
1162
1163 /* IRQs now disabled */
1164
1165 sev_es_ap_hlt_loop();
1166
1167 /*
1168 * If we get here, the VCPU was woken up again. Jump to CPU
1169 * startup code to get it back online.
1170 */
1171 soft_restart_cpu();
1172 }
1173 #else /* CONFIG_HOTPLUG_CPU */
1174 #define sev_es_play_dead native_play_dead
1175 #endif /* CONFIG_HOTPLUG_CPU */
1176
1177 #ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
1179 {
1180 smp_ops.play_dead = sev_es_play_dead;
1181 }
1182 #else
static inline void sev_es_setup_play_dead(void) { }
1184 #endif
1185
static void __init alloc_runtime_data(int cpu)
1187 {
1188 struct sev_es_runtime_data *data;
1189
1190 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
1191 if (!data)
1192 panic("Can't allocate SEV-ES runtime data");
1193
1194 per_cpu(runtime_data, cpu) = data;
1195
1196 if (snp_vmpl) {
1197 struct svsm_ca *caa;
1198
1199 /* Allocate the SVSM CA page if an SVSM is present */
1200 caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);
1201
1202 per_cpu(svsm_caa, cpu) = caa;
1203 per_cpu(svsm_caa_pa, cpu) = __pa(caa);
1204 }
1205 }
1206
static void __init init_ghcb(int cpu)
1208 {
1209 struct sev_es_runtime_data *data;
1210 int err;
1211
1212 data = per_cpu(runtime_data, cpu);
1213
1214 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
1215 sizeof(data->ghcb_page));
1216 if (err)
1217 panic("Can't map GHCBs unencrypted");
1218
1219 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
1220
1221 data->ghcb_active = false;
1222 data->backup_ghcb_active = false;
1223 }
1224
void __init sev_es_init_vc_handling(void)
1226 {
1227 int cpu;
1228
1229 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
1230
1231 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1232 return;
1233
1234 if (!sev_es_check_cpu_features())
1235 panic("SEV-ES CPU Features missing");
1236
1237 /*
1238 * SNP is supported in v2 of the GHCB spec which mandates support for HV
1239 * features.
1240 */
1241 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
1242 sev_hv_features = get_hv_features();
1243
1244 if (!(sev_hv_features & GHCB_HV_FT_SNP))
1245 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
1246 }
1247
1248 /* Initialize per-cpu GHCB pages */
1249 for_each_possible_cpu(cpu) {
1250 alloc_runtime_data(cpu);
1251 init_ghcb(cpu);
1252 }
1253
1254 /* If running under an SVSM, switch to the per-cpu CA */
1255 if (snp_vmpl) {
1256 struct svsm_call call = {};
1257 unsigned long flags;
1258 int ret;
1259
1260 local_irq_save(flags);
1261
1262 /*
1263 * SVSM_CORE_REMAP_CA call:
1264 * RAX = 0 (Protocol=0, CallID=0)
1265 * RCX = New CA GPA
1266 */
1267 call.caa = svsm_get_caa();
1268 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
1269 call.rcx = this_cpu_read(svsm_caa_pa);
1270 ret = svsm_perform_call_protocol(&call);
1271 if (ret)
1272 panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
1273 ret, call.rax_out);
1274
1275 sev_cfg.use_cas = true;
1276
1277 local_irq_restore(flags);
1278 }
1279
1280 sev_es_setup_play_dead();
1281
1282 /* Secondary CPUs use the runtime #VC handler */
1283 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
1284 }
1285
1286 /*
1287 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1288 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
1289 * ROM region can cause a crash since this region is not pre-validated.
1290 */
void __init snp_dmi_setup(void)
1292 {
1293 if (efi_enabled(EFI_CONFIG_TABLES))
1294 dmi_setup();
1295 }
1296
static void dump_cpuid_table(void)
1298 {
1299 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1300 int i = 0;
1301
1302 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
1303 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
1304
1305 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
1306 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
1307
1308 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
1309 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
1310 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
1311 }
1312 }
1313
1314 /*
1315 * It is useful from an auditing/testing perspective to provide an easy way
1316 * for the guest owner to know that the CPUID table has been initialized as
1317 * expected, but that initialization happens too early in boot to print any
1318 * sort of indicator, and there's not really any other good place to do it,
1319 * so do it here.
1320 *
1321 * If running as an SNP guest, report the current VM privilege level (VMPL).
1322 */
static int __init report_snp_info(void)
1324 {
1325 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1326
1327 if (cpuid_table->count) {
1328 pr_info("Using SNP CPUID table, %d entries present.\n",
1329 cpuid_table->count);
1330
1331 if (sev_cfg.debug)
1332 dump_cpuid_table();
1333 }
1334
1335 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1336 pr_info("SNP running at VMPL%u.\n", snp_vmpl);
1337
1338 return 0;
1339 }
1340 arch_initcall(report_snp_info);
1341
static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
1343 {
1344 /* If (new) lengths have been returned, propagate them up */
1345 if (call->rcx_out != call->rcx)
1346 input->manifest_buf.len = call->rcx_out;
1347
1348 if (call->rdx_out != call->rdx)
1349 input->certificates_buf.len = call->rdx_out;
1350
1351 if (call->r8_out != call->r8)
1352 input->report_buf.len = call->r8_out;
1353 }
1354
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
			      struct svsm_attest_call *input)
1357 {
1358 struct svsm_attest_call *ac;
1359 unsigned long flags;
1360 u64 attest_call_pa;
1361 int ret;
1362
1363 if (!snp_vmpl)
1364 return -EINVAL;
1365
1366 local_irq_save(flags);
1367
1368 call->caa = svsm_get_caa();
1369
1370 ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
1371 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
1372
1373 *ac = *input;
1374
1375 /*
1376 * Set input registers for the request and set RDX and R8 to known
1377 * values in order to detect length values being returned in them.
1378 */
1379 call->rax = call_id;
1380 call->rcx = attest_call_pa;
1381 call->rdx = -1;
1382 call->r8 = -1;
1383 ret = svsm_perform_call_protocol(call);
1384 update_attest_input(call, input);
1385
1386 local_irq_restore(flags);
1387
1388 return ret;
1389 }
1390 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
1391
static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input,
				   struct snp_guest_request_ioctl *rio)
1394 {
1395 struct ghcb_state state;
1396 struct es_em_ctxt ctxt;
1397 unsigned long flags;
1398 struct ghcb *ghcb;
1399 int ret;
1400
1401 rio->exitinfo2 = SEV_RET_NO_FW_CALL;
1402
1403 /*
1404 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
1405 * a per-CPU GHCB.
1406 */
1407 local_irq_save(flags);
1408
1409 ghcb = __sev_get_ghcb(&state);
1410 if (!ghcb) {
1411 ret = -EIO;
1412 goto e_restore_irq;
1413 }
1414
1415 vc_ghcb_invalidate(ghcb);
1416
1417 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1418 ghcb_set_rax(ghcb, input->data_gpa);
1419 ghcb_set_rbx(ghcb, input->data_npages);
1420 }
1421
1422 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
1423 if (ret)
1424 goto e_put;
1425
1426 rio->exitinfo2 = ghcb->save.sw_exit_info_2;
1427 switch (rio->exitinfo2) {
1428 case 0:
1429 break;
1430
1431 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
1432 ret = -EAGAIN;
1433 break;
1434
1435 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
/* The number of expected pages is returned in RBX */
1437 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1438 input->data_npages = ghcb_get_rbx(ghcb);
1439 ret = -ENOSPC;
1440 break;
1441 }
1442 fallthrough;
1443 default:
1444 ret = -EIO;
1445 break;
1446 }
1447
1448 e_put:
1449 __sev_put_ghcb(&state);
1450 e_restore_irq:
1451 local_irq_restore(flags);
1452
1453 return ret;
1454 }
1455
1456 /**
1457 * snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
1458 *
1459 * Check that there is SVSM and that it supports at least TPM_SEND_COMMAND
1460 * which is the only request used so far.
1461 *
1462 * Return: true if the platform provides a vTPM SVSM device, false otherwise.
1463 */
static bool snp_svsm_vtpm_probe(void)
1465 {
1466 struct svsm_call call = {};
1467
1468 /* The vTPM device is available only if a SVSM is present */
1469 if (!snp_vmpl)
1470 return false;
1471
1472 call.caa = svsm_get_caa();
1473 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
1474
1475 if (svsm_perform_call_protocol(&call))
1476 return false;
1477
1478 /* Check platform commands contains TPM_SEND_COMMAND - platform command 8 */
1479 return call.rcx_out & BIT_ULL(8);
1480 }
1481
1482 /**
1483 * snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
1484 * @buffer: A buffer used to both send the command and receive the response.
1485 *
1486 * Execute a SVSM_VTPM_CMD call as defined by
1487 * "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
1488 *
1489 * All command request/response buffers have a common structure as specified by
1490 * the following table:
1491 * Byte Size In/Out Description
1492 * Offset (Bytes)
1493 * 0x000 4 In Platform command
1494 * Out Platform command response size
1495 *
1496 * Each command can build upon this common request/response structure to create
1497 * a structure specific to the command. See include/linux/tpm_svsm.h for more
1498 * details.
1499 *
1500 * Return: 0 on success, -errno on failure
1501 */
int snp_svsm_vtpm_send_command(u8 *buffer)
1503 {
1504 struct svsm_call call = {};
1505
1506 call.caa = svsm_get_caa();
1507 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
1508 call.rcx = __pa(buffer);
1509
1510 return svsm_perform_call_protocol(&call);
1511 }
1512 EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
1513
1514 static struct platform_device sev_guest_device = {
1515 .name = "sev-guest",
1516 .id = -1,
1517 };
1518
1519 static struct platform_device tpm_svsm_device = {
1520 .name = "tpm-svsm",
1521 .id = -1,
1522 };
1523
static int __init snp_init_platform_device(void)
1525 {
1526 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1527 return -ENODEV;
1528
1529 if (platform_device_register(&sev_guest_device))
1530 return -ENODEV;
1531
1532 if (snp_svsm_vtpm_probe() &&
1533 platform_device_register(&tpm_svsm_device))
1534 return -ENODEV;
1535
1536 pr_info("SNP guest platform devices initialized.\n");
1537 return 0;
1538 }
1539 device_initcall(snp_init_platform_device);
1540
void sev_show_status(void)
1542 {
1543 int i;
1544
1545 pr_info("Status: ");
1546 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1547 if (sev_status & BIT_ULL(i)) {
1548 if (!sev_status_feat_names[i])
1549 continue;
1550
1551 pr_cont("%s ", sev_status_feat_names[i]);
1552 }
1553 }
1554 pr_cont("\n");
1555 }
1556
void __init snp_update_svsm_ca(void)
1558 {
1559 if (!snp_vmpl)
1560 return;
1561
1562 /* Update the CAA to a proper kernel address */
1563 boot_svsm_caa = &boot_svsm_ca_page;
1564 }
1565
1566 #ifdef CONFIG_SYSFS
static ssize_t vmpl_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
1569 {
1570 return sysfs_emit(buf, "%d\n", snp_vmpl);
1571 }
1572
1573 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
1574
1575 static struct attribute *vmpl_attrs[] = {
1576 &vmpl_attr.attr,
1577 NULL
1578 };
1579
1580 static struct attribute_group sev_attr_group = {
1581 .attrs = vmpl_attrs,
1582 };
1583
static int __init sev_sysfs_init(void)
1585 {
1586 struct kobject *sev_kobj;
1587 struct device *dev_root;
1588 int ret;
1589
1590 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1591 return -ENODEV;
1592
1593 dev_root = bus_get_dev_root(&cpu_subsys);
1594 if (!dev_root)
1595 return -ENODEV;
1596
1597 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
1598 put_device(dev_root);
1599
1600 if (!sev_kobj)
1601 return -ENOMEM;
1602
1603 ret = sysfs_create_group(sev_kobj, &sev_attr_group);
1604 if (ret)
1605 kobject_put(sev_kobj);
1606
1607 return ret;
1608 }
1609 arch_initcall(sev_sysfs_init);
1610 #endif // CONFIG_SYSFS
1611
static void free_shared_pages(void *buf, size_t sz)
1613 {
1614 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1615 int ret;
1616
1617 if (!buf)
1618 return;
1619
1620 ret = set_memory_encrypted((unsigned long)buf, npages);
1621 if (ret) {
1622 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
1623 return;
1624 }
1625
1626 __free_pages(virt_to_page(buf), get_order(sz));
1627 }
1628
static void *alloc_shared_pages(size_t sz)
1630 {
1631 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1632 struct page *page;
1633 int ret;
1634
1635 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
1636 if (!page)
1637 return NULL;
1638
1639 ret = set_memory_decrypted((unsigned long)page_address(page), npages);
1640 if (ret) {
1641 pr_err("failed to mark page shared, ret=%d\n", ret);
1642 __free_pages(page, get_order(sz));
1643 return NULL;
1644 }
1645
1646 return page_address(page);
1647 }
1648
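/*
 * Return the requested VMPCK and a pointer to its message sequence number
 * in the secrets page, or NULL for an invalid VMPCK ID.
 */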
static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1650 {
1651 u8 *key = NULL;
1652
1653 switch (id) {
1654 case 0:
1655 *seqno = &secrets->os_area.msg_seqno_0;
1656 key = secrets->vmpck0;
1657 break;
1658 case 1:
1659 *seqno = &secrets->os_area.msg_seqno_1;
1660 key = secrets->vmpck1;
1661 break;
1662 case 2:
1663 *seqno = &secrets->os_area.msg_seqno_2;
1664 key = secrets->vmpck2;
1665 break;
1666 case 3:
1667 *seqno = &secrets->os_area.msg_seqno_3;
1668 key = secrets->vmpck3;
1669 break;
1670 default:
1671 break;
1672 }
1673
1674 return key;
1675 }
1676
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
1678 {
1679 struct aesgcm_ctx *ctx;
1680
1681 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1682 if (!ctx)
1683 return NULL;
1684
1685 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
1686 pr_err("Crypto context initialization failed\n");
1687 kfree(ctx);
1688 return NULL;
1689 }
1690
1691 return ctx;
1692 }
1693
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
1695 {
1696 /* Adjust the default VMPCK key based on the executing VMPL level */
1697 if (vmpck_id == -1)
1698 vmpck_id = snp_vmpl;
1699
1700 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
1701 if (!mdesc->vmpck) {
1702 pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
1703 return -EINVAL;
1704 }
1705
1706 /* Verify that VMPCK is not zero. */
1707 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
1708 pr_err("Empty VMPCK%d communication key\n", vmpck_id);
1709 return -EINVAL;
1710 }
1711
1712 mdesc->vmpck_id = vmpck_id;
1713
1714 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
1715 if (!mdesc->ctx)
1716 return -ENOMEM;
1717
1718 return 0;
1719 }
1720 EXPORT_SYMBOL_GPL(snp_msg_init);
1721
struct snp_msg_desc *snp_msg_alloc(void)
1723 {
1724 struct snp_msg_desc *mdesc;
1725 void __iomem *mem;
1726
1727 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);
1728
1729 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
1730 if (!mdesc)
1731 return ERR_PTR(-ENOMEM);
1732
1733 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
1734 if (!mem)
1735 goto e_free_mdesc;
1736
1737 mdesc->secrets = (__force struct snp_secrets_page *)mem;
1738
1739 /* Allocate the shared page used for the request and response message. */
1740 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
1741 if (!mdesc->request)
1742 goto e_unmap;
1743
1744 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
1745 if (!mdesc->response)
1746 goto e_free_request;
1747
1748 return mdesc;
1749
1750 e_free_request:
1751 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1752 e_unmap:
1753 iounmap(mem);
1754 e_free_mdesc:
1755 kfree(mdesc);
1756
1757 return ERR_PTR(-ENOMEM);
1758 }
1759 EXPORT_SYMBOL_GPL(snp_msg_alloc);
1760
void snp_msg_free(struct snp_msg_desc *mdesc)
1762 {
1763 if (!mdesc)
1764 return;
1765
1766 kfree(mdesc->ctx);
1767 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
1768 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
1769 iounmap((__force void __iomem *)mdesc->secrets);
1770
1771 memset(mdesc, 0, sizeof(*mdesc));
1772 kfree(mdesc);
1773 }
1774 EXPORT_SYMBOL_GPL(snp_msg_free);
1775
1776 /* Mutex to serialize the shared buffer access and command handling. */
1777 static DEFINE_MUTEX(snp_cmd_mutex);
1778
1779 /*
1780 * If an error is received from the host or AMD Secure Processor (ASP) there
1781 * are two options. Either retry the exact same encrypted request or discontinue
1782 * using the VMPCK.
1783 *
1784 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
1785 * encrypt the requests. The IV for this scheme is the sequence number. GCM
1786 * cannot tolerate IV reuse.
1787 *
1788 * The ASP FW v1.51 only increments the sequence numbers on a successful
1789 * guest<->ASP back and forth and only accepts messages at its exact sequence
1790 * number.
1791 *
1792 * So if the sequence number were to be reused the encryption scheme is
1793 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
1794 * will reject the request.
1795 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
1797 {
1798 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
1799 mdesc->vmpck_id);
1800 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
1801 mdesc->vmpck = NULL;
1802 }
1803
static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1805 {
1806 u64 count;
1807
1808 lockdep_assert_held(&snp_cmd_mutex);
1809
/* Read the current message sequence counter from the secrets page */
1811 count = *mdesc->os_area_msg_seqno;
1812
1813 return count + 1;
1814 }
1815
/* Return a non-zero value on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
1818 {
1819 u64 count = __snp_get_msg_seqno(mdesc);
1820
1821 /*
1822 * The message sequence counter for the SNP guest request is a 64-bit
 * value but version 2 of the GHCB specification defines a 32-bit storage
1824 * for it. If the counter exceeds the 32-bit value then return zero.
1825 * The caller should check the return value, but if the caller happens to
1826 * not check the value and use it, then the firmware treats zero as an
1827 * invalid number and will fail the message request.
1828 */
1829 if (count >= UINT_MAX) {
1830 pr_err("request message sequence counter overflow\n");
1831 return 0;
1832 }
1833
1834 return count;
1835 }
1836
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
1838 {
1839 /*
1840 * The counter is also incremented by the PSP, so increment it by 2
1841 * and save in secrets page.
1842 */
1843 *mdesc->os_area_msg_seqno += 2;
1844 }
1845
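/*
 * Validate the response header against the request (sequence number, message
 * type and version) and decrypt the response payload into req->resp_buf.
 */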
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
1847 {
1848 struct snp_guest_msg *resp_msg = &mdesc->secret_response;
1849 struct snp_guest_msg *req_msg = &mdesc->secret_request;
1850 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
1851 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
1852 struct aesgcm_ctx *ctx = mdesc->ctx;
1853 u8 iv[GCM_AES_IV_SIZE] = {};
1854
1855 pr_debug("response [seqno %lld type %d version %d sz %d]\n",
1856 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
1857 resp_msg_hdr->msg_sz);
1858
1859 /* Copy response from shared memory to encrypted memory. */
1860 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));
1861
1862 /* Verify that the sequence counter is incremented by 1 */
1863 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
1864 return -EBADMSG;
1865
1866 /* Verify response message type and version number. */
1867 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
1868 resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
1869 return -EBADMSG;
1870
1871 /*
1872 * If the message size is greater than our buffer length then return
1873 * an error.
1874 */
1875 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
1876 return -EBADMSG;
1877
1878 /* Decrypt the payload */
1879 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
1880 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
1881 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
1882 return -EBADMSG;
1883
1884 return 0;
1885 }
1886
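/*
 * Build the request message header and encrypt the request payload into the
 * secret request buffer, using the message sequence number as the IV.
 */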
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
1888 {
1889 struct snp_guest_msg *msg = &mdesc->secret_request;
1890 struct snp_guest_msg_hdr *hdr = &msg->hdr;
1891 struct aesgcm_ctx *ctx = mdesc->ctx;
1892 u8 iv[GCM_AES_IV_SIZE] = {};
1893
1894 memset(msg, 0, sizeof(*msg));
1895
1896 hdr->algo = SNP_AEAD_AES_256_GCM;
1897 hdr->hdr_version = MSG_HDR_VER;
1898 hdr->hdr_sz = sizeof(*hdr);
1899 hdr->msg_type = req->msg_type;
1900 hdr->msg_version = req->msg_version;
1901 hdr->msg_seqno = seqno;
1902 hdr->msg_vmpck = req->vmpck_id;
1903 hdr->msg_sz = req->req_sz;
1904
1905 /* Verify the sequence number is non-zero */
1906 if (!hdr->msg_seqno)
1907 return -ENOSR;
1908
1909 pr_debug("request [seqno %lld type %d version %d sz %d]\n",
1910 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);
1911
1912 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
1913 return -EBADMSG;
1914
1915 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
1916 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
1917 AAD_LEN, iv, hdr->authtag);
1918
1919 return 0;
1920 }
1921
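/*
 * Issue the request to the firmware and handle the cases that still allow
 * the sequence number to advance safely: an extended request whose
 * certificate buffer was too small (-ENOSPC) is retried as a plain guest
 * request so the counter still increments, and a throttled request
 * (-EAGAIN) is retried for up to SNP_REQ_MAX_RETRY_DURATION before giving
 * up with -ETIMEDOUT.
 */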
1922 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
1923 struct snp_guest_request_ioctl *rio)
1924 {
1925 unsigned long req_start = jiffies;
1926 unsigned int override_npages = 0;
1927 u64 override_err = 0;
1928 int rc;
1929
1930 retry_request:
1931 /*
1932 * Call firmware to process the request. The encrypted message is
1933 * exposed to the host via shared memory for this call, so afterwards
1934 * the sequence number must be incremented or the VMPCK must be
1935 * deleted to prevent reuse of the IV.
1936 */
1937 rc = snp_issue_guest_request(req, &req->input, rio);
1938 switch (rc) {
1939 case -ENOSPC:
1940 /*
1941 * If the extended guest request fails because the certificate
1942 * data buffer is too small, retry the same guest request
1943 * without the extended data request so that the sequence
1944 * number is still incremented and IV reuse is thereby
1945 * avoided.
1946 */
1947 override_npages = req->input.data_npages;
1948 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
1949
1950 /*
1951 * Override the error to inform the caller that the supplied
1952 * extended request buffer was too small, along with the
1953 * required buffer size.
1954 */
1955 override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);
1956
1957 /*
1958 * If this retry of the firmware call succeeds, the sequence
1959 * number can be incremented, allowing continued use of the
1960 * VMPCK. If there is an error reflected in the return value,
1961 * it is handled further down: the VMPCK is deleted and the
1962 * error code is propagated back to the user as the ioctl()
1963 * return code.
1964 */
1965 goto retry_request;
1966
1967 /*
1968 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
1969 * throttled. Retry in the driver to avoid returning and reusing the
1970 * message sequence number on a different message.
1971 */
1972 case -EAGAIN:
1973 if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
1974 rc = -ETIMEDOUT;
1975 break;
1976 }
1977 schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
1978 goto retry_request;
1979 }
1980
1981 /*
1982 * Increment the message sequence number. There is no harm in doing
1983 * this now because decryption uses the value stored in the response
1984 * structure and any failure will wipe the VMPCK, preventing further
1985 * use anyway.
1986 */
1987 snp_inc_msg_seqno(mdesc);
1988
1989 if (override_err) {
1990 rio->exitinfo2 = override_err;
1991
1992 /*
1993 * If an extended guest request was issued and the supplied certificate
1994 * buffer was not large enough, a standard guest request was issued
1995 * instead to prevent IV reuse. If that standard request succeeded,
1996 * return -EIO to the caller, as the original extended request would have.
1997 */
1998 if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
1999 rc = -EIO;
2000 }
2001
2002 if (override_npages)
2003 req->input.data_npages = override_npages;
2004
2005 return rc;
2006 }
2007
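/*
 * Send a single guest message to the SEV firmware: serialize on
 * snp_cmd_mutex, encrypt the request into the shared request page, call the
 * firmware via snp_issue_guest_request(), then authenticate and decrypt the
 * response. Any failure other than an undersized extended-request buffer
 * wipes the VMPCK, since continuing to use the key after an unexpected
 * failure would risk IV reuse.
 */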
2008 int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
2009 struct snp_guest_request_ioctl *rio)
2010 {
2011 u64 seqno;
2012 int rc;
2013
2014 guard(mutex)(&snp_cmd_mutex);
2015
2016 /* Check that the VMPCK is present and not all zeroes */
2017 if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
2018 pr_err_ratelimited("VMPCK is disabled\n");
2019 return -ENOTTY;
2020 }
2021
2022 /* Get the message sequence number and verify that it is non-zero */
2023 seqno = snp_get_msg_seqno(mdesc);
2024 if (!seqno)
2025 return -EIO;
2026
2027 /* Clear shared memory's response for the host to populate. */
2028 memset(mdesc->response, 0, sizeof(struct snp_guest_msg));
2029
2030 /* Encrypt the caller-provided payload into mdesc->secret_request. */
2031 rc = enc_payload(mdesc, seqno, req);
2032 if (rc)
2033 return rc;
2034
2035 /*
2036 * Write the fully encrypted request to the shared unencrypted
2037 * request page.
2038 */
2039 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));
2040
2041 /* Initialize the input address for guest request */
2042 req->input.req_gpa = __pa(mdesc->request);
2043 req->input.resp_gpa = __pa(mdesc->response);
2044 req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
2045
2046 rc = __handle_guest_request(mdesc, req, rio);
2047 if (rc) {
2048 if (rc == -EIO &&
2049 rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
2050 return rc;
2051
2052 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
2053 rc, rio->exitinfo2);
2054
2055 snp_disable_vmpck(mdesc);
2056 return rc;
2057 }
2058
2059 rc = verify_and_dec_payload(mdesc, req);
2060 if (rc) {
2061 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
2062 snp_disable_vmpck(mdesc);
2063 return rc;
2064 }
2065
2066 return 0;
2067 }
2068 EXPORT_SYMBOL_GPL(snp_send_guest_request);
2069
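/*
 * Retrieve the Secure TSC parameters from the firmware at boot. A temporary
 * message descriptor is set up just for this exchange, an SNP_MSG_TSC_INFO_REQ
 * message is sent, and on success the returned values are cached in
 * snp_tsc_scale and snp_tsc_offset.
 */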
2070 static int __init snp_get_tsc_info(void)
2071 {
2072 struct snp_guest_request_ioctl *rio;
2073 struct snp_tsc_info_resp *tsc_resp;
2074 struct snp_tsc_info_req *tsc_req;
2075 struct snp_msg_desc *mdesc;
2076 struct snp_guest_req *req;
2077 int rc = -ENOMEM;
2078
2079 tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
2080 if (!tsc_req)
2081 return rc;
2082
2083 /*
2084 * The intermediate response buffer is used while decrypting the
2085 * response payload. Make sure that it has enough space to cover
2086 * the authtag.
2087 */
2088 tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
2089 if (!tsc_resp)
2090 goto e_free_tsc_req;
2091
2092 req = kzalloc(sizeof(*req), GFP_KERNEL);
2093 if (!req)
2094 goto e_free_tsc_resp;
2095
2096 rio = kzalloc(sizeof(*rio), GFP_KERNEL);
2097 if (!rio)
2098 goto e_free_req;
2099
2100 mdesc = snp_msg_alloc();
2101 if (IS_ERR_OR_NULL(mdesc))
2102 goto e_free_rio;
2103
2104 rc = snp_msg_init(mdesc, snp_vmpl);
2105 if (rc)
2106 goto e_free_mdesc;
2107
2108 req->msg_version = MSG_HDR_VER;
2109 req->msg_type = SNP_MSG_TSC_INFO_REQ;
2110 req->vmpck_id = snp_vmpl;
2111 req->req_buf = tsc_req;
2112 req->req_sz = sizeof(*tsc_req);
2113 req->resp_buf = (void *)tsc_resp;
2114 req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
2115 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
2116
2117 rc = snp_send_guest_request(mdesc, req, rio);
2118 if (rc)
2119 goto e_request;
2120
2121 pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
2122 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
2123 tsc_resp->tsc_factor);
2124
2125 if (!tsc_resp->status) {
2126 snp_tsc_scale = tsc_resp->tsc_scale;
2127 snp_tsc_offset = tsc_resp->tsc_offset;
2128 } else {
2129 pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
2130 rc = -EIO;
2131 }
2132
2133 e_request:
2134 /* The response buffer contains sensitive data, explicitly clear it. */
2135 memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
2136 e_free_mdesc:
2137 snp_msg_free(mdesc);
2138 e_free_rio:
2139 kfree(rio);
2140 e_free_req:
2141 kfree(req);
2142 e_free_tsc_resp:
2143 kfree(tsc_resp);
2144 e_free_tsc_req:
2145 kfree(tsc_req);
2146
2147 return rc;
2148 }
2149
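/*
 * Fetch the Secure TSC parameters early in boot. Failure to obtain them from
 * the ASP is fatal: the guest terminates with GHCB_TERM_SECURE_TSC rather
 * than continue with an unconfigured Secure TSC.
 */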
2150 void __init snp_secure_tsc_prepare(void)
2151 {
2152 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
2153 return;
2154
2155 if (snp_get_tsc_info()) {
2156 pr_alert("Unable to retrieve Secure TSC info from ASP\n");
2157 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
2158 }
2159
2160 pr_debug("SecureTSC enabled");
2161 }
2162
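/* Used as both the CPU and TSC calibration routine for Secure TSC guests. */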
2163 static unsigned long securetsc_get_tsc_khz(void)
2164 {
2165 return snp_tsc_freq_khz;
2166 }
2167
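/*
 * Derive the guest TSC frequency for Secure TSC guests: read the nominal
 * frequency in MHz from MSR_AMD64_GUEST_TSC_FREQ, apply the TSC_FACTOR
 * adjustment from the SNP secrets page, and register the result as the
 * calibration value so the kernel treats the TSC frequency as known instead
 * of calibrating it.
 */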
2168 void __init snp_secure_tsc_init(void)
2169 {
2170 struct snp_secrets_page *secrets;
2171 unsigned long tsc_freq_mhz;
2172 void *mem;
2173
2174 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
2175 return;
2176
2177 mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
2178 if (!mem) {
2179 pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
2180 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
2181 }
2182
2183 secrets = (__force struct snp_secrets_page *)mem;
2184
2185 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
2186 rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
2187
2188 /* Extract the guest TSC frequency in MHz from bits [17:0]; the remaining bits are reserved */
2189 tsc_freq_mhz &= GENMASK_ULL(17, 0);
2190
2191 snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
2192
2193 x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
2194 x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
2195
2196 early_memunmap(mem, PAGE_SIZE);
2197 }
2198