1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * AMD Memory Encryption Support
4 *
5 * Copyright (C) 2019 SUSE
6 *
7 * Author: Joerg Roedel <jroedel@suse.de>
8 */
9
10 #define pr_fmt(fmt) "SEV: " fmt
11
12 #include <linux/sched/debug.h> /* For show_regs() */
13 #include <linux/percpu-defs.h>
14 #include <linux/cc_platform.h>
15 #include <linux/printk.h>
16 #include <linux/mm_types.h>
17 #include <linux/set_memory.h>
18 #include <linux/memblock.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/cpumask.h>
22 #include <linux/efi.h>
23 #include <linux/platform_device.h>
24 #include <linux/io.h>
25 #include <linux/psp-sev.h>
26 #include <linux/dmi.h>
27 #include <uapi/linux/sev-guest.h>
28 #include <crypto/gcm.h>
29
30 #include <asm/init.h>
31 #include <asm/cpu_entry_area.h>
32 #include <asm/stacktrace.h>
33 #include <asm/sev.h>
34 #include <asm/sev-internal.h>
35 #include <asm/insn-eval.h>
36 #include <asm/fpu/xcr.h>
37 #include <asm/processor.h>
38 #include <asm/realmode.h>
39 #include <asm/setup.h>
40 #include <asm/traps.h>
41 #include <asm/svm.h>
42 #include <asm/smp.h>
43 #include <asm/cpu.h>
44 #include <asm/apic.h>
45 #include <asm/cpuid/api.h>
46 #include <asm/cmdline.h>
47 #include <asm/msr.h>
48
49 /* Bitmap of SEV features supported by the hypervisor */
50 u64 sev_hv_features __ro_after_init;
51 SYM_PIC_ALIAS(sev_hv_features);
52
53 /* Secrets page physical address from the CC blob */
54 u64 sev_secrets_pa __ro_after_init;
55 SYM_PIC_ALIAS(sev_secrets_pa);
56
57 /* For early boot SVSM communication */
58 struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
59 SYM_PIC_ALIAS(boot_svsm_ca_page);
60
61 /*
62 * SVSM related information:
63 * During boot, the page tables are set up as identity mapped and later
64 * changed to use kernel virtual addresses. Maintain separate virtual and
65 * physical addresses for the CAA to allow SVSM functions to be used during
66 * early boot, both with identity mapped virtual addresses and proper kernel
67 * virtual addresses.
68 */
69 u64 boot_svsm_caa_pa __ro_after_init;
70 SYM_PIC_ALIAS(boot_svsm_caa_pa);
71
72 DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
73 DEFINE_PER_CPU(u64, svsm_caa_pa);
74
75 static inline struct svsm_ca *svsm_get_caa(void)
76 {
77 if (sev_cfg.use_cas)
78 return this_cpu_read(svsm_caa);
79 else
80 return rip_rel_ptr(&boot_svsm_ca_page);
81 }
82
83 static inline u64 svsm_get_caa_pa(void)
84 {
85 if (sev_cfg.use_cas)
86 return this_cpu_read(svsm_caa_pa);
87 else
88 return boot_svsm_caa_pa;
89 }
90
91 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */
92 #define AP_INIT_CS_LIMIT 0xffff
93 #define AP_INIT_DS_LIMIT 0xffff
94 #define AP_INIT_LDTR_LIMIT 0xffff
95 #define AP_INIT_GDTR_LIMIT 0xffff
96 #define AP_INIT_IDTR_LIMIT 0xffff
97 #define AP_INIT_TR_LIMIT 0xffff
98 #define AP_INIT_RFLAGS_DEFAULT 0x2
99 #define AP_INIT_DR6_DEFAULT 0xffff0ff0
100 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
101 #define AP_INIT_XCR0_DEFAULT 0x1
102 #define AP_INIT_X87_FTW_DEFAULT 0x5555
103 #define AP_INIT_X87_FCW_DEFAULT 0x0040
104 #define AP_INIT_CR0_DEFAULT 0x60000010
105 #define AP_INIT_MXCSR_DEFAULT 0x1f80
106
107 static const char * const sev_status_feat_names[] = {
108 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
109 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
110 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
111 [MSR_AMD64_SNP_VTOM_BIT] = "vTom",
112 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
113 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
114 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
115 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
116 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
117 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
118 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
119 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
120 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
121 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
122 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
123 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
124 [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC",
125 };
126
127 /*
128 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
129 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
130 * across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
131 */
132 static u64 snp_tsc_scale __ro_after_init;
133 static u64 snp_tsc_offset __ro_after_init;
134 static unsigned long snp_tsc_freq_khz __ro_after_init;
135
136 DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
137 DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
138
139 /*
140 * SVSM related information:
141 * When running under an SVSM, the VMPL that Linux is executing at must be
142 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
143 */
144 u8 snp_vmpl __ro_after_init;
145 EXPORT_SYMBOL_GPL(snp_vmpl);
146 SYM_PIC_ALIAS(snp_vmpl);
147
148 /*
149 * Since feature negotiation related variables are set early in the boot
150 * process they must reside in the .data section so as not to be zeroed
151 * out when the .bss section is later cleared.
152 *
153 * GHCB protocol version negotiated with the hypervisor.
154 */
155 u16 ghcb_version __ro_after_init;
156 SYM_PIC_ALIAS(ghcb_version);
157
158 /* For early boot hypervisor communication in SEV-ES enabled guests */
159 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
160
161 /*
162 * Needs to be in the .data section because we need it NULL before bss is
163 * cleared
164 */
165 struct ghcb *boot_ghcb __section(".data");
166
167 static u64 __init get_snp_jump_table_addr(void)
168 {
169 struct snp_secrets_page *secrets;
170 void __iomem *mem;
171 u64 addr;
172
173 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
174 if (!mem) {
175 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
176 return 0;
177 }
178
179 secrets = (__force struct snp_secrets_page *)mem;
180
181 addr = secrets->os_area.ap_jump_table_pa;
182 iounmap(mem);
183
184 return addr;
185 }
186
187 static u64 __init get_jump_table_addr(void)
188 {
189 struct ghcb_state state;
190 unsigned long flags;
191 struct ghcb *ghcb;
192 u64 ret = 0;
193
194 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
195 return get_snp_jump_table_addr();
196
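/*
 * SEV-ES: retrieve the AP jump table address via the GHCB AP Jump Table
 * NAE event (SW_EXITINFO1 = GET); the hypervisor returns the address in
 * SW_EXITINFO2.
 */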
197 local_irq_save(flags);
198
199 ghcb = __sev_get_ghcb(&state);
200
201 vc_ghcb_invalidate(ghcb);
202 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
203 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
204 ghcb_set_sw_exit_info_2(ghcb, 0);
205
206 sev_es_wr_ghcb_msr(__pa(ghcb));
207 VMGEXIT();
208
209 if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
210 ghcb_sw_exit_info_2_is_valid(ghcb))
211 ret = ghcb->save.sw_exit_info_2;
212
213 __sev_put_ghcb(&state);
214
215 local_irq_restore(flags);
216
217 return ret;
218 }
219
220 static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
221 {
222 struct es_em_ctxt ctxt;
223 u8 pending = 0;
224
225 vc_ghcb_invalidate(ghcb);
226
227 /*
228 * Fill in protocol and format specifiers. This can be called very early
229 * in the boot, so use rip-relative references as needed.
230 */
231 ghcb->protocol_version = ghcb_version;
232 ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
233
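/*
 * Use the SNP Run VMPL NAE event to hand control to the SVSM (running at
 * VMPL0) so it can service the request that was set up in @call.
 */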
234 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
235 ghcb_set_sw_exit_info_1(ghcb, 0);
236 ghcb_set_sw_exit_info_2(ghcb, 0);
237
238 sev_es_wr_ghcb_msr(__pa(ghcb));
239
240 svsm_issue_call(call, &pending);
241
242 if (pending)
243 return -EINVAL;
244
245 switch (verify_exception_info(ghcb, &ctxt)) {
246 case ES_OK:
247 break;
248 case ES_EXCEPTION:
249 vc_forward_exception(&ctxt);
250 fallthrough;
251 default:
252 return -EINVAL;
253 }
254
255 return svsm_process_result_codes(call);
256 }
257
258 static int svsm_perform_call_protocol(struct svsm_call *call)
259 {
260 struct ghcb_state state;
261 unsigned long flags;
262 struct ghcb *ghcb;
263 int ret;
264
265 flags = native_local_irq_save();
266
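/*
 * Pick the best available channel: the per-CPU GHCB once runtime GHCBs
 * are set up, otherwise the boot GHCB, otherwise fall back to the slower
 * GHCB MSR protocol (very early boot).
 */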
267 if (sev_cfg.ghcbs_initialized)
268 ghcb = __sev_get_ghcb(&state);
269 else if (boot_ghcb)
270 ghcb = boot_ghcb;
271 else
272 ghcb = NULL;
273
274 do {
275 ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
276 : __pi_svsm_perform_msr_protocol(call);
277 } while (ret == -EAGAIN);
278
279 if (sev_cfg.ghcbs_initialized)
280 __sev_put_ghcb(&state);
281
282 native_local_irq_restore(flags);
283
284 return ret;
285 }
286
287 static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
288 int ret, u64 svsm_ret)
289 {
290 WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
291 pfn, action, page_size, ret, svsm_ret);
292
293 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
294 }
295
296 static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
297 {
298 unsigned int page_size;
299 bool action;
300 u64 pfn;
301
302 pfn = pc->entry[pc->cur_index].pfn;
303 action = pc->entry[pc->cur_index].action;
304 page_size = pc->entry[pc->cur_index].page_size;
305
306 __pval_terminate(pfn, action, page_size, ret, svsm_ret);
307 }
308
309 static void pval_pages(struct snp_psc_desc *desc)
310 {
311 struct psc_entry *e;
312 unsigned long vaddr;
313 unsigned int size;
314 unsigned int i;
315 bool validate;
316 u64 pfn;
317 int rc;
318
319 for (i = 0; i <= desc->hdr.end_entry; i++) {
320 e = &desc->entries[i];
321
322 pfn = e->gfn;
323 vaddr = (unsigned long)pfn_to_kaddr(pfn);
324 size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
325 validate = e->operation == SNP_PAGE_STATE_PRIVATE;
326
327 rc = pvalidate(vaddr, size, validate);
328 if (!rc)
329 continue;
330
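/*
 * A 2M PVALIDATE failing with a size mismatch means the range is
 * mapped as 4K pages in the RMP, so validate each 4K page of the
 * 2M range individually.
 */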
331 if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
332 unsigned long vaddr_end = vaddr + PMD_SIZE;
333
334 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
335 rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
336 if (rc)
337 __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
338 }
339 } else {
340 __pval_terminate(pfn, validate, size, rc, 0);
341 }
342 }
343 }
344
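/*
 * Fill the SVSM calling area with 4K PVALIDATE entries for [pfn, pfn_end),
 * stopping when the CA is full. Returns the first PFN that did not fit so
 * the caller can continue from there.
 */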
345 static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
346 struct svsm_pvalidate_call *pc)
347 {
348 struct svsm_pvalidate_entry *pe;
349
350 /* Nothing in the CA yet */
351 pc->num_entries = 0;
352 pc->cur_index = 0;
353
354 pe = &pc->entry[0];
355
356 while (pfn < pfn_end) {
357 pe->page_size = RMP_PG_SIZE_4K;
358 pe->action = action;
359 pe->ignore_cf = 0;
360 pe->rsvd = 0;
361 pe->pfn = pfn;
362
363 pe++;
364 pfn++;
365
366 pc->num_entries++;
367 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
368 break;
369 }
370
371 return pfn;
372 }
373
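/*
 * Fill the SVSM calling area from the PSC descriptor starting at @desc_entry,
 * stopping when the CA is full or the last descriptor entry has been added.
 * Returns the index of the next unprocessed descriptor entry.
 */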
374 static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
375 struct svsm_pvalidate_call *pc)
376 {
377 struct svsm_pvalidate_entry *pe;
378 struct psc_entry *e;
379
380 /* Nothing in the CA yet */
381 pc->num_entries = 0;
382 pc->cur_index = 0;
383
384 pe = &pc->entry[0];
385 e = &desc->entries[desc_entry];
386
387 while (desc_entry <= desc->hdr.end_entry) {
388 pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
389 pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
390 pe->ignore_cf = 0;
391 pe->rsvd = 0;
392 pe->pfn = e->gfn;
393
394 pe++;
395 e++;
396
397 desc_entry++;
398 pc->num_entries++;
399 if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
400 break;
401 }
402
403 return desc_entry;
404 }
405
406 static void svsm_pval_pages(struct snp_psc_desc *desc)
407 {
408 struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
409 unsigned int i, pv_4k_count = 0;
410 struct svsm_pvalidate_call *pc;
411 struct svsm_call call = {};
412 unsigned long flags;
413 bool action;
414 u64 pc_pa;
415 int ret;
416
417 /*
418 * This can be called very early in the boot, use native functions in
419 * order to avoid paravirt issues.
420 */
421 flags = native_local_irq_save();
422
423 /*
424 * The SVSM calling area (CA) can support processing 510 entries at a
425 * time. Loop through the Page State Change descriptor until the CA is
426 * full or the last entry in the descriptor is reached, at which time
427 * the SVSM is invoked. This repeats until all entries in the descriptor
428 * are processed.
429 */
430 call.caa = svsm_get_caa();
431
432 pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
433 pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
434
435 /* Protocol 0, Call ID 1 */
436 call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
437 call.rcx = pc_pa;
438
439 for (i = 0; i <= desc->hdr.end_entry;) {
440 i = svsm_build_ca_from_psc_desc(desc, i, pc);
441
442 do {
443 ret = svsm_perform_call_protocol(&call);
444 if (!ret)
445 continue;
446
447 /*
448 * Check if the entry failed because of an RMP mismatch (a
449 * PVALIDATE at 2M was requested, but the page is mapped in
450 * the RMP as 4K).
451 */
452
453 if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
454 pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
455 /* Save this entry for post-processing at 4K */
456 pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
457
458 /* Skip to the next one unless at the end of the list */
459 pc->cur_index++;
460 if (pc->cur_index < pc->num_entries)
461 ret = -EAGAIN;
462 else
463 ret = 0;
464 }
465 } while (ret == -EAGAIN);
466
467 if (ret)
468 svsm_pval_terminate(pc, ret, call.rax_out);
469 }
470
471 /* Process any entries that failed to be validated at 2M and validate them at 4K */
472 for (i = 0; i < pv_4k_count; i++) {
473 u64 pfn, pfn_end;
474
475 action = pv_4k[i].action;
476 pfn = pv_4k[i].pfn;
477 pfn_end = pfn + 512;
478
479 while (pfn < pfn_end) {
480 pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
481
482 ret = svsm_perform_call_protocol(&call);
483 if (ret)
484 svsm_pval_terminate(pc, ret, call.rax_out);
485 }
486 }
487
488 native_local_irq_restore(flags);
489 }
490
491 static void pvalidate_pages(struct snp_psc_desc *desc)
492 {
493 struct psc_entry *e;
494 unsigned int i;
495
496 if (snp_vmpl)
497 svsm_pval_pages(desc);
498 else
499 pval_pages(desc);
500
501 /*
502 * If not affected by the cache-coherency vulnerability there is no need
503 * to perform the cache eviction mitigation.
504 */
505 if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO))
506 return;
507
508 for (i = 0; i <= desc->hdr.end_entry; i++) {
509 e = &desc->entries[i];
510
511 /*
512 * If validating memory (making it private) perform the cache
513 * eviction mitigation.
514 */
515 if (e->operation == SNP_PAGE_STATE_PRIVATE)
516 sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1);
517 }
518 }
519
520 static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
521 {
522 int cur_entry, end_entry, ret = 0;
523 struct snp_psc_desc *data;
524 struct es_em_ctxt ctxt;
525
526 vc_ghcb_invalidate(ghcb);
527
528 /* Copy the input desc into GHCB shared buffer */
529 data = (struct snp_psc_desc *)ghcb->shared_buffer;
530 memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
531
532 /*
533 * As per the GHCB specification, the hypervisor can resume the guest
534 * before processing all the entries. Check whether all the entries
535 * are processed. If not, then keep retrying. Note, the hypervisor
536 * will update the data memory directly to indicate the status, so
537 * reference the data->hdr everywhere.
538 *
539 * The strategy here is to wait for the hypervisor to change the page
540 * state in the RMP table before the guest accesses the memory pages. If the
541 * page state change was not successful, then later memory access will
542 * result in a crash.
543 */
544 cur_entry = data->hdr.cur_entry;
545 end_entry = data->hdr.end_entry;
546
547 while (data->hdr.cur_entry <= data->hdr.end_entry) {
548 ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
549
550 /* The hypervisor advances data->hdr.cur_entry as entries are processed. */
551 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
552
553 /*
554 * Page State Change VMGEXIT can pass error code through
555 * exit_info_2.
556 */
557 if (WARN(ret || ghcb->save.sw_exit_info_2,
558 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
559 ret, ghcb->save.sw_exit_info_2)) {
560 ret = 1;
561 goto out;
562 }
563
564 /* Verify that reserved bit is not set */
565 if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
566 ret = 1;
567 goto out;
568 }
569
570 /*
571 * Sanity check that entry processing is not going backwards.
572 * This will happen only if the hypervisor is tricking us.
573 */
574 if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
575 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
576 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
577 ret = 1;
578 goto out;
579 }
580 }
581
582 out:
583 return ret;
584 }
585
586 static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
587 unsigned long vaddr_end, int op)
588 {
589 struct ghcb_state state;
590 bool use_large_entry;
591 struct psc_hdr *hdr;
592 struct psc_entry *e;
593 unsigned long flags;
594 unsigned long pfn;
595 struct ghcb *ghcb;
596 int i;
597
598 hdr = &data->hdr;
599 e = data->entries;
600
601 memset(data, 0, sizeof(*data));
602 i = 0;
603
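/*
 * Build up to VMGEXIT_PSC_MAX_ENTRY entries: 2M entries are used for
 * PMD-aligned direct-map ranges, while vmalloc addresses and unaligned or
 * short ranges fall back to 4K entries.
 */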
604 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
605 hdr->end_entry = i;
606
607 if (is_vmalloc_addr((void *)vaddr)) {
608 pfn = vmalloc_to_pfn((void *)vaddr);
609 use_large_entry = false;
610 } else {
611 pfn = __pa(vaddr) >> PAGE_SHIFT;
612 use_large_entry = true;
613 }
614
615 e->gfn = pfn;
616 e->operation = op;
617
618 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
619 (vaddr_end - vaddr) >= PMD_SIZE) {
620 e->pagesize = RMP_PG_SIZE_2M;
621 vaddr += PMD_SIZE;
622 } else {
623 e->pagesize = RMP_PG_SIZE_4K;
624 vaddr += PAGE_SIZE;
625 }
626
627 e++;
628 i++;
629 }
630
631 /* Page validation must be rescinded before changing to shared */
632 if (op == SNP_PAGE_STATE_SHARED)
633 pvalidate_pages(data);
634
635 local_irq_save(flags);
636
637 if (sev_cfg.ghcbs_initialized)
638 ghcb = __sev_get_ghcb(&state);
639 else
640 ghcb = boot_ghcb;
641
642 /* Invoke the hypervisor to perform the page state changes */
643 if (!ghcb || vmgexit_psc(ghcb, data))
644 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
645
646 if (sev_cfg.ghcbs_initialized)
647 __sev_put_ghcb(&state);
648
649 local_irq_restore(flags);
650
651 /* Page validation must be performed after changing to private */
652 if (op == SNP_PAGE_STATE_PRIVATE)
653 pvalidate_pages(data);
654
655 return vaddr;
656 }
657
658 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
659 {
660 struct snp_psc_desc desc;
661 unsigned long vaddr_end;
662
663 /* Use the MSR protocol when a GHCB is not available. */
664 if (!boot_ghcb) {
665 struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() };
666
667 return early_set_pages_state(vaddr, __pa(vaddr), npages, &d);
668 }
669
670 vaddr = vaddr & PAGE_MASK;
671 vaddr_end = vaddr + (npages << PAGE_SHIFT);
672
673 while (vaddr < vaddr_end)
674 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
675 }
676
677 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
678 {
679 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
680 return;
681
682 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
683 }
684
685 void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
686 {
687 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
688 return;
689
690 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
691 }
692
693 void snp_accept_memory(phys_addr_t start, phys_addr_t end)
694 {
695 unsigned long vaddr, npages;
696
697 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
698 return;
699
700 vaddr = (unsigned long)__va(start);
701 npages = (end - start) >> PAGE_SHIFT;
702
703 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
704 }
705
706 static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
707 {
708 bool create = event != SVM_VMGEXIT_AP_DESTROY;
709 struct ghcb_state state;
710 unsigned long flags;
711 struct ghcb *ghcb;
712 int ret = 0;
713
714 local_irq_save(flags);
715
716 ghcb = __sev_get_ghcb(&state);
717
718 vc_ghcb_invalidate(ghcb);
719
720 if (create)
721 ghcb_set_rax(ghcb, vmsa->sev_features);
722
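/*
 * AP Creation NAE event: SW_EXITINFO1 packs the target APIC ID (bits 63:32),
 * the VMPL to run at (bits 31:16) and the request type (bits 15:0), while
 * SW_EXITINFO2 carries the VMSA GPA. For a create request, RAX holds the
 * SEV_FEATURES the vCPU must run with.
 */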
723 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
724 ghcb_set_sw_exit_info_1(ghcb,
725 ((u64)apic_id << 32) |
726 ((u64)snp_vmpl << 16) |
727 event);
728 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
729
730 sev_es_wr_ghcb_msr(__pa(ghcb));
731 VMGEXIT();
732
733 if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
734 lower_32_bits(ghcb->save.sw_exit_info_1)) {
735 pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
736 ret = -EINVAL;
737 }
738
739 __sev_put_ghcb(&state);
740
741 local_irq_restore(flags);
742
743 return ret;
744 }
745
746 static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
747 {
748 int ret;
749
750 if (snp_vmpl) {
751 struct svsm_call call = {};
752 unsigned long flags;
753
754 local_irq_save(flags);
755
756 call.caa = this_cpu_read(svsm_caa);
757 call.rcx = __pa(va);
758
759 if (make_vmsa) {
760 /* Protocol 0, Call ID 2 */
761 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
762 call.rdx = __pa(caa);
763 call.r8 = apic_id;
764 } else {
765 /* Protocol 0, Call ID 3 */
766 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
767 }
768
769 ret = svsm_perform_call_protocol(&call);
770
771 local_irq_restore(flags);
772 } else {
773 /*
774 * If the kernel runs at VMPL0, it can change the VMSA
775 * bit for a page using the RMPADJUST instruction.
776 * However, for the instruction to succeed it must
777 * target the permissions of a lesser privileged (higher
778 * numbered) VMPL level, so use VMPL1.
779 */
780 u64 attrs = 1;
781
782 if (make_vmsa)
783 attrs |= RMPADJUST_VMSA_PAGE_BIT;
784
785 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
786 }
787
788 return ret;
789 }
790
791 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
792 {
793 int err;
794
795 err = snp_set_vmsa(vmsa, NULL, apic_id, false);
796 if (err)
797 pr_err("clear VMSA page failed (%u), leaking page\n", err);
798 else
799 free_page((unsigned long)vmsa);
800 }
801
802 static void set_pte_enc(pte_t *kpte, int level, void *va)
803 {
804 struct pte_enc_desc d = {
805 .kpte = kpte,
806 .pte_level = level,
807 .va = va,
808 .encrypt = true
809 };
810
811 prepare_pte_enc(&d);
812 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
813 }
814
815 static void unshare_all_memory(void)
816 {
817 unsigned long addr, end, size, ghcb;
818 struct sev_es_runtime_data *data;
819 unsigned int npages, level;
820 bool skipped_addr;
821 pte_t *pte;
822 int cpu;
823
824 /* Unshare the direct mapping. */
825 addr = PAGE_OFFSET;
826 end = PAGE_OFFSET + get_max_mapped();
827
828 while (addr < end) {
829 pte = lookup_address(addr, &level);
830 size = page_level_size(level);
831 npages = size / PAGE_SIZE;
832 skipped_addr = false;
833
834 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
835 addr += size;
836 continue;
837 }
838
839 /*
840 * Ensure that all the per-CPU GHCBs are made private at the
841 * end of the unsharing loop so that the switch to the slower
842 * MSR protocol happens last.
843 */
844 for_each_possible_cpu(cpu) {
845 data = per_cpu(runtime_data, cpu);
846 ghcb = (unsigned long)&data->ghcb_page;
847
848 /* Handle the case of a huge page containing the GHCB page */
849 if (addr <= ghcb && ghcb < addr + size) {
850 skipped_addr = true;
851 break;
852 }
853 }
854
855 if (!skipped_addr) {
856 set_pte_enc(pte, level, (void *)addr);
857 snp_set_memory_private(addr, npages);
858 }
859 addr += size;
860 }
861
862 /* Unshare all bss decrypted memory. */
863 addr = (unsigned long)__start_bss_decrypted;
864 end = (unsigned long)__start_bss_decrypted_unused;
865 npages = (end - addr) >> PAGE_SHIFT;
866
867 for (; addr < end; addr += PAGE_SIZE) {
868 pte = lookup_address(addr, &level);
869 if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
870 continue;
871
872 set_pte_enc(pte, level, (void *)addr);
873 }
874 addr = (unsigned long)__start_bss_decrypted;
875 snp_set_memory_private(addr, npages);
876
877 __flush_tlb_all();
878 }
879
880 /* Stop new private<->shared conversions */
881 void snp_kexec_begin(void)
882 {
883 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
884 return;
885
886 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
887 return;
888
889 /*
890 * Crash kernel ends up here with interrupts disabled: can't wait for
891 * conversions to finish.
892 *
893 * If a race happened, just report and proceed.
894 */
895 if (!set_memory_enc_stop_conversion())
896 pr_warn("Failed to stop shared<->private conversions\n");
897 }
898
899 /*
900 * Shut down all APs except the one handling kexec/kdump and clear the
901 * VMSA tag on the APs' VMSA pages, as they are no longer being used as
902 * VMSA pages.
903 */
904 static void shutdown_all_aps(void)
905 {
906 struct sev_es_save_area *vmsa;
907 int apic_id, this_cpu, cpu;
908
909 this_cpu = get_cpu();
910
911 /*
912 * APs are already in the HLT loop when the enc_kexec_finish() callback
913 * is invoked.
914 */
915 for_each_present_cpu(cpu) {
916 vmsa = per_cpu(sev_vmsa, cpu);
917
918 /*
919 * The BSP or offlined APs do not have guest allocated VMSA
920 * and there is no need to clear the VMSA tag for this page.
921 */
922 if (!vmsa)
923 continue;
924
925 /*
926 * Cannot clear the VMSA tag for the currently running vCPU.
927 */
928 if (this_cpu == cpu) {
929 unsigned long pa;
930 struct page *p;
931
932 pa = __pa(vmsa);
933 /*
934 * Mark the VMSA page of the running vCPU as offline
935 * so that it is excluded and not touched by makedumpfile
936 * while generating vmcore during kdump.
937 */
938 p = pfn_to_online_page(pa >> PAGE_SHIFT);
939 if (p)
940 __SetPageOffline(p);
941 continue;
942 }
943
944 apic_id = cpuid_to_apicid[cpu];
945
946 /*
947 * Issue AP destroy to ensure AP gets kicked out of guest mode
948 * to allow using RMPADJUST to remove the VMSA tag on its
949 * VMSA page.
950 */
951 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
952 snp_cleanup_vmsa(vmsa, apic_id);
953 }
954
955 put_cpu();
956 }
957
958 void snp_kexec_finish(void)
959 {
960 struct sev_es_runtime_data *data;
961 unsigned long size, addr;
962 unsigned int level, cpu;
963 struct ghcb *ghcb;
964 pte_t *pte;
965
966 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
967 return;
968
969 if (!IS_ENABLED(CONFIG_KEXEC_CORE))
970 return;
971
972 shutdown_all_aps();
973
974 unshare_all_memory();
975
976 /*
977 * Switch to using the MSR protocol to change per-CPU GHCBs to
978 * private. All the per-CPU GHCBs have been switched back to private,
979 * so no further GHCB calls to the hypervisor can be made beyond this point
980 * until the kexec'ed kernel starts running.
981 */
982 boot_ghcb = NULL;
983 sev_cfg.ghcbs_initialized = false;
984
985 for_each_possible_cpu(cpu) {
986 data = per_cpu(runtime_data, cpu);
987 ghcb = &data->ghcb_page;
988 pte = lookup_address((unsigned long)ghcb, &level);
989 size = page_level_size(level);
990 /* Handle the case of a huge page containing the GHCB page */
991 addr = (unsigned long)ghcb & page_level_mask(level);
992 set_pte_enc(pte, level, (void *)addr);
993 snp_set_memory_private(addr, (size / PAGE_SIZE));
994 }
995 }
996
997 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
998 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
999 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
1000
1001 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
1002 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
1003
1004 static void *snp_alloc_vmsa_page(int cpu)
1005 {
1006 struct page *p;
1007
1008 /*
1009 * Allocate VMSA page to work around the SNP erratum where the CPU will
1010 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
1011 * collides with the RMP entry of the VMSA page. The recommended workaround
1012 * is to not use a large page.
1013 *
1014 * Allocate an 8k page which is also 8k-aligned.
1015 */
1016 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
1017 if (!p)
1018 return NULL;
1019
1020 split_page(p, 1);
1021
1022 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
1023 __free_page(p);
1024
1025 return page_address(p + 1);
1026 }
1027
1028 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
1029 {
1030 struct sev_es_save_area *cur_vmsa, *vmsa;
1031 struct svsm_ca *caa;
1032 u8 sipi_vector;
1033 int ret;
1034 u64 cr4;
1035
1036 /*
1037 * The hypervisor SNP feature support check has happened earlier, just check
1038 * the AP_CREATION one here.
1039 */
1040 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
1041 return -EOPNOTSUPP;
1042
1043 /*
1044 * Verify the desired start IP against the known trampoline start IP
1045 * to catch any future new trampolines that may be introduced that
1046 * would require a new protected guest entry point.
1047 */
1048 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
1049 "Unsupported SNP start_ip: %lx\n", start_ip))
1050 return -EINVAL;
1051
1052 /* Override start_ip with known protected guest start IP */
1053 start_ip = real_mode_header->sev_es_trampoline_start;
1054 cur_vmsa = per_cpu(sev_vmsa, cpu);
1055
1056 /*
1057 * A new VMSA is created each time because there is no guarantee that
1058 * the current VMSA is the kernel's or that the vCPU is not running. If
1059 * an attempt was done to use the current VMSA with a running vCPU, a
1060 * #VMEXIT of that vCPU would wipe out all of the settings being done
1061 * here.
1062 */
1063 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
1064 if (!vmsa)
1065 return -ENOMEM;
1066
1067 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
1068 caa = per_cpu(svsm_caa, cpu);
1069
1070 /* CR4 should maintain the MCE value */
1071 cr4 = native_read_cr4() & X86_CR4_MCE;
1072
1073 /* Set the CS value based on the start_ip converted to a SIPI vector */
1074 sipi_vector = (start_ip >> 12);
1075 vmsa->cs.base = sipi_vector << 12;
1076 vmsa->cs.limit = AP_INIT_CS_LIMIT;
1077 vmsa->cs.attrib = INIT_CS_ATTRIBS;
1078 vmsa->cs.selector = sipi_vector << 8;
1079
1080 /* Set the RIP value based on start_ip */
1081 vmsa->rip = start_ip & 0xfff;
1082
1083 /* Set AP INIT defaults as documented in the APM */
1084 vmsa->ds.limit = AP_INIT_DS_LIMIT;
1085 vmsa->ds.attrib = INIT_DS_ATTRIBS;
1086 vmsa->es = vmsa->ds;
1087 vmsa->fs = vmsa->ds;
1088 vmsa->gs = vmsa->ds;
1089 vmsa->ss = vmsa->ds;
1090
1091 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
1092 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
1093 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
1094 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
1095 vmsa->tr.limit = AP_INIT_TR_LIMIT;
1096 vmsa->tr.attrib = INIT_TR_ATTRIBS;
1097
1098 vmsa->cr4 = cr4;
1099 vmsa->cr0 = AP_INIT_CR0_DEFAULT;
1100 vmsa->dr7 = DR7_RESET_VALUE;
1101 vmsa->dr6 = AP_INIT_DR6_DEFAULT;
1102 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
1103 vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
1104 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
1105 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
1106 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
1107 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
1108
1109 if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
1110 vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK;
1111
1112 /* SVME must be set. */
1113 vmsa->efer = EFER_SVME;
1114
1115 /*
1116 * Set the SNP-specific fields for this VMSA:
1117 * VMPL level
1118 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
1119 */
1120 vmsa->vmpl = snp_vmpl;
1121 vmsa->sev_features = sev_status >> 2;
1122
1123 /* Populate AP's TSC scale/offset to get accurate TSC values. */
1124 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
1125 vmsa->tsc_scale = snp_tsc_scale;
1126 vmsa->tsc_offset = snp_tsc_offset;
1127 }
1128
1129 /* Switch the page over to a VMSA page now that it is initialized */
1130 ret = snp_set_vmsa(vmsa, caa, apic_id, true);
1131 if (ret) {
1132 pr_err("set VMSA page failed (%u)\n", ret);
1133 free_page((unsigned long)vmsa);
1134
1135 return -EINVAL;
1136 }
1137
1138 /* Issue VMGEXIT AP Creation NAE event */
1139 ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
1140 if (ret) {
1141 snp_cleanup_vmsa(vmsa, apic_id);
1142 vmsa = NULL;
1143 }
1144
1145 /* Free up any previous VMSA page */
1146 if (cur_vmsa)
1147 snp_cleanup_vmsa(cur_vmsa, apic_id);
1148
1149 /* Record the current VMSA page */
1150 per_cpu(sev_vmsa, cpu) = vmsa;
1151
1152 return ret;
1153 }
1154
1155 void __init snp_set_wakeup_secondary_cpu(void)
1156 {
1157 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1158 return;
1159
1160 /*
1161 * Always set this override if SNP is enabled. This makes it the
1162 * required method to start APs under SNP. If the hypervisor does
1163 * not support AP creation, then no APs will be started.
1164 */
1165 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
1166 }
1167
1168 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
1169 {
1170 u16 startup_cs, startup_ip;
1171 phys_addr_t jump_table_pa;
1172 u64 jump_table_addr;
1173 u16 __iomem *jump_table;
1174
1175 jump_table_addr = get_jump_table_addr();
1176
1177 /* On UP guests there is no jump table so this is not a failure */
1178 if (!jump_table_addr)
1179 return 0;
1180
1181 /* Check if AP Jump Table is page-aligned */
1182 if (jump_table_addr & ~PAGE_MASK)
1183 return -EINVAL;
1184
1185 jump_table_pa = jump_table_addr & PAGE_MASK;
1186
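/*
 * Encode the AP entry point as a real-mode CS:IP pair: the segment is the
 * trampoline's physical address >> 4 and the IP is the offset of the SEV-ES
 * trampoline entry within that segment.
 */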
1187 startup_cs = (u16)(rmh->trampoline_start >> 4);
1188 startup_ip = (u16)(rmh->sev_es_trampoline_start -
1189 rmh->trampoline_start);
1190
1191 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
1192 if (!jump_table)
1193 return -EIO;
1194
1195 writew(startup_ip, &jump_table[0]);
1196 writew(startup_cs, &jump_table[1]);
1197
1198 iounmap(jump_table);
1199
1200 return 0;
1201 }
1202
1203 /*
1204 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
1205 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
1206 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
1207 *
1208 * When running under SVSM the CA page is needed too, so map it as well.
1209 */
1210 int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
1211 {
1212 unsigned long address, pflags, pflags_enc;
1213 struct sev_es_runtime_data *data;
1214 int cpu;
1215 u64 pfn;
1216
1217 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1218 return 0;
1219
1220 pflags = _PAGE_NX | _PAGE_RW;
1221 pflags_enc = cc_mkenc(pflags);
1222
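/* Map the GHCBs shared (decrypted) and the SVSM CAs encrypted. */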
1223 for_each_possible_cpu(cpu) {
1224 data = per_cpu(runtime_data, cpu);
1225
1226 address = __pa(&data->ghcb_page);
1227 pfn = address >> PAGE_SHIFT;
1228
1229 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
1230 return 1;
1231
1232 if (snp_vmpl) {
1233 address = per_cpu(svsm_caa_pa, cpu);
1234 if (!address)
1235 return 1;
1236
1237 pfn = address >> PAGE_SHIFT;
1238 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
1239 return 1;
1240 }
1241 }
1242
1243 return 0;
1244 }
1245
1246 u64 savic_ghcb_msr_read(u32 reg)
1247 {
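/* x2APIC MSRs live at APIC_BASE_MSR plus the MMIO register offset / 16. */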
1248 u64 msr = APIC_BASE_MSR + (reg >> 4);
1249 struct pt_regs regs = { .cx = msr };
1250 struct es_em_ctxt ctxt = { .regs = &regs };
1251 struct ghcb_state state;
1252 enum es_result res;
1253 struct ghcb *ghcb;
1254
1255 guard(irqsave)();
1256
1257 ghcb = __sev_get_ghcb(&state);
1258 vc_ghcb_invalidate(ghcb);
1259
1260 res = sev_es_ghcb_handle_msr(ghcb, &ctxt, false);
1261 if (res != ES_OK) {
1262 pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
1263 /* MSR read failures are treated as fatal errors */
1264 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
1265 }
1266
1267 __sev_put_ghcb(&state);
1268
1269 return regs.ax | regs.dx << 32;
1270 }
1271
1272 void savic_ghcb_msr_write(u32 reg, u64 value)
1273 {
1274 u64 msr = APIC_BASE_MSR + (reg >> 4);
1275 struct pt_regs regs = {
1276 .cx = msr,
1277 .ax = lower_32_bits(value),
1278 .dx = upper_32_bits(value)
1279 };
1280 struct es_em_ctxt ctxt = { .regs = &regs };
1281 struct ghcb_state state;
1282 enum es_result res;
1283 struct ghcb *ghcb;
1284
1285 guard(irqsave)();
1286
1287 ghcb = __sev_get_ghcb(&state);
1288 vc_ghcb_invalidate(ghcb);
1289
1290 res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true);
1291 if (res != ES_OK) {
1292 pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
1293 /* MSR writes should never fail. Any failure is fatal error for SNP guest */
1294 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
1295 }
1296
1297 __sev_put_ghcb(&state);
1298 }
1299
1300 enum es_result savic_register_gpa(u64 gpa)
1301 {
1302 struct ghcb_state state;
1303 struct es_em_ctxt ctxt;
1304 enum es_result res;
1305 struct ghcb *ghcb;
1306
1307 guard(irqsave)();
1308
1309 ghcb = __sev_get_ghcb(&state);
1310 vc_ghcb_invalidate(ghcb);
1311
1312 ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
1313 ghcb_set_rbx(ghcb, gpa);
1314 res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
1315 SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0);
1316
1317 __sev_put_ghcb(&state);
1318
1319 return res;
1320 }
1321
1322 enum es_result savic_unregister_gpa(u64 *gpa)
1323 {
1324 struct ghcb_state state;
1325 struct es_em_ctxt ctxt;
1326 enum es_result res;
1327 struct ghcb *ghcb;
1328
1329 guard(irqsave)();
1330
1331 ghcb = __sev_get_ghcb(&state);
1332 vc_ghcb_invalidate(ghcb);
1333
1334 ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA);
1335 res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC,
1336 SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0);
1337 if (gpa && res == ES_OK)
1338 *gpa = ghcb->save.rbx;
1339
1340 __sev_put_ghcb(&state);
1341
1342 return res;
1343 }
1344
1345 static void snp_register_per_cpu_ghcb(void)
1346 {
1347 struct sev_es_runtime_data *data;
1348 struct ghcb *ghcb;
1349
1350 data = this_cpu_read(runtime_data);
1351 ghcb = &data->ghcb_page;
1352
1353 snp_register_ghcb_early(__pa(ghcb));
1354 }
1355
1356 void setup_ghcb(void)
1357 {
1358 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1359 return;
1360
1361 /*
1362 * Check whether the runtime #VC exception handler is active. It uses
1363 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
1364 *
1365 * If SNP is active, register the per-CPU GHCB page so that the runtime
1366 * exception handler can use it.
1367 */
1368 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
1369 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1370 snp_register_per_cpu_ghcb();
1371
1372 sev_cfg.ghcbs_initialized = true;
1373
1374 return;
1375 }
1376
1377 /*
1378 * Make sure the hypervisor talks a supported protocol.
1379 * This gets called only in the BSP boot phase.
1380 */
1381 if (!sev_es_negotiate_protocol())
1382 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1383
1384 /*
1385 * Clear the boot_ghcb. The first exception comes in before the bss
1386 * section is cleared.
1387 */
1388 memset(&boot_ghcb_page, 0, PAGE_SIZE);
1389
1390 /* Alright - Make the boot-ghcb public */
1391 boot_ghcb = &boot_ghcb_page;
1392
1393 /* SNP guest requires that GHCB GPA must be registered. */
1394 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1395 snp_register_ghcb_early(__pa(&boot_ghcb_page));
1396 }
1397
1398 #ifdef CONFIG_HOTPLUG_CPU
1399 static void sev_es_ap_hlt_loop(void)
1400 {
1401 struct ghcb_state state;
1402 struct ghcb *ghcb;
1403
1404 ghcb = __sev_get_ghcb(&state);
1405
1406 while (true) {
1407 vc_ghcb_invalidate(ghcb);
1408 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
1409 ghcb_set_sw_exit_info_1(ghcb, 0);
1410 ghcb_set_sw_exit_info_2(ghcb, 0);
1411
1412 sev_es_wr_ghcb_msr(__pa(ghcb));
1413 VMGEXIT();
1414
1415 /* Wakeup signal? */
1416 if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
1417 ghcb->save.sw_exit_info_2)
1418 break;
1419 }
1420
1421 __sev_put_ghcb(&state);
1422 }
1423
1424 /*
1425 * Play_dead handler when running under SEV-ES. This is needed because
1426 * the hypervisor can't deliver an SIPI request to restart the AP.
1427 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
1428 * hypervisor wakes it up again.
1429 */
1430 static void sev_es_play_dead(void)
1431 {
1432 play_dead_common();
1433
1434 /* IRQs now disabled */
1435
1436 sev_es_ap_hlt_loop();
1437
1438 /*
1439 * If we get here, the VCPU was woken up again. Jump to CPU
1440 * startup code to get it back online.
1441 */
1442 soft_restart_cpu();
1443 }
1444 #else /* CONFIG_HOTPLUG_CPU */
1445 #define sev_es_play_dead native_play_dead
1446 #endif /* CONFIG_HOTPLUG_CPU */
1447
1448 #ifdef CONFIG_SMP
1449 static void __init sev_es_setup_play_dead(void)
1450 {
1451 smp_ops.play_dead = sev_es_play_dead;
1452 }
1453 #else
1454 static inline void sev_es_setup_play_dead(void) { }
1455 #endif
1456
1457 static void __init alloc_runtime_data(int cpu)
1458 {
1459 struct sev_es_runtime_data *data;
1460
1461 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
1462 if (!data)
1463 panic("Can't allocate SEV-ES runtime data");
1464
1465 per_cpu(runtime_data, cpu) = data;
1466
1467 if (snp_vmpl) {
1468 struct svsm_ca *caa;
1469
1470 /* Allocate the SVSM CA page if an SVSM is present */
1471 caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
1472 : &boot_svsm_ca_page;
1473
1474 per_cpu(svsm_caa, cpu) = caa;
1475 per_cpu(svsm_caa_pa, cpu) = __pa(caa);
1476 }
1477 }
1478
1479 static void __init init_ghcb(int cpu)
1480 {
1481 struct sev_es_runtime_data *data;
1482 int err;
1483
1484 data = per_cpu(runtime_data, cpu);
1485
1486 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
1487 sizeof(data->ghcb_page));
1488 if (err)
1489 panic("Can't map GHCBs unencrypted");
1490
1491 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
1492
1493 data->ghcb_active = false;
1494 data->backup_ghcb_active = false;
1495 }
1496
1497 void __init sev_es_init_vc_handling(void)
1498 {
1499 int cpu;
1500
1501 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
1502
1503 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
1504 return;
1505
1506 if (!sev_es_check_cpu_features())
1507 panic("SEV-ES CPU Features missing");
1508
1509 /*
1510 * SNP is supported in v2 of the GHCB spec which mandates support for HV
1511 * features.
1512 */
1513 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
1514 sev_hv_features = get_hv_features();
1515
1516 if (!(sev_hv_features & GHCB_HV_FT_SNP))
1517 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
1518 }
1519
1520 /* Initialize per-cpu GHCB pages */
1521 for_each_possible_cpu(cpu) {
1522 alloc_runtime_data(cpu);
1523 init_ghcb(cpu);
1524 }
1525
1526 if (snp_vmpl)
1527 sev_cfg.use_cas = true;
1528
1529 sev_es_setup_play_dead();
1530
1531 /* Secondary CPUs use the runtime #VC handler */
1532 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
1533 }
1534
1535 /*
1536 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1537 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
1538 * ROM region can cause a crash since this region is not pre-validated.
1539 */
1540 void __init snp_dmi_setup(void)
1541 {
1542 if (efi_enabled(EFI_CONFIG_TABLES))
1543 dmi_setup();
1544 }
1545
1546 static void dump_cpuid_table(void)
1547 {
1548 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1549 int i = 0;
1550
1551 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
1552 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
1553
1554 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
1555 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
1556
1557 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
1558 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
1559 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
1560 }
1561 }
1562
1563 /*
1564 * It is useful from an auditing/testing perspective to provide an easy way
1565 * for the guest owner to know that the CPUID table has been initialized as
1566 * expected, but that initialization happens too early in boot to print any
1567 * sort of indicator, and there's not really any other good place to do it,
1568 * so do it here.
1569 *
1570 * If running as an SNP guest, report the current VM privilege level (VMPL).
1571 */
1572 static int __init report_snp_info(void)
1573 {
1574 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
1575
1576 if (cpuid_table->count) {
1577 pr_info("Using SNP CPUID table, %d entries present.\n",
1578 cpuid_table->count);
1579
1580 if (sev_cfg.debug)
1581 dump_cpuid_table();
1582 }
1583
1584 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1585 pr_info("SNP running at VMPL%u.\n", snp_vmpl);
1586
1587 return 0;
1588 }
1589 arch_initcall(report_snp_info);
1590
1591 static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
1592 {
1593 /* If (new) lengths have been returned, propagate them up */
1594 if (call->rcx_out != call->rcx)
1595 input->manifest_buf.len = call->rcx_out;
1596
1597 if (call->rdx_out != call->rdx)
1598 input->certificates_buf.len = call->rdx_out;
1599
1600 if (call->r8_out != call->r8)
1601 input->report_buf.len = call->r8_out;
1602 }
1603
1604 int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
1605 struct svsm_attest_call *input)
1606 {
1607 struct svsm_attest_call *ac;
1608 unsigned long flags;
1609 u64 attest_call_pa;
1610 int ret;
1611
1612 if (!snp_vmpl)
1613 return -EINVAL;
1614
1615 local_irq_save(flags);
1616
1617 call->caa = svsm_get_caa();
1618
1619 ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
1620 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
1621
1622 *ac = *input;
1623
1624 /*
1625 * Set input registers for the request and set RDX and R8 to known
1626 * values in order to detect length values being returned in them.
1627 */
1628 call->rax = call_id;
1629 call->rcx = attest_call_pa;
1630 call->rdx = -1;
1631 call->r8 = -1;
1632 ret = svsm_perform_call_protocol(call);
1633 update_attest_input(call, input);
1634
1635 local_irq_restore(flags);
1636
1637 return ret;
1638 }
1639 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
1640
1641 static int snp_issue_guest_request(struct snp_guest_req *req)
1642 {
1643 struct snp_req_data *input = &req->input;
1644 struct ghcb_state state;
1645 struct es_em_ctxt ctxt;
1646 unsigned long flags;
1647 struct ghcb *ghcb;
1648 int ret;
1649
1650 req->exitinfo2 = SEV_RET_NO_FW_CALL;
1651
1652 /*
1653 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
1654 * a per-CPU GHCB.
1655 */
1656 local_irq_save(flags);
1657
1658 ghcb = __sev_get_ghcb(&state);
1659 if (!ghcb) {
1660 ret = -EIO;
1661 goto e_restore_irq;
1662 }
1663
1664 vc_ghcb_invalidate(ghcb);
1665
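/*
 * For extended guest requests, pass the GPA and size (in pages) of the
 * certificate data buffer to the hypervisor in RAX/RBX.
 */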
1666 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1667 ghcb_set_rax(ghcb, input->data_gpa);
1668 ghcb_set_rbx(ghcb, input->data_npages);
1669 }
1670
1671 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
1672 if (ret)
1673 goto e_put;
1674
1675 req->exitinfo2 = ghcb->save.sw_exit_info_2;
1676 switch (req->exitinfo2) {
1677 case 0:
1678 break;
1679
1680 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
1681 ret = -EAGAIN;
1682 break;
1683
1684 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
1685 /* The number of expected pages is returned in RBX */
1686 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
1687 input->data_npages = ghcb_get_rbx(ghcb);
1688 ret = -ENOSPC;
1689 break;
1690 }
1691 fallthrough;
1692 default:
1693 ret = -EIO;
1694 break;
1695 }
1696
1697 e_put:
1698 __sev_put_ghcb(&state);
1699 e_restore_irq:
1700 local_irq_restore(flags);
1701
1702 return ret;
1703 }
1704
1705 /**
1706 * snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
1707 *
1708 * Check that there is SVSM and that it supports at least TPM_SEND_COMMAND
1709 * which is the only request used so far.
1710 *
1711 * Return: true if the platform provides a vTPM SVSM device, false otherwise.
1712 */
1713 static bool snp_svsm_vtpm_probe(void)
1714 {
1715 struct svsm_call call = {};
1716
1717 /* The vTPM device is available only if a SVSM is present */
1718 if (!snp_vmpl)
1719 return false;
1720
1721 call.caa = svsm_get_caa();
1722 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
1723
1724 if (svsm_perform_call_protocol(&call))
1725 return false;
1726
1727 /* Check that the platform commands contain TPM_SEND_COMMAND - platform command 8 */
1728 return call.rcx_out & BIT_ULL(8);
1729 }
1730
1731 /**
1732 * snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
1733 * @buffer: A buffer used to both send the command and receive the response.
1734 *
1735 * Execute a SVSM_VTPM_CMD call as defined by
1736 * "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
1737 *
1738 * All command request/response buffers have a common structure as specified by
1739 * the following table:
1740 * Byte Size In/Out Description
1741 * Offset (Bytes)
1742 * 0x000 4 In Platform command
1743 * Out Platform command response size
1744 *
1745 * Each command can build upon this common request/response structure to create
1746 * a structure specific to the command. See include/linux/tpm_svsm.h for more
1747 * details.
1748 *
1749 * Return: 0 on success, -errno on failure
1750 */
1751 int snp_svsm_vtpm_send_command(u8 *buffer)
1752 {
1753 struct svsm_call call = {};
1754
1755 call.caa = svsm_get_caa();
1756 call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
1757 call.rcx = __pa(buffer);
1758
1759 return svsm_perform_call_protocol(&call);
1760 }
1761 EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
1762
1763 static struct platform_device sev_guest_device = {
1764 .name = "sev-guest",
1765 .id = -1,
1766 };
1767
1768 static struct platform_device tpm_svsm_device = {
1769 .name = "tpm-svsm",
1770 .id = -1,
1771 };
1772
1773 static int __init snp_init_platform_device(void)
1774 {
1775 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1776 return -ENODEV;
1777
1778 if (platform_device_register(&sev_guest_device))
1779 return -ENODEV;
1780
1781 if (snp_svsm_vtpm_probe() &&
1782 platform_device_register(&tpm_svsm_device))
1783 return -ENODEV;
1784
1785 pr_info("SNP guest platform devices initialized.\n");
1786 return 0;
1787 }
1788 device_initcall(snp_init_platform_device);
1789
1790 void sev_show_status(void)
1791 {
1792 int i;
1793
1794 pr_info("Status: ");
1795 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
1796 if (sev_status & BIT_ULL(i)) {
1797 if (!sev_status_feat_names[i])
1798 continue;
1799
1800 pr_cont("%s ", sev_status_feat_names[i]);
1801 }
1802 }
1803 pr_cont("\n");
1804 }
1805
1806 #ifdef CONFIG_SYSFS
1807 static ssize_t vmpl_show(struct kobject *kobj,
1808 struct kobj_attribute *attr, char *buf)
1809 {
1810 return sysfs_emit(buf, "%d\n", snp_vmpl);
1811 }
1812
1813 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
1814
1815 static struct attribute *vmpl_attrs[] = {
1816 &vmpl_attr.attr,
1817 NULL
1818 };
1819
1820 static struct attribute_group sev_attr_group = {
1821 .attrs = vmpl_attrs,
1822 };
1823
1824 static int __init sev_sysfs_init(void)
1825 {
1826 struct kobject *sev_kobj;
1827 struct device *dev_root;
1828 int ret;
1829
1830 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
1831 return -ENODEV;
1832
1833 dev_root = bus_get_dev_root(&cpu_subsys);
1834 if (!dev_root)
1835 return -ENODEV;
1836
1837 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
1838 put_device(dev_root);
1839
1840 if (!sev_kobj)
1841 return -ENOMEM;
1842
1843 ret = sysfs_create_group(sev_kobj, &sev_attr_group);
1844 if (ret)
1845 kobject_put(sev_kobj);
1846
1847 return ret;
1848 }
1849 arch_initcall(sev_sysfs_init);
1850 #endif // CONFIG_SYSFS
1851
1852 static void free_shared_pages(void *buf, size_t sz)
1853 {
1854 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1855 int ret;
1856
1857 if (!buf)
1858 return;
1859
1860 ret = set_memory_encrypted((unsigned long)buf, npages);
1861 if (ret) {
1862 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
1863 return;
1864 }
1865
1866 __free_pages(virt_to_page(buf), get_order(sz));
1867 }
1868
1869 static void *alloc_shared_pages(size_t sz)
1870 {
1871 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
1872 struct page *page;
1873 int ret;
1874
1875 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
1876 if (!page)
1877 return NULL;
1878
1879 ret = set_memory_decrypted((unsigned long)page_address(page), npages);
1880 if (ret) {
1881 pr_err("failed to mark page shared, ret=%d\n", ret);
1882 __free_pages(page, get_order(sz));
1883 return NULL;
1884 }
1885
1886 return page_address(page);
1887 }
1888
1889 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
1890 {
1891 u8 *key = NULL;
1892
1893 switch (id) {
1894 case 0:
1895 *seqno = &secrets->os_area.msg_seqno_0;
1896 key = secrets->vmpck0;
1897 break;
1898 case 1:
1899 *seqno = &secrets->os_area.msg_seqno_1;
1900 key = secrets->vmpck1;
1901 break;
1902 case 2:
1903 *seqno = &secrets->os_area.msg_seqno_2;
1904 key = secrets->vmpck2;
1905 break;
1906 case 3:
1907 *seqno = &secrets->os_area.msg_seqno_3;
1908 key = secrets->vmpck3;
1909 break;
1910 default:
1911 break;
1912 }
1913
1914 return key;
1915 }
1916
1917 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
1918 {
1919 struct aesgcm_ctx *ctx;
1920
1921 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1922 if (!ctx)
1923 return NULL;
1924
1925 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
1926 pr_err("Crypto context initialization failed\n");
1927 kfree(ctx);
1928 return NULL;
1929 }
1930
1931 return ctx;
1932 }
1933
1934 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
1935 {
1936 /* Adjust the default VMPCK key based on the executing VMPL level */
1937 if (vmpck_id == -1)
1938 vmpck_id = snp_vmpl;
1939
1940 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
1941 if (!mdesc->vmpck) {
1942 pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
1943 return -EINVAL;
1944 }
1945
1946 /* Verify that VMPCK is not zero. */
1947 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
1948 pr_err("Empty VMPCK%d communication key\n", vmpck_id);
1949 return -EINVAL;
1950 }
1951
1952 mdesc->vmpck_id = vmpck_id;
1953
1954 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
1955 if (!mdesc->ctx)
1956 return -ENOMEM;
1957
1958 return 0;
1959 }
1960 EXPORT_SYMBOL_GPL(snp_msg_init);
1961
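/*
 * snp_msg_alloc(): map the secrets page and allocate the shared (decrypted)
 * request and response pages used to exchange guest messages with the
 * hypervisor. Returns the descriptor on success or ERR_PTR(-ENOMEM).
 */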
struct snp_msg_desc *snp_msg_alloc(void)
{
	struct snp_msg_desc *mdesc;
	void __iomem *mem;

	BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);

	mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
	if (!mdesc)
		return ERR_PTR(-ENOMEM);

	mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem)
		goto e_free_mdesc;

	mdesc->secrets = (__force struct snp_secrets_page *)mem;

	/* Allocate the shared page used for the request and response message. */
	mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->request)
		goto e_unmap;

	mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->response)
		goto e_free_request;

	return mdesc;

e_free_request:
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
	iounmap(mem);
e_free_mdesc:
	kfree(mdesc);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);

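/*
 * snp_msg_free(): tear down a descriptor returned by snp_msg_alloc(). Frees
 * the crypto context and shared pages, unmaps the secrets page and clears
 * the descriptor before freeing it. A NULL descriptor is a no-op.
 */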
void snp_msg_free(struct snp_msg_desc *mdesc)
{
	if (!mdesc)
		return;

	kfree(mdesc->ctx);
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
	iounmap((__force void __iomem *)mdesc->secrets);

	memset(mdesc, 0, sizeof(*mdesc));
	kfree(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);

/* Mutex to serialize the shared buffer access and command handling. */
static DEFINE_MUTEX(snp_cmd_mutex);

/*
 * If an error is received from the host or AMD Secure Processor (ASP) there
 * are two options. Either retry the exact same encrypted request or discontinue
 * using the VMPCK.
 *
 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
 * encrypt the requests. The IV for this scheme is the sequence number. GCM
 * cannot tolerate IV reuse.
 *
 * The ASP FW v1.51 only increments the sequence numbers on a successful
 * guest<->ASP back and forth and only accepts messages at its exact sequence
 * number.
 *
 * So if the sequence number were to be reused the encryption scheme is
 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
 * will reject the request.
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);
	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}

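/*
 * Example of the sequence number handling: if the OS area of the secrets
 * page currently holds 4, __snp_get_msg_seqno() returns 5 for the next
 * request, and after a successful round trip snp_inc_msg_seqno() stores 6
 * (one increment for the guest message, one for the PSP response).
 */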
static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count;

	lockdep_assert_held(&snp_cmd_mutex);

	/* Read the current message sequence counter from the secrets page */
	count = *mdesc->os_area_msg_seqno;

	return count + 1;
}

/* Return a non-zero sequence number on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value, but version 2 of the GHCB specification defines 32-bit storage
	 * for it. If the counter exceeds the 32-bit range, return zero. The
	 * caller should check the return value, but if the caller happens not
	 * to check it and uses it anyway, the firmware treats zero as an
	 * invalid sequence number and will fail the message request.
	 */
	if (count >= UINT_MAX) {
		pr_err("request message sequence counter overflow\n");
		return 0;
	}

	return count;
}

static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
	/*
	 * The counter is also incremented by the PSP, so increment it by 2
	 * and save it in the secrets page.
	 */
	*mdesc->os_area_msg_seqno += 2;
}

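/*
 * Validate the response copied from the shared page: the sequence number
 * must be exactly one greater than the request's, the message type must be
 * the request type + 1, the version must match, and the payload plus
 * authtag must fit the caller's response buffer. Only then is the payload
 * authenticated and decrypted with AES-GCM.
 */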
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	struct snp_guest_msg *resp_msg = &mdesc->secret_response;
	struct snp_guest_msg *req_msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
	struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	pr_debug("response [seqno %lld type %d version %d sz %d]\n",
		 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
		 resp_msg_hdr->msg_sz);

	/* Copy response from shared memory to encrypted memory. */
	memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));

	/* Verify that the sequence counter is incremented by 1 */
	if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
		return -EBADMSG;

	/* Verify response message type and version number. */
	if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
	    resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
		return -EBADMSG;

	/*
	 * If the message size is greater than our buffer length then return
	 * an error.
	 */
	if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
		return -EBADMSG;

	/* Decrypt the payload */
	memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
	if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
			    &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
		return -EBADMSG;

	return 0;
}

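/*
 * Build the message header in the encrypted scratch buffer and AES-GCM
 * encrypt the caller's payload into it. The GCM IV is derived from the
 * 64-bit message sequence number (remaining IV bytes stay zero) and the
 * header fields starting at 'algo' are passed as AAD_LEN bytes of
 * additional authenticated data.
 */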
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
{
	struct snp_guest_msg *msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *hdr = &msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	memset(msg, 0, sizeof(*msg));

	hdr->algo = SNP_AEAD_AES_256_GCM;
	hdr->hdr_version = MSG_HDR_VER;
	hdr->hdr_sz = sizeof(*hdr);
	hdr->msg_type = req->msg_type;
	hdr->msg_version = req->msg_version;
	hdr->msg_seqno = seqno;
	hdr->msg_vmpck = req->vmpck_id;
	hdr->msg_sz = req->req_sz;

	/* Verify the sequence number is non-zero */
	if (!hdr->msg_seqno)
		return -ENOSR;

	pr_debug("request [seqno %lld type %d version %d sz %d]\n",
		 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);

	if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
		return -EBADMSG;

	memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
	aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
		       AAD_LEN, iv, hdr->authtag);

	return 0;
}

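/*
 * Issue the encrypted request to the hypervisor and make sure the message
 * sequence number is always advanced afterwards. An -ENOSPC result from an
 * extended request (certificate buffer too small) is retried as a plain
 * guest request purely to consume the sequence number, with the error
 * reported back as SNP_GUEST_VMM_ERR_INVALID_LEN. An -EAGAIN (throttled)
 * result is retried until SNP_REQ_MAX_RETRY_DURATION has elapsed.
 */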
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	unsigned long req_start = jiffies;
	unsigned int override_npages = 0;
	u64 override_err = 0;
	int rc;

retry_request:
	/*
	 * Call firmware to process the request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small of a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		req->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}

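/*
 * snp_send_guest_request(): encrypt the caller's request, send it to the
 * hypervisor/ASP and authenticate and decrypt the response. Serialized by
 * snp_cmd_mutex. On any failure that could lead to IV reuse, the VMPCK is
 * wiped and further requests are refused. Returns 0 on success or a
 * negative errno.
 */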
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	u64 seqno;
	int rc;

	/*
	 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
	 * The offload's DMA scatter-gather list of data to encrypt has to be in
	 * the linear mapping.
	 */
	if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
		pr_warn("AES-GCM buffers must be in linear mapping\n");
		return -EINVAL;
	}

	guard(mutex)(&snp_cmd_mutex);

	/* Check if the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get the message sequence number and verify that it is non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear shared memory's response for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/* Encrypt the userspace provided payload in mdesc->secret_request. */
	rc = enc_payload(mdesc, seqno, req);
	if (rc)
		return rc;

	/*
	 * Write the fully encrypted request to the shared unencrypted
	 * request page.
	 */
	memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));

	/* Initialize the input address for guest request */
	req->input.req_gpa = __pa(mdesc->request);
	req->input.resp_gpa = __pa(mdesc->response);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;

	rc = __handle_guest_request(mdesc, req);
	if (rc) {
		if (rc == -EIO &&
		    req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			return rc;

		pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
			 rc, req->exitinfo2);

		snp_disable_vmpck(mdesc);
		return rc;
	}

	rc = verify_and_dec_payload(mdesc, req);
	if (rc) {
		pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
		snp_disable_vmpck(mdesc);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);

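/*
 * Request the Secure TSC parameters from the ASP via a TSC_INFO guest
 * message and cache the returned scale and offset for later use.
 */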
static int __init snp_get_tsc_info(void)
{
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req req = {};
	int rc = -ENOMEM;

	tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_tsc_resp;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_TSC_INFO_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = tsc_req;
	req.req_sz = sizeof(*tsc_req);
	req.resp_buf = (void *)tsc_resp;
	req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}

void __init snp_secure_tsc_prepare(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	if (snp_get_tsc_info()) {
		pr_alert("Unable to retrieve Secure TSC info from ASP\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	pr_debug("SecureTSC enabled\n");
}

static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}

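/*
 * Finalize Secure TSC setup: read the guest TSC frequency from the
 * GUEST_TSC_FREQ MSR, scale it by the TSC_FACTOR from the secrets page and
 * install securetsc_get_tsc_khz() as the CPU/TSC calibration routine.
 */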
void __init snp_secure_tsc_init(void)
{
	struct snp_secrets_page *secrets;
	unsigned long tsc_freq_mhz;
	void *mem;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	secrets = (__force struct snp_secrets_page *)mem;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

	/* Extract the guest TSC frequency in MHz from bits [17:0]; the rest is reserved */
	tsc_freq_mhz &= GENMASK_ULL(17, 0);

	snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

	early_memunmap(mem, PAGE_SIZE);
}