// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2023 Intel Corporation.
 *
 * Intel Trusted Domain Extensions (TDX) support
 */

8 #include "asm/page_types.h"
9 #define pr_fmt(fmt) "virt/tdx: " fmt

#include <linux/types.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/cpu.h>
#include <linux/spinlock.h>
#include <linux/percpu-defs.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/minmax.h>
#include <linux/sizes.h>
#include <linux/pfn.h>
#include <linux/align.h>
#include <linux/sort.h>
#include <linux/log2.h>
#include <linux/acpi.h>
#include <linux/suspend.h>
#include <linux/idr.h>
#include <linux/kvm_types.h>
#include <asm/page.h>
#include <asm/special_insns.h>
#include <asm/msr-index.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include "tdx.h"

static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
static u32 tdx_nr_guest_keyids __ro_after_init;

static DEFINE_IDA(tdx_guest_keyid_pool);

static DEFINE_PER_CPU(bool, tdx_lp_initialized);

static struct tdmr_info_list tdx_tdmr_list;

static enum tdx_module_status_t tdx_module_status;
static DEFINE_MUTEX(tdx_module_lock);

/* All TDX-usable memory regions.  Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);

static struct tdx_sys_info tdx_sysinfo;

typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);

static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
{
	pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
}

static inline void seamcall_err_ret(u64 fn, u64 err,
				    struct tdx_module_args *args)
{
	seamcall_err(fn, err, args);
	pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
	       args->rcx, args->rdx, args->r8);
	pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
	       args->r9, args->r10, args->r11);
}

static __always_inline int sc_retry_prerr(sc_func_t func,
					  sc_err_func_t err_func,
					  u64 fn, struct tdx_module_args *args)
{
	u64 sret = sc_retry(func, fn, args);

	if (sret == TDX_SUCCESS)
		return 0;

	if (sret == TDX_SEAMCALL_VMFAILINVALID)
		return -ENODEV;

	if (sret == TDX_SEAMCALL_GP)
		return -EOPNOTSUPP;

	if (sret == TDX_SEAMCALL_UD)
		return -EACCES;

	err_func(fn, sret, args);
	return -EIO;
}

#define seamcall_prerr(__fn, __args) \
	sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))

#define seamcall_prerr_ret(__fn, __args) \
	sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))

/*
 * Do the module global initialization once and return its result.
 * It can be done on any cpu.  It's always called with interrupts
 * disabled.
 */
static int try_init_module_global(void)
{
	struct tdx_module_args args = {};
	static DEFINE_RAW_SPINLOCK(sysinit_lock);
	static bool sysinit_done;
	static int sysinit_ret;

	lockdep_assert_irqs_disabled();

	raw_spin_lock(&sysinit_lock);

	if (sysinit_done)
		goto out;

	/* RCX is module attributes and all bits are reserved */
	args.rcx = 0;
	sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);

	/*
	 * The first SEAMCALL also detects the TDX module, thus
	 * it can fail because the TDX module is not loaded.  Print
	 * a message to let the user know.
	 */
	if (sysinit_ret == -ENODEV)
		pr_err("module not loaded\n");

	sysinit_done = true;
out:
	raw_spin_unlock(&sysinit_lock);
	return sysinit_ret;
}

/**
 * tdx_cpu_enable - Enable TDX on local cpu
 *
 * Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
 * global initialization SEAMCALL if not done) on local cpu to make this
 * cpu ready to run any other SEAMCALLs.
 *
 * Always call this function via IPI function calls.
 *
 * Return 0 on success, otherwise errors.
 */
int tdx_cpu_enable(void)
{
	struct tdx_module_args args = {};
	int ret;

	if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
		return -ENODEV;

	lockdep_assert_irqs_disabled();

	if (__this_cpu_read(tdx_lp_initialized))
		return 0;

	/*
	 * The TDX module global initialization is the very first step
	 * to enable TDX.  Need to do it first (if it hasn't been done)
	 * before the per-cpu initialization.
	 */
	ret = try_init_module_global();
	if (ret)
		return ret;

	ret = seamcall_prerr(TDH_SYS_LP_INIT, &args);
	if (ret)
		return ret;

	__this_cpu_write(tdx_lp_initialized, true);

	return 0;
}
EXPORT_SYMBOL_FOR_KVM(tdx_cpu_enable);
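
/*
 * Illustrative caller sketch (an assumption for documentation, not
 * KVM's exact code): tdx_cpu_enable() must run on the target CPU with
 * interrupts disabled, e.g. from an IPI function:
 *
 *	static void tdx_enable_fn(void *failed)
 *	{
 *		if (tdx_cpu_enable())
 *			atomic_inc((atomic_t *)failed);
 *	}
 *	...
 *	on_each_cpu(tdx_enable_fn, &failed, true);
 */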

/*
 * Add a memory region as a TDX memory block.  The caller must make sure
 * all memory regions are added in address ascending order and don't
 * overlap.
 */
static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
			    unsigned long end_pfn, int nid)
{
	struct tdx_memblock *tmb;

	tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
	if (!tmb)
		return -ENOMEM;

	INIT_LIST_HEAD(&tmb->list);
	tmb->start_pfn = start_pfn;
	tmb->end_pfn = end_pfn;
	tmb->nid = nid;

	/* @tmb_list is protected by mem_hotplug_lock */
	list_add_tail(&tmb->list, tmb_list);
	return 0;
}

static void free_tdx_memlist(struct list_head *tmb_list)
{
	/* @tmb_list is protected by mem_hotplug_lock */
	while (!list_empty(tmb_list)) {
		struct tdx_memblock *tmb = list_first_entry(tmb_list,
				struct tdx_memblock, list);

		list_del(&tmb->list);
		kfree(tmb);
	}
}

/*
 * Ensure that all memblock memory regions are convertible to TDX
 * memory.  Once this has been established, stash the memblock
 * ranges off in a secondary structure because memblock is modified
 * in memory hotplug while TDX memory regions are fixed.
 */
static int build_tdx_memlist(struct list_head *tmb_list)
{
	unsigned long start_pfn, end_pfn;
	int i, nid, ret;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		/*
		 * The first 1MB is not reported as TDX convertible memory.
		 * Although the first 1MB is always reserved and won't end up
		 * in the page allocator, it is still in memblock's memory
		 * regions.  Skip them manually to exclude them as TDX memory.
		 */
		start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
		if (start_pfn >= end_pfn)
			continue;

		/*
		 * Add the memory regions as TDX memory.  Memblock has
		 * already guaranteed they are in address ascending order
		 * and don't overlap.
		 */
		ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid);
		if (ret)
			goto err;
	}

	return 0;
err:
	free_tdx_memlist(tmb_list);
	return ret;
}

static int read_sys_metadata_field(u64 field_id, u64 *data)
{
	struct tdx_module_args args = {};
	int ret;

	/*
	 * TDH.SYS.RD -- reads one global metadata field
	 *  - RDX (in): the field to read
	 *  - R8 (out): the field data
	 */
	args.rdx = field_id;
	ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
	if (ret)
		return ret;

	*data = args.r8;

	return 0;
}

#include "tdx_global_metadata.c"

static int check_features(struct tdx_sys_info *sysinfo)
{
	u64 tdx_features0 = sysinfo->features.tdx_features0;

	if (!(tdx_features0 & TDX_FEATURES0_NO_RBP_MOD)) {
		pr_err("frame pointer (RBP) clobber bug present, upgrade TDX module\n");
		return -EINVAL;
	}

	return 0;
}

/* Calculate the actual TDMR size */
static int tdmr_size_single(u16 max_reserved_per_tdmr)
{
	int tdmr_sz;

	/*
	 * The actual size of TDMR depends on the maximum
	 * number of reserved areas.
	 */
	tdmr_sz = sizeof(struct tdmr_info);
	tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr;

	return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
}

static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
			   struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
	size_t tdmr_sz, tdmr_array_sz;
	void *tdmr_array;

	tdmr_sz = tdmr_size_single(sysinfo_tdmr->max_reserved_per_tdmr);
	tdmr_array_sz = tdmr_sz * sysinfo_tdmr->max_tdmrs;

	/*
	 * To keep things simple, allocate all TDMRs together.
	 * The buffer needs to be physically contiguous to make
	 * sure each TDMR is physically contiguous.
	 */
	tdmr_array = alloc_pages_exact(tdmr_array_sz,
			GFP_KERNEL | __GFP_ZERO);
	if (!tdmr_array)
		return -ENOMEM;

	tdmr_list->tdmrs = tdmr_array;

	/*
	 * Keep the size of TDMR to find the target TDMR
	 * at a given index in the TDMR list.
	 */
	tdmr_list->tdmr_sz = tdmr_sz;
	tdmr_list->max_tdmrs = sysinfo_tdmr->max_tdmrs;
	tdmr_list->nr_consumed_tdmrs = 0;

	return 0;
}

static void free_tdmr_list(struct tdmr_info_list *tdmr_list)
{
	free_pages_exact(tdmr_list->tdmrs,
			tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
}

/* Get the TDMR from the list at the given index. */
static struct tdmr_info *tdmr_entry(struct tdmr_info_list *tdmr_list,
				    int idx)
{
	int tdmr_info_offset = tdmr_list->tdmr_sz * idx;

	return (void *)tdmr_list->tdmrs + tdmr_info_offset;
}

#define TDMR_ALIGNMENT		SZ_1G
#define TDMR_ALIGN_DOWN(_addr)	ALIGN_DOWN((_addr), TDMR_ALIGNMENT)
#define TDMR_ALIGN_UP(_addr)	ALIGN((_addr), TDMR_ALIGNMENT)
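
/*
 * For example (illustrative arithmetic only): TDMR_ALIGN_DOWN(0x40800000)
 * is 0x40000000 and TDMR_ALIGN_UP(0x40800000) is 0x80000000, i.e. TDMR
 * boundaries always snap outward to whole gigabytes.
 */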

static inline u64 tdmr_end(struct tdmr_info *tdmr)
{
	return tdmr->base + tdmr->size;
}

/*
 * Take the memory referenced in @tmb_list and populate the
 * preallocated @tdmr_list, following all the special alignment
 * and size rules for TDMR.
 */
static int fill_out_tdmrs(struct list_head *tmb_list,
			  struct tdmr_info_list *tdmr_list)
{
	struct tdx_memblock *tmb;
	int tdmr_idx = 0;

	/*
	 * Loop over TDX memory regions and fill out TDMRs to cover them.
	 * To keep it simple, always try to use one TDMR to cover one
	 * memory region; see the worked example below.
	 *
	 * In practice TDX supports at least 64 TDMRs.  A 2-socket system
	 * typically consumes fewer than 10 of those.  This code is dumb
	 * and simple and may use more TDMRs than is strictly required.
	 */
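	/*
	 * Worked example (illustrative only): given two TDX memory
	 * regions [1MB, 2GB) and [2GB, 6GB), the first iteration creates
	 * TDMR [0, 2GB) (the region expanded to 1GB boundaries).  The
	 * second region starts exactly at that TDMR's end, so a second
	 * TDMR [2GB, 6GB) is created to cover it.
	 */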
	list_for_each_entry(tmb, tmb_list, list) {
		struct tdmr_info *tdmr = tdmr_entry(tdmr_list, tdmr_idx);
		u64 start, end;

		start = TDMR_ALIGN_DOWN(PFN_PHYS(tmb->start_pfn));
		end   = TDMR_ALIGN_UP(PFN_PHYS(tmb->end_pfn));

		/*
		 * A valid size indicates the current TDMR has already
		 * been filled out to cover the previous memory region(s).
		 */
		if (tdmr->size) {
			/*
			 * Loop to the next if the current memory region
			 * has already been fully covered.
			 */
			if (end <= tdmr_end(tdmr))
				continue;

			/* Otherwise, skip the already covered part. */
			if (start < tdmr_end(tdmr))
				start = tdmr_end(tdmr);

			/*
			 * Create a new TDMR to cover the current memory
			 * region, or the remaining part of it.
			 */
			tdmr_idx++;
			if (tdmr_idx >= tdmr_list->max_tdmrs) {
				pr_warn("initialization failed: TDMRs exhausted.\n");
				return -ENOSPC;
			}

			tdmr = tdmr_entry(tdmr_list, tdmr_idx);
		}

		tdmr->base = start;
		tdmr->size = end - start;
	}

	/* @tdmr_idx is always the index of the last valid TDMR. */
	tdmr_list->nr_consumed_tdmrs = tdmr_idx + 1;

	/*
	 * Warn early that the kernel is about to run out of TDMRs.
	 *
	 * This is an indication that TDMR allocation has to be
	 * reworked to be smarter to not run into an issue.
	 */
	if (tdmr_list->max_tdmrs - tdmr_list->nr_consumed_tdmrs < TDMR_NR_WARN)
		pr_warn("consumed TDMRs reaching limit: %d used out of %d\n",
				tdmr_list->nr_consumed_tdmrs,
				tdmr_list->max_tdmrs);

	return 0;
}

/*
 * Calculate PAMT size given a TDMR and a page size.  The returned
 * PAMT size is always aligned up to 4K page boundary.
 */
static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
				      u16 pamt_entry_size)
{
	unsigned long pamt_sz, nr_pamt_entries;

	switch (pgsz) {
	case TDX_PS_4K:
		nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
		break;
	case TDX_PS_2M:
		nr_pamt_entries = tdmr->size >> PMD_SHIFT;
		break;
	case TDX_PS_1G:
		nr_pamt_entries = tdmr->size >> PUD_SHIFT;
		break;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}

	pamt_sz = nr_pamt_entries * pamt_entry_size;
	/* TDX requires the PAMT size to be 4K aligned */
	pamt_sz = ALIGN(pamt_sz, PAGE_SIZE);

	return pamt_sz;
}
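
/*
 * Worked example (illustrative only; real entry sizes are reported by
 * the TDX module): for a 1GB TDMR and a hypothetical 16-byte 4K-page
 * PAMT entry, nr_pamt_entries = 1GB / 4KB = 262144, so the 4K PAMT is
 * 262144 * 16 = 4MB, which is already 4K aligned.
 */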

/*
 * Locate a NUMA node which should hold the allocation of the @tdmr
 * PAMT.  This node will have some memory covered by the TDMR.  The
 * relative amount of memory covered is not considered.
 */
static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
{
	struct tdx_memblock *tmb;

	/*
	 * A TDMR must cover at least part of one TMB.  That TMB will end
	 * after the TDMR begins.  But, that TMB may have started before
	 * the TDMR.  Find the next 'tmb' that _ends_ after this TDMR
	 * begins.  Ignore 'tmb' start addresses.  They are irrelevant.
	 */
	list_for_each_entry(tmb, tmb_list, list) {
		if (tmb->end_pfn > PHYS_PFN(tdmr->base))
			return tmb->nid;
	}

	/*
	 * Fall back to allocating the TDMR's metadata from node 0 when
	 * no TDX memory block can be found.  This should never happen
	 * since TDMRs originate from TDX memory blocks.
	 */
	pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, falling back to node 0.\n",
			tdmr->base, tdmr_end(tdmr));
	return 0;
}

/*
 * Allocate PAMTs from the local NUMA node of some memory in @tmb_list
 * within @tdmr, and set up PAMTs for @tdmr.
 */
static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
			    struct list_head *tmb_list,
			    u16 pamt_entry_size[])
{
	unsigned long pamt_base[TDX_PS_NR];
	unsigned long pamt_size[TDX_PS_NR];
	unsigned long tdmr_pamt_base;
	unsigned long tdmr_pamt_size;
	struct page *pamt;
	int pgsz, nid;

	nid = tdmr_get_nid(tdmr, tmb_list);

	/*
	 * Calculate the PAMT size for each TDX supported page size
	 * and the total PAMT size.
	 */
	tdmr_pamt_size = 0;
	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
		pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz,
				pamt_entry_size[pgsz]);
		tdmr_pamt_size += pamt_size[pgsz];
	}

	/*
	 * Allocate one chunk of physically contiguous memory for all
	 * PAMTs.  This helps minimize the PAMT's use of reserved areas
	 * in overlapped TDMRs.
	 */
	pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL,
			nid, &node_online_map);
	if (!pamt)
		return -ENOMEM;

	/*
	 * Break the contiguous allocation back up into the
	 * individual PAMTs for each page size.
	 */
	tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT;
	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
		pamt_base[pgsz] = tdmr_pamt_base;
		tdmr_pamt_base += pamt_size[pgsz];
	}

	tdmr->pamt_4k_base = pamt_base[TDX_PS_4K];
	tdmr->pamt_4k_size = pamt_size[TDX_PS_4K];
	tdmr->pamt_2m_base = pamt_base[TDX_PS_2M];
	tdmr->pamt_2m_size = pamt_size[TDX_PS_2M];
	tdmr->pamt_1g_base = pamt_base[TDX_PS_1G];
	tdmr->pamt_1g_size = pamt_size[TDX_PS_1G];

	return 0;
}

static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
			  unsigned long *pamt_size)
{
	unsigned long pamt_bs, pamt_sz;

	/*
	 * The PAMT was allocated in one contiguous unit.  The 4K PAMT
	 * should always point to the beginning of that allocation.
	 */
	pamt_bs = tdmr->pamt_4k_base;
	pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;

	WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK));

	*pamt_base = pamt_bs;
	*pamt_size = pamt_sz;
}

static void tdmr_do_pamt_func(struct tdmr_info *tdmr,
		void (*pamt_func)(unsigned long base, unsigned long size))
{
	unsigned long pamt_base, pamt_size;

	tdmr_get_pamt(tdmr, &pamt_base, &pamt_size);

	/* Do nothing if PAMT hasn't been allocated for this TDMR */
	if (!pamt_size)
		return;

	if (WARN_ON_ONCE(!pamt_base))
		return;

	pamt_func(pamt_base, pamt_size);
}

static void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
{
	free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
}

static void tdmr_free_pamt(struct tdmr_info *tdmr)
{
	tdmr_do_pamt_func(tdmr, free_pamt);
}

static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
{
	int i;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
		tdmr_free_pamt(tdmr_entry(tdmr_list, i));
}

/* Allocate and set up PAMTs for all TDMRs */
static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
				 struct list_head *tmb_list,
				 u16 pamt_entry_size[])
{
	int i, ret = 0;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
		ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list,
				pamt_entry_size);
		if (ret)
			goto err;
	}

	return 0;
err:
	tdmrs_free_pamt_all(tdmr_list);
	return ret;
}

/*
 * Convert TDX private pages back to normal by using MOVDIR64B to clear
 * these pages.  Typically, any write to the page will convert it from
 * TDX private back to normal kernel memory.  Systems with the
 * X86_BUG_TDX_PW_MCE erratum need to do the conversion explicitly via
 * MOVDIR64B.
 */
static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
{
	const void *zero_page = (const void *)page_address(ZERO_PAGE(0));
	unsigned long phys, end;

	if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE))
		return;

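	/* MOVDIR64B copies one zeroed 64-byte chunk per iteration. */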
	end = base + size;
	for (phys = base; phys < end; phys += 64)
		movdir64b(__va(phys), zero_page);

	/*
	 * MOVDIR64B uses WC protocol.  Use memory barrier to
	 * make sure any later user of these pages sees the
	 * updated data.
	 */
	mb();
}

void tdx_quirk_reset_page(struct page *page)
{
	tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
}
EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);

static void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr)
{
	tdmr_do_pamt_func(tdmr, tdx_quirk_reset_paddr);
}

static void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list)
{
	int i;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
		tdmr_quirk_reset_pamt(tdmr_entry(tdmr_list, i));
}

static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
{
	unsigned long pamt_size = 0;
	int i;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
		unsigned long base, size;

		tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
		pamt_size += size;
	}

	return pamt_size / 1024;
}

static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr,
			      u64 size, u16 max_reserved_per_tdmr)
{
	struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas;
	int idx = *p_idx;

	/* Reserved area must be 4K aligned in offset and size */
	if (WARN_ON(addr & ~PAGE_MASK || size & ~PAGE_MASK))
		return -EINVAL;

	if (idx >= max_reserved_per_tdmr) {
		pr_warn("initialization failed: TDMR [0x%llx, 0x%llx): reserved areas exhausted.\n",
				tdmr->base, tdmr_end(tdmr));
		return -ENOSPC;
	}

	/*
	 * Consume one reserved area per call.  Make no effort to
	 * optimize or reduce the number of reserved areas which are
	 * consumed by contiguous reserved areas, for instance.
	 */
	rsvd_areas[idx].offset = addr - tdmr->base;
	rsvd_areas[idx].size = size;

	*p_idx = idx + 1;

	return 0;
}

/*
 * Go through @tmb_list to find holes between memory areas.  If any of
 * those holes fall within @tdmr, set up a TDMR reserved area to cover
 * the hole.
 */
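/*
 * Example (illustrative only): with TDX memory blocks [0, 1GB) and
 * [3GB, 4GB) inside TDMR [0, 4GB), the hole [1GB, 3GB) becomes a
 * single reserved area.  No trailing area is needed because the last
 * block reaches the TDMR's end.
 */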
static int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
				    struct tdmr_info *tdmr,
				    int *rsvd_idx,
				    u16 max_reserved_per_tdmr)
{
	struct tdx_memblock *tmb;
	u64 prev_end;
	int ret;

	/*
	 * Start looking for reserved blocks at the
	 * beginning of the TDMR.
	 */
	prev_end = tdmr->base;
	list_for_each_entry(tmb, tmb_list, list) {
		u64 start, end;

		start = PFN_PHYS(tmb->start_pfn);
		end   = PFN_PHYS(tmb->end_pfn);

		/* Break if this region is after the TDMR */
		if (start >= tdmr_end(tdmr))
			break;

		/* Exclude regions before this TDMR */
		if (end < tdmr->base)
			continue;

		/*
		 * Skip over memory areas that
		 * have already been dealt with.
		 */
		if (start <= prev_end) {
			prev_end = end;
			continue;
		}

		/* Add the hole before this region */
		ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
				start - prev_end,
				max_reserved_per_tdmr);
		if (ret)
			return ret;

		prev_end = end;
	}

	/* Add the hole after the last region if it exists. */
	if (prev_end < tdmr_end(tdmr)) {
		ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
				tdmr_end(tdmr) - prev_end,
				max_reserved_per_tdmr);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Go through @tdmr_list to find all PAMTs.  If any of those PAMTs
 * overlaps with @tdmr, set up a TDMR reserved area to cover the
 * overlapping part.
 */
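/*
 * Example (illustrative only): a PAMT spanning [8GB + 4MB, 8GB + 8MB)
 * inside TDMR [8GB, 9GB) is recorded as a reserved area at offset 4MB
 * with size 4MB; only the part inside the TDMR is reserved.
 */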
static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
				    struct tdmr_info *tdmr,
				    int *rsvd_idx,
				    u16 max_reserved_per_tdmr)
{
	int i, ret;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
		struct tdmr_info *tmp = tdmr_entry(tdmr_list, i);
		unsigned long pamt_base, pamt_size, pamt_end;

		tdmr_get_pamt(tmp, &pamt_base, &pamt_size);
		/* Each TDMR must already have PAMT allocated */
		WARN_ON_ONCE(!pamt_size || !pamt_base);

		pamt_end = pamt_base + pamt_size;
		/* Skip PAMTs outside of the given TDMR */
		if ((pamt_end <= tdmr->base) ||
				(pamt_base >= tdmr_end(tdmr)))
			continue;

		/* Only mark the part within the TDMR as reserved */
		if (pamt_base < tdmr->base)
			pamt_base = tdmr->base;
		if (pamt_end > tdmr_end(tdmr))
			pamt_end = tdmr_end(tdmr);

		ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, pamt_base,
				pamt_end - pamt_base,
				max_reserved_per_tdmr);
		if (ret)
			return ret;
	}

	return 0;
}

/* Compare function called by sort() for TDMR reserved areas */
static int rsvd_area_cmp_func(const void *a, const void *b)
{
	struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a;
	struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b;

	if (r1->offset + r1->size <= r2->offset)
		return -1;
	if (r1->offset >= r2->offset + r2->size)
		return 1;

	/* Reserved areas cannot overlap.  The caller must guarantee. */
	WARN_ON_ONCE(1);
	return -1;
}

/*
 * Populate reserved areas for the given @tdmr, including memory holes
 * (via @tmb_list) and PAMTs (via @tdmr_list).
 */
static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
				    struct list_head *tmb_list,
				    struct tdmr_info_list *tdmr_list,
				    u16 max_reserved_per_tdmr)
{
	int ret, rsvd_idx = 0;

	ret = tdmr_populate_rsvd_holes(tmb_list, tdmr, &rsvd_idx,
			max_reserved_per_tdmr);
	if (ret)
		return ret;

	ret = tdmr_populate_rsvd_pamts(tdmr_list, tdmr, &rsvd_idx,
			max_reserved_per_tdmr);
	if (ret)
		return ret;

	/* TDX requires reserved areas listed in address ascending order */
	sort(tdmr->reserved_areas, rsvd_idx, sizeof(struct tdmr_reserved_area),
			rsvd_area_cmp_func, NULL);

	return 0;
}

/*
 * Populate reserved areas for all TDMRs in @tdmr_list, including memory
 * holes (via @tmb_list) and PAMTs.
 */
static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
					 struct list_head *tmb_list,
					 u16 max_reserved_per_tdmr)
{
	int i;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
		int ret;

		ret = tdmr_populate_rsvd_areas(tdmr_entry(tdmr_list, i),
				tmb_list, tdmr_list, max_reserved_per_tdmr);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Construct a list of TDMRs on the preallocated space in @tdmr_list
 * to cover all TDX memory regions in @tmb_list based on the TDX module
 * TDMR global information in @sysinfo_tdmr.
 */
static int construct_tdmrs(struct list_head *tmb_list,
			   struct tdmr_info_list *tdmr_list,
			   struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
	u16 pamt_entry_size[TDX_PS_NR] = {
		sysinfo_tdmr->pamt_4k_entry_size,
		sysinfo_tdmr->pamt_2m_entry_size,
		sysinfo_tdmr->pamt_1g_entry_size,
	};
	int ret;

	ret = fill_out_tdmrs(tmb_list, tdmr_list);
	if (ret)
		return ret;

	ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, pamt_entry_size);
	if (ret)
		return ret;

	ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list,
			sysinfo_tdmr->max_reserved_per_tdmr);
	if (ret)
		tdmrs_free_pamt_all(tdmr_list);

	/*
	 * The tdmr_info_list is read-only from here on out.
	 * Ensure that these writes are seen by other CPUs.
	 * Pairs with a smp_rmb() in is_pamt_page().
	 */
	smp_wmb();

	return ret;
}

static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid)
{
	struct tdx_module_args args = {};
	u64 *tdmr_pa_array;
	size_t array_sz;
	int i, ret;

	/*
	 * TDMRs are passed to the TDX module via an array of physical
	 * addresses of each TDMR.  The array itself also has certain
	 * alignment requirement.
	 */
	array_sz = tdmr_list->nr_consumed_tdmrs * sizeof(u64);
	array_sz = roundup_pow_of_two(array_sz);
	if (array_sz < TDMR_INFO_PA_ARRAY_ALIGNMENT)
		array_sz = TDMR_INFO_PA_ARRAY_ALIGNMENT;
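	/*
	 * Sizing example (illustrative only): five consumed TDMRs need
	 * 40 bytes of array, rounded up to 64 by roundup_pow_of_two(),
	 * then raised to TDMR_INFO_PA_ARRAY_ALIGNMENT to satisfy the
	 * TDX module's alignment requirement for the array.
	 */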

	tdmr_pa_array = kzalloc(array_sz, GFP_KERNEL);
	if (!tdmr_pa_array)
		return -ENOMEM;

	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
		tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i));

	args.rcx = __pa(tdmr_pa_array);
	args.rdx = tdmr_list->nr_consumed_tdmrs;
	args.r8 = global_keyid;
	ret = seamcall_prerr(TDH_SYS_CONFIG, &args);

	/* Free the array as it is not required anymore. */
	kfree(tdmr_pa_array);

	return ret;
}

static int do_global_key_config(void *unused)
{
	struct tdx_module_args args = {};

	return seamcall_prerr(TDH_SYS_KEY_CONFIG, &args);
}

/*
 * Attempt to configure the global KeyID on all physical packages.
 *
 * This requires running code on at least one CPU in each package.
 * TDMR initialization will fail if any package in the system has no
 * online CPUs.
 *
 * This code takes no affirmative steps to online CPUs.  Callers (e.g.
 * KVM) can ensure success by ensuring sufficient CPUs are online and
 * can run SEAMCALLs.
 */
static int config_global_keyid(void)
{
	cpumask_var_t packages;
	int cpu, ret = -EINVAL;

	if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Hardware doesn't guarantee cache coherency across different
	 * KeyIDs.  The kernel needs to flush PAMT's dirty cachelines
	 * (associated with KeyID 0) before the TDX module can use the
	 * global KeyID to access the PAMT.  Given PAMTs are potentially
	 * large (~1/256th of system RAM), just use WBINVD.
	 */
	wbinvd_on_all_cpus();

	for_each_online_cpu(cpu) {
		/*
		 * The key configuration only needs to be done once per
		 * package and will return an error if configured more
		 * than once.  Avoid doing it multiple times per package.
		 */
		if (cpumask_test_and_set_cpu(topology_physical_package_id(cpu),
					packages))
			continue;

		/*
		 * TDH.SYS.KEY.CONFIG cannot run concurrently on
		 * different cpus.  Do it one by one.
		 */
		ret = smp_call_on_cpu(cpu, do_global_key_config, NULL, true);
		if (ret)
			break;
	}

	free_cpumask_var(packages);
	return ret;
}

static int init_tdmr(struct tdmr_info *tdmr)
{
	u64 next;

	/*
	 * Initializing a TDMR can be time consuming.  To avoid long
	 * SEAMCALLs, the TDX module may only initialize a part of the
	 * TDMR in each call.
	 */
	do {
		struct tdx_module_args args = {
			.rcx = tdmr->base,
		};
		int ret;

		ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
		if (ret)
			return ret;
		/*
		 * RDX contains 'next-to-initialize' address if
		 * TDH.SYS.TDMR.INIT did not fully complete and
		 * should be retried.
		 */
		next = args.rdx;
		cond_resched();
		/* Keep making SEAMCALLs until the TDMR is done */
	} while (next < tdmr->base + tdmr->size);

	return 0;
}

static int init_tdmrs(struct tdmr_info_list *tdmr_list)
{
	int i;

	/*
	 * This operation is costly.  It can be parallelized,
	 * but keep it simple for now.
	 */
	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
		int ret;

		ret = init_tdmr(tdmr_entry(tdmr_list, i));
		if (ret)
			return ret;
	}

	return 0;
}

static int init_tdx_module(void)
{
	int ret;

	ret = get_tdx_sys_info(&tdx_sysinfo);
	if (ret)
		return ret;

	/* Check whether the kernel can support this module */
	ret = check_features(&tdx_sysinfo);
	if (ret)
		return ret;

	/*
	 * To keep things simple, assume that all TDX-protected memory
	 * will come from the page allocator.  Make sure all pages in the
	 * page allocator are TDX-usable memory.
	 *
	 * Build the list of "TDX-usable" memory regions which cover all
	 * pages in the page allocator to guarantee that.  Do it while
	 * holding mem_hotplug_lock read-lock as the memory hotplug code
	 * path reads the @tdx_memlist to reject any new memory.
	 */
	get_online_mems();

	ret = build_tdx_memlist(&tdx_memlist);
	if (ret)
		goto out_put_tdxmem;

	/* Allocate enough space for constructing TDMRs */
	ret = alloc_tdmr_list(&tdx_tdmr_list, &tdx_sysinfo.tdmr);
	if (ret)
		goto err_free_tdxmem;

	/* Cover all TDX-usable memory regions in TDMRs */
	ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdx_sysinfo.tdmr);
	if (ret)
		goto err_free_tdmrs;

	/* Pass the TDMRs and the global KeyID to the TDX module */
	ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid);
	if (ret)
		goto err_free_pamts;

	/* Config the key of global KeyID on all packages */
	ret = config_global_keyid();
	if (ret)
		goto err_reset_pamts;

	/* Initialize TDMRs to complete the TDX module initialization */
	ret = init_tdmrs(&tdx_tdmr_list);
	if (ret)
		goto err_reset_pamts;

	pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));

out_put_tdxmem:
	/*
	 * @tdx_memlist is written here and read at memory hotplug time.
	 * Lock out memory hotplug code while building it.
	 */
	put_online_mems();
	return ret;

err_reset_pamts:
	/*
	 * Part of PAMTs may already have been initialized by the
	 * TDX module.  Flush cache before returning PAMTs back
	 * to the kernel.
	 */
	wbinvd_on_all_cpus();
	tdmrs_quirk_reset_pamt_all(&tdx_tdmr_list);
err_free_pamts:
	tdmrs_free_pamt_all(&tdx_tdmr_list);
err_free_tdmrs:
	free_tdmr_list(&tdx_tdmr_list);
err_free_tdxmem:
	free_tdx_memlist(&tdx_memlist);
	goto out_put_tdxmem;
}

static int __tdx_enable(void)
{
	int ret;

	ret = init_tdx_module();
	if (ret) {
		pr_err("module initialization failed (%d)\n", ret);
		tdx_module_status = TDX_MODULE_ERROR;
		return ret;
	}

	pr_info("module initialized\n");
	tdx_module_status = TDX_MODULE_INITIALIZED;

	return 0;
}

/**
 * tdx_enable - Enable TDX module to make it ready to run TDX guests
 *
 * This function assumes the caller has: 1) held read lock of CPU hotplug
 * lock to prevent any new cpu from becoming online; 2) done both VMXON
 * and tdx_cpu_enable() on all online cpus.
 *
 * This function requires there's at least one online cpu for each CPU
 * package to succeed.
 *
 * This function can be called in parallel by multiple callers.
 *
 * Return 0 if TDX is enabled successfully, otherwise error.
 */
int tdx_enable(void)
{
	int ret;

	if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
		return -ENODEV;

	lockdep_assert_cpus_held();

	mutex_lock(&tdx_module_lock);

	switch (tdx_module_status) {
	case TDX_MODULE_UNINITIALIZED:
		ret = __tdx_enable();
		break;
	case TDX_MODULE_INITIALIZED:
		/* Already initialized, great, tell the caller. */
		ret = 0;
		break;
	default:
		/* Failed to initialize in the previous attempts */
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tdx_module_lock);

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdx_enable);

static bool is_pamt_page(unsigned long phys)
{
	struct tdmr_info_list *tdmr_list = &tdx_tdmr_list;
	int i;

	/* Ensure that all remote 'tdmr_list' writes are visible: */
	smp_rmb();

	/*
	 * The TDX module is no longer returning TDX_SYS_NOT_READY and
	 * is initialized.  The 'tdmr_list' was initialized long ago
	 * and is now read-only.
	 */
	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
		unsigned long base, size;

		tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);

		if (phys >= base && phys < (base + size))
			return true;
	}

	return false;
}

/*
 * Return whether the memory page at the given physical address is TDX
 * private memory or not.
 *
 * This can be imprecise for two known reasons:
 * 1. PAMTs are private memory and exist before the TDX module is
 *    ready and TDH_PHYMEM_PAGE_RDMD works.  This is a relatively
 *    short window that occurs once per boot.
 * 2. TDH_PHYMEM_PAGE_RDMD reflects the TDX module's knowledge of the
 *    page.  However, the page can still cause #MC until it has been
 *    fully converted to shared using 64-byte writes like MOVDIR64B.
 *    Buggy hosts might still leave #MC-causing memory in place which
 *    this function can not detect.
 */
static bool paddr_is_tdx_private(unsigned long phys)
{
	struct tdx_module_args args = {
		.rcx = phys & PAGE_MASK,
	};
	u64 sret;

	if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
		return false;

	/* Get page type from the TDX module */
	sret = __seamcall_dirty_cache(__seamcall_ret, TDH_PHYMEM_PAGE_RDMD, &args);

	/*
	 * The SEAMCALL will not return success unless there is a
	 * working, "ready" TDX module.  Assume an absence of TDX
	 * private pages until SEAMCALL is working.
	 */
	if (sret)
		return false;

	/*
	 * SEAMCALL was successful -- read page type (via RCX):
	 *
	 *  - PT_NDA:	Page is not used by the TDX module
	 *  - PT_RSVD:	Reserved for Non-TDX use
	 *  - Others:	Page is used by the TDX module
	 *
	 * Note PAMT pages are marked as PT_RSVD but they are also TDX
	 * private memory.
	 */
	switch (args.rcx) {
	case PT_NDA:
		return false;
	case PT_RSVD:
		return is_pamt_page(phys);
	default:
		return true;
	}
}

/*
 * Some TDX-capable CPUs have an erratum.  A write to TDX private
 * memory poisons that memory, and a subsequent read of that memory
 * triggers #MC.
 *
 * Help distinguish erratum-triggered #MCs from normal hardware ones.
 * Just print an additional message to show that such an #MC may be a
 * result of the erratum.
 */
const char *tdx_dump_mce_info(struct mce *m)
{
	if (!m || !mce_is_memory_error(m) || !mce_usable_address(m))
		return NULL;

	if (!paddr_is_tdx_private(m->addr))
		return NULL;

	return "TDX private memory error. Possible kernel bug.";
}

static __init int record_keyid_partitioning(u32 *tdx_keyid_start,
					    u32 *nr_tdx_keyids)
{
	u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids;
	int ret;

	/*
	 * IA32_MKTME_KEYID_PARTITIONING:
	 *   Bit [31:0]:	Number of MKTME KeyIDs.
	 *   Bit [63:32]:	Number of TDX private KeyIDs.
	 */
	ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids,
			&_nr_tdx_keyids);
	if (ret || !_nr_tdx_keyids)
		return -EINVAL;

	/* TDX KeyIDs start after the last MKTME KeyID. */
	_tdx_keyid_start = _nr_mktme_keyids + 1;
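	/*
	 * Example (illustrative only): if the MSR reports 31 MKTME
	 * KeyIDs and 32 TDX KeyIDs, TDX private KeyIDs occupy [32, 64).
	 * KeyID 0 (TME) is counted by neither field, hence the "+ 1".
	 */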

	*tdx_keyid_start = _tdx_keyid_start;
	*nr_tdx_keyids = _nr_tdx_keyids;

	return 0;
}

static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
{
	struct tdx_memblock *tmb;

	/*
	 * This check assumes that the start_pfn<->end_pfn range does not
	 * cross multiple @tdx_memlist entries.  A single memory online
	 * event across multiple memblocks (from which @tdx_memlist
	 * entries are derived at the time of module initialization) is
	 * not possible.  This is because memory offline/online is done
	 * on granularity of 'struct memory_block', and the hotpluggable
	 * memory region (one memblock) must be a multiple of
	 * memory_block.
	 */
	list_for_each_entry(tmb, &tdx_memlist, list) {
		if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
			return true;
	}
	return false;
}

static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
			       void *v)
{
	struct memory_notify *mn = v;

	if (action != MEM_GOING_ONLINE)
		return NOTIFY_OK;

	/*
	 * Empty list means TDX isn't enabled.  Allow any memory
	 * to go online.
	 */
	if (list_empty(&tdx_memlist))
		return NOTIFY_OK;

	/*
	 * The TDX memory configuration is static and can not be
	 * changed.  Reject onlining any memory which is outside of
	 * the static configuration whether it supports TDX or not.
	 */
	if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
		return NOTIFY_OK;

	return NOTIFY_BAD;
}

static struct notifier_block tdx_memory_nb = {
	.notifier_call = tdx_memory_notifier,
};

static void __init check_tdx_erratum(void)
{
	/*
	 * These CPUs have an erratum.  A partial write from non-TD
	 * software (e.g. via MOVNTI variants or UC/WC mapping) to TDX
	 * private memory poisons that memory, and a subsequent read of
	 * that memory triggers #MC.
	 */
	switch (boot_cpu_data.x86_vfm) {
	case INTEL_SAPPHIRERAPIDS_X:
	case INTEL_EMERALDRAPIDS_X:
		setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
	}
}

void __init tdx_init(void)
{
	u32 tdx_keyid_start, nr_tdx_keyids;
	int err;

	err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids);
	if (err)
		return;

	pr_info("BIOS enabled: private KeyID range [%u, %u)\n",
			tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids);

	/*
	 * The TDX module itself requires one 'global KeyID' to protect
	 * its metadata.  If there's only one TDX KeyID, there won't be
	 * any left for TDX guests, thus there's no point in enabling
	 * TDX at all.
	 */
	if (nr_tdx_keyids < 2) {
		pr_err("initialization failed: too few private KeyIDs available.\n");
		return;
	}

	/*
	 * At this point, hibernation_available() indicates whether or
	 * not hibernation support has been permanently disabled.
	 */
	if (hibernation_available()) {
		pr_err("initialization failed: Hibernation support is enabled\n");
		return;
	}

	err = register_memory_notifier(&tdx_memory_nb);
	if (err) {
		pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
				err);
		return;
	}

#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
	pr_info("Disable ACPI S3. Turn off TDX in the BIOS to use ACPI S3.\n");
	acpi_suspend_lowlevel = NULL;
#endif

	/*
	 * Just use the first TDX KeyID as the 'global KeyID' and
	 * leave the rest for TDX guests.
	 */
	tdx_global_keyid = tdx_keyid_start;
	tdx_guest_keyid_start = tdx_keyid_start + 1;
	tdx_nr_guest_keyids = nr_tdx_keyids - 1;

	setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM);

	check_tdx_erratum();
}

const struct tdx_sys_info *tdx_get_sysinfo(void)
{
	const struct tdx_sys_info *p = NULL;

	/* Make sure all fields in @tdx_sysinfo have been populated */
	mutex_lock(&tdx_module_lock);
	if (tdx_module_status == TDX_MODULE_INITIALIZED)
		p = (const struct tdx_sys_info *)&tdx_sysinfo;
	mutex_unlock(&tdx_module_lock);

	return p;
}
EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo);

u32 tdx_get_nr_guest_keyids(void)
{
	return tdx_nr_guest_keyids;
}
EXPORT_SYMBOL_FOR_KVM(tdx_get_nr_guest_keyids);

int tdx_guest_keyid_alloc(void)
{
	return ida_alloc_range(&tdx_guest_keyid_pool, tdx_guest_keyid_start,
			tdx_guest_keyid_start + tdx_nr_guest_keyids - 1,
			GFP_KERNEL);
}
EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_alloc);
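
/*
 * Sketch of the expected caller-side pairing (hypothetical, for
 * illustration only):
 *
 *	int keyid = tdx_guest_keyid_alloc();
 *
 *	if (keyid < 0)
 *		return keyid;
 *	...
 *	tdx_guest_keyid_free(keyid);
 */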

void tdx_guest_keyid_free(unsigned int keyid)
{
	ida_free(&tdx_guest_keyid_pool, keyid);
}
EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_free);

static inline u64 tdx_tdr_pa(struct tdx_td *td)
{
	return page_to_phys(td->tdr_page);
}

/*
 * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether
 * a CLFLUSH of pages is required before handing them to the TDX module.
 * Be conservative and make the code simpler by doing the CLFLUSH
 * unconditionally.
 */
static void tdx_clflush_page(struct page *page)
{
	clflush_cache_range(page_to_virt(page), PAGE_SIZE);
}

noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args)
{
	args->rcx = td->tdvpr_pa;

	return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_enter);

u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
{
	struct tdx_module_args args = {
		.rcx = page_to_phys(tdcs_page),
		.rdx = tdx_tdr_pa(td),
	};

	tdx_clflush_page(tdcs_page);
	return seamcall(TDH_MNG_ADDCX, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx);

u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page,
		     struct page *source, u64 *ext_err1, u64 *ext_err2)
{
	struct tdx_module_args args = {
		.rcx = gpa,
		.rdx = tdx_tdr_pa(td),
		.r8 = page_to_phys(page),
		.r9 = page_to_phys(source),
	};
	u64 ret;

	tdx_clflush_page(page);
	ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);

	*ext_err1 = args.rcx;
	*ext_err2 = args.rdx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_add);

u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page,
		     u64 *ext_err1, u64 *ext_err2)
{
	struct tdx_module_args args = {
		.rcx = gpa | level,
		.rdx = tdx_tdr_pa(td),
		.r8 = page_to_phys(page),
	};
	u64 ret;

	tdx_clflush_page(page);
	ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args);

	*ext_err1 = args.rcx;
	*ext_err2 = args.rdx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_sept_add);

u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
{
	struct tdx_module_args args = {
		.rcx = page_to_phys(tdcx_page),
		.rdx = vp->tdvpr_pa,
	};

	tdx_clflush_page(tdcx_page);
	return seamcall(TDH_VP_ADDCX, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx);

u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page,
		     u64 *ext_err1, u64 *ext_err2)
{
	struct tdx_module_args args = {
		.rcx = gpa | level,
		.rdx = tdx_tdr_pa(td),
		.r8 = page_to_phys(page),
	};
	u64 ret;

	tdx_clflush_page(page);
	ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);

	*ext_err1 = args.rcx;
	*ext_err2 = args.rdx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_aug);

u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level,
			u64 *ext_err1, u64 *ext_err2)
{
	struct tdx_module_args args = {
		.rcx = gpa | level,
		.rdx = tdx_tdr_pa(td),
	};
	u64 ret;

	ret = seamcall_ret(TDH_MEM_RANGE_BLOCK, &args);

	*ext_err1 = args.rcx;
	*ext_err2 = args.rdx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_range_block);

u64 tdh_mng_key_config(struct tdx_td *td)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
	};

	return seamcall(TDH_MNG_KEY_CONFIG, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_config);

u64 tdh_mng_create(struct tdx_td *td, u16 hkid)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
		.rdx = hkid,
	};

	tdx_clflush_page(td->tdr_page);
	return seamcall(TDH_MNG_CREATE, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_create);

u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp)
{
	struct tdx_module_args args = {
		.rcx = vp->tdvpr_pa,
		.rdx = tdx_tdr_pa(td),
	};

	tdx_clflush_page(vp->tdvpr_page);
	return seamcall(TDH_VP_CREATE, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_create);

u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
		.rdx = field,
	};
	u64 ret;

	ret = seamcall_ret(TDH_MNG_RD, &args);

	/* R8: Content of the field, or 0 in case of error. */
	*data = args.r8;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_rd);

u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2)
{
	struct tdx_module_args args = {
		.rcx = gpa,
		.rdx = tdx_tdr_pa(td),
	};
	u64 ret;

	ret = seamcall_ret(TDH_MR_EXTEND, &args);

	*ext_err1 = args.rcx;
	*ext_err2 = args.rdx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mr_extend);

u64 tdh_mr_finalize(struct tdx_td *td)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
	};

	return seamcall(TDH_MR_FINALIZE, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mr_finalize);

u64 tdh_vp_flush(struct tdx_vp *vp)
{
	struct tdx_module_args args = {
		.rcx = vp->tdvpr_pa,
	};

	return seamcall(TDH_VP_FLUSH, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_flush);

u64 tdh_mng_vpflushdone(struct tdx_td *td)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
	};

	return seamcall(TDH_MNG_VPFLUSHDONE, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_vpflushdone);

u64 tdh_mng_key_freeid(struct tdx_td *td)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
	};

	return seamcall(TDH_MNG_KEY_FREEID, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_freeid);

u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
		.rdx = td_params,
	};
	u64 ret;

	ret = seamcall_ret(TDH_MNG_INIT, &args);

	*extended_err = args.rcx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_init);

u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data)
{
	struct tdx_module_args args = {
		.rcx = vp->tdvpr_pa,
		.rdx = field,
	};
	u64 ret;

	ret = seamcall_ret(TDH_VP_RD, &args);

	/* R8: Content of the field, or 0 in case of error. */
	*data = args.r8;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_rd);

u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask)
{
	struct tdx_module_args args = {
		.rcx = vp->tdvpr_pa,
		.rdx = field,
		.r8 = data,
		.r9 = mask,
	};

	return seamcall(TDH_VP_WR, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_wr);

u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid)
{
	struct tdx_module_args args = {
		.rcx = vp->tdvpr_pa,
		.rdx = initial_rcx,
		.r8 = x2apicid,
	};

	/* apicid requires version == 1. */
	return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_init);

/*
 * The TDX ABI defines the output operands as PT, OWNER and SIZE.  These
 * are TDX-defined formats.  So despite the names, they must be
 * interpreted specially as described by the spec.  Return them only for
 * error reporting purposes.
 */
u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner,
			    u64 *tdx_size)
{
	struct tdx_module_args args = {
		.rcx = page_to_phys(page),
	};
	u64 ret;

	ret = seamcall_ret(TDH_PHYMEM_PAGE_RECLAIM, &args);

	*tdx_pt = args.rcx;
	*tdx_owner = args.rdx;
	*tdx_size = args.r8;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_reclaim);

u64 tdh_mem_track(struct tdx_td *td)
{
	struct tdx_module_args args = {
		.rcx = tdx_tdr_pa(td),
	};

	return seamcall(TDH_MEM_TRACK, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_track);

u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level,
			u64 *ext_err1, u64 *ext_err2)
{
	struct tdx_module_args args = {
		.rcx = gpa | level,
		.rdx = tdx_tdr_pa(td),
	};
	u64 ret;

	ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args);

	*ext_err1 = args.rcx;
	*ext_err2 = args.rdx;

	return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_remove);

u64 tdh_phymem_cache_wb(bool resume)
{
	struct tdx_module_args args = {
		.rcx = resume ? 1 : 0,
	};

	return seamcall(TDH_PHYMEM_CACHE_WB, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_cache_wb);

u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
{
	struct tdx_module_args args = {};

	args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);

	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr);

u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
{
	struct tdx_module_args args = {};

	args.rcx = mk_keyed_paddr(hkid, page);

	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);

#ifdef CONFIG_KEXEC_CORE
void tdx_cpu_flush_cache_for_kexec(void)
{
	lockdep_assert_preemption_disabled();

	if (!this_cpu_read(cache_state_incoherent))
		return;

	/*
	 * Private memory cachelines need to be clean at the time of
	 * kexec.  Write them back now, as the caller promises that
	 * there should be no more SEAMCALLs on this CPU.
	 */
	wbinvd();
	this_cpu_write(cache_state_incoherent, false);
}
EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec);
#endif