xref: /linux/arch/x86/virt/vmx/tdx/tdx.c (revision fbf5df34a4dbcd09d433dd4f0916bf9b2ddb16de)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(c) 2023 Intel Corporation.
4  *
5  * Intel Trusted Domain Extensions (TDX) support
6  */
7 
8 #include "asm/page_types.h"
9 #define pr_fmt(fmt)	"virt/tdx: " fmt
10 
11 #include <linux/types.h>
12 #include <linux/cache.h>
13 #include <linux/init.h>
14 #include <linux/errno.h>
15 #include <linux/printk.h>
16 #include <linux/cpu.h>
17 #include <linux/spinlock.h>
18 #include <linux/percpu-defs.h>
19 #include <linux/mutex.h>
20 #include <linux/list.h>
21 #include <linux/memblock.h>
22 #include <linux/memory.h>
23 #include <linux/minmax.h>
24 #include <linux/sizes.h>
25 #include <linux/pfn.h>
26 #include <linux/align.h>
27 #include <linux/sort.h>
28 #include <linux/log2.h>
29 #include <linux/acpi.h>
30 #include <linux/suspend.h>
31 #include <linux/syscore_ops.h>
32 #include <linux/idr.h>
33 #include <linux/kvm_types.h>
34 #include <asm/page.h>
35 #include <asm/special_insns.h>
36 #include <asm/msr-index.h>
37 #include <asm/msr.h>
38 #include <asm/cpufeature.h>
39 #include <asm/tdx.h>
40 #include <asm/cpu_device_id.h>
41 #include <asm/processor.h>
42 #include <asm/mce.h>
43 #include <asm/virt.h>
44 #include "tdx.h"
45 
/*
 * TDX KeyID configuration.  All three are __ro_after_init, i.e. written
 * during init only.
 * NOTE(review): presumably the global KeyID plus the range of KeyIDs
 * available to guests -- the code that fills these in is not in this part
 * of the file; confirm there.
 */
static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
static u32 tdx_nr_guest_keyids __ro_after_init;

/* Guest KeyID allocator.  tdx_offline_cpu() treats an empty pool as "no TD is running". */
static DEFINE_IDA(tdx_guest_keyid_pool);

/* Set by tdx_cpu_enable() on a cpu once TDH_SYS_LP_INIT has succeeded there. */
static DEFINE_PER_CPU(bool, tdx_lp_initialized);

/* The TDMR array together with its bookkeeping (entry size, capacity, used count). */
static struct tdmr_info_list tdx_tdmr_list;

/* All TDX-usable memory regions.  Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);

/* TDX module global metadata, and whether module initialization completed. */
static struct tdx_sys_info tdx_sysinfo __ro_after_init;
static bool tdx_module_initialized __ro_after_init;
61 
/*
 * Error-reporting callback invoked by sc_retry_prerr() for a failed
 * SEAMCALL: @fn is the SEAMCALL leaf, @err the status it returned and
 * @args the register block that was passed to it.
 */
typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);

/* Log the failing SEAMCALL leaf and its status.  @args is not used here. */
static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
{
	pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
}
68 
69 static inline void seamcall_err_ret(u64 fn, u64 err,
70 				    struct tdx_module_args *args)
71 {
72 	seamcall_err(fn, err, args);
73 	pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
74 			args->rcx, args->rdx, args->r8);
75 	pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
76 			args->r9, args->r10, args->r11);
77 }
78 
79 static __always_inline int sc_retry_prerr(sc_func_t func,
80 					  sc_err_func_t err_func,
81 					  u64 fn, struct tdx_module_args *args)
82 {
83 	u64 sret = sc_retry(func, fn, args);
84 
85 	if (sret == TDX_SUCCESS)
86 		return 0;
87 
88 	if (sret == TDX_SEAMCALL_VMFAILINVALID)
89 		return -ENODEV;
90 
91 	if (sret == TDX_SEAMCALL_GP)
92 		return -EOPNOTSUPP;
93 
94 	if (sret == TDX_SEAMCALL_UD)
95 		return -EACCES;
96 
97 	err_func(fn, sret, args);
98 	return -EIO;
99 }
100 
/*
 * SEAMCALL through __seamcall.  A failure that sc_retry_prerr() does not
 * map to a specific errno is logged with the leaf and status only.
 */
#define seamcall_prerr(__fn, __args)						\
	sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))

/*
 * SEAMCALL through __seamcall_ret.  A failure that sc_retry_prerr() does
 * not map to a specific errno is logged together with the registers held
 * in @__args.
 */
#define seamcall_prerr_ret(__fn, __args)					\
	sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))
106 
107 /*
108  * Do the module global initialization once and return its result.
109  * It can be done on any cpu, and from task or IRQ context.
110  */
111 static int try_init_module_global(void)
112 {
113 	struct tdx_module_args args = {};
114 	static DEFINE_RAW_SPINLOCK(sysinit_lock);
115 	static bool sysinit_done;
116 	static int sysinit_ret;
117 
118 	raw_spin_lock(&sysinit_lock);
119 
120 	if (sysinit_done)
121 		goto out;
122 
123 	/* RCX is module attributes and all bits are reserved */
124 	args.rcx = 0;
125 	sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);
126 
127 	/*
128 	 * The first SEAMCALL also detects the TDX module, thus
129 	 * it can fail due to the TDX module is not loaded.
130 	 * Dump message to let the user know.
131 	 */
132 	if (sysinit_ret == -ENODEV)
133 		pr_err("module not loaded\n");
134 
135 	sysinit_done = true;
136 out:
137 	raw_spin_unlock(&sysinit_lock);
138 	return sysinit_ret;
139 }
140 
141 /**
142  * Enable VMXON and then do one-time TDX module per-cpu initialization SEAMCALL
143  * (and TDX module global initialization SEAMCALL if not done) on local cpu to
144  * make this cpu be ready to run any other SEAMCALLs.
145  */
146 static int tdx_cpu_enable(void)
147 {
148 	struct tdx_module_args args = {};
149 	int ret;
150 
151 	if (__this_cpu_read(tdx_lp_initialized))
152 		return 0;
153 
154 	/*
155 	 * The TDX module global initialization is the very first step
156 	 * to enable TDX.  Need to do it first (if hasn't been done)
157 	 * before the per-cpu initialization.
158 	 */
159 	ret = try_init_module_global();
160 	if (ret)
161 		return ret;
162 
163 	ret = seamcall_prerr(TDH_SYS_LP_INIT, &args);
164 	if (ret)
165 		return ret;
166 
167 	__this_cpu_write(tdx_lp_initialized, true);
168 
169 	return 0;
170 }
171 
172 static int tdx_online_cpu(unsigned int cpu)
173 {
174 	int ret;
175 
176 	ret = x86_virt_get_ref(X86_FEATURE_VMX);
177 	if (ret)
178 		return ret;
179 
180 	ret = tdx_cpu_enable();
181 	if (ret)
182 		x86_virt_put_ref(X86_FEATURE_VMX);
183 
184 	return ret;
185 }
186 
187 static int tdx_offline_cpu(unsigned int cpu)
188 {
189 	int i;
190 
191 	/* No TD is running.  Allow any cpu to be offline. */
192 	if (ida_is_empty(&tdx_guest_keyid_pool))
193 		goto done;
194 
195 	/*
196 	 * In order to reclaim TDX HKID, (i.e. when deleting guest TD), need to
197 	 * call TDH.PHYMEM.PAGE.WBINVD on all packages to program all memory
198 	 * controller with pconfig.  If we have active TDX HKID, refuse to
199 	 * offline the last online cpu.
200 	 */
201 	for_each_online_cpu(i) {
202 		/*
203 		 * Found another online cpu on the same package.
204 		 * Allow to offline.
205 		 */
206 		if (i != cpu && topology_physical_package_id(i) ==
207 				topology_physical_package_id(cpu))
208 			goto done;
209 	}
210 
211 	/*
212 	 * This is the last cpu of this package.  Don't offline it.
213 	 *
214 	 * Because it's hard for human operator to understand the
215 	 * reason, warn it.
216 	 */
217 #define MSG_ALLPKG_ONLINE \
218 	"TDX requires all packages to have an online CPU. Delete all TDs in order to offline all CPUs of a package.\n"
219 	pr_warn_ratelimited(MSG_ALLPKG_ONLINE);
220 	return -EBUSY;
221 
222 done:
223 	x86_virt_put_ref(X86_FEATURE_VMX);
224 	return 0;
225 }
226 
/*
 * Drop a VMX reference.  Run on every cpu by tdx_shutdown() through
 * on_each_cpu(); @ign is unused.
 */
static void tdx_shutdown_cpu(void *ign)
{
	x86_virt_put_ref(X86_FEATURE_VMX);
}
231 
/*
 * syscore shutdown hook: run tdx_shutdown_cpu() on each cpu and wait for
 * all of them to finish (last on_each_cpu() argument is 1).  @ign is
 * unused.
 */
static void tdx_shutdown(void *ign)
{
	on_each_cpu(tdx_shutdown_cpu, NULL, 1);
}
236 
/*
 * syscore suspend hook: drop a VMX reference.  Always reports success;
 * tdx_resume() takes the reference again.  @ign is unused.
 */
static int tdx_suspend(void *ign)
{
	x86_virt_put_ref(X86_FEATURE_VMX);
	return 0;
}
242 
/*
 * syscore resume hook: retake the VMX reference dropped by tdx_suspend().
 * There is no way to report failure from here, so just warn once.  @ign
 * is unused.
 */
static void tdx_resume(void *ign)
{
	WARN_ON_ONCE(x86_virt_get_ref(X86_FEATURE_VMX));
}
247 
/* Suspend/resume/shutdown callbacks that drop and retake the VMX reference. */
static const struct syscore_ops tdx_syscore_ops = {
	.suspend = tdx_suspend,
	.resume = tdx_resume,
	.shutdown = tdx_shutdown,
};

/*
 * Wrapper object carrying the callbacks above.
 * NOTE(review): presumably registered with the syscore framework further
 * down in this file -- confirm.
 */
static struct syscore tdx_syscore = {
	.ops = &tdx_syscore_ops,
};
257 
258 /*
259  * Add a memory region as a TDX memory block.  The caller must make sure
260  * all memory regions are added in address ascending order and don't
261  * overlap.
262  */
263 static __init int add_tdx_memblock(struct list_head *tmb_list,
264 				   unsigned long start_pfn,
265 				   unsigned long end_pfn, int nid)
266 {
267 	struct tdx_memblock *tmb;
268 
269 	tmb = kmalloc_obj(*tmb);
270 	if (!tmb)
271 		return -ENOMEM;
272 
273 	INIT_LIST_HEAD(&tmb->list);
274 	tmb->start_pfn = start_pfn;
275 	tmb->end_pfn = end_pfn;
276 	tmb->nid = nid;
277 
278 	/* @tmb_list is protected by mem_hotplug_lock */
279 	list_add_tail(&tmb->list, tmb_list);
280 	return 0;
281 }
282 
283 static __init void free_tdx_memlist(struct list_head *tmb_list)
284 {
285 	/* @tmb_list is protected by mem_hotplug_lock */
286 	while (!list_empty(tmb_list)) {
287 		struct tdx_memblock *tmb = list_first_entry(tmb_list,
288 				struct tdx_memblock, list);
289 
290 		list_del(&tmb->list);
291 		kfree(tmb);
292 	}
293 }
294 
295 /*
296  * Ensure that all memblock memory regions are convertible to TDX
297  * memory.  Once this has been established, stash the memblock
298  * ranges off in a secondary structure because memblock is modified
299  * in memory hotplug while TDX memory regions are fixed.
300  */
301 static __init int build_tdx_memlist(struct list_head *tmb_list)
302 {
303 	unsigned long start_pfn, end_pfn;
304 	int i, nid, ret;
305 
306 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
307 		/*
308 		 * The first 1MB is not reported as TDX convertible memory.
309 		 * Although the first 1MB is always reserved and won't end up
310 		 * to the page allocator, it is still in memblock's memory
311 		 * regions.  Skip them manually to exclude them as TDX memory.
312 		 */
313 		start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
314 		if (start_pfn >= end_pfn)
315 			continue;
316 
317 		/*
318 		 * Add the memory regions as TDX memory.  The regions in
319 		 * memblock has already guaranteed they are in address
320 		 * ascending order and don't overlap.
321 		 */
322 		ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid);
323 		if (ret)
324 			goto err;
325 	}
326 
327 	return 0;
328 err:
329 	free_tdx_memlist(tmb_list);
330 	return ret;
331 }
332 
333 static __init int read_sys_metadata_field(u64 field_id, u64 *data)
334 {
335 	struct tdx_module_args args = {};
336 	int ret;
337 
338 	/*
339 	 * TDH.SYS.RD -- reads one global metadata field
340 	 *  - RDX (in): the field to read
341 	 *  - R8 (out): the field data
342 	 */
343 	args.rdx = field_id;
344 	ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
345 	if (ret)
346 		return ret;
347 
348 	*data = args.r8;
349 
350 	return 0;
351 }
352 
353 #include "tdx_global_metadata.c"
354 
355 static __init int check_features(struct tdx_sys_info *sysinfo)
356 {
357 	u64 tdx_features0 = sysinfo->features.tdx_features0;
358 
359 	if (!(tdx_features0 & TDX_FEATURES0_NO_RBP_MOD)) {
360 		pr_err("frame pointer (RBP) clobber bug present, upgrade TDX module\n");
361 		return -EINVAL;
362 	}
363 
364 	return 0;
365 }
366 
367 /* Calculate the actual TDMR size */
368 static __init int tdmr_size_single(u16 max_reserved_per_tdmr)
369 {
370 	int tdmr_sz;
371 
372 	/*
373 	 * The actual size of TDMR depends on the maximum
374 	 * number of reserved areas.
375 	 */
376 	tdmr_sz = sizeof(struct tdmr_info);
377 	tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr;
378 
379 	return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
380 }
381 
382 static __init int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
383 				  struct tdx_sys_info_tdmr *sysinfo_tdmr)
384 {
385 	size_t tdmr_sz, tdmr_array_sz;
386 	void *tdmr_array;
387 
388 	tdmr_sz = tdmr_size_single(sysinfo_tdmr->max_reserved_per_tdmr);
389 	tdmr_array_sz = tdmr_sz * sysinfo_tdmr->max_tdmrs;
390 
391 	/*
392 	 * To keep things simple, allocate all TDMRs together.
393 	 * The buffer needs to be physically contiguous to make
394 	 * sure each TDMR is physically contiguous.
395 	 */
396 	tdmr_array = alloc_pages_exact(tdmr_array_sz,
397 			GFP_KERNEL | __GFP_ZERO);
398 	if (!tdmr_array)
399 		return -ENOMEM;
400 
401 	tdmr_list->tdmrs = tdmr_array;
402 
403 	/*
404 	 * Keep the size of TDMR to find the target TDMR
405 	 * at a given index in the TDMR list.
406 	 */
407 	tdmr_list->tdmr_sz = tdmr_sz;
408 	tdmr_list->max_tdmrs = sysinfo_tdmr->max_tdmrs;
409 	tdmr_list->nr_consumed_tdmrs = 0;
410 
411 	return 0;
412 }
413 
414 static __init void free_tdmr_list(struct tdmr_info_list *tdmr_list)
415 {
416 	free_pages_exact(tdmr_list->tdmrs,
417 			tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
418 }
419 
420 /* Get the TDMR from the list at the given index. */
421 static struct tdmr_info *tdmr_entry(struct tdmr_info_list *tdmr_list,
422 				    int idx)
423 {
424 	int tdmr_info_offset = tdmr_list->tdmr_sz * idx;
425 
426 	return (void *)tdmr_list->tdmrs + tdmr_info_offset;
427 }
428 
/* TDMR boundaries are rounded to 1GB: start addresses down, end addresses up. */
#define TDMR_ALIGNMENT		SZ_1G
#define TDMR_ALIGN_DOWN(_addr)	ALIGN_DOWN((_addr), TDMR_ALIGNMENT)
#define TDMR_ALIGN_UP(_addr)	ALIGN((_addr), TDMR_ALIGNMENT)
432 
/* Exclusive end address of @tdmr, i.e. base + size. */
static inline u64 tdmr_end(struct tdmr_info *tdmr)
{
	return tdmr->base + tdmr->size;
}
437 
438 /*
439  * Take the memory referenced in @tmb_list and populate the
440  * preallocated @tdmr_list, following all the special alignment
441  * and size rules for TDMR.
442  */
443 static __init int fill_out_tdmrs(struct list_head *tmb_list,
444 				 struct tdmr_info_list *tdmr_list)
445 {
446 	struct tdx_memblock *tmb;
447 	int tdmr_idx = 0;
448 
449 	/*
450 	 * Loop over TDX memory regions and fill out TDMRs to cover them.
451 	 * To keep it simple, always try to use one TDMR to cover one
452 	 * memory region.
453 	 *
454 	 * In practice TDX supports at least 64 TDMRs.  A 2-socket system
455 	 * typically only consumes less than 10 of those.  This code is
456 	 * dumb and simple and may use more TMDRs than is strictly
457 	 * required.
458 	 */
459 	list_for_each_entry(tmb, tmb_list, list) {
460 		struct tdmr_info *tdmr = tdmr_entry(tdmr_list, tdmr_idx);
461 		u64 start, end;
462 
463 		start = TDMR_ALIGN_DOWN(PFN_PHYS(tmb->start_pfn));
464 		end   = TDMR_ALIGN_UP(PFN_PHYS(tmb->end_pfn));
465 
466 		/*
467 		 * A valid size indicates the current TDMR has already
468 		 * been filled out to cover the previous memory region(s).
469 		 */
470 		if (tdmr->size) {
471 			/*
472 			 * Loop to the next if the current memory region
473 			 * has already been fully covered.
474 			 */
475 			if (end <= tdmr_end(tdmr))
476 				continue;
477 
478 			/* Otherwise, skip the already covered part. */
479 			if (start < tdmr_end(tdmr))
480 				start = tdmr_end(tdmr);
481 
482 			/*
483 			 * Create a new TDMR to cover the current memory
484 			 * region, or the remaining part of it.
485 			 */
486 			tdmr_idx++;
487 			if (tdmr_idx >= tdmr_list->max_tdmrs) {
488 				pr_warn("initialization failed: TDMRs exhausted.\n");
489 				return -ENOSPC;
490 			}
491 
492 			tdmr = tdmr_entry(tdmr_list, tdmr_idx);
493 		}
494 
495 		tdmr->base = start;
496 		tdmr->size = end - start;
497 	}
498 
499 	/* @tdmr_idx is always the index of the last valid TDMR. */
500 	tdmr_list->nr_consumed_tdmrs = tdmr_idx + 1;
501 
502 	/*
503 	 * Warn early that kernel is about to run out of TDMRs.
504 	 *
505 	 * This is an indication that TDMR allocation has to be
506 	 * reworked to be smarter to not run into an issue.
507 	 */
508 	if (tdmr_list->max_tdmrs - tdmr_list->nr_consumed_tdmrs < TDMR_NR_WARN)
509 		pr_warn("consumed TDMRs reaching limit: %d used out of %d\n",
510 				tdmr_list->nr_consumed_tdmrs,
511 				tdmr_list->max_tdmrs);
512 
513 	return 0;
514 }
515 
516 /*
517  * Calculate PAMT size given a TDMR and a page size.  The returned
518  * PAMT size is always aligned up to 4K page boundary.
519  */
520 static __init unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
521 					     u16 pamt_entry_size)
522 {
523 	unsigned long pamt_sz, nr_pamt_entries;
524 
525 	switch (pgsz) {
526 	case TDX_PS_4K:
527 		nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
528 		break;
529 	case TDX_PS_2M:
530 		nr_pamt_entries = tdmr->size >> PMD_SHIFT;
531 		break;
532 	case TDX_PS_1G:
533 		nr_pamt_entries = tdmr->size >> PUD_SHIFT;
534 		break;
535 	default:
536 		WARN_ON_ONCE(1);
537 		return 0;
538 	}
539 
540 	pamt_sz = nr_pamt_entries * pamt_entry_size;
541 	/* TDX requires PAMT size must be 4K aligned */
542 	pamt_sz = ALIGN(pamt_sz, PAGE_SIZE);
543 
544 	return pamt_sz;
545 }
546 
547 /*
548  * Locate a NUMA node which should hold the allocation of the @tdmr
549  * PAMT.  This node will have some memory covered by the TDMR.  The
550  * relative amount of memory covered is not considered.
551  */
552 static __init int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
553 {
554 	struct tdx_memblock *tmb;
555 
556 	/*
557 	 * A TDMR must cover at least part of one TMB.  That TMB will end
558 	 * after the TDMR begins.  But, that TMB may have started before
559 	 * the TDMR.  Find the next 'tmb' that _ends_ after this TDMR
560 	 * begins.  Ignore 'tmb' start addresses.  They are irrelevant.
561 	 */
562 	list_for_each_entry(tmb, tmb_list, list) {
563 		if (tmb->end_pfn > PHYS_PFN(tdmr->base))
564 			return tmb->nid;
565 	}
566 
567 	/*
568 	 * Fall back to allocating the TDMR's metadata from node 0 when
569 	 * no TDX memory block can be found.  This should never happen
570 	 * since TDMRs originate from TDX memory blocks.
571 	 */
572 	pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, fallback to use node 0.\n",
573 			tdmr->base, tdmr_end(tdmr));
574 	return 0;
575 }
576 
577 /*
578  * Allocate PAMTs from the local NUMA node of some memory in @tmb_list
579  * within @tdmr, and set up PAMTs for @tdmr.
580  */
581 static __init int tdmr_set_up_pamt(struct tdmr_info *tdmr,
582 				   struct list_head *tmb_list,
583 				   u16 pamt_entry_size[])
584 {
585 	unsigned long pamt_base[TDX_PS_NR];
586 	unsigned long pamt_size[TDX_PS_NR];
587 	unsigned long tdmr_pamt_base;
588 	unsigned long tdmr_pamt_size;
589 	struct page *pamt;
590 	int pgsz, nid;
591 
592 	nid = tdmr_get_nid(tdmr, tmb_list);
593 
594 	/*
595 	 * Calculate the PAMT size for each TDX supported page size
596 	 * and the total PAMT size.
597 	 */
598 	tdmr_pamt_size = 0;
599 	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
600 		pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz,
601 					pamt_entry_size[pgsz]);
602 		tdmr_pamt_size += pamt_size[pgsz];
603 	}
604 
605 	/*
606 	 * Allocate one chunk of physically contiguous memory for all
607 	 * PAMTs.  This helps minimize the PAMT's use of reserved areas
608 	 * in overlapped TDMRs.
609 	 */
610 	pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL,
611 			nid, &node_online_map);
612 	if (!pamt)
613 		return -ENOMEM;
614 
615 	/*
616 	 * Break the contiguous allocation back up into the
617 	 * individual PAMTs for each page size.
618 	 */
619 	tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT;
620 	for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
621 		pamt_base[pgsz] = tdmr_pamt_base;
622 		tdmr_pamt_base += pamt_size[pgsz];
623 	}
624 
625 	tdmr->pamt_4k_base = pamt_base[TDX_PS_4K];
626 	tdmr->pamt_4k_size = pamt_size[TDX_PS_4K];
627 	tdmr->pamt_2m_base = pamt_base[TDX_PS_2M];
628 	tdmr->pamt_2m_size = pamt_size[TDX_PS_2M];
629 	tdmr->pamt_1g_base = pamt_base[TDX_PS_1G];
630 	tdmr->pamt_1g_size = pamt_size[TDX_PS_1G];
631 
632 	return 0;
633 }
634 
635 static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
636 			  unsigned long *pamt_size)
637 {
638 	unsigned long pamt_bs, pamt_sz;
639 
640 	/*
641 	 * The PAMT was allocated in one contiguous unit.  The 4K PAMT
642 	 * should always point to the beginning of that allocation.
643 	 */
644 	pamt_bs = tdmr->pamt_4k_base;
645 	pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;
646 
647 	WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK));
648 
649 	*pamt_base = pamt_bs;
650 	*pamt_size = pamt_sz;
651 }
652 
653 static __init void tdmr_do_pamt_func(struct tdmr_info *tdmr,
654 		void (*pamt_func)(unsigned long base, unsigned long size))
655 {
656 	unsigned long pamt_base, pamt_size;
657 
658 	tdmr_get_pamt(tdmr, &pamt_base, &pamt_size);
659 
660 	/* Do nothing if PAMT hasn't been allocated for this TDMR */
661 	if (!pamt_size)
662 		return;
663 
664 	if (WARN_ON_ONCE(!pamt_base))
665 		return;
666 
667 	pamt_func(pamt_base, pamt_size);
668 }
669 
/*
 * Free a PAMT given as a physical byte range: both @pamt_base and
 * @pamt_size are converted to page units for free_contig_range().
 */
static __init void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
{
	free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
}
674 
/* Free the PAMT of @tdmr; a TDMR without a PAMT is skipped by tdmr_do_pamt_func(). */
static __init void tdmr_free_pamt(struct tdmr_info *tdmr)
{
	tdmr_do_pamt_func(tdmr, free_pamt);
}
679 
680 static __init void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
681 {
682 	int i;
683 
684 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
685 		tdmr_free_pamt(tdmr_entry(tdmr_list, i));
686 }
687 
688 /* Allocate and set up PAMTs for all TDMRs */
689 static __init int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
690 					struct list_head *tmb_list,
691 					u16 pamt_entry_size[])
692 {
693 	int i, ret = 0;
694 
695 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
696 		ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list,
697 				pamt_entry_size);
698 		if (ret)
699 			goto err;
700 	}
701 
702 	return 0;
703 err:
704 	tdmrs_free_pamt_all(tdmr_list);
705 	return ret;
706 }
707 
708 /*
709  * Convert TDX private pages back to normal by using MOVDIR64B to clear these
710  * pages. Typically, any write to the page will convert it from TDX private back
711  * to normal kernel memory. Systems with the X86_BUG_TDX_PW_MCE erratum need to
712  * do the conversion explicitly via MOVDIR64B.
713  */
714 static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
715 {
716 	const void *zero_page = (const void *)page_address(ZERO_PAGE(0));
717 	unsigned long phys, end;
718 
719 	if (!boot_cpu_has_bug(X86_BUG_TDX_PW_MCE))
720 		return;
721 
722 	end = base + size;
723 	for (phys = base; phys < end; phys += 64)
724 		movdir64b(__va(phys), zero_page);
725 
726 	/*
727 	 * MOVDIR64B uses WC protocol.  Use memory barrier to
728 	 * make sure any later user of these pages sees the
729 	 * updated data.
730 	 */
731 	mb();
732 }
733 
734 void tdx_quirk_reset_page(struct page *page)
735 {
736 	tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
737 }
738 EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);
739 
740 static __init void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr)
741 
742 {
743 	tdmr_do_pamt_func(tdmr, tdx_quirk_reset_paddr);
744 }
745 
746 static __init void tdmrs_quirk_reset_pamt_all(struct tdmr_info_list *tdmr_list)
747 {
748 	int i;
749 
750 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
751 		tdmr_quirk_reset_pamt(tdmr_entry(tdmr_list, i));
752 }
753 
754 static __init unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
755 {
756 	unsigned long pamt_size = 0;
757 	int i;
758 
759 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
760 		unsigned long base, size;
761 
762 		tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
763 		pamt_size += size;
764 	}
765 
766 	return pamt_size / 1024;
767 }
768 
769 static __init int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx,
770 				     u64 addr, u64 size, u16 max_reserved_per_tdmr)
771 {
772 	struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas;
773 	int idx = *p_idx;
774 
775 	/* Reserved area must be 4K aligned in offset and size */
776 	if (WARN_ON(addr & ~PAGE_MASK || size & ~PAGE_MASK))
777 		return -EINVAL;
778 
779 	if (idx >= max_reserved_per_tdmr) {
780 		pr_warn("initialization failed: TDMR [0x%llx, 0x%llx): reserved areas exhausted.\n",
781 				tdmr->base, tdmr_end(tdmr));
782 		return -ENOSPC;
783 	}
784 
785 	/*
786 	 * Consume one reserved area per call.  Make no effort to
787 	 * optimize or reduce the number of reserved areas which are
788 	 * consumed by contiguous reserved areas, for instance.
789 	 */
790 	rsvd_areas[idx].offset = addr - tdmr->base;
791 	rsvd_areas[idx].size = size;
792 
793 	*p_idx = idx + 1;
794 
795 	return 0;
796 }
797 
798 /*
799  * Go through @tmb_list to find holes between memory areas.  If any of
800  * those holes fall within @tdmr, set up a TDMR reserved area to cover
801  * the hole.
802  */
803 static __init int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
804 					   struct tdmr_info *tdmr,
805 					   int *rsvd_idx,
806 					   u16 max_reserved_per_tdmr)
807 {
808 	struct tdx_memblock *tmb;
809 	u64 prev_end;
810 	int ret;
811 
812 	/*
813 	 * Start looking for reserved blocks at the
814 	 * beginning of the TDMR.
815 	 */
816 	prev_end = tdmr->base;
817 	list_for_each_entry(tmb, tmb_list, list) {
818 		u64 start, end;
819 
820 		start = PFN_PHYS(tmb->start_pfn);
821 		end   = PFN_PHYS(tmb->end_pfn);
822 
823 		/* Break if this region is after the TDMR */
824 		if (start >= tdmr_end(tdmr))
825 			break;
826 
827 		/* Exclude regions before this TDMR */
828 		if (end < tdmr->base)
829 			continue;
830 
831 		/*
832 		 * Skip over memory areas that
833 		 * have already been dealt with.
834 		 */
835 		if (start <= prev_end) {
836 			prev_end = end;
837 			continue;
838 		}
839 
840 		/* Add the hole before this region */
841 		ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
842 				start - prev_end,
843 				max_reserved_per_tdmr);
844 		if (ret)
845 			return ret;
846 
847 		prev_end = end;
848 	}
849 
850 	/* Add the hole after the last region if it exists. */
851 	if (prev_end < tdmr_end(tdmr)) {
852 		ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
853 				tdmr_end(tdmr) - prev_end,
854 				max_reserved_per_tdmr);
855 		if (ret)
856 			return ret;
857 	}
858 
859 	return 0;
860 }
861 
862 /*
863  * Go through @tdmr_list to find all PAMTs.  If any of those PAMTs
864  * overlaps with @tdmr, set up a TDMR reserved area to cover the
865  * overlapping part.
866  */
867 static __init int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
868 					   struct tdmr_info *tdmr,
869 					   int *rsvd_idx,
870 					   u16 max_reserved_per_tdmr)
871 {
872 	int i, ret;
873 
874 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
875 		struct tdmr_info *tmp = tdmr_entry(tdmr_list, i);
876 		unsigned long pamt_base, pamt_size, pamt_end;
877 
878 		tdmr_get_pamt(tmp, &pamt_base, &pamt_size);
879 		/* Each TDMR must already have PAMT allocated */
880 		WARN_ON_ONCE(!pamt_size || !pamt_base);
881 
882 		pamt_end = pamt_base + pamt_size;
883 		/* Skip PAMTs outside of the given TDMR */
884 		if ((pamt_end <= tdmr->base) ||
885 				(pamt_base >= tdmr_end(tdmr)))
886 			continue;
887 
888 		/* Only mark the part within the TDMR as reserved */
889 		if (pamt_base < tdmr->base)
890 			pamt_base = tdmr->base;
891 		if (pamt_end > tdmr_end(tdmr))
892 			pamt_end = tdmr_end(tdmr);
893 
894 		ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, pamt_base,
895 				pamt_end - pamt_base,
896 				max_reserved_per_tdmr);
897 		if (ret)
898 			return ret;
899 	}
900 
901 	return 0;
902 }
903 
904 /* Compare function called by sort() for TDMR reserved areas */
905 static __init int rsvd_area_cmp_func(const void *a, const void *b)
906 {
907 	struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a;
908 	struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b;
909 
910 	if (r1->offset + r1->size <= r2->offset)
911 		return -1;
912 	if (r1->offset >= r2->offset + r2->size)
913 		return 1;
914 
915 	/* Reserved areas cannot overlap.  The caller must guarantee. */
916 	WARN_ON_ONCE(1);
917 	return -1;
918 }
919 
920 /*
921  * Populate reserved areas for the given @tdmr, including memory holes
922  * (via @tmb_list) and PAMTs (via @tdmr_list).
923  */
924 static __init int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
925 					   struct list_head *tmb_list,
926 					   struct tdmr_info_list *tdmr_list,
927 					   u16 max_reserved_per_tdmr)
928 {
929 	int ret, rsvd_idx = 0;
930 
931 	ret = tdmr_populate_rsvd_holes(tmb_list, tdmr, &rsvd_idx,
932 			max_reserved_per_tdmr);
933 	if (ret)
934 		return ret;
935 
936 	ret = tdmr_populate_rsvd_pamts(tdmr_list, tdmr, &rsvd_idx,
937 			max_reserved_per_tdmr);
938 	if (ret)
939 		return ret;
940 
941 	/* TDX requires reserved areas listed in address ascending order */
942 	sort(tdmr->reserved_areas, rsvd_idx, sizeof(struct tdmr_reserved_area),
943 			rsvd_area_cmp_func, NULL);
944 
945 	return 0;
946 }
947 
948 /*
949  * Populate reserved areas for all TDMRs in @tdmr_list, including memory
950  * holes (via @tmb_list) and PAMTs.
951  */
952 static __init int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
953 						struct list_head *tmb_list,
954 						u16 max_reserved_per_tdmr)
955 {
956 	int i;
957 
958 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
959 		int ret;
960 
961 		ret = tdmr_populate_rsvd_areas(tdmr_entry(tdmr_list, i),
962 				tmb_list, tdmr_list, max_reserved_per_tdmr);
963 		if (ret)
964 			return ret;
965 	}
966 
967 	return 0;
968 }
969 
970 /*
971  * Construct a list of TDMRs on the preallocated space in @tdmr_list
972  * to cover all TDX memory regions in @tmb_list based on the TDX module
973  * TDMR global information in @sysinfo_tdmr.
974  */
975 static __init int construct_tdmrs(struct list_head *tmb_list,
976 				  struct tdmr_info_list *tdmr_list,
977 				  struct tdx_sys_info_tdmr *sysinfo_tdmr)
978 {
979 	u16 pamt_entry_size[TDX_PS_NR] = {
980 		sysinfo_tdmr->pamt_4k_entry_size,
981 		sysinfo_tdmr->pamt_2m_entry_size,
982 		sysinfo_tdmr->pamt_1g_entry_size,
983 	};
984 	int ret;
985 
986 	ret = fill_out_tdmrs(tmb_list, tdmr_list);
987 	if (ret)
988 		return ret;
989 
990 	ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, pamt_entry_size);
991 	if (ret)
992 		return ret;
993 
994 	ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list,
995 			sysinfo_tdmr->max_reserved_per_tdmr);
996 	if (ret)
997 		tdmrs_free_pamt_all(tdmr_list);
998 
999 	/*
1000 	 * The tdmr_info_list is read-only from here on out.
1001 	 * Ensure that these writes are seen by other CPUs.
1002 	 * Pairs with a smp_rmb() in is_pamt_page().
1003 	 */
1004 	smp_wmb();
1005 
1006 	return ret;
1007 }
1008 
1009 static __init int config_tdx_module(struct tdmr_info_list *tdmr_list,
1010 				    u64 global_keyid)
1011 {
1012 	struct tdx_module_args args = {};
1013 	u64 *tdmr_pa_array;
1014 	size_t array_sz;
1015 	int i, ret;
1016 
1017 	/*
1018 	 * TDMRs are passed to the TDX module via an array of physical
1019 	 * addresses of each TDMR.  The array itself also has certain
1020 	 * alignment requirement.
1021 	 */
1022 	array_sz = tdmr_list->nr_consumed_tdmrs * sizeof(u64);
1023 	array_sz = roundup_pow_of_two(array_sz);
1024 	if (array_sz < TDMR_INFO_PA_ARRAY_ALIGNMENT)
1025 		array_sz = TDMR_INFO_PA_ARRAY_ALIGNMENT;
1026 
1027 	tdmr_pa_array = kzalloc(array_sz, GFP_KERNEL);
1028 	if (!tdmr_pa_array)
1029 		return -ENOMEM;
1030 
1031 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
1032 		tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i));
1033 
1034 	args.rcx = __pa(tdmr_pa_array);
1035 	args.rdx = tdmr_list->nr_consumed_tdmrs;
1036 	args.r8 = global_keyid;
1037 	ret = seamcall_prerr(TDH_SYS_CONFIG, &args);
1038 
1039 	/* Free the array as it is not required anymore. */
1040 	kfree(tdmr_pa_array);
1041 
1042 	return ret;
1043 }
1044 
/*
 * Issue TDH_SYS_KEY_CONFIG with all-zero arguments.  Run through
 * smp_call_on_cpu() by config_global_keyid(); @unused is ignored.
 */
static __init int do_global_key_config(void *unused)
{
	struct tdx_module_args args = {};

	return seamcall_prerr(TDH_SYS_KEY_CONFIG, &args);
}
1051 
1052 /*
1053  * Attempt to configure the global KeyID on all physical packages.
1054  *
1055  * This requires running code on at least one CPU in each package.
1056  * TDMR initialization) will fail will fail if any package in the
1057  * system has no online CPUs.
1058  *
1059  * This code takes no affirmative steps to online CPUs.  Callers (aka.
1060  * KVM) can ensure success by ensuring sufficient CPUs are online and
1061  * can run SEAMCALLs.
1062  */
1063 static __init int config_global_keyid(void)
1064 {
1065 	cpumask_var_t packages;
1066 	int cpu, ret = -EINVAL;
1067 
1068 	if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
1069 		return -ENOMEM;
1070 
1071 	/*
1072 	 * Hardware doesn't guarantee cache coherency across different
1073 	 * KeyIDs.  The kernel needs to flush PAMT's dirty cachelines
1074 	 * (associated with KeyID 0) before the TDX module can use the
1075 	 * global KeyID to access the PAMT.  Given PAMTs are potentially
1076 	 * large (~1/256th of system RAM), just use WBINVD.
1077 	 */
1078 	wbinvd_on_all_cpus();
1079 
1080 	for_each_online_cpu(cpu) {
1081 		/*
1082 		 * The key configuration only needs to be done once per
1083 		 * package and will return an error if configured more
1084 		 * than once.  Avoid doing it multiple times per package.
1085 		 */
1086 		if (cpumask_test_and_set_cpu(topology_physical_package_id(cpu),
1087 					packages))
1088 			continue;
1089 
1090 		/*
1091 		 * TDH.SYS.KEY.CONFIG cannot run concurrently on
1092 		 * different cpus.  Do it one by one.
1093 		 */
1094 		ret = smp_call_on_cpu(cpu, do_global_key_config, NULL, true);
1095 		if (ret)
1096 			break;
1097 	}
1098 
1099 	free_cpumask_var(packages);
1100 	return ret;
1101 }
1102 
1103 static __init int init_tdmr(struct tdmr_info *tdmr)
1104 {
1105 	u64 next;
1106 
1107 	/*
1108 	 * Initializing a TDMR can be time consuming.  To avoid long
1109 	 * SEAMCALLs, the TDX module may only initialize a part of the
1110 	 * TDMR in each call.
1111 	 */
1112 	do {
1113 		struct tdx_module_args args = {
1114 			.rcx = tdmr->base,
1115 		};
1116 		int ret;
1117 
1118 		ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
1119 		if (ret)
1120 			return ret;
1121 		/*
1122 		 * RDX contains 'next-to-initialize' address if
1123 		 * TDH.SYS.TDMR.INIT did not fully complete and
1124 		 * should be retried.
1125 		 */
1126 		next = args.rdx;
1127 		cond_resched();
1128 		/* Keep making SEAMCALLs until the TDMR is done */
1129 	} while (next < tdmr->base + tdmr->size);
1130 
1131 	return 0;
1132 }
1133 
1134 static __init int init_tdmrs(struct tdmr_info_list *tdmr_list)
1135 {
1136 	int i;
1137 
1138 	/*
1139 	 * This operation is costly.  It can be parallelized,
1140 	 * but keep it simple for now.
1141 	 */
1142 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
1143 		int ret;
1144 
1145 		ret = init_tdmr(tdmr_entry(tdmr_list, i));
1146 		if (ret)
1147 			return ret;
1148 	}
1149 
1150 	return 0;
1151 }
1152 
1153 static __init int init_tdx_module(void)
1154 {
1155 	int ret;
1156 
1157 	ret = get_tdx_sys_info(&tdx_sysinfo);
1158 	if (ret)
1159 		return ret;
1160 
1161 	/* Check whether the kernel can support this module */
1162 	ret = check_features(&tdx_sysinfo);
1163 	if (ret)
1164 		return ret;
1165 
1166 	/*
1167 	 * To keep things simple, assume that all TDX-protected memory
1168 	 * will come from the page allocator.  Make sure all pages in the
1169 	 * page allocator are TDX-usable memory.
1170 	 *
1171 	 * Build the list of "TDX-usable" memory regions which cover all
1172 	 * pages in the page allocator to guarantee that.  Do it while
1173 	 * holding mem_hotplug_lock read-lock as the memory hotplug code
1174 	 * path reads the @tdx_memlist to reject any new memory.
1175 	 */
1176 	get_online_mems();
1177 
1178 	ret = build_tdx_memlist(&tdx_memlist);
1179 	if (ret)
1180 		goto out_put_tdxmem;
1181 
1182 	/* Allocate enough space for constructing TDMRs */
1183 	ret = alloc_tdmr_list(&tdx_tdmr_list, &tdx_sysinfo.tdmr);
1184 	if (ret)
1185 		goto err_free_tdxmem;
1186 
1187 	/* Cover all TDX-usable memory regions in TDMRs */
1188 	ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdx_sysinfo.tdmr);
1189 	if (ret)
1190 		goto err_free_tdmrs;
1191 
1192 	/* Pass the TDMRs and the global KeyID to the TDX module */
1193 	ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid);
1194 	if (ret)
1195 		goto err_free_pamts;
1196 
1197 	/* Config the key of global KeyID on all packages */
1198 	ret = config_global_keyid();
1199 	if (ret)
1200 		goto err_reset_pamts;
1201 
1202 	/* Initialize TDMRs to complete the TDX module initialization */
1203 	ret = init_tdmrs(&tdx_tdmr_list);
1204 	if (ret)
1205 		goto err_reset_pamts;
1206 
1207 	pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
1208 
1209 out_put_tdxmem:
1210 	/*
1211 	 * @tdx_memlist is written here and read at memory hotplug time.
1212 	 * Lock out memory hotplug code while building it.
1213 	 */
1214 	put_online_mems();
1215 	return ret;
1216 
1217 err_reset_pamts:
1218 	/*
1219 	 * Part of PAMTs may already have been initialized by the
1220 	 * TDX module.  Flush cache before returning PAMTs back
1221 	 * to the kernel.
1222 	 */
1223 	wbinvd_on_all_cpus();
1224 	tdmrs_quirk_reset_pamt_all(&tdx_tdmr_list);
1225 err_free_pamts:
1226 	tdmrs_free_pamt_all(&tdx_tdmr_list);
1227 err_free_tdmrs:
1228 	free_tdmr_list(&tdx_tdmr_list);
1229 err_free_tdxmem:
1230 	free_tdx_memlist(&tdx_memlist);
1231 	goto out_put_tdxmem;
1232 }
1233 
1234 static __init int tdx_enable(void)
1235 {
1236 	enum cpuhp_state state;
1237 	int ret;
1238 
1239 	if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
1240 		pr_err("TDX not supported by the host platform\n");
1241 		return -ENODEV;
1242 	}
1243 
1244 	if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
1245 		pr_err("XSAVE is required for TDX\n");
1246 		return -EINVAL;
1247 	}
1248 
1249 	if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
1250 		pr_err("MOVDIR64B is required for TDX\n");
1251 		return -EINVAL;
1252 	}
1253 
1254 	if (!cpu_feature_enabled(X86_FEATURE_SELFSNOOP)) {
1255 		pr_err("Self-snoop is required for TDX\n");
1256 		return -ENODEV;
1257 	}
1258 
1259 	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "virt/tdx:online",
1260 				  tdx_online_cpu, tdx_offline_cpu);
1261 	if (state < 0)
1262 		return state;
1263 
1264 	ret = init_tdx_module();
1265 	if (ret) {
1266 		pr_err("TDX-Module initialization failed (%d)\n", ret);
1267 		cpuhp_remove_state(state);
1268 		return ret;
1269 	}
1270 
1271 	register_syscore(&tdx_syscore);
1272 
1273 	tdx_module_initialized = true;
1274 	pr_info("TDX-Module initialized\n");
1275 	return 0;
1276 }
1277 subsys_initcall(tdx_enable);
1278 
1279 static bool is_pamt_page(unsigned long phys)
1280 {
1281 	struct tdmr_info_list *tdmr_list = &tdx_tdmr_list;
1282 	int i;
1283 
1284 	/* Ensure that all remote 'tdmr_list' writes are visible: */
1285 	smp_rmb();
1286 
1287 	/*
1288 	 * The TDX module is no longer returning TDX_SYS_NOT_READY and
1289 	 * is initialized.  The 'tdmr_list' was initialized long ago
1290 	 * and is now read-only.
1291 	 */
1292 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
1293 		unsigned long base, size;
1294 
1295 		tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
1296 
1297 		if (phys >= base && phys < (base + size))
1298 			return true;
1299 	}
1300 
1301 	return false;
1302 }
1303 
1304 /*
1305  * Return whether the memory page at the given physical address is TDX
1306  * private memory or not.
1307  *
1308  * This can be imprecise for two known reasons:
1309  * 1. PAMTs are private memory and exist before the TDX module is
1310  *    ready and TDH_PHYMEM_PAGE_RDMD works.  This is a relatively
1311  *    short window that occurs once per boot.
1312  * 2. TDH_PHYMEM_PAGE_RDMD reflects the TDX module's knowledge of the
1313  *    page.  However, the page can still cause #MC until it has been
1314  *    fully converted to shared using 64-byte writes like MOVDIR64B.
1315  *    Buggy hosts might still leave #MC-causing memory in place which
1316  *    this function can not detect.
1317  */
1318 static bool paddr_is_tdx_private(unsigned long phys)
1319 {
1320 	struct tdx_module_args args = {
1321 		.rcx = phys & PAGE_MASK,
1322 	};
1323 	u64 sret;
1324 
1325 	if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
1326 		return false;
1327 
1328 	/* Get page type from the TDX module */
1329 	sret = __seamcall_dirty_cache(__seamcall_ret, TDH_PHYMEM_PAGE_RDMD, &args);
1330 
1331 	/*
1332 	 * The SEAMCALL will not return success unless there is a
1333 	 * working, "ready" TDX module.  Assume an absence of TDX
1334 	 * private pages until SEAMCALL is working.
1335 	 */
1336 	if (sret)
1337 		return false;
1338 
1339 	/*
1340 	 * SEAMCALL was successful -- read page type (via RCX):
1341 	 *
1342 	 *  - PT_NDA:	Page is not used by the TDX module
1343 	 *  - PT_RSVD:	Reserved for Non-TDX use
1344 	 *  - Others:	Page is used by the TDX module
1345 	 *
1346 	 * Note PAMT pages are marked as PT_RSVD but they are also TDX
1347 	 * private memory.
1348 	 */
1349 	switch (args.rcx) {
1350 	case PT_NDA:
1351 		return false;
1352 	case PT_RSVD:
1353 		return is_pamt_page(phys);
1354 	default:
1355 		return true;
1356 	}
1357 }
1358 
1359 /*
1360  * Some TDX-capable CPUs have an erratum.  A write to TDX private
1361  * memory poisons that memory, and a subsequent read of that memory
1362  * triggers #MC.
1363  *
1364  * Help distinguish erratum-triggered #MCs from a normal hardware one.
1365  * Just print additional message to show such #MC may be result of the
1366  * erratum.
1367  */
1368 const char *tdx_dump_mce_info(struct mce *m)
1369 {
1370 	if (!m || !mce_is_memory_error(m) || !mce_usable_address(m))
1371 		return NULL;
1372 
1373 	if (!paddr_is_tdx_private(m->addr))
1374 		return NULL;
1375 
1376 	return "TDX private memory error. Possible kernel bug.";
1377 }
1378 
1379 static __init int record_keyid_partitioning(u32 *tdx_keyid_start,
1380 					    u32 *nr_tdx_keyids)
1381 {
1382 	u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids;
1383 	int ret;
1384 
1385 	/*
1386 	 * IA32_MKTME_KEYID_PARTIONING:
1387 	 *   Bit [31:0]:	Number of MKTME KeyIDs.
1388 	 *   Bit [63:32]:	Number of TDX private KeyIDs.
1389 	 */
1390 	ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids,
1391 			&_nr_tdx_keyids);
1392 	if (ret || !_nr_tdx_keyids)
1393 		return -EINVAL;
1394 
1395 	/* TDX KeyIDs start after the last MKTME KeyID. */
1396 	_tdx_keyid_start = _nr_mktme_keyids + 1;
1397 
1398 	*tdx_keyid_start = _tdx_keyid_start;
1399 	*nr_tdx_keyids = _nr_tdx_keyids;
1400 
1401 	return 0;
1402 }
1403 
1404 static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
1405 {
1406 	struct tdx_memblock *tmb;
1407 
1408 	/*
1409 	 * This check assumes that the start_pfn<->end_pfn range does not
1410 	 * cross multiple @tdx_memlist entries.  A single memory online
1411 	 * event across multiple memblocks (from which @tdx_memlist
1412 	 * entries are derived at the time of module initialization) is
1413 	 * not possible.  This is because memory offline/online is done
1414 	 * on granularity of 'struct memory_block', and the hotpluggable
1415 	 * memory region (one memblock) must be multiple of memory_block.
1416 	 */
1417 	list_for_each_entry(tmb, &tdx_memlist, list) {
1418 		if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
1419 			return true;
1420 	}
1421 	return false;
1422 }
1423 
1424 static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
1425 			       void *v)
1426 {
1427 	struct memory_notify *mn = v;
1428 
1429 	if (action != MEM_GOING_ONLINE)
1430 		return NOTIFY_OK;
1431 
1432 	/*
1433 	 * Empty list means TDX isn't enabled.  Allow any memory
1434 	 * to go online.
1435 	 */
1436 	if (list_empty(&tdx_memlist))
1437 		return NOTIFY_OK;
1438 
1439 	/*
1440 	 * The TDX memory configuration is static and can not be
1441 	 * changed.  Reject onlining any memory which is outside of
1442 	 * the static configuration whether it supports TDX or not.
1443 	 */
1444 	if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
1445 		return NOTIFY_OK;
1446 
1447 	return NOTIFY_BAD;
1448 }
1449 
1450 static struct notifier_block tdx_memory_nb = {
1451 	.notifier_call = tdx_memory_notifier,
1452 };
1453 
1454 static void __init check_tdx_erratum(void)
1455 {
1456 	/*
1457 	 * These CPUs have an erratum.  A partial write from non-TD
1458 	 * software (e.g. via MOVNTI variants or UC/WC mapping) to TDX
1459 	 * private memory poisons that memory, and a subsequent read of
1460 	 * that memory triggers #MC.
1461 	 */
1462 	switch (boot_cpu_data.x86_vfm) {
1463 	case INTEL_SAPPHIRERAPIDS_X:
1464 	case INTEL_EMERALDRAPIDS_X:
1465 		setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
1466 	}
1467 }
1468 
1469 void __init tdx_init(void)
1470 {
1471 	u32 tdx_keyid_start, nr_tdx_keyids;
1472 	int err;
1473 
1474 	err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids);
1475 	if (err)
1476 		return;
1477 
1478 	pr_info("BIOS enabled: private KeyID range [%u, %u)\n",
1479 			tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids);
1480 
1481 	/*
1482 	 * The TDX module itself requires one 'global KeyID' to protect
1483 	 * its metadata.  If there's only one TDX KeyID, there won't be
1484 	 * any left for TDX guests thus there's no point to enable TDX
1485 	 * at all.
1486 	 */
1487 	if (nr_tdx_keyids < 2) {
1488 		pr_err("initialization failed: too few private KeyIDs available.\n");
1489 		return;
1490 	}
1491 
1492 	/*
1493 	 * At this point, hibernation_available() indicates whether or
1494 	 * not hibernation support has been permanently disabled.
1495 	 */
1496 	if (hibernation_available()) {
1497 		pr_err("initialization failed: Hibernation support is enabled\n");
1498 		return;
1499 	}
1500 
1501 	err = register_memory_notifier(&tdx_memory_nb);
1502 	if (err) {
1503 		pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
1504 				err);
1505 		return;
1506 	}
1507 
1508 #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
1509 	pr_info("Disable ACPI S3. Turn off TDX in the BIOS to use ACPI S3.\n");
1510 	acpi_suspend_lowlevel = NULL;
1511 #endif
1512 
1513 	/*
1514 	 * Just use the first TDX KeyID as the 'global KeyID' and
1515 	 * leave the rest for TDX guests.
1516 	 */
1517 	tdx_global_keyid = tdx_keyid_start;
1518 	tdx_guest_keyid_start = tdx_keyid_start + 1;
1519 	tdx_nr_guest_keyids = nr_tdx_keyids - 1;
1520 
1521 	setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM);
1522 
1523 	check_tdx_erratum();
1524 }
1525 
1526 const struct tdx_sys_info *tdx_get_sysinfo(void)
1527 {
1528 	if (!tdx_module_initialized)
1529 		return NULL;
1530 
1531 	return (const struct tdx_sys_info *)&tdx_sysinfo;
1532 }
1533 EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo);
1534 
1535 u32 tdx_get_nr_guest_keyids(void)
1536 {
1537 	return tdx_nr_guest_keyids;
1538 }
1539 EXPORT_SYMBOL_FOR_KVM(tdx_get_nr_guest_keyids);
1540 
1541 int tdx_guest_keyid_alloc(void)
1542 {
1543 	return ida_alloc_range(&tdx_guest_keyid_pool, tdx_guest_keyid_start,
1544 			       tdx_guest_keyid_start + tdx_nr_guest_keyids - 1,
1545 			       GFP_KERNEL);
1546 }
1547 EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_alloc);
1548 
1549 void tdx_guest_keyid_free(unsigned int keyid)
1550 {
1551 	ida_free(&tdx_guest_keyid_pool, keyid);
1552 }
1553 EXPORT_SYMBOL_FOR_KVM(tdx_guest_keyid_free);
1554 
1555 static inline u64 tdx_tdr_pa(struct tdx_td *td)
1556 {
1557 	return page_to_phys(td->tdr_page);
1558 }
1559 
1560 /*
1561  * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether
1562  * a CLFLUSH of pages is required before handing them to the TDX module.
1563  * Be conservative and make the code simpler by doing the CLFLUSH
1564  * unconditionally.
1565  */
1566 static void tdx_clflush_page(struct page *page)
1567 {
1568 	clflush_cache_range(page_to_virt(page), PAGE_SIZE);
1569 }
1570 
1571 noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args)
1572 {
1573 	args->rcx = td->tdvpr_pa;
1574 
1575 	return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args);
1576 }
1577 EXPORT_SYMBOL_FOR_KVM(tdh_vp_enter);
1578 
1579 u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
1580 {
1581 	struct tdx_module_args args = {
1582 		.rcx = page_to_phys(tdcs_page),
1583 		.rdx = tdx_tdr_pa(td),
1584 	};
1585 
1586 	tdx_clflush_page(tdcs_page);
1587 	return seamcall(TDH_MNG_ADDCX, &args);
1588 }
1589 EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx);
1590 
1591 u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2)
1592 {
1593 	struct tdx_module_args args = {
1594 		.rcx = gpa,
1595 		.rdx = tdx_tdr_pa(td),
1596 		.r8 = page_to_phys(page),
1597 		.r9 = page_to_phys(source),
1598 	};
1599 	u64 ret;
1600 
1601 	tdx_clflush_page(page);
1602 	ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);
1603 
1604 	*ext_err1 = args.rcx;
1605 	*ext_err2 = args.rdx;
1606 
1607 	return ret;
1608 }
1609 EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_add);
1610 
1611 u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
1612 {
1613 	struct tdx_module_args args = {
1614 		.rcx = gpa | level,
1615 		.rdx = tdx_tdr_pa(td),
1616 		.r8 = page_to_phys(page),
1617 	};
1618 	u64 ret;
1619 
1620 	tdx_clflush_page(page);
1621 	ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args);
1622 
1623 	*ext_err1 = args.rcx;
1624 	*ext_err2 = args.rdx;
1625 
1626 	return ret;
1627 }
1628 EXPORT_SYMBOL_FOR_KVM(tdh_mem_sept_add);
1629 
1630 u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
1631 {
1632 	struct tdx_module_args args = {
1633 		.rcx = page_to_phys(tdcx_page),
1634 		.rdx = vp->tdvpr_pa,
1635 	};
1636 
1637 	tdx_clflush_page(tdcx_page);
1638 	return seamcall(TDH_VP_ADDCX, &args);
1639 }
1640 EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx);
1641 
1642 u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
1643 {
1644 	struct tdx_module_args args = {
1645 		.rcx = gpa | level,
1646 		.rdx = tdx_tdr_pa(td),
1647 		.r8 = page_to_phys(page),
1648 	};
1649 	u64 ret;
1650 
1651 	tdx_clflush_page(page);
1652 	ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);
1653 
1654 	*ext_err1 = args.rcx;
1655 	*ext_err2 = args.rdx;
1656 
1657 	return ret;
1658 }
1659 EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_aug);
1660 
1661 u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2)
1662 {
1663 	struct tdx_module_args args = {
1664 		.rcx = gpa | level,
1665 		.rdx = tdx_tdr_pa(td),
1666 	};
1667 	u64 ret;
1668 
1669 	ret = seamcall_ret(TDH_MEM_RANGE_BLOCK, &args);
1670 
1671 	*ext_err1 = args.rcx;
1672 	*ext_err2 = args.rdx;
1673 
1674 	return ret;
1675 }
1676 EXPORT_SYMBOL_FOR_KVM(tdh_mem_range_block);
1677 
1678 u64 tdh_mng_key_config(struct tdx_td *td)
1679 {
1680 	struct tdx_module_args args = {
1681 		.rcx = tdx_tdr_pa(td),
1682 	};
1683 
1684 	return seamcall(TDH_MNG_KEY_CONFIG, &args);
1685 }
1686 EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_config);
1687 
1688 u64 tdh_mng_create(struct tdx_td *td, u16 hkid)
1689 {
1690 	struct tdx_module_args args = {
1691 		.rcx = tdx_tdr_pa(td),
1692 		.rdx = hkid,
1693 	};
1694 
1695 	tdx_clflush_page(td->tdr_page);
1696 	return seamcall(TDH_MNG_CREATE, &args);
1697 }
1698 EXPORT_SYMBOL_FOR_KVM(tdh_mng_create);
1699 
1700 u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp)
1701 {
1702 	struct tdx_module_args args = {
1703 		.rcx = vp->tdvpr_pa,
1704 		.rdx = tdx_tdr_pa(td),
1705 	};
1706 
1707 	tdx_clflush_page(vp->tdvpr_page);
1708 	return seamcall(TDH_VP_CREATE, &args);
1709 }
1710 EXPORT_SYMBOL_FOR_KVM(tdh_vp_create);
1711 
1712 u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data)
1713 {
1714 	struct tdx_module_args args = {
1715 		.rcx = tdx_tdr_pa(td),
1716 		.rdx = field,
1717 	};
1718 	u64 ret;
1719 
1720 	ret = seamcall_ret(TDH_MNG_RD, &args);
1721 
1722 	/* R8: Content of the field, or 0 in case of error. */
1723 	*data = args.r8;
1724 
1725 	return ret;
1726 }
1727 EXPORT_SYMBOL_FOR_KVM(tdh_mng_rd);
1728 
1729 u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2)
1730 {
1731 	struct tdx_module_args args = {
1732 		.rcx = gpa,
1733 		.rdx = tdx_tdr_pa(td),
1734 	};
1735 	u64 ret;
1736 
1737 	ret = seamcall_ret(TDH_MR_EXTEND, &args);
1738 
1739 	*ext_err1 = args.rcx;
1740 	*ext_err2 = args.rdx;
1741 
1742 	return ret;
1743 }
1744 EXPORT_SYMBOL_FOR_KVM(tdh_mr_extend);
1745 
1746 u64 tdh_mr_finalize(struct tdx_td *td)
1747 {
1748 	struct tdx_module_args args = {
1749 		.rcx = tdx_tdr_pa(td),
1750 	};
1751 
1752 	return seamcall(TDH_MR_FINALIZE, &args);
1753 }
1754 EXPORT_SYMBOL_FOR_KVM(tdh_mr_finalize);
1755 
1756 u64 tdh_vp_flush(struct tdx_vp *vp)
1757 {
1758 	struct tdx_module_args args = {
1759 		.rcx = vp->tdvpr_pa,
1760 	};
1761 
1762 	return seamcall(TDH_VP_FLUSH, &args);
1763 }
1764 EXPORT_SYMBOL_FOR_KVM(tdh_vp_flush);
1765 
1766 u64 tdh_mng_vpflushdone(struct tdx_td *td)
1767 {
1768 	struct tdx_module_args args = {
1769 		.rcx = tdx_tdr_pa(td),
1770 	};
1771 
1772 	return seamcall(TDH_MNG_VPFLUSHDONE, &args);
1773 }
1774 EXPORT_SYMBOL_FOR_KVM(tdh_mng_vpflushdone);
1775 
1776 u64 tdh_mng_key_freeid(struct tdx_td *td)
1777 {
1778 	struct tdx_module_args args = {
1779 		.rcx = tdx_tdr_pa(td),
1780 	};
1781 
1782 	return seamcall(TDH_MNG_KEY_FREEID, &args);
1783 }
1784 EXPORT_SYMBOL_FOR_KVM(tdh_mng_key_freeid);
1785 
1786 u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err)
1787 {
1788 	struct tdx_module_args args = {
1789 		.rcx = tdx_tdr_pa(td),
1790 		.rdx = td_params,
1791 	};
1792 	u64 ret;
1793 
1794 	ret = seamcall_ret(TDH_MNG_INIT, &args);
1795 
1796 	*extended_err = args.rcx;
1797 
1798 	return ret;
1799 }
1800 EXPORT_SYMBOL_FOR_KVM(tdh_mng_init);
1801 
1802 u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data)
1803 {
1804 	struct tdx_module_args args = {
1805 		.rcx = vp->tdvpr_pa,
1806 		.rdx = field,
1807 	};
1808 	u64 ret;
1809 
1810 	ret = seamcall_ret(TDH_VP_RD, &args);
1811 
1812 	/* R8: Content of the field, or 0 in case of error. */
1813 	*data = args.r8;
1814 
1815 	return ret;
1816 }
1817 EXPORT_SYMBOL_FOR_KVM(tdh_vp_rd);
1818 
1819 u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask)
1820 {
1821 	struct tdx_module_args args = {
1822 		.rcx = vp->tdvpr_pa,
1823 		.rdx = field,
1824 		.r8 = data,
1825 		.r9 = mask,
1826 	};
1827 
1828 	return seamcall(TDH_VP_WR, &args);
1829 }
1830 EXPORT_SYMBOL_FOR_KVM(tdh_vp_wr);
1831 
1832 u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid)
1833 {
1834 	struct tdx_module_args args = {
1835 		.rcx = vp->tdvpr_pa,
1836 		.rdx = initial_rcx,
1837 		.r8 = x2apicid,
1838 	};
1839 
1840 	/* apicid requires version == 1. */
1841 	return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args);
1842 }
1843 EXPORT_SYMBOL_FOR_KVM(tdh_vp_init);
1844 
1845 /*
1846  * TDX ABI defines output operands as PT, OWNER and SIZE. These are TDX defined fomats.
1847  * So despite the names, they must be interpted specially as described by the spec. Return
1848  * them only for error reporting purposes.
1849  */
1850 u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
1851 {
1852 	struct tdx_module_args args = {
1853 		.rcx = page_to_phys(page),
1854 	};
1855 	u64 ret;
1856 
1857 	ret = seamcall_ret(TDH_PHYMEM_PAGE_RECLAIM, &args);
1858 
1859 	*tdx_pt = args.rcx;
1860 	*tdx_owner = args.rdx;
1861 	*tdx_size = args.r8;
1862 
1863 	return ret;
1864 }
1865 EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_reclaim);
1866 
1867 u64 tdh_mem_track(struct tdx_td *td)
1868 {
1869 	struct tdx_module_args args = {
1870 		.rcx = tdx_tdr_pa(td),
1871 	};
1872 
1873 	return seamcall(TDH_MEM_TRACK, &args);
1874 }
1875 EXPORT_SYMBOL_FOR_KVM(tdh_mem_track);
1876 
1877 u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2)
1878 {
1879 	struct tdx_module_args args = {
1880 		.rcx = gpa | level,
1881 		.rdx = tdx_tdr_pa(td),
1882 	};
1883 	u64 ret;
1884 
1885 	ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args);
1886 
1887 	*ext_err1 = args.rcx;
1888 	*ext_err2 = args.rdx;
1889 
1890 	return ret;
1891 }
1892 EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_remove);
1893 
1894 u64 tdh_phymem_cache_wb(bool resume)
1895 {
1896 	struct tdx_module_args args = {
1897 		.rcx = resume ? 1 : 0,
1898 	};
1899 
1900 	return seamcall(TDH_PHYMEM_CACHE_WB, &args);
1901 }
1902 EXPORT_SYMBOL_FOR_KVM(tdh_phymem_cache_wb);
1903 
1904 u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
1905 {
1906 	struct tdx_module_args args = {};
1907 
1908 	args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
1909 
1910 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
1911 }
1912 EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr);
1913 
1914 u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
1915 {
1916 	struct tdx_module_args args = {};
1917 
1918 	args.rcx = mk_keyed_paddr(hkid, page);
1919 
1920 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
1921 }
1922 EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
1923 
#ifdef CONFIG_KEXEC_CORE
/*
 * Write back and invalidate this CPU's caches ahead of kexec if the
 * per-CPU 'cache_state_incoherent' flag is set, then clear the flag.
 * Must be called with preemption disabled.
 */
void tdx_cpu_flush_cache_for_kexec(void)
{
	lockdep_assert_preemption_disabled();

	if (this_cpu_read(cache_state_incoherent)) {
		/*
		 * Private memory cachelines need to be clean at the time
		 * of kexec.  Write them back now, as the caller promises
		 * that there should be no more SEAMCALLs on this CPU.
		 */
		wbinvd();
		this_cpu_write(cache_state_incoherent, false);
	}
}
EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec);
#endif
1942