// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2023 Intel Corporation.
 *
 * Intel Trusted Domain Extensions (TDX) support
 */

#define pr_fmt(fmt)     "virt/tdx: " fmt

#include <linux/types.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/cpu.h>
#include <linux/spinlock.h>
#include <linux/percpu-defs.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/minmax.h>
#include <linux/sizes.h>
#include <linux/pfn.h>
#include <linux/align.h>
#include <linux/sort.h>
#include <linux/log2.h>
#include <linux/acpi.h>
#include <linux/suspend.h>
#include <asm/page.h>
#include <asm/special_insns.h>
#include <asm/msr-index.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include "tdx.h"

static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
static u32 tdx_nr_guest_keyids __ro_after_init;

static DEFINE_PER_CPU(bool, tdx_lp_initialized);

static struct tdmr_info_list tdx_tdmr_list;

static enum tdx_module_status_t tdx_module_status;
static DEFINE_MUTEX(tdx_module_lock);

/* All TDX-usable memory regions.  Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);

typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);

static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
{
        pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
}

static inline void seamcall_err_ret(u64 fn, u64 err,
                                    struct tdx_module_args *args)
{
        seamcall_err(fn, err, args);
        pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
               args->rcx, args->rdx, args->r8);
        pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
               args->r9, args->r10, args->r11);
}

static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
                                 u64 fn, struct tdx_module_args *args)
{
        u64 sret = sc_retry(func, fn, args);

        if (sret == TDX_SUCCESS)
                return 0;

        if (sret == TDX_SEAMCALL_VMFAILINVALID)
                return -ENODEV;

        if (sret == TDX_SEAMCALL_GP)
                return -EOPNOTSUPP;

        if (sret == TDX_SEAMCALL_UD)
                return -EACCES;

        err_func(fn, sret, args);
        return -EIO;
}

#define seamcall_prerr(__fn, __args)                                          \
        sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))

#define seamcall_prerr_ret(__fn, __args)                                      \
        sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))

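/*
 * Illustrative sketch (not additional ABI): a typical SEAMCALL through
 * the wrappers above passes leaf-specific inputs and outputs in the
 * GPR fields of 'struct tdx_module_args', e.g.:
 *
 *      struct tdx_module_args args = { .rdx = field_id };
 *      int ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
 *
 *      if (!ret)
 *              data = args.r8;         (R8 carries the leaf's output)
 *
 * See read_sys_metadata_field() below for the real thing.
 */
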
/*
 * Do the module global initialization once and return its result.
 * It can be done on any cpu.  It's always called with interrupts
 * disabled.
 */
static int try_init_module_global(void)
{
        struct tdx_module_args args = {};
        static DEFINE_RAW_SPINLOCK(sysinit_lock);
        static bool sysinit_done;
        static int sysinit_ret;

        lockdep_assert_irqs_disabled();

        raw_spin_lock(&sysinit_lock);

        if (sysinit_done)
                goto out;

        /* RCX is module attributes and all bits are reserved */
        args.rcx = 0;
        sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);

        /*
         * The first SEAMCALL also detects the TDX module, thus
         * it can fail if the TDX module is not loaded.  Print a
         * message to let the user know.
         */
        if (sysinit_ret == -ENODEV)
                pr_err("module not loaded\n");

        sysinit_done = true;
out:
        raw_spin_unlock(&sysinit_lock);
        return sysinit_ret;
}

/**
 * tdx_cpu_enable - Enable TDX on local cpu
 *
 * Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
 * global initialization SEAMCALL if not done) on local cpu to make this
 * cpu be ready to run any other SEAMCALLs.
 *
 * Always call this function via IPI function calls.
 *
 * Return: 0 on success, otherwise errors.
 */
int tdx_cpu_enable(void)
{
        struct tdx_module_args args = {};
        int ret;

        if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
                return -ENODEV;

        lockdep_assert_irqs_disabled();

        if (__this_cpu_read(tdx_lp_initialized))
                return 0;

        /*
         * The TDX module global initialization is the very first step
         * to enable TDX.  Need to do it first (if it hasn't been done)
         * before the per-cpu initialization.
         */
        ret = try_init_module_global();
        if (ret)
                return ret;

        ret = seamcall_prerr(TDH_SYS_LP_INIT, &args);
        if (ret)
                return ret;

        __this_cpu_write(tdx_lp_initialized, true);

        return 0;
}
EXPORT_SYMBOL_GPL(tdx_cpu_enable);

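/*
 * Hypothetical caller sketch (KVM-like; not part of this file's API):
 * tdx_cpu_enable() must run with interrupts disabled on the target
 * cpu, which an IPI function call naturally provides:
 *
 *      static void do_tdx_cpu_enable(void *failed)
 *      {
 *              if (tdx_cpu_enable())
 *                      atomic_inc((atomic_t *)failed);
 *      }
 *
 *      atomic_t failed = ATOMIC_INIT(0);
 *      on_each_cpu(do_tdx_cpu_enable, &failed, 1);
 *
 * on_each_cpu() runs the callback with IRQs disabled on every cpu
 * (via IPI on remote cpus), satisfying the rule documented above.
 */
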
/*
 * Add a memory region as a TDX memory block.  The caller must make sure
 * all memory regions are added in address ascending order and don't
 * overlap.
 */
static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
                            unsigned long end_pfn, int nid)
{
        struct tdx_memblock *tmb;

        tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
        if (!tmb)
                return -ENOMEM;

        INIT_LIST_HEAD(&tmb->list);
        tmb->start_pfn = start_pfn;
        tmb->end_pfn = end_pfn;
        tmb->nid = nid;

        /* @tmb_list is protected by mem_hotplug_lock */
        list_add_tail(&tmb->list, tmb_list);
        return 0;
}

static void free_tdx_memlist(struct list_head *tmb_list)
{
        /* @tmb_list is protected by mem_hotplug_lock */
        while (!list_empty(tmb_list)) {
                struct tdx_memblock *tmb = list_first_entry(tmb_list,
                                struct tdx_memblock, list);

                list_del(&tmb->list);
                kfree(tmb);
        }
}

/*
 * Ensure that all memblock memory regions are convertible to TDX
 * memory.  Once this has been established, stash the memblock
 * ranges off in a secondary structure because memblock is modified
 * in memory hotplug while TDX memory regions are fixed.
 */
static int build_tdx_memlist(struct list_head *tmb_list)
{
        unsigned long start_pfn, end_pfn;
        int i, nid, ret;

        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
                /*
                 * The first 1MB is not reported as TDX convertible memory.
                 * Although the first 1MB is always reserved and won't end up
                 * in the page allocator, it is still in memblock's memory
                 * regions.  Skip them manually to exclude them as TDX memory.
                 */
                start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
                if (start_pfn >= end_pfn)
                        continue;

                /*
                 * Add the memory regions as TDX memory.  The regions in
                 * memblock are already guaranteed to be in address
                 * ascending order and don't overlap.
                 */
                ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid);
                if (ret)
                        goto err;
        }

        return 0;
err:
        free_tdx_memlist(tmb_list);
        return ret;
}

static int read_sys_metadata_field(u64 field_id, u64 *data)
{
        struct tdx_module_args args = {};
        int ret;

        /*
         * TDH.SYS.RD -- reads one global metadata field
         *  - RDX (in): the field to read
         *  - R8 (out): the field data
         */
        args.rdx = field_id;
        ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
        if (ret)
                return ret;

        *data = args.r8;

        return 0;
}

#include "tdx_global_metadata.c"

static int check_features(struct tdx_sys_info *sysinfo)
{
        u64 tdx_features0 = sysinfo->features.tdx_features0;

        if (!(tdx_features0 & TDX_FEATURES0_NO_RBP_MOD)) {
                pr_err("frame pointer (RBP) clobber bug present, upgrade TDX module\n");
                return -EINVAL;
        }

        return 0;
}

/* Calculate the actual TDMR size */
static int tdmr_size_single(u16 max_reserved_per_tdmr)
{
        int tdmr_sz;

        /*
         * The actual size of TDMR depends on the maximum
         * number of reserved areas.
         */
        tdmr_sz = sizeof(struct tdmr_info);
        tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr;

        return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
}

static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
                           struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
        size_t tdmr_sz, tdmr_array_sz;
        void *tdmr_array;

        tdmr_sz = tdmr_size_single(sysinfo_tdmr->max_reserved_per_tdmr);
        tdmr_array_sz = tdmr_sz * sysinfo_tdmr->max_tdmrs;

        /*
         * To keep things simple, allocate all TDMRs together.
         * The buffer needs to be physically contiguous to make
         * sure each TDMR is physically contiguous.
         */
        tdmr_array = alloc_pages_exact(tdmr_array_sz,
                        GFP_KERNEL | __GFP_ZERO);
        if (!tdmr_array)
                return -ENOMEM;

        tdmr_list->tdmrs = tdmr_array;

        /*
         * Keep the size of each TDMR so that the target TDMR at a
         * given index in the TDMR list can be located.
         */
        tdmr_list->tdmr_sz = tdmr_sz;
        tdmr_list->max_tdmrs = sysinfo_tdmr->max_tdmrs;
        tdmr_list->nr_consumed_tdmrs = 0;

        return 0;
}

static void free_tdmr_list(struct tdmr_info_list *tdmr_list)
{
        free_pages_exact(tdmr_list->tdmrs,
                        tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
}

/* Get the TDMR from the list at the given index. */
static struct tdmr_info *tdmr_entry(struct tdmr_info_list *tdmr_list,
                                    int idx)
{
        int tdmr_info_offset = tdmr_list->tdmr_sz * idx;

        return (void *)tdmr_list->tdmrs + tdmr_info_offset;
}

#define TDMR_ALIGNMENT          SZ_1G
#define TDMR_ALIGN_DOWN(_addr)  ALIGN_DOWN((_addr), TDMR_ALIGNMENT)
#define TDMR_ALIGN_UP(_addr)    ALIGN((_addr), TDMR_ALIGNMENT)

static inline u64 tdmr_end(struct tdmr_info *tdmr)
{
        return tdmr->base + tdmr->size;
}

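/*
 * Worked example (illustrative numbers only): a TDX memory block
 * spanning [0x80400000, 0x170000000) is covered by one TDMR of
 * [0x80000000, 0x180000000) after applying TDMR_ALIGN_DOWN() to the
 * start and TDMR_ALIGN_UP() to the end.  The alignment padding is not
 * TDX-usable memory; it is carved back out later as reserved areas by
 * tdmr_populate_rsvd_holes().
 */
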
/*
 * Take the memory referenced in @tmb_list and populate the
 * preallocated @tdmr_list, following all the special alignment
 * and size rules for TDMR.
 */
static int fill_out_tdmrs(struct list_head *tmb_list,
                          struct tdmr_info_list *tdmr_list)
{
        struct tdx_memblock *tmb;
        int tdmr_idx = 0;

        /*
         * Loop over TDX memory regions and fill out TDMRs to cover them.
         * To keep it simple, always try to use one TDMR to cover one
         * memory region.
         *
         * In practice TDX supports at least 64 TDMRs.  A 2-socket system
         * typically consumes fewer than 10 of those.  This code is dumb
         * and simple and may use more TDMRs than is strictly required.
         */
        list_for_each_entry(tmb, tmb_list, list) {
                struct tdmr_info *tdmr = tdmr_entry(tdmr_list, tdmr_idx);
                u64 start, end;

                start = TDMR_ALIGN_DOWN(PFN_PHYS(tmb->start_pfn));
                end = TDMR_ALIGN_UP(PFN_PHYS(tmb->end_pfn));

                /*
                 * A valid size indicates the current TDMR has already
                 * been filled out to cover the previous memory region(s).
                 */
                if (tdmr->size) {
                        /*
                         * Loop to the next if the current memory region
                         * has already been fully covered.
                         */
                        if (end <= tdmr_end(tdmr))
                                continue;

                        /* Otherwise, skip the already covered part. */
                        if (start < tdmr_end(tdmr))
                                start = tdmr_end(tdmr);

                        /*
                         * Create a new TDMR to cover the current memory
                         * region, or the remaining part of it.
                         */
                        tdmr_idx++;
                        if (tdmr_idx >= tdmr_list->max_tdmrs) {
                                pr_warn("initialization failed: TDMRs exhausted.\n");
                                return -ENOSPC;
                        }

                        tdmr = tdmr_entry(tdmr_list, tdmr_idx);
                }

                tdmr->base = start;
                tdmr->size = end - start;
        }

        /* @tdmr_idx is always the index of the last valid TDMR. */
        tdmr_list->nr_consumed_tdmrs = tdmr_idx + 1;

        /*
         * Warn early that the kernel is about to run out of TDMRs.
         *
         * This is an indication that TDMR allocation has to be
         * reworked to be smarter to not run into an issue.
         */
        if (tdmr_list->max_tdmrs - tdmr_list->nr_consumed_tdmrs < TDMR_NR_WARN)
                pr_warn("consumed TDMRs reaching limit: %d used out of %d\n",
                                tdmr_list->nr_consumed_tdmrs,
                                tdmr_list->max_tdmrs);

        return 0;
}

/*
 * Calculate PAMT size given a TDMR and a page size.  The returned
 * PAMT size is always aligned up to 4K page boundary.
 */
static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
                                      u16 pamt_entry_size)
{
        unsigned long pamt_sz, nr_pamt_entries;

        switch (pgsz) {
        case TDX_PS_4K:
                nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
                break;
        case TDX_PS_2M:
                nr_pamt_entries = tdmr->size >> PMD_SHIFT;
                break;
        case TDX_PS_1G:
                nr_pamt_entries = tdmr->size >> PUD_SHIFT;
                break;
        default:
                WARN_ON_ONCE(1);
                return 0;
        }

        pamt_sz = nr_pamt_entries * pamt_entry_size;
        /* TDX requires the PAMT size to be 4K aligned */
        pamt_sz = ALIGN(pamt_sz, PAGE_SIZE);

        return pamt_sz;
}

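/*
 * Worked example, assuming a module-reported 16-byte PAMT entry size
 * for every page size: a 1GB TDMR has 262144 4K pages, 512 2M pages
 * and one 1G page, so it needs 4MB + 8KB + 4KB (after 4K alignment)
 * of PAMT.  That is roughly 1/256th of the covered memory, matching
 * the estimate in the comment in config_global_keyid() below.
 */
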
/*
 * Locate a NUMA node which should hold the allocation of the @tdmr
 * PAMT.  This node will have some memory covered by the TDMR.  The
 * relative amount of memory covered is not considered.
 */
static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
{
        struct tdx_memblock *tmb;

        /*
         * A TDMR must cover at least part of one TMB.  That TMB will end
         * after the TDMR begins.  But, that TMB may have started before
         * the TDMR.  Find the next 'tmb' that _ends_ after this TDMR
         * begins.  Ignore 'tmb' start addresses.  They are irrelevant.
         */
        list_for_each_entry(tmb, tmb_list, list) {
                if (tmb->end_pfn > PHYS_PFN(tdmr->base))
                        return tmb->nid;
        }

        /*
         * Fall back to allocating the TDMR's metadata from node 0 when
         * no TDX memory block can be found.  This should never happen
         * since TDMRs originate from TDX memory blocks.
         */
        pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, falling back to node 0.\n",
                        tdmr->base, tdmr_end(tdmr));
        return 0;
}

/*
 * Allocate PAMTs from the local NUMA node of some memory in @tmb_list
 * within @tdmr, and set up PAMTs for @tdmr.
 */
static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
                            struct list_head *tmb_list,
                            u16 pamt_entry_size[])
{
        unsigned long pamt_base[TDX_PS_NR];
        unsigned long pamt_size[TDX_PS_NR];
        unsigned long tdmr_pamt_base;
        unsigned long tdmr_pamt_size;
        struct page *pamt;
        int pgsz, nid;

        nid = tdmr_get_nid(tdmr, tmb_list);

        /*
         * Calculate the PAMT size for each TDX supported page size
         * and the total PAMT size.
         */
        tdmr_pamt_size = 0;
        for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
                pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz,
                                pamt_entry_size[pgsz]);
                tdmr_pamt_size += pamt_size[pgsz];
        }

        /*
         * Allocate one chunk of physically contiguous memory for all
         * PAMTs.  This helps minimize the PAMT's use of reserved areas
         * in overlapped TDMRs.
         */
        pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL,
                        nid, &node_online_map);
        if (!pamt)
                return -ENOMEM;

        /*
         * Break the contiguous allocation back up into the
         * individual PAMTs for each page size.
         */
        tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT;
        for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
                pamt_base[pgsz] = tdmr_pamt_base;
                tdmr_pamt_base += pamt_size[pgsz];
        }

        tdmr->pamt_4k_base = pamt_base[TDX_PS_4K];
        tdmr->pamt_4k_size = pamt_size[TDX_PS_4K];
        tdmr->pamt_2m_base = pamt_base[TDX_PS_2M];
        tdmr->pamt_2m_size = pamt_size[TDX_PS_2M];
        tdmr->pamt_1g_base = pamt_base[TDX_PS_1G];
        tdmr->pamt_1g_size = pamt_size[TDX_PS_1G];

        return 0;
}

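/*
 * Illustrative layout (not to scale) of the single contiguous PAMT
 * allocation carved up by tdmr_set_up_pamt() above:
 *
 *      pamt_4k_base
 *      |<-- pamt_4k_size -->|<-- pamt_2m_size -->|<-- pamt_1g_size -->|
 *
 * Each per-page-size chunk starts where the previous one ends, so the
 * 4K PAMT base is also the base of the whole allocation.
 */
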
static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
                          unsigned long *pamt_size)
{
        unsigned long pamt_bs, pamt_sz;

        /*
         * The PAMT was allocated in one contiguous unit.  The 4K PAMT
         * should always point to the beginning of that allocation.
         */
        pamt_bs = tdmr->pamt_4k_base;
        pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;

        WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK));

        *pamt_base = pamt_bs;
        *pamt_size = pamt_sz;
}

static void tdmr_do_pamt_func(struct tdmr_info *tdmr,
                void (*pamt_func)(unsigned long base, unsigned long size))
{
        unsigned long pamt_base, pamt_size;

        tdmr_get_pamt(tdmr, &pamt_base, &pamt_size);

        /* Do nothing if PAMT hasn't been allocated for this TDMR */
        if (!pamt_size)
                return;

        if (WARN_ON_ONCE(!pamt_base))
                return;

        pamt_func(pamt_base, pamt_size);
}

static void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
{
        free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
}

static void tdmr_free_pamt(struct tdmr_info *tdmr)
{
        tdmr_do_pamt_func(tdmr, free_pamt);
}

static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
{
        int i;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
                tdmr_free_pamt(tdmr_entry(tdmr_list, i));
}

/* Allocate and set up PAMTs for all TDMRs */
static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
                                 struct list_head *tmb_list,
                                 u16 pamt_entry_size[])
{
        int i, ret = 0;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
                ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list,
                                pamt_entry_size);
                if (ret)
                        goto err;
        }

        return 0;
err:
        tdmrs_free_pamt_all(tdmr_list);
        return ret;
}

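/*
 * Note on the mechanism below: converting a TDX private page back for
 * normal use requires full 64-byte direct writes such as MOVDIR64B;
 * a page can still cause #MC until it has been fully converted with
 * such writes (see the comment above paddr_is_tdx_private() and
 * check_tdx_erratum() for the related erratum).
 */
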
/*
 * Convert TDX private pages back to normal by using MOVDIR64B to
 * clear these pages.  Note this function doesn't flush the cache
 * for these TDX private pages.  The caller must make sure of that.
 */
static void reset_tdx_pages(unsigned long base, unsigned long size)
{
        const void *zero_page = (const void *)page_address(ZERO_PAGE(0));
        unsigned long phys, end;

        end = base + size;
        for (phys = base; phys < end; phys += 64)
                movdir64b(__va(phys), zero_page);

        /*
         * MOVDIR64B uses the WC protocol.  Use a memory barrier to
         * make sure any later user of these pages sees the updated
         * data.
         */
        mb();
}

static void tdmr_reset_pamt(struct tdmr_info *tdmr)
{
        tdmr_do_pamt_func(tdmr, reset_tdx_pages);
}

static void tdmrs_reset_pamt_all(struct tdmr_info_list *tdmr_list)
{
        int i;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
                tdmr_reset_pamt(tdmr_entry(tdmr_list, i));
}

static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
{
        unsigned long pamt_size = 0;
        int i;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
                unsigned long base, size;

                tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
                pamt_size += size;
        }

        return pamt_size / 1024;
}

static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr,
                              u64 size, u16 max_reserved_per_tdmr)
{
        struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas;
        int idx = *p_idx;

        /* Reserved area must be 4K aligned in offset and size */
        if (WARN_ON(addr & ~PAGE_MASK || size & ~PAGE_MASK))
                return -EINVAL;

        if (idx >= max_reserved_per_tdmr) {
                pr_warn("initialization failed: TDMR [0x%llx, 0x%llx): reserved areas exhausted.\n",
                                tdmr->base, tdmr_end(tdmr));
                return -ENOSPC;
        }

        /*
         * Consume one reserved area per call.  Make no effort to
         * optimize or reduce the number of reserved areas which are
         * consumed by contiguous reserved areas, for instance.
         */
        rsvd_areas[idx].offset = addr - tdmr->base;
        rsvd_areas[idx].size = size;

        *p_idx = idx + 1;

        return 0;
}

/*
 * Go through @tmb_list to find holes between memory areas.  If any of
 * those holes fall within @tdmr, set up a TDMR reserved area to cover
 * the hole.
 */
static int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
                                    struct tdmr_info *tdmr,
                                    int *rsvd_idx,
                                    u16 max_reserved_per_tdmr)
{
        struct tdx_memblock *tmb;
        u64 prev_end;
        int ret;

        /*
         * Start looking for reserved blocks at the
         * beginning of the TDMR.
         */
        prev_end = tdmr->base;
        list_for_each_entry(tmb, tmb_list, list) {
                u64 start, end;

                start = PFN_PHYS(tmb->start_pfn);
                end = PFN_PHYS(tmb->end_pfn);

                /* Break if this region is after the TDMR */
                if (start >= tdmr_end(tdmr))
                        break;

                /* Exclude regions before this TDMR */
                if (end < tdmr->base)
                        continue;

                /*
                 * Skip over memory areas that
                 * have already been dealt with.
                 */
                if (start <= prev_end) {
                        prev_end = end;
                        continue;
                }

                /* Add the hole before this region */
                ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
                                start - prev_end,
                                max_reserved_per_tdmr);
                if (ret)
                        return ret;

                prev_end = end;
        }

        /* Add the hole after the last region if it exists. */
        if (prev_end < tdmr_end(tdmr)) {
                ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
                                tdmr_end(tdmr) - prev_end,
                                max_reserved_per_tdmr);
                if (ret)
                        return ret;
        }

        return 0;
}

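/*
 * Worked example (illustrative addresses): with TDX memory blocks
 * [1G, 2G) and [3G, 5G) covered by one TDMR [1G, 5G), the walk above
 * leaves prev_end at 2G when it reaches the second block and inserts
 * one reserved area covering the hole [2G, 3G).  No trailing area is
 * needed because the last block ends exactly at tdmr_end().
 */
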
/*
 * Go through @tdmr_list to find all PAMTs.  If any of those PAMTs
 * overlaps with @tdmr, set up a TDMR reserved area to cover the
 * overlapping part.
 */
static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
                                    struct tdmr_info *tdmr,
                                    int *rsvd_idx,
                                    u16 max_reserved_per_tdmr)
{
        int i, ret;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
                struct tdmr_info *tmp = tdmr_entry(tdmr_list, i);
                unsigned long pamt_base, pamt_size, pamt_end;

                tdmr_get_pamt(tmp, &pamt_base, &pamt_size);
                /* Each TDMR must already have PAMT allocated */
                WARN_ON_ONCE(!pamt_size || !pamt_base);

                pamt_end = pamt_base + pamt_size;
                /* Skip PAMTs outside of the given TDMR */
                if ((pamt_end <= tdmr->base) ||
                                (pamt_base >= tdmr_end(tdmr)))
                        continue;

                /* Only mark the part within the TDMR as reserved */
                if (pamt_base < tdmr->base)
                        pamt_base = tdmr->base;
                if (pamt_end > tdmr_end(tdmr))
                        pamt_end = tdmr_end(tdmr);

                ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, pamt_base,
                                pamt_end - pamt_base,
                                max_reserved_per_tdmr);
                if (ret)
                        return ret;
        }

        return 0;
}

/* Compare function called by sort() for TDMR reserved areas */
static int rsvd_area_cmp_func(const void *a, const void *b)
{
        struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a;
        struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b;

        if (r1->offset + r1->size <= r2->offset)
                return -1;
        if (r1->offset >= r2->offset + r2->size)
                return 1;

        /* Reserved areas cannot overlap.  The caller must guarantee that. */
        WARN_ON_ONCE(1);
        return -1;
}

/*
 * Populate reserved areas for the given @tdmr, including memory holes
 * (via @tmb_list) and PAMTs (via @tdmr_list).
 */
static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
                                    struct list_head *tmb_list,
                                    struct tdmr_info_list *tdmr_list,
                                    u16 max_reserved_per_tdmr)
{
        int ret, rsvd_idx = 0;

        ret = tdmr_populate_rsvd_holes(tmb_list, tdmr, &rsvd_idx,
                        max_reserved_per_tdmr);
        if (ret)
                return ret;

        ret = tdmr_populate_rsvd_pamts(tdmr_list, tdmr, &rsvd_idx,
                        max_reserved_per_tdmr);
        if (ret)
                return ret;

        /* TDX requires reserved areas listed in address ascending order */
        sort(tdmr->reserved_areas, rsvd_idx, sizeof(struct tdmr_reserved_area),
                        rsvd_area_cmp_func, NULL);

        return 0;
}

/*
 * Populate reserved areas for all TDMRs in @tdmr_list, including memory
 * holes (via @tmb_list) and PAMTs.
 */
static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
                                         struct list_head *tmb_list,
                                         u16 max_reserved_per_tdmr)
{
        int i;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
                int ret;

                ret = tdmr_populate_rsvd_areas(tdmr_entry(tdmr_list, i),
                                tmb_list, tdmr_list, max_reserved_per_tdmr);
                if (ret)
                        return ret;
        }

        return 0;
}

/*
 * Construct a list of TDMRs on the preallocated space in @tdmr_list
 * to cover all TDX memory regions in @tmb_list based on the TDX module
 * TDMR global information in @sysinfo_tdmr.
 */
static int construct_tdmrs(struct list_head *tmb_list,
                           struct tdmr_info_list *tdmr_list,
                           struct tdx_sys_info_tdmr *sysinfo_tdmr)
{
        u16 pamt_entry_size[TDX_PS_NR] = {
                sysinfo_tdmr->pamt_4k_entry_size,
                sysinfo_tdmr->pamt_2m_entry_size,
                sysinfo_tdmr->pamt_1g_entry_size,
        };
        int ret;

        ret = fill_out_tdmrs(tmb_list, tdmr_list);
        if (ret)
                return ret;

        ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, pamt_entry_size);
        if (ret)
                return ret;

        ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list,
                        sysinfo_tdmr->max_reserved_per_tdmr);
        if (ret)
                tdmrs_free_pamt_all(tdmr_list);

        /*
         * The tdmr_info_list is read-only from here on out.
         * Ensure that these writes are seen by other CPUs.
         * Pairs with a smp_rmb() in is_pamt_page().
         */
        smp_wmb();

        return ret;
}

static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid)
{
        struct tdx_module_args args = {};
        u64 *tdmr_pa_array;
        size_t array_sz;
        int i, ret;

        /*
         * TDMRs are passed to the TDX module via an array of physical
         * addresses of each TDMR.  The array itself has an alignment
         * requirement as well.
         */
        array_sz = tdmr_list->nr_consumed_tdmrs * sizeof(u64);
        array_sz = roundup_pow_of_two(array_sz);
        if (array_sz < TDMR_INFO_PA_ARRAY_ALIGNMENT)
                array_sz = TDMR_INFO_PA_ARRAY_ALIGNMENT;

        tdmr_pa_array = kzalloc(array_sz, GFP_KERNEL);
        if (!tdmr_pa_array)
                return -ENOMEM;

        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
                tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i));

        args.rcx = __pa(tdmr_pa_array);
        args.rdx = tdmr_list->nr_consumed_tdmrs;
        args.r8 = global_keyid;
        ret = seamcall_prerr(TDH_SYS_CONFIG, &args);

        /* Free the array as it is not required anymore. */
        kfree(tdmr_pa_array);

        return ret;
}

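/*
 * Worked example for the array sizing above, assuming the ABI-defined
 * TDMR_INFO_PA_ARRAY_ALIGNMENT is 512 bytes: five consumed TDMRs need
 * a 40-byte array, which roundup_pow_of_two() grows to 64 bytes and
 * the alignment floor then raises to 512 bytes.  kmalloc() returns
 * naturally-aligned memory for power-of-two sizes, which is what
 * satisfies the alignment requirement without a special allocator.
 */
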
static int do_global_key_config(void *unused)
{
        struct tdx_module_args args = {};

        return seamcall_prerr(TDH_SYS_KEY_CONFIG, &args);
}

/*
 * Attempt to configure the global KeyID on all physical packages.
 *
 * This requires running code on at least one CPU in each package.
 * TDMR initialization will fail if any package in the system has no
 * online CPUs.
 *
 * This code takes no affirmative steps to online CPUs.  Callers (e.g.,
 * KVM) can ensure success by ensuring sufficient CPUs are online and
 * can run SEAMCALLs.
 */
static int config_global_keyid(void)
{
        cpumask_var_t packages;
        int cpu, ret = -EINVAL;

        if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
                return -ENOMEM;

        /*
         * Hardware doesn't guarantee cache coherency across different
         * KeyIDs.  The kernel needs to flush PAMT's dirty cachelines
         * (associated with KeyID 0) before the TDX module can use the
         * global KeyID to access the PAMT.  Given PAMTs are potentially
         * large (~1/256th of system RAM), just use WBINVD.
         */
        wbinvd_on_all_cpus();

        for_each_online_cpu(cpu) {
                /*
                 * The key configuration only needs to be done once per
                 * package and will return an error if configured more
                 * than once.  Avoid doing it multiple times per package.
                 */
                if (cpumask_test_and_set_cpu(topology_physical_package_id(cpu),
                                        packages))
                        continue;

                /*
                 * TDH.SYS.KEY.CONFIG cannot run concurrently on
                 * different cpus.  Do it one by one.
                 */
                ret = smp_call_on_cpu(cpu, do_global_key_config, NULL, true);
                if (ret)
                        break;
        }

        free_cpumask_var(packages);
        return ret;
}

static int init_tdmr(struct tdmr_info *tdmr)
{
        u64 next;

        /*
         * Initializing a TDMR can be time consuming.  To avoid long
         * SEAMCALLs, the TDX module may only initialize a part of the
         * TDMR in each call.
         */
        do {
                struct tdx_module_args args = {
                        .rcx = tdmr->base,
                };
                int ret;

                ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
                if (ret)
                        return ret;
                /*
                 * RDX contains 'next-to-initialize' address if
                 * TDH.SYS.TDMR.INIT did not fully complete and
                 * should be retried.
                 */
                next = args.rdx;
                cond_resched();
                /* Keep making SEAMCALLs until the TDMR is done */
        } while (next < tdmr->base + tdmr->size);

        return 0;
}

static int init_tdmrs(struct tdmr_info_list *tdmr_list)
{
        int i;

        /*
         * This operation is costly.  It can be parallelized,
         * but keep it simple for now.
         */
        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
                int ret;

                ret = init_tdmr(tdmr_entry(tdmr_list, i));
                if (ret)
                        return ret;
        }

        return 0;
}

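/*
 * Overview of init_tdx_module() below, in order:
 *
 *  1. Read TDX module global metadata and check features.
 *  2. Build the list of TDX-usable memory regions (@tdx_memlist).
 *  3. Construct TDMRs (and their PAMTs) to cover those regions.
 *  4. TDH.SYS.CONFIG: hand TDMRs and the global KeyID to the module.
 *  5. TDH.SYS.KEY.CONFIG: program the global KeyID on each package.
 *  6. TDH.SYS.TDMR.INIT: initialize all TDMRs.
 *
 * A failure at or after step 5 requires flushing caches and resetting
 * the PAMTs before returning them to the kernel, as done in the error
 * paths below.
 */
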
static int init_tdx_module(void)
{
        struct tdx_sys_info sysinfo;
        int ret;

        ret = get_tdx_sys_info(&sysinfo);
        if (ret)
                return ret;

        /* Check whether the kernel can support this module */
        ret = check_features(&sysinfo);
        if (ret)
                return ret;

        /*
         * To keep things simple, assume that all TDX-protected memory
         * will come from the page allocator.  Make sure all pages in the
         * page allocator are TDX-usable memory.
         *
         * Build the list of "TDX-usable" memory regions which cover all
         * pages in the page allocator to guarantee that.  Do it while
         * holding mem_hotplug_lock read-lock as the memory hotplug code
         * path reads the @tdx_memlist to reject any new memory.
         */
        get_online_mems();

        ret = build_tdx_memlist(&tdx_memlist);
        if (ret)
                goto out_put_tdxmem;

        /* Allocate enough space for constructing TDMRs */
        ret = alloc_tdmr_list(&tdx_tdmr_list, &sysinfo.tdmr);
        if (ret)
                goto err_free_tdxmem;

        /* Cover all TDX-usable memory regions in TDMRs */
        ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &sysinfo.tdmr);
        if (ret)
                goto err_free_tdmrs;

        /* Pass the TDMRs and the global KeyID to the TDX module */
        ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid);
        if (ret)
                goto err_free_pamts;

        /* Config the key of global KeyID on all packages */
        ret = config_global_keyid();
        if (ret)
                goto err_reset_pamts;

        /* Initialize TDMRs to complete the TDX module initialization */
        ret = init_tdmrs(&tdx_tdmr_list);
        if (ret)
                goto err_reset_pamts;

        pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));

out_put_tdxmem:
        /*
         * @tdx_memlist is written here and read at memory hotplug time.
         * Lock out memory hotplug code while building it.
         */
        put_online_mems();
        return ret;

err_reset_pamts:
        /*
         * Part of PAMTs may already have been initialized by the
         * TDX module.  Flush cache before returning PAMTs back
         * to the kernel.
         */
        wbinvd_on_all_cpus();
        /*
         * According to the TDX hardware spec, if the platform
         * doesn't have the "partial write machine check" erratum,
         * any kernel read/write will never cause #MC in kernel
         * space, thus it's OK to not convert PAMTs back to normal.
         * But do the conversion anyway here as suggested by the
         * TDX spec.
         */
        tdmrs_reset_pamt_all(&tdx_tdmr_list);
err_free_pamts:
        tdmrs_free_pamt_all(&tdx_tdmr_list);
err_free_tdmrs:
        free_tdmr_list(&tdx_tdmr_list);
err_free_tdxmem:
        free_tdx_memlist(&tdx_memlist);
        goto out_put_tdxmem;
}

static int __tdx_enable(void)
{
        int ret;

        ret = init_tdx_module();
        if (ret) {
                pr_err("module initialization failed (%d)\n", ret);
                tdx_module_status = TDX_MODULE_ERROR;
                return ret;
        }

        pr_info("module initialized\n");
        tdx_module_status = TDX_MODULE_INITIALIZED;

        return 0;
}

/**
 * tdx_enable - Enable TDX module to make it ready to run TDX guests
 *
 * This function assumes the caller has: 1) held read lock of CPU hotplug
 * lock to prevent any new cpu from becoming online; 2) done both VMXON
 * and tdx_cpu_enable() on all online cpus.
 *
 * This function requires there's at least one online cpu for each CPU
 * package to succeed.
 *
 * This function can be called in parallel by multiple callers.
 *
 * Return: 0 if TDX is enabled successfully, otherwise error.
 */
int tdx_enable(void)
{
        int ret;

        if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
                return -ENODEV;

        lockdep_assert_cpus_held();

        mutex_lock(&tdx_module_lock);

        switch (tdx_module_status) {
        case TDX_MODULE_UNINITIALIZED:
                ret = __tdx_enable();
                break;
        case TDX_MODULE_INITIALIZED:
                /* Already initialized, great, tell the caller. */
                ret = 0;
                break;
        default:
                /* Failed to initialize in the previous attempts */
                ret = -EINVAL;
                break;
        }

        mutex_unlock(&tdx_module_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(tdx_enable);

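/*
 * Hypothetical caller sketch (KVM-like), tying the pieces together
 * per the requirements documented above:
 *
 *      cpus_read_lock();
 *      ... VMXON + tdx_cpu_enable() on all online cpus ...
 *      ret = tdx_enable();
 *      cpus_read_unlock();
 *
 * The CPU hotplug read lock spans both steps so no cpu can come
 * online in between without going through the same enabling path.
 */
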
static bool is_pamt_page(unsigned long phys)
{
        struct tdmr_info_list *tdmr_list = &tdx_tdmr_list;
        int i;

        /* Ensure that all remote 'tdmr_list' writes are visible: */
        smp_rmb();

        /*
         * The TDX module is no longer returning TDX_SYS_NOT_READY and
         * is initialized.  The 'tdmr_list' was initialized long ago
         * and is now read-only.
         */
        for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
                unsigned long base, size;

                tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);

                if (phys >= base && phys < (base + size))
                        return true;
        }

        return false;
}

/*
 * Return whether the memory page at the given physical address is TDX
 * private memory or not.
 *
 * This can be imprecise for two known reasons:
 * 1. PAMTs are private memory and exist before the TDX module is
 *    ready and TDH_PHYMEM_PAGE_RDMD works.  This is a relatively
 *    short window that occurs once per boot.
 * 2. TDH_PHYMEM_PAGE_RDMD reflects the TDX module's knowledge of the
 *    page.  However, the page can still cause #MC until it has been
 *    fully converted to shared using 64-byte writes like MOVDIR64B.
 *    Buggy hosts might still leave #MC-causing memory in place which
 *    this function cannot detect.
 */
static bool paddr_is_tdx_private(unsigned long phys)
{
        struct tdx_module_args args = {
                .rcx = phys & PAGE_MASK,
        };
        u64 sret;

        if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
                return false;

        /* Get page type from the TDX module */
        sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args);

        /*
         * The SEAMCALL will not return success unless there is a
         * working, "ready" TDX module.  Assume an absence of TDX
         * private pages until SEAMCALL is working.
         */
        if (sret)
                return false;

        /*
         * SEAMCALL was successful -- read page type (via RCX):
         *
         *  - PT_NDA:   Page is not used by the TDX module
         *  - PT_RSVD:  Reserved for Non-TDX use
         *  - Others:   Page is used by the TDX module
         *
         * Note PAMT pages are marked as PT_RSVD but they are also TDX
         * private memory.
         */
        switch (args.rcx) {
        case PT_NDA:
                return false;
        case PT_RSVD:
                return is_pamt_page(phys);
        default:
                return true;
        }
}

/*
 * Some TDX-capable CPUs have an erratum.  A write to TDX private
 * memory poisons that memory, and a subsequent read of that memory
 * triggers #MC.
 *
 * Help distinguish erratum-triggered #MCs from normal hardware ones.
 * Just print an additional message to show that such a #MC may be the
 * result of the erratum.
 */
const char *tdx_dump_mce_info(struct mce *m)
{
        if (!m || !mce_is_memory_error(m) || !mce_usable_address(m))
                return NULL;

        if (!paddr_is_tdx_private(m->addr))
                return NULL;

        return "TDX private memory error. Possible kernel bug.";
}

static __init int record_keyid_partitioning(u32 *tdx_keyid_start,
                                            u32 *nr_tdx_keyids)
{
        u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids;
        int ret;

        /*
         * IA32_MKTME_KEYID_PARTITIONING:
         *   Bits [31:0]:  Number of MKTME KeyIDs.
         *   Bits [63:32]: Number of TDX private KeyIDs.
         */
        ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids,
                        &_nr_tdx_keyids);
        if (ret || !_nr_tdx_keyids)
                return -EINVAL;

        /* TDX KeyIDs start after the last MKTME KeyID. */
        _tdx_keyid_start = _nr_mktme_keyids + 1;

        *tdx_keyid_start = _tdx_keyid_start;
        *nr_tdx_keyids = _nr_tdx_keyids;

        return 0;
}

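/*
 * Worked example for the partitioning above (illustrative values):
 * if the MSR reports 31 MKTME KeyIDs and 32 TDX KeyIDs, then KeyID 0
 * is TME, KeyIDs [1, 32) are MKTME, and TDX owns [32, 64).  tdx_init()
 * later takes KeyID 32 as the global KeyID and leaves [33, 64) for
 * guests.
 */
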
static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
{
        struct tdx_memblock *tmb;

        /*
         * This check assumes that the start_pfn<->end_pfn range does not
         * cross multiple @tdx_memlist entries.  A single memory online
         * event across multiple memblocks (from which @tdx_memlist
         * entries are derived at the time of module initialization) is
         * not possible.  This is because memory offline/online is done
         * on granularity of 'struct memory_block', and the hotpluggable
         * memory region (one memblock) must be a multiple of
         * memory_block.
         */
        list_for_each_entry(tmb, &tdx_memlist, list) {
                if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
                        return true;
        }
        return false;
}

static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
                               void *v)
{
        struct memory_notify *mn = v;

        if (action != MEM_GOING_ONLINE)
                return NOTIFY_OK;

        /*
         * Empty list means TDX isn't enabled.  Allow any memory
         * to go online.
         */
        if (list_empty(&tdx_memlist))
                return NOTIFY_OK;

        /*
         * The TDX memory configuration is static and cannot be
         * changed.  Reject onlining any memory which is outside of
         * the static configuration whether it supports TDX or not.
         */
        if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
                return NOTIFY_OK;

        return NOTIFY_BAD;
}

static struct notifier_block tdx_memory_nb = {
        .notifier_call = tdx_memory_notifier,
};

static void __init check_tdx_erratum(void)
{
        /*
         * These CPUs have an erratum.  A partial write from non-TD
         * software (e.g. via MOVNTI variants or UC/WC mapping) to TDX
         * private memory poisons that memory, and a subsequent read of
         * that memory triggers #MC.
         */
        switch (boot_cpu_data.x86_vfm) {
        case INTEL_SAPPHIRERAPIDS_X:
        case INTEL_EMERALDRAPIDS_X:
                setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
        }
}

void __init tdx_init(void)
{
        u32 tdx_keyid_start, nr_tdx_keyids;
        int err;

        err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids);
        if (err)
                return;

        pr_info("BIOS enabled: private KeyID range [%u, %u)\n",
                        tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids);

        /*
         * The TDX module itself requires one 'global KeyID' to protect
         * its metadata.  If there's only one TDX KeyID, there won't be
         * any left for TDX guests, thus there's no point in enabling
         * TDX at all.
         */
        if (nr_tdx_keyids < 2) {
                pr_err("initialization failed: too few private KeyIDs available.\n");
                return;
        }

        /*
         * At this point, hibernation_available() indicates whether or
         * not hibernation support has been permanently disabled.
         */
        if (hibernation_available()) {
                pr_err("initialization failed: Hibernation support is enabled\n");
                return;
        }

        err = register_memory_notifier(&tdx_memory_nb);
        if (err) {
                pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
                                err);
                return;
        }

#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
        pr_info("Disable ACPI S3. Turn off TDX in the BIOS to use ACPI S3.\n");
        acpi_suspend_lowlevel = NULL;
#endif

        /*
         * Just use the first TDX KeyID as the 'global KeyID' and
         * leave the rest for TDX guests.
         */
        tdx_global_keyid = tdx_keyid_start;
        tdx_guest_keyid_start = tdx_keyid_start + 1;
        tdx_nr_guest_keyids = nr_tdx_keyids - 1;

        setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM);

        check_tdx_erratum();
}