xref: /linux/arch/x86/kvm/mmu/spte.c (revision c98d767b34574be82b74d77d02264a830ae1cadd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * Macros and functions to access KVM PTEs (also known as SPTEs)
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2020 Red Hat, Inc. and/or its affiliates.
9  */
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 
12 #include <linux/kvm_host.h>
13 #include "mmu.h"
14 #include "mmu_internal.h"
15 #include "x86.h"
16 #include "spte.h"
17 
18 #include <asm/cpuid/api.h>
19 #include <asm/e820/api.h>
20 #include <asm/memtype.h>
21 #include <asm/vmx.h>
22 
/*
 * MMIO caching state.  enable_mmio_caching is exposed read-only (0444) to
 * userspace as the "mmio_caching" module param, while allow_mmio_caching holds
 * the snapshot of that param taken by kvm_mmu_spte_module_init().
 */
bool __read_mostly enable_mmio_caching = true;
static bool __ro_after_init allow_mmio_caching;
module_param_named(mmio_caching, enable_mmio_caching, bool, 0444);
EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_mmio_caching);

/* Written by kvm_mmu_set_ept_masks() and kvm_mmu_reset_all_pte_masks(). */
bool __read_mostly kvm_ad_enabled;

/*
 * Bit masks/values used when building SPTEs.  They are (re)initialized by
 * kvm_mmu_reset_all_pte_masks() and overridden for EPT by
 * kvm_mmu_set_ept_masks().
 */
u64 __read_mostly shadow_host_writable_mask;
u64 __read_mostly shadow_mmu_writable_mask;
u64 __read_mostly shadow_nx_mask;
u64 __read_mostly shadow_user_mask;
u64 __read_mostly shadow_xs_mask; /* mutually exclusive with nx_mask and user_mask */
u64 __read_mostly shadow_xu_mask; /* mutually exclusive with nx_mask and user_mask */
u64 __read_mostly shadow_accessed_mask;
u64 __read_mostly shadow_dirty_mask;
u64 __read_mostly shadow_mmio_value;
u64 __read_mostly shadow_mmio_mask;
u64 __read_mostly shadow_mmio_access_mask;
u64 __read_mostly shadow_present_mask;
u64 __read_mostly shadow_me_value;
u64 __read_mostly shadow_me_mask;
u64 __read_mostly shadow_acc_track_mask;

/* Computed in kvm_mmu_reset_all_pte_masks() as part of the L1TF mitigation. */
u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
48 
49 static u8 __init kvm_get_host_maxphyaddr(void)
50 {
51 	/*
52 	 * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
53 	 * in CPU detection code, but the processor treats those reduced bits as
54 	 * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
55 	 * the physical address bits reported by CPUID, i.e. the raw MAXPHYADDR,
56 	 * when reasoning about CPU behavior with respect to MAXPHYADDR.
57 	 */
58 	if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
59 		return cpuid_eax(0x80000008) & 0xff;
60 
61 	/*
62 	 * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
63 	 * custom CPUID.  Proceed with whatever the kernel found since these features
64 	 * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
65 	 */
66 	return boot_cpu_data.x86_phys_bits;
67 }
68 
69 void __init kvm_mmu_spte_module_init(void)
70 {
71 	/*
72 	 * Snapshot userspace's desire to allow MMIO caching.  Whether or not
73 	 * KVM can actually enable MMIO caching depends on vendor-specific
74 	 * hardware capabilities and other module params that can't be resolved
75 	 * until the vendor module is loaded, i.e. enable_mmio_caching can and
76 	 * will change when the vendor module is (re)loaded.
77 	 */
78 	allow_mmio_caching = enable_mmio_caching;
79 
80 	kvm_host.maxphyaddr = kvm_get_host_maxphyaddr();
81 }
82 
83 static u64 generation_mmio_spte_mask(u64 gen)
84 {
85 	u64 mask;
86 
87 	WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);
88 
89 	mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
90 	mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
91 	return mask;
92 }
93 
94 u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
95 {
96 	u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
97 	u64 spte = generation_mmio_spte_mask(gen);
98 	u64 gpa = gfn << PAGE_SHIFT;
99 
100 	access &= shadow_mmio_access_mask;
101 	spte |= vcpu->kvm->arch.shadow_mmio_value | access;
102 	spte |= gpa | shadow_nonpresent_or_rsvd_mask;
103 	spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
104 		<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
105 
106 	return spte;
107 }
108 
109 static bool __kvm_is_mmio_pfn(kvm_pfn_t pfn)
110 {
111 	if (pfn_valid(pfn))
112 		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
113 			/*
114 			 * Some reserved pages, such as those from NVDIMM
115 			 * DAX devices, are not for MMIO, and can be mapped
116 			 * with cached memory type for better performance.
117 			 * However, the above check misconceives those pages
118 			 * as MMIO, and results in KVM mapping them with UC
119 			 * memory type, which would hurt the performance.
120 			 * Therefore, we check the host memory type in addition
121 			 * and only treat UC/UC-/WC pages as MMIO.
122 			 */
123 			(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
124 
125 	return !e820__mapped_raw_any(pfn_to_hpa(pfn),
126 				     pfn_to_hpa(pfn + 1) - 1,
127 				     E820_TYPE_RAM);
128 }
129 
130 static bool kvm_is_mmio_pfn(kvm_pfn_t pfn, int *is_host_mmio)
131 {
132 	/*
133 	 * Determining if a PFN is host MMIO is relative expensive.  Cache the
134 	 * result locally (in the sole caller) to avoid doing the full query
135 	 * multiple times when creating a single SPTE.
136 	 */
137 	if (*is_host_mmio < 0)
138 		*is_host_mmio = __kvm_is_mmio_pfn(pfn);
139 
140 	return *is_host_mmio;
141 }
142 
143 static void kvm_track_host_mmio_mapping(struct kvm_vcpu *vcpu)
144 {
145 	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
146 
147 	if (root)
148 		WRITE_ONCE(root->has_mapped_host_mmio, true);
149 	else
150 		WRITE_ONCE(vcpu->kvm->arch.has_mapped_host_mmio, true);
151 
152 	/*
153 	 * Force vCPUs to exit and flush CPU buffers if the vCPU is using the
154 	 * affected root(s).
155 	 */
156 	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
157 }
158 
159 /*
160  * Returns true if the SPTE needs to be updated atomically due to having bits
161  * that may be changed without holding mmu_lock, and for which KVM must not
162  * lose information.  E.g. KVM must not drop Dirty bit information.  The caller
163  * is responsible for checking if the SPTE is shadow-present, and for
164  * determining whether or not the caller cares about non-leaf SPTEs.
165  */
166 bool spte_needs_atomic_update(u64 spte)
167 {
168 	/* SPTEs can be made Writable bit by KVM's fast page fault handler. */
169 	if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
170 		return true;
171 
172 	/*
173 	 * A/D-disabled SPTEs can be access-tracked by aging, and access-tracked
174 	 * SPTEs can be restored by KVM's fast page fault handler.
175 	 */
176 	if (!spte_ad_enabled(spte))
177 		return true;
178 
179 	/*
180 	 * Dirty and Accessed bits can be set by the CPU.  Ignore the Accessed
181 	 * bit, as KVM tolerates false negatives/positives, e.g. KVM doesn't
182 	 * invalidate TLBs when aging SPTEs, and so it's safe to clobber the
183 	 * Accessed bit (and rare in practice).
184 	 */
185 	return is_writable_pte(spte) && !(spte & shadow_dirty_mask);
186 }
187 
/*
 * Construct the leaf SPTE that maps @gfn to @pfn and store it in @new_spte.
 *
 * @sp supplies the page-table level and the A/D configuration.  @pte_access
 * holds the requested ACC_* permissions; exec permission is dropped for huge
 * pages when the NX huge page mitigation is enabled, and write permission is
 * dropped when @host_writable is false.  @old_spte is only consulted to decide
 * whether the unsync lookup can be skipped.  @prefetch and @synchronizing
 * control the initial Accessed/access-tracking state and are forwarded to
 * mmu_try_to_unsync_pages().
 *
 * Returns true if write access was requested but the SPTE was left
 * non-writable because mmu_try_to_unsync_pages() returned non-zero.
 */
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
	       const struct kvm_memory_slot *slot,
	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
	       u64 old_spte, bool prefetch, bool synchronizing,
	       bool host_writable, u64 *new_spte)
{
	int level = sp->role.level;
	u64 spte = SPTE_MMU_PRESENT_MASK;
	/* Negative means "not yet computed", see kvm_is_mmio_pfn(). */
	int is_host_mmio = -1;
	bool wrprot = false;

	WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);

	if (sp->role.ad_disabled)
		spte |= SPTE_TDP_AD_DISABLED;
	else if (kvm_mmu_page_ad_need_write_protect(vcpu->kvm, sp))
		spte |= SPTE_TDP_AD_WRPROT_ONLY;

	spte |= shadow_present_mask;
	/* Pre-set the Accessed bit unless the SPTE is being purely prefetched. */
	if (!prefetch || synchronizing)
		spte |= shadow_accessed_mask;

	/*
	 * For simplicity, enforce the NX huge page mitigation even if not
	 * strictly necessary.  KVM could ignore the mitigation if paging is
	 * disabled in the guest, as the guest doesn't have any page tables to
	 * abuse.  But to safely ignore the mitigation, KVM would have to
	 * ensure a new MMU is loaded (or all shadow pages zapped) when CR0.PG
	 * is toggled on, and that's a net negative for performance when TDP is
	 * enabled.  When TDP is disabled, KVM will always switch to a new MMU
	 * when CR0.PG is toggled, but leveraging that to ignore the mitigation
	 * would tie make_spte() further to vCPU/MMU state, and add complexity
	 * just to optimize a mode that is anything but performance critical.
	 */
	if (level > PG_LEVEL_4K && is_nx_huge_page_enabled(vcpu->kvm)) {
		pte_access &= ~ACC_EXEC_MASK;
		if (shadow_xu_mask)
			pte_access &= ~ACC_USER_EXEC_MASK;
	}

	if (pte_access & ACC_READ_MASK)
		spte |= PT_PRESENT_MASK; /* or VMX_EPT_READABLE_MASK */

	/*
	 * Exec permission is expressed either by the absence of the NX bit
	 * (together with the User bit), or by the XS/XU bits; the two schemes
	 * are mutually exclusive.
	 */
	if (shadow_nx_mask) {
		if (!(pte_access & ACC_EXEC_MASK))
			spte |= shadow_nx_mask;
		if (pte_access & ACC_USER_MASK)
			spte |= shadow_user_mask;
	} else {
		if (pte_access & ACC_EXEC_MASK)
			spte |= shadow_xs_mask;
		if (pte_access & ACC_USER_EXEC_MASK)
			spte |= shadow_xu_mask;
	}

	if (level > PG_LEVEL_4K)
		spte |= PT_PAGE_SIZE_MASK;

	/* The memory type hook is optional, and is told if the PFN is host MMIO. */
	if (kvm_x86_ops.get_mt_mask)
		spte |= kvm_x86_call(get_mt_mask)(vcpu, gfn,
						  kvm_is_mmio_pfn(pfn, &is_host_mmio));
	if (host_writable)
		spte |= shadow_host_writable_mask;
	else
		pte_access &= ~ACC_WRITE_MASK;

	/* The memory encryption value is not applied to host MMIO PFNs. */
	if (shadow_me_value && !kvm_is_mmio_pfn(pfn, &is_host_mmio))
		spte |= shadow_me_value;

	spte |= (u64)pfn << PAGE_SHIFT;

	if (pte_access & ACC_WRITE_MASK) {
		/*
		 * Unsync shadow pages that are reachable by the new, writable
		 * SPTE.  Write-protect the SPTE if the page can't be unsync'd,
		 * e.g. it's write-tracked (upper-level SPs) or has one or more
		 * shadow pages and unsync'ing pages is not allowed.
		 *
		 * When overwriting an existing leaf SPTE, and the old SPTE was
		 * writable, skip trying to unsync shadow pages as any relevant
		 * shadow pages must already be unsync, i.e. the hash lookup is
		 * unnecessary (and expensive).  Note, this relies on KVM not
		 * changing PFNs without first zapping the old SPTE, which is
		 * guaranteed by both the shadow MMU and the TDP MMU.
		 */
		if ((!is_last_spte(old_spte, level) || !is_writable_pte(old_spte)) &&
		    mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, synchronizing, prefetch))
			wrprot = true;
		else
			spte |= PT_WRITABLE_MASK | shadow_mmu_writable_mask |
				shadow_dirty_mask;
	}

	/* Purely prefetched SPTEs start out marked for access tracking. */
	if (prefetch && !synchronizing)
		spte = mark_spte_for_access_track(spte);

	WARN_ONCE(is_rsvd_spte(&vcpu->arch.mmu->shadow_zero_check, spte, level),
		  "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level,
		  get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level));

	/*
	 * Mark the memslot dirty *after* modifying it for access tracking.
	 * Unlike folios, memslots can be safely marked dirty out of mmu_lock,
	 * i.e. in the fast page fault handler.
	 */
	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
		/* Enforced by kvm_mmu_hugepage_adjust. */
		WARN_ON_ONCE(level > PG_LEVEL_4K);
		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
	}

	/*
	 * Track host MMIO mappings only when the CPU feature is enabled and
	 * the vCPU isn't already allowed to access host MMIO; the (cached)
	 * MMIO query is evaluated last as it's the most expensive check.
	 */
	if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO) &&
	    !kvm_vcpu_can_access_host_mmio(vcpu) &&
	    kvm_is_mmio_pfn(pfn, &is_host_mmio))
		kvm_track_host_mmio_mapping(vcpu);

	*new_spte = spte;
	return wrprot;
}
307 
308 static u64 modify_spte_protections(u64 spte, u64 set, u64 clear)
309 {
310 	bool is_access_track = is_access_track_spte(spte);
311 
312 	if (is_access_track)
313 		spte = restore_acc_track_spte(spte);
314 
315 	KVM_MMU_WARN_ON(set & clear);
316 	spte = (spte | set) & ~clear;
317 
318 	if (is_access_track)
319 		spte = mark_spte_for_access_track(spte);
320 
321 	return spte;
322 }
323 
324 static u64 change_spte_executable(u64 spte, u8 access)
325 {
326 	u64 set, clear;
327 
328 	if (shadow_nx_mask)
329 		set = (access & ACC_EXEC_MASK) ? 0 : shadow_nx_mask;
330 	else
331 		set =
332 			(access & ACC_EXEC_MASK ? shadow_xs_mask : 0) |
333 			(access & ACC_USER_EXEC_MASK ? shadow_xu_mask : 0);
334 	clear = set ^ (shadow_nx_mask | shadow_xs_mask | shadow_xu_mask);
335 	return modify_spte_protections(spte, set, clear);
336 }
337 
338 /*
339  * Construct an SPTE that maps a sub-page of the given huge page SPTE where
340  * `index` identifies which sub-page.
341  *
342  * This is used during huge page splitting to build the SPTEs that make up the
343  * new page table.
344  */
345 u64 make_small_spte(struct kvm *kvm, u64 huge_spte,
346 		    union kvm_mmu_page_role role, int index)
347 {
348 	u64 child_spte = huge_spte;
349 
350 	KVM_BUG_ON(!is_shadow_present_pte(huge_spte) || !is_large_pte(huge_spte), kvm);
351 
352 	/*
353 	 * The child_spte already has the base address of the huge page being
354 	 * split. So we just have to OR in the offset to the page at the next
355 	 * lower level for the given index.
356 	 */
357 	child_spte |= (index * KVM_PAGES_PER_HPAGE(role.level)) << PAGE_SHIFT;
358 
359 	if (role.level == PG_LEVEL_4K) {
360 		child_spte &= ~PT_PAGE_SIZE_MASK;
361 
362 		/*
363 		 * When splitting to a 4K page where execution is allowed, mark
364 		 * the page executable as the NX hugepage mitigation no longer
365 		 * applies.
366 		 */
367 		if (is_nx_huge_page_enabled(kvm))
368 			child_spte = change_spte_executable(child_spte, role.access);
369 	}
370 
371 	return child_spte;
372 }
373 
374 u64 make_huge_spte(struct kvm *kvm, u64 small_spte, int level)
375 {
376 	u64 huge_spte;
377 
378 	KVM_BUG_ON(!is_shadow_present_pte(small_spte) || level == PG_LEVEL_4K, kvm);
379 
380 	huge_spte = small_spte | PT_PAGE_SIZE_MASK;
381 
382 	/*
383 	 * huge_spte already has the address of the sub-page being collapsed
384 	 * from small_spte, so just clear the lower address bits to create the
385 	 * huge page address.
386 	 */
387 	huge_spte &= KVM_HPAGE_MASK(level) | ~PAGE_MASK;
388 
389 	if (is_nx_huge_page_enabled(kvm))
390 		huge_spte = change_spte_executable(huge_spte, 0);
391 
392 	return huge_spte;
393 }
394 
395 u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
396 {
397 	u64 spte = SPTE_MMU_PRESENT_MASK;
398 
399 	spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
400 		PT_PRESENT_MASK /* or VMX_EPT_READABLE_MASK */ |
401 		shadow_user_mask | shadow_xs_mask | shadow_xu_mask | shadow_me_value;
402 
403 	if (ad_disabled)
404 		spte |= SPTE_TDP_AD_DISABLED;
405 	else
406 		spte |= shadow_accessed_mask;
407 
408 	return spte;
409 }
410 
411 u64 mark_spte_for_access_track(u64 spte)
412 {
413 	if (spte_ad_enabled(spte))
414 		return spte & ~shadow_accessed_mask;
415 
416 	if (is_access_track_spte(spte))
417 		return spte;
418 
419 	check_spte_writable_invariants(spte);
420 
421 	WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
422 			  SHADOW_ACC_TRACK_SAVED_BITS_SHIFT),
423 		  "Access Tracking saved bit locations are not zero\n");
424 
425 	spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) <<
426 		SHADOW_ACC_TRACK_SAVED_BITS_SHIFT;
427 	spte &= ~(shadow_acc_track_mask | shadow_accessed_mask);
428 
429 	return spte;
430 }
431 
/*
 * Configure the value/mask pair that identifies MMIO SPTEs, and the mask of
 * access bits that may be stored in them (see make_mmio_spte()).
 *
 * @mmio_value is forced to zero if userspace disallowed MMIO caching or if any
 * of the sanity checks below fails; a zero value disables MMIO caching.
 * @mmio_mask and @access_mask are stored as passed in.  @access_mask must fit
 * in an unsigned int.
 */
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
{
	BUG_ON((u64)(unsigned)access_mask != access_mask);
	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);

	/*
	 * Reset to the original module param value to honor userspace's desire
	 * to (dis)allow MMIO caching.  Update the param itself so that
	 * userspace can see whether or not KVM is actually using MMIO caching.
	 */
	enable_mmio_caching = allow_mmio_caching;
	if (!enable_mmio_caching)
		mmio_value = 0;

	/*
	 * The mask must contain only bits that are carved out specifically for
	 * the MMIO SPTE mask, e.g. to ensure there's no overlap with the MMIO
	 * generation.
	 */
	if (WARN_ON(mmio_mask & ~SPTE_MMIO_ALLOWED_MASK))
		mmio_value = 0;

	/*
	 * Disable MMIO caching if the MMIO value collides with the bits that
	 * are used to hold the relocated GFN when the L1TF mitigation is
	 * enabled.  This should never fire as there is no known hardware that
	 * can trigger this condition, e.g. SME/SEV CPUs that require a custom
	 * MMIO value are not susceptible to L1TF.
	 */
	if (WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask <<
				  SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)))
		mmio_value = 0;

	/*
	 * The masked MMIO value must obviously match itself and a frozen SPTE
	 * must not get a false positive.  Frozen SPTEs and MMIO SPTEs should
	 * never collide as MMIO must set some RWX bits, and frozen SPTEs must
	 * not set any RWX bits.
	 */
	if (WARN_ON((mmio_value & mmio_mask) != mmio_value) ||
	    WARN_ON(mmio_value && (FROZEN_SPTE & mmio_mask) == mmio_value))
		mmio_value = 0;

	/* A zero MMIO value means MMIO SPTEs can't be identified, i.e. no caching. */
	if (!mmio_value)
		enable_mmio_caching = false;

	shadow_mmio_value = mmio_value;
	shadow_mmio_mask  = mmio_mask;
	shadow_mmio_access_mask = access_mask;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_mmio_spte_mask);
483 
/*
 * Set the per-VM MMIO SPTE value, i.e. the value that make_mmio_spte() ORs
 * into every MMIO SPTE it creates for @kvm.
 */
void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
{
	kvm->arch.shadow_mmio_value = mmio_value;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_mmio_spte_value);
489 
490 void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
491 {
492 	/* shadow_me_value must be a subset of shadow_me_mask */
493 	if (WARN_ON(me_value & ~me_mask))
494 		me_value = me_mask = 0;
495 
496 	shadow_me_value = me_value;
497 	shadow_me_mask = me_mask;
498 }
499 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_me_spte_mask);
500 
501 void kvm_mmu_set_ept_masks(bool has_ad_bits)
502 {
503 	kvm_ad_enabled		= has_ad_bits;
504 
505 	shadow_user_mask	= 0;
506 	shadow_accessed_mask	= VMX_EPT_ACCESS_BIT;
507 	shadow_dirty_mask	= VMX_EPT_DIRTY_BIT;
508 	shadow_nx_mask		= 0ull;
509 	shadow_xs_mask		= VMX_EPT_EXECUTABLE_MASK;
510 
511 	/*
512 	 * The MMU always maps ACC_EXEC_MASK and ACC_USER_EXEC_MASK to the
513 	 * XS and XU bits of shadow EPT entries, regardless of whether MBEC
514 	 * is available on the host or enabled in the VMCS.
515 	 *
516 	 * For the non-nested case, pages are mapped with ACC_EXEC_MASK
517 	 * and ACC_USER_EXEC_MASK set in tandem, so XS == XU and the
518 	 * host's MBEC setting does not matter.  On hardware without MBEC
519 	 * the XU bit is reserved-as-ignored, and setting it does no harm.
520 	 *
521 	 * For nested EPT, when MBEC is disabled by L1, correctness relies
522 	 * on (a) ignoring bit 10 of the gPTE in is_present_gpte(), rather
523 	 * than treating it as a present bit, and (b) permission_fault()
524 	 * using an mmu->permissions[] array that effectively ignores
525 	 * ACC_USER_EXEC_MASK.  Bit 10 of the gPTE does end up mirrored
526 	 * in the sPTEs but is ignored because L2 runs with MBEC disabled.
527 	 */
528 	shadow_xu_mask		= VMX_EPT_USER_EXECUTABLE_MASK;
529 	shadow_present_mask	= VMX_EPT_SUPPRESS_VE_BIT;
530 
531 	shadow_acc_track_mask	= VMX_EPT_RWX_MASK | VMX_EPT_USER_EXECUTABLE_MASK;
532 	shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
533 	shadow_mmu_writable_mask  = EPT_SPTE_MMU_WRITABLE;
534 
535 	/*
536 	 * EPT Misconfigurations are generated if the value of bits 2:0
537 	 * of an EPT paging-structure entry is 110b (write/execute).
538 	 */
539 	kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
540 				   VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0);
541 }
542 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_set_ept_masks);
543 
544 void kvm_mmu_reset_all_pte_masks(void)
545 {
546 	u8 low_phys_bits;
547 	u64 mask;
548 
549 	kvm_ad_enabled = true;
550 
551 	/*
552 	 * If the CPU has 46 or less physical address bits, then set an
553 	 * appropriate mask to guard against L1TF attacks. Otherwise, it is
554 	 * assumed that the CPU is not vulnerable to L1TF.
555 	 *
556 	 * Some Intel CPUs address the L1 cache using more PA bits than are
557 	 * reported by CPUID. Use the PA width of the L1 cache when possible
558 	 * to achieve more effective mitigation, e.g. if system RAM overlaps
559 	 * the most significant bits of legal physical address space.
560 	 */
561 	shadow_nonpresent_or_rsvd_mask = 0;
562 	low_phys_bits = boot_cpu_data.x86_phys_bits;
563 	if (boot_cpu_has_bug(X86_BUG_L1TF) &&
564 	    !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
565 			  52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) {
566 		low_phys_bits = boot_cpu_data.x86_cache_bits
567 			- SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
568 		shadow_nonpresent_or_rsvd_mask =
569 			rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
570 	}
571 
572 	shadow_nonpresent_or_rsvd_lower_gfn_mask =
573 		GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
574 
575 	shadow_user_mask	= PT_USER_MASK;
576 	shadow_accessed_mask	= PT_ACCESSED_MASK;
577 	shadow_dirty_mask	= PT_DIRTY_MASK;
578 	shadow_nx_mask		= PT64_NX_MASK;
579 	shadow_xs_mask		= 0;
580 	shadow_xu_mask		= 0;
581 	shadow_present_mask	= PT_PRESENT_MASK;
582 
583 	shadow_acc_track_mask	= 0;
584 	shadow_me_mask		= 0;
585 	shadow_me_value		= 0;
586 
587 	shadow_host_writable_mask = DEFAULT_SPTE_HOST_WRITABLE;
588 	shadow_mmu_writable_mask  = DEFAULT_SPTE_MMU_WRITABLE;
589 
590 	/*
591 	 * Set a reserved PA bit in MMIO SPTEs to generate page faults with
592 	 * PFEC.RSVD=1 on MMIO accesses.  64-bit PTEs (PAE, x86-64, and EPT
593 	 * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
594 	 * 52-bit physical addresses then there are no reserved PA bits in the
595 	 * PTEs and so the reserved PA approach must be disabled.
596 	 */
597 	if (kvm_host.maxphyaddr < 52)
598 		mask = BIT_ULL(51) | PT_PRESENT_MASK;
599 	else
600 		mask = 0;
601 
602 	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
603 }
604