xref: /linux/arch/x86/kvm/mmu/mmu_internal.h (revision c10743a1824b9db449eb631745ed6f2d3cdf9762)
16ca9a6f3SSean Christopherson /* SPDX-License-Identifier: GPL-2.0 */
26ca9a6f3SSean Christopherson #ifndef __KVM_X86_MMU_INTERNAL_H
36ca9a6f3SSean Christopherson #define __KVM_X86_MMU_INTERNAL_H
46ca9a6f3SSean Christopherson 
5985ab278SSean Christopherson #include <linux/types.h>
65a9624afSPaolo Bonzini #include <linux/kvm_host.h>
7985ab278SSean Christopherson #include <asm/kvm_host.h>
8985ab278SSean Christopherson 
/*
 * Debug instrumentation for the MMU code.  MMU_DEBUG is unconditionally
 * undefined right here, so as written the no-op variants in the #else branch
 * are the ones in effect.
 */
#undef MMU_DEBUG

#ifdef MMU_DEBUG
extern bool dbg;

/* Print only while the 'dbg' flag is set. */
#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
/* As pgprintk(), but the message is prefixed with the calling function's name. */
#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
#define MMU_WARN_ON(x) WARN_ON(x)
#else
/* Compiled out: the arguments are dropped and therefore never evaluated. */
#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)
#define MMU_WARN_ON(x) do { } while (0)
#endif
225a9624afSPaolo Bonzini 
/*
 * Unlike regular MMU roots, PAE "roots", a.k.a. PDPTEs/PDPTRs, have a PRESENT
 * bit, and thus are guaranteed to be non-zero when valid.  And, when a guest
 * PDPTR is !PRESENT, its corresponding PAE root cannot be set to INVALID_PAGE,
 * as the CPU would treat that as PRESENT PDPTR with reserved bits set.  Use
 * '0' instead of INVALID_PAGE to indicate an invalid PAE root.
 *
 * IS_VALID_PAE_ROOT() normalizes its argument to 0 or 1: any non-zero value
 * counts as valid.
 */
#define INVALID_PAE_ROOT	0
#define IS_VALID_PAE_ROOT(x)	(!!(x))
32c834e5e4SSean Christopherson 
33*c10743a1SSean Christopherson typedef u64 __rcu *tdp_ptep_t;
34*c10743a1SSean Christopherson 
35985ab278SSean Christopherson struct kvm_mmu_page {
361148bfc4SSean Christopherson 	/*
371148bfc4SSean Christopherson 	 * Note, "link" through "spt" fit in a single 64 byte cache line on
381148bfc4SSean Christopherson 	 * 64-bit kernels, keep it that way unless there's a reason not to.
391148bfc4SSean Christopherson 	 */
40985ab278SSean Christopherson 	struct list_head link;
41985ab278SSean Christopherson 	struct hlist_node hash_link;
42985ab278SSean Christopherson 
43ca41c34cSSean Christopherson 	bool tdp_mmu_page;
44985ab278SSean Christopherson 	bool unsync;
45985ab278SSean Christopherson 	u8 mmu_valid_gen;
46985ab278SSean Christopherson 	bool lpage_disallowed; /* Can't be replaced by an equiv large page */
47985ab278SSean Christopherson 
48985ab278SSean Christopherson 	/*
49985ab278SSean Christopherson 	 * The following two entries are used to key the shadow page in the
50985ab278SSean Christopherson 	 * hash table.
51985ab278SSean Christopherson 	 */
52985ab278SSean Christopherson 	union kvm_mmu_page_role role;
53985ab278SSean Christopherson 	gfn_t gfn;
54985ab278SSean Christopherson 
55985ab278SSean Christopherson 	u64 *spt;
56985ab278SSean Christopherson 	/* hold the gfn of each spte inside spt */
57985ab278SSean Christopherson 	gfn_t *gfns;
5811cccf5cSBen Gardon 	/* Currently serving as active root */
5911cccf5cSBen Gardon 	union {
6011cccf5cSBen Gardon 		int root_count;
6111cccf5cSBen Gardon 		refcount_t tdp_mmu_root_count;
6211cccf5cSBen Gardon 	};
63985ab278SSean Christopherson 	unsigned int unsync_children;
64*c10743a1SSean Christopherson 	union {
65985ab278SSean Christopherson 		struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
66*c10743a1SSean Christopherson 		tdp_ptep_t ptep;
67*c10743a1SSean Christopherson 	};
68985ab278SSean Christopherson 	DECLARE_BITMAP(unsync_child_bitmap, 512);
69985ab278SSean Christopherson 
701148bfc4SSean Christopherson 	struct list_head lpage_disallowed_link;
71985ab278SSean Christopherson #ifdef CONFIG_X86_32
72985ab278SSean Christopherson 	/*
73985ab278SSean Christopherson 	 * Used out of the mmu-lock to avoid reading spte values while an
74985ab278SSean Christopherson 	 * update is in progress; see the comments in __get_spte_lockless().
75985ab278SSean Christopherson 	 */
76985ab278SSean Christopherson 	int clear_spte_count;
77985ab278SSean Christopherson #endif
78985ab278SSean Christopherson 
79985ab278SSean Christopherson 	/* Number of writes since the last time traversal visited this page.  */
80985ab278SSean Christopherson 	atomic_t write_flooding_count;
8102c00b3aSBen Gardon 
82897218ffSPaolo Bonzini #ifdef CONFIG_X86_64
83d9f6e12fSIngo Molnar 	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
847cca2d0bSBen Gardon 	struct rcu_head rcu_head;
85897218ffSPaolo Bonzini #endif
86985ab278SSean Christopherson };
87985ab278SSean Christopherson 
/*
 * Slab cache declared here and defined in another translation unit.
 * NOTE(review): by its name this is the cache backing struct kvm_mmu_page
 * allocations - confirm at the definition.
 */
extern struct kmem_cache *mmu_page_header_cache;
8902c00b3aSBen Gardon 
90e47c4aeeSSean Christopherson static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
91985ab278SSean Christopherson {
92985ab278SSean Christopherson 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
93985ab278SSean Christopherson 
94985ab278SSean Christopherson 	return (struct kvm_mmu_page *)page_private(page);
95985ab278SSean Christopherson }
96985ab278SSean Christopherson 
9757354682SSean Christopherson static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
9857354682SSean Christopherson {
99e47c4aeeSSean Christopherson 	return to_shadow_page(__pa(sptep));
10057354682SSean Christopherson }
10157354682SSean Christopherson 
102a3f15bdaSSean Christopherson static inline int kvm_mmu_role_as_id(union kvm_mmu_page_role role)
103a3f15bdaSSean Christopherson {
104a3f15bdaSSean Christopherson 	return role.smm ? 1 : 0;
105a3f15bdaSSean Christopherson }
106a3f15bdaSSean Christopherson 
10708889894SSean Christopherson static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
10808889894SSean Christopherson {
109a3f15bdaSSean Christopherson 	return kvm_mmu_role_as_id(sp->role);
11008889894SSean Christopherson }
11108889894SSean Christopherson 
112ce92ef76SSean Christopherson static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
1135a9624afSPaolo Bonzini {
1145a9624afSPaolo Bonzini 	/*
11544ac5958SSean Christopherson 	 * When using the EPT page-modification log, the GPAs in the CPU dirty
11644ac5958SSean Christopherson 	 * log would come from L2 rather than L1.  Therefore, we need to rely
11744ac5958SSean Christopherson 	 * on write protection to record dirty pages, which bypasses PML, since
11844ac5958SSean Christopherson 	 * writes now result in a vmexit.  Note, the check on CPU dirty logging
11944ac5958SSean Christopherson 	 * being enabled is mandatory as the bits used to denote WP-only SPTEs
120ce92ef76SSean Christopherson 	 * are reserved for PAE paging (32-bit KVM).
1215a9624afSPaolo Bonzini 	 */
122ce92ef76SSean Christopherson 	return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
1235a9624afSPaolo Bonzini }
1245a9624afSPaolo Bonzini 
1258283e36aSBen Gardon int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
1262839180cSPaolo Bonzini 			    gfn_t gfn, bool can_unsync, bool prefetch);
1275a9624afSPaolo Bonzini 
128269e9552SHamza Mahfooz void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
129269e9552SHamza Mahfooz void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
1306ca9a6f3SSean Christopherson bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
1313ad93562SKeqian Zhu 				    struct kvm_memory_slot *slot, u64 gfn,
1323ad93562SKeqian Zhu 				    int min_level);
1332f2fad08SBen Gardon void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
1342f2fad08SBen Gardon 					u64 start_gfn, u64 pages);
1353bcd0662SPeter Xu unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
1366ca9a6f3SSean Christopherson 
/*
 * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
 *
 * RET_PF_RETRY: let CPU fault again on the address.
 * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
 * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
 * RET_PF_FIXED: The faulting entry has been fixed.
 * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
 *
 * Any names added to this enum should be exported to userspace for use in
 * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h
 */
enum {
	RET_PF_RETRY = 0,
	RET_PF_EMULATE,
	RET_PF_INVALID,
	RET_PF_FIXED,
	RET_PF_SPURIOUS,
};
156bb18842eSBen Gardon 
1578ca6f063SBen Gardon int kvm_mmu_max_mapping_level(struct kvm *kvm,
1588ca6f063SBen Gardon 			      const struct kvm_memory_slot *slot, gfn_t gfn,
1598ca6f063SBen Gardon 			      kvm_pfn_t pfn, int max_level);
16073a3c659SPaolo Bonzini void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
161536f0e6aSPaolo Bonzini void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
162bb18842eSBen Gardon 
163bb18842eSBen Gardon void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
164bb18842eSBen Gardon 
16529cf0f50SBen Gardon void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
16629cf0f50SBen Gardon void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
16729cf0f50SBen Gardon 
1686ca9a6f3SSean Christopherson #endif /* __KVM_X86_MMU_INTERNAL_H */
169