xref: /linux/arch/x86/kvm/mmu/mmu_internal.h (revision 3ad93562093d764bc22d6460e84ba60d0c57f7ab)
16ca9a6f3SSean Christopherson /* SPDX-License-Identifier: GPL-2.0 */
26ca9a6f3SSean Christopherson #ifndef __KVM_X86_MMU_INTERNAL_H
36ca9a6f3SSean Christopherson #define __KVM_X86_MMU_INTERNAL_H
46ca9a6f3SSean Christopherson 
5985ab278SSean Christopherson #include <linux/types.h>
65a9624afSPaolo Bonzini #include <linux/kvm_host.h>
7985ab278SSean Christopherson #include <asm/kvm_host.h>
8985ab278SSean Christopherson 
/*
 * MMU debug helpers.
 *
 * MMU_DEBUG is explicitly undefined here, so unless this line is changed the
 * no-op variants below are the ones in effect: pgprintk(), rmap_printk() and
 * MMU_WARN_ON() expand to empty statements and never evaluate their arguments.
 */
#undef MMU_DEBUG

#ifdef MMU_DEBUG
/* Runtime switch consulted by the printing macros; defined outside this header. */
extern bool dbg;

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
/* Same as pgprintk(), but prefixes the message with the calling function's name. */
#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
#define MMU_WARN_ON(x) WARN_ON(x)
#else
#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)
#define MMU_WARN_ON(x) do { } while (0)
#endif
225a9624afSPaolo Bonzini 
/*
 * Unlike regular MMU roots, PAE "roots", a.k.a. PDPTEs/PDPTRs, have a PRESENT
 * bit, and thus are guaranteed to be non-zero when valid.  And, when a guest
 * PDPTR is !PRESENT, its corresponding PAE root cannot be set to INVALID_PAGE,
 * as the CPU would treat that as PRESENT PDPTR with reserved bits set.  Use
 * '0' instead of INVALID_PAGE to indicate an invalid PAE root.
 *
 * IS_VALID_PAE_ROOT() normalizes its argument to 0 or 1, i.e. any non-zero
 * value counts as a valid PAE root.
 */
#define INVALID_PAE_ROOT	0
#define IS_VALID_PAE_ROOT(x)	(!!(x))
32c834e5e4SSean Christopherson 
33985ab278SSean Christopherson struct kvm_mmu_page {
34985ab278SSean Christopherson 	struct list_head link;
35985ab278SSean Christopherson 	struct hlist_node hash_link;
36985ab278SSean Christopherson 	struct list_head lpage_disallowed_link;
37985ab278SSean Christopherson 
38985ab278SSean Christopherson 	bool unsync;
39985ab278SSean Christopherson 	u8 mmu_valid_gen;
40985ab278SSean Christopherson 	bool mmio_cached;
41985ab278SSean Christopherson 	bool lpage_disallowed; /* Can't be replaced by an equiv large page */
42985ab278SSean Christopherson 
43985ab278SSean Christopherson 	/*
44985ab278SSean Christopherson 	 * The following two entries are used to key the shadow page in the
45985ab278SSean Christopherson 	 * hash table.
46985ab278SSean Christopherson 	 */
47985ab278SSean Christopherson 	union kvm_mmu_page_role role;
48985ab278SSean Christopherson 	gfn_t gfn;
49985ab278SSean Christopherson 
50985ab278SSean Christopherson 	u64 *spt;
51985ab278SSean Christopherson 	/* hold the gfn of each spte inside spt */
52985ab278SSean Christopherson 	gfn_t *gfns;
5311cccf5cSBen Gardon 	/* Currently serving as active root */
5411cccf5cSBen Gardon 	union {
5511cccf5cSBen Gardon 		int root_count;
5611cccf5cSBen Gardon 		refcount_t tdp_mmu_root_count;
5711cccf5cSBen Gardon 	};
58985ab278SSean Christopherson 	unsigned int unsync_children;
59985ab278SSean Christopherson 	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
60985ab278SSean Christopherson 	DECLARE_BITMAP(unsync_child_bitmap, 512);
61985ab278SSean Christopherson 
62985ab278SSean Christopherson #ifdef CONFIG_X86_32
63985ab278SSean Christopherson 	/*
64985ab278SSean Christopherson 	 * Used out of the mmu-lock to avoid reading spte values while an
65985ab278SSean Christopherson 	 * update is in progress; see the comments in __get_spte_lockless().
66985ab278SSean Christopherson 	 */
67985ab278SSean Christopherson 	int clear_spte_count;
68985ab278SSean Christopherson #endif
69985ab278SSean Christopherson 
70985ab278SSean Christopherson 	/* Number of writes since the last time traversal visited this page.  */
71985ab278SSean Christopherson 	atomic_t write_flooding_count;
7202c00b3aSBen Gardon 
73897218ffSPaolo Bonzini #ifdef CONFIG_X86_64
7402c00b3aSBen Gardon 	bool tdp_mmu_page;
757cca2d0bSBen Gardon 
76d9f6e12fSIngo Molnar 	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
777cca2d0bSBen Gardon 	struct rcu_head rcu_head;
78897218ffSPaolo Bonzini #endif
79985ab278SSean Christopherson };
80985ab278SSean Christopherson 
/*
 * Defined outside this header.
 * NOTE(review): presumably the slab cache that struct kvm_mmu_page headers
 * are allocated from -- confirm against the definition.
 */
extern struct kmem_cache *mmu_page_header_cache;
8202c00b3aSBen Gardon 
83e47c4aeeSSean Christopherson static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
84985ab278SSean Christopherson {
85985ab278SSean Christopherson 	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
86985ab278SSean Christopherson 
87985ab278SSean Christopherson 	return (struct kvm_mmu_page *)page_private(page);
88985ab278SSean Christopherson }
89985ab278SSean Christopherson 
9057354682SSean Christopherson static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
9157354682SSean Christopherson {
92e47c4aeeSSean Christopherson 	return to_shadow_page(__pa(sptep));
9357354682SSean Christopherson }
9457354682SSean Christopherson 
95a3f15bdaSSean Christopherson static inline int kvm_mmu_role_as_id(union kvm_mmu_page_role role)
96a3f15bdaSSean Christopherson {
97a3f15bdaSSean Christopherson 	return role.smm ? 1 : 0;
98a3f15bdaSSean Christopherson }
99a3f15bdaSSean Christopherson 
10008889894SSean Christopherson static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
10108889894SSean Christopherson {
102a3f15bdaSSean Christopherson 	return kvm_mmu_role_as_id(sp->role);
10308889894SSean Christopherson }
10408889894SSean Christopherson 
1055a9624afSPaolo Bonzini static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
1065a9624afSPaolo Bonzini {
1075a9624afSPaolo Bonzini 	/*
10844ac5958SSean Christopherson 	 * When using the EPT page-modification log, the GPAs in the CPU dirty
10944ac5958SSean Christopherson 	 * log would come from L2 rather than L1.  Therefore, we need to rely
11044ac5958SSean Christopherson 	 * on write protection to record dirty pages, which bypasses PML, since
11144ac5958SSean Christopherson 	 * writes now result in a vmexit.  Note, the check on CPU dirty logging
11244ac5958SSean Christopherson 	 * being enabled is mandatory as the bits used to denote WP-only SPTEs
11344ac5958SSean Christopherson 	 * are reserved for NPT w/ PAE (32-bit KVM).
1145a9624afSPaolo Bonzini 	 */
11544ac5958SSean Christopherson 	return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
11644ac5958SSean Christopherson 	       kvm_x86_ops.cpu_dirty_log_size;
1175a9624afSPaolo Bonzini }
1185a9624afSPaolo Bonzini 
119a9d6496dSShaokun Zhang extern int nx_huge_pages;
120a9d6496dSShaokun Zhang static inline bool is_nx_huge_page_enabled(void)
121a9d6496dSShaokun Zhang {
122a9d6496dSShaokun Zhang 	return READ_ONCE(nx_huge_pages);
123a9d6496dSShaokun Zhang }
124a9d6496dSShaokun Zhang 
1255a9624afSPaolo Bonzini bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1265a9624afSPaolo Bonzini 			    bool can_unsync);
1275a9624afSPaolo Bonzini 
1286ca9a6f3SSean Christopherson void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
1296ca9a6f3SSean Christopherson void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
1306ca9a6f3SSean Christopherson bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
131*3ad93562SKeqian Zhu 				    struct kvm_memory_slot *slot, u64 gfn,
132*3ad93562SKeqian Zhu 				    int min_level);
1332f2fad08SBen Gardon void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
1342f2fad08SBen Gardon 					u64 start_gfn, u64 pages);
1356ca9a6f3SSean Christopherson 
/*
 * Return values of handle_mmio_page_fault(), mmu.page_fault(), and
 * fast_page_fault().
 *
 * RET_PF_RETRY: let CPU fault again on the address.
 * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
 * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
 * RET_PF_FIXED: The faulting entry has been fixed.
 * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
 */
enum {
	RET_PF_RETRY = 0,
	RET_PF_EMULATE,
	RET_PF_INVALID,
	RET_PF_FIXED,
	RET_PF_SPURIOUS,
};
152bb18842eSBen Gardon 
/*
 * Bits which may be returned by set_spte().  Each flag occupies its own bit,
 * so several of them can be OR'ed together in one return value.
 */
#define SET_SPTE_WRITE_PROTECTED_PT	BIT(0)
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH	BIT(1)
#define SET_SPTE_SPURIOUS		BIT(2)
157bb18842eSBen Gardon 
1588ca6f063SBen Gardon int kvm_mmu_max_mapping_level(struct kvm *kvm,
1598ca6f063SBen Gardon 			      const struct kvm_memory_slot *slot, gfn_t gfn,
1608ca6f063SBen Gardon 			      kvm_pfn_t pfn, int max_level);
161bb18842eSBen Gardon int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
162bb18842eSBen Gardon 			    int max_level, kvm_pfn_t *pfnp,
163bb18842eSBen Gardon 			    bool huge_page_disallowed, int *req_level);
164bb18842eSBen Gardon void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
165bb18842eSBen Gardon 				kvm_pfn_t *pfnp, int *goal_levelp);
166bb18842eSBen Gardon 
167bb18842eSBen Gardon void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
168bb18842eSBen Gardon 
16929cf0f50SBen Gardon void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
17029cf0f50SBen Gardon void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
17129cf0f50SBen Gardon 
1726ca9a6f3SSean Christopherson #endif /* __KVM_X86_MMU_INTERNAL_H */
173