xref: /linux/arch/s390/kvm/gmap.h (revision abeb7eb57f1671d9185ddf11236c784f07bdb928)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  *  KVM guest address space mapping code
4  *
5  *    Copyright IBM Corp. 2007, 2016, 2025
6  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
7  *               Claudio Imbrenda <imbrenda@linux.ibm.com>
8  */
9 
10 #ifndef ARCH_KVM_S390_GMAP_H
11 #define ARCH_KVM_S390_GMAP_H
12 
13 #include "dat.h"
14 
/**
 * enum gmap_flags - Bit numbers of the per-gmap flags.
 *
 * Each value is a bit index into the flags word of a gmap.
 *
 * @GMAP_FLAG_SHADOW: Set for a vsie shadow gmap.
 * @GMAP_FLAG_OWNS_PAGETABLES: Set when the gmap owns all dat levels. This is
 *                             the normal case; it is clear only for ucontrol
 *                             per-cpu gmaps, which share their page tables
 *                             with the main gmap.
 * @GMAP_FLAG_IS_UCONTROL: Set for ucontrol gmaps (main gmap or per-cpu gmap).
 * @GMAP_FLAG_ALLOW_HPAGE_1M: Set when 1M hugepages may be used for this gmap,
 *                            regardless of the page size used by userspace.
 * @GMAP_FLAG_ALLOW_HPAGE_2G: Set when 2G hugepages may be used for this gmap,
 *                            regardless of the page size used by userspace.
 * @GMAP_FLAG_PFAULT_ENABLED: Set when pfault is enabled for the gmap.
 * @GMAP_FLAG_USES_SKEYS: Set when the guest uses storage keys.
 * @GMAP_FLAG_USES_CMM: Set when the guest uses CMMA.
 * @GMAP_FLAG_EXPORT_ON_UNMAP: Set when guest pages are to be exported on
 *                             unmap.
 */
enum gmap_flags {
	GMAP_FLAG_SHADOW		= 0,
	GMAP_FLAG_OWNS_PAGETABLES	= 1,
	GMAP_FLAG_IS_UCONTROL		= 2,
	GMAP_FLAG_ALLOW_HPAGE_1M	= 3,
	GMAP_FLAG_ALLOW_HPAGE_2G	= 4,
	GMAP_FLAG_PFAULT_ENABLED	= 5,
	GMAP_FLAG_USES_SKEYS		= 6,
	GMAP_FLAG_USES_CMM		= 7,
	GMAP_FLAG_EXPORT_ON_UNMAP	= 8,
};
43 
/**
 * struct gmap - Guest address space.
 *
 * @flags: GMAP_FLAG_* flags; a bitmap indexed by enum gmap_flags.
 * @edat_level: The edat level of this shadow gmap.
 * @invalidated: NOTE(review): not referenced anywhere in this header;
 *               presumably marks a gmap that has been invalidated -- confirm
 *               against the users of this field.
 * @kvm: The vm.
 * @asce: The ASCE used by this gmap.
 * @list: List head used in children gmaps for the children gmap list.
 * @children_lock: Protects children and scb_users.
 * @children: List of child gmaps of this gmap.
 * @scb_users: List of vsie_scb that use this shadow gmap.
 * @parent: Parent gmap of a child gmap.
 * @guest_asce: Original ASCE of this shadow gmap.
 * @host_to_rmap_lock: Protects host_to_rmap.
 * @host_to_rmap: Radix tree mapping host addresses to guest addresses.
 * @refcount: Reference count of the gmap; gmap_put() calls gmap_dispose()
 *            when the count drops to zero.
 */
struct gmap {
	unsigned long flags;
	unsigned char edat_level;
	bool invalidated;
	struct kvm *kvm;
	union asce asce;
	struct list_head list;
	spinlock_t children_lock;	/* Protects: children, scb_users */
	struct list_head children;
	struct list_head scb_users;
	struct gmap *parent;
	union asce guest_asce;
	spinlock_t host_to_rmap_lock;	/* Protects host_to_rmap */
	struct radix_tree_root host_to_rmap;
	refcount_t refcount;
};
76 
/**
 * struct gmap_cache - List element holding a pointer to a gmap.
 *
 * @list: List linkage of this element.
 * @gmap: The gmap this element refers to.
 *
 * NOTE(review): the users of this structure are not visible in this header;
 * confirm which list it is linked into and whether @gmap holds a reference.
 */
struct gmap_cache {
	struct list_head list;
	struct gmap *gmap;
};
81 
/*
 * gmap_for_each_rmap_safe - iterate over a NULL-terminated chain of elements
 * linked through their ->next member.
 * @pos:  cursor, assigned each element in turn.
 * @n:    scratch pointer of the same type as @pos, holds the successor.
 * @head: first element of the chain; may be NULL, in which case the body is
 *        never executed.
 *
 * The successor of @pos is loaded into @n before the loop body runs and the
 * loop advances through @n only, so the body may unlink or free @pos.
 *
 * All macro arguments are parenthesized so that non-trivial expressions
 * (for example a dereferenced pointer used as cursor) expand correctly.
 */
#define gmap_for_each_rmap_safe(pos, n, head) \
	for ((pos) = (head); (n) = (pos) ? (pos)->next : NULL, (pos); (pos) = (n))
84 
/*
 * Out-of-line gmap operations. Only the prototypes are visible in this
 * header; refer to the definitions for the exact semantics.
 */
int s390_replace_asce(struct gmap *gmap);
/* Common backend of gmap_mkold_prefix() (hint = true) and gmap_unmap_prefix() (hint = false). */
bool _gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end, bool hint);
bool gmap_age_gfn(struct gmap *gmap, gfn_t start, gfn_t end);
bool gmap_unmap_gfn_range(struct gmap *gmap, struct kvm_memory_slot *slot, gfn_t start, gfn_t end);
int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault);
struct gmap *gmap_new(struct kvm *kvm, gfn_t limit);
struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit);
void gmap_remove_child(struct gmap *child);
/* Called by gmap_put() once the last reference to the gmap has been dropped. */
void gmap_dispose(struct gmap *gmap);
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *fault,
	      struct kvm_memory_slot *slot);
void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end);
int gmap_set_limit(struct gmap *gmap, gfn_t limit);
int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t *gaddr);
int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count);
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count);
int gmap_enable_skeys(struct gmap *gmap);
int gmap_pv_destroy_range(struct gmap *gmap, gfn_t start, gfn_t end, bool interruptible);
int gmap_insert_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn,
		     gfn_t r_gfn, int level);
int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn,
		      kvm_pfn_t pfn, int level, bool wr);
void gmap_set_cmma_all_dirty(struct gmap *gmap);
/*
 * Variant of gmap_handle_vsie_unshadow_event() that does not take
 * parent->children_lock itself; the inline wrapper takes the lock around
 * this call.
 */
void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn);
struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
				union asce asce, int edat_level);
void gmap_split_huge_pages(struct gmap *gmap);
112 
113 static inline bool uses_skeys(struct gmap *gmap)
114 {
115 	return test_bit(GMAP_FLAG_USES_SKEYS, &gmap->flags);
116 }
117 
118 static inline bool uses_cmm(struct gmap *gmap)
119 {
120 	return test_bit(GMAP_FLAG_USES_CMM, &gmap->flags);
121 }
122 
123 static inline bool pfault_enabled(struct gmap *gmap)
124 {
125 	return test_bit(GMAP_FLAG_PFAULT_ENABLED, &gmap->flags);
126 }
127 
128 static inline bool is_ucontrol(struct gmap *gmap)
129 {
130 	return test_bit(GMAP_FLAG_IS_UCONTROL, &gmap->flags);
131 }
132 
133 static inline bool is_shadow(struct gmap *gmap)
134 {
135 	return test_bit(GMAP_FLAG_SHADOW, &gmap->flags);
136 }
137 
138 static inline bool owns_page_tables(struct gmap *gmap)
139 {
140 	return test_bit(GMAP_FLAG_OWNS_PAGETABLES, &gmap->flags);
141 }
142 
143 static inline struct gmap *gmap_put(struct gmap *gmap)
144 {
145 	if (refcount_dec_and_test(&gmap->refcount))
146 		gmap_dispose(gmap);
147 	return NULL;
148 }
149 
/**
 * gmap_get() - Take an additional reference to a gmap.
 * @gmap: The gmap whose reference count is incremented.
 *
 * Increments @gmap->refcount unless it is zero. If the increment is refused
 * (refcount_inc_not_zero() returns false), a one-time warning is emitted and
 * no reference is taken; the caller is not informed of the failure.
 */
static inline void gmap_get(struct gmap *gmap)
{
	WARN_ON_ONCE(unlikely(!refcount_inc_not_zero(&gmap->refcount)));
}
154 
155 static inline void gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn)
156 {
157 	scoped_guard(spinlock, &parent->children_lock)
158 		_gmap_handle_vsie_unshadow_event(parent, gfn);
159 }
160 
161 static inline bool gmap_mkold_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
162 {
163 	return _gmap_unmap_prefix(gmap, gfn, end, true);
164 }
165 
166 static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
167 {
168 	return _gmap_unmap_prefix(gmap, gfn, end, false);
169 }
170 
171 /**
172  * pte_needs_unshadow() -- Check if the pte operations triggers unshadowing.
173  * @oldpte: the previous value for the guest pte.
174  * @newpte: the new pte being set.
175  * @pgste: the pgste for the pte entry.
176  *
177  * If the pgste.vsie_notif bit is not set, return false: the page is not
178  * involved in vsie and thus should not trigger an unshadow operation.
179  *
180  * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest
181  * memory. The access rights on g3's memory should be synchronized with g1's
182  * and g2's. Therefore unshadowing is triggered if the new and old pte
183  * differ in protection, or if the new pte is invalid.
184  *
185  * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables
186  * for g3. If the entry becomes writable or absent, it becomes impossible to
187  * guarantee that the shadow mapping will match g2's mapping. In that case,
188  * trigger an unshadow event.
189  *
190  * Return: true if an unshadow event should be triggered, otherwise false.
191  */
192 static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
193 {
194 	if (!pgste.vsie_notif)
195 		return false;
196 	if (pgste.vsie_gmem)
197 		return (oldpte.h.p != newpte.h.p) || newpte.h.i;
198 	return !newpte.h.p || !newpte.s.pr;
199 }
200 
/**
 * _gmap_ptep_xchg() - Exchange a guest pte, handling notifications first.
 * @gmap: The gmap the pte belongs to; @gmap->kvm->mmu_lock must be held.
 * @ptep: Pointer to the pte that is being replaced.
 * @newpte: The new value for the pte.
 * @pgste: The pgste for the pte; an adjusted copy is passed on to
 *         __dat_ptep_xchg().
 * @gfn: The guest frame number passed to the notification handlers and to
 *       __dat_ptep_xchg().
 * @needs_lock: If true, the caller must not hold @gmap->children_lock and
 *              the lock is taken internally if an unshadow event has to be
 *              handled; if false, the caller must already hold it.
 *
 * Before the pte is exchanged:
 * - if the pgste has prefix_notif set and the new pte is protected or
 *   invalid, prefix_notif is cleared and gmap_unmap_prefix() is called for
 *   @gfn;
 * - if pte_needs_unshadow() reports that the update requires unshadowing,
 *   vsie_notif and vsie_gmem are cleared and the vsie unshadow handler is
 *   called for @gfn;
 * - if the dirty bit goes from clear in the current pte to set in the new
 *   pte, and the s bit of the new pte is clear, the page referenced by the
 *   new pte is marked dirty.
 *
 * Return: the pgste returned by __dat_ptep_xchg().
 */
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
					  union pgste pgste, gfn_t gfn, bool needs_lock)
{
	lockdep_assert_held(&gmap->kvm->mmu_lock);
	/* The locking contract for children_lock depends on needs_lock. */
	if (!needs_lock)
		lockdep_assert_held(&gmap->children_lock);
	else
		lockdep_assert_not_held(&gmap->children_lock);

	/* A prefix page that becomes protected or invalid: notify and stop tracking. */
	if (pgste.prefix_notif && (newpte.h.p || newpte.h.i)) {
		pgste.prefix_notif = 0;
		gmap_unmap_prefix(gmap, gfn, gfn + 1);
	}
	if (pte_needs_unshadow(*ptep, newpte, pgste)) {
		pgste.vsie_notif = 0;
		pgste.vsie_gmem = 0;
		/* Use the locked wrapper only if children_lock is not held yet. */
		if (needs_lock)
			gmap_handle_vsie_unshadow_event(gmap, gfn);
		else
			_gmap_handle_vsie_unshadow_event(gmap, gfn);
	}
	/* Dirty bit transition from clear to set, and the new pte's s bit is clear. */
	if (!ptep->s.d && newpte.s.d && !newpte.s.s)
		SetPageDirty(pfn_to_page(newpte.h.pfra));
	/* The zero bit of the pgste is always cleared by the exchange. */
	pgste.zero = 0;
	return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
}
227 
228 static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
229 					 union pgste pgste, gfn_t gfn)
230 {
231 	return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
232 }
233 
234 /**
235  * crste_needs_unshadow() -- Check if the crste operations triggers unshadowing.
236  * @oldcrste: the previous value for the crste.
237  * @newcrste: the new value for the crste.
238  *
239  * If the old crste did not have the vsie_notif bit set, return false: the
240  * page is not involved in vsie and thus should not trigger an unshadow
241  * operation. Conversely, if the bit is set, it can only be g3 memory, since
242  * dat tables are never mapped using large pages.
243  *
244  * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the
245  * protection bit is changing or the new page is invalid, trigger an
246  * unshadow event. Also trigger an unshadow event if the new crste does not
247  * have the vsie_notif bit set.
248  *
249  * Return: true if an unshadow event should be triggered, otherwise false.
250  */
251 static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
252 {
253 	if (!oldcrste.s.fc1.vsie_notif)
254 		return false;
255 	return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif;
256 }
257 
/**
 * _gmap_crstep_xchg_atomic() - Atomically exchange a crste, handling
 *                              notifications first.
 * @gmap: The gmap the crste belongs to; @gmap->kvm->mmu_lock must be held.
 * @crstep: Pointer to the crste that is being replaced.
 * @oldcrste: The value the crste is expected to hold.
 * @newcrste: The new value for the crste.
 * @gfn: A guest frame number covered by the entry; it is aligned down to the
 *       start of the range covered by the entry before being used.
 * @needs_lock: If true, @gmap->children_lock is taken internally if an
 *              unshadow event has to be handled; if false, the caller must
 *              already hold it.
 *
 * If the table type of @crstep, @oldcrste and @newcrste do not all match,
 * KVM_BUG_ON() fires and true is returned without touching the entry.
 *
 * If the old entry is a prefix entry and the new one is protected, invalid,
 * or no longer a prefix entry, prefix_notif is cleared in the new value and
 * gmap_unmap_prefix() is called for the whole range covered by the entry.
 *
 * If the old entry is a leaf and crste_needs_unshadow() reports that the
 * update requires unshadowing, the requested value is NOT installed:
 * instead, the vsie unshadow handler is called and an attempt is made to
 * replace the old value with a copy of itself that has vsie_notif cleared.
 * In this case false is always returned and the caller has to retry.
 *
 * Otherwise the page is marked dirty if the dirty bit goes from clear to set
 * and the s bit of the new entry is clear, and the exchange is attempted.
 *
 * Return: true if the requested value has been installed (or if the table
 * type check failed), false if the caller has to try again.
 */
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
							 union crste oldcrste, union crste newcrste,
							 gfn_t gfn, bool needs_lock)
{
	/* Number of pages covered by the entry: one segment for a pmd, one region otherwise. */
	unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;

	if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm))
		return true;

	lockdep_assert_held(&gmap->kvm->mmu_lock);
	if (!needs_lock)
		lockdep_assert_held(&gmap->children_lock);

	gfn = ALIGN_DOWN(gfn, align);
	if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
		newcrste.s.fc1.prefix_notif = 0;
		gmap_unmap_prefix(gmap, gfn, gfn + align);
	}
	if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
		/* Only try to drop the vsie_notif bit; the requested value is discarded. */
		newcrste = oldcrste;
		newcrste.s.fc1.vsie_notif = 0;
		/* Use the locked wrapper only if children_lock is not held yet. */
		if (needs_lock)
			gmap_handle_vsie_unshadow_event(gmap, gfn);
		else
			_gmap_handle_vsie_unshadow_event(gmap, gfn);
		if (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce))
			return false;
		/*
		 * Return false even if the swap was successful, as it only
		 * indicates that the best effort clearing of the vsie_notif
		 * bit was successful. The caller will have to try again
		 * regardless, since the desired value has not been set.
		 * The otherwise pointless check above is needed to silence a
		 * potential __must_check warning.
		 */
		return false;
	}
	/* Dirty bit transition from clear to set, and the new entry's s bit is clear. */
	if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s)
		SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
	return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
}
299 
300 static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
301 							union crste oldcrste, union crste newcrste,
302 							gfn_t gfn)
303 {
304 	return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
305 }
306 
307 /**
308  * gmap_is_shadow_valid() - check if a shadow guest address space matches the
309  *                          given properties and is still valid.
310  * @sg: Pointer to the shadow guest address space structure.
311  * @asce: ASCE for which the shadow table is requested.
312  * @edat_level: Edat level to be used for the shadow translation.
313  *
314  * Return: true if the gmap shadow is still valid and matches the given
315  * properties and the caller can continue using it; false otherwise, the
316  * caller has to request a new shadow gmap in this case.
317  */
318 static inline bool gmap_is_shadow_valid(struct gmap *sg, union asce asce, int edat_level)
319 {
320 	return sg->guest_asce.val == asce.val && sg->edat_level == edat_level;
321 }
322 
323 #endif /* ARCH_KVM_S390_GMAP_H */
324