1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * KVM guest address space mapping code 4 * 5 * Copyright IBM Corp. 2007, 2016, 2025 6 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 7 * Claudio Imbrenda <imbrenda@linux.ibm.com> 8 */ 9 10 #ifndef ARCH_KVM_S390_GMAP_H 11 #define ARCH_KVM_S390_GMAP_H 12 13 #include "dat.h" 14 15 /** 16 * enum gmap_flags - Flags of a gmap. 17 * 18 * @GMAP_FLAG_SHADOW: The gmap is a vsie shadow gmap. 19 * @GMAP_FLAG_OWNS_PAGETABLES: The gmap owns all dat levels; normally 1, is 0 20 * only for ucontrol per-cpu gmaps, since they 21 * share the page tables with the main gmap. 22 * @GMAP_FLAG_IS_UCONTROL: The gmap is ucontrol (main gmap or per-cpu gmap). 23 * @GMAP_FLAG_ALLOW_HPAGE_1M: 1M hugepages are allowed for this gmap, 24 * independently of the page size used by userspace. 25 * @GMAP_FLAG_ALLOW_HPAGE_2G: 2G hugepages are allowed for this gmap, 26 * independently of the page size used by userspace. 27 * @GMAP_FLAG_PFAULT_ENABLED: Pfault is enabled for the gmap. 28 * @GMAP_FLAG_USES_SKEYS: If the guest uses storage keys. 29 * @GMAP_FLAG_USES_CMM: Whether the guest uses CMMA. 30 * @GMAP_FLAG_EXPORT_ON_UNMAP: Whether to export guest pages when unmapping. 31 */ 32 enum gmap_flags { 33 GMAP_FLAG_SHADOW = 0, 34 GMAP_FLAG_OWNS_PAGETABLES, 35 GMAP_FLAG_IS_UCONTROL, 36 GMAP_FLAG_ALLOW_HPAGE_1M, 37 GMAP_FLAG_ALLOW_HPAGE_2G, 38 GMAP_FLAG_PFAULT_ENABLED, 39 GMAP_FLAG_USES_SKEYS, 40 GMAP_FLAG_USES_CMM, 41 GMAP_FLAG_EXPORT_ON_UNMAP, 42 }; 43 44 /** 45 * struct gmap_struct - Guest address space. 46 * 47 * @flags: GMAP_FLAG_* flags. 48 * @edat_level: The edat level of this shadow gmap. 49 * @kvm: The vm. 50 * @asce: The ASCE used by this gmap. 51 * @list: List head used in children gmaps for the children gmap list. 52 * @children_lock: Protects children and scb_users. 53 * @children: List of child gmaps of this gmap. 54 * @scb_users: List of vsie_scb that use this shadow gmap. 55 * @parent: Parent gmap of a child gmap. 56 * @guest_asce: Original ASCE of this shadow gmap. 57 * @host_to_rmap_lock: Protects host_to_rmap. 58 * @host_to_rmap: Radix tree mapping host addresses to guest addresses. 59 */ 60 struct gmap { 61 unsigned long flags; 62 unsigned char edat_level; 63 bool invalidated; 64 struct kvm *kvm; 65 union asce asce; 66 struct list_head list; 67 spinlock_t children_lock; /* Protects: children, scb_users */ 68 struct list_head children; 69 struct list_head scb_users; 70 struct gmap *parent; 71 union asce guest_asce; 72 spinlock_t host_to_rmap_lock; /* Protects host_to_rmap */ 73 struct radix_tree_root host_to_rmap; 74 refcount_t refcount; 75 }; 76 77 struct gmap_cache { 78 struct list_head list; 79 struct gmap *gmap; 80 }; 81 82 #define gmap_for_each_rmap_safe(pos, n, head) \ 83 for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n) 84 85 int s390_replace_asce(struct gmap *gmap); 86 bool _gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end, bool hint); 87 bool gmap_age_gfn(struct gmap *gmap, gfn_t start, gfn_t end); 88 bool gmap_unmap_gfn_range(struct gmap *gmap, struct kvm_memory_slot *slot, gfn_t start, gfn_t end); 89 int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault); 90 struct gmap *gmap_new(struct kvm *kvm, gfn_t limit); 91 struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit); 92 void gmap_remove_child(struct gmap *child); 93 void gmap_dispose(struct gmap *gmap); 94 int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *fault, 95 struct kvm_memory_slot *slot); 96 void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end); 97 int gmap_set_limit(struct gmap *gmap, gfn_t limit); 98 int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t *gaddr); 99 int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count); 100 void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count); 101 int gmap_enable_skeys(struct gmap *gmap); 102 int gmap_pv_destroy_range(struct gmap *gmap, gfn_t start, gfn_t end, bool interruptible); 103 int gmap_insert_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, 104 gfn_t r_gfn, int level); 105 int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, 106 kvm_pfn_t pfn, int level, bool wr); 107 void gmap_set_cmma_all_dirty(struct gmap *gmap); 108 void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn); 109 struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, 110 union asce asce, int edat_level); 111 void gmap_split_huge_pages(struct gmap *gmap); 112 113 static inline bool uses_skeys(struct gmap *gmap) 114 { 115 return test_bit(GMAP_FLAG_USES_SKEYS, &gmap->flags); 116 } 117 118 static inline bool uses_cmm(struct gmap *gmap) 119 { 120 return test_bit(GMAP_FLAG_USES_CMM, &gmap->flags); 121 } 122 123 static inline bool pfault_enabled(struct gmap *gmap) 124 { 125 return test_bit(GMAP_FLAG_PFAULT_ENABLED, &gmap->flags); 126 } 127 128 static inline bool is_ucontrol(struct gmap *gmap) 129 { 130 return test_bit(GMAP_FLAG_IS_UCONTROL, &gmap->flags); 131 } 132 133 static inline bool is_shadow(struct gmap *gmap) 134 { 135 return test_bit(GMAP_FLAG_SHADOW, &gmap->flags); 136 } 137 138 static inline bool owns_page_tables(struct gmap *gmap) 139 { 140 return test_bit(GMAP_FLAG_OWNS_PAGETABLES, &gmap->flags); 141 } 142 143 static inline struct gmap *gmap_put(struct gmap *gmap) 144 { 145 if (refcount_dec_and_test(&gmap->refcount)) 146 gmap_dispose(gmap); 147 return NULL; 148 } 149 150 static inline void gmap_get(struct gmap *gmap) 151 { 152 WARN_ON_ONCE(unlikely(!refcount_inc_not_zero(&gmap->refcount))); 153 } 154 155 static inline void gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn) 156 { 157 scoped_guard(spinlock, &parent->children_lock) 158 _gmap_handle_vsie_unshadow_event(parent, gfn); 159 } 160 161 static inline bool gmap_mkold_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end) 162 { 163 return _gmap_unmap_prefix(gmap, gfn, end, true); 164 } 165 166 static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end) 167 { 168 return _gmap_unmap_prefix(gmap, gfn, end, false); 169 } 170 171 /** 172 * pte_needs_unshadow() -- Check if the pte operations triggers unshadowing. 173 * @oldpte: the previous value for the guest pte. 174 * @newpte: the new pte being set. 175 * @pgste: the pgste for the pte entry. 176 * 177 * If the pgste.vsie_notif bit is not set, return false: the page is not 178 * involved in vsie and thus should not trigger an unshadow operation. 179 * 180 * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest 181 * memory. The access rights on g3's memory should be synchronized with g1's 182 * and g2's. Therefore unshadowing is triggered if the new and old pte 183 * differ in protection, or if the new pte is invalid. 184 * 185 * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables 186 * for g3. If the entry becomes writable or absent, it becomes impossible to 187 * guarantee that the shadow mapping will match g2's mapping. In that case, 188 * trigger an unshadow event. 189 * 190 * Return: true if an unshadow event should be triggered, otherwise false. 191 */ 192 static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste) 193 { 194 if (!pgste.vsie_notif) 195 return false; 196 if (pgste.vsie_gmem) 197 return (oldpte.h.p != newpte.h.p) || newpte.h.i; 198 return !newpte.h.p || !newpte.s.pr; 199 } 200 201 static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte, 202 union pgste pgste, gfn_t gfn, bool needs_lock) 203 { 204 lockdep_assert_held(&gmap->kvm->mmu_lock); 205 if (!needs_lock) 206 lockdep_assert_held(&gmap->children_lock); 207 else 208 lockdep_assert_not_held(&gmap->children_lock); 209 210 if (pgste.prefix_notif && (newpte.h.p || newpte.h.i)) { 211 pgste.prefix_notif = 0; 212 gmap_unmap_prefix(gmap, gfn, gfn + 1); 213 } 214 if (pte_needs_unshadow(*ptep, newpte, pgste)) { 215 pgste.vsie_notif = 0; 216 pgste.vsie_gmem = 0; 217 if (needs_lock) 218 gmap_handle_vsie_unshadow_event(gmap, gfn); 219 else 220 _gmap_handle_vsie_unshadow_event(gmap, gfn); 221 } 222 if (!ptep->s.d && newpte.s.d && !newpte.s.s) 223 SetPageDirty(pfn_to_page(newpte.h.pfra)); 224 pgste.zero = 0; 225 return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap)); 226 } 227 228 static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte, 229 union pgste pgste, gfn_t gfn) 230 { 231 return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true); 232 } 233 234 /** 235 * crste_needs_unshadow() -- Check if the crste operations triggers unshadowing. 236 * @oldcrste: the previous value for the crste. 237 * @newcrste: the new value for the crste. 238 * 239 * If the old crste did not have the vsie_notif bit set, return false: the 240 * page is not involved in vsie and thus should not trigger an unshadow 241 * operation. Conversely, if the bit is set, it can only be g3 memory, since 242 * dat tables are never mapped using large pages. 243 * 244 * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the 245 * protection bit is changing or the new page is invalid, trigger an 246 * unshadow event. Also trigger an unshadow event if the new crste does not 247 * have the vsie_notif bit set. 248 * 249 * Return: true if an unshadow event should be triggered, otherwise false. 250 */ 251 static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste) 252 { 253 if (!oldcrste.s.fc1.vsie_notif) 254 return false; 255 return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif; 256 } 257 258 static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, 259 union crste oldcrste, union crste newcrste, 260 gfn_t gfn, bool needs_lock) 261 { 262 unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES; 263 264 if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm)) 265 return true; 266 267 lockdep_assert_held(&gmap->kvm->mmu_lock); 268 if (!needs_lock) 269 lockdep_assert_held(&gmap->children_lock); 270 271 gfn = ALIGN_DOWN(gfn, align); 272 if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) { 273 newcrste.s.fc1.prefix_notif = 0; 274 gmap_unmap_prefix(gmap, gfn, gfn + align); 275 } 276 if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) { 277 newcrste = oldcrste; 278 newcrste.s.fc1.vsie_notif = 0; 279 if (needs_lock) 280 gmap_handle_vsie_unshadow_event(gmap, gfn); 281 else 282 _gmap_handle_vsie_unshadow_event(gmap, gfn); 283 if (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce)) 284 return false; 285 /* 286 * Return false even if the swap was successful, as it only 287 * indicates that the best effort clearing of the vsie_notif 288 * bit was successful. The caller will have to try again 289 * regardless, since the desired value has not been set. 290 * This pointless check is needed to silence a potential 291 * __must_check warning. 292 */ 293 return false; 294 } 295 if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s) 296 SetPageDirty(phys_to_page(crste_origin_large(newcrste))); 297 return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce); 298 } 299 300 static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, 301 union crste oldcrste, union crste newcrste, 302 gfn_t gfn) 303 { 304 return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true); 305 } 306 307 /** 308 * gmap_is_shadow_valid() - check if a shadow guest address space matches the 309 * given properties and is still valid. 310 * @sg: Pointer to the shadow guest address space structure. 311 * @asce: ASCE for which the shadow table is requested. 312 * @edat_level: Edat level to be used for the shadow translation. 313 * 314 * Return: true if the gmap shadow is still valid and matches the given 315 * properties and the caller can continue using it; false otherwise, the 316 * caller has to request a new shadow gmap in this case. 317 */ 318 static inline bool gmap_is_shadow_valid(struct gmap *sg, union asce asce, int edat_level) 319 { 320 return sg->guest_asce.val == asce.val && sg->edat_level == edat_level; 321 } 322 323 #endif /* ARCH_KVM_S390_GMAP_H */ 324