xref: /linux/arch/s390/kvm/dat.c (revision 6a97c4d5262d02f04d1f41113b0d090ea51f08dd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  KVM guest address space mapping code
4  *
5  *    Copyright IBM Corp. 2007, 2020, 2024
6  *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
7  *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
8  *		 David Hildenbrand <david@redhat.com>
9  *		 Janosch Frank <frankja@linux.ibm.com>
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/pagewalk.h>
14 #include <linux/swap.h>
15 #include <linux/smp.h>
16 #include <linux/spinlock.h>
17 #include <linux/slab.h>
18 #include <linux/swapops.h>
19 #include <linux/ksm.h>
20 #include <linux/mm.h>
21 #include <linux/mman.h>
22 #include <linux/pgtable.h>
23 #include <linux/kvm_types.h>
24 #include <linux/kvm_host.h>
25 #include <linux/pgalloc.h>
26 
27 #include <asm/page-states.h>
28 #include <asm/tlb.h>
29 #include "dat.h"
30 
kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache * mc)31 int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc)
32 {
33 	void *o;
34 
35 	for ( ; mc->n_crsts < KVM_S390_MMU_CACHE_N_CRSTS; mc->n_crsts++) {
36 		o = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
37 		if (!o)
38 			return -ENOMEM;
39 		mc->crsts[mc->n_crsts] = o;
40 	}
41 	for ( ; mc->n_pts < KVM_S390_MMU_CACHE_N_PTS; mc->n_pts++) {
42 		o = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
43 		if (!o)
44 			return -ENOMEM;
45 		mc->pts[mc->n_pts] = o;
46 	}
47 	for ( ; mc->n_rmaps < KVM_S390_MMU_CACHE_N_RMAPS; mc->n_rmaps++) {
48 		o = kzalloc_obj(*mc->rmaps[0], GFP_KERNEL_ACCOUNT);
49 		if (!o)
50 			return -ENOMEM;
51 		mc->rmaps[mc->n_rmaps] = o;
52 	}
53 	return 0;
54 }
55 
dat_alloc_pt_noinit(struct kvm_s390_mmu_cache * mc)56 static inline struct page_table *dat_alloc_pt_noinit(struct kvm_s390_mmu_cache *mc)
57 {
58 	struct page_table *res;
59 
60 	res = kvm_s390_mmu_cache_alloc_pt(mc);
61 	if (res)
62 		__arch_set_page_dat(res, 1);
63 	return res;
64 }
65 
dat_alloc_crst_noinit(struct kvm_s390_mmu_cache * mc)66 static inline struct crst_table *dat_alloc_crst_noinit(struct kvm_s390_mmu_cache *mc)
67 {
68 	struct crst_table *res;
69 
70 	res = kvm_s390_mmu_cache_alloc_crst(mc);
71 	if (res)
72 		__arch_set_page_dat(res, 1UL << CRST_ALLOC_ORDER);
73 	return res;
74 }
75 
dat_alloc_crst_sleepable(unsigned long init)76 struct crst_table *dat_alloc_crst_sleepable(unsigned long init)
77 {
78 	struct page *page;
79 	void *virt;
80 
81 	page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
82 	if (!page)
83 		return NULL;
84 	virt = page_to_virt(page);
85 	__arch_set_page_dat(virt, 1UL << CRST_ALLOC_ORDER);
86 	crst_table_init(virt, init);
87 	return virt;
88 }
89 
/*
 * dat_free_level() - Recursively free a CRST table and the tables below it.
 * @table: The table to free.
 * @owns_ptes: If true, page tables referenced by segment entries are freed
 *             as well; otherwise they are left alone.
 *
 * Entries with the fc or i bit set do not reference a lower level table and
 * are skipped.
 */
void dat_free_level(struct crst_table *table, bool owns_ptes)
{
	union crste *cur, *stop = table->crstes + _CRST_ENTRIES;

	for (cur = table->crstes; cur < stop; cur++) {
		if (cur->h.fc || cur->h.i)
			continue;
		if (is_pmd(*cur)) {
			if (owns_ptes)
				dat_free_pt(dereference_pmd(cur->pmd));
		} else {
			dat_free_level(dereference_crste(*cur), owns_ptes);
		}
	}
	dat_free_crst(table);
}
104 
dat_set_asce_limit(struct kvm_s390_mmu_cache * mc,union asce * asce,int newtype)105 int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype)
106 {
107 	struct crst_table *table;
108 	union crste crste;
109 
110 	while (asce->dt > newtype) {
111 		table = dereference_asce(*asce);
112 		crste = table->crstes[0];
113 		if (crste.h.fc)
114 			return 0;
115 		if (!crste.h.i) {
116 			asce->rsto = crste.h.fc0.to;
117 			dat_free_crst(table);
118 		} else {
119 			crste.h.tt--;
120 			crst_table_init((void *)table, crste.val);
121 		}
122 		asce->dt--;
123 	}
124 	while (asce->dt < newtype) {
125 		crste = _crste_fc0(asce->rsto, asce->dt + 1);
126 		table = dat_alloc_crst_noinit(mc);
127 		if (!table)
128 			return -ENOMEM;
129 		crst_table_init((void *)table, _CRSTE_HOLE(crste.h.tt).val);
130 		table->crstes[0] = crste;
131 		asce->rsto = __pa(table) >> PAGE_SHIFT;
132 		asce->dt++;
133 	}
134 	return 0;
135 }
136 
137 /**
138  * dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
139  * @crstep: Pointer to the CRST entry.
140  * @old: Expected old value.
141  * @new: Replacement entry.
142  * @gfn: The affected guest address.
143  * @asce: The asce of the address space.
144  *
145  * This function is needed to atomically exchange a CRSTE that potentially
146  * maps a prefix area, without having to invalidate it inbetween.
147  *
148  * Context: This function is assumed to be called with kvm->mmu_lock held.
149  *
150  * Return: %true if the exchange was successful.
151  */
dat_crstep_xchg_atomic(union crste * crstep,union crste old,union crste new,gfn_t gfn,union asce asce)152 bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
153 					 gfn_t gfn, union asce asce)
154 {
155 	if (old.h.i)
156 		return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
157 	if (cpu_has_edat2())
158 		return crdte_crste(crstep, old, new, gfn, asce);
159 	return cspg_crste(crstep, old, new);
160 }
161 
dat_set_storage_key_from_pgste(union pte pte,union pgste pgste)162 static void dat_set_storage_key_from_pgste(union pte pte, union pgste pgste)
163 {
164 	union skey nkey = { .acc = pgste.acc, .fp = pgste.fp };
165 
166 	page_set_storage_key(pte_origin(pte), nkey.skey, 0);
167 }
168 
/* Copy the storage key of the page referenced by @old to the page referenced by @new. */
static void dat_move_storage_key(union pte old, union pte new)
{
	page_set_storage_key(pte_origin(new),
			     page_get_storage_key(pte_origin(old)),
			     1);
}
173 
dat_save_storage_key_into_pgste(union pte pte,union pgste pgste)174 static union pgste dat_save_storage_key_into_pgste(union pte pte, union pgste pgste)
175 {
176 	union skey skey;
177 
178 	skey.skey = page_get_storage_key(pte_origin(pte));
179 
180 	pgste.acc = skey.acc;
181 	pgste.fp = skey.fp;
182 	pgste.gr |= skey.r;
183 	pgste.gc |= skey.c;
184 
185 	return pgste;
186 }
187 
/*
 * __dat_ptep_xchg() - Replace a pte, invalidating and moving keys as needed.
 * @ptep: The pte to replace.
 * @pgste: The current pgste value for @ptep.
 * @new: The new pte value.
 * @gfn: The guest frame mapped by @ptep.
 * @asce: The ASCE of the address space.
 * @uses_skeys: Whether storage keys need to be kept in sync.
 *
 * If the old and new pte differ only in their software bits, only the
 * software byte is written. Otherwise a valid old pte is invalidated with
 * IPTE first, storage keys are saved/restored/moved if @uses_skeys is set,
 * and finally the new pte is written.
 *
 * The comment in the fast path implies the pgste lock is held by the caller.
 *
 * Return: the possibly updated pgste value; it is not written back here.
 */
union pgste __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new, gfn_t gfn,
			    union asce asce, bool uses_skeys)
{
	union pte old = READ_ONCE(*ptep);

	/*
	 * Updating only the software bits while holding the pgste lock.
	 * Compare against the snapshot so that every decision in this
	 * function is based on the same value of the pte.
	 */
	if (!((old.val ^ new.val) & ~_PAGE_SW_BITS)) {
		WRITE_ONCE(ptep->swbyte, new.swbyte);
		return pgste;
	}

	if (!old.h.i) {
		unsigned long opts = IPTE_GUEST_ASCE | (pgste.nodat ? IPTE_NODAT : 0);

		if (machine_has_tlb_guest())
			__ptep_ipte(gfn_to_gpa(gfn), (void *)ptep, opts, asce.val, IPTE_GLOBAL);
		else
			__ptep_ipte(gfn_to_gpa(gfn), (void *)ptep, 0, 0, IPTE_GLOBAL);
	}

	if (uses_skeys) {
		if (old.h.i && !new.h.i) {
			/* Invalid to valid: restore storage keys from PGSTE. */
			dat_set_storage_key_from_pgste(new, pgste);
		} else if (!old.h.i && new.h.i) {
			/* Valid to invalid: save storage keys to PGSTE. */
			pgste = dat_save_storage_key_into_pgste(old, pgste);
		} else if (!old.h.i && !new.h.i) {
			/* Valid to valid: move storage keys if the frame changes. */
			if (old.h.pfra != new.h.pfra)
				dat_move_storage_key(old, new);
		}
		/* Invalid to invalid: nothing to do. */
	}

	WRITE_ONCE(*ptep, new);
	return pgste;
}
225 
226 /*
227  * dat_split_ste() - Split a segment table entry into page table entries.
228  *
229  * Context: This function is assumed to be called with kvm->mmu_lock held.
230  *
231  * Return: 0 in case of success, -ENOMEM if running out of memory.
232  */
dat_split_ste(struct kvm_s390_mmu_cache * mc,union pmd * pmdp,gfn_t gfn,union asce asce,bool uses_skeys)233 static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t gfn,
234 			 union asce asce, bool uses_skeys)
235 {
236 	union pgste pgste_init;
237 	struct page_table *pt;
238 	union pmd new, old;
239 	union pte init;
240 	int i;
241 
242 	BUG_ON(!mc);
243 	old = READ_ONCE(*pmdp);
244 
245 	/* Already split, nothing to do. */
246 	if (!old.h.i && !old.h.fc)
247 		return 0;
248 
249 	pt = dat_alloc_pt_noinit(mc);
250 	if (!pt)
251 		return -ENOMEM;
252 	new.val = virt_to_phys(pt);
253 
254 	while (old.h.i || old.h.fc) {
255 		init.val = pmd_origin_large(old);
256 		init.h.p = old.h.p;
257 		init.h.i = old.h.i;
258 		init.s.d = old.s.fc1.d;
259 		init.s.w = old.s.fc1.w;
260 		init.s.y = old.s.fc1.y;
261 		init.s.sd = old.s.fc1.sd;
262 		init.s.pr = old.s.fc1.pr;
263 		pgste_init.val = 0;
264 		if (old.h.fc) {
265 			for (i = 0; i < _PAGE_ENTRIES; i++)
266 				pt->ptes[i].val = init.val | i * PAGE_SIZE;
267 			/* No need to take locks as the page table is not installed yet. */
268 			pgste_init.prefix_notif = old.s.fc1.prefix_notif;
269 			pgste_init.vsie_notif = old.s.fc1.vsie_notif;
270 			pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
271 			pgste_init.pcl = uses_skeys && init.h.i;
272 			dat_init_pgstes(pt, pgste_init.val);
273 		} else {
274 			dat_init_page_table(pt, init.val, 0);
275 		}
276 
277 		if (dat_pmdp_xchg_atomic(pmdp, old, new, gfn, asce)) {
278 			if (!pgste_init.pcl)
279 				return 0;
280 			for (i = 0; i < _PAGE_ENTRIES; i++) {
281 				union pgste pgste = pt->pgstes[i];
282 
283 				pgste = dat_save_storage_key_into_pgste(pt->ptes[i], pgste);
284 				pgste_set_unlock(pt->ptes + i, pgste);
285 			}
286 			return 0;
287 		}
288 		old = READ_ONCE(*pmdp);
289 	}
290 
291 	dat_free_pt(pt);
292 	return 0;
293 }
294 
295 /*
296  * dat_split_crste() - Split a crste into smaller crstes.
297  *
298  * Context: This function is assumed to be called with kvm->mmu_lock held.
299  *
300  * Return: %0 in case of success, %-ENOMEM if running out of memory.
301  */
dat_split_crste(struct kvm_s390_mmu_cache * mc,union crste * crstep,gfn_t gfn,union asce asce,bool uses_skeys)302 static int dat_split_crste(struct kvm_s390_mmu_cache *mc, union crste *crstep,
303 			   gfn_t gfn, union asce asce, bool uses_skeys)
304 {
305 	struct crst_table *table;
306 	union crste old, new, init;
307 	int i;
308 
309 	old = READ_ONCE(*crstep);
310 	if (is_pmd(old))
311 		return dat_split_ste(mc, &crstep->pmd, gfn, asce, uses_skeys);
312 
313 	BUG_ON(!mc);
314 
315 	/* Already split, nothing to do. */
316 	if (!old.h.i && !old.h.fc)
317 		return 0;
318 
319 	table = dat_alloc_crst_noinit(mc);
320 	if (!table)
321 		return -ENOMEM;
322 
323 	new.val = virt_to_phys(table);
324 	new.h.tt = old.h.tt;
325 	new.h.fc0.tl = _REGION_ENTRY_LENGTH;
326 
327 	while (old.h.i || old.h.fc) {
328 		init = old;
329 		init.h.tt--;
330 		if (old.h.fc) {
331 			for (i = 0; i < _CRST_ENTRIES; i++)
332 				table->crstes[i].val = init.val | i * HPAGE_SIZE;
333 		} else {
334 			crst_table_init((void *)table, init.val);
335 		}
336 		if (dat_crstep_xchg_atomic(crstep, old, new, gfn, asce))
337 			return 0;
338 		old = READ_ONCE(*crstep);
339 	}
340 
341 	dat_free_crst(table);
342 	return 0;
343 }
344 
345 /**
346  * dat_entry_walk() - Walk the gmap page tables.
347  * @mc: Cache to use to allocate dat tables, if needed; can be NULL if neither
348  *      %DAT_WALK_SPLIT or %DAT_WALK_ALLOC is specified in @flags.
349  * @gfn: Guest frame.
350  * @asce: The ASCE of the address space.
351  * @flags: Flags from WALK_* macros.
352  * @walk_level: Level to walk to, from LEVEL_* macros.
353  * @last: Will be filled the last visited non-pte DAT entry.
354  * @ptepp: Will be filled the last visited pte entry, if any, otherwise NULL.
355  *
356  * Returns a table entry pointer for the given guest address and @walk_level.
357  *
358  * The @flags have the following meanings:
359  * * %DAT_WALK_IGN_HOLES: consider holes as normal table entries
360  * * %DAT_WALK_ALLOC: allocate new tables to reach the requested level, if needed
361  * * %DAT_WALK_SPLIT: split existing large pages to reach the requested level, if needed
362  * * %DAT_WALK_LEAF: return successfully whenever a large page is encountered
363  * * %DAT_WALK_ANY: return successfully even if the requested level could not be reached
364  * * %DAT_WALK_CONTINUE: walk to the requested level with the specified flags, and then try to
365  *                       continue walking to ptes with only DAT_WALK_ANY
366  * * %DAT_WALK_USES_SKEYS: storage keys are in use
367  *
368  * Context: called with kvm->mmu_lock held.
369  *
370  * Return:
371  * * %PGM_ADDRESSING if the requested address lies outside memory
372  * * a PIC number if the requested address lies in a memory hole of type _DAT_TOKEN_PIC
373  * * %-EFAULT if the requested address lies inside a memory hole of a different type
374  * * %-EINVAL if the given ASCE is not compatible with the requested level
375  * * %-EFBIG if the requested level could not be reached because a larger frame was found
376  * * %-ENOENT if the requested level could not be reached for other reasons
377  * * %-ENOMEM if running out of memory while allocating or splitting a table
378  */
dat_entry_walk(struct kvm_s390_mmu_cache * mc,gfn_t gfn,union asce asce,int flags,int walk_level,union crste ** last,union pte ** ptepp)379 int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
380 		   int walk_level, union crste **last, union pte **ptepp)
381 {
382 	union vaddress vaddr = { .addr = gfn_to_gpa(gfn) };
383 	bool continue_anyway = flags & DAT_WALK_CONTINUE;
384 	bool uses_skeys = flags & DAT_WALK_USES_SKEYS;
385 	bool ign_holes = flags & DAT_WALK_IGN_HOLES;
386 	bool allocate = flags & DAT_WALK_ALLOC;
387 	bool split = flags & DAT_WALK_SPLIT;
388 	bool leaf = flags & DAT_WALK_LEAF;
389 	bool any = flags & DAT_WALK_ANY;
390 	struct page_table *pgtable;
391 	struct crst_table *table;
392 	union crste entry;
393 	int rc;
394 
395 	*last = NULL;
396 	*ptepp = NULL;
397 	if (WARN_ON_ONCE(unlikely(!asce.val)))
398 		return -EINVAL;
399 	if (WARN_ON_ONCE(unlikely(walk_level > asce.dt)))
400 		return -EINVAL;
401 	if (!asce_contains_gfn(asce, gfn))
402 		return PGM_ADDRESSING;
403 
404 	table = dereference_asce(asce);
405 	if (asce.dt >= ASCE_TYPE_REGION1) {
406 		*last = table->crstes + vaddr.rfx;
407 		entry = READ_ONCE(**last);
408 		if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION1))
409 			return -EINVAL;
410 		if (crste_hole(entry) && !ign_holes)
411 			return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
412 		if (walk_level == TABLE_TYPE_REGION1)
413 			return 0;
414 		if (entry.pgd.h.i) {
415 			if (!allocate)
416 				return any ? 0 : -ENOENT;
417 			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
418 			if (rc)
419 				return rc;
420 			entry = READ_ONCE(**last);
421 		}
422 		table = dereference_crste(entry.pgd);
423 	}
424 
425 	if (asce.dt >= ASCE_TYPE_REGION2) {
426 		*last = table->crstes + vaddr.rsx;
427 		entry = READ_ONCE(**last);
428 		if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION2))
429 			return -EINVAL;
430 		if (crste_hole(entry) && !ign_holes)
431 			return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
432 		if (walk_level == TABLE_TYPE_REGION2)
433 			return 0;
434 		if (entry.p4d.h.i) {
435 			if (!allocate)
436 				return any ? 0 : -ENOENT;
437 			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
438 			if (rc)
439 				return rc;
440 			entry = READ_ONCE(**last);
441 		}
442 		table = dereference_crste(entry.p4d);
443 	}
444 
445 	if (asce.dt >= ASCE_TYPE_REGION3) {
446 		*last = table->crstes + vaddr.rtx;
447 		entry = READ_ONCE(**last);
448 		if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION3))
449 			return -EINVAL;
450 		if (crste_hole(entry) && !ign_holes)
451 			return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
452 		if (walk_level == TABLE_TYPE_REGION3 &&
453 		    continue_anyway && !entry.pud.h.fc && !entry.h.i) {
454 			walk_level = TABLE_TYPE_PAGE_TABLE;
455 			allocate = false;
456 		}
457 		if (walk_level == TABLE_TYPE_REGION3 || ((leaf || any) && entry.pud.h.fc))
458 			return 0;
459 		if (entry.pud.h.i && !entry.pud.h.fc) {
460 			if (!allocate)
461 				return any ? 0 : -ENOENT;
462 			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
463 			if (rc)
464 				return rc;
465 			entry = READ_ONCE(**last);
466 		}
467 		if (walk_level <= TABLE_TYPE_SEGMENT && entry.pud.h.fc) {
468 			if (!split)
469 				return -EFBIG;
470 			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
471 			if (rc)
472 				return rc;
473 			entry = READ_ONCE(**last);
474 		}
475 		table = dereference_crste(entry.pud);
476 	}
477 
478 	*last = table->crstes + vaddr.sx;
479 	entry = READ_ONCE(**last);
480 	if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_SEGMENT))
481 		return -EINVAL;
482 	if (crste_hole(entry) && !ign_holes)
483 		return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
484 	if (continue_anyway && !entry.pmd.h.fc && !entry.h.i) {
485 		walk_level = TABLE_TYPE_PAGE_TABLE;
486 		allocate = false;
487 	}
488 	if (walk_level == TABLE_TYPE_SEGMENT || ((leaf || any) && entry.pmd.h.fc))
489 		return 0;
490 
491 	if (entry.pmd.h.i && !entry.pmd.h.fc) {
492 		if (!allocate)
493 			return any ? 0 : -ENOENT;
494 		rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
495 		if (rc)
496 			return rc;
497 		entry = READ_ONCE(**last);
498 	}
499 	if (walk_level <= TABLE_TYPE_PAGE_TABLE && entry.pmd.h.fc) {
500 		if (!split)
501 			return -EFBIG;
502 		rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
503 		if (rc)
504 			return rc;
505 		entry = READ_ONCE(**last);
506 	}
507 	pgtable = dereference_pmd(entry.pmd);
508 	*ptepp = pgtable->ptes + vaddr.px;
509 	if (pte_hole(**ptepp) && !ign_holes)
510 		return (*ptepp)->tok.type == _DAT_TOKEN_PIC ? (*ptepp)->tok.par : -EFAULT;
511 	return 0;
512 }
513 
/*
 * dat_pte_walk_range() - Call the pte callback for a range within a page table.
 * @gfn: First guest frame to visit.
 * @end: Guest frame after the last one to visit.
 * @table: The page table containing the ptes for the range.
 * @w: The walk state; w->ops->pte_entry is called unconditionally, so it
 *     must not be NULL.
 *
 * Holes cause -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * DAT_WALK_IGN_HOLES they are skipped unless DAT_WALK_ANY is set as well.
 *
 * Return: 0, -EFAULT, or the first non-zero value returned by the callback.
 */
static long dat_pte_walk_range(gfn_t gfn, gfn_t end, struct page_table *table, struct dat_walk *w)
{
	union pte *ptep = table->ptes + (gfn & (_PAGE_ENTRIES - 1));
	long rc = 0;

	for ( ; gfn < end; ptep++, gfn++) {
		if (pte_hole(READ_ONCE(*ptep))) {
			if (!(w->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(w->flags & DAT_WALK_ANY))
				continue;
		}

		rc = w->ops->pte_entry(ptep, gfn, gfn + 1, w);
		if (rc)
			break;
	}
	return rc;
}
533 
/*
 * dat_crste_walk_range() - Walk a range of entries of a CRST table.
 * @start: First guest frame to visit.
 * @end: Guest frame after the last one to visit.
 * @table: The CRST table to walk.
 * @walk: The walk state; walk->last is updated to the entry being visited.
 *
 * For each entry in the range, the callback registered for the entry's
 * table type (if any) is called, and then, if the entry references a lower
 * level table, the walk descends into it. Page tables are only visited if a
 * pte callback is registered.
 *
 * Holes cause -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * DAT_WALK_IGN_HOLES they are skipped unless DAT_WALK_ANY is set as well.
 *
 * The walk stops at the first non-zero result, regardless of whether it
 * comes from a callback at this level or from the walk of a lower level.
 *
 * Return: 0, -EFAULT, or the first non-zero value returned by a callback.
 */
static long dat_crste_walk_range(gfn_t start, gfn_t end, struct crst_table *table,
				 struct dat_walk *walk)
{
	unsigned long idx, cur_shift, cur_size;
	dat_walk_op the_op;
	union crste crste;
	gfn_t cur, next;
	long rc = 0;

	/* Each entry at table type tt spans 2^(8 + 11 * tt) guest frames. */
	cur_shift = 8 + table->crstes[0].h.tt * 11;
	idx = (start >> cur_shift) & (_CRST_ENTRIES - 1);
	cur_size = 1UL << cur_shift;

	for (cur = ALIGN_DOWN(start, cur_size); cur < end; idx++, cur = next) {
		next = cur + cur_size;
		walk->last = table->crstes + idx;
		crste = READ_ONCE(*walk->last);

		if (crste_hole(crste)) {
			if (!(walk->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(walk->flags & DAT_WALK_ANY))
				continue;
		}

		the_op = walk->ops->crste_ops[crste.h.tt];
		if (the_op) {
			rc = the_op(walk->last, cur, next, walk);
			/* The callback may have changed the entry. */
			crste = READ_ONCE(*walk->last);
		}
		if (rc)
			break;

		/* Large or invalid entries have nothing below them. */
		if (crste.h.i || crste.h.fc)
			continue;

		if (!is_pmd(crste))
			rc = dat_crste_walk_range(max(start, cur), min(end, next),
						  _dereference_crste(crste), walk);
		else if (walk->ops->pte_entry)
			rc = dat_pte_walk_range(max(start, cur), min(end, next),
						dereference_pmd(crste.pmd), walk);
		/*
		 * Stop right away: otherwise the result of the lower level
		 * walk would be overwritten by the callback for the next
		 * entry, or ignored when the next entry is a skipped hole.
		 */
		if (rc)
			break;
	}
	return rc;
}
577 
578 /**
579  * _dat_walk_gfn_range() - Walk DAT tables.
580  * @start: The first guest page frame to walk.
581  * @end: The guest page frame immediately after the last one to walk.
582  * @asce: The ASCE of the guest mapping.
583  * @ops: The gmap_walk_ops that will be used to perform the walk.
584  * @flags: Flags from WALK_* (currently only WALK_IGN_HOLES is supported).
585  * @priv: Will be passed as-is to the callbacks.
586  *
587  * Any callback returning non-zero causes the walk to stop immediately.
588  *
589  * Return: %-EINVAL in case of error, %-EFAULT if @start is too high for the
590  *         given ASCE unless the DAT_WALK_IGN_HOLES flag is specified,
591  *         otherwise it returns whatever the callbacks return.
592  */
_dat_walk_gfn_range(gfn_t start,gfn_t end,union asce asce,const struct dat_walk_ops * ops,int flags,void * priv)593 long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
594 			 const struct dat_walk_ops *ops, int flags, void *priv)
595 {
596 	struct crst_table *table = dereference_asce(asce);
597 	struct dat_walk walk = {
598 		.ops	= ops,
599 		.asce	= asce,
600 		.priv	= priv,
601 		.flags	= flags,
602 		.start	= start,
603 		.end	= end,
604 	};
605 
606 	if (WARN_ON_ONCE(unlikely(!asce.val)))
607 		return -EINVAL;
608 	if (!asce_contains_gfn(asce, start))
609 		return (flags & DAT_WALK_IGN_HOLES) ? 0 : -EFAULT;
610 
611 	return dat_crste_walk_range(start, min(end, asce_end(asce)), table, &walk);
612 }
613 
/*
 * dat_get_storage_key() - Get the guest view of the storage key of a frame.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 * @skey: Where the key is stored; set to 0 before the walk.
 *
 * If the walk ends at a large entry, the real storage key is returned if the
 * entry has fc and pr set, otherwise the key stays 0. If the walk reaches a
 * pte, acc and fp come from the pgste for an invalid pte and from the real
 * storage key for a valid one; in both cases gr and gc from the pgste are
 * OR-ed into the reference and change bits.
 *
 * Return: 0 on success, otherwise the non-zero result of dat_entry_walk().
 */
int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey)
{
	union crste *crstep;
	union crste large;
	union pgste pgste;
	union pte *ptep;
	int rc;

	skey->skey = 0;
	rc = dat_entry_walk(NULL, gfn, asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep);
	if (rc)
		return rc;

	if (ptep) {
		pgste = pgste_get_lock(ptep);
		if (ptep->h.i) {
			skey->acc = pgste.acc;
			skey->fp = pgste.fp;
		} else {
			skey->skey = page_get_storage_key(pte_origin(*ptep));
		}
		skey->r |= pgste.gr;
		skey->c |= pgste.gc;
		pgste_set_unlock(ptep, pgste);
		return 0;
	}

	/* No pte: the walk stopped at a CRST entry. */
	large = READ_ONCE(*crstep);
	if (large.h.fc && large.s.fc1.pr)
		skey->skey = page_get_storage_key(large_crste_to_phys(large, gfn));
	return 0;
}
647 
dat_update_ptep_sd(union pgste old,union pgste pgste,union pte * ptep)648 static void dat_update_ptep_sd(union pgste old, union pgste pgste, union pte *ptep)
649 {
650 	if (pgste.acc != old.acc || pgste.fp != old.fp || pgste.gr != old.gr || pgste.gc != old.gc)
651 		__atomic64_or(_PAGE_SD, &ptep->val);
652 }
653 
/*
 * dat_set_storage_key() - Set the guest storage key of a frame.
 * @mc: Cache used by the walk to allocate tables, if needed.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 * @skey: The new key.
 * @nq: Passed (inverted) as last argument to page_set_storage_key().
 *
 * If the walk ends at a large entry, the real storage key is set directly.
 * Otherwise the key is recorded in the pgste; for a valid pte the real
 * reference and change bits are additionally accumulated into hr and hc and
 * the real key is set with cleared reference and change bits.
 *
 * Return: 0 on success, otherwise the non-zero result of dat_entry_walk().
 */
int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
			union skey skey, bool nq)
{
	union pgste pgste, old;
	union crste *crstep;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(mc, gfn, asce, DAT_WALK_LEAF_ALLOC, TABLE_TYPE_PAGE_TABLE,
			    &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		/* Read the entry once, like dat_get_storage_key() does. */
		union crste crste = READ_ONCE(*crstep);

		page_set_storage_key(large_crste_to_phys(crste, gfn), skey.skey, !nq);
		return 0;
	}

	old = pgste_get_lock(ptep);
	pgste = old;

	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gc = skey.c;
	pgste.gr = skey.r;

	if (!ptep->h.i) {
		union skey real;

		/* Remember the real reference/change bits before overwriting them. */
		real.skey = page_get_storage_key(pte_origin(*ptep));
		pgste.hc |= real.c;
		pgste.hr |= real.r;
		/* The guest view of r and c lives in the pgste only. */
		skey.r = 0;
		skey.c = 0;
		page_set_storage_key(pte_origin(*ptep), skey.skey, !nq);
	}

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return 0;
}
697 
/*
 * page_cond_set_storage_key() - Set a storage key only if it would change.
 * @paddr: Address of the page.
 * @skey: The new key.
 * @oldkey: Receives the current key; must not be NULL.
 * @nq: Passed (inverted) as last argument to page_set_storage_key().
 * @mr: If true, a differing reference bit does not count as a change.
 * @mc: If true, a differing change bit does not count as a change.
 *
 * Return: true if the key was set, false if it was left as it is.
 */
static bool page_cond_set_storage_key(phys_addr_t paddr, union skey skey, union skey *oldkey,
				      bool nq, bool mr, bool mc)
{
	bool same_access, same_ref, same_chg;

	oldkey->skey = page_get_storage_key(paddr);

	same_access = oldkey->acc == skey.acc && oldkey->fp == skey.fp;
	same_ref = oldkey->r == skey.r || mr;
	same_chg = oldkey->c == skey.c || mc;
	if (same_access && same_ref && same_chg)
		return false;

	page_set_storage_key(paddr, skey.skey, !nq);
	return true;
}
708 
/*
 * dat_cond_set_storage_key() - Conditionally set the guest storage key.
 * @mmc: Cache used by the walk to allocate tables, if needed.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 * @skey: The new key.
 * @oldkey: If not NULL, receives the previous key.
 * @nq: Passed (inverted) as last argument to page_set_storage_key().
 * @mr: If true, a differing reference bit does not trigger an update.
 * @mc: If true, a differing change bit does not trigger an update.
 *
 * The pgste, if any, is always updated with the new key. The real storage
 * key is only set if it differs from @skey as per @mr and @mc.
 *
 * Return: the non-zero result of dat_entry_walk() if the walk fails;
 *         otherwise 1 if the real key was set or the pte is invalid, and 0
 *         if the real key was left as it is.
 */
int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
			     union skey skey, union skey *oldkey, bool nq, bool mr, bool mc)
{
	union pgste pgste, old;
	union crste *crstep;
	union skey prev;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(mmc, gfn, asce, DAT_WALK_LEAF_ALLOC, TABLE_TYPE_PAGE_TABLE,
			    &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		/*
		 * The helper stores the old key unconditionally, so give it
		 * a local buffer: @oldkey is optional.
		 */
		rc = page_cond_set_storage_key(large_crste_to_phys(*crstep, gfn), skey, &prev,
					       nq, mr, mc);
		if (oldkey)
			*oldkey = prev;
		return rc;
	}

	old = pgste_get_lock(ptep);
	pgste = old;

	rc = 1;
	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gc = skey.c;
	pgste.gr = skey.r;

	if (!ptep->h.i) {
		rc = page_cond_set_storage_key(pte_origin(*ptep), skey, &prev, nq, mr, mc);
		/* Accumulate the real bits for the host, then add the guest view. */
		pgste.hc |= prev.c;
		pgste.hr |= prev.r;
		prev.c |= old.gc;
		prev.r |= old.gr;
	} else {
		/* Invalid pte: the previous key lives in the pgste only. */
		prev.acc = old.acc;
		prev.fp = old.fp;
		prev.c = old.gc;
		prev.r = old.gr;
	}
	if (oldkey)
		*oldkey = prev;

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return rc;
}
755 
/*
 * dat_reset_reference_bit() - Reset the guest reference bit of a frame.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 *
 * If the walk ends at a large entry with fc and pr set, the real reference
 * bit is reset and the result of page_reset_referenced() is returned.
 * If the walk reaches a pte, the real reference bit is reset for a valid
 * pte, the guest reference bit in the pgste is cleared, and the previous
 * guest reference and change bits are merged into the return value as
 * (gr << 1) | gc.
 *
 * Return: the non-zero result of dat_entry_walk() if the walk fails,
 *         otherwise the value described above.
 */
int dat_reset_reference_bit(union asce asce, gfn_t gfn)
{
	union pgste pgste, old;
	union crste *crstep;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(NULL, gfn, asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		union crste crste = READ_ONCE(*crstep);

		if (!crste.h.fc || !crste.s.fc1.pr)
			return 0;
		/* Use the value that was just checked, not a second read. */
		return page_reset_referenced(large_crste_to_phys(crste, gfn));
	}
	old = pgste_get_lock(ptep);
	pgste = old;

	if (!ptep->h.i) {
		rc = page_reset_referenced(pte_origin(*ptep));
		pgste.hr = rc >> 1;
	}
	rc |= (pgste.gr << 1) | pgste.gc;
	pgste.gr = 0;

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return rc;
}
788 
/*
 * Walk callback: clear the key related pgste fields (acc, fp, gr, gc) and,
 * if the pte has its pr bit set, reset the real storage key to
 * PAGE_DEFAULT_KEY.
 *
 * Return: @next if a reschedule is needed, which stops the walk; 0 otherwise.
 */
static long dat_reset_skeys_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste pgste = pgste_get_lock(ptep);

	pgste.acc = 0;
	pgste.fp = 0;
	pgste.gr = 0;
	pgste.gc = 0;
	if (ptep->s.pr)
		page_set_storage_key(pte_origin(*ptep), PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);

	return need_resched() ? next : 0;
}
806 
/*
 * Walk callback: reset the real storage keys of the part of a large entry
 * that lies within the walked range to PAGE_DEFAULT_KEY. Entries without
 * both fc and pr set are ignored.
 *
 * Return: @next if a reschedule is needed, which stops the walk; 0 otherwise.
 */
static long dat_reset_skeys_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	phys_addr_t pos, stop, base = crste_origin_large(*crstep);

	if (!crstep->h.fc || !crstep->s.fc1.pr)
		return 0;

	/* Clamp to the walked range and translate to physical addresses. */
	pos = ((max(gfn, walk->start) - gfn) << PAGE_SHIFT) + base;
	stop = ((min(next, walk->end) - gfn) << PAGE_SHIFT) + base;

	/* Use sske_frame() as long as the rest of the current segment is in range. */
	while (ALIGN(pos + 1, _SEGMENT_SIZE) <= stop)
		pos = sske_frame(pos, PAGE_DEFAULT_KEY);
	/* Page by page for whatever is left. */
	while (pos < stop) {
		page_set_storage_key(pos, PAGE_DEFAULT_KEY, 1);
		pos += PAGE_SIZE;
	}

	return need_resched() ? next : 0;
}
825 
/*
 * dat_reset_skeys() - Reset the storage keys from @start to the end of the
 * address space described by @asce.
 *
 * The callbacks return the next guest frame when a reschedule is needed,
 * which stops the walk and becomes the return value of this function.
 *
 * Return: whatever _dat_walk_gfn_range() returns for this walk.
 */
long dat_reset_skeys(union asce asce, gfn_t start)
{
	const struct dat_walk_ops reset_ops = {
		.pte_entry = dat_reset_skeys_pte,
		.pmd_entry = dat_reset_skeys_crste,
		.pud_entry = dat_reset_skeys_crste,
	};
	gfn_t end = asce_end(asce);

	return _dat_walk_gfn_range(start, end, asce, &reset_ops, DAT_WALK_IGN_HOLES, NULL);
}
836 
/* Private walk data for dat_set_slot(). */
struct slot_priv {
	/* Raw value of the token entry to install. */
	unsigned long token;
	/* Cache used when an entry has to be split. */
	struct kvm_s390_mmu_cache *mc;
};
841 
/*
 * Walk callback: replace a pte with the token pte derived from the walk's
 * private token, unless it already has that value.
 *
 * Return: always 0.
 */
static long _dat_slot_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct slot_priv *priv = walk->priv;
	union crste tok = { .val = priv->token };
	union pte cur = READ_ONCE(*ptep);
	union pte wanted = _PTE_TOK(tok.tok.type, tok.tok.par);

	/* Only touch the entry if it is not yet in the desired state. */
	if (cur.val != wanted.val)
		dat_ptep_xchg(ptep, wanted, gfn, walk->asce, false);
	return 0;
}
857 
/*
 * Walk callback: install the walk's token in a CRST entry.
 *
 * If the entry is fully covered by the walked range, it is replaced by the
 * token and any lower level tables it referenced are freed. If it is only
 * partially covered, an existing lower level table is left for the walk to
 * descend into; otherwise the entry is split so that the walk can do so.
 *
 * Return: 0 on success, -EINVAL if the atomic exchange failed, or the
 *         result of dat_split_crste().
 */
static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct slot_priv *priv = walk->priv;
	union crste cur = READ_ONCE(*crstep);
	union crste wanted;
	bool has_lower, covered;

	wanted.val = priv->token;
	wanted.h.tt = cur.h.tt;

	/* Table entry already in the desired state. */
	if (cur.val == wanted.val)
		return 0;

	has_lower = !cur.h.fc && !cur.h.i;
	covered = walk->start <= gfn && walk->end >= next;

	if (!covered) {
		/* A lower level table is present, things will be handled there. */
		if (has_lower)
			return 0;
		/* Split (install a lower level table), and handle things there. */
		return dat_split_crste(priv->mc, crstep, gfn, walk->asce, false);
	}

	/* This table entry needs to be updated. */
	if (!dat_crstep_xchg_atomic(crstep, cur, wanted, gfn, walk->asce))
		return -EINVAL;

	/* A lower level table was present, needs to be freed. */
	if (has_lower) {
		if (is_pmd(cur))
			dat_free_pt(dereference_pmd(cur.pmd));
		else
			dat_free_level(dereference_crste(cur), true);
	}
	return 0;
}
890 
891 static const struct dat_walk_ops dat_slot_ops = {
892 	.pte_entry = _dat_slot_pte,
893 	.crste_ops = { _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, },
894 };
895 
/*
 * dat_set_slot() - Fill a range of guest frames with a token.
 * @mc: Cache used when entries need to be split.
 * @asce: The ASCE of the address space.
 * @start: First guest frame of the range.
 * @end: Guest frame after the last one in the range.
 * @type: Token type.
 * @param: Token parameter.
 *
 * Return: the result of the table walk.
 */
int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
		 u16 type, u16 param)
{
	const int walk_flags = DAT_WALK_IGN_HOLES | DAT_WALK_ANY;
	struct slot_priv priv = {
		.token = _CRSTE_TOK(0, type, param).val,
		.mc = mc,
	};

	return _dat_walk_gfn_range(start, end, asce, &dat_slot_ops, walk_flags, &priv);
}
907 
pgste_set_unlock_multiple(union pte * first,int n,union pgste * pgstes)908 static void pgste_set_unlock_multiple(union pte *first, int n, union pgste *pgstes)
909 {
910 	int i;
911 
912 	for (i = 0; i < n; i++) {
913 		if (!pgstes[i].pcl)
914 			break;
915 		pgste_set_unlock(first + i, pgstes[i]);
916 	}
917 }
918 
pgste_get_trylock_multiple(union pte * first,int n,union pgste * pgstes)919 static bool pgste_get_trylock_multiple(union pte *first, int n, union pgste *pgstes)
920 {
921 	int i;
922 
923 	for (i = 0; i < n; i++) {
924 		if (!pgste_get_trylock(first + i, pgstes + i))
925 			break;
926 	}
927 	if (i == n)
928 		return true;
929 	pgste_set_unlock_multiple(first, n, pgstes);
930 	return false;
931 }
932 
dat_get_ptval(struct page_table * table,struct ptval_param param)933 unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param)
934 {
935 	union pgste pgstes[4] = {};
936 	unsigned long res = 0;
937 	int i, n;
938 
939 	n = param.len + 1;
940 
941 	while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
942 		cpu_relax();
943 
944 	for (i = 0; i < n; i++)
945 		res = res << 16 | pgstes[i].val16;
946 
947 	pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
948 	return res;
949 }
950 
dat_set_ptval(struct page_table * table,struct ptval_param param,unsigned long val)951 void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val)
952 {
953 	union pgste pgstes[4] = {};
954 	int i, n;
955 
956 	n = param.len + 1;
957 
958 	while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
959 		cpu_relax();
960 
961 	for (i = param.len; i >= 0; i--) {
962 		pgstes[i].val16 = val;
963 		val = val >> 16;
964 	}
965 
966 	pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
967 }
968 
/* PTE callback for test_age_ops: report the young bit (s.y) of the PTE. */
static long _dat_test_young_pte(union pte *ptep, gfn_t start, gfn_t end, struct dat_walk *walk)
{
	long young = ptep->s.y;

	return young;
}
973 
/*
 * CRSTE callback for test_age_ops: returns 1 if the entry has the fc bit set
 * and its young bit (s.fc1.y) is set, otherwise 0.
 */
static long _dat_test_young_crste(union crste *crstep, gfn_t start, gfn_t end,
				  struct dat_walk *walk)
{
	if (!crstep->h.fc)
		return 0;
	return !!crstep->s.fc1.y;
}
979 
980 static const struct dat_walk_ops test_age_ops = {
981 	.pte_entry = _dat_test_young_pte,
982 	.pmd_entry = _dat_test_young_crste,
983 	.pud_entry = _dat_test_young_crste,
984 };
985 
986 /**
987  * dat_test_age_gfn() - Test young.
988  * @asce: The ASCE whose address range is to be tested.
989  * @start: The first guest frame of the range to check.
990  * @end: The guest frame after the last in the range.
991  *
992  * Context: called by KVM common code with the kvm mmu write lock held.
993  *
994  * Return: %true if any page in the given range is young, otherwise %false.
995  */
dat_test_age_gfn(union asce asce,gfn_t start,gfn_t end)996 bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
997 {
998 	return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
999 }
1000 
/*
 * CRSTE callback for dat_set_prefix_notif_bit().
 *
 * Entries that do not have fc set, or that have the i or p bit set, are left
 * untouched. Otherwise the prefix_notif bit is set with an atomic exchange,
 * retried until it succeeds or the bit is seen already set. The counter
 * behind walk->priv is then set to 2.
 *
 * Always returns 0.
 */
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste current_val, updated;
	int *count = walk->priv;

	for (;;) {
		current_val = READ_ONCE(*crstep);
		if (!current_val.h.fc || current_val.h.i || current_val.h.p)
			return 0;
		/* Nothing to change if the bit is already there. */
		if (current_val.s.fc1.prefix_notif)
			break;
		updated = current_val;
		updated.s.fc1.prefix_notif = 1;
		if (dat_crstep_xchg_atomic(crstep, current_val, updated, gfn, walk->asce))
			break;
	}
	*count = 2;
	return 0;
}
1018 
/*
 * PTE callback for dat_set_prefix_notif_bit().
 *
 * With the PGSTE lock held: if the PTE has neither the i nor the p bit set,
 * set prefix_notif in the PGSTE and increment the counter behind walk->priv.
 *
 * Always returns 0.
 */
static long dat_set_pn_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	int *count = walk->priv;
	union pgste pgste = pgste_get_lock(ptep);

	if (!(ptep->h.i || ptep->h.p)) {
		pgste.prefix_notif = 1;
		(*count)++;
	}
	pgste_set_unlock(ptep, pgste);
	return 0;
}
1032 
/*
 * Set the prefix notification bit for the two guest frames @gfn and @gfn + 1.
 *
 * The callbacks count how many frames were marked: each suitable PTE adds
 * one, a suitable large (fc) entry sets the count to 2 directly. The result
 * of the walk itself is not looked at.
 *
 * Returns 0 if the count ended up at exactly 2, -EAGAIN otherwise.
 */
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
{
	static const struct dat_walk_ops ops = {
		.pud_entry = dat_set_pn_crste,
		.pmd_entry = dat_set_pn_crste,
		.pte_entry = dat_set_pn_pte,
	};
	int marked = 0;

	_dat_walk_gfn_range(gfn, gfn + 2, asce, &ops, DAT_WALK_IGN_HOLES, &marked);

	return marked == 2 ? 0 : -EAGAIN;
}
1048 
1049 /**
1050  * dat_perform_essa() - Perform ESSA actions on the PGSTE.
1051  * @asce: The asce to operate on.
1052  * @gfn: The guest page frame to operate on.
1053  * @orc: The specific action to perform, see the ESSA_SET_* macros.
1054  * @state: The storage attributes to be returned to the guest.
1055  * @dirty: Returns whether the function dirtied a previously clean entry.
1056  *
1057  * Context: Called with kvm->mmu_lock held.
1058  *
1059  * Return:
1060  * * %1 if the page state has been altered and the page is to be added to the CBRL
1061  * * %0 if the page state has been altered, but the page is not to be added to the CBRL
1062  * * %-1 if the page state has not been altered and the page is not to be added to the CBRL
1063  */
dat_perform_essa(union asce asce,gfn_t gfn,int orc,union essa_state * state,bool * dirty)1064 int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty)
1065 {
1066 	union crste *crstep;
1067 	union pgste pgste;
1068 	union pte *ptep;
1069 	int res = 0;
1070 
1071 	if (dat_entry_walk(NULL, gfn, asce, 0, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep)) {
1072 		*state = (union essa_state) { .exception = 1 };
1073 		return -1;
1074 	}
1075 
1076 	pgste = pgste_get_lock(ptep);
1077 
1078 	*state = (union essa_state) {
1079 		.content = (ptep->h.i << 1) + (ptep->h.i && pgste.zero),
1080 		.nodat = pgste.nodat,
1081 		.usage = pgste.usage,
1082 		};
1083 
1084 	switch (orc) {
1085 	case ESSA_GET_STATE:
1086 		res = -1;
1087 		break;
1088 	case ESSA_SET_STABLE:
1089 		pgste.usage = PGSTE_GPS_USAGE_STABLE;
1090 		pgste.nodat = 0;
1091 		break;
1092 	case ESSA_SET_UNUSED:
1093 		pgste.usage = PGSTE_GPS_USAGE_UNUSED;
1094 		if (ptep->h.i)
1095 			res = 1;
1096 		break;
1097 	case ESSA_SET_VOLATILE:
1098 		pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
1099 		if (ptep->h.i)
1100 			res = 1;
1101 		break;
1102 	case ESSA_SET_POT_VOLATILE:
1103 		if (!ptep->h.i) {
1104 			pgste.usage = PGSTE_GPS_USAGE_POT_VOLATILE;
1105 		} else if (pgste.zero) {
1106 			pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
1107 		} else if (!pgste.gc) {
1108 			pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
1109 			res = 1;
1110 		}
1111 		break;
1112 	case ESSA_SET_STABLE_RESIDENT:
1113 		pgste.usage = PGSTE_GPS_USAGE_STABLE;
1114 		/*
1115 		 * Since the resident state can go away any time after this
1116 		 * call, we will not make this page resident. We can revisit
1117 		 * this decision if a guest will ever start using this.
1118 		 */
1119 		break;
1120 	case ESSA_SET_STABLE_IF_RESIDENT:
1121 		if (!ptep->h.i)
1122 			pgste.usage = PGSTE_GPS_USAGE_STABLE;
1123 		break;
1124 	case ESSA_SET_STABLE_NODAT:
1125 		pgste.usage = PGSTE_GPS_USAGE_STABLE;
1126 		pgste.nodat = 1;
1127 		break;
1128 	default:
1129 		WARN_ONCE(1, "Invalid ORC!");
1130 		res = -1;
1131 		break;
1132 	}
1133 	/* If we are discarding a page, set it to logical zero. */
1134 	pgste.zero = res == 1;
1135 	if (orc > 0) {
1136 		*dirty = !pgste.cmma_d;
1137 		pgste.cmma_d = 1;
1138 	}
1139 
1140 	pgste_set_unlock(ptep, pgste);
1141 
1142 	return res;
1143 }
1144 
/*
 * PTE callback for dat_reset_cmma(): clear the usage, nodat and cmma_d
 * fields of the PGSTE under the PGSTE lock.
 *
 * Returns @next if a reschedule is pending, otherwise 0.
 * NOTE(review): a non-zero return presumably stops the walk so that the
 * caller can resume from @next - confirm against the walker.
 */
static long dat_reset_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste pgste = pgste_get_lock(ptep);

	pgste.cmma_d = 0;
	pgste.nodat = 0;
	pgste.usage = 0;
	pgste_set_unlock(ptep, pgste);

	return need_resched() ? next : 0;
}
1158 
/*
 * Reset the CMMA related PGSTE fields for all guest frames from @start up to
 * asce_end(@asce), ignoring holes.
 *
 * Returns the result of the walk, which is whatever dat_reset_cmma_pte()
 * returned last: 0, or the next gfn if a reschedule became pending.
 * NOTE(review): error values from the walker itself may also be possible -
 * confirm against _dat_walk_gfn_range().
 */
long dat_reset_cmma(union asce asce, gfn_t start)
{
	const struct dat_walk_ops ops = { .pte_entry = dat_reset_cmma_pte, };
	gfn_t end = asce_end(asce);

	return _dat_walk_gfn_range(start, end, asce, &ops, DAT_WALK_IGN_HOLES, NULL);
}
1168 
1169 struct dat_get_cmma_state {
1170 	gfn_t start;
1171 	gfn_t end;
1172 	unsigned int count;
1173 	u8 *values;
1174 	atomic64_t *remaining;
1175 };
1176 
/*
 * PTE callback for dat_peek_cmma(): store the usage field, with the nodat
 * bit placed at bit 6, into the output buffer at the index of @gfn relative
 * to the start of the walk, and advance the recorded end to @next.
 *
 * The PGSTE is locked while it is read, and written back unmodified.
 * Always returns 0.
 */
static long __dat_peek_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *st = walk->priv;
	union pgste pgste = pgste_get_lock(ptep);

	st->values[gfn - walk->start] = pgste.usage | (pgste.nodat << 6);
	pgste_set_unlock(ptep, pgste);

	st->end = next;
	return 0;
}
1189 
/*
 * CRSTE callback for dat_peek_cmma(): if the entry has the i bit set, move
 * the recorded end to @next, capped at the end of the walk. No values are
 * written for the skipped frames. Always returns 0.
 */
static long __dat_peek_cmma_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *st = walk->priv;

	if (!crstep->h.i)
		return 0;

	st->end = min(walk->end, next);
	return 0;
}
1198 
/*
 * Read the CMMA attributes of up to *@count guest frames starting at @start
 * into @values, without changing any PGSTE state.
 *
 * On return *@count holds the number of frames covered, i.e. the distance
 * from @start to the end recorded by the walk callbacks.
 *
 * Returns the result of the walk, except that -EFAULT is turned into 0 when
 * at least one frame was covered before the fault.
 */
int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values)
{
	const struct dat_walk_ops ops = {
		.pte_entry = __dat_peek_cmma_pte,
		.pmd_entry = __dat_peek_cmma_crste,
		.pud_entry = __dat_peek_cmma_crste,
		.p4d_entry = __dat_peek_cmma_crste,
		.pgd_entry = __dat_peek_cmma_crste,
	};
	/*
	 * Start with an empty result: if no callback ever advances .end, the
	 * count computed below must be 0 rather than (0 - start).
	 */
	struct dat_get_cmma_state state = { .values = values, .end = start, };
	int rc;

	rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &state);
	*count = state.end - start;
	/* Return success if at least one value was saved, otherwise an error. */
	return (rc == -EFAULT && *count > 0) ? 0 : rc;
}
1216 
/*
 * PTE callback for dat_get_cmma(): harvest one CMMA-dirty entry.
 *
 * Once a first dirty entry has been recorded (start != -1), the walk is
 * stopped by returning 1 when @gfn is more than KVM_S390_MAX_BIT_DISTANCE
 * past the recorded end, or when its distance from the recorded start has
 * reached the count limit.
 *
 * Entries whose cmma_d bit is clear are skipped. For dirty entries the bit
 * is cleared, the remaining counter is decremented, and the usage field,
 * with nodat at bit 6, is stored at the index of @gfn relative to start.
 *
 * Returns 1 to stop the walk as described above, otherwise 0.
 */
static long __dat_get_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *st = walk->priv;
	union pgste pgste;

	if (st->start != -1 &&
	    ((gfn - st->end) > KVM_S390_MAX_BIT_DISTANCE || gfn - st->start >= st->count))
		return 1;

	/* Unlocked peek first; the bit is checked again under the lock. */
	if (!READ_ONCE(*pgste_of(ptep)).cmma_d)
		return 0;

	pgste = pgste_get_lock(ptep);
	if (pgste.cmma_d) {
		/* The first dirty entry found defines the start of the result. */
		if (st->start == -1)
			st->start = gfn;
		pgste.cmma_d = 0;
		atomic64_dec(st->remaining);
		st->values[gfn - st->start] = pgste.usage | (pgste.nodat << 6);
		st->end = next;
	}
	pgste_set_unlock(ptep, pgste);

	return 0;
}
1244 
/*
 * Collect the CMMA attributes of dirty guest frames, clearing their dirty
 * state in the process.
 *
 * The walk starts at *@start and runs towards asce_end(@asce), ignoring
 * holes. On return *@start is the gfn of the first dirty frame found and
 * *@count the number of bytes of @values that describe the result; if no
 * dirty frame was found *@count is 0 and *@start is left unchanged. @rem is
 * decremented once for every dirty frame that was harvested.
 *
 * If *@count is 0 on entry nothing is walked: the callback only checks the
 * capacity after the first dirty frame has been recorded, so it would store
 * into a buffer that has no room and consume that frame's dirty bit.
 *
 * Always returns 0.
 */
int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem)
{
	const struct dat_walk_ops ops = { .pte_entry = __dat_get_cmma_pte, };
	struct dat_get_cmma_state state = {
		.remaining = rem,
		.values = values,
		.count = *count,
		.start = -1,
	};

	/* No room for even a single value: report an empty result. */
	if (!*count)
		return 0;

	_dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &state);

	if (state.start == -1) {
		*count = 0;
	} else {
		*count = state.end - state.start;
		*start = state.start;
	}

	return 0;
}
1266 
1267 struct dat_set_cmma_state {
1268 	unsigned long mask;
1269 	const u8 *bits;
1270 };
1271 
/*
 * PTE callback for dat_set_cmma_bits(): take the input byte belonging to
 * @gfn, move it to bits 24-31 of a PGSTE value, apply the mask, and copy the
 * resulting usage and nodat fields into the PGSTE under the PGSTE lock.
 *
 * Always returns 0.
 */
static long __dat_set_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_set_cmma_state *state = walk->priv;
	union pgste pgste, tmp;

	/*
	 * Widen before shifting: the u8 would otherwise be promoted to int,
	 * and a byte with its top bit set would be shifted into the sign bit
	 * and sign-extended before the mask is applied.
	 */
	tmp.val = ((unsigned long)state->bits[gfn - walk->start] << 24) & state->mask;

	pgste = pgste_get_lock(ptep);
	pgste.usage = tmp.usage;
	pgste.nodat = tmp.nodat;
	pgste_set_unlock(ptep, pgste);

	return 0;
}
1286 
1287 /**
1288  * dat_set_cmma_bits() - Set CMMA bits for a range of guest pages.
1289  * @mc: Cache used for allocations.
1290  * @asce: The ASCE of the guest.
1291  * @gfn: The guest frame of the fist page whose CMMA bits are to set.
1292  * @count: How many pages need to be processed.
1293  * @mask: Which PGSTE bits should be set.
1294  * @bits: Points to an array with the CMMA attributes.
1295  *
1296  * This function sets the CMMA attributes for the given pages. If the input
1297  * buffer has zero length, no action is taken, otherwise the attributes are
1298  * set and the mm->context.uses_cmm flag is set.
1299  *
1300  * Each byte in @bits contains new values for bits 32-39 of the PGSTE.
1301  * Currently, only the fields NT and US are applied.
1302  *
1303  * Return: %0 in case of success, a negative error value otherwise.
1304  */
dat_set_cmma_bits(struct kvm_s390_mmu_cache * mc,union asce asce,gfn_t gfn,unsigned long count,unsigned long mask,const uint8_t * bits)1305 int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
1306 		      unsigned long count, unsigned long mask, const uint8_t *bits)
1307 {
1308 	const struct dat_walk_ops ops = { .pte_entry = __dat_set_cmma_pte, };
1309 	struct dat_set_cmma_state state = { .mask = mask, .bits = bits, };
1310 	union crste *crstep;
1311 	union pte *ptep;
1312 	gfn_t cur;
1313 	int rc;
1314 
1315 	for (cur = ALIGN_DOWN(gfn, _PAGE_ENTRIES); cur < gfn + count; cur += _PAGE_ENTRIES) {
1316 		rc = dat_entry_walk(mc, cur, asce, DAT_WALK_ALLOC, TABLE_TYPE_PAGE_TABLE,
1317 				    &crstep, &ptep);
1318 		if (rc)
1319 			return rc;
1320 	}
1321 	return _dat_walk_gfn_range(gfn, gfn + count, asce, &ops, DAT_WALK_IGN_HOLES, &state);
1322 }
1323