1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * KVM guest address space mapping code
4 *
5 * Copyright IBM Corp. 2007, 2020, 2024
6 * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
7 * Martin Schwidefsky <schwidefsky@de.ibm.com>
8 * David Hildenbrand <david@redhat.com>
9 * Janosch Frank <frankja@linux.ibm.com>
10 */
11
12 #include <linux/kernel.h>
13 #include <linux/pagewalk.h>
14 #include <linux/swap.h>
15 #include <linux/smp.h>
16 #include <linux/spinlock.h>
17 #include <linux/slab.h>
18 #include <linux/swapops.h>
19 #include <linux/ksm.h>
20 #include <linux/mm.h>
21 #include <linux/mman.h>
22 #include <linux/pgtable.h>
23 #include <linux/kvm_types.h>
24 #include <linux/kvm_host.h>
25 #include <linux/pgalloc.h>
26
27 #include <asm/page-states.h>
28 #include <asm/tlb.h>
29 #include "dat.h"
30
kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache * mc)31 int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc)
32 {
33 void *o;
34
35 for ( ; mc->n_crsts < KVM_S390_MMU_CACHE_N_CRSTS; mc->n_crsts++) {
36 o = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
37 if (!o)
38 return -ENOMEM;
39 mc->crsts[mc->n_crsts] = o;
40 }
41 for ( ; mc->n_pts < KVM_S390_MMU_CACHE_N_PTS; mc->n_pts++) {
42 o = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
43 if (!o)
44 return -ENOMEM;
45 mc->pts[mc->n_pts] = o;
46 }
47 for ( ; mc->n_rmaps < KVM_S390_MMU_CACHE_N_RMAPS; mc->n_rmaps++) {
48 o = kzalloc_obj(*mc->rmaps[0], GFP_KERNEL_ACCOUNT);
49 if (!o)
50 return -ENOMEM;
51 mc->rmaps[mc->n_rmaps] = o;
52 }
53 return 0;
54 }
55
dat_alloc_pt_noinit(struct kvm_s390_mmu_cache * mc)56 static inline struct page_table *dat_alloc_pt_noinit(struct kvm_s390_mmu_cache *mc)
57 {
58 struct page_table *res;
59
60 res = kvm_s390_mmu_cache_alloc_pt(mc);
61 if (res)
62 __arch_set_page_dat(res, 1);
63 return res;
64 }
65
dat_alloc_crst_noinit(struct kvm_s390_mmu_cache * mc)66 static inline struct crst_table *dat_alloc_crst_noinit(struct kvm_s390_mmu_cache *mc)
67 {
68 struct crst_table *res;
69
70 res = kvm_s390_mmu_cache_alloc_crst(mc);
71 if (res)
72 __arch_set_page_dat(res, 1UL << CRST_ALLOC_ORDER);
73 return res;
74 }
75
dat_alloc_crst_sleepable(unsigned long init)76 struct crst_table *dat_alloc_crst_sleepable(unsigned long init)
77 {
78 struct page *page;
79 void *virt;
80
81 page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
82 if (!page)
83 return NULL;
84 virt = page_to_virt(page);
85 __arch_set_page_dat(virt, 1UL << CRST_ALLOC_ORDER);
86 crst_table_init(virt, init);
87 return virt;
88 }
89
/**
 * dat_free_level() - Recursively free a CRST table and the tables below it.
 * @table: The CRST table to free.
 * @owns_ptes: Whether page tables referenced by segment entries are freed too.
 *
 * Entries with the fc or the i bit set are skipped. Every other entry is
 * followed: lower CRST tables are freed recursively, page tables only if
 * @owns_ptes is set. Finally @table itself is freed.
 */
void dat_free_level(struct crst_table *table, bool owns_ptes)
{
	union crste *entry;
	unsigned int i;

	for (i = 0; i < _CRST_ENTRIES; i++) {
		entry = &table->crstes[i];
		if (entry->h.fc || entry->h.i)
			continue;
		if (is_pmd(*entry)) {
			if (owns_ptes)
				dat_free_pt(dereference_pmd(entry->pmd));
		} else {
			dat_free_level(dereference_crste(*entry), owns_ptes);
		}
	}
	dat_free_crst(table);
}
104
dat_set_asce_limit(struct kvm_s390_mmu_cache * mc,union asce * asce,int newtype)105 int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype)
106 {
107 struct crst_table *table;
108 union crste crste;
109
110 while (asce->dt > newtype) {
111 table = dereference_asce(*asce);
112 crste = table->crstes[0];
113 if (crste.h.fc)
114 return 0;
115 if (!crste.h.i) {
116 asce->rsto = crste.h.fc0.to;
117 dat_free_crst(table);
118 } else {
119 crste.h.tt--;
120 crst_table_init((void *)table, crste.val);
121 }
122 asce->dt--;
123 }
124 while (asce->dt < newtype) {
125 crste = _crste_fc0(asce->rsto, asce->dt + 1);
126 table = dat_alloc_crst_noinit(mc);
127 if (!table)
128 return -ENOMEM;
129 crst_table_init((void *)table, _CRSTE_HOLE(crste.h.tt).val);
130 table->crstes[0] = crste;
131 asce->rsto = __pa(table) >> PAGE_SHIFT;
132 asce->dt++;
133 }
134 return 0;
135 }
136
137 /**
138 * dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
139 * @crstep: Pointer to the CRST entry.
140 * @old: Expected old value.
141 * @new: Replacement entry.
142 * @gfn: The affected guest address.
143 * @asce: The asce of the address space.
144 *
145 * This function is needed to atomically exchange a CRSTE that potentially
146 * maps a prefix area, without having to invalidate it inbetween.
147 *
148 * Context: This function is assumed to be called with kvm->mmu_lock held.
149 *
150 * Return: %true if the exchange was successful.
151 */
dat_crstep_xchg_atomic(union crste * crstep,union crste old,union crste new,gfn_t gfn,union asce asce)152 bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
153 gfn_t gfn, union asce asce)
154 {
155 if (old.h.i)
156 return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
157 if (cpu_has_edat2())
158 return crdte_crste(crstep, old, new, gfn, asce);
159 return cspg_crste(crstep, old, new);
160 }
161
dat_set_storage_key_from_pgste(union pte pte,union pgste pgste)162 static void dat_set_storage_key_from_pgste(union pte pte, union pgste pgste)
163 {
164 union skey nkey = { .acc = pgste.acc, .fp = pgste.fp };
165
166 page_set_storage_key(pte_origin(pte), nkey.skey, 0);
167 }
168
dat_move_storage_key(union pte old,union pte new)169 static void dat_move_storage_key(union pte old, union pte new)
170 {
171 page_set_storage_key(pte_origin(new), page_get_storage_key(pte_origin(old)), 1);
172 }
173
dat_save_storage_key_into_pgste(union pte pte,union pgste pgste)174 static union pgste dat_save_storage_key_into_pgste(union pte pte, union pgste pgste)
175 {
176 union skey skey;
177
178 skey.skey = page_get_storage_key(pte_origin(pte));
179
180 pgste.acc = skey.acc;
181 pgste.fp = skey.fp;
182 pgste.gr |= skey.r;
183 pgste.gc |= skey.c;
184
185 return pgste;
186 }
187
/**
 * __dat_ptep_xchg() - Replace a PTE, flushing and handling storage keys.
 * @ptep: Pointer to the PTE to replace.
 * @pgste: The current PGSTE value of the entry.
 * @new: The new PTE value.
 * @gfn: The guest frame mapped by the entry.
 * @asce: The ASCE of the address space.
 * @uses_skeys: Whether storage keys are in use.
 *
 * If only software bits differ between the current and the new PTE, only the
 * software byte is written. Otherwise a valid old entry is invalidated with
 * IPTE first, storage keys are saved, restored or moved as needed when
 * @uses_skeys is set, and finally the new entry is written.
 *
 * Context: per the in-code comment, the caller holds the pgste lock.
 *
 * Return: the (possibly updated) PGSTE value, to be written back by the
 * caller.
 */
union pgste __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new, gfn_t gfn,
			    union asce asce, bool uses_skeys)
{
	union pte old = READ_ONCE(*ptep);

	/*
	 * Updating only the software bits while holding the pgste lock.
	 * Use the snapshot taken above so that every decision in this
	 * function is based on the same value of the entry.
	 */
	if (!((old.val ^ new.val) & ~_PAGE_SW_BITS)) {
		WRITE_ONCE(ptep->swbyte, new.swbyte);
		return pgste;
	}

	if (!old.h.i) {
		unsigned long opts = IPTE_GUEST_ASCE | (pgste.nodat ? IPTE_NODAT : 0);

		if (machine_has_tlb_guest())
			__ptep_ipte(gfn_to_gpa(gfn), (void *)ptep, opts, asce.val, IPTE_GLOBAL);
		else
			__ptep_ipte(gfn_to_gpa(gfn), (void *)ptep, 0, 0, IPTE_GLOBAL);
	}

	if (uses_skeys) {
		if (old.h.i && !new.h.i) {
			/* Invalid to valid: restore storage keys from PGSTE. */
			dat_set_storage_key_from_pgste(new, pgste);
		} else if (!old.h.i && new.h.i) {
			/* Valid to invalid: save storage keys to PGSTE. */
			pgste = dat_save_storage_key_into_pgste(old, pgste);
		} else if (!old.h.i && !new.h.i) {
			/* Valid to valid: move storage keys. */
			if (old.h.pfra != new.h.pfra)
				dat_move_storage_key(old, new);
		}
		/* Invalid to invalid: nothing to do. */
	}

	WRITE_ONCE(*ptep, new);
	return pgste;
}
225
226 /*
227 * dat_split_ste() - Split a segment table entry into page table entries.
228 *
229 * Context: This function is assumed to be called with kvm->mmu_lock held.
230 *
231 * Return: 0 in case of success, -ENOMEM if running out of memory.
232 */
dat_split_ste(struct kvm_s390_mmu_cache * mc,union pmd * pmdp,gfn_t gfn,union asce asce,bool uses_skeys)233 static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t gfn,
234 union asce asce, bool uses_skeys)
235 {
236 union pgste pgste_init;
237 struct page_table *pt;
238 union pmd new, old;
239 union pte init;
240 int i;
241
242 BUG_ON(!mc);
243 old = READ_ONCE(*pmdp);
244
245 /* Already split, nothing to do. */
246 if (!old.h.i && !old.h.fc)
247 return 0;
248
249 pt = dat_alloc_pt_noinit(mc);
250 if (!pt)
251 return -ENOMEM;
252 new.val = virt_to_phys(pt);
253
254 while (old.h.i || old.h.fc) {
255 init.val = pmd_origin_large(old);
256 init.h.p = old.h.p;
257 init.h.i = old.h.i;
258 init.s.d = old.s.fc1.d;
259 init.s.w = old.s.fc1.w;
260 init.s.y = old.s.fc1.y;
261 init.s.sd = old.s.fc1.sd;
262 init.s.pr = old.s.fc1.pr;
263 pgste_init.val = 0;
264 if (old.h.fc) {
265 for (i = 0; i < _PAGE_ENTRIES; i++)
266 pt->ptes[i].val = init.val | i * PAGE_SIZE;
267 /* No need to take locks as the page table is not installed yet. */
268 pgste_init.prefix_notif = old.s.fc1.prefix_notif;
269 pgste_init.vsie_notif = old.s.fc1.vsie_notif;
270 pgste_init.pcl = uses_skeys && init.h.i;
271 dat_init_pgstes(pt, pgste_init.val);
272 } else {
273 dat_init_page_table(pt, init.val, 0);
274 }
275
276 if (dat_pmdp_xchg_atomic(pmdp, old, new, gfn, asce)) {
277 if (!pgste_init.pcl)
278 return 0;
279 for (i = 0; i < _PAGE_ENTRIES; i++) {
280 union pgste pgste = pt->pgstes[i];
281
282 pgste = dat_save_storage_key_into_pgste(pt->ptes[i], pgste);
283 pgste_set_unlock(pt->ptes + i, pgste);
284 }
285 return 0;
286 }
287 old = READ_ONCE(*pmdp);
288 }
289
290 dat_free_pt(pt);
291 return 0;
292 }
293
294 /*
295 * dat_split_crste() - Split a crste into smaller crstes.
296 *
297 * Context: This function is assumed to be called with kvm->mmu_lock held.
298 *
299 * Return: %0 in case of success, %-ENOMEM if running out of memory.
300 */
dat_split_crste(struct kvm_s390_mmu_cache * mc,union crste * crstep,gfn_t gfn,union asce asce,bool uses_skeys)301 static int dat_split_crste(struct kvm_s390_mmu_cache *mc, union crste *crstep,
302 gfn_t gfn, union asce asce, bool uses_skeys)
303 {
304 struct crst_table *table;
305 union crste old, new, init;
306 int i;
307
308 old = READ_ONCE(*crstep);
309 if (is_pmd(old))
310 return dat_split_ste(mc, &crstep->pmd, gfn, asce, uses_skeys);
311
312 BUG_ON(!mc);
313
314 /* Already split, nothing to do. */
315 if (!old.h.i && !old.h.fc)
316 return 0;
317
318 table = dat_alloc_crst_noinit(mc);
319 if (!table)
320 return -ENOMEM;
321
322 new.val = virt_to_phys(table);
323 new.h.tt = old.h.tt;
324 new.h.fc0.tl = _REGION_ENTRY_LENGTH;
325
326 while (old.h.i || old.h.fc) {
327 init = old;
328 init.h.tt--;
329 if (old.h.fc) {
330 for (i = 0; i < _CRST_ENTRIES; i++)
331 table->crstes[i].val = init.val | i * HPAGE_SIZE;
332 } else {
333 crst_table_init((void *)table, init.val);
334 }
335 if (dat_crstep_xchg_atomic(crstep, old, new, gfn, asce))
336 return 0;
337 old = READ_ONCE(*crstep);
338 }
339
340 dat_free_crst(table);
341 return 0;
342 }
343
344 /**
345 * dat_entry_walk() - Walk the gmap page tables.
346 * @mc: Cache to use to allocate dat tables, if needed; can be NULL if neither
347 * %DAT_WALK_SPLIT or %DAT_WALK_ALLOC is specified in @flags.
348 * @gfn: Guest frame.
349 * @asce: The ASCE of the address space.
350 * @flags: Flags from WALK_* macros.
351 * @walk_level: Level to walk to, from LEVEL_* macros.
352 * @last: Will be filled the last visited non-pte DAT entry.
353 * @ptepp: Will be filled the last visited pte entry, if any, otherwise NULL.
354 *
355 * Returns a table entry pointer for the given guest address and @walk_level.
356 *
357 * The @flags have the following meanings:
358 * * %DAT_WALK_IGN_HOLES: consider holes as normal table entries
359 * * %DAT_WALK_ALLOC: allocate new tables to reach the requested level, if needed
360 * * %DAT_WALK_SPLIT: split existing large pages to reach the requested level, if needed
361 * * %DAT_WALK_LEAF: return successfully whenever a large page is encountered
362 * * %DAT_WALK_ANY: return successfully even if the requested level could not be reached
363 * * %DAT_WALK_CONTINUE: walk to the requested level with the specified flags, and then try to
364 * continue walking to ptes with only DAT_WALK_ANY
365 * * %DAT_WALK_USES_SKEYS: storage keys are in use
366 *
367 * Context: called with kvm->mmu_lock held.
368 *
369 * Return:
370 * * %PGM_ADDRESSING if the requested address lies outside memory
371 * * a PIC number if the requested address lies in a memory hole of type _DAT_TOKEN_PIC
372 * * %-EFAULT if the requested address lies inside a memory hole of a different type
373 * * %-EINVAL if the given ASCE is not compatible with the requested level
374 * * %-EFBIG if the requested level could not be reached because a larger frame was found
375 * * %-ENOENT if the requested level could not be reached for other reasons
376 * * %-ENOMEM if running out of memory while allocating or splitting a table
377 */
dat_entry_walk(struct kvm_s390_mmu_cache * mc,gfn_t gfn,union asce asce,int flags,int walk_level,union crste ** last,union pte ** ptepp)378 int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
379 int walk_level, union crste **last, union pte **ptepp)
380 {
381 union vaddress vaddr = { .addr = gfn_to_gpa(gfn) };
382 bool continue_anyway = flags & DAT_WALK_CONTINUE;
383 bool uses_skeys = flags & DAT_WALK_USES_SKEYS;
384 bool ign_holes = flags & DAT_WALK_IGN_HOLES;
385 bool allocate = flags & DAT_WALK_ALLOC;
386 bool split = flags & DAT_WALK_SPLIT;
387 bool leaf = flags & DAT_WALK_LEAF;
388 bool any = flags & DAT_WALK_ANY;
389 struct page_table *pgtable;
390 struct crst_table *table;
391 union crste entry;
392 int rc;
393
394 *last = NULL;
395 *ptepp = NULL;
396 if (WARN_ON_ONCE(unlikely(!asce.val)))
397 return -EINVAL;
398 if (WARN_ON_ONCE(unlikely(walk_level > asce.dt)))
399 return -EINVAL;
400 if (!asce_contains_gfn(asce, gfn))
401 return PGM_ADDRESSING;
402
403 table = dereference_asce(asce);
404 if (asce.dt >= ASCE_TYPE_REGION1) {
405 *last = table->crstes + vaddr.rfx;
406 entry = READ_ONCE(**last);
407 if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION1))
408 return -EINVAL;
409 if (crste_hole(entry) && !ign_holes)
410 return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
411 if (walk_level == TABLE_TYPE_REGION1)
412 return 0;
413 if (entry.pgd.h.i) {
414 if (!allocate)
415 return any ? 0 : -ENOENT;
416 rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
417 if (rc)
418 return rc;
419 entry = READ_ONCE(**last);
420 }
421 table = dereference_crste(entry.pgd);
422 }
423
424 if (asce.dt >= ASCE_TYPE_REGION2) {
425 *last = table->crstes + vaddr.rsx;
426 entry = READ_ONCE(**last);
427 if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION2))
428 return -EINVAL;
429 if (crste_hole(entry) && !ign_holes)
430 return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
431 if (walk_level == TABLE_TYPE_REGION2)
432 return 0;
433 if (entry.p4d.h.i) {
434 if (!allocate)
435 return any ? 0 : -ENOENT;
436 rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
437 if (rc)
438 return rc;
439 entry = READ_ONCE(**last);
440 }
441 table = dereference_crste(entry.p4d);
442 }
443
444 if (asce.dt >= ASCE_TYPE_REGION3) {
445 *last = table->crstes + vaddr.rtx;
446 entry = READ_ONCE(**last);
447 if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION3))
448 return -EINVAL;
449 if (crste_hole(entry) && !ign_holes)
450 return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
451 if (walk_level == TABLE_TYPE_REGION3 &&
452 continue_anyway && !entry.pud.h.fc && !entry.h.i) {
453 walk_level = TABLE_TYPE_PAGE_TABLE;
454 allocate = false;
455 }
456 if (walk_level == TABLE_TYPE_REGION3 || ((leaf || any) && entry.pud.h.fc))
457 return 0;
458 if (entry.pud.h.i && !entry.pud.h.fc) {
459 if (!allocate)
460 return any ? 0 : -ENOENT;
461 rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
462 if (rc)
463 return rc;
464 entry = READ_ONCE(**last);
465 }
466 if (walk_level <= TABLE_TYPE_SEGMENT && entry.pud.h.fc) {
467 if (!split)
468 return -EFBIG;
469 rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
470 if (rc)
471 return rc;
472 entry = READ_ONCE(**last);
473 }
474 table = dereference_crste(entry.pud);
475 }
476
477 *last = table->crstes + vaddr.sx;
478 entry = READ_ONCE(**last);
479 if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_SEGMENT))
480 return -EINVAL;
481 if (crste_hole(entry) && !ign_holes)
482 return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
483 if (continue_anyway && !entry.pmd.h.fc && !entry.h.i) {
484 walk_level = TABLE_TYPE_PAGE_TABLE;
485 allocate = false;
486 }
487 if (walk_level == TABLE_TYPE_SEGMENT || ((leaf || any) && entry.pmd.h.fc))
488 return 0;
489
490 if (entry.pmd.h.i && !entry.pmd.h.fc) {
491 if (!allocate)
492 return any ? 0 : -ENOENT;
493 rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
494 if (rc)
495 return rc;
496 entry = READ_ONCE(**last);
497 }
498 if (walk_level <= TABLE_TYPE_PAGE_TABLE && entry.pmd.h.fc) {
499 if (!split)
500 return -EFBIG;
501 rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
502 if (rc)
503 return rc;
504 entry = READ_ONCE(**last);
505 }
506 pgtable = dereference_pmd(entry.pmd);
507 *ptepp = pgtable->ptes + vaddr.px;
508 if (pte_hole(**ptepp) && !ign_holes)
509 return (*ptepp)->tok.type == _DAT_TOKEN_PIC ? (*ptepp)->tok.par : -EFAULT;
510 return 0;
511 }
512
/*
 * Call the pte_entry callback for every pte of @table in the range
 * [@gfn, @end).
 *
 * Holes abort the walk with -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * DAT_WALK_IGN_HOLES they are skipped, unless DAT_WALK_ANY is set as well, in
 * which case the callback is invoked for them too.
 *
 * Return: 0 if all callbacks returned 0, otherwise the first non-zero
 * callback return value, or -EFAULT as described above.
 */
static long dat_pte_walk_range(gfn_t gfn, gfn_t end, struct page_table *table, struct dat_walk *w)
{
	union pte *ptep = table->ptes + (gfn & (_PAGE_ENTRIES - 1));
	long rc;

	for ( ; gfn < end; ptep++, gfn++) {
		if (pte_hole(READ_ONCE(*ptep))) {
			if (!(w->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(w->flags & DAT_WALK_ANY))
				continue;
		}

		rc = w->ops->pte_entry(ptep, gfn, gfn + 1, w);
		if (rc)
			return rc;
	}
	return 0;
}
532
/*
 * Walk the entries of @table that cover the range [@start, @end), calling the
 * callback registered for the table type of each entry and descending into
 * lower level tables.
 *
 * Holes abort the walk with -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * DAT_WALK_IGN_HOLES they are skipped, unless DAT_WALK_ANY is set as well, in
 * which case the callback is invoked for them too.
 *
 * The walk stops at the first non-zero result, no matter whether it comes
 * from a callback at this level or from the walk of a lower level table.
 * The result of a lower level walk must be checked before moving on to the
 * next entry, otherwise the callback for the next entry would overwrite it.
 *
 * Return: 0 if the whole range was walked and all callbacks returned 0,
 * otherwise the first non-zero result, or -EFAULT as described above.
 */
static long dat_crste_walk_range(gfn_t start, gfn_t end, struct crst_table *table,
				 struct dat_walk *walk)
{
	unsigned long idx, cur_shift, cur_size;
	dat_walk_op the_op;
	union crste crste;
	gfn_t cur, next;
	long rc = 0;

	/*
	 * Number of gfn bits covered by one entry of this table: 8 for table
	 * type 0, plus 11 for each higher table type.
	 */
	cur_shift = 8 + table->crstes[0].h.tt * 11;
	idx = (start >> cur_shift) & (_CRST_ENTRIES - 1);
	cur_size = 1UL << cur_shift;

	for (cur = ALIGN_DOWN(start, cur_size); cur < end; idx++, cur = next) {
		next = cur + cur_size;
		walk->last = table->crstes + idx;
		crste = READ_ONCE(*walk->last);

		if (crste_hole(crste)) {
			if (!(walk->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(walk->flags & DAT_WALK_ANY))
				continue;
		}

		the_op = walk->ops->crste_ops[crste.h.tt];
		if (the_op) {
			rc = the_op(walk->last, cur, next, walk);
			if (rc)
				break;
			/* The callback may have changed the entry. */
			crste = READ_ONCE(*walk->last);
		}

		/* Only valid entries that are not large pages have a lower table. */
		if (crste.h.i || crste.h.fc)
			continue;

		if (!is_pmd(crste))
			rc = dat_crste_walk_range(max(start, cur), min(end, next),
						  _dereference_crste(crste), walk);
		else if (walk->ops->pte_entry)
			rc = dat_pte_walk_range(max(start, cur), min(end, next),
						dereference_pmd(crste.pmd), walk);
		if (rc)
			break;
	}
	return rc;
}
576
577 /**
578 * _dat_walk_gfn_range() - Walk DAT tables.
579 * @start: The first guest page frame to walk.
580 * @end: The guest page frame immediately after the last one to walk.
581 * @asce: The ASCE of the guest mapping.
582 * @ops: The gmap_walk_ops that will be used to perform the walk.
583 * @flags: Flags from WALK_* (currently only WALK_IGN_HOLES is supported).
584 * @priv: Will be passed as-is to the callbacks.
585 *
586 * Any callback returning non-zero causes the walk to stop immediately.
587 *
588 * Return: %-EINVAL in case of error, %-EFAULT if @start is too high for the
589 * given ASCE unless the DAT_WALK_IGN_HOLES flag is specified,
590 * otherwise it returns whatever the callbacks return.
591 */
_dat_walk_gfn_range(gfn_t start,gfn_t end,union asce asce,const struct dat_walk_ops * ops,int flags,void * priv)592 long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
593 const struct dat_walk_ops *ops, int flags, void *priv)
594 {
595 struct crst_table *table = dereference_asce(asce);
596 struct dat_walk walk = {
597 .ops = ops,
598 .asce = asce,
599 .priv = priv,
600 .flags = flags,
601 .start = start,
602 .end = end,
603 };
604
605 if (WARN_ON_ONCE(unlikely(!asce.val)))
606 return -EINVAL;
607 if (!asce_contains_gfn(asce, start))
608 return (flags & DAT_WALK_IGN_HOLES) ? 0 : -EFAULT;
609
610 return dat_crste_walk_range(start, min(end, asce_end(asce)), table, &walk);
611 }
612
/**
 * dat_get_storage_key() - Get the guest view of the storage key of a frame.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 * @skey: Where the key is stored; zeroed first.
 *
 * For a pte, the key is read from the page if the pte is valid, otherwise
 * acc and fp are taken from the PGSTE; in both cases the gr and gc bits of
 * the PGSTE are ORed in. If the walk ends at a CRST entry, the key is read
 * from the page only if that entry has both the fc and the pr bit set;
 * otherwise the key stays zero.
 *
 * Return: %0 on success, otherwise the non-zero result of dat_entry_walk().
 */
int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey)
{
	union crste *crstep, large;
	union pgste pgste;
	union pte *ptep;
	int rc;

	skey->skey = 0;
	rc = dat_entry_walk(NULL, gfn, asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep);
	if (rc)
		return rc;

	if (ptep) {
		pgste = pgste_get_lock(ptep);
		if (!ptep->h.i) {
			skey->skey = page_get_storage_key(pte_origin(*ptep));
		} else {
			skey->acc = pgste.acc;
			skey->fp = pgste.fp;
		}
		skey->r |= pgste.gr;
		skey->c |= pgste.gc;
		pgste_set_unlock(ptep, pgste);
		return 0;
	}

	/* No pte: the walk ended at a CRST entry. */
	large = READ_ONCE(*crstep);
	if (large.h.fc && large.s.fc1.pr)
		skey->skey = page_get_storage_key(large_crste_to_phys(large, gfn));
	return 0;
}
646
dat_update_ptep_sd(union pgste old,union pgste pgste,union pte * ptep)647 static void dat_update_ptep_sd(union pgste old, union pgste pgste, union pte *ptep)
648 {
649 if (pgste.acc != old.acc || pgste.fp != old.fp || pgste.gr != old.gr || pgste.gc != old.gc)
650 __atomic64_or(_PAGE_SD, &ptep->val);
651 }
652
/**
 * dat_set_storage_key() - Set the guest storage key of a frame.
 * @mc: Cache used if tables need to be allocated during the walk.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 * @skey: The new storage key.
 * @nq: Passed on, negated, as the last argument of page_set_storage_key().
 *
 * If the walk ends at a large CRST entry, the key is set directly on the
 * page. For a pte, the guest view (acc, fp, gr, gc) is stored in the PGSTE;
 * if the pte is valid, the reference and change bits currently set on the
 * page are additionally merged into the hr and hc bits of the PGSTE, and the
 * page key is set with the reference and change bits cleared.
 *
 * Return: %0 on success, otherwise the non-zero result of dat_entry_walk().
 */
int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
			union skey skey, bool nq)
{
	union pgste pgste, old;
	union crste *crstep;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(mc, gfn, asce, DAT_WALK_LEAF_ALLOC, TABLE_TYPE_PAGE_TABLE,
			    &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		page_set_storage_key(large_crste_to_phys(*crstep, gfn), skey.skey, !nq);
		return 0;
	}

	old = pgste_get_lock(ptep);
	pgste = old;

	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gc = skey.c;
	pgste.gr = skey.r;

	if (!ptep->h.i) {
		union skey old_skey;

		/* Merge the reference and change bits of the page into the host bits. */
		old_skey.skey = page_get_storage_key(pte_origin(*ptep));
		pgste.hc |= old_skey.c;
		pgste.hr |= old_skey.r;
		/* The guest view of r and c lives in the PGSTE, not in the page key. */
		skey.r = 0;
		skey.c = 0;
		page_set_storage_key(pte_origin(*ptep), skey.skey, !nq);
	}

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return 0;
}
696
/*
 * Set the storage key of the page at @paddr to @skey, unless the current key
 * already matches.
 *
 * The current key is always stored in @oldkey, which must not be NULL. The
 * keys match if acc and fp are equal, and the reference (change) bits are
 * equal or @mr (@mc) is set.
 *
 * Return: true if the key was set, false if it already matched.
 */
static bool page_cond_set_storage_key(phys_addr_t paddr, union skey skey, union skey *oldkey,
				      bool nq, bool mr, bool mc)
{
	bool same_access, same_ref, same_chg;

	oldkey->skey = page_get_storage_key(paddr);

	same_access = oldkey->acc == skey.acc && oldkey->fp == skey.fp;
	same_ref = mr || oldkey->r == skey.r;
	same_chg = mc || oldkey->c == skey.c;
	if (same_access && same_ref && same_chg)
		return false;

	page_set_storage_key(paddr, skey.skey, !nq);
	return true;
}
707
/**
 * dat_cond_set_storage_key() - Conditionally set the guest storage key.
 * @mmc: Cache used if tables need to be allocated during the walk.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 * @skey: The new storage key.
 * @oldkey: If not %NULL, the previous guest view of the key is stored here.
 * @nq: Passed on to page_cond_set_storage_key().
 * @mr: Passed on to page_cond_set_storage_key().
 * @mc: Passed on to page_cond_set_storage_key().
 *
 * If the walk ends at a large CRST entry, the key of the page is
 * conditionally set. For a pte, the guest view (acc, fp, gr, gc) is stored in
 * the PGSTE; if the pte is valid, the key of the page is conditionally set
 * and the reference and change bits that were set on the page are merged
 * into the hr and hc bits of the PGSTE.
 *
 * Return: a non-zero result of dat_entry_walk() if the walk failed; otherwise
 * %1 if the pte is invalid or the key of the page was set, %0 if the key of
 * the page already matched.
 */
int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
			     union skey skey, union skey *oldkey, bool nq, bool mr, bool mc)
{
	union pgste pgste, old;
	union crste *crstep;
	/* Fully initialized, since it is copied out as a whole below. */
	union skey prev = {};
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(mmc, gfn, asce, DAT_WALK_LEAF_ALLOC, TABLE_TYPE_PAGE_TABLE,
			    &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		/*
		 * The helper writes the old key unconditionally, so go through
		 * a local copy: @oldkey is optional.
		 */
		rc = page_cond_set_storage_key(large_crste_to_phys(*crstep, gfn), skey, &prev,
					       nq, mr, mc);
		if (oldkey)
			*oldkey = prev;
		return rc;
	}

	old = pgste_get_lock(ptep);
	pgste = old;

	rc = 1;
	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gc = skey.c;
	pgste.gr = skey.r;

	if (!ptep->h.i) {
		rc = page_cond_set_storage_key(pte_origin(*ptep), skey, &prev, nq, mr, mc);
		/* Merge the reference and change bits of the page into the host bits. */
		pgste.hc |= prev.c;
		pgste.hr |= prev.r;
		/* The guest view also includes the bits saved in the PGSTE. */
		prev.c |= old.gc;
		prev.r |= old.gr;
	} else {
		prev.acc = old.acc;
		prev.fp = old.fp;
		prev.c = old.gc;
		prev.r = old.gr;
	}
	if (oldkey)
		*oldkey = prev;

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return rc;
}
754
/**
 * dat_reset_reference_bit() - Reset the guest reference bit of a frame.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame.
 *
 * If the walk ends at a CRST entry, the reference bit of the page is reset
 * only if that entry has both the fc and the pr bit set. For a pte, the
 * reference bit of the page is reset if the pte is valid, and the previous
 * state is merged into the hr bit of the PGSTE; the gr bit of the PGSTE is
 * always cleared.
 *
 * Return: a non-zero result of dat_entry_walk() if the walk failed; otherwise
 * the result of page_reset_referenced(), for ptes combined with the previous
 * gr (bit 1) and gc (bit 0) bits of the PGSTE.
 */
int dat_reset_reference_bit(union asce asce, gfn_t gfn)
{
	union pgste pgste, old;
	union crste *crstep;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(NULL, gfn, asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		union crste crste = READ_ONCE(*crstep);

		if (!crste.h.fc || !crste.s.fc1.pr)
			return 0;
		/* Use the value that was just checked, not a second read of the entry. */
		return page_reset_referenced(large_crste_to_phys(crste, gfn));
	}
	old = pgste_get_lock(ptep);
	pgste = old;

	if (!ptep->h.i) {
		rc = page_reset_referenced(pte_origin(*ptep));
		/*
		 * Merge the reference bit of the page into the host bit, like
		 * the functions setting the storage key do; a plain assignment
		 * would drop a reference that was recorded earlier.
		 */
		pgste.hr |= rc >> 1;
	}
	/* Reflect the guest view, which includes the bits saved in the PGSTE. */
	rc |= (pgste.gr << 1) | pgste.gc;
	pgste.gr = 0;

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return rc;
}
787
/*
 * pte callback for dat_reset_skeys(): clear the acc, fp, gr and gc fields of
 * the PGSTE and, if the pr bit of the pte is set, set the storage key of the
 * page to PAGE_DEFAULT_KEY.
 *
 * Return: @next if a reschedule is needed, which stops the walk; otherwise 0.
 */
static long dat_reset_skeys_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste pgste = pgste_get_lock(ptep);

	pgste.acc = 0;
	pgste.fp = 0;
	pgste.gr = 0;
	pgste.gc = 0;
	if (ptep->s.pr)
		page_set_storage_key(pte_origin(*ptep), PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);

	return need_resched() ? next : 0;
}
805
/*
 * CRST entry callback for dat_reset_skeys(): for entries that have both the
 * fc and the pr bit set, set the storage keys of the pages backing the part
 * of the entry that lies inside the walked range to PAGE_DEFAULT_KEY.
 *
 * Return: @next if a reschedule is needed, which stops the walk; otherwise 0.
 */
static long dat_reset_skeys_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	phys_addr_t base = crste_origin_large(*crstep);
	phys_addr_t pa, stop;

	if (!crstep->h.fc || !crstep->s.fc1.pr)
		return 0;

	/* Clamp to the walked range and translate to physical addresses. */
	pa = ((max(gfn, walk->start) - gfn) << PAGE_SHIFT) + base;
	stop = ((min(next, walk->end) - gfn) << PAGE_SHIFT) + base;

	/* Use sske_frame() as long as the next segment boundary is within the range... */
	while (ALIGN(pa + 1, _SEGMENT_SIZE) <= stop)
		pa = sske_frame(pa, PAGE_DEFAULT_KEY);
	/* ...and handle the remaining pages one by one. */
	while (pa < stop) {
		page_set_storage_key(pa, PAGE_DEFAULT_KEY, 1);
		pa += PAGE_SIZE;
	}

	return need_resched() ? next : 0;
}
824
/**
 * dat_reset_skeys() - Reset the storage keys of an address space.
 * @asce: The ASCE of the address space.
 * @start: The first guest frame to handle.
 *
 * Walks from @start to the end of the address space, ignoring holes. The
 * callbacks stop the walk by returning the next guest frame whenever a
 * reschedule is needed.
 *
 * Return: the result of _dat_walk_gfn_range(): %0 once the end of the address
 * space has been reached, otherwise the non-zero value that stopped the walk.
 */
long dat_reset_skeys(union asce asce, gfn_t start)
{
	/* Constant table: no need to build it on the stack for each call. */
	static const struct dat_walk_ops ops = {
		.pte_entry = dat_reset_skeys_pte,
		.pmd_entry = dat_reset_skeys_crste,
		.pud_entry = dat_reset_skeys_crste,
	};

	return _dat_walk_gfn_range(start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, NULL);
}
835
/*
 * struct slot_priv - Private walk data for dat_set_slot().
 * @token: The value to install, as the raw value of a CRST token entry.
 * @mc: Cache used when an entry needs to be split.
 */
struct slot_priv {
	unsigned long token;
	struct kvm_s390_mmu_cache *mc;
};
840
/*
 * pte callback for dat_set_slot(): replace the pte with a token pte built
 * from the type and parameter of the requested token, unless the pte already
 * has that value.
 *
 * Return: always 0.
 */
static long _dat_slot_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct slot_priv *priv = walk->priv;
	union crste token = { .val = priv->token };
	union pte cur = READ_ONCE(*ptep);
	union pte wanted;

	wanted = _PTE_TOK(token.tok.type, token.tok.par);

	/* Only touch table entries that are not in the desired state yet. */
	if (cur.val != wanted.val)
		dat_ptep_xchg(ptep, wanted, gfn, walk->asce, false);
	return 0;
}
856
/*
 * CRST entry callback for dat_set_slot().
 *
 * An entry that lies completely inside the walked range is replaced with the
 * requested token (keeping its table type), and a lower level table that was
 * attached to it is freed. An entry that is only partially covered is split,
 * unless it already has a lower level table, so that the walk can handle the
 * affected part at a lower level.
 *
 * Return: 0 on success, -EINVAL if the entry could not be exchanged, or the
 * result of dat_split_crste().
 */
static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste wanted, cur = READ_ONCE(*crstep);
	struct slot_priv *priv = walk->priv;
	bool has_table;

	wanted.val = priv->token;
	wanted.h.tt = cur.h.tt;

	/* Table entry already in the desired state. */
	if (cur.val == wanted.val)
		return 0;

	has_table = !cur.h.fc && !cur.h.i;

	if (walk->start > gfn || walk->end < next) {
		/* A lower level table is present, things will be handled there. */
		if (has_table)
			return 0;
		/* Split (install a lower level table), and handle things there. */
		return dat_split_crste(priv->mc, crstep, gfn, walk->asce, false);
	}

	/* This table entry needs to be updated. */
	if (!dat_crstep_xchg_atomic(crstep, cur, wanted, gfn, walk->asce))
		return -EINVAL;

	/* A lower level table was present, needs to be freed. */
	if (has_table) {
		if (is_pmd(cur))
			dat_free_pt(dereference_pmd(cur.pmd));
		else
			dat_free_level(dereference_crste(cur), true);
	}
	return 0;
}
889
890 static const struct dat_walk_ops dat_slot_ops = {
891 .pte_entry = _dat_slot_pte,
892 .crste_ops = { _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, },
893 };
894
/**
 * dat_set_slot() - Fill a range of guest frames with a token.
 * @mc: Cache used when entries need to be split.
 * @asce: The ASCE of the address space.
 * @start: The first guest frame of the range.
 * @end: The guest frame immediately after the last one of the range.
 * @type: The token type.
 * @param: The token parameter.
 *
 * The walk is performed with %DAT_WALK_IGN_HOLES and %DAT_WALK_ANY, so the
 * callbacks are also invoked for existing holes.
 *
 * Return: the result of _dat_walk_gfn_range().
 */
int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
		 u16 type, u16 param)
{
	const int walk_flags = DAT_WALK_IGN_HOLES | DAT_WALK_ANY;
	struct slot_priv priv = {
		.mc = mc,
		.token = _CRSTE_TOK(0, type, param).val,
	};

	return _dat_walk_gfn_range(start, end, asce, &dat_slot_ops, walk_flags, &priv);
}
906
pgste_set_unlock_multiple(union pte * first,int n,union pgste * pgstes)907 static void pgste_set_unlock_multiple(union pte *first, int n, union pgste *pgstes)
908 {
909 int i;
910
911 for (i = 0; i < n; i++) {
912 if (!pgstes[i].pcl)
913 break;
914 pgste_set_unlock(first + i, pgstes[i]);
915 }
916 }
917
pgste_get_trylock_multiple(union pte * first,int n,union pgste * pgstes)918 static bool pgste_get_trylock_multiple(union pte *first, int n, union pgste *pgstes)
919 {
920 int i;
921
922 for (i = 0; i < n; i++) {
923 if (!pgste_get_trylock(first + i, pgstes + i))
924 break;
925 }
926 if (i == n)
927 return true;
928 pgste_set_unlock_multiple(first, n, pgstes);
929 return false;
930 }
931
dat_get_ptval(struct page_table * table,struct ptval_param param)932 unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param)
933 {
934 union pgste pgstes[4] = {};
935 unsigned long res = 0;
936 int i, n;
937
938 n = param.len + 1;
939
940 while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
941 cpu_relax();
942
943 for (i = 0; i < n; i++)
944 res = res << 16 | pgstes[i].val16;
945
946 pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
947 return res;
948 }
949
dat_set_ptval(struct page_table * table,struct ptval_param param,unsigned long val)950 void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val)
951 {
952 union pgste pgstes[4] = {};
953 int i, n;
954
955 n = param.len + 1;
956
957 while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
958 cpu_relax();
959
960 for (i = param.len; i >= 0; i--) {
961 pgstes[i].val16 = val;
962 val = val >> 16;
963 }
964
965 pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
966 }
967
/* pte callback for dat_test_age_gfn(): report the y bit of the pte. */
static long _dat_test_young_pte(union pte *ptep, gfn_t start, gfn_t end, struct dat_walk *walk)
{
	long young = ptep->s.y;

	return young;
}
972
/*
 * CRST entry callback for dat_test_age_gfn(): report 1 if the entry has both
 * the fc and the y bit set, otherwise 0.
 */
static long _dat_test_young_crste(union crste *crstep, gfn_t start, gfn_t end,
				  struct dat_walk *walk)
{
	if (!crstep->h.fc)
		return 0;
	return !!crstep->s.fc1.y;
}
978
979 static const struct dat_walk_ops test_age_ops = {
980 .pte_entry = _dat_test_young_pte,
981 .pmd_entry = _dat_test_young_crste,
982 .pud_entry = _dat_test_young_crste,
983 };
984
985 /**
986 * dat_test_age_gfn() - Test young.
987 * @asce: The ASCE whose address range is to be tested.
988 * @start: The first guest frame of the range to check.
989 * @end: The guest frame after the last in the range.
990 *
991 * Context: called by KVM common code with the kvm mmu write lock held.
992 *
993 * Return: %true if any page in the given range is young, otherwise %false.
994 */
dat_test_age_gfn(union asce asce,gfn_t start,gfn_t end)995 bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
996 {
997 return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
998 }
999
/*
 * Walk callback for pmd/pud level entries: set the prefix_notif bit of an
 * entry that has fc set and neither h.i nor h.p set.
 *
 * Entries that do not qualify are skipped and leave the counter in
 * walk->priv untouched. Otherwise the counter is set to 2, which is the
 * value dat_set_prefix_notif_bit() checks for.
 */
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste seen, wanted;
	int *marked = walk->priv;

	for (;;) {
		seen = READ_ONCE(*crstep);
		if (!seen.h.fc || seen.h.i || seen.h.p)
			return 0;
		/* Bit already set, nothing to exchange. */
		if (seen.s.fc1.prefix_notif)
			break;
		wanted = seen;
		wanted.s.fc1.prefix_notif = 1;
		/* Retry with a fresh snapshot if the exchange did not succeed. */
		if (dat_crstep_xchg_atomic(crstep, seen, wanted, gfn, walk->asce))
			break;
	}
	*marked = 2;
	return 0;
}
1017
/*
 * Walk callback for page table entries: with the PGSTE locked, set the
 * prefix_notif bit if the PTE has neither h.i nor h.p set, and count the
 * entry in the int that walk->priv points to.
 */
static long dat_set_pn_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	int *marked = walk->priv;
	union pgste locked = pgste_get_lock(ptep);

	if (!(ptep->h.i || ptep->h.p)) {
		locked.prefix_notif = 1;
		++*marked;
	}
	pgste_set_unlock(ptep, locked);
	return 0;
}
1031
/**
 * dat_set_prefix_notif_bit() - Set the prefix notification bit for two pages.
 * @asce: The ASCE to operate on.
 * @gfn: The first of the two consecutive guest frames to mark.
 *
 * Walks the frames @gfn and @gfn + 1, ignoring holes, and lets the callbacks
 * count how many of them could be marked. The result of the walk itself is
 * not looked at; only the count decides.
 *
 * Return: %0 if the count is exactly 2, %-EAGAIN otherwise.
 */
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
{
	static const struct dat_walk_ops pn_ops = {
		.pte_entry = dat_set_pn_pte,
		.pmd_entry = dat_set_pn_crste,
		.pud_entry = dat_set_pn_crste,
	};
	int marked = 0;

	_dat_walk_gfn_range(gfn, gfn + 2, asce, &pn_ops, DAT_WALK_IGN_HOLES, &marked);

	return marked == 2 ? 0 : -EAGAIN;
}
1047
1048 /**
1049 * dat_perform_essa() - Perform ESSA actions on the PGSTE.
1050 * @asce: The asce to operate on.
1051 * @gfn: The guest page frame to operate on.
1052 * @orc: The specific action to perform, see the ESSA_SET_* macros.
1053 * @state: The storage attributes to be returned to the guest.
1054 * @dirty: Returns whether the function dirtied a previously clean entry.
1055 *
1056 * Context: Called with kvm->mmu_lock held.
1057 *
1058 * Return:
1059 * * %1 if the page state has been altered and the page is to be added to the CBRL
1060 * * %0 if the page state has been altered, but the page is not to be added to the CBRL
1061 * * %-1 if the page state has not been altered and the page is not to be added to the CBRL
1062 */
dat_perform_essa(union asce asce,gfn_t gfn,int orc,union essa_state * state,bool * dirty)1063 int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty)
1064 {
1065 union crste *crstep;
1066 union pgste pgste;
1067 union pte *ptep;
1068 int res = 0;
1069
1070 if (dat_entry_walk(NULL, gfn, asce, 0, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep)) {
1071 *state = (union essa_state) { .exception = 1 };
1072 return -1;
1073 }
1074
1075 pgste = pgste_get_lock(ptep);
1076
1077 *state = (union essa_state) {
1078 .content = (ptep->h.i << 1) + (ptep->h.i && pgste.zero),
1079 .nodat = pgste.nodat,
1080 .usage = pgste.usage,
1081 };
1082
1083 switch (orc) {
1084 case ESSA_GET_STATE:
1085 res = -1;
1086 break;
1087 case ESSA_SET_STABLE:
1088 pgste.usage = PGSTE_GPS_USAGE_STABLE;
1089 pgste.nodat = 0;
1090 break;
1091 case ESSA_SET_UNUSED:
1092 pgste.usage = PGSTE_GPS_USAGE_UNUSED;
1093 if (ptep->h.i)
1094 res = 1;
1095 break;
1096 case ESSA_SET_VOLATILE:
1097 pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
1098 if (ptep->h.i)
1099 res = 1;
1100 break;
1101 case ESSA_SET_POT_VOLATILE:
1102 if (!ptep->h.i) {
1103 pgste.usage = PGSTE_GPS_USAGE_POT_VOLATILE;
1104 } else if (pgste.zero) {
1105 pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
1106 } else if (!pgste.gc) {
1107 pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
1108 res = 1;
1109 }
1110 break;
1111 case ESSA_SET_STABLE_RESIDENT:
1112 pgste.usage = PGSTE_GPS_USAGE_STABLE;
1113 /*
1114 * Since the resident state can go away any time after this
1115 * call, we will not make this page resident. We can revisit
1116 * this decision if a guest will ever start using this.
1117 */
1118 break;
1119 case ESSA_SET_STABLE_IF_RESIDENT:
1120 if (!ptep->h.i)
1121 pgste.usage = PGSTE_GPS_USAGE_STABLE;
1122 break;
1123 case ESSA_SET_STABLE_NODAT:
1124 pgste.usage = PGSTE_GPS_USAGE_STABLE;
1125 pgste.nodat = 1;
1126 break;
1127 default:
1128 WARN_ONCE(1, "Invalid ORC!");
1129 res = -1;
1130 break;
1131 }
1132 /* If we are discarding a page, set it to logical zero. */
1133 pgste.zero = res == 1;
1134 if (orc > 0) {
1135 *dirty = !pgste.cmma_d;
1136 pgste.cmma_d = 1;
1137 }
1138
1139 pgste_set_unlock(ptep, pgste);
1140
1141 return res;
1142 }
1143
/*
 * Walk callback: clear the usage, nodat and cmma_d fields of one PGSTE.
 *
 * Returns the gfn following this entry when a reschedule is pending, and 0
 * otherwise. Presumably a non-zero return stops the walk and is handed back
 * to the caller of the walk - confirm against _dat_walk_gfn_range().
 */
static long dat_reset_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste cleared = pgste_get_lock(ptep);

	cleared.usage = 0;
	cleared.nodat = 0;
	cleared.cmma_d = 0;
	pgste_set_unlock(ptep, cleared);

	return need_resched() ? next : 0;
}
1157
/**
 * dat_reset_cmma() - Reset the CMMA fields of all pages from @start onwards.
 * @asce: The ASCE to operate on.
 * @start: The first guest frame to reset.
 *
 * Walks from @start to the end of the address space described by @asce,
 * ignoring holes, and clears the usage, nodat and cmma_d fields of every
 * PGSTE found.
 *
 * Return: The result of the walk. The per-PTE callback returns the gfn
 * following the entry it just handled when a reschedule is pending, so a
 * positive value is presumably the gfn at which to call this function again,
 * and %0 means the whole range was processed - confirm against
 * _dat_walk_gfn_range().
 */
long dat_reset_cmma(union asce asce, gfn_t start)
{
	/* Immutable table: keep it static like the other walk ops in this file. */
	static const struct dat_walk_ops dat_reset_cmma_ops = {
		.pte_entry = dat_reset_cmma_pte,
	};

	return _dat_walk_gfn_range(start, asce_end(asce), asce, &dat_reset_cmma_ops,
				   DAT_WALK_IGN_HOLES, NULL);
}
1167
1168 struct dat_get_cmma_state {
1169 gfn_t start;
1170 gfn_t end;
1171 unsigned int count;
1172 u8 *values;
1173 atomic64_t *remaining;
1174 };
1175
/*
 * Walk callback: store the CMMA attributes (usage | nodat << 6) of one page
 * at its position relative to the start of the walk, and advance the end
 * marker past this page. The PGSTE is written back unmodified.
 */
static long __dat_peek_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *peek = walk->priv;
	union pgste snap = pgste_get_lock(ptep);

	peek->values[gfn - walk->start] = snap.usage | (snap.nodat << 6);
	pgste_set_unlock(ptep, snap);

	peek->end = next;
	return 0;
}
1188
/*
 * Walk callback for higher-level entries: an invalid entry counts as
 * processed for the whole range it covers, clamped to the end of the walk.
 * No values are written for that range. Valid entries are left alone.
 */
static long __dat_peek_cmma_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *peek = walk->priv;

	if (!crstep->h.i)
		return 0;

	peek->end = min(walk->end, next);
	return 0;
}
1197
/**
 * dat_peek_cmma() - Read the CMMA attributes of a range of guest pages.
 * @start: The first guest frame to read.
 * @asce: The ASCE to operate on.
 * @count: On entry the number of pages to read, on return the number of
 *         pages that were processed.
 * @values: Output buffer with room for at least the initial *@count bytes;
 *          receives one byte (usage | nodat << 6) per page that has a PGSTE.
 *
 * The attributes are only read; no PGSTE is modified. Ranges covered by an
 * invalid higher-level entry count as processed, but nothing is written to
 * @values for them.
 *
 * Return: %0 on success, including the case where the walk stopped with
 * %-EFAULT after at least one page was processed; otherwise the result of
 * the walk.
 */
int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values)
{
	static const struct dat_walk_ops ops = {
		.pte_entry = __dat_peek_cmma_pte,
		.pmd_entry = __dat_peek_cmma_crste,
		.pud_entry = __dat_peek_cmma_crste,
		.p4d_entry = __dat_peek_cmma_crste,
		.pgd_entry = __dat_peek_cmma_crste,
	};
	/*
	 * Start with an empty processed range. If the walk ends before any
	 * callback moved the end marker, the count below must come out as 0
	 * rather than as the truncated value of -start.
	 */
	struct dat_get_cmma_state state = { .values = values, .end = start, };
	int rc;

	rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &state);
	*count = state.end - start;
	/* Return success if at least one value was saved, otherwise an error. */
	return (rc == -EFAULT && *count > 0) ? 0 : rc;
}
1215
/*
 * Walk callback: collect the CMMA attributes of dirty pages and clear their
 * dirty bit.
 *
 * Once a first dirty page has been found, the walk is stopped (return 1)
 * when the current page is more than KVM_S390_MAX_BIT_DISTANCE beyond the
 * end of the last collected page, or when it no longer fits into the buffer.
 */
static long __dat_get_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *get = walk->priv;
	union pgste locked;

	if (get->start != -1 &&
	    ((gfn - get->end) > KVM_S390_MAX_BIT_DISTANCE || gfn - get->start >= get->count))
		return 1;

	/* Cheap unlocked check first; clean pages are skipped without locking. */
	if (!READ_ONCE(*pgste_of(ptep)).cmma_d)
		return 0;

	locked = pgste_get_lock(ptep);
	/* Re-check under the lock, the bit may have been cleared meanwhile. */
	if (!locked.cmma_d)
		goto out_unlock;

	if (get->start == -1)
		get->start = gfn;
	locked.cmma_d = 0;
	atomic64_dec(get->remaining);
	get->values[gfn - get->start] = locked.usage | (locked.nodat << 6);
	get->end = next;

out_unlock:
	pgste_set_unlock(ptep, locked);
	return 0;
}
1243
/**
 * dat_get_cmma() - Collect and clear the CMMA dirty state of guest pages.
 * @asce: The ASCE to operate on.
 * @start: On entry the guest frame to start searching at; on return the
 *         frame of the first dirty page found. Unchanged if none was found.
 * @count: On entry the number of bytes @values can hold; on return the
 *         length of the span from the first to the last collected page, or 0
 *         if no dirty page was found.
 * @values: Output buffer; the attributes of a dirty page are stored at its
 *          offset from the first dirty page. Bytes belonging to clean pages
 *          inside the span are not written.
 * @rem: Counter that is decremented once for every dirty page collected.
 *
 * Searches from *@start to the end of the address space, ignoring holes.
 * The dirty bit of every collected page is cleared.
 *
 * Return: Always %0.
 */
int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem)
{
	static const struct dat_walk_ops ops = { .pte_entry = __dat_get_cmma_pte, };
	struct dat_get_cmma_state state = {
		.remaining = rem,
		.values = values,
		.count = *count,
		.start = -1,
	};

	/*
	 * The callback checks the buffer size only after a first dirty page
	 * has been stored, so an empty buffer must be rejected up front or
	 * values[0] would be written regardless.
	 */
	if (!*count)
		return 0;

	_dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &state);

	if (state.start == -1) {
		*count = 0;
	} else {
		*count = state.end - state.start;
		*start = state.start;
	}

	return 0;
}
1265
1266 struct dat_set_cmma_state {
1267 unsigned long mask;
1268 const u8 *bits;
1269 };
1270
/*
 * Walk callback: apply the caller-supplied CMMA attributes to one PGSTE.
 *
 * The input byte for this page is moved to its position within the PGSTE
 * value (shifted left by 24) and filtered through the mask. Of the result,
 * only the usage and nodat fields are copied into the PGSTE.
 */
static long __dat_set_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_set_cmma_state *state = walk->priv;
	union pgste pgste, tmp;

	/*
	 * Widen before shifting. A u8 is promoted to int, so shifting a byte
	 * with bit 7 set by 24 would shift into the sign bit and sign-extend
	 * when combined with the unsigned long mask.
	 */
	tmp.val = ((unsigned long)state->bits[gfn - walk->start] << 24) & state->mask;

	pgste = pgste_get_lock(ptep);
	pgste.usage = tmp.usage;
	pgste.nodat = tmp.nodat;
	pgste_set_unlock(ptep, pgste);

	return 0;
}
1285
1286 /**
1287 * dat_set_cmma_bits() - Set CMMA bits for a range of guest pages.
1288 * @mc: Cache used for allocations.
1289 * @asce: The ASCE of the guest.
1290 * @gfn: The guest frame of the fist page whose CMMA bits are to set.
1291 * @count: How many pages need to be processed.
1292 * @mask: Which PGSTE bits should be set.
1293 * @bits: Points to an array with the CMMA attributes.
1294 *
1295 * This function sets the CMMA attributes for the given pages. If the input
1296 * buffer has zero length, no action is taken, otherwise the attributes are
1297 * set and the mm->context.uses_cmm flag is set.
1298 *
1299 * Each byte in @bits contains new values for bits 32-39 of the PGSTE.
1300 * Currently, only the fields NT and US are applied.
1301 *
1302 * Return: %0 in case of success, a negative error value otherwise.
1303 */
dat_set_cmma_bits(struct kvm_s390_mmu_cache * mc,union asce asce,gfn_t gfn,unsigned long count,unsigned long mask,const uint8_t * bits)1304 int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
1305 unsigned long count, unsigned long mask, const uint8_t *bits)
1306 {
1307 const struct dat_walk_ops ops = { .pte_entry = __dat_set_cmma_pte, };
1308 struct dat_set_cmma_state state = { .mask = mask, .bits = bits, };
1309 union crste *crstep;
1310 union pte *ptep;
1311 gfn_t cur;
1312 int rc;
1313
1314 for (cur = ALIGN_DOWN(gfn, _PAGE_ENTRIES); cur < gfn + count; cur += _PAGE_ENTRIES) {
1315 rc = dat_entry_walk(mc, cur, asce, DAT_WALK_ALLOC, TABLE_TYPE_PAGE_TABLE,
1316 &crstep, &ptep);
1317 if (rc)
1318 return rc;
1319 }
1320 return _dat_walk_gfn_range(gfn, gfn + count, asce, &ops, DAT_WALK_IGN_HOLES, &state);
1321 }
1322