1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * KVM guest address space mapping code
4 *
5 * Copyright IBM Corp. 2024, 2025
6 * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
7 */
8
9 #ifndef __KVM_S390_DAT_H
10 #define __KVM_S390_DAT_H
11
12 #include <linux/radix-tree.h>
13 #include <linux/refcount.h>
14 #include <linux/io.h>
15 #include <linux/kvm_types.h>
16 #include <linux/pgalloc.h>
17 #include <asm/tlbflush.h>
18 #include <asm/dat-bits.h>
19
/*
 * Every block starts with its base address and length. Instead of paying
 * for that header again, it is cheaper to also send a short run of clean
 * data, as long as the run is smaller than two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (sizeof(void *) * 2)
/* CMMA limit, kept equal to the storage key limit for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
28
/* Wrap a raw value into a union asce; NULL_ASCE is the all-zero asce */
#define _ASCE(x)	((union asce) { .val = (x), })
#define NULL_ASCE	_ASCE(0)
31
/* Values for the token type field of token entries (see _CRSTE_TOK / _PTE_TOK) */
enum {
	_DAT_TOKEN_NONE	= 0,	/* No token */
	_DAT_TOKEN_PIC	= 1,	/* Used by _CRSTE_PIC; parameter is e.g. PGM_ADDRESSING */
};
36
/*
 * Token entries are invalid entries (i == 1) that carry a token type @t and
 * a parameter @p. For crstes, @l is stored in the table type field.
 */
#define _CRSTE_TOK(l, t, p) ((union crste) {	\
		.tok.type = (t),		\
		.tok.par = (p),			\
		.tok.tt = (l),			\
		.tok.i = 1			\
	})
/* Token crste of type _DAT_TOKEN_PIC with parameter @p */
#define _CRSTE_PIC(l, p)	_CRSTE_TOK(l, _DAT_TOKEN_PIC, p)

/* A hole is a PIC token whose parameter is PGM_ADDRESSING */
#define _CRSTE_HOLE(l)		_CRSTE_PIC(l, PGM_ADDRESSING)
/* An empty crste has no token and a zero parameter */
#define _CRSTE_EMPTY(l)		_CRSTE_TOK(l, _DAT_TOKEN_NONE, 0)

#define _PMD_EMPTY		_CRSTE_EMPTY(TABLE_TYPE_SEGMENT)

/* Same scheme for ptes; there is no table type field to fill in */
#define _PTE_TOK(t, p)	((union pte) { .tok.type = (t), .tok.par = (p), .tok.i = 1 })
#define _PTE_EMPTY	_PTE_TOK(_DAT_TOKEN_NONE, 0)
52
/*
 * This fake table type is used for page table walks (both for normal page
 * tables and vSIE). The value is parenthesized so that the macro expands to
 * a single primary expression regardless of the surrounding operators.
 */
#define TABLE_TYPE_PAGE_TABLE (-1)
55
/*
 * Flags for the page table walkers. Every non-zero flag is a distinct bit,
 * so flags can be combined with bitwise OR.
 */
enum dat_walk_flags {
	DAT_WALK_DEFAULT	= 0,
	DAT_WALK_LEAF		= 0x01,
	DAT_WALK_ANY		= 0x02,
	DAT_WALK_ALLOC		= 0x04,
	DAT_WALK_SPLIT		= 0x08,
	DAT_WALK_IGN_HOLES	= 0x10,
	DAT_WALK_CONTINUE	= 0x20,
	DAT_WALK_USES_SKEYS	= 0x40,
};
66
/* Frequently used combinations of DAT_WALK_ALLOC with another walk flag */
#define DAT_WALK_SPLIT_ALLOC	(DAT_WALK_ALLOC | DAT_WALK_SPLIT)
#define DAT_WALK_ALLOC_CONTINUE	(DAT_WALK_ALLOC | DAT_WALK_CONTINUE)
#define DAT_WALK_LEAF_ALLOC	(DAT_WALK_ALLOC | DAT_WALK_LEAF)
70
71 union pte {
72 unsigned long val;
73 union page_table_entry h;
74 struct {
75 unsigned long :56; /* Hardware bits */
76 unsigned long u : 1; /* Page unused */
77 unsigned long s : 1; /* Special */
78 unsigned long w : 1; /* Writable */
79 unsigned long r : 1; /* Readable */
80 unsigned long d : 1; /* Dirty */
81 unsigned long y : 1; /* Young */
82 unsigned long sd: 1; /* Soft dirty */
83 unsigned long pr: 1; /* Present */
84 } s;
85 struct {
86 unsigned char hwbytes[7];
87 unsigned char swbyte;
88 };
89 union {
90 struct {
91 unsigned long type :16; /* Token type */
92 unsigned long par :16; /* Token parameter */
93 unsigned long :20;
94 unsigned long : 1; /* Must be 0 */
95 unsigned long i : 1; /* Must be 1 */
96 unsigned long : 2;
97 unsigned long : 7;
98 unsigned long pr : 1; /* Must be 0 */
99 };
100 struct {
101 unsigned long token:32; /* Token and parameter */
102 unsigned long :32;
103 };
104 } tok;
105 };
106
/* Segment and region-3 masks shifted down by PAGE_SHIFT */
#define _SEGMENT_FR_MASK	(_SEGMENT_MASK >> PAGE_SHIFT)
#define _REGION3_FR_MASK	(_REGION3_MASK >> PAGE_SHIFT)
/* Number of pages covered by one segment / one region-3 entry */
#define _PAGES_PER_SEGMENT	_PAGE_ENTRIES
#define _PAGES_PER_REGION3	(_CRST_ENTRIES * _PAGES_PER_SEGMENT)
111
/* Soft dirty bit of a pte; a macro is needed for atomic operations on ptes */
#define _PAGE_SD		0x002

/* PGSTE bits that are needed as macros to perform atomic operations */
#define PGSTE_PCL_BIT		0x0080000000000000UL	/* PCL lock, HW bit */
#define PGSTE_CMMA_D_BIT	0x0000000000008000UL	/* CMMA dirty soft-bit */
118
/* Values stored in the two-bit usage field of union pgste */
enum pgste_gps_usage {
	PGSTE_GPS_USAGE_STABLE		= 0,
	PGSTE_GPS_USAGE_UNUSED		= 1,
	PGSTE_GPS_USAGE_POT_VOLATILE	= 2,
	PGSTE_GPS_USAGE_VOLATILE	= 3,
};
125
/*
 * Page status table entry, available as a raw value, as individual bit
 * fields, or split into 16-bit and 8-bit pieces.
 */
union pgste {
	unsigned long val;	/* Raw value */
	struct {
		unsigned long acc	   : 4;
		unsigned long fp	   : 1;
		unsigned long		   : 3;
		unsigned long pcl	   : 1;	/* Lock bit, see PGSTE_PCL_BIT */
		unsigned long hr	   : 1;
		unsigned long hc	   : 1;
		unsigned long		   : 2;
		unsigned long gr	   : 1;
		unsigned long gc	   : 1;
		unsigned long		   : 1;
		unsigned long		   :16;	/* val16 */
		unsigned long zero	   : 1;
		unsigned long nodat	   : 1;
		unsigned long		   : 4;
		unsigned long usage	   : 2;
		unsigned long		   : 8;
		unsigned long cmma_d	   : 1;	/* Dirty flag for CMMA bits */
		unsigned long prefix_notif : 1;	/* Guest prefix invalidation notification */
		unsigned long vsie_notif   : 1;	/* Referenced in a shadow table */
		unsigned long vsie_gmem	   : 1;	/* Contains nested guest memory */
		unsigned long		   : 4;
		unsigned long		   : 8;
	};
	struct {
		unsigned short hwbytes0;
		unsigned short val16;	/* Used to store chunked values, see dat_{s,g}et_ptval() */
		unsigned short hwbytes4;
		unsigned char flags;	/* Maps to the software bits */
		unsigned char hwbyte7;
	} __packed;
};
160
161 union pmd {
162 unsigned long val;
163 union segment_table_entry h;
164 struct {
165 struct {
166 unsigned long :44; /* HW */
167 unsigned long : 3; /* Unused */
168 unsigned long : 1; /* HW */
169 unsigned long s : 1; /* Special */
170 unsigned long w : 1; /* Writable soft-bit */
171 unsigned long r : 1; /* Readable soft-bit */
172 unsigned long d : 1; /* Dirty */
173 unsigned long y : 1; /* Young */
174 unsigned long : 3; /* HW */
175 unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
176 unsigned long vsie_notif : 1; /* Referenced in a shadow table */
177 unsigned long : 4; /* HW */
178 unsigned long sd : 1; /* Soft-Dirty */
179 unsigned long pr : 1; /* Present */
180 } fc1;
181 } s;
182 };
183
184 union pud {
185 unsigned long val;
186 union region3_table_entry h;
187 struct {
188 struct {
189 unsigned long :33; /* HW */
190 unsigned long :14; /* Unused */
191 unsigned long : 1; /* HW */
192 unsigned long s : 1; /* Special */
193 unsigned long w : 1; /* Writable soft-bit */
194 unsigned long r : 1; /* Readable soft-bit */
195 unsigned long d : 1; /* Dirty */
196 unsigned long y : 1; /* Young */
197 unsigned long : 3; /* HW */
198 unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
199 unsigned long vsie_notif : 1; /* Referenced in a shadow table */
200 unsigned long : 4; /* HW */
201 unsigned long sd : 1; /* Soft-Dirty */
202 unsigned long pr : 1; /* Present */
203 } fc1;
204 } s;
205 };
206
207 union p4d {
208 unsigned long val;
209 union region2_table_entry h;
210 };
211
212 union pgd {
213 unsigned long val;
214 union region1_table_entry h;
215 };
216
217 union crste {
218 unsigned long val;
219 union {
220 struct {
221 unsigned long :52;
222 unsigned long : 1;
223 unsigned long fc: 1;
224 unsigned long p : 1;
225 unsigned long : 1;
226 unsigned long : 2;
227 unsigned long i : 1;
228 unsigned long : 1;
229 unsigned long tt: 2;
230 unsigned long : 2;
231 };
232 struct {
233 unsigned long to:52;
234 unsigned long : 1;
235 unsigned long fc: 1;
236 unsigned long p : 1;
237 unsigned long : 1;
238 unsigned long tf: 2;
239 unsigned long i : 1;
240 unsigned long : 1;
241 unsigned long tt: 2;
242 unsigned long tl: 2;
243 } fc0;
244 struct {
245 unsigned long :47;
246 unsigned long av : 1; /* ACCF-Validity Control */
247 unsigned long acc: 4; /* Access-Control Bits */
248 unsigned long f : 1; /* Fetch-Protection Bit */
249 unsigned long fc : 1; /* Format-Control */
250 unsigned long p : 1; /* DAT-Protection Bit */
251 unsigned long iep: 1; /* Instruction-Execution-Protection */
252 unsigned long : 2;
253 unsigned long i : 1; /* Segment-Invalid Bit */
254 unsigned long cs : 1; /* Common-Segment Bit */
255 unsigned long tt : 2; /* Table-Type Bits */
256 unsigned long : 2;
257 } fc1;
258 } h;
259 struct {
260 struct {
261 unsigned long :47;
262 unsigned long : 1; /* HW (should be 0) */
263 unsigned long s : 1; /* Special */
264 unsigned long w : 1; /* Writable */
265 unsigned long r : 1; /* Readable */
266 unsigned long d : 1; /* Dirty */
267 unsigned long y : 1; /* Young */
268 unsigned long : 3; /* HW */
269 unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
270 unsigned long vsie_notif : 1; /* Referenced in a shadow table */
271 unsigned long : 4; /* HW */
272 unsigned long sd : 1; /* Soft-Dirty */
273 unsigned long pr : 1; /* Present */
274 } fc1;
275 } s;
276 union {
277 struct {
278 unsigned long type :16; /* Token type */
279 unsigned long par :16; /* Token parameter */
280 unsigned long :26;
281 unsigned long i : 1; /* Must be 1 */
282 unsigned long : 1;
283 unsigned long tt : 2;
284 unsigned long : 1;
285 unsigned long pr : 1; /* Must be 0 */
286 };
287 struct {
288 unsigned long token:32; /* Token and parameter */
289 unsigned long :32;
290 };
291 } tok;
292 union pmd pmd;
293 union pud pud;
294 union p4d p4d;
295 union pgd pgd;
296 };
297
/* One-byte storage key, as a raw byte or as individual fields */
union skey {
	unsigned char skey;	/* Raw value */
	struct {
		unsigned char acc  : 4;
		unsigned char fp   : 1;
		unsigned char r	   : 1;
		unsigned char c	   : 1;
		unsigned char zero : 1;
	};
};
308
309 static_assert(sizeof(union pgste) == sizeof(unsigned long));
310 static_assert(sizeof(union pte) == sizeof(unsigned long));
311 static_assert(sizeof(union pmd) == sizeof(unsigned long));
312 static_assert(sizeof(union pud) == sizeof(unsigned long));
313 static_assert(sizeof(union p4d) == sizeof(unsigned long));
314 static_assert(sizeof(union pgd) == sizeof(unsigned long));
315 static_assert(sizeof(union crste) == sizeof(unsigned long));
316 static_assert(sizeof(union skey) == sizeof(char));
317
318 struct segment_table {
319 union pmd pmds[_CRST_ENTRIES];
320 };
321
322 struct region3_table {
323 union pud puds[_CRST_ENTRIES];
324 };
325
326 struct region2_table {
327 union p4d p4ds[_CRST_ENTRIES];
328 };
329
330 struct region1_table {
331 union pgd pgds[_CRST_ENTRIES];
332 };
333
334 struct crst_table {
335 union {
336 union crste crstes[_CRST_ENTRIES];
337 struct segment_table segment;
338 struct region3_table region3;
339 struct region2_table region2;
340 struct region1_table region1;
341 };
342 };
343
344 struct page_table {
345 union pte ptes[_PAGE_ENTRIES];
346 union pgste pgstes[_PAGE_ENTRIES];
347 };
348
349 static_assert(sizeof(struct crst_table) == _CRST_TABLE_SIZE);
350 static_assert(sizeof(struct page_table) == PAGE_SIZE);
351
352 struct dat_walk;
353
354 typedef long (*dat_walk_op)(union crste *crste, gfn_t gfn, gfn_t next, struct dat_walk *w);
355
356 struct dat_walk_ops {
357 union {
358 dat_walk_op crste_ops[4];
359 struct {
360 dat_walk_op pmd_entry;
361 dat_walk_op pud_entry;
362 dat_walk_op p4d_entry;
363 dat_walk_op pgd_entry;
364 };
365 };
366 long (*pte_entry)(union pte *pte, gfn_t gfn, gfn_t next, struct dat_walk *w);
367 };
368
369 struct dat_walk {
370 const struct dat_walk_ops *ops;
371 union crste *last;
372 union pte *last_pte;
373 union asce asce;
374 gfn_t start;
375 gfn_t end;
376 int flags;
377 void *priv;
378 };
379
/* Packed (offset, len) pair filling a single byte, built by MKPTVAL() */
struct ptval_param {
	unsigned char offset : 6;
	unsigned char len    : 2;
};
384
385 /**
386 * _pte() - Useful constructor for union pte
387 * @pfn: the pfn this pte should point to.
388 * @writable: whether the pte should be writable.
389 * @dirty: whether the pte should be dirty.
390 * @special: whether the pte should be marked as special
391 *
392 * The pte is also marked as young and present. If the pte is marked as dirty,
393 * it gets marked as soft-dirty too. If the pte is not dirty, the hardware
394 * protect bit is set (independently of the write softbit); this way proper
395 * dirty tracking can be performed.
396 *
397 * Return: a union pte value.
398 */
_pte(kvm_pfn_t pfn,bool writable,bool dirty,bool special)399 static inline union pte _pte(kvm_pfn_t pfn, bool writable, bool dirty, bool special)
400 {
401 union pte res = { .val = PFN_PHYS(pfn) };
402
403 res.h.p = !dirty;
404 res.s.y = 1;
405 res.s.pr = 1;
406 res.s.w = writable;
407 res.s.d = dirty;
408 res.s.sd = dirty;
409 res.s.s = special;
410 return res;
411 }
412
/*
 * Build a union crste with FC == 0: the value is the physical address of
 * @pfn, the table type is @tt, the table length is _REGION_ENTRY_LENGTH
 * and the table offset is 0.
 */
static inline union crste _crste_fc0(kvm_pfn_t pfn, int tt)
{
	union crste crste = { .val = PFN_PHYS(pfn) };

	crste.h.fc0.tf = 0;
	crste.h.fc0.tl = _REGION_ENTRY_LENGTH;
	crste.h.tt = tt;
	return crste;
}
422
423 /**
424 * _crste() - Useful constructor for union crste with FC=1
425 * @pfn: the pfn this pte should point to.
426 * @tt: the table type
427 * @writable: whether the pte should be writable.
428 * @dirty: whether the pte should be dirty.
429 *
430 * The crste is also marked as young and present. If the crste is marked as
431 * dirty, it gets marked as soft-dirty too. If the crste is not dirty, the
432 * hardware protect bit is set (independently of the write softbit); this way
433 * proper dirty tracking can be performed.
434 *
435 * Return: a union crste value.
436 */
_crste_fc1(kvm_pfn_t pfn,int tt,bool writable,bool dirty)437 static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool writable, bool dirty)
438 {
439 union crste res = { .val = PFN_PHYS(pfn) & _SEGMENT_MASK };
440
441 res.h.tt = tt;
442 res.h.p = !dirty;
443 res.h.fc = 1;
444 res.s.fc1.y = 1;
445 res.s.fc1.pr = 1;
446 res.s.fc1.w = writable;
447 res.s.fc1.d = dirty;
448 res.s.fc1.sd = dirty;
449 return res;
450 }
451
/* One-byte ESSA state, as a raw byte or as individual fields */
union essa_state {
	unsigned char val;	/* Raw value */
	struct {
		unsigned char		: 2;
		unsigned char nodat	: 1;
		unsigned char exception	: 1;
		unsigned char usage	: 2;
		unsigned char content	: 2;
	};
};
462
/**
 * struct vsie_rmap - reverse mapping for shadow page table entries
 * @next: pointer to next rmap in the list
 * @val: raw 64-bit view of @level and @r_gfn
 * @level: signed 8-bit level (presumably the table level of the shadowed
 *         entry -- confirm against the users)
 * @r_gfn: virtual rmap address in the shadow guest address space
 */
struct vsie_rmap {
	struct vsie_rmap *next;
	union {
		unsigned long val;
		struct {
			long level	    : 8;
			unsigned long	    : 4;
			unsigned long r_gfn :52;
		};
	};
};
479
480 static_assert(sizeof(struct vsie_rmap) == 2 * sizeof(long));
481
/* Capacity of each of the three pools in struct kvm_s390_mmu_cache */
#define KVM_S390_MMU_CACHE_N_CRSTS	6
#define KVM_S390_MMU_CACHE_N_PTS	2
#define KVM_S390_MMU_CACHE_N_RMAPS	16

/*
 * Pools of preallocated objects. The n_* counters hold the number of
 * entries currently stored in the corresponding array; the
 * kvm_s390_mmu_cache_alloc_*() helpers take entries from the end.
 */
struct kvm_s390_mmu_cache {
	void *crsts[KVM_S390_MMU_CACHE_N_CRSTS];
	void *pts[KVM_S390_MMU_CACHE_N_PTS];
	void *rmaps[KVM_S390_MMU_CACHE_N_RMAPS];
	short int n_crsts;
	short int n_pts;
	short int n_rmaps;
};
493
494 struct guest_fault {
495 gfn_t gfn; /* Guest frame */
496 kvm_pfn_t pfn; /* Host PFN */
497 struct page *page; /* Host page */
498 union pte *ptep; /* Used to resolve the fault, or NULL */
499 union crste *crstep; /* Used to resolve the fault, or NULL */
500 bool writable; /* Mapping is writable */
501 bool write_attempt; /* Write access attempted */
502 bool attempt_pfault; /* Attempt a pfault first */
503 bool valid; /* This entry contains valid data */
504 void (*callback)(struct guest_fault *f);
505 void *priv;
506 };
507
/*
 * Layout of the values described by the PTVAL_* parameters below; each
 * column is one unit of offset:
 *
 *        0       1       2       3       4       5       6       7
 *    +-------+-------+-------+-------+-------+-------+-------+-------+
 *  0 |                               |           PGT_ADDR            |
 *  8 |        VMADDR         |                                       |
 * 16 |                                                               |
 * 24 |                                                               |
 *
 * MKPTVAL() takes the offset @o and the length @l; the length is stored as
 * the number of two-byte units, rounded up, minus one.
 */
#define MKPTVAL(o, l)	((struct ptval_param) { .len = ((l) + 1) / 2 - 1, .offset = (o) })
#define PTVAL_PGT_ADDR	MKPTVAL(4, 8)
#define PTVAL_VMADDR	MKPTVAL(8, 6)
519
520 union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new,
521 gfn_t gfn, union asce asce, bool uses_skeys);
522 bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
523 union asce asce);
524 void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce);
525
526 long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
527 const struct dat_walk_ops *ops, int flags, void *priv);
528
529 int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
530 int walk_level, union crste **last, union pte **ptepp);
531 void dat_free_level(struct crst_table *table, bool owns_ptes);
532 struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
533 int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype);
534 int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey);
535 int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
536 union skey skey, bool nq);
537 int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
538 union skey skey, union skey *oldkey, bool nq, bool mr, bool mc);
539 int dat_reset_reference_bit(union asce asce, gfn_t gfn);
540 long dat_reset_skeys(union asce asce, gfn_t start);
541
542 unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param);
543 void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val);
544
545 int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
546 u16 type, u16 param);
547 int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
548 bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
549
550 int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
551 long dat_reset_cmma(union asce asce, gfn_t start_gfn);
552 int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values);
553 int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem);
554 int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
555 unsigned long count, unsigned long mask, const uint8_t *bits);
556
557 int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
558
/* Allocation flags used when a kvm_s390_mmu_cache pool is empty */
#define GFP_KVM_S390_MMU_CACHE	(GFP_ATOMIC | __GFP_NOWARN | __GFP_ACCOUNT)
560
kvm_s390_mmu_cache_alloc_pt(struct kvm_s390_mmu_cache * mc)561 static inline struct page_table *kvm_s390_mmu_cache_alloc_pt(struct kvm_s390_mmu_cache *mc)
562 {
563 if (mc->n_pts)
564 return mc->pts[--mc->n_pts];
565 return (void *)__get_free_page(GFP_KVM_S390_MMU_CACHE);
566 }
567
kvm_s390_mmu_cache_alloc_crst(struct kvm_s390_mmu_cache * mc)568 static inline struct crst_table *kvm_s390_mmu_cache_alloc_crst(struct kvm_s390_mmu_cache *mc)
569 {
570 if (mc->n_crsts)
571 return mc->crsts[--mc->n_crsts];
572 return (void *)__get_free_pages(GFP_KVM_S390_MMU_CACHE | __GFP_COMP, CRST_ALLOC_ORDER);
573 }
574
kvm_s390_mmu_cache_alloc_rmap(struct kvm_s390_mmu_cache * mc)575 static inline struct vsie_rmap *kvm_s390_mmu_cache_alloc_rmap(struct kvm_s390_mmu_cache *mc)
576 {
577 if (mc->n_rmaps)
578 return mc->rmaps[--mc->n_rmaps];
579 return kzalloc_obj(struct vsie_rmap, GFP_KVM_S390_MMU_CACHE);
580 }
581
crste_table_start(union crste * crstep)582 static inline struct crst_table *crste_table_start(union crste *crstep)
583 {
584 return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
585 }
586
pte_table_start(union pte * ptep)587 static inline struct page_table *pte_table_start(union pte *ptep)
588 {
589 return (struct page_table *)ALIGN_DOWN((unsigned long)ptep, _PAGE_TABLE_SIZE);
590 }
591
crdte_crste(union crste * crstep,union crste old,union crste new,gfn_t gfn,union asce asce)592 static inline bool crdte_crste(union crste *crstep, union crste old, union crste new, gfn_t gfn,
593 union asce asce)
594 {
595 unsigned long dtt = 0x10 | new.h.tt << 2;
596 void *table = crste_table_start(crstep);
597
598 return crdte(old.val, new.val, table, dtt, gfn_to_gpa(gfn), asce.val);
599 }
600
601 /**
602 * idte_crste() - invalidate a crste entry using idte
603 * @crstep: pointer to the crste to be invalidated
604 * @gfn: a gfn mapped by the crste
605 * @opt: options for the idte instruction
606 * @asce: the asce
607 * @local: whether the operation is cpu-local
608 */
idte_crste(union crste * crstep,gfn_t gfn,unsigned long opt,union asce asce,int local)609 static __always_inline void idte_crste(union crste *crstep, gfn_t gfn, unsigned long opt,
610 union asce asce, int local)
611 {
612 unsigned long table_origin = __pa(crste_table_start(crstep));
613 unsigned long gaddr = gfn_to_gpa(gfn) & HPAGE_MASK;
614
615 if (__builtin_constant_p(opt) && opt == 0) {
616 /* flush without guest asce */
617 asm volatile("idte %[table_origin],0,%[gaddr],%[local]"
618 : "+m" (*crstep)
619 : [table_origin] "a" (table_origin), [gaddr] "a" (gaddr),
620 [local] "i" (local)
621 : "cc");
622 } else {
623 /* flush with guest asce */
624 asm volatile("idte %[table_origin],%[asce],%[gaddr_opt],%[local]"
625 : "+m" (*crstep)
626 : [table_origin] "a" (table_origin), [gaddr_opt] "a" (gaddr | opt),
627 [asce] "a" (asce.val), [local] "i" (local)
628 : "cc");
629 }
630 }
631
dat_init_pgstes(struct page_table * pt,unsigned long val)632 static inline void dat_init_pgstes(struct page_table *pt, unsigned long val)
633 {
634 memset64((void *)pt->pgstes, val, PTRS_PER_PTE);
635 }
636
dat_init_page_table(struct page_table * pt,unsigned long ptes,unsigned long pgstes)637 static inline void dat_init_page_table(struct page_table *pt, unsigned long ptes,
638 unsigned long pgstes)
639 {
640 memset64((void *)pt->ptes, ptes, PTRS_PER_PTE);
641 dat_init_pgstes(pt, pgstes);
642 }
643
asce_end(union asce asce)644 static inline gfn_t asce_end(union asce asce)
645 {
646 return 1ULL << ((asce.dt + 1) * 11 + _SEGMENT_SHIFT - PAGE_SHIFT);
647 }
648
/*
 * Convert a region or segment table entry of any level into a union crste.
 * Other argument types are rejected at compile time.
 */
#define _CRSTE(x) ((union crste) { .val = _Generic((x),	\
			union crste : (x).val,		\
			union pmd : (x).val,		\
			union pud : (x).val,		\
			union p4d : (x).val,		\
			union pgd : (x).val)})
655
/*
 * Cast a pointer to a region or segment table entry of any level to a
 * union crste pointer. Other pointer types are rejected at compile time.
 */
#define _CRSTEP(x) ((union crste *)_Generic((*(x)),	\
				union crste : (x),	\
				union pmd : (x),	\
				union pud : (x),	\
				union p4d : (x),	\
				union pgd : (x)))
662
/*
 * Cast a pointer to a region or segment table of any level to a struct
 * crst_table pointer. Other pointer types are rejected at compile time.
 */
#define _CRSTP(x) ((struct crst_table *)_Generic((*(x)),	\
		struct segment_table : (x),			\
		struct region3_table : (x),			\
		struct region2_table : (x),			\
		struct region1_table : (x),			\
		struct crst_table : (x)))
669
asce_contains_gfn(union asce asce,gfn_t gfn)670 static inline bool asce_contains_gfn(union asce asce, gfn_t gfn)
671 {
672 return gfn < asce_end(asce);
673 }
674
is_pmd(union crste crste)675 static inline bool is_pmd(union crste crste)
676 {
677 return crste.h.tt == TABLE_TYPE_SEGMENT;
678 }
679
is_pud(union crste crste)680 static inline bool is_pud(union crste crste)
681 {
682 return crste.h.tt == TABLE_TYPE_REGION3;
683 }
684
is_p4d(union crste crste)685 static inline bool is_p4d(union crste crste)
686 {
687 return crste.h.tt == TABLE_TYPE_REGION2;
688 }
689
is_pgd(union crste crste)690 static inline bool is_pgd(union crste crste)
691 {
692 return crste.h.tt == TABLE_TYPE_REGION1;
693 }
694
pmd_origin_large(union pmd pmd)695 static inline phys_addr_t pmd_origin_large(union pmd pmd)
696 {
697 return pmd.val & _SEGMENT_ENTRY_ORIGIN_LARGE;
698 }
699
pud_origin_large(union pud pud)700 static inline phys_addr_t pud_origin_large(union pud pud)
701 {
702 return pud.val & _REGION3_ENTRY_ORIGIN_LARGE;
703 }
704
705 /**
706 * crste_origin_large() - Return the large frame origin of a large crste
707 * @crste: The crste whose origin is to be returned. Should be either a
708 * region-3 table entry or a segment table entry, in both cases with
709 * FC set to 1 (large pages).
710 *
711 * Return: The origin of the large frame pointed to by @crste, or -1 if the
712 * crste was not large (wrong table type, or FC==0)
713 */
crste_origin_large(union crste crste)714 static inline phys_addr_t crste_origin_large(union crste crste)
715 {
716 if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
717 return -1;
718 if (is_pmd(crste))
719 return pmd_origin_large(crste.pmd);
720 return pud_origin_large(crste.pud);
721 }
722
/*
 * Origin field of a typed region or segment table entry. Segment entries
 * are masked with _SEGMENT_ENTRY_ORIGIN, all region entries with
 * _REGION_ENTRY_ORIGIN. There is no association for union crste.
 */
#define crste_origin(x) (_Generic((x),				\
		union pgd : (x).val & _REGION_ENTRY_ORIGIN,	\
		union p4d : (x).val & _REGION_ENTRY_ORIGIN,	\
		union pud : (x).val & _REGION_ENTRY_ORIGIN,	\
		union pmd : (x).val & _SEGMENT_ENTRY_ORIGIN))
728
pte_origin(union pte pte)729 static inline unsigned long pte_origin(union pte pte)
730 {
731 return pte.val & PAGE_MASK;
732 }
733
pmd_prefix(union pmd pmd)734 static inline bool pmd_prefix(union pmd pmd)
735 {
736 return pmd.h.fc && pmd.s.fc1.prefix_notif;
737 }
738
pud_prefix(union pud pud)739 static inline bool pud_prefix(union pud pud)
740 {
741 return pud.h.fc && pud.s.fc1.prefix_notif;
742 }
743
crste_leaf(union crste crste)744 static inline bool crste_leaf(union crste crste)
745 {
746 return (crste.h.tt <= TABLE_TYPE_REGION3) && crste.h.fc;
747 }
748
crste_prefix(union crste crste)749 static inline bool crste_prefix(union crste crste)
750 {
751 return crste_leaf(crste) && crste.s.fc1.prefix_notif;
752 }
753
crste_dirty(union crste crste)754 static inline bool crste_dirty(union crste crste)
755 {
756 return crste_leaf(crste) && crste.s.fc1.d;
757 }
758
pgste_of(union pte * pte)759 static inline union pgste *pgste_of(union pte *pte)
760 {
761 return (union pgste *)(pte + _PAGE_ENTRIES);
762 }
763
pte_hole(union pte pte)764 static inline bool pte_hole(union pte pte)
765 {
766 return pte.h.i && !pte.tok.pr && pte.tok.type != _DAT_TOKEN_NONE;
767 }
768
_crste_hole(union crste crste)769 static inline bool _crste_hole(union crste crste)
770 {
771 return crste.h.i && !crste.tok.pr && crste.tok.type != _DAT_TOKEN_NONE;
772 }
773
774 #define crste_hole(x) _crste_hole(_CRSTE(x))
775
_crste_none(union crste crste)776 static inline bool _crste_none(union crste crste)
777 {
778 return crste.h.i && !crste.tok.pr && crste.tok.type == _DAT_TOKEN_NONE;
779 }
780
781 #define crste_none(x) _crste_none(_CRSTE(x))
782
large_pud_to_phys(union pud pud,gfn_t gfn)783 static inline phys_addr_t large_pud_to_phys(union pud pud, gfn_t gfn)
784 {
785 return pud_origin_large(pud) | (gfn_to_gpa(gfn) & ~_REGION3_MASK);
786 }
787
large_pmd_to_phys(union pmd pmd,gfn_t gfn)788 static inline phys_addr_t large_pmd_to_phys(union pmd pmd, gfn_t gfn)
789 {
790 return pmd_origin_large(pmd) | (gfn_to_gpa(gfn) & ~_SEGMENT_MASK);
791 }
792
large_crste_to_phys(union crste crste,gfn_t gfn)793 static inline phys_addr_t large_crste_to_phys(union crste crste, gfn_t gfn)
794 {
795 if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
796 return -1;
797 if (is_pmd(crste))
798 return large_pmd_to_phys(crste.pmd, gfn);
799 return large_pud_to_phys(crste.pud, gfn);
800 }
801
cspg_crste(union crste * crstep,union crste old,union crste new)802 static inline bool cspg_crste(union crste *crstep, union crste old, union crste new)
803 {
804 return cspg(&crstep->val, old.val, new.val);
805 }
806
/* Return the virtual address of the page table @pmd points to */
static inline struct page_table *dereference_pmd(union pmd pmd)
{
	const unsigned long origin = crste_origin(pmd);

	return phys_to_virt(origin);
}
811
/* Return the virtual address of the segment table @pud points to */
static inline struct segment_table *dereference_pud(union pud pud)
{
	const unsigned long origin = crste_origin(pud);

	return phys_to_virt(origin);
}
816
/* Return the virtual address of the region-3 table @p4d points to */
static inline struct region3_table *dereference_p4d(union p4d p4d)
{
	const unsigned long origin = crste_origin(p4d);

	return phys_to_virt(origin);
}
821
/* Return the virtual address of the region-2 table @pgd points to */
static inline struct region2_table *dereference_pgd(union pgd pgd)
{
	const unsigned long origin = crste_origin(pgd);

	return phys_to_virt(origin);
}
826
_dereference_crste(union crste crste)827 static inline struct crst_table *_dereference_crste(union crste crste)
828 {
829 if (unlikely(is_pmd(crste)))
830 return NULL;
831 return phys_to_virt(crste_origin(crste.pud));
832 }
833
834 #define dereference_crste(x) (_Generic((x), \
835 union pud : _dereference_crste(_CRSTE(x)), \
836 union p4d : _dereference_crste(_CRSTE(x)), \
837 union pgd : _dereference_crste(_CRSTE(x)), \
838 union crste : _dereference_crste(_CRSTE(x))))
839
dereference_asce(union asce asce)840 static inline struct crst_table *dereference_asce(union asce asce)
841 {
842 return phys_to_virt(asce.val & _ASCE_ORIGIN);
843 }
844
/* Pass the raw value of @asce to __tlb_flush_idte() */
static inline void asce_flush_tlb(union asce asce)
{
	const unsigned long raw = asce.val;

	__tlb_flush_idte(raw);
}
849
pgste_get_trylock(union pte * ptep,union pgste * res)850 static inline bool pgste_get_trylock(union pte *ptep, union pgste *res)
851 {
852 union pgste *pgstep = pgste_of(ptep);
853 union pgste old_pgste;
854
855 if (READ_ONCE(pgstep->val) & PGSTE_PCL_BIT)
856 return false;
857 old_pgste.val = __atomic64_or_barrier(PGSTE_PCL_BIT, &pgstep->val);
858 if (old_pgste.pcl)
859 return false;
860 old_pgste.pcl = 1;
861 *res = old_pgste;
862 return true;
863 }
864
pgste_get_lock(union pte * ptep)865 static inline union pgste pgste_get_lock(union pte *ptep)
866 {
867 union pgste res;
868
869 while (!pgste_get_trylock(ptep, &res))
870 cpu_relax();
871 return res;
872 }
873
pgste_set_unlock(union pte * ptep,union pgste pgste)874 static inline void pgste_set_unlock(union pte *ptep, union pgste pgste)
875 {
876 pgste.pcl = 0;
877 barrier();
878 WRITE_ONCE(*pgste_of(ptep), pgste);
879 }
880
/*
 * Locked wrapper around __dat_ptep_xchg(): take the pgste lock, perform the
 * exchange, and unlock with the pgste value the exchange returned.
 */
static inline void dat_ptep_xchg(union pte *ptep, union pte new, gfn_t gfn, union asce asce,
				 bool has_skeys)
{
	union pgste pgste = pgste_get_lock(ptep);

	pgste = __dat_ptep_xchg(ptep, pgste, new, gfn, asce, has_skeys);
	pgste_set_unlock(ptep, pgste);
}
890
/* Replace the pte at @ptep with _PTE_EMPTY using dat_ptep_xchg() */
static inline void dat_ptep_clear(union pte *ptep, gfn_t gfn, union asce asce, bool has_skeys)
{
	const union pte empty = _PTE_EMPTY;

	dat_ptep_xchg(ptep, empty, gfn, asce, has_skeys);
}
895
/* Release the page holding the page table @pt */
static inline void dat_free_pt(struct page_table *pt)
{
	unsigned long addr = (unsigned long)pt;

	free_page(addr);
}
900
_dat_free_crst(struct crst_table * table)901 static inline void _dat_free_crst(struct crst_table *table)
902 {
903 free_pages((unsigned long)table, CRST_ALLOC_ORDER);
904 }
905
906 #define dat_free_crst(x) _dat_free_crst(_CRSTP(x))
907
kvm_s390_free_mmu_cache(struct kvm_s390_mmu_cache * mc)908 static inline void kvm_s390_free_mmu_cache(struct kvm_s390_mmu_cache *mc)
909 {
910 if (!mc)
911 return;
912 while (mc->n_pts)
913 dat_free_pt(mc->pts[--mc->n_pts]);
914 while (mc->n_crsts)
915 _dat_free_crst(mc->crsts[--mc->n_crsts]);
916 while (mc->n_rmaps)
917 kfree(mc->rmaps[--mc->n_rmaps]);
918 kfree(mc);
919 }
920
DEFINE_FREE(kvm_s390_mmu_cache,struct kvm_s390_mmu_cache *,if (_T)kvm_s390_free_mmu_cache (_T))921 DEFINE_FREE(kvm_s390_mmu_cache, struct kvm_s390_mmu_cache *, if (_T) kvm_s390_free_mmu_cache(_T))
922
923 static inline struct kvm_s390_mmu_cache *kvm_s390_new_mmu_cache(void)
924 {
925 struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
926
927 mc = kzalloc_obj(*mc, GFP_KERNEL_ACCOUNT);
928 if (mc && !kvm_s390_mmu_cache_topup(mc))
929 return_ptr(mc);
930 return NULL;
931 }
932
dat_pmdp_xchg_atomic(union pmd * pmdp,union pmd old,union pmd new,gfn_t gfn,union asce asce)933 static inline bool dat_pmdp_xchg_atomic(union pmd *pmdp, union pmd old, union pmd new,
934 gfn_t gfn, union asce asce)
935 {
936 return dat_crstep_xchg_atomic(_CRSTEP(pmdp), _CRSTE(old), _CRSTE(new), gfn, asce);
937 }
938
dat_pudp_xchg_atomic(union pud * pudp,union pud old,union pud new,gfn_t gfn,union asce asce)939 static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pud new,
940 gfn_t gfn, union asce asce)
941 {
942 return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
943 }
944
/*
 * Replace the entry at @crstep with an empty entry of the same table type,
 * retrying until dat_crstep_xchg_atomic() succeeds.
 *
 * Return: the entry that was replaced.
 */
static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
{
	union crste empty = _CRSTE_EMPTY(crstep->h.tt);
	union crste oldcrste;

	for (;;) {
		oldcrste = READ_ONCE(*crstep);
		if (dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce))
			return oldcrste;
	}
}
954
get_level(union crste * crstep,union pte * ptep)955 static inline int get_level(union crste *crstep, union pte *ptep)
956 {
957 return ptep ? TABLE_TYPE_PAGE_TABLE : crstep->h.tt;
958 }
959
/*
 * Call dat_set_slot() for the @npages gfns starting at @start with token
 * type _DAT_TOKEN_PIC and parameter PGM_ADDRESSING.
 */
static inline int dat_delete_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
				  unsigned long npages)
{
	const gfn_t end = start + npages;

	return dat_set_slot(mc, asce, start, end, _DAT_TOKEN_PIC, PGM_ADDRESSING);
}
965
/*
 * Call dat_set_slot() for the @npages gfns starting at @start with token
 * type _DAT_TOKEN_NONE and parameter 0.
 */
static inline int dat_create_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
				  unsigned long npages)
{
	const gfn_t end = start + npages;

	return dat_set_slot(mc, asce, start, end, _DAT_TOKEN_NONE, 0);
}
971
crste_is_ucas(union crste crste)972 static inline bool crste_is_ucas(union crste crste)
973 {
974 return is_pmd(crste) && crste.h.i && crste.h.fc0.tl == 1 && crste.h.fc == 0;
975 }
976
977 #endif /* __KVM_S390_DAT_H */
978