xref: /linux/arch/s390/kvm/dat.h (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  *  KVM guest address space mapping code
4  *
5  *    Copyright IBM Corp. 2024, 2025
6  *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
7  */
8 
9 #ifndef __KVM_S390_DAT_H
10 #define __KVM_S390_DAT_H
11 
12 #include <linux/radix-tree.h>
13 #include <linux/refcount.h>
14 #include <linux/io.h>
15 #include <linux/kvm_types.h>
16 #include <linux/pgalloc.h>
17 #include <asm/tlbflush.h>
18 #include <asm/dat-bits.h>
19 
/*
 * Every block that is sent must start with its base address and length.
 * Sending a little clean data is therefore cheaper than starting a new block,
 * as long as that data is smaller than the size of two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE	(2 * sizeof(void *))
/* Same limit as KVM_S390_SKEYS_MAX, kept identical for consistency */
#define KVM_S390_CMMA_SIZE_MAX		((u32)KVM_S390_SKEYS_MAX)
28 
/* Wrap a raw value into a union asce; NULL_ASCE is the all-zero ASCE */
#define _ASCE(raw)	((union asce) { .val = (raw) })
#define NULL_ASCE	_ASCE(0)
31 
/* Token types, as stored in the tok.type field of union pte and union crste */
enum {
	_DAT_TOKEN_NONE	= 0,
	_DAT_TOKEN_PIC	= 1,
};
36 
/*
 * Token constructors: build an entry with tok.i set to 1 that carries a
 * table type (crste only), a token type and a token parameter.
 */
#define _CRSTE_TOK(tbl, typ, prm) ((union crste) {	\
		.tok.i = 1,				\
		.tok.tt = (tbl),			\
		.tok.type = (typ),			\
		.tok.par = (prm),			\
	})
/* Token of type _DAT_TOKEN_PIC carrying the parameter prm */
#define _CRSTE_PIC(tbl, prm)	_CRSTE_TOK(tbl, _DAT_TOKEN_PIC, prm)

/* A hole is a PIC token whose parameter is PGM_ADDRESSING */
#define _CRSTE_HOLE(tbl)	_CRSTE_PIC(tbl, PGM_ADDRESSING)
/* An empty entry is a token of type _DAT_TOKEN_NONE with parameter 0 */
#define _CRSTE_EMPTY(tbl)	_CRSTE_TOK(tbl, _DAT_TOKEN_NONE, 0)

#define _PMD_EMPTY		_CRSTE_EMPTY(TABLE_TYPE_SEGMENT)

/* Same as above for page table entries, which carry no table type */
#define _PTE_TOK(typ, prm)	((union pte) { .tok.i = 1, .tok.type = (typ), .tok.par = (prm) })
#define _PTE_EMPTY		_PTE_TOK(_DAT_TOKEN_NONE, 0)
52 
/*
 * This fake table type is used for page table walks (both for normal page
 * tables and vSIE). The value is parenthesized so that the macro expands to a
 * single primary expression wherever it is used.
 */
#define TABLE_TYPE_PAGE_TABLE (-1)
55 
/* Flags for DAT table walks; each non-default flag is a distinct bit */
enum dat_walk_flags {
	DAT_WALK_DEFAULT	= 0,
	DAT_WALK_LEAF		= 0x01,
	DAT_WALK_ANY		= 0x02,
	DAT_WALK_ALLOC		= 0x04,
	DAT_WALK_SPLIT		= 0x08,
	DAT_WALK_IGN_HOLES	= 0x10,
	DAT_WALK_CONTINUE	= 0x20,
	DAT_WALK_USES_SKEYS	= 0x40,
};

/* Frequently used flag combinations */
#define DAT_WALK_SPLIT_ALLOC	(DAT_WALK_SPLIT | DAT_WALK_ALLOC)
#define DAT_WALK_ALLOC_CONTINUE	(DAT_WALK_CONTINUE | DAT_WALK_ALLOC)
#define DAT_WALK_LEAF_ALLOC	(DAT_WALK_LEAF | DAT_WALK_ALLOC)
70 
71 union pte {
72 	unsigned long val;
73 	union page_table_entry h;
74 	struct {
75 		unsigned long   :56; /* Hardware bits */
76 		unsigned long u : 1; /* Page unused */
77 		unsigned long s : 1; /* Special */
78 		unsigned long w : 1; /* Writable */
79 		unsigned long r : 1; /* Readable */
80 		unsigned long d : 1; /* Dirty */
81 		unsigned long y : 1; /* Young */
82 		unsigned long sd: 1; /* Soft dirty */
83 		unsigned long pr: 1; /* Present */
84 	} s;
85 	struct {
86 		unsigned char hwbytes[7];
87 		unsigned char swbyte;
88 	};
89 	union {
90 		struct {
91 			unsigned long type :16; /* Token type */
92 			unsigned long par  :16; /* Token parameter */
93 			unsigned long      :20;
94 			unsigned long      : 1; /* Must be 0 */
95 			unsigned long i    : 1; /* Must be 1 */
96 			unsigned long      : 2;
97 			unsigned long      : 7;
98 			unsigned long pr   : 1; /* Must be 0 */
99 		};
100 		struct {
101 			unsigned long token:32; /* Token and parameter */
102 			unsigned long      :32;
103 		};
104 	} tok;
105 };
106 
/* Segment and region-3 masks expressed in units of page frames */
#define _SEGMENT_FR_MASK	(_SEGMENT_MASK >> PAGE_SHIFT)
#define _REGION3_FR_MASK	(_REGION3_MASK >> PAGE_SHIFT)
/* Number of pages covered by one segment / one region-3 entry */
#define _PAGES_PER_SEGMENT	_PAGE_ENTRIES
#define _PAGES_PER_REGION3	(_PAGES_PER_SEGMENT * _CRST_ENTRIES)

/* Soft dirty bit of a pte; a macro is needed for atomic operations on ptes */
#define _PAGE_SD		0x002

/* PGSTE bits that need to be available as macros for atomic operations */
#define PGSTE_PCL_BIT		0x0080000000000000UL	/* PCL lock, HW bit */
#define PGSTE_CMMA_D_BIT	0x0000000000008000UL	/* CMMA dirty soft-bit */
118 
/* Usage states; the values fit the 2-bit usage field of union pgste */
enum pgste_gps_usage {
	PGSTE_GPS_USAGE_STABLE		= 0,
	PGSTE_GPS_USAGE_UNUSED		= 1,
	PGSTE_GPS_USAGE_POT_VOLATILE	= 2,
	PGSTE_GPS_USAGE_VOLATILE	= 3,
};
125 
/*
 * union pgste - page status table entry.
 *
 * Three views of the same unsigned long: the raw value, a bit-level view and
 * a packed byte/halfword view. The val16 member of the packed view overlays
 * the unnamed 16-bit field marked "val16" in the bit-level view.
 */
union pgste {
	unsigned long val;
	struct {
		unsigned long acc	   :  4;
		unsigned long fp	   :  1;
		unsigned long		   :  3;
		unsigned long pcl	   :  1;
		unsigned long hr	   :  1;
		unsigned long hc	   :  1;
		unsigned long		   :  2;
		unsigned long gr	   :  1;
		unsigned long gc	   :  1;
		unsigned long		   :  1;
		unsigned long		   : 16;	/* val16 */
		unsigned long zero	   :  1;
		unsigned long nodat	   :  1;
		unsigned long		   :  4;
		unsigned long usage	   :  2;
		unsigned long		   :  8;
		unsigned long cmma_d	   :  1;	/* Dirty flag for CMMA bits */
		unsigned long prefix_notif :  1;	/* Guest prefix invalidation notification */
		unsigned long vsie_notif   :  1;	/* Referenced in a shadow table */
		unsigned long vsie_gmem	   :  1;	/* Contains nested guest memory */
		unsigned long		   :  4;
		unsigned long		   :  8;
	};
	struct {
		unsigned short hwbytes0;
		/* Used to store chunked values, see dat_{s,g}et_ptval() */
		unsigned short val16;
		unsigned short hwbytes4;
		/* Maps to the software bits */
		unsigned char flags;
		unsigned char hwbyte7;
	} __packed;
};
160 
161 union pmd {
162 	unsigned long val;
163 	union segment_table_entry h;
164 	struct {
165 		struct {
166 			unsigned long              :44; /* HW */
167 			unsigned long              : 3; /* Unused */
168 			unsigned long              : 1; /* HW */
169 			unsigned long s            : 1; /* Special */
170 			unsigned long w            : 1; /* Writable soft-bit */
171 			unsigned long r            : 1; /* Readable soft-bit */
172 			unsigned long d            : 1; /* Dirty */
173 			unsigned long y            : 1; /* Young */
174 			unsigned long              : 3; /* HW */
175 			unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
176 			unsigned long vsie_notif   : 1; /* Referenced in a shadow table */
177 			unsigned long              : 4; /* HW */
178 			unsigned long sd           : 1; /* Soft-Dirty */
179 			unsigned long pr           : 1; /* Present */
180 		} fc1;
181 	} s;
182 };
183 
184 union pud {
185 	unsigned long val;
186 	union region3_table_entry h;
187 	struct {
188 		struct {
189 			unsigned long              :33; /* HW */
190 			unsigned long              :14; /* Unused */
191 			unsigned long              : 1; /* HW */
192 			unsigned long s            : 1; /* Special */
193 			unsigned long w            : 1; /* Writable soft-bit */
194 			unsigned long r            : 1; /* Readable soft-bit */
195 			unsigned long d            : 1; /* Dirty */
196 			unsigned long y            : 1; /* Young */
197 			unsigned long              : 3; /* HW */
198 			unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
199 			unsigned long vsie_notif   : 1; /* Referenced in a shadow table */
200 			unsigned long              : 4; /* HW */
201 			unsigned long sd           : 1; /* Soft-Dirty */
202 			unsigned long pr           : 1; /* Present */
203 		} fc1;
204 	} s;
205 };
206 
207 union p4d {
208 	unsigned long val;
209 	union region2_table_entry h;
210 };
211 
212 union pgd {
213 	unsigned long val;
214 	union region1_table_entry h;
215 };
216 
217 union crste {
218 	unsigned long val;
219 	union {
220 		struct {
221 			unsigned long   :52;
222 			unsigned long   : 1;
223 			unsigned long fc: 1;
224 			unsigned long p : 1;
225 			unsigned long   : 1;
226 			unsigned long   : 2;
227 			unsigned long i : 1;
228 			unsigned long   : 1;
229 			unsigned long tt: 2;
230 			unsigned long   : 2;
231 		};
232 		struct {
233 			unsigned long to:52;
234 			unsigned long   : 1;
235 			unsigned long fc: 1;
236 			unsigned long p : 1;
237 			unsigned long   : 1;
238 			unsigned long tf: 2;
239 			unsigned long i : 1;
240 			unsigned long   : 1;
241 			unsigned long tt: 2;
242 			unsigned long tl: 2;
243 		} fc0;
244 		struct {
245 			unsigned long    :47;
246 			unsigned long av : 1; /* ACCF-Validity Control */
247 			unsigned long acc: 4; /* Access-Control Bits */
248 			unsigned long f  : 1; /* Fetch-Protection Bit */
249 			unsigned long fc : 1; /* Format-Control */
250 			unsigned long p  : 1; /* DAT-Protection Bit */
251 			unsigned long iep: 1; /* Instruction-Execution-Protection */
252 			unsigned long    : 2;
253 			unsigned long i  : 1; /* Segment-Invalid Bit */
254 			unsigned long cs : 1; /* Common-Segment Bit */
255 			unsigned long tt : 2; /* Table-Type Bits */
256 			unsigned long    : 2;
257 		} fc1;
258 	} h;
259 	struct {
260 		struct {
261 			unsigned long              :47;
262 			unsigned long              : 1; /* HW (should be 0) */
263 			unsigned long s            : 1; /* Special */
264 			unsigned long w            : 1; /* Writable */
265 			unsigned long r            : 1; /* Readable */
266 			unsigned long d            : 1; /* Dirty */
267 			unsigned long y            : 1; /* Young */
268 			unsigned long              : 3; /* HW */
269 			unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
270 			unsigned long vsie_notif   : 1; /* Referenced in a shadow table */
271 			unsigned long              : 4; /* HW */
272 			unsigned long sd           : 1; /* Soft-Dirty */
273 			unsigned long pr           : 1; /* Present */
274 		} fc1;
275 	} s;
276 	union {
277 		struct {
278 			unsigned long type :16; /* Token type */
279 			unsigned long par  :16; /* Token parameter */
280 			unsigned long      :26;
281 			unsigned long i    : 1; /* Must be 1 */
282 			unsigned long      : 1;
283 			unsigned long tt   : 2;
284 			unsigned long      : 1;
285 			unsigned long pr   : 1; /* Must be 0 */
286 		};
287 		struct {
288 			unsigned long token:32; /* Token and parameter */
289 			unsigned long      :32;
290 		};
291 	} tok;
292 	union pmd pmd;
293 	union pud pud;
294 	union p4d p4d;
295 	union pgd pgd;
296 };
297 
/* One-byte storage key: the raw byte, or its individual fields */
union skey {
	unsigned char skey;
	struct {
		unsigned char acc  : 4;
		unsigned char fp   : 1;
		unsigned char r	   : 1;
		unsigned char c	   : 1;
		unsigned char zero : 1;
	};
};
308 
309 static_assert(sizeof(union pgste) == sizeof(unsigned long));
310 static_assert(sizeof(union pte) == sizeof(unsigned long));
311 static_assert(sizeof(union pmd) == sizeof(unsigned long));
312 static_assert(sizeof(union pud) == sizeof(unsigned long));
313 static_assert(sizeof(union p4d) == sizeof(unsigned long));
314 static_assert(sizeof(union pgd) == sizeof(unsigned long));
315 static_assert(sizeof(union crste) == sizeof(unsigned long));
316 static_assert(sizeof(union skey) == sizeof(char));
317 
318 struct segment_table {
319 	union pmd pmds[_CRST_ENTRIES];
320 };
321 
322 struct region3_table {
323 	union pud puds[_CRST_ENTRIES];
324 };
325 
326 struct region2_table {
327 	union p4d p4ds[_CRST_ENTRIES];
328 };
329 
330 struct region1_table {
331 	union pgd pgds[_CRST_ENTRIES];
332 };
333 
334 struct crst_table {
335 	union {
336 		union crste crstes[_CRST_ENTRIES];
337 		struct segment_table segment;
338 		struct region3_table region3;
339 		struct region2_table region2;
340 		struct region1_table region1;
341 	};
342 };
343 
344 struct page_table {
345 	union pte ptes[_PAGE_ENTRIES];
346 	union pgste pgstes[_PAGE_ENTRIES];
347 };
348 
349 static_assert(sizeof(struct crst_table) == _CRST_TABLE_SIZE);
350 static_assert(sizeof(struct page_table) == PAGE_SIZE);
351 
352 struct dat_walk;
353 
354 typedef long (*dat_walk_op)(union crste *crste, gfn_t gfn, gfn_t next, struct dat_walk *w);
355 
356 struct dat_walk_ops {
357 	union {
358 		dat_walk_op crste_ops[4];
359 		struct {
360 			dat_walk_op pmd_entry;
361 			dat_walk_op pud_entry;
362 			dat_walk_op p4d_entry;
363 			dat_walk_op pgd_entry;
364 		};
365 	};
366 	long (*pte_entry)(union pte *pte, gfn_t gfn, gfn_t next, struct dat_walk *w);
367 };
368 
369 struct dat_walk {
370 	const struct dat_walk_ops *ops;
371 	union crste *last;
372 	union pte *last_pte;
373 	union asce asce;
374 	gfn_t start;
375 	gfn_t end;
376 	int flags;
377 	void *priv;
378 };
379 
/* Location of a value inside a page table: 6-bit offset and 2-bit length code */
struct ptval_param {
	unsigned char offset	: 6;
	unsigned char len	: 2;
};
384 
385 /**
386  * _pte() - Useful constructor for union pte
387  * @pfn: the pfn this pte should point to.
388  * @writable: whether the pte should be writable.
389  * @dirty: whether the pte should be dirty.
390  * @special: whether the pte should be marked as special
391  *
392  * The pte is also marked as young and present. If the pte is marked as dirty,
393  * it gets marked as soft-dirty too. If the pte is not dirty, the hardware
394  * protect bit is set (independently of the write softbit); this way proper
395  * dirty tracking can be performed.
396  *
397  * Return: a union pte value.
398  */
399 static inline union pte _pte(kvm_pfn_t pfn, bool writable, bool dirty, bool special)
400 {
401 	union pte res = { .val = PFN_PHYS(pfn) };
402 
403 	res.h.p = !dirty;
404 	res.s.y = 1;
405 	res.s.pr = 1;
406 	res.s.w = writable;
407 	res.s.d = dirty;
408 	res.s.sd = dirty;
409 	res.s.s = special;
410 	return res;
411 }
412 
413 static inline union crste _crste_fc0(kvm_pfn_t pfn, int tt)
414 {
415 	union crste res = { .val = PFN_PHYS(pfn) };
416 
417 	res.h.tt = tt;
418 	res.h.fc0.tl = _REGION_ENTRY_LENGTH;
419 	res.h.fc0.tf = 0;
420 	return res;
421 }
422 
423 /**
424  * _crste() - Useful constructor for union crste with FC=1
425  * @pfn: the pfn this pte should point to.
426  * @tt: the table type
427  * @writable: whether the pte should be writable.
428  * @dirty: whether the pte should be dirty.
429  *
430  * The crste is also marked as young and present. If the crste is marked as
431  * dirty, it gets marked as soft-dirty too. If the crste is not dirty, the
432  * hardware protect bit is set (independently of the write softbit); this way
433  * proper dirty tracking can be performed.
434  *
435  * Return: a union crste value.
436  */
437 static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool writable, bool dirty)
438 {
439 	union crste res = { .val = PFN_PHYS(pfn) & _SEGMENT_MASK };
440 
441 	res.h.tt = tt;
442 	res.h.p = !dirty;
443 	res.h.fc = 1;
444 	res.s.fc1.y = 1;
445 	res.s.fc1.pr = 1;
446 	res.s.fc1.w = writable;
447 	res.s.fc1.d = dirty;
448 	res.s.fc1.sd = dirty;
449 	return res;
450 }
451 
/* One-byte state used by dat_perform_essa(): raw value or individual fields */
union essa_state {
	unsigned char val;
	struct {
		unsigned char		: 2;
		unsigned char nodat	: 1;
		unsigned char exception	: 1;
		unsigned char usage	: 2;
		unsigned char content	: 2;
	};
};
462 
463 /**
464  * struct vsie_rmap - reverse mapping for shadow page table entries
465  * @next: pointer to next rmap in the list
466  * @r_gfn: virtual rmap address in the shadow guest address space
467  */
468 struct vsie_rmap {
469 	struct vsie_rmap *next;
470 	union {
471 		unsigned long val;
472 		struct {
473 			long          level: 8;
474 			unsigned long      : 4;
475 			unsigned long r_gfn:52;
476 		};
477 	};
478 };
479 
480 static_assert(sizeof(struct vsie_rmap) == 2 * sizeof(long));
481 
/* Capacity of each of the three object stacks in struct kvm_s390_mmu_cache */
#define KVM_S390_MMU_CACHE_N_CRSTS	6
#define KVM_S390_MMU_CACHE_N_PTS	2
#define KVM_S390_MMU_CACHE_N_RMAPS	16

/*
 * Cache of preallocated objects: region/segment tables, page tables and
 * rmaps. Each n_* counter holds the number of objects currently stored in the
 * corresponding array; objects are taken from the end of the array.
 */
struct kvm_s390_mmu_cache {
	void		*crsts[KVM_S390_MMU_CACHE_N_CRSTS];
	void		*pts[KVM_S390_MMU_CACHE_N_PTS];
	void		*rmaps[KVM_S390_MMU_CACHE_N_RMAPS];
	short int	n_crsts;
	short int	n_pts;
	short int	n_rmaps;
};
493 
494 struct guest_fault {
495 	gfn_t gfn;		/* Guest frame */
496 	kvm_pfn_t pfn;		/* Host PFN */
497 	struct page *page;	/* Host page */
498 	union pte *ptep;	/* Used to resolve the fault, or NULL */
499 	union crste *crstep;	/* Used to resolve the fault, or NULL */
500 	bool writable;		/* Mapping is writable */
501 	bool write_attempt;	/* Write access attempted */
502 	bool attempt_pfault;	/* Attempt a pfault first */
503 	bool valid;		/* This entry contains valid data */
504 	bool crste_region3;     /* Whether crstep refers to a region3 entry */
505 	void (*callback)(struct guest_fault *f);
506 	void *priv;
507 };
508 
/*
 * Layout of the values that can be stored in a page table, as offsets:
 *
 *	0	1	2	3	4	5	6	7
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *  0	|				|	    PGT_ADDR		|
 *  8	|	 VMADDR		|					|
 * 16	|								|
 * 24	|								|
 *
 * MKPTVAL() builds a struct ptval_param from an offset and a size; the size
 * is encoded as the number of 2-unit chunks, rounded up, minus one.
 */
#define MKPTVAL(off, sz) \
	((struct ptval_param) { .offset = (off), .len = ((sz) + 1) / 2 - 1})
#define PTVAL_PGT_ADDR	MKPTVAL(4, 8)
#define PTVAL_VMADDR	MKPTVAL(8, 6)
520 
521 union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new,
522 					 gfn_t gfn, union asce asce, bool uses_skeys);
523 bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
524 			    union asce asce);
525 void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce);
526 
527 long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
528 			 const struct dat_walk_ops *ops, int flags, void *priv);
529 
530 int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
531 		   int walk_level, union crste **last, union pte **ptepp);
532 void dat_free_level(struct crst_table *table, bool owns_ptes);
533 struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
534 int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype);
535 int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey);
536 int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
537 			union skey skey, bool nq);
538 int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
539 			     union skey skey, union skey *oldkey, bool nq, bool mr, bool mc);
540 int dat_reset_reference_bit(union asce asce, gfn_t gfn);
541 long dat_reset_skeys(union asce asce, gfn_t start);
542 
543 unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param);
544 void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val);
545 
546 int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
547 		 u16 type, u16 param);
548 int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
549 bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
550 
551 int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
552 long dat_reset_cmma(union asce asce, gfn_t start_gfn);
553 int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values);
554 int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem);
555 int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
556 		      unsigned long count, unsigned long mask, const uint8_t *bits);
557 
558 int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
559 
/* Allocation flags used when the mmu cache has to allocate a new object */
#define GFP_KVM_S390_MMU_CACHE	(GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN)
561 
562 static inline struct page_table *kvm_s390_mmu_cache_alloc_pt(struct kvm_s390_mmu_cache *mc)
563 {
564 	if (mc->n_pts)
565 		return mc->pts[--mc->n_pts];
566 	return (void *)__get_free_page(GFP_KVM_S390_MMU_CACHE);
567 }
568 
569 static inline struct crst_table *kvm_s390_mmu_cache_alloc_crst(struct kvm_s390_mmu_cache *mc)
570 {
571 	if (mc->n_crsts)
572 		return mc->crsts[--mc->n_crsts];
573 	return (void *)__get_free_pages(GFP_KVM_S390_MMU_CACHE | __GFP_COMP, CRST_ALLOC_ORDER);
574 }
575 
576 static inline struct vsie_rmap *kvm_s390_mmu_cache_alloc_rmap(struct kvm_s390_mmu_cache *mc)
577 {
578 	if (mc->n_rmaps)
579 		return mc->rmaps[--mc->n_rmaps];
580 	return kzalloc_obj(struct vsie_rmap, GFP_KVM_S390_MMU_CACHE);
581 }
582 
583 static inline struct crst_table *crste_table_start(union crste *crstep)
584 {
585 	return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
586 }
587 
588 static inline struct page_table *pte_table_start(union pte *ptep)
589 {
590 	return (struct page_table *)ALIGN_DOWN((unsigned long)ptep, _PAGE_TABLE_SIZE);
591 }
592 
593 static inline bool crdte_crste(union crste *crstep, union crste old, union crste new, gfn_t gfn,
594 			       union asce asce)
595 {
596 	unsigned long dtt = 0x10 | new.h.tt << 2;
597 	void *table = crste_table_start(crstep);
598 
599 	return crdte(old.val, new.val, table, dtt, gfn_to_gpa(gfn), asce.val);
600 }
601 
602 /**
603  * idte_crste() - invalidate a crste entry using idte
604  * @crstep: pointer to the crste to be invalidated
605  * @gfn: a gfn mapped by the crste
606  * @opt: options for the idte instruction
607  * @asce: the asce
608  * @local: whether the operation is cpu-local
609  */
610 static __always_inline void idte_crste(union crste *crstep, gfn_t gfn, unsigned long opt,
611 				       union asce asce, int local)
612 {
613 	unsigned long table_origin = __pa(crste_table_start(crstep));
614 	unsigned long gaddr = gfn_to_gpa(gfn) & HPAGE_MASK;
615 
616 	if (__builtin_constant_p(opt) && opt == 0) {
617 		/* flush without guest asce */
618 		asm volatile("idte	%[table_origin],0,%[gaddr],%[local]"
619 			: "+m" (*crstep)
620 			: [table_origin] "a" (table_origin), [gaddr] "a" (gaddr),
621 			  [local] "i" (local)
622 			: "cc");
623 	} else {
624 		/* flush with guest asce */
625 		asm volatile("idte %[table_origin],%[asce],%[gaddr_opt],%[local]"
626 			: "+m" (*crstep)
627 			: [table_origin] "a" (table_origin), [gaddr_opt] "a" (gaddr | opt),
628 			  [asce] "a" (asce.val), [local] "i" (local)
629 			: "cc");
630 	}
631 }
632 
633 static inline void dat_init_pgstes(struct page_table *pt, unsigned long val)
634 {
635 	memset64((void *)pt->pgstes, val, PTRS_PER_PTE);
636 }
637 
638 static inline void dat_init_page_table(struct page_table *pt, unsigned long ptes,
639 				       unsigned long pgstes)
640 {
641 	memset64((void *)pt->ptes, ptes, PTRS_PER_PTE);
642 	dat_init_pgstes(pt, pgstes);
643 }
644 
645 static inline gfn_t asce_end(union asce asce)
646 {
647 	return 1ULL << ((asce.dt + 1) * 11 + _SEGMENT_SHIFT - PAGE_SHIFT);
648 }
649 
/*
 * Convert any region/segment table entry type into a union crste with the
 * same raw value. Other types are rejected at compile time.
 */
#define _CRSTE(x) ((union crste) { .val = _Generic((x),	\
			union crste : (x).val,		\
			union pmd : (x).val,		\
			union pud : (x).val,		\
			union p4d : (x).val,		\
			union pgd : (x).val)})
656 
/*
 * Cast a pointer to any region/segment table entry type into a pointer to
 * union crste. Pointers to other types are rejected at compile time.
 */
#define _CRSTEP(x) ((union crste *)_Generic((*(x)),	\
				union crste : (x),	\
				union pmd : (x),	\
				union pud : (x),	\
				union p4d : (x),	\
				union pgd : (x)))
663 
/*
 * Cast a pointer to any region/segment table type into a pointer to struct
 * crst_table. Pointers to other types are rejected at compile time.
 */
#define _CRSTP(x) ((struct crst_table *)_Generic((*(x)),	\
		struct segment_table : (x),			\
		struct region3_table : (x),			\
		struct region2_table : (x),			\
		struct region1_table : (x),			\
		struct crst_table : (x)))
670 
671 static inline bool asce_contains_gfn(union asce asce, gfn_t gfn)
672 {
673 	return gfn < asce_end(asce);
674 }
675 
676 static inline bool is_pmd(union crste crste)
677 {
678 	return crste.h.tt == TABLE_TYPE_SEGMENT;
679 }
680 
681 static inline bool is_pud(union crste crste)
682 {
683 	return crste.h.tt == TABLE_TYPE_REGION3;
684 }
685 
686 static inline bool is_p4d(union crste crste)
687 {
688 	return crste.h.tt == TABLE_TYPE_REGION2;
689 }
690 
691 static inline bool is_pgd(union crste crste)
692 {
693 	return crste.h.tt == TABLE_TYPE_REGION1;
694 }
695 
696 static inline phys_addr_t pmd_origin_large(union pmd pmd)
697 {
698 	return pmd.val & _SEGMENT_ENTRY_ORIGIN_LARGE;
699 }
700 
701 static inline phys_addr_t pud_origin_large(union pud pud)
702 {
703 	return pud.val & _REGION3_ENTRY_ORIGIN_LARGE;
704 }
705 
706 /**
707  * crste_origin_large() - Return the large frame origin of a large crste
708  * @crste: The crste whose origin is to be returned. Should be either a
709  *         region-3 table entry or a segment table entry, in both cases with
710  *         FC set to 1 (large pages).
711  *
712  * Return: The origin of the large frame pointed to by @crste, or -1 if the
713  *         crste was not large (wrong table type, or FC==0)
714  */
715 static inline phys_addr_t crste_origin_large(union crste crste)
716 {
717 	if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
718 		return -1;
719 	if (is_pmd(crste))
720 		return pmd_origin_large(crste.pmd);
721 	return pud_origin_large(crste.pud);
722 }
723 
/*
 * Origin stored in a level-specific entry: segment entries are masked with
 * _SEGMENT_ENTRY_ORIGIN, all region entries with _REGION_ENTRY_ORIGIN. There
 * is deliberately no association for union crste.
 */
#define crste_origin(x) (_Generic((x),				\
		union pgd : (x).val & _REGION_ENTRY_ORIGIN,	\
		union p4d : (x).val & _REGION_ENTRY_ORIGIN,	\
		union pud : (x).val & _REGION_ENTRY_ORIGIN,	\
		union pmd : (x).val & _SEGMENT_ENTRY_ORIGIN))
729 
730 static inline unsigned long pte_origin(union pte pte)
731 {
732 	return pte.val & PAGE_MASK;
733 }
734 
735 static inline bool pmd_prefix(union pmd pmd)
736 {
737 	return pmd.h.fc && pmd.s.fc1.prefix_notif;
738 }
739 
740 static inline bool pud_prefix(union pud pud)
741 {
742 	return pud.h.fc && pud.s.fc1.prefix_notif;
743 }
744 
745 static inline bool crste_leaf(union crste crste)
746 {
747 	return (crste.h.tt <= TABLE_TYPE_REGION3) && crste.h.fc;
748 }
749 
750 static inline bool crste_prefix(union crste crste)
751 {
752 	return crste_leaf(crste) && crste.s.fc1.prefix_notif;
753 }
754 
755 static inline bool crste_dirty(union crste crste)
756 {
757 	return crste_leaf(crste) && crste.s.fc1.d;
758 }
759 
760 static inline union pgste *pgste_of(union pte *pte)
761 {
762 	return (union pgste *)(pte + _PAGE_ENTRIES);
763 }
764 
765 static inline bool pte_hole(union pte pte)
766 {
767 	return pte.h.i && !pte.tok.pr && pte.tok.type != _DAT_TOKEN_NONE;
768 }
769 
770 static inline bool _crste_hole(union crste crste)
771 {
772 	return crste.h.i && !crste.tok.pr && crste.tok.type != _DAT_TOKEN_NONE;
773 }
774 
775 #define crste_hole(x) _crste_hole(_CRSTE(x))
776 
777 static inline bool _crste_none(union crste crste)
778 {
779 	return crste.h.i && !crste.tok.pr && crste.tok.type == _DAT_TOKEN_NONE;
780 }
781 
782 #define crste_none(x) _crste_none(_CRSTE(x))
783 
784 static inline phys_addr_t large_pud_to_phys(union pud pud, gfn_t gfn)
785 {
786 	return pud_origin_large(pud) | (gfn_to_gpa(gfn) & ~_REGION3_MASK);
787 }
788 
789 static inline phys_addr_t large_pmd_to_phys(union pmd pmd, gfn_t gfn)
790 {
791 	return pmd_origin_large(pmd) | (gfn_to_gpa(gfn) & ~_SEGMENT_MASK);
792 }
793 
794 static inline phys_addr_t large_crste_to_phys(union crste crste, gfn_t gfn)
795 {
796 	if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
797 		return -1;
798 	if (is_pmd(crste))
799 		return large_pmd_to_phys(crste.pmd, gfn);
800 	return large_pud_to_phys(crste.pud, gfn);
801 }
802 
803 static inline bool cspg_crste(union crste *crstep, union crste old, union crste new)
804 {
805 	return cspg(&crstep->val, old.val, new.val);
806 }
807 
808 static inline struct page_table *dereference_pmd(union pmd pmd)
809 {
810 	return phys_to_virt(crste_origin(pmd));
811 }
812 
813 static inline struct segment_table *dereference_pud(union pud pud)
814 {
815 	return phys_to_virt(crste_origin(pud));
816 }
817 
818 static inline struct region3_table *dereference_p4d(union p4d p4d)
819 {
820 	return phys_to_virt(crste_origin(p4d));
821 }
822 
823 static inline struct region2_table *dereference_pgd(union pgd pgd)
824 {
825 	return phys_to_virt(crste_origin(pgd));
826 }
827 
828 static inline struct crst_table *_dereference_crste(union crste crste)
829 {
830 	if (unlikely(is_pmd(crste)))
831 		return NULL;
832 	return phys_to_virt(crste_origin(crste.pud));
833 }
834 
/*
 * Type-generic wrapper around _dereference_crste(). Accepts region entries
 * and generic entries; union pmd is rejected at compile time.
 */
#define dereference_crste(x) (_Generic((x),			\
		union crste : _dereference_crste(_CRSTE(x)),	\
		union pud : _dereference_crste(_CRSTE(x)),	\
		union p4d : _dereference_crste(_CRSTE(x)),	\
		union pgd : _dereference_crste(_CRSTE(x))))
840 
841 static inline struct crst_table *dereference_asce(union asce asce)
842 {
843 	return phys_to_virt(asce.val & _ASCE_ORIGIN);
844 }
845 
846 static inline void asce_flush_tlb(union asce asce)
847 {
848 	__tlb_flush_idte(asce.val);
849 }
850 
851 static inline bool pgste_get_trylock(union pte *ptep, union pgste *res)
852 {
853 	union pgste *pgstep = pgste_of(ptep);
854 	union pgste old_pgste;
855 
856 	if (READ_ONCE(pgstep->val) & PGSTE_PCL_BIT)
857 		return false;
858 	old_pgste.val = __atomic64_or_barrier(PGSTE_PCL_BIT, &pgstep->val);
859 	if (old_pgste.pcl)
860 		return false;
861 	old_pgste.pcl = 1;
862 	*res = old_pgste;
863 	return true;
864 }
865 
866 static inline union pgste pgste_get_lock(union pte *ptep)
867 {
868 	union pgste res;
869 
870 	while (!pgste_get_trylock(ptep, &res))
871 		cpu_relax();
872 	return res;
873 }
874 
875 static inline void pgste_set_unlock(union pte *ptep, union pgste pgste)
876 {
877 	pgste.pcl = 0;
878 	barrier();
879 	WRITE_ONCE(*pgste_of(ptep), pgste);
880 }
881 
882 static inline void dat_ptep_xchg(union pte *ptep, union pte new, gfn_t gfn, union asce asce,
883 				 bool has_skeys)
884 {
885 	union pgste pgste;
886 
887 	pgste = pgste_get_lock(ptep);
888 	pgste = __dat_ptep_xchg(ptep, pgste, new, gfn, asce, has_skeys);
889 	pgste_set_unlock(ptep, pgste);
890 }
891 
892 static inline void dat_ptep_clear(union pte *ptep, gfn_t gfn, union asce asce, bool has_skeys)
893 {
894 	dat_ptep_xchg(ptep, _PTE_EMPTY, gfn, asce, has_skeys);
895 }
896 
/* Free the page that holds the page table @pt */
static inline void dat_free_pt(struct page_table *pt)
{
	unsigned long addr = (unsigned long)pt;

	free_page(addr);
}
901 
902 static inline void _dat_free_crst(struct crst_table *table)
903 {
904 	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
905 }
906 
907 #define dat_free_crst(x) _dat_free_crst(_CRSTP(x))
908 
909 static inline void kvm_s390_free_mmu_cache(struct kvm_s390_mmu_cache *mc)
910 {
911 	if (!mc)
912 		return;
913 	while (mc->n_pts)
914 		dat_free_pt(mc->pts[--mc->n_pts]);
915 	while (mc->n_crsts)
916 		_dat_free_crst(mc->crsts[--mc->n_crsts]);
917 	while (mc->n_rmaps)
918 		kfree(mc->rmaps[--mc->n_rmaps]);
919 	kfree(mc);
920 }
921 
922 DEFINE_FREE(kvm_s390_mmu_cache, struct kvm_s390_mmu_cache *, if (_T) kvm_s390_free_mmu_cache(_T))
923 
924 static inline struct kvm_s390_mmu_cache *kvm_s390_new_mmu_cache(void)
925 {
926 	struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
927 
928 	mc = kzalloc_obj(*mc, GFP_KERNEL_ACCOUNT);
929 	if (mc && !kvm_s390_mmu_cache_topup(mc))
930 		return_ptr(mc);
931 	return NULL;
932 }
933 
934 static inline bool dat_pmdp_xchg_atomic(union pmd *pmdp, union pmd old, union pmd new,
935 					gfn_t gfn, union asce asce)
936 {
937 	return dat_crstep_xchg_atomic(_CRSTEP(pmdp), _CRSTE(old), _CRSTE(new), gfn, asce);
938 }
939 
940 static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pud new,
941 					gfn_t gfn, union asce asce)
942 {
943 	return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
944 }
945 
946 static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
947 {
948 	union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
949 
950 	do {
951 		oldcrste = READ_ONCE(*crstep);
952 	} while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
953 	return oldcrste;
954 }
955 
956 static inline int get_level(union crste *crstep, union pte *ptep)
957 {
958 	return ptep ? TABLE_TYPE_PAGE_TABLE : crstep->h.tt;
959 }
960 
961 static inline int dat_delete_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
962 				  unsigned long npages)
963 {
964 	return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_PIC, PGM_ADDRESSING);
965 }
966 
967 static inline int dat_create_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
968 				  unsigned long npages)
969 {
970 	return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_NONE, 0);
971 }
972 
973 static inline bool crste_is_ucas(union crste crste)
974 {
975 	return is_pmd(crste) && crste.h.i && crste.h.fc0.tl == 1 && crste.h.fc == 0;
976 }
977 
978 #endif /* __KVM_S390_DAT_H */
979