xref: /linux/arch/s390/kvm/gaccess.c (revision 954a209f431c06b62718a49b403bd4c549f0d6fb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * guest access functions
4  *
5  * Copyright IBM Corp. 2014
6  *
7  */
8 
9 #include <linux/vmalloc.h>
10 #include <linux/mm_types.h>
11 #include <linux/err.h>
12 #include <linux/pgtable.h>
13 #include <linux/bitfield.h>
14 #include <asm/access-regs.h>
15 #include <asm/fault.h>
16 #include <asm/gmap.h>
17 #include <asm/dat-bits.h>
18 #include "kvm-s390.h"
19 #include "gmap.h"
20 #include "gaccess.h"
21 
22 /*
23  * vaddress union in order to easily decode a virtual address into its
24  * region first index, region second index etc. parts.
25  */
26 union vaddress {
27 	unsigned long addr;
28 	struct {
29 		unsigned long rfx : 11;
30 		unsigned long rsx : 11;
31 		unsigned long rtx : 11;
32 		unsigned long sx  : 11;
33 		unsigned long px  : 8;
34 		unsigned long bx  : 12;
35 	};
36 	struct {
37 		unsigned long rfx01 : 2;
38 		unsigned long	    : 9;
39 		unsigned long rsx01 : 2;
40 		unsigned long	    : 9;
41 		unsigned long rtx01 : 2;
42 		unsigned long	    : 9;
43 		unsigned long sx01  : 2;
44 		unsigned long	    : 29;
45 	};
46 };
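/*
 * Editor's illustrative sketch, not part of the original file: the bit
 * fields above slice a 64-bit virtual address into the DAT indexes, so the
 * same values could be derived by hand with shifts and masks (assuming the
 * usual s390 layout with the region-first index in the topmost bits):
 *
 *	rfx = (addr >> 53) & 0x7ff;	(region-first index,  bits 0-10)
 *	rsx = (addr >> 42) & 0x7ff;	(region-second index, bits 11-21)
 *	rtx = (addr >> 31) & 0x7ff;	(region-third index,  bits 22-32)
 *	sx  = (addr >> 20) & 0x7ff;	(segment index,       bits 33-43)
 *	px  = (addr >> 12) & 0xff;	(page index,          bits 44-51)
 *	bx  = addr & 0xfff;		(byte index,          bits 52-63)
 */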
47 
48 /*
49  * raddress union which will contain the result (real or absolute address)
50  * after a page table walk. The rfaa, sfaa and pfra members are used to
51  * simply assign them the value of a region, segment or page table entry.
52  */
53 union raddress {
54 	unsigned long addr;
55 	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
56 	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
57 	unsigned long pfra : 52; /* Page-Frame Real Address */
58 };
59 
60 union alet {
61 	u32 val;
62 	struct {
63 		u32 reserved : 7;
64 		u32 p        : 1;
65 		u32 alesn    : 8;
66 		u32 alen     : 16;
67 	};
68 };
69 
70 union ald {
71 	u32 val;
72 	struct {
73 		u32     : 1;
74 		u32 alo : 24;
75 		u32 all : 7;
76 	};
77 };
78 
79 struct ale {
80 	unsigned long i      : 1; /* ALEN-Invalid Bit */
81 	unsigned long        : 5;
82 	unsigned long fo     : 1; /* Fetch-Only Bit */
83 	unsigned long p      : 1; /* Private Bit */
84 	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
85 	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
86 	unsigned long        : 32;
87 	unsigned long        : 1;
88 	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
89 	unsigned long        : 6;
90 	unsigned long astesn : 32; /* ASTE Sequence Number */
91 };
92 
93 struct aste {
94 	unsigned long i      : 1; /* ASX-Invalid Bit */
95 	unsigned long ato    : 29; /* Authority-Table Origin */
96 	unsigned long        : 1;
97 	unsigned long b      : 1; /* Base-Space Bit */
98 	unsigned long ax     : 16; /* Authorization Index */
99 	unsigned long atl    : 12; /* Authority-Table Length */
100 	unsigned long        : 2;
101 	unsigned long ca     : 1; /* Controlled-ASN Bit */
102 	unsigned long ra     : 1; /* Reusable-ASN Bit */
103 	unsigned long asce   : 64; /* Address-Space-Control Element */
104 	unsigned long ald    : 32;
105 	unsigned long astesn : 32;
106 	/* .. more fields there */
107 };
108 
109 int ipte_lock_held(struct kvm *kvm)
110 {
111 	if (sclp.has_siif) {
112 		int rc;
113 
114 		read_lock(&kvm->arch.sca_lock);
115 		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
116 		read_unlock(&kvm->arch.sca_lock);
117 		return rc;
118 	}
119 	return kvm->arch.ipte_lock_count != 0;
120 }
121 
122 static void ipte_lock_simple(struct kvm *kvm)
123 {
124 	union ipte_control old, new, *ic;
125 
126 	mutex_lock(&kvm->arch.ipte_mutex);
127 	kvm->arch.ipte_lock_count++;
128 	if (kvm->arch.ipte_lock_count > 1)
129 		goto out;
130 retry:
131 	read_lock(&kvm->arch.sca_lock);
132 	ic = kvm_s390_get_ipte_control(kvm);
133 	old = READ_ONCE(*ic);
134 	do {
135 		if (old.k) {
136 			read_unlock(&kvm->arch.sca_lock);
137 			cond_resched();
138 			goto retry;
139 		}
140 		new = old;
141 		new.k = 1;
142 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
143 	read_unlock(&kvm->arch.sca_lock);
144 out:
145 	mutex_unlock(&kvm->arch.ipte_mutex);
146 }
147 
148 static void ipte_unlock_simple(struct kvm *kvm)
149 {
150 	union ipte_control old, new, *ic;
151 
152 	mutex_lock(&kvm->arch.ipte_mutex);
153 	kvm->arch.ipte_lock_count--;
154 	if (kvm->arch.ipte_lock_count)
155 		goto out;
156 	read_lock(&kvm->arch.sca_lock);
157 	ic = kvm_s390_get_ipte_control(kvm);
158 	old = READ_ONCE(*ic);
159 	do {
160 		new = old;
161 		new.k = 0;
162 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
163 	read_unlock(&kvm->arch.sca_lock);
164 	wake_up(&kvm->arch.ipte_wq);
165 out:
166 	mutex_unlock(&kvm->arch.ipte_mutex);
167 }
168 
169 static void ipte_lock_siif(struct kvm *kvm)
170 {
171 	union ipte_control old, new, *ic;
172 
173 retry:
174 	read_lock(&kvm->arch.sca_lock);
175 	ic = kvm_s390_get_ipte_control(kvm);
176 	old = READ_ONCE(*ic);
177 	do {
178 		if (old.kg) {
179 			read_unlock(&kvm->arch.sca_lock);
180 			cond_resched();
181 			goto retry;
182 		}
183 		new = old;
184 		new.k = 1;
185 		new.kh++;
186 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
187 	read_unlock(&kvm->arch.sca_lock);
188 }
189 
190 static void ipte_unlock_siif(struct kvm *kvm)
191 {
192 	union ipte_control old, new, *ic;
193 
194 	read_lock(&kvm->arch.sca_lock);
195 	ic = kvm_s390_get_ipte_control(kvm);
196 	old = READ_ONCE(*ic);
197 	do {
198 		new = old;
199 		new.kh--;
200 		if (!new.kh)
201 			new.k = 0;
202 	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
203 	read_unlock(&kvm->arch.sca_lock);
204 	if (!new.kh)
205 		wake_up(&kvm->arch.ipte_wq);
206 }
207 
208 void ipte_lock(struct kvm *kvm)
209 {
210 	if (sclp.has_siif)
211 		ipte_lock_siif(kvm);
212 	else
213 		ipte_lock_simple(kvm);
214 }
215 
216 void ipte_unlock(struct kvm *kvm)
217 {
218 	if (sclp.has_siif)
219 		ipte_unlock_siif(kvm);
220 	else
221 		ipte_unlock_simple(kvm);
222 }
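/*
 * Editor's illustrative sketch, not part of the original file: callers in
 * this file bracket guest DAT-table walks with the IPTE lock so that the
 * guest cannot invalidate table entries mid-walk, roughly:
 *
 *	ipte_lock(vcpu->kvm);
 *	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
 *	... access the translated pages ...
 *	ipte_unlock(vcpu->kvm);
 *
 * See access_guest_with_key() and kvm_s390_shadow_fault() below for the
 * real call sites.
 */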
223 
224 static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
225 			  enum gacc_mode mode)
226 {
227 	union alet alet;
228 	struct ale ale;
229 	struct aste aste;
230 	unsigned long ald_addr, authority_table_addr;
231 	union ald ald;
232 	int eax, rc;
233 	u8 authority_table;
234 
235 	if (ar >= NUM_ACRS)
236 		return -EINVAL;
237 
238 	if (vcpu->arch.acrs_loaded)
239 		save_access_regs(vcpu->run->s.regs.acrs);
240 	alet.val = vcpu->run->s.regs.acrs[ar];
241 
242 	if (ar == 0 || alet.val == 0) {
243 		asce->val = vcpu->arch.sie_block->gcr[1];
244 		return 0;
245 	} else if (alet.val == 1) {
246 		asce->val = vcpu->arch.sie_block->gcr[7];
247 		return 0;
248 	}
249 
250 	if (alet.reserved)
251 		return PGM_ALET_SPECIFICATION;
252 
253 	if (alet.p)
254 		ald_addr = vcpu->arch.sie_block->gcr[5];
255 	else
256 		ald_addr = vcpu->arch.sie_block->gcr[2];
257 	ald_addr &= 0x7fffffc0;
258 
259 	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
260 	if (rc)
261 		return rc;
262 
263 	if (alet.alen / 8 > ald.all)
264 		return PGM_ALEN_TRANSLATION;
265 
266 	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
267 		return PGM_ADDRESSING;
268 
269 	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
270 			     sizeof(struct ale));
271 	if (rc)
272 		return rc;
273 
274 	if (ale.i == 1)
275 		return PGM_ALEN_TRANSLATION;
276 	if (ale.alesn != alet.alesn)
277 		return PGM_ALE_SEQUENCE;
278 
279 	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
280 	if (rc)
281 		return rc;
282 
283 	if (aste.i)
284 		return PGM_ASTE_VALIDITY;
285 	if (aste.astesn != ale.astesn)
286 		return PGM_ASTE_SEQUENCE;
287 
288 	if (ale.p == 1) {
289 		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
290 		if (ale.aleax != eax) {
291 			if (eax / 16 > aste.atl)
292 				return PGM_EXTENDED_AUTHORITY;
293 
294 			authority_table_addr = aste.ato * 4 + eax / 4;
295 
296 			rc = read_guest_real(vcpu, authority_table_addr,
297 					     &authority_table,
298 					     sizeof(u8));
299 			if (rc)
300 				return rc;
301 
302 			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
303 				return PGM_EXTENDED_AUTHORITY;
304 		}
305 	}
306 
307 	if (ale.fo == 1 && mode == GACC_STORE)
308 		return PGM_PROTECTION;
309 
310 	asce->val = aste.asce;
311 	return 0;
312 }
313 
314 enum prot_type {
315 	PROT_TYPE_LA   = 0,
316 	PROT_TYPE_KEYC = 1,
317 	PROT_TYPE_ALC  = 2,
318 	PROT_TYPE_DAT  = 3,
319 	PROT_TYPE_IEP  = 4,
320 	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
321 	PROT_NONE,
322 };
323 
324 static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
325 			    enum gacc_mode mode, enum prot_type prot, bool terminate)
326 {
327 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
328 	union teid *teid;
329 
330 	memset(pgm, 0, sizeof(*pgm));
331 	pgm->code = code;
332 	teid = (union teid *)&pgm->trans_exc_code;
333 
334 	switch (code) {
335 	case PGM_PROTECTION:
336 		switch (prot) {
337 		case PROT_NONE:
338 			/* We should never get here, acts like termination */
339 			WARN_ON_ONCE(1);
340 			break;
341 		case PROT_TYPE_IEP:
342 			teid->b61 = 1;
343 			fallthrough;
344 		case PROT_TYPE_LA:
345 			teid->b56 = 1;
346 			break;
347 		case PROT_TYPE_KEYC:
348 			teid->b60 = 1;
349 			break;
350 		case PROT_TYPE_ALC:
351 			teid->b60 = 1;
352 			fallthrough;
353 		case PROT_TYPE_DAT:
354 			teid->b61 = 1;
355 			break;
356 		}
357 		if (terminate) {
358 			teid->b56 = 0;
359 			teid->b60 = 0;
360 			teid->b61 = 0;
361 		}
362 		fallthrough;
363 	case PGM_ASCE_TYPE:
364 	case PGM_PAGE_TRANSLATION:
365 	case PGM_REGION_FIRST_TRANS:
366 	case PGM_REGION_SECOND_TRANS:
367 	case PGM_REGION_THIRD_TRANS:
368 	case PGM_SEGMENT_TRANSLATION:
369 		/*
370 		 * op_access_id only applies to MOVE_PAGE -> set bit 61
371 		 * exc_access_id has to be set to 0 for some instructions. Both
372 		 * cases have to be handled by the caller.
373 		 */
374 		teid->addr = gva >> PAGE_SHIFT;
375 		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
376 		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
377 		fallthrough;
378 	case PGM_ALEN_TRANSLATION:
379 	case PGM_ALE_SEQUENCE:
380 	case PGM_ASTE_VALIDITY:
381 	case PGM_ASTE_SEQUENCE:
382 	case PGM_EXTENDED_AUTHORITY:
383 		/*
384 		 * We can always store exc_access_id, as it is
385 		 * undefined for non-ar cases. It is undefined for
386 		 * most DAT protection exceptions.
387 		 */
388 		pgm->exc_access_id = ar;
389 		break;
390 	}
391 	return code;
392 }
393 
394 static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
395 		     enum gacc_mode mode, enum prot_type prot)
396 {
397 	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
398 }
399 
400 static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
401 			 unsigned long ga, u8 ar, enum gacc_mode mode)
402 {
403 	int rc;
404 	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
405 
406 	if (!psw.dat) {
407 		asce->val = 0;
408 		asce->r = 1;
409 		return 0;
410 	}
411 
412 	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
413 		psw.as = PSW_BITS_AS_PRIMARY;
414 
415 	switch (psw.as) {
416 	case PSW_BITS_AS_PRIMARY:
417 		asce->val = vcpu->arch.sie_block->gcr[1];
418 		return 0;
419 	case PSW_BITS_AS_SECONDARY:
420 		asce->val = vcpu->arch.sie_block->gcr[7];
421 		return 0;
422 	case PSW_BITS_AS_HOME:
423 		asce->val = vcpu->arch.sie_block->gcr[13];
424 		return 0;
425 	case PSW_BITS_AS_ACCREG:
426 		rc = ar_translation(vcpu, asce, ar, mode);
427 		if (rc > 0)
428 			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
429 		return rc;
430 	}
431 	return 0;
432 }
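/*
 * Editor's note, not part of the original file: the effective ASCE thus
 * comes from CR1 (primary), CR7 (secondary), CR13 (home) or an
 * access-register translation, and a real-mode PSW (DAT off) yields a dummy
 * ASCE with only the "r" bit set.  The typical pattern in this file is:
 *
 *	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
 *	if (rc)
 *		return rc;
 *	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, key);
 */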
433 
434 static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
435 {
436 	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
437 }
438 
439 /**
440  * guest_translate - translate a guest virtual into a guest absolute address
441  * @vcpu: virtual cpu
442  * @gva: guest virtual address
443  * @gpa: points to where guest physical (absolute) address should be stored
444  * @asce: effective asce
445  * @mode: indicates the access mode to be used
446  * @prot: returns the type for protection exceptions
447  *
448  * Translate a guest virtual address into a guest absolute address by means
449  * of dynamic address translation as specified by the architecture.
450  * If the resulting absolute address is not available in the configuration
451  * an addressing exception is indicated and @gpa will not be changed.
452  *
453  * Returns: - zero on success; @gpa contains the resulting absolute address
454  *	    - a negative value if guest access failed due to e.g. broken
455  *	      guest mapping
456  *	    - a positive value if an access exception happened. In this case
457  *	      the returned value is the program interruption code as defined
458  *	      by the architecture
459  */
460 static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
461 				     unsigned long *gpa, const union asce asce,
462 				     enum gacc_mode mode, enum prot_type *prot)
463 {
464 	union vaddress vaddr = {.addr = gva};
465 	union raddress raddr = {.addr = gva};
466 	union page_table_entry pte;
467 	int dat_protection = 0;
468 	int iep_protection = 0;
469 	union ctlreg0 ctlreg0;
470 	unsigned long ptr;
471 	int edat1, edat2, iep;
472 
473 	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
474 	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
475 	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
476 	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
477 	if (asce.r)
478 		goto real_address;
479 	ptr = asce.rsto * PAGE_SIZE;
480 	switch (asce.dt) {
481 	case ASCE_TYPE_REGION1:
482 		if (vaddr.rfx01 > asce.tl)
483 			return PGM_REGION_FIRST_TRANS;
484 		ptr += vaddr.rfx * 8;
485 		break;
486 	case ASCE_TYPE_REGION2:
487 		if (vaddr.rfx)
488 			return PGM_ASCE_TYPE;
489 		if (vaddr.rsx01 > asce.tl)
490 			return PGM_REGION_SECOND_TRANS;
491 		ptr += vaddr.rsx * 8;
492 		break;
493 	case ASCE_TYPE_REGION3:
494 		if (vaddr.rfx || vaddr.rsx)
495 			return PGM_ASCE_TYPE;
496 		if (vaddr.rtx01 > asce.tl)
497 			return PGM_REGION_THIRD_TRANS;
498 		ptr += vaddr.rtx * 8;
499 		break;
500 	case ASCE_TYPE_SEGMENT:
501 		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
502 			return PGM_ASCE_TYPE;
503 		if (vaddr.sx01 > asce.tl)
504 			return PGM_SEGMENT_TRANSLATION;
505 		ptr += vaddr.sx * 8;
506 		break;
507 	}
508 	switch (asce.dt) {
509 	case ASCE_TYPE_REGION1:	{
510 		union region1_table_entry rfte;
511 
512 		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
513 			return PGM_ADDRESSING;
514 		if (deref_table(vcpu->kvm, ptr, &rfte.val))
515 			return -EFAULT;
516 		if (rfte.i)
517 			return PGM_REGION_FIRST_TRANS;
518 		if (rfte.tt != TABLE_TYPE_REGION1)
519 			return PGM_TRANSLATION_SPEC;
520 		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
521 			return PGM_REGION_SECOND_TRANS;
522 		if (edat1)
523 			dat_protection |= rfte.p;
524 		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
525 	}
526 		fallthrough;
527 	case ASCE_TYPE_REGION2: {
528 		union region2_table_entry rste;
529 
530 		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
531 			return PGM_ADDRESSING;
532 		if (deref_table(vcpu->kvm, ptr, &rste.val))
533 			return -EFAULT;
534 		if (rste.i)
535 			return PGM_REGION_SECOND_TRANS;
536 		if (rste.tt != TABLE_TYPE_REGION2)
537 			return PGM_TRANSLATION_SPEC;
538 		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
539 			return PGM_REGION_THIRD_TRANS;
540 		if (edat1)
541 			dat_protection |= rste.p;
542 		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
543 	}
544 		fallthrough;
545 	case ASCE_TYPE_REGION3: {
546 		union region3_table_entry rtte;
547 
548 		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
549 			return PGM_ADDRESSING;
550 		if (deref_table(vcpu->kvm, ptr, &rtte.val))
551 			return -EFAULT;
552 		if (rtte.i)
553 			return PGM_REGION_THIRD_TRANS;
554 		if (rtte.tt != TABLE_TYPE_REGION3)
555 			return PGM_TRANSLATION_SPEC;
556 		if (rtte.cr && asce.p && edat2)
557 			return PGM_TRANSLATION_SPEC;
558 		if (rtte.fc && edat2) {
559 			dat_protection |= rtte.fc1.p;
560 			iep_protection = rtte.fc1.iep;
561 			raddr.rfaa = rtte.fc1.rfaa;
562 			goto absolute_address;
563 		}
564 		if (vaddr.sx01 < rtte.fc0.tf)
565 			return PGM_SEGMENT_TRANSLATION;
566 		if (vaddr.sx01 > rtte.fc0.tl)
567 			return PGM_SEGMENT_TRANSLATION;
568 		if (edat1)
569 			dat_protection |= rtte.fc0.p;
570 		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
571 	}
572 		fallthrough;
573 	case ASCE_TYPE_SEGMENT: {
574 		union segment_table_entry ste;
575 
576 		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
577 			return PGM_ADDRESSING;
578 		if (deref_table(vcpu->kvm, ptr, &ste.val))
579 			return -EFAULT;
580 		if (ste.i)
581 			return PGM_SEGMENT_TRANSLATION;
582 		if (ste.tt != TABLE_TYPE_SEGMENT)
583 			return PGM_TRANSLATION_SPEC;
584 		if (ste.cs && asce.p)
585 			return PGM_TRANSLATION_SPEC;
586 		if (ste.fc && edat1) {
587 			dat_protection |= ste.fc1.p;
588 			iep_protection = ste.fc1.iep;
589 			raddr.sfaa = ste.fc1.sfaa;
590 			goto absolute_address;
591 		}
592 		dat_protection |= ste.fc0.p;
593 		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
594 	}
595 	}
596 	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
597 		return PGM_ADDRESSING;
598 	if (deref_table(vcpu->kvm, ptr, &pte.val))
599 		return -EFAULT;
600 	if (pte.i)
601 		return PGM_PAGE_TRANSLATION;
602 	if (pte.z)
603 		return PGM_TRANSLATION_SPEC;
604 	dat_protection |= pte.p;
605 	iep_protection = pte.iep;
606 	raddr.pfra = pte.pfra;
607 real_address:
608 	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
609 absolute_address:
610 	if (mode == GACC_STORE && dat_protection) {
611 		*prot = PROT_TYPE_DAT;
612 		return PGM_PROTECTION;
613 	}
614 	if (mode == GACC_IFETCH && iep_protection && iep) {
615 		*prot = PROT_TYPE_IEP;
616 		return PGM_PROTECTION;
617 	}
618 	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
619 		return PGM_ADDRESSING;
620 	*gpa = raddr.addr;
621 	return 0;
622 }
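/*
 * Editor's note, not part of the original file: at each level the walk
 * above computes the address of the next 8-byte table entry as
 *
 *	ptr = table_origin * PAGE_SIZE + index * 8;
 *
 * (for example ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8), with the page
 * table as the one exception, since its origin is in 2 KB units
 * (ste.fc0.pto * (PAGE_SIZE / 2)).  The final frame address is then merged
 * with the remaining offset bits of @gva through the raddress union,
 * because raddr was initialized with the full virtual address.
 */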
623 
624 static inline int is_low_address(unsigned long ga)
625 {
626 	/* Check for address ranges 0..511 and 4096..4607 */
627 	return (ga & ~0x11fful) == 0;
628 }
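/*
 * Editor's note, not part of the original file: ~0x11ff masks out the low
 * nine bits and the 0x1000 bit, so the test accepts exactly the addresses
 * whose set bits all lie within 0x11ff: 0x0000-0x01ff (0..511) and
 * 0x1000-0x11ff (4096..4607), the two low-address-protected ranges named
 * in the comment above.
 */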
629 
630 static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
631 					  const union asce asce)
632 {
633 	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
634 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
635 
636 	if (!ctlreg0.lap)
637 		return 0;
638 	if (psw_bits(*psw).dat && asce.p)
639 		return 0;
640 	return 1;
641 }
642 
643 static int vm_check_access_key(struct kvm *kvm, u8 access_key,
644 			       enum gacc_mode mode, gpa_t gpa)
645 {
646 	u8 storage_key, access_control;
647 	bool fetch_protected;
648 	unsigned long hva;
649 	int r;
650 
651 	if (access_key == 0)
652 		return 0;
653 
654 	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
655 	if (kvm_is_error_hva(hva))
656 		return PGM_ADDRESSING;
657 
658 	mmap_read_lock(current->mm);
659 	r = get_guest_storage_key(current->mm, hva, &storage_key);
660 	mmap_read_unlock(current->mm);
661 	if (r)
662 		return r;
663 	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
664 	if (access_control == access_key)
665 		return 0;
666 	fetch_protected = storage_key & _PAGE_FP_BIT;
667 	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
668 		return 0;
669 	return PGM_PROTECTION;
670 }
671 
672 static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
673 					   union asce asce)
674 {
675 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
676 	unsigned long override;
677 
678 	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
679 		/* check if fetch protection override enabled */
680 		override = vcpu->arch.sie_block->gcr[0];
681 		override &= CR0_FETCH_PROTECTION_OVERRIDE;
682 		/* not applicable if subject to DAT && private space */
683 		override = override && !(psw_bits(*psw).dat && asce.p);
684 		return override;
685 	}
686 	return false;
687 }
688 
689 static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
690 {
691 	return ga < 2048 && ga + len <= 2048;
692 }
693 
694 static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
695 {
696 	/* check if storage protection override enabled */
697 	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
698 }
699 
700 static bool storage_prot_override_applies(u8 access_control)
701 {
702 	/* matches special storage protection override key (9) -> allow */
703 	return access_control == PAGE_SPO_ACC;
704 }
705 
706 static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
707 				 enum gacc_mode mode, union asce asce, gpa_t gpa,
708 				 unsigned long ga, unsigned int len)
709 {
710 	u8 storage_key, access_control;
711 	unsigned long hva;
712 	int r;
713 
714 	/* access key 0 matches any storage key -> allow */
715 	if (access_key == 0)
716 		return 0;
717 	/*
718 	 * caller needs to ensure that gfn is accessible, so we can
719 	 * assume that this cannot fail
720 	 */
721 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
722 	mmap_read_lock(current->mm);
723 	r = get_guest_storage_key(current->mm, hva, &storage_key);
724 	mmap_read_unlock(current->mm);
725 	if (r)
726 		return r;
727 	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
728 	/* access key matches storage key -> allow */
729 	if (access_control == access_key)
730 		return 0;
731 	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
732 		/* it is a fetch and fetch protection is off -> allow */
733 		if (!(storage_key & _PAGE_FP_BIT))
734 			return 0;
735 		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
736 		    fetch_prot_override_applies(ga, len))
737 			return 0;
738 	}
739 	if (storage_prot_override_applicable(vcpu) &&
740 	    storage_prot_override_applies(access_control))
741 		return 0;
742 	return PGM_PROTECTION;
743 }
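/*
 * Editor's summary, not part of the original file: the check above permits
 * an access when any of the following holds, and reports PGM_PROTECTION
 * otherwise:
 *
 *	access_key == 0 (matches every storage key)
 *	access_key equals the ACC bits of the storage key
 *	fetch access and the page's fetch-protection bit is off
 *	fetch access, fetch-protection override applicable and the effective
 *	    address range lies below 2048
 *	storage-protection override enabled and ACC == 9 (PAGE_SPO_ACC)
 */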
744 
745 /**
746  * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
747  * covering a logical range
748  * @vcpu: virtual cpu
749  * @ga: guest address, start of range
750  * @ar: access register
751  * @gpas: output argument, may be NULL
752  * @len: length of range in bytes
753  * @asce: address-space-control element to use for translation
754  * @mode: access mode
755  * @access_key: access key to mach the range's storage keys against
756  *
757  * Translate a logical range to a series of guest absolute addresses,
758  * such that the concatenation of page fragments starting at each gpa make up
759  * the whole range.
760  * The translation is performed as if done by the cpu for the given @asce, @ar,
761  * @mode and state of the @vcpu.
762  * If the translation causes an exception, its program interruption code is
763  * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
764  * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
765  * a correct exception into the guest.
766  * The resulting gpas are stored into @gpas, unless it is NULL.
767  *
768  * Note: All fragments except the first one start at the beginning of a page.
769  *	 When deriving the boundaries of a fragment from a gpa, all but the last
770  *	 fragment end at the end of the page.
771  *
772  * Return:
773  * * 0		- success
774  * * <0		- translation could not be performed, for example if guest
775  *		  memory could not be accessed
776  * * >0		- an access exception occurred. In this case the returned value
777  *		  is the program interruption code and the contents of pgm may
778  *		  be used to inject an exception into the guest.
779  */
780 static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
781 			       unsigned long *gpas, unsigned long len,
782 			       const union asce asce, enum gacc_mode mode,
783 			       u8 access_key)
784 {
785 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
786 	unsigned int offset = offset_in_page(ga);
787 	unsigned int fragment_len;
788 	int lap_enabled, rc = 0;
789 	enum prot_type prot;
790 	unsigned long gpa;
791 
792 	lap_enabled = low_address_protection_enabled(vcpu, asce);
793 	while (min(PAGE_SIZE - offset, len) > 0) {
794 		fragment_len = min(PAGE_SIZE - offset, len);
795 		ga = kvm_s390_logical_to_effective(vcpu, ga);
796 		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
797 			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
798 					 PROT_TYPE_LA);
799 		if (psw_bits(*psw).dat) {
800 			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
801 			if (rc < 0)
802 				return rc;
803 		} else {
804 			gpa = kvm_s390_real_to_abs(vcpu, ga);
805 			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
806 				rc = PGM_ADDRESSING;
807 				prot = PROT_NONE;
808 			}
809 		}
810 		if (rc)
811 			return trans_exc(vcpu, rc, ga, ar, mode, prot);
812 		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
813 					   fragment_len);
814 		if (rc)
815 			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
816 		if (gpas)
817 			*gpas++ = gpa;
818 		offset = 0;
819 		ga += fragment_len;
820 		len -= fragment_len;
821 	}
822 	return 0;
823 }
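/*
 * Editor's worked example, not part of the original file (addresses made
 * up): with 4 KB pages, ga = 0x5ffe and len = 8 the loop above yields two
 * fragments, 2 bytes at the translation of 0x5ffe and 6 bytes starting at
 * the translation of 0x6000, so @gpas receives two entries.  This matches
 * how access_guest_with_key() later recomputes each fragment length from
 * offset_in_page(gpas[idx]).
 */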
824 
825 static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
826 			     void *data, unsigned int len)
827 {
828 	const unsigned int offset = offset_in_page(gpa);
829 	const gfn_t gfn = gpa_to_gfn(gpa);
830 	int rc;
831 
832 	if (!gfn_to_memslot(kvm, gfn))
833 		return PGM_ADDRESSING;
834 	if (mode == GACC_STORE)
835 		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
836 	else
837 		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
838 	return rc;
839 }
840 
841 static int
842 access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
843 			   void *data, unsigned int len, u8 access_key)
844 {
845 	struct kvm_memory_slot *slot;
846 	bool writable;
847 	gfn_t gfn;
848 	hva_t hva;
849 	int rc;
850 
851 	gfn = gpa >> PAGE_SHIFT;
852 	slot = gfn_to_memslot(kvm, gfn);
853 	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
854 
855 	if (kvm_is_error_hva(hva))
856 		return PGM_ADDRESSING;
857 	/*
858 	 * Check if it's a read-only memslot, even though that cannot occur
859 	 * since those are unsupported.  Don't try to actually handle that case.
860 	 */
861 	if (!writable && mode == GACC_STORE)
862 		return -EOPNOTSUPP;
863 	hva += offset_in_page(gpa);
864 	if (mode == GACC_STORE)
865 		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
866 	else
867 		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
868 	if (rc)
869 		return PGM_PROTECTION;
870 	if (mode == GACC_STORE)
871 		mark_page_dirty_in_slot(kvm, slot, gfn);
872 	return 0;
873 }
874 
875 int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
876 			      unsigned long len, enum gacc_mode mode, u8 access_key)
877 {
878 	int offset = offset_in_page(gpa);
879 	int fragment_len;
880 	int rc;
881 
882 	while (min(PAGE_SIZE - offset, len) > 0) {
883 		fragment_len = min(PAGE_SIZE - offset, len);
884 		rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
885 		if (rc)
886 			return rc;
887 		offset = 0;
888 		len -= fragment_len;
889 		data += fragment_len;
890 		gpa += fragment_len;
891 	}
892 	return 0;
893 }
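/*
 * Editor's usage sketch, not part of the original file (buffer and key are
 * made up):
 *
 *	u8 buf[32];
 *	int rc;
 *
 *	rc = access_guest_abs_with_key(kvm, gpa, buf, sizeof(buf),
 *				       GACC_FETCH, access_key);
 *
 * A positive rc is a PGM_* code (e.g. PGM_PROTECTION on a key mismatch), a
 * negative rc a host-side error; GACC_STORE works the same way and marks
 * the touched pages dirty.
 */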
894 
895 int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
896 			  void *data, unsigned long len, enum gacc_mode mode,
897 			  u8 access_key)
898 {
899 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
900 	unsigned long nr_pages, idx;
901 	unsigned long gpa_array[2];
902 	unsigned int fragment_len;
903 	unsigned long *gpas;
904 	enum prot_type prot;
905 	int need_ipte_lock;
906 	union asce asce;
907 	bool try_storage_prot_override;
908 	bool try_fetch_prot_override;
909 	int rc;
910 
911 	if (!len)
912 		return 0;
913 	ga = kvm_s390_logical_to_effective(vcpu, ga);
914 	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
915 	if (rc)
916 		return rc;
917 	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
918 	gpas = gpa_array;
919 	if (nr_pages > ARRAY_SIZE(gpa_array))
920 		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
921 	if (!gpas)
922 		return -ENOMEM;
923 	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
924 	try_storage_prot_override = storage_prot_override_applicable(vcpu);
925 	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
926 	if (need_ipte_lock)
927 		ipte_lock(vcpu->kvm);
928 	/*
929 	 * Since we do the access further down ultimately via a move instruction
930 	 * that does key checking and returns an error in case of a protection
931 	 * violation, we don't need to do the check during address translation.
932 	 * Skip it by passing access key 0, which matches any storage key,
933 	 * obviating the need for any further checks. As a result the check is
934 	 * handled entirely in hardware on access, we only need to take care to
935 	 * forego key protection checking if fetch protection override applies or
936 	 * retry with the special key 9 in case of storage protection override.
937 	 */
938 	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
939 	if (rc)
940 		goto out_unlock;
941 	for (idx = 0; idx < nr_pages; idx++) {
942 		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
943 		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
944 			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
945 					       data, fragment_len);
946 		} else {
947 			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
948 							data, fragment_len, access_key);
949 		}
950 		if (rc == PGM_PROTECTION && try_storage_prot_override)
951 			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
952 							data, fragment_len, PAGE_SPO_ACC);
953 		if (rc)
954 			break;
955 		len -= fragment_len;
956 		data += fragment_len;
957 		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
958 	}
959 	if (rc > 0) {
960 		bool terminate = (mode == GACC_STORE) && (idx > 0);
961 
962 		if (rc == PGM_PROTECTION)
963 			prot = PROT_TYPE_KEYC;
964 		else
965 			prot = PROT_NONE;
966 		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
967 	}
968 out_unlock:
969 	if (need_ipte_lock)
970 		ipte_unlock(vcpu->kvm);
971 	if (nr_pages > ARRAY_SIZE(gpa_array))
972 		vfree(gpas);
973 	return rc;
974 }
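/*
 * Editor's usage sketch, not part of the original file (the local variable
 * is made up):
 *
 *	u64 val;
 *	int rc;
 *
 *	rc = access_guest_with_key(vcpu, ga, ar, &val, sizeof(val),
 *				   GACC_FETCH, 0);
 *
 * Access key 0 matches any storage key; a caller that wants key-controlled
 * protection passes a real key instead and, on a mismatch that no override
 * rescues, gets PGM_PROTECTION back with vcpu->arch.pgm already set up for
 * injection.
 */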
975 
976 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
977 		      void *data, unsigned long len, enum gacc_mode mode)
978 {
979 	unsigned int fragment_len;
980 	unsigned long gpa;
981 	int rc = 0;
982 
983 	while (len && !rc) {
984 		gpa = kvm_s390_real_to_abs(vcpu, gra);
985 		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
986 		rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
987 		len -= fragment_len;
988 		gra += fragment_len;
989 		data += fragment_len;
990 	}
991 	if (rc > 0)
992 		vcpu->arch.pgm.code = rc;
993 	return rc;
994 }
995 
996 /**
997  * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
998  * @kvm: Virtual machine instance.
999  * @gpa: Absolute guest address of the location to be changed.
1000  * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
1001  *       non power of two will result in failure.
1002  * @old_addr: Pointer to old value. If the location at @gpa contains this value,
1003  *            the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
1004  *            *@old_addr contains the value at @gpa before the attempt to
1005  *            exchange the value.
1006  * @new: The value to place at @gpa.
1007  * @access_key: The access key to use for the guest access.
1008  * @success: output value indicating if an exchange occurred.
1009  *
1010  * Atomically exchange the value at @gpa by @new, if it contains *@old.
1011  * Honors storage keys.
1012  *
1013  * Return: * 0: successful exchange
1014  *         * >0: a program interruption code indicating the reason cmpxchg could
1015  *               not be attempted
1016  *         * -EINVAL: address misaligned or len not power of two
1017  *         * -EAGAIN: transient failure (len 1 or 2)
1018  *         * -EOPNOTSUPP: read-only memslot (should never occur)
1019  */
1020 int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
1021 			       __uint128_t *old_addr, __uint128_t new,
1022 			       u8 access_key, bool *success)
1023 {
1024 	gfn_t gfn = gpa_to_gfn(gpa);
1025 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1026 	bool writable;
1027 	hva_t hva;
1028 	int ret;
1029 
1030 	if (!IS_ALIGNED(gpa, len))
1031 		return -EINVAL;
1032 
1033 	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
1034 	if (kvm_is_error_hva(hva))
1035 		return PGM_ADDRESSING;
1036 	/*
1037 	 * Check if it's a read-only memslot, even though that cannot occur
1038 	 * since those are unsupported.
1039 	 * Don't try to actually handle that case.
1040 	 */
1041 	if (!writable)
1042 		return -EOPNOTSUPP;
1043 
1044 	hva += offset_in_page(gpa);
1045 	/*
1046 	 * The cmpxchg_user_key macro depends on the type of "old", so we need
1047 	 * a case for each valid length and get some code duplication as long
1048 	 * as we don't introduce a new macro.
1049 	 */
1050 	switch (len) {
1051 	case 1: {
1052 		u8 old;
1053 
1054 		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
1055 		*success = !ret && old == *old_addr;
1056 		*old_addr = old;
1057 		break;
1058 	}
1059 	case 2: {
1060 		u16 old;
1061 
1062 		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
1063 		*success = !ret && old == *old_addr;
1064 		*old_addr = old;
1065 		break;
1066 	}
1067 	case 4: {
1068 		u32 old;
1069 
1070 		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
1071 		*success = !ret && old == *old_addr;
1072 		*old_addr = old;
1073 		break;
1074 	}
1075 	case 8: {
1076 		u64 old;
1077 
1078 		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
1079 		*success = !ret && old == *old_addr;
1080 		*old_addr = old;
1081 		break;
1082 	}
1083 	case 16: {
1084 		__uint128_t old;
1085 
1086 		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
1087 		*success = !ret && old == *old_addr;
1088 		*old_addr = old;
1089 		break;
1090 	}
1091 	default:
1092 		return -EINVAL;
1093 	}
1094 	if (*success)
1095 		mark_page_dirty_in_slot(kvm, slot, gfn);
1096 	/*
1097 	 * Assume that the fault is caused by protection, either key protection
1098 	 * or user page write protection.
1099 	 */
1100 	if (ret == -EFAULT)
1101 		ret = PGM_PROTECTION;
1102 	return ret;
1103 }
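/*
 * Editor's usage sketch, not part of the original file (the values are
 * made up):
 *
 *	__uint128_t old = 0x1234;	expected value, updated on mismatch
 *	bool success;
 *	int rc;
 *
 *	rc = cmpxchg_guest_abs_with_key(kvm, gpa, 8, &old, 0x5678,
 *					access_key, &success);
 *
 * rc == 0 && success means the exchange happened; rc == 0 && !success means
 * the location held a different value, which is now in old; rc > 0 is a
 * PGM_* code and rc < 0 a host-side error (-EINVAL, -EAGAIN, -EOPNOTSUPP).
 * @gpa must be aligned to the operand length.
 */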
1104 
1105 /**
1106  * guest_translate_address_with_key - translate guest logical into guest absolute address
1107  * @vcpu: virtual cpu
1108  * @gva: Guest virtual address
1109  * @ar: Access register
1110  * @gpa: Guest physical address
1111  * @mode: Translation access mode
1112  * @access_key: access key to match the storage key with
1113  *
1114  * Parameter semantics are the same as the ones from guest_translate.
1115  * The memory contents at the guest address are not changed.
1116  *
1117  * Note: The IPTE lock is not taken during this function, so the caller
1118  * has to take care of this.
1119  */
1120 int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
1121 				     unsigned long *gpa, enum gacc_mode mode,
1122 				     u8 access_key)
1123 {
1124 	union asce asce;
1125 	int rc;
1126 
1127 	gva = kvm_s390_logical_to_effective(vcpu, gva);
1128 	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
1129 	if (rc)
1130 		return rc;
1131 	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
1132 				   access_key);
1133 }
1134 
1135 /**
1136  * check_gva_range - test a range of guest virtual addresses for accessibility
1137  * @vcpu: virtual cpu
1138  * @gva: Guest virtual address
1139  * @ar: Access register
1140  * @length: Length of test range
1141  * @mode: Translation access mode
1142  * @access_key: access key to match the storage keys with
1143  */
1144 int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
1145 		    unsigned long length, enum gacc_mode mode, u8 access_key)
1146 {
1147 	union asce asce;
1148 	int rc = 0;
1149 
1150 	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
1151 	if (rc)
1152 		return rc;
1153 	ipte_lock(vcpu->kvm);
1154 	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
1155 				 access_key);
1156 	ipte_unlock(vcpu->kvm);
1157 
1158 	return rc;
1159 }
1160 
1161 /**
1162  * check_gpa_range - test a range of guest physical addresses for accessibility
1163  * @kvm: virtual machine instance
1164  * @gpa: guest physical address
1165  * @length: length of test range
1166  * @mode: access mode to test, relevant for storage keys
1167  * @access_key: access key to match the storage keys with
1168  */
1169 int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
1170 		    enum gacc_mode mode, u8 access_key)
1171 {
1172 	unsigned int fragment_len;
1173 	int rc = 0;
1174 
1175 	while (length && !rc) {
1176 		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
1177 		rc = vm_check_access_key(kvm, access_key, mode, gpa);
1178 		length -= fragment_len;
1179 		gpa += fragment_len;
1180 	}
1181 	return rc;
1182 }
1183 
1184 /**
1185  * kvm_s390_check_low_addr_prot_real - check for low-address protection
1186  * @vcpu: virtual cpu
1187  * @gra: Guest real address
1188  *
1189  * Checks whether an address is subject to low-address protection and sets
1190  * up vcpu->arch.pgm accordingly if necessary.
1191  *
1192  * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
1193  */
1194 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
1195 {
1196 	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
1197 
1198 	if (!ctlreg0.lap || !is_low_address(gra))
1199 		return 0;
1200 	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
1201 }
1202 
1203 /**
1204  * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
1205  * @sg: pointer to the shadow guest address space structure
1206  * @saddr: faulting address in the shadow gmap
1207  * @pgt: pointer to the beginning of the page table for the given address if
1208  *	 successful (return value 0), or to the first invalid DAT entry in
1209  *	 case of exceptions (return value > 0)
1210  * @dat_protection: referenced memory is write protected
1211  * @fake: pgt references contiguous guest memory block, not a pgtable
1212  */
1213 static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
1214 				  unsigned long *pgt, int *dat_protection,
1215 				  int *fake)
1216 {
1217 	struct kvm *kvm;
1218 	struct gmap *parent;
1219 	union asce asce;
1220 	union vaddress vaddr;
1221 	unsigned long ptr;
1222 	int rc;
1223 
1224 	*fake = 0;
1225 	*dat_protection = 0;
1226 	kvm = sg->private;
1227 	parent = sg->parent;
1228 	vaddr.addr = saddr;
1229 	asce.val = sg->orig_asce;
1230 	ptr = asce.rsto * PAGE_SIZE;
1231 	if (asce.r) {
1232 		*fake = 1;
1233 		ptr = 0;
1234 		asce.dt = ASCE_TYPE_REGION1;
1235 	}
1236 	switch (asce.dt) {
1237 	case ASCE_TYPE_REGION1:
1238 		if (vaddr.rfx01 > asce.tl && !*fake)
1239 			return PGM_REGION_FIRST_TRANS;
1240 		break;
1241 	case ASCE_TYPE_REGION2:
1242 		if (vaddr.rfx)
1243 			return PGM_ASCE_TYPE;
1244 		if (vaddr.rsx01 > asce.tl)
1245 			return PGM_REGION_SECOND_TRANS;
1246 		break;
1247 	case ASCE_TYPE_REGION3:
1248 		if (vaddr.rfx || vaddr.rsx)
1249 			return PGM_ASCE_TYPE;
1250 		if (vaddr.rtx01 > asce.tl)
1251 			return PGM_REGION_THIRD_TRANS;
1252 		break;
1253 	case ASCE_TYPE_SEGMENT:
1254 		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
1255 			return PGM_ASCE_TYPE;
1256 		if (vaddr.sx01 > asce.tl)
1257 			return PGM_SEGMENT_TRANSLATION;
1258 		break;
1259 	}
1260 
1261 	switch (asce.dt) {
1262 	case ASCE_TYPE_REGION1: {
1263 		union region1_table_entry rfte;
1264 
1265 		if (*fake) {
1266 			ptr += vaddr.rfx * _REGION1_SIZE;
1267 			rfte.val = ptr;
1268 			goto shadow_r2t;
1269 		}
1270 		*pgt = ptr + vaddr.rfx * 8;
1271 		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
1272 		if (rc)
1273 			return rc;
1274 		if (rfte.i)
1275 			return PGM_REGION_FIRST_TRANS;
1276 		if (rfte.tt != TABLE_TYPE_REGION1)
1277 			return PGM_TRANSLATION_SPEC;
1278 		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
1279 			return PGM_REGION_SECOND_TRANS;
1280 		if (sg->edat_level >= 1)
1281 			*dat_protection |= rfte.p;
1282 		ptr = rfte.rto * PAGE_SIZE;
1283 shadow_r2t:
1284 		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
1285 		if (rc)
1286 			return rc;
1287 		kvm->stat.gmap_shadow_r1_entry++;
1288 	}
1289 		fallthrough;
1290 	case ASCE_TYPE_REGION2: {
1291 		union region2_table_entry rste;
1292 
1293 		if (*fake) {
1294 			ptr += vaddr.rsx * _REGION2_SIZE;
1295 			rste.val = ptr;
1296 			goto shadow_r3t;
1297 		}
1298 		*pgt = ptr + vaddr.rsx * 8;
1299 		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
1300 		if (rc)
1301 			return rc;
1302 		if (rste.i)
1303 			return PGM_REGION_SECOND_TRANS;
1304 		if (rste.tt != TABLE_TYPE_REGION2)
1305 			return PGM_TRANSLATION_SPEC;
1306 		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
1307 			return PGM_REGION_THIRD_TRANS;
1308 		if (sg->edat_level >= 1)
1309 			*dat_protection |= rste.p;
1310 		ptr = rste.rto * PAGE_SIZE;
1311 shadow_r3t:
1312 		rste.p |= *dat_protection;
1313 		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
1314 		if (rc)
1315 			return rc;
1316 		kvm->stat.gmap_shadow_r2_entry++;
1317 	}
1318 		fallthrough;
1319 	case ASCE_TYPE_REGION3: {
1320 		union region3_table_entry rtte;
1321 
1322 		if (*fake) {
1323 			ptr += vaddr.rtx * _REGION3_SIZE;
1324 			rtte.val = ptr;
1325 			goto shadow_sgt;
1326 		}
1327 		*pgt = ptr + vaddr.rtx * 8;
1328 		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
1329 		if (rc)
1330 			return rc;
1331 		if (rtte.i)
1332 			return PGM_REGION_THIRD_TRANS;
1333 		if (rtte.tt != TABLE_TYPE_REGION3)
1334 			return PGM_TRANSLATION_SPEC;
1335 		if (rtte.cr && asce.p && sg->edat_level >= 2)
1336 			return PGM_TRANSLATION_SPEC;
1337 		if (rtte.fc && sg->edat_level >= 2) {
1338 			*dat_protection |= rtte.fc0.p;
1339 			*fake = 1;
1340 			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
1341 			rtte.val = ptr;
1342 			goto shadow_sgt;
1343 		}
1344 		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
1345 			return PGM_SEGMENT_TRANSLATION;
1346 		if (sg->edat_level >= 1)
1347 			*dat_protection |= rtte.fc0.p;
1348 		ptr = rtte.fc0.sto * PAGE_SIZE;
1349 shadow_sgt:
1350 		rtte.fc0.p |= *dat_protection;
1351 		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
1352 		if (rc)
1353 			return rc;
1354 		kvm->stat.gmap_shadow_r3_entry++;
1355 	}
1356 		fallthrough;
1357 	case ASCE_TYPE_SEGMENT: {
1358 		union segment_table_entry ste;
1359 
1360 		if (*fake) {
1361 			ptr += vaddr.sx * _SEGMENT_SIZE;
1362 			ste.val = ptr;
1363 			goto shadow_pgt;
1364 		}
1365 		*pgt = ptr + vaddr.sx * 8;
1366 		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
1367 		if (rc)
1368 			return rc;
1369 		if (ste.i)
1370 			return PGM_SEGMENT_TRANSLATION;
1371 		if (ste.tt != TABLE_TYPE_SEGMENT)
1372 			return PGM_TRANSLATION_SPEC;
1373 		if (ste.cs && asce.p)
1374 			return PGM_TRANSLATION_SPEC;
1375 		*dat_protection |= ste.fc0.p;
1376 		if (ste.fc && sg->edat_level >= 1) {
1377 			*fake = 1;
1378 			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
1379 			ste.val = ptr;
1380 			goto shadow_pgt;
1381 		}
1382 		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
1383 shadow_pgt:
1384 		ste.fc0.p |= *dat_protection;
1385 		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
1386 		if (rc)
1387 			return rc;
1388 		kvm->stat.gmap_shadow_sg_entry++;
1389 	}
1390 	}
1391 	/* Return the parent address of the page table */
1392 	*pgt = ptr;
1393 	return 0;
1394 }
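/*
 * Editor's summary, not part of the original file: at each level the walk
 * above reads the guest's table entry through the parent gmap and then asks
 * gmap to shadow the next-lower table, accumulating the protection bit on
 * the way down:
 *
 *	region-1 entry -> gmap_shadow_r2t()
 *	region-2 entry -> gmap_shadow_r3t()
 *	region-3 entry -> gmap_shadow_sgt()
 *	segment entry  -> gmap_shadow_pgt()
 *
 * With a real-space ASCE or a large page (*fake = 1) the "entry" passed
 * down is just the suitably scaled block address, not a real table entry.
 */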
1395 
1396 /**
1397  * shadow_pgt_lookup() - find a shadow page table
1398  * @sg: pointer to the shadow guest address space structure
1399  * @saddr: the address in the shadow guest address space
1400  * @pgt: parent gmap address of the page table to get shadowed
1401  * @dat_protection: if the pgtable is marked as protected by dat
1402  * @fake: pgt references contiguous guest memory block, not a pgtable
1403  *
1404  * Returns 0 if the shadow page table was found and -EAGAIN if the page
1405  * table was not found.
1406  *
1407  * Called with sg->mm->mmap_lock in read.
1408  */
1409 static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
1410 			     int *dat_protection, int *fake)
1411 {
1412 	unsigned long pt_index;
1413 	unsigned long *table;
1414 	struct page *page;
1415 	int rc;
1416 
1417 	spin_lock(&sg->guest_table_lock);
1418 	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
1419 	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
1420 		/* Shadow page tables are full pages (pte+pgste) */
1421 		page = pfn_to_page(*table >> PAGE_SHIFT);
1422 		pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
1423 		*pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
1424 		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
1425 		*fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
1426 		rc = 0;
1427 	} else  {
1428 		rc = -EAGAIN;
1429 	}
1430 	spin_unlock(&sg->guest_table_lock);
1431 	return rc;
1432 }
1433 
1434 /**
1435  * kvm_s390_shadow_fault - handle fault on a shadow page table
1436  * @vcpu: virtual cpu
1437  * @sg: pointer to the shadow guest address space structure
1438  * @saddr: faulting address in the shadow gmap
1439  * @datptr: will contain the address of the faulting DAT table entry, or of
1440  *	    the valid leaf, plus some flags
1441  *
1442  * Returns: - 0 if the shadow fault was successfully resolved
1443  *	    - > 0 (pgm exception code) on exceptions while faulting
1444  *	    - -EAGAIN if the caller can retry immediately
1445  *	    - -EFAULT when accessing invalid guest addresses
1446  *	    - -ENOMEM if out of memory
1447  */
1448 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
1449 			  unsigned long saddr, unsigned long *datptr)
1450 {
1451 	union vaddress vaddr;
1452 	union page_table_entry pte;
1453 	unsigned long pgt = 0;
1454 	int dat_protection, fake;
1455 	int rc;
1456 
1457 	if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
1458 		return -EFAULT;
1459 
1460 	mmap_read_lock(sg->mm);
1461 	/*
1462 	 * We don't want any guest-2 tables to change - so the parent
1463 	 * tables/pointers we read stay valid - unshadowing is however
1464 	 * always possible - only guest_table_lock protects us.
1465 	 */
1466 	ipte_lock(vcpu->kvm);
1467 
1468 	rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
1469 	if (rc)
1470 		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
1471 					    &fake);
1472 
1473 	vaddr.addr = saddr;
1474 	if (fake) {
1475 		pte.val = pgt + vaddr.px * PAGE_SIZE;
1476 		goto shadow_page;
1477 	}
1478 
1479 	switch (rc) {
1480 	case PGM_SEGMENT_TRANSLATION:
1481 	case PGM_REGION_THIRD_TRANS:
1482 	case PGM_REGION_SECOND_TRANS:
1483 	case PGM_REGION_FIRST_TRANS:
1484 		pgt |= PEI_NOT_PTE;
1485 		break;
1486 	case 0:
1487 		pgt += vaddr.px * 8;
1488 		rc = gmap_read_table(sg->parent, pgt, &pte.val);
1489 	}
1490 	if (datptr)
1491 		*datptr = pgt | dat_protection * PEI_DAT_PROT;
1492 	if (!rc && pte.i)
1493 		rc = PGM_PAGE_TRANSLATION;
1494 	if (!rc && pte.z)
1495 		rc = PGM_TRANSLATION_SPEC;
1496 shadow_page:
1497 	pte.p |= dat_protection;
1498 	if (!rc)
1499 		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
1500 	vcpu->kvm->stat.gmap_shadow_pg_entry++;
1501 	ipte_unlock(vcpu->kvm);
1502 	mmap_read_unlock(sg->mm);
1503 	return rc;
1504 }
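/*
 * Editor's summary, not part of the original file: a shadow fault is
 * resolved by (1) looking up an already shadowed page table via
 * shadow_pgt_lookup(), (2) otherwise building the missing shadow tables
 * with kvm_s390_shadow_tables(), (3) reading the guest PTE through the
 * parent gmap and (4) mapping the page with gmap_shadow_page(), all under
 * the IPTE lock and with sg->mm->mmap_lock held for reading.
 */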
1505