1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * guest access functions
4 *
5 * Copyright IBM Corp. 2014
6 *
7 */
8
9 #include <linux/vmalloc.h>
10 #include <linux/mm_types.h>
11 #include <linux/err.h>
12 #include <linux/pgtable.h>
13 #include <linux/bitfield.h>
14 #include <linux/kvm_host.h>
15 #include <linux/kvm_types.h>
16 #include <asm/diag.h>
17 #include <asm/access-regs.h>
18 #include <asm/fault.h>
19 #include <asm/dat-bits.h>
20 #include "kvm-s390.h"
21 #include "dat.h"
22 #include "gmap.h"
23 #include "gaccess.h"
24 #include "faultin.h"
25
26 #define GMAP_SHADOW_FAKE_TABLE 1ULL
27
/*
 * A single DAT table entry that can be viewed either as its raw value or
 * through any of the region-, segment- or page-table entry layouts.
 */
union dat_table_entry {
	unsigned long val;			/* raw entry value */
	union region1_table_entry pgd;		/* region-first-table view */
	union region2_table_entry p4d;		/* region-second-table view */
	union region3_table_entry pud;		/* region-third-table view */
	union segment_table_entry pmd;		/* segment-table view */
	union page_table_entry pte;		/* page-table view */
};
36
/* Number of slots in the raw_entries array of struct pgtwalk. */
#define WALK_N_ENTRIES 7
/*
 * Lowest index accepted by the array returned from get_entries().
 * Parenthesized so the negative constant always expands as one operand,
 * whatever expression it is used in.
 */
#define LEVEL_MEM (-2)
/*
 * Bookkeeping for a table walk (presumably a guest DAT walk - confirm
 * against the users of this struct, which are outside this excerpt).
 *
 * raw_entries is not meant to be indexed directly: get_entries() returns
 * the array shifted by -LEVEL_MEM, so that index LEVEL_MEM addresses
 * raw_entries[0].
 */
struct pgtwalk {
	struct guest_fault raw_entries[WALK_N_ENTRIES];
	gpa_t last_addr;
	int level;
	bool p;
};
45
get_entries(struct pgtwalk * w)46 static inline struct guest_fault *get_entries(struct pgtwalk *w)
47 {
48 return w->raw_entries - LEVEL_MEM;
49 }
50
/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members are used to
 * simply assign them the value of a region, segment or page table entry.
 *
 * All members overlay the same unsigned long; assigning one of the
 * bit-fields replaces only that many bits of addr and leaves the remaining
 * bits (the offset within the frame) untouched.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};
62
/*
 * Access-list-entry token as read from a guest access register by
 * ar_translation().
 */
union alet {
	u32 val;			/* raw token; 0 and 1 are special-cased by ar_translation() */
	struct {
		u32 reserved : 7;	/* non-zero yields PGM_ALET_SPECIFICATION */
		u32 p : 1;		/* selects gcr[5] (set) or gcr[2] (clear) as ALD source */
		u32 alesn : 8;		/* compared against the sequence number in the ALE */
		u32 alen : 16;		/* index of the access-list entry */
	};
};
72
/*
 * Access-list designation as fetched from guest real storage by
 * ar_translation().
 */
union ald {
	u32 val;		/* raw designation */
	struct {
		u32 : 1;
		u32 alo : 24;	/* access-list origin, used in units of 128 bytes */
		u32 all : 7;	/* access-list length, compared against alen / 8 */
	};
};
81
/*
 * Access-list entry as fetched from guest real storage by ar_translation().
 * The field order and widths define the in-memory layout and must not be
 * changed.
 */
struct ale {
	unsigned long i : 1; /* ALEN-Invalid Bit */
	unsigned long : 5;
	unsigned long fo : 1; /* Fetch-Only Bit */
	unsigned long p : 1; /* Private Bit */
	unsigned long alesn : 8; /* Access-List-Entry Sequence Number */
	unsigned long aleax : 16; /* Access-List-Entry Authorization Index */
	unsigned long : 32;
	unsigned long : 1;
	unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};
95
/*
 * Leading part of an ASN-second-table entry as fetched from guest real
 * storage by ar_translation(). The field order and widths define the
 * in-memory layout and must not be changed.
 */
struct aste {
	unsigned long i : 1; /* ASX-Invalid Bit */
	unsigned long ato : 29; /* Authority-Table Origin */
	unsigned long : 1;
	unsigned long b : 1; /* Base-Space Bit */
	unsigned long ax : 16; /* Authorization Index */
	unsigned long atl : 12; /* Authority-Table Length */
	unsigned long : 2;
	unsigned long ca : 1; /* Controlled-ASN Bit */
	unsigned long ra : 1; /* Reusable-ASN Bit */
	unsigned long asce : 64; /* Address-Space-Control Element */
	unsigned long ald : 32; /* not used by the code in this file excerpt */
	unsigned long astesn : 32; /* compared against the ASTE sequence number in the ALE */
	/* .. more fields there */
};
111
/*
 * Operand-access-control word that mvcos_key() loads into register 0
 * before issuing MVCOS. mvcos_key() fills oac1 from the destination key
 * and oac2 from the source key; in both halves "k" is set only when the
 * corresponding key is non-zero.
 */
union oac {
	unsigned int val;	/* raw value handed to the instruction */
	struct {
		struct {
			unsigned short key : 4;
			unsigned short : 4;
			unsigned short as : 2;
			unsigned short : 4;
			unsigned short k : 1;
			unsigned short a : 1;
		} oac1;
		struct {
			unsigned short key : 4;
			unsigned short : 4;
			unsigned short as : 2;
			unsigned short : 4;
			unsigned short k : 1;
			unsigned short a : 1;
		} oac2;
	};
};
133
ipte_lock_held(struct kvm * kvm)134 int ipte_lock_held(struct kvm *kvm)
135 {
136 if (sclp.has_siif)
137 return kvm->arch.sca->ipte_control.kh != 0;
138
139 return kvm->arch.ipte_lock_count != 0;
140 }
141
ipte_lock_simple(struct kvm * kvm)142 static void ipte_lock_simple(struct kvm *kvm)
143 {
144 union ipte_control old, new, *ic;
145
146 mutex_lock(&kvm->arch.ipte_mutex);
147 kvm->arch.ipte_lock_count++;
148 if (kvm->arch.ipte_lock_count > 1)
149 goto out;
150 retry:
151 ic = &kvm->arch.sca->ipte_control;
152 old = READ_ONCE(*ic);
153 do {
154 if (old.k) {
155 cond_resched();
156 goto retry;
157 }
158 new = old;
159 new.k = 1;
160 } while (!try_cmpxchg(&ic->val, &old.val, new.val));
161 out:
162 mutex_unlock(&kvm->arch.ipte_mutex);
163 }
164
ipte_unlock_simple(struct kvm * kvm)165 static void ipte_unlock_simple(struct kvm *kvm)
166 {
167 union ipte_control old, new, *ic;
168
169 mutex_lock(&kvm->arch.ipte_mutex);
170 kvm->arch.ipte_lock_count--;
171 if (kvm->arch.ipte_lock_count)
172 goto out;
173 ic = &kvm->arch.sca->ipte_control;
174 old = READ_ONCE(*ic);
175 do {
176 new = old;
177 new.k = 0;
178 } while (!try_cmpxchg(&ic->val, &old.val, new.val));
179 wake_up(&kvm->arch.ipte_wq);
180 out:
181 mutex_unlock(&kvm->arch.ipte_mutex);
182 }
183
ipte_lock_siif(struct kvm * kvm)184 static void ipte_lock_siif(struct kvm *kvm)
185 {
186 union ipte_control old, new, *ic;
187
188 retry:
189 ic = &kvm->arch.sca->ipte_control;
190 old = READ_ONCE(*ic);
191 do {
192 if (old.kg) {
193 cond_resched();
194 goto retry;
195 }
196 new = old;
197 new.k = 1;
198 new.kh++;
199 } while (!try_cmpxchg(&ic->val, &old.val, new.val));
200 }
201
ipte_unlock_siif(struct kvm * kvm)202 static void ipte_unlock_siif(struct kvm *kvm)
203 {
204 union ipte_control old, new, *ic;
205
206 ic = &kvm->arch.sca->ipte_control;
207 old = READ_ONCE(*ic);
208 do {
209 new = old;
210 new.kh--;
211 if (!new.kh)
212 new.k = 0;
213 } while (!try_cmpxchg(&ic->val, &old.val, new.val));
214 if (!new.kh)
215 wake_up(&kvm->arch.ipte_wq);
216 }
217
ipte_lock(struct kvm * kvm)218 void ipte_lock(struct kvm *kvm)
219 {
220 if (sclp.has_siif)
221 ipte_lock_siif(kvm);
222 else
223 ipte_lock_simple(kvm);
224 }
225
ipte_unlock(struct kvm * kvm)226 void ipte_unlock(struct kvm *kvm)
227 {
228 if (sclp.has_siif)
229 ipte_unlock_siif(kvm);
230 else
231 ipte_unlock_simple(kvm);
232 }
233
/*
 * Determine the ASCE selected by access register @ar.
 *
 * @vcpu: virtual cpu
 * @asce: where the resulting ASCE is stored on success
 * @ar:   access register number, must be below NUM_ACRS
 * @mode: access mode; a store through a fetch-only entry is rejected
 *
 * Returns 0 on success, -EINVAL for an out-of-range @ar, the (non-zero)
 * result of read_guest_real() if a guest table could not be read, or a
 * positive program interruption code.
 */
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	unsigned long ald_addr, authority_table_addr;
	u8 authority_table;
	struct aste aste;
	union alet alet;
	struct ale ale;
	union ald ald;
	int eax, rc;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	/* make sure the in-memory copy of the access registers is current */
	if (vcpu->arch.acrs_loaded)
		save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	/* AR 0 and ALET 0 use CR1, ALET 1 uses CR7 */
	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	}
	if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	/* the ALET's p bit picks CR5 or CR2 as the source of the ALD address */
	ald_addr = alet.p ? vcpu->arch.sie_block->gcr[5] :
			    vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	/* the entry address must not exceed 0x7fffffff */
	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	/*
	 * A private entry whose authorization index differs from the one in
	 * CR8 has to be authorized through the authority table.
	 */
	eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
	if (ale.p == 1 && ale.aleax != eax) {
		if (eax / 16 > aste.atl)
			return PGM_EXTENDED_AUTHORITY;

		authority_table_addr = aste.ato * 4 + eax / 4;

		rc = read_guest_real(vcpu, authority_table_addr,
				     &authority_table, sizeof(u8));
		if (rc)
			return rc;

		/* two bits per index, four indexes per byte */
		if (!(authority_table & (0x40 >> ((eax & 3) * 2))))
			return PGM_EXTENDED_AUTHORITY;
	}

	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}
323
/*
 * Cause of a protection exception; trans_exc_ending() uses it to decide
 * which of the TEID bits b56, b60 and b61 to set.
 */
enum prot_type {
	PROT_TYPE_LA = 0,	/* sets b56 */
	PROT_TYPE_KEYC = 1,	/* sets b60 */
	PROT_TYPE_ALC = 2,	/* sets b60 and b61 */
	PROT_TYPE_DAT = 3,	/* sets b61 */
	PROT_TYPE_IEP = 4,	/* sets b56 and b61 */
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_TYPE_DUMMY,
};
333
/*
 * Fill vcpu->arch.pgm for an access exception.
 *
 * @vcpu:      virtual cpu whose pgm info is (re)initialized
 * @code:      program interruption code
 * @gva:       guest address that caused the exception
 * @ar:        access register number, stored as exc_access_id
 * @mode:      access mode, decides between store and fetch indication
 * @prot:      cause of a PGM_PROTECTION exception
 * @terminate: if set, the protection bits b56, b60 and b61 are left zero
 *
 * Returns @code unchanged.
 */
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *info = &vcpu->arch.pgm;
	union teid *tec = (union teid *)&info->trans_exc_code;

	memset(info, 0, sizeof(*info));
	info->code = code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_TYPE_DUMMY:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			tec->b61 = 1;
			tec->b56 = 1;
			break;
		case PROT_TYPE_LA:
			tec->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			tec->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			tec->b60 = 1;
			tec->b61 = 1;
			break;
		case PROT_TYPE_DAT:
			tec->b61 = 1;
			break;
		}
		if (terminate) {
			tec->b56 = 0;
			tec->b60 = 0;
			tec->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		tec->addr = gva >> PAGE_SHIFT;
		if (mode == GACC_STORE)
			tec->fsi = TEID_FSI_STORE;
		else
			tec->fsi = TEID_FSI_FETCH;
		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		info->exc_access_id = ar;
		break;
	}
	return code;
}
403
/*
 * Convenience wrapper around trans_exc_ending() for the non-terminating
 * case. Returns @code.
 */
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	const bool terminate = false;

	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, terminate);
}
409
/*
 * Determine the ASCE that applies to an access by @vcpu.
 *
 * @vcpu: virtual cpu
 * @asce: where the effective ASCE is stored
 * @ga:   guest address, only used for exception reporting
 * @ar:   access register number, used in access-register mode
 * @mode: access mode
 *
 * With DAT off, an ASCE with only the real-space bit set is returned.
 * Otherwise the address-space control in the guest PSW selects CR1, CR7,
 * CR13 or access-register translation; instruction fetches are treated as
 * primary-space accesses unless the PSW selects the home space.
 *
 * Returns 0 on success, a negative value or a positive program
 * interruption code (with vcpu->arch.pgm filled in) if access-register
 * translation failed.
 */
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
	int rc;

	if (!psw.dat) {
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	if (mode == GACC_IFETCH && psw.as != PSW_BITS_AS_HOME)
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		break;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		break;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		break;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc <= 0)
			return rc;
		return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
	}
	return 0;
}
443
/*
 * Read one table entry (sizeof(*val) bytes) from guest address @gpa into
 * @val. Returns the result of kvm_read_guest().
 */
static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	const unsigned long entry_size = sizeof(*val);

	return kvm_read_guest(kvm, gpa, val, entry_size);
}
448
449 /**
450 * guest_translate_gva() - translate a guest virtual into a guest absolute address
451 * @vcpu: virtual cpu
452 * @gva: guest virtual address
453 * @gpa: points to where guest physical (absolute) address should be stored
454 * @asce: effective asce
455 * @mode: indicates the access mode to be used
456 * @prot: returns the type for protection exceptions
457 *
458 * Translate a guest virtual address into a guest absolute address by means
459 * of dynamic address translation as specified by the architecture.
460 * If the resulting absolute address is not available in the configuration
461 * an addressing exception is indicated and @gpa will not be changed.
462 *
463 * Returns: - zero on success; @gpa contains the resulting absolute address
464 * - a negative value if guest access failed due to e.g. broken
465 * guest mapping
466 * - a positive value if an access exception happened. In this case
467 * the returned value is the program interruption code as defined
468 * by the architecture
469 */
guest_translate_gva(struct kvm_vcpu * vcpu,unsigned long gva,unsigned long * gpa,const union asce asce,enum gacc_mode mode,enum prot_type * prot)470 static unsigned long guest_translate_gva(struct kvm_vcpu *vcpu, unsigned long gva,
471 unsigned long *gpa, const union asce asce,
472 enum gacc_mode mode, enum prot_type *prot)
473 {
474 union vaddress vaddr = {.addr = gva};
475 union raddress raddr = {.addr = gva};
476 union page_table_entry pte;
477 int dat_protection = 0;
478 int iep_protection = 0;
479 union ctlreg0 ctlreg0;
480 unsigned long ptr;
481 int edat1, edat2, iep;
482
483 ctlreg0.val = vcpu->arch.sie_block->gcr[0];
484 edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
485 edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
486 iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
487 if (asce.r)
488 goto real_address;
489 ptr = asce.rsto * PAGE_SIZE;
490 switch (asce.dt) {
491 case ASCE_TYPE_REGION1:
492 if (vaddr.rfx01 > asce.tl)
493 return PGM_REGION_FIRST_TRANS;
494 ptr += vaddr.rfx * 8;
495 break;
496 case ASCE_TYPE_REGION2:
497 if (vaddr.rfx)
498 return PGM_ASCE_TYPE;
499 if (vaddr.rsx01 > asce.tl)
500 return PGM_REGION_SECOND_TRANS;
501 ptr += vaddr.rsx * 8;
502 break;
503 case ASCE_TYPE_REGION3:
504 if (vaddr.rfx || vaddr.rsx)
505 return PGM_ASCE_TYPE;
506 if (vaddr.rtx01 > asce.tl)
507 return PGM_REGION_THIRD_TRANS;
508 ptr += vaddr.rtx * 8;
509 break;
510 case ASCE_TYPE_SEGMENT:
511 if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
512 return PGM_ASCE_TYPE;
513 if (vaddr.sx01 > asce.tl)
514 return PGM_SEGMENT_TRANSLATION;
515 ptr += vaddr.sx * 8;
516 break;
517 }
518 switch (asce.dt) {
519 case ASCE_TYPE_REGION1: {
520 union region1_table_entry rfte;
521
522 if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
523 return PGM_ADDRESSING;
524 if (deref_table(vcpu->kvm, ptr, &rfte.val))
525 return -EFAULT;
526 if (rfte.i)
527 return PGM_REGION_FIRST_TRANS;
528 if (rfte.tt != TABLE_TYPE_REGION1)
529 return PGM_TRANSLATION_SPEC;
530 if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
531 return PGM_REGION_SECOND_TRANS;
532 if (edat1)
533 dat_protection |= rfte.p;
534 ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
535 }
536 fallthrough;
537 case ASCE_TYPE_REGION2: {
538 union region2_table_entry rste;
539
540 if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
541 return PGM_ADDRESSING;
542 if (deref_table(vcpu->kvm, ptr, &rste.val))
543 return -EFAULT;
544 if (rste.i)
545 return PGM_REGION_SECOND_TRANS;
546 if (rste.tt != TABLE_TYPE_REGION2)
547 return PGM_TRANSLATION_SPEC;
548 if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
549 return PGM_REGION_THIRD_TRANS;
550 if (edat1)
551 dat_protection |= rste.p;
552 ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
553 }
554 fallthrough;
555 case ASCE_TYPE_REGION3: {
556 union region3_table_entry rtte;
557
558 if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
559 return PGM_ADDRESSING;
560 if (deref_table(vcpu->kvm, ptr, &rtte.val))
561 return -EFAULT;
562 if (rtte.i)
563 return PGM_REGION_THIRD_TRANS;
564 if (rtte.tt != TABLE_TYPE_REGION3)
565 return PGM_TRANSLATION_SPEC;
566 if (rtte.cr && asce.p && edat2)
567 return PGM_TRANSLATION_SPEC;
568 if (rtte.fc && edat2) {
569 dat_protection |= rtte.fc1.p;
570 iep_protection = rtte.fc1.iep;
571 raddr.rfaa = rtte.fc1.rfaa;
572 goto absolute_address;
573 }
574 if (vaddr.sx01 < rtte.fc0.tf)
575 return PGM_SEGMENT_TRANSLATION;
576 if (vaddr.sx01 > rtte.fc0.tl)
577 return PGM_SEGMENT_TRANSLATION;
578 if (edat1)
579 dat_protection |= rtte.fc0.p;
580 ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
581 }
582 fallthrough;
583 case ASCE_TYPE_SEGMENT: {
584 union segment_table_entry ste;
585
586 if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
587 return PGM_ADDRESSING;
588 if (deref_table(vcpu->kvm, ptr, &ste.val))
589 return -EFAULT;
590 if (ste.i)
591 return PGM_SEGMENT_TRANSLATION;
592 if (ste.tt != TABLE_TYPE_SEGMENT)
593 return PGM_TRANSLATION_SPEC;
594 if (ste.cs && asce.p)
595 return PGM_TRANSLATION_SPEC;
596 if (ste.fc && edat1) {
597 dat_protection |= ste.fc1.p;
598 iep_protection = ste.fc1.iep;
599 raddr.sfaa = ste.fc1.sfaa;
600 goto absolute_address;
601 }
602 dat_protection |= ste.fc0.p;
603 ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
604 }
605 }
606 if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
607 return PGM_ADDRESSING;
608 if (deref_table(vcpu->kvm, ptr, &pte.val))
609 return -EFAULT;
610 if (pte.i)
611 return PGM_PAGE_TRANSLATION;
612 if (pte.z)
613 return PGM_TRANSLATION_SPEC;
614 dat_protection |= pte.p;
615 iep_protection = pte.iep;
616 raddr.pfra = pte.pfra;
617 real_address:
618 raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
619 absolute_address:
620 if (mode == GACC_STORE && dat_protection) {
621 *prot = PROT_TYPE_DAT;
622 return PGM_PROTECTION;
623 }
624 if (mode == GACC_IFETCH && iep_protection && iep) {
625 *prot = PROT_TYPE_IEP;
626 return PGM_PROTECTION;
627 }
628 if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
629 return PGM_ADDRESSING;
630 *gpa = raddr.addr;
631 return 0;
632 }
633
/*
 * Return 1 if @ga lies in one of the ranges 0..511 or 4096..4607,
 * 0 otherwise.
 */
static inline int is_low_address(unsigned long ga)
{
	/* only bits 0-8 and bit 12 may be set for an address in those ranges */
	const unsigned long outside = ga & ~0x11fful;

	return !outside;
}
639
low_address_protection_enabled(struct kvm_vcpu * vcpu,const union asce asce)640 static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
641 const union asce asce)
642 {
643 union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
644 psw_t *psw = &vcpu->arch.sie_block->gpsw;
645
646 if (!ctlreg0.lap)
647 return 0;
648 if (psw_bits(*psw).dat && asce.p)
649 return 0;
650 return 1;
651 }
652
/*
 * Check @access_key against the storage key of the page containing @gpa.
 *
 * The storage key is read under the read side of kvm->mmu_lock. Access is
 * allowed if @access_key is zero or equals the key's access-control value,
 * or if this is a fetch and the key's fetch-protection bit is clear.
 *
 * Returns 0 if access is allowed, PGM_PROTECTION if not, or the non-zero
 * result of dat_get_storage_key() if the key could not be obtained.
 */
static int vm_check_access_key_gpa(struct kvm *kvm, u8 access_key,
				   enum gacc_mode mode, gpa_t gpa)
{
	const bool is_fetch = mode == GACC_FETCH || mode == GACC_IFETCH;
	union skey skey;
	int rc;

	scoped_guard(read_lock, &kvm->mmu_lock) {
		rc = dat_get_storage_key(kvm->arch.gmap->asce, gpa_to_gfn(gpa), &skey);
	}
	if (rc)
		return rc;
	if (access_key == 0)
		return 0;
	if (skey.acc == access_key)
		return 0;
	if (is_fetch && !skey.fp)
		return 0;
	return PGM_PROTECTION;
}
669
fetch_prot_override_applicable(struct kvm_vcpu * vcpu,enum gacc_mode mode,union asce asce)670 static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
671 union asce asce)
672 {
673 psw_t *psw = &vcpu->arch.sie_block->gpsw;
674 unsigned long override;
675
676 if (mode == GACC_FETCH || mode == GACC_IFETCH) {
677 /* check if fetch protection override enabled */
678 override = vcpu->arch.sie_block->gcr[0];
679 override &= CR0_FETCH_PROTECTION_OVERRIDE;
680 /* not applicable if subject to DAT && private space */
681 override = override && !(psw_bits(*psw).dat && asce.p);
682 return override;
683 }
684 return false;
685 }
686
/*
 * Return true if the range of @len bytes starting at @ga starts below
 * address 2048 and does not extend beyond it.
 */
static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	if (ga >= 2048)
		return false;
	return ga + len <= 2048;
}
691
storage_prot_override_applicable(struct kvm_vcpu * vcpu)692 static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
693 {
694 /* check if storage protection override enabled */
695 return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
696 }
697
storage_prot_override_applies(u8 access_control)698 static bool storage_prot_override_applies(u8 access_control)
699 {
700 /* matches special storage protection override key (9) -> allow */
701 return access_control == PAGE_SPO_ACC;
702 }
703
/*
 * Check @access_key against the storage key of the page containing @gpa,
 * honoring fetch-protection override and storage-protection override.
 *
 * @vcpu:       virtual cpu performing the access
 * @access_key: access key to check; 0 matches any storage key
 * @mode:       access mode
 * @asce:       ASCE used for the access (for fetch-protection override)
 * @gpa:        guest absolute address whose storage key is checked
 * @ga:         guest logical address (for fetch-protection override)
 * @len:        length of the accessed fragment
 *
 * Returns 0 if access is allowed, PGM_PROTECTION if not, or the non-zero
 * result of dat_get_storage_key() if the key could not be obtained.
 */
static int vcpu_check_access_key_gpa(struct kvm_vcpu *vcpu, u8 access_key,
				     enum gacc_mode mode, union asce asce, gpa_t gpa,
				     unsigned long ga, unsigned int len)
{
	const bool is_fetch = mode == GACC_FETCH || mode == GACC_IFETCH;
	union skey skey;
	int rc;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
		rc = dat_get_storage_key(vcpu->arch.gmap->asce, gpa_to_gfn(gpa), &skey);
	}
	if (rc)
		return rc;
	/* access key matches storage key -> allow */
	if (skey.acc == access_key)
		return 0;
	/* it is a fetch and fetch protection is off -> allow */
	if (is_fetch && !skey.fp)
		return 0;
	if (is_fetch && fetch_prot_override_applicable(vcpu, mode, asce) &&
	    fetch_prot_override_applies(ga, len))
		return 0;
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(skey.acc))
		return 0;
	return PGM_PROTECTION;
}
738
739 /**
740 * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
741 * covering a logical range
742 * @vcpu: virtual cpu
743 * @ga: guest address, start of range
744 * @ar: access register
745 * @gpas: output argument, may be NULL
746 * @len: length of range in bytes
747 * @asce: address-space-control element to use for translation
748 * @mode: access mode
749 * @access_key: access key to mach the range's storage keys against
750 *
751 * Translate a logical range to a series of guest absolute addresses,
752 * such that the concatenation of page fragments starting at each gpa make up
753 * the whole range.
754 * The translation is performed as if done by the cpu for the given @asce, @ar,
755 * @mode and state of the @vcpu.
756 * If the translation causes an exception, its program interruption code is
757 * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
758 * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
759 * a correct exception into the guest.
760 * The resulting gpas are stored into @gpas, unless it is NULL.
761 *
762 * Note: All fragments except the first one start at the beginning of a page.
763 * When deriving the boundaries of a fragment from a gpa, all but the last
764 * fragment end at the end of the page.
765 *
766 * Return:
767 * * 0 - success
768 * * <0 - translation could not be performed, for example if guest
769 * memory could not be accessed
770 * * >0 - an access exception occurred. In this case the returned value
771 * is the program interruption code and the contents of pgm may
772 * be used to inject an exception into the guest.
773 */
guest_range_to_gpas(struct kvm_vcpu * vcpu,unsigned long ga,u8 ar,unsigned long * gpas,unsigned long len,const union asce asce,enum gacc_mode mode,u8 access_key)774 static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
775 unsigned long *gpas, unsigned long len,
776 const union asce asce, enum gacc_mode mode,
777 u8 access_key)
778 {
779 psw_t *psw = &vcpu->arch.sie_block->gpsw;
780 unsigned int offset = offset_in_page(ga);
781 unsigned int fragment_len;
782 int lap_enabled, rc = 0;
783 enum prot_type prot;
784 unsigned long gpa;
785
786 lap_enabled = low_address_protection_enabled(vcpu, asce);
787 while (min(PAGE_SIZE - offset, len) > 0) {
788 fragment_len = min(PAGE_SIZE - offset, len);
789 ga = kvm_s390_logical_to_effective(vcpu, ga);
790 if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
791 return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
792 PROT_TYPE_LA);
793 if (psw_bits(*psw).dat) {
794 rc = guest_translate_gva(vcpu, ga, &gpa, asce, mode, &prot);
795 if (rc < 0)
796 return rc;
797 } else {
798 gpa = kvm_s390_real_to_abs(vcpu, ga);
799 if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
800 rc = PGM_ADDRESSING;
801 prot = PROT_TYPE_DUMMY;
802 }
803 }
804 if (rc)
805 return trans_exc(vcpu, rc, ga, ar, mode, prot);
806 rc = vcpu_check_access_key_gpa(vcpu, access_key, mode, asce, gpa, ga, fragment_len);
807 if (rc)
808 return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
809 if (gpas)
810 *gpas++ = gpa;
811 offset = 0;
812 ga += fragment_len;
813 len -= fragment_len;
814 }
815 return 0;
816 }
817
/*
 * Copy @len bytes between @data and the guest page containing @gpa,
 * without storage-key checking.
 *
 * Returns PGM_ADDRESSING if no memslot backs the page, otherwise the
 * result of kvm_write_guest_page() (for GACC_STORE) or
 * kvm_read_guest_page() (for any other mode).
 */
static int access_guest_page_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
				 void *data, unsigned int len)
{
	const gfn_t gfn = gpa_to_gfn(gpa);
	const unsigned int offset = offset_in_page(gpa);

	if (!gfn_to_memslot(kvm, gfn))
		return PGM_ADDRESSING;
	if (mode != GACC_STORE)
		return kvm_read_guest_page(kvm, gfn, data, offset, len);
	return kvm_write_guest_page(kvm, gfn, data, offset, len);
}
833
/*
 * Copy @size bytes from @from to @to with MVCOS, using @dst_key for the
 * destination and @src_key for the source operand. A key of 0 leaves the
 * respective "k" bit in the operand-access control clear.
 *
 * Returns 0 if the copy completed, or PGM_PROTECTION if the exception
 * table fixup was taken before the result could be set to 0.
 */
static int mvcos_key(void *to, const void *from, unsigned long size, u8 dst_key, u8 src_key)
{
	union oac spec = { .val = 0 };
	int exception;

	spec.oac1.key = dst_key;
	spec.oac1.k = !!dst_key;
	spec.oac2.key = src_key;
	spec.oac2.k = !!src_key;

	/* assume failure; only the instruction after mvcos resets this to 0 */
	exception = PGM_PROTECTION;

	asm_inline volatile(
		" lr %%r0,%[spec]\n"
		"0: mvcos %[to],%[from],%[size]\n"
		"1: lhi %[exc],0\n"
		"2:\n"
		EX_TABLE(0b, 2b)
		EX_TABLE(1b, 2b)
		: [size] "+d" (size), [to] "=Q" (*(char *)to), [exc] "+d" (exception)
		: [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
		: "memory", "cc", "0");
	return exception;
}
856
/*
 * Arguments and result of one keyed page access, passed through the priv
 * pointer of struct guest_fault to _access_guest_page_with_key_gpa().
 */
struct acc_page_key_context {
	void *data;		/* host buffer to copy from (store) or to (fetch) */
	int exception;		/* result of mvcos_key(), written by the callback */
	unsigned short offset;	/* offset of the access within the guest page */
	unsigned short len;	/* number of bytes to copy */
	bool store;		/* true: copy data to the guest, false: from the guest */
	u8 access_key;		/* key applied to the guest side of the copy */
};
865
_access_guest_page_with_key_gpa(struct guest_fault * f)866 static void _access_guest_page_with_key_gpa(struct guest_fault *f)
867 {
868 struct acc_page_key_context *context = f->priv;
869 void *ptr;
870 int r;
871
872 ptr = __va(PFN_PHYS(f->pfn) | context->offset);
873
874 if (context->store)
875 r = mvcos_key(ptr, context->data, context->len, context->access_key, 0);
876 else
877 r = mvcos_key(context->data, ptr, context->len, 0, context->access_key);
878
879 context->exception = r;
880 }
881
/*
 * Copy @len bytes between @data and the guest page containing @gpa while
 * checking storage-key protection against access key @acc.
 *
 * The page is faulted in through kvm_s390_faultin_gfn(), which is handed
 * _access_guest_page_with_key_gpa() as callback to do the actual copy.
 * The access must not cross a page boundary.
 *
 * Returns -EINVAL (after KVM_BUG_ON) if the access would cross the page,
 * the non-zero result of kvm_s390_faultin_gfn() on failure, otherwise the
 * value recorded by the callback.
 */
static int access_guest_page_with_key_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
					  void *data, unsigned int len, u8 acc)
{
	const bool is_store = mode == GACC_STORE;
	struct acc_page_key_context ctx = {
		.data = data,
		.offset = offset_in_page(gpa),
		.len = len,
		.store = is_store,
		.access_key = acc,
	};
	struct guest_fault request = {
		.gfn = gpa_to_gfn(gpa),
		.priv = &ctx,
		.write_attempt = is_store,
		.callback = _access_guest_page_with_key_gpa,
	};
	int rc;

	if (KVM_BUG_ON((len + ctx.offset) > PAGE_SIZE, kvm))
		return -EINVAL;

	rc = kvm_s390_faultin_gfn(NULL, kvm, &request);
	return rc ? rc : ctx.exception;
}
908
/*
 * Copy @len bytes between @data and guest absolute address @gpa, checking
 * storage-key protection against @access_key. The range is processed one
 * page fragment at a time.
 *
 * Returns 0 on success or the first non-zero result of
 * access_guest_page_with_key_gpa(); fragments before the failing one have
 * already been copied.
 */
int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
			      unsigned long len, enum gacc_mode mode, u8 access_key)
{
	int in_page = offset_in_page(gpa);
	int chunk;
	int rc;

	/* in_page is always below PAGE_SIZE, so we are done exactly when len is 0 */
	while (len) {
		chunk = min(PAGE_SIZE - in_page, len);
		rc = access_guest_page_with_key_gpa(kvm, mode, gpa, data, chunk, access_key);
		if (rc)
			return rc;
		/* every fragment after the first starts at a page boundary */
		in_page = 0;
		gpa += chunk;
		data += chunk;
		len -= chunk;
	}
	return 0;
}
928
/*
 * Copy @len bytes between @data and guest logical address @ga as seen by
 * @vcpu, checking storage-key protection against @access_key.
 *
 * @vcpu:       virtual cpu
 * @ga:         guest logical address
 * @ar:         access register number used in access-register mode
 * @data:       host buffer
 * @len:        number of bytes; 0 is a successful no-op
 * @mode:       access mode
 * @access_key: access key to use for the access
 *
 * The whole range is translated first (under the IPTE lock if DAT
 * translation is involved), then accessed page fragment by page fragment.
 *
 * Returns 0 on success, a negative value on failure (-ENOMEM if the gpa
 * array could not be allocated), or a positive program interruption code,
 * in which case vcpu->arch.pgm has been filled in.
 */
int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	bool fetch_override, storage_override, on_heap, locked;
	unsigned long gpa_array[2];
	unsigned long *gpas = gpa_array;
	unsigned long nr_pages, idx;
	unsigned int chunk;
	enum prot_type prot;
	union asce asce;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;

	/* small accesses use the on-stack array, larger ones need an allocation */
	nr_pages = ((offset_in_page(ga) + len - 1) >> PAGE_SHIFT) + 1;
	on_heap = nr_pages > ARRAY_SIZE(gpa_array);
	if (on_heap) {
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
		if (!gpas)
			return -ENOMEM;
	}

	fetch_override = fetch_prot_override_applicable(vcpu, mode, asce);
	storage_override = storage_prot_override_applicable(vcpu);
	locked = psw_bits(*psw).dat && !asce.r;
	if (locked)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;

	for (idx = 0; idx < nr_pages; idx++) {
		chunk = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (fetch_override && fetch_prot_override_applies(ga, chunk))
			rc = access_guest_page_gpa(vcpu->kvm, mode, gpas[idx], data, chunk);
		else
			rc = access_guest_page_with_key_gpa(vcpu->kvm, mode, gpas[idx],
							    data, chunk, access_key);
		if (rc == PGM_PROTECTION && storage_override)
			rc = access_guest_page_with_key_gpa(vcpu->kvm, mode, gpas[idx],
							    data, chunk, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= chunk;
		data += chunk;
		ga = kvm_s390_logical_to_effective(vcpu, ga + chunk);
	}
	if (rc > 0) {
		/* a store that already modified earlier fragments terminates */
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		prot = rc == PGM_PROTECTION ? PROT_TYPE_KEYC : PROT_TYPE_DUMMY;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (locked)
		ipte_unlock(vcpu->kvm);
	if (on_heap)
		vfree(gpas);
	return rc;
}
1008
access_guest_real(struct kvm_vcpu * vcpu,unsigned long gra,void * data,unsigned long len,enum gacc_mode mode)1009 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
1010 void *data, unsigned long len, enum gacc_mode mode)
1011 {
1012 unsigned int fragment_len;
1013 unsigned long gpa;
1014 int rc = 0;
1015
1016 while (len && !rc) {
1017 gpa = kvm_s390_real_to_abs(vcpu, gra);
1018 fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
1019 rc = access_guest_page_gpa(vcpu->kvm, mode, gpa, data, fragment_len);
1020 len -= fragment_len;
1021 gra += fragment_len;
1022 data += fragment_len;
1023 }
1024 if (rc > 0)
1025 vcpu->arch.pgm.code = rc;
1026 return rc;
1027 }
1028
1029 /**
1030 * __cmpxchg_with_key() - Perform cmpxchg, honoring storage keys.
1031 * @ptr: Address of value to compare to *@old and exchange with
1032 * @new. Must be aligned to @size.
1033 * @old: Old value. Compared to the content pointed to by @ptr in order to
1034 * determine if the exchange occurs. The old value read from *@ptr is
1035 * written here.
1036 * @new: New value to place at *@ptr.
1037 * @size: Size of the operation in bytes, may only be a power of two up to 16.
1038 * @access_key: Access key to use for checking storage key protection.
1039 *
1040 * Perform a cmpxchg on guest memory, honoring storage key protection.
1041 * @access_key alone determines how key checking is performed, neither
1042 * storage-protection-override nor fetch-protection-override apply.
1043 * In case of an exception *@uval is set to zero.
1044 *
1045 * Return:
1046 * * %0: cmpxchg executed successfully
1047 * * %1: cmpxchg executed unsuccessfully
1048 * * %PGM_PROTECTION: an exception happened when trying to access *@ptr
1049 * * %-EAGAIN: maxed out number of retries (byte and short only)
1050 * * %-EINVAL: invalid value for @size
1051 */
__cmpxchg_with_key(union kvm_s390_quad * ptr,union kvm_s390_quad * old,union kvm_s390_quad new,int size,u8 access_key)1052 static int __cmpxchg_with_key(union kvm_s390_quad *ptr, union kvm_s390_quad *old,
1053 union kvm_s390_quad new, int size, u8 access_key)
1054 {
1055 union kvm_s390_quad tmp = { .sixteen = 0 };
1056 int rc;
1057
1058 /*
1059 * The cmpxchg_key macro depends on the type of "old", so we need
1060 * a case for each valid length and get some code duplication as long
1061 * as we don't introduce a new macro.
1062 */
1063 switch (size) {
1064 case 1:
1065 rc = __cmpxchg_key1(&ptr->one, &tmp.one, old->one, new.one, access_key);
1066 break;
1067 case 2:
1068 rc = __cmpxchg_key2(&ptr->two, &tmp.two, old->two, new.two, access_key);
1069 break;
1070 case 4:
1071 rc = __cmpxchg_key4(&ptr->four, &tmp.four, old->four, new.four, access_key);
1072 break;
1073 case 8:
1074 rc = __cmpxchg_key8(&ptr->eight, &tmp.eight, old->eight, new.eight, access_key);
1075 break;
1076 case 16:
1077 rc = __cmpxchg_key16(&ptr->sixteen, &tmp.sixteen, old->sixteen, new.sixteen,
1078 access_key);
1079 break;
1080 default:
1081 return -EINVAL;
1082 }
1083 if (!rc && memcmp(&tmp, old, size))
1084 rc = 1;
1085 *old = tmp;
1086 /*
1087 * Assume that the fault is caused by protection, either key protection
1088 * or user page write protection.
1089 */
1090 if (rc == -EFAULT)
1091 rc = PGM_PROTECTION;
1092 return rc;
1093 }
1094
/*
 * Arguments and result of a keyed cmpxchg, passed through guest_fault::priv
 * from cmpxchg_guest_abs_with_key() to _cmpxchg_guest_abs_with_key().
 */
struct cmpxchg_key_context {
	union kvm_s390_quad new;	/* value to store */
	union kvm_s390_quad *old;	/* expected value in, previous value out */
	int exception;			/* return value of __cmpxchg_with_key() */
	unsigned short offset;		/* offset of the operand within its page */
	u8 access_key;			/* access key used for the operation */
	u8 len;				/* operand length in bytes */
};
1103
_cmpxchg_guest_abs_with_key(struct guest_fault * f)1104 static void _cmpxchg_guest_abs_with_key(struct guest_fault *f)
1105 {
1106 struct cmpxchg_key_context *context = f->priv;
1107
1108 context->exception = __cmpxchg_with_key(__va(PFN_PHYS(f->pfn) | context->offset),
1109 context->old, context->new, context->len,
1110 context->access_key);
1111 }
1112
1113 /**
1114 * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
1115 * @kvm: Virtual machine instance.
1116 * @gpa: Absolute guest address of the location to be changed.
1117 * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
1118 * non power of two will result in failure.
1119 * @old: Pointer to old value. If the location at @gpa contains this value,
1120 * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
1121 * *@old contains the value at @gpa before the attempt to
1122 * exchange the value.
1123 * @new: The value to place at @gpa.
1124 * @acc: The access key to use for the guest access.
1125 * @success: output value indicating if an exchange occurred.
1126 *
1127 * Atomically exchange the value at @gpa by @new, if it contains *@old.
1128 * Honors storage keys.
1129 *
1130 * Return: * 0: successful exchange
1131 * * >0: a program interruption code indicating the reason cmpxchg could
1132 * not be attempted
1133 * * -EINVAL: address misaligned or len not power of two
1134 * * -EAGAIN: transient failure (len 1 or 2)
1135 * * -EOPNOTSUPP: read-only memslot (should never occur)
1136 */
cmpxchg_guest_abs_with_key(struct kvm * kvm,gpa_t gpa,int len,union kvm_s390_quad * old,union kvm_s390_quad new,u8 acc,bool * success)1137 int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, union kvm_s390_quad *old,
1138 union kvm_s390_quad new, u8 acc, bool *success)
1139 {
1140 struct cmpxchg_key_context context = {
1141 .old = old,
1142 .new = new,
1143 .offset = offset_in_page(gpa),
1144 .len = len,
1145 .access_key = acc,
1146 };
1147 struct guest_fault fault = {
1148 .gfn = gpa_to_gfn(gpa),
1149 .priv = &context,
1150 .write_attempt = true,
1151 .callback = _cmpxchg_guest_abs_with_key,
1152 };
1153 int rc;
1154
1155 lockdep_assert_held(&kvm->srcu);
1156
1157 if (len > 16 || !IS_ALIGNED(gpa, len))
1158 return -EINVAL;
1159
1160 rc = kvm_s390_faultin_gfn(NULL, kvm, &fault);
1161 if (rc)
1162 return rc;
1163 *success = !context.exception;
1164 if (context.exception == 1)
1165 return 0;
1166 return context.exception;
1167 }
1168
1169 /**
1170 * guest_translate_address_with_key - translate guest logical into guest absolute address
1171 * @vcpu: virtual cpu
1172 * @gva: Guest virtual address
1173 * @ar: Access register
1174 * @gpa: Guest physical address
1175 * @mode: Translation access mode
1176 * @access_key: access key to mach the storage key with
1177 *
1178 * Parameter semantics are the same as the ones from guest_translate.
1179 * The memory contents at the guest address are not changed.
1180 *
1181 * Note: The IPTE lock is not taken during this function, so the caller
1182 * has to take care of this.
1183 */
guest_translate_address_with_key(struct kvm_vcpu * vcpu,unsigned long gva,u8 ar,unsigned long * gpa,enum gacc_mode mode,u8 access_key)1184 int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
1185 unsigned long *gpa, enum gacc_mode mode,
1186 u8 access_key)
1187 {
1188 union asce asce;
1189 int rc;
1190
1191 gva = kvm_s390_logical_to_effective(vcpu, gva);
1192 rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
1193 if (rc)
1194 return rc;
1195 return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
1196 access_key);
1197 }
1198
1199 /**
1200 * check_gva_range - test a range of guest virtual addresses for accessibility
1201 * @vcpu: virtual cpu
1202 * @gva: Guest virtual address
1203 * @ar: Access register
1204 * @length: Length of test range
1205 * @mode: Translation access mode
1206 * @access_key: access key to mach the storage keys with
1207 */
check_gva_range(struct kvm_vcpu * vcpu,unsigned long gva,u8 ar,unsigned long length,enum gacc_mode mode,u8 access_key)1208 int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
1209 unsigned long length, enum gacc_mode mode, u8 access_key)
1210 {
1211 union asce asce;
1212 int rc = 0;
1213
1214 rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
1215 if (rc)
1216 return rc;
1217 ipte_lock(vcpu->kvm);
1218 rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
1219 access_key);
1220 ipte_unlock(vcpu->kvm);
1221
1222 return rc;
1223 }
1224
1225 /**
1226 * check_gpa_range - test a range of guest physical addresses for accessibility
1227 * @kvm: virtual machine instance
1228 * @gpa: guest physical address
1229 * @length: length of test range
1230 * @mode: access mode to test, relevant for storage keys
1231 * @access_key: access key to mach the storage keys with
1232 */
check_gpa_range(struct kvm * kvm,unsigned long gpa,unsigned long length,enum gacc_mode mode,u8 access_key)1233 int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
1234 enum gacc_mode mode, u8 access_key)
1235 {
1236 unsigned int fragment_len;
1237 int rc = 0;
1238
1239 while (length && !rc) {
1240 fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
1241 rc = vm_check_access_key_gpa(kvm, access_key, mode, gpa);
1242 length -= fragment_len;
1243 gpa += fragment_len;
1244 }
1245 return rc;
1246 }
1247
1248 /**
1249 * kvm_s390_check_low_addr_prot_real - check for low-address protection
1250 * @vcpu: virtual cpu
1251 * @gra: Guest real address
1252 *
1253 * Checks whether an address is subject to low-address protection and set
1254 * up vcpu->arch.pgm accordingly if necessary.
1255 *
1256 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
1257 */
kvm_s390_check_low_addr_prot_real(struct kvm_vcpu * vcpu,unsigned long gra)1258 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
1259 {
1260 union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
1261
1262 if (!ctlreg0.lap || !is_low_address(gra))
1263 return 0;
1264 return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
1265 }
1266
/**
 * walk_guest_tables() - Walk the guest page table and pin the dat tables.
 * @sg: Pointer to the shadow guest address space structure.
 * @saddr: Faulting address in the shadow gmap.
 * @w: Will be filled with information on the pinned pages.
 * @wr: Indicates a write access if true.
 *
 * The walk starts at the table designated by sg->guest_asce and descends one
 * level per switch case (using fallthrough). For every level the entry is
 * read from guest memory, the containing page is recorded in the slot of @w
 * that corresponds to the current level, and w->last_addr / w->level are
 * updated so that the caller can tell where the walk stopped. Protection
 * bits found on the way are accumulated in w->p.
 *
 * Return:
 * * %0 in case of success,
 * * a PIC code > 0 in case the address translation fails
 * * an error code < 0 if other errors happen in the host
 */
static int walk_guest_tables(struct gmap *sg, unsigned long saddr, struct pgtwalk *w, bool wr)
{
	struct gmap *parent = sg->parent;
	struct guest_fault *entries;
	union dat_table_entry table;
	union vaddress vaddr;
	unsigned long ptr;
	struct kvm *kvm;
	union asce asce;
	int rc;

	/* Without a parent there is nothing to walk; let the caller retry */
	if (!parent)
		return -EAGAIN;
	kvm = parent->kvm;
	WARN_ON(!kvm);
	asce = sg->guest_asce;
	/* entries[] can be indexed by level, from LEVEL_MEM upwards */
	entries = get_entries(w);

	w->level = LEVEL_MEM;
	w->last_addr = saddr;
	/* asce.r set: no tables are walked, @saddr itself designates the page */
	if (asce.r)
		return kvm_s390_get_guest_page(kvm, entries + LEVEL_MEM, gpa_to_gfn(saddr), false);

	vaddr.addr = saddr;
	ptr = asce.rsto * PAGE_SIZE;

	if (!asce_contains_gfn(asce, gpa_to_gfn(saddr)))
		return PGM_ASCE_TYPE;
	/* Check the index into the top-level table against the ASCE table length */
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

	/*
	 * Enter the walk at the level given by the ASCE and fall through to
	 * the lower levels. Each case: read the entry, check invalid bit and
	 * table type, check the next index against the offset/length found
	 * in the entry, accumulate protection, compute the next table origin.
	 */
	w->level = asce.dt;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		w->last_addr = ptr + vaddr.rfx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.pgd.i)
			return PGM_REGION_FIRST_TRANS;
		if (table.pgd.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < table.pgd.tf || vaddr.rsx01 > table.pgd.tl)
			return PGM_REGION_SECOND_TRANS;
		/* The p bit of region entries is only honored with edat_level >= 1 */
		if (sg->edat_level >= 1)
			w->p |= table.pgd.p;
		ptr = table.pgd.rto * PAGE_SIZE;
		w->level--;
		fallthrough;
	case ASCE_TYPE_REGION2:
		w->last_addr = ptr + vaddr.rsx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.p4d.i)
			return PGM_REGION_SECOND_TRANS;
		if (table.p4d.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < table.p4d.tf || vaddr.rtx01 > table.p4d.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			w->p |= table.p4d.p;
		ptr = table.p4d.rto * PAGE_SIZE;
		w->level--;
		fallthrough;
	case ASCE_TYPE_REGION3:
		w->last_addr = ptr + vaddr.rtx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.pud.i)
			return PGM_REGION_THIRD_TRANS;
		if (table.pud.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (table.pud.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (sg->edat_level >= 1)
			w->p |= table.pud.p;
		/*
		 * fc set and edat_level >= 2: the walk ends at this entry.
		 * The bits of @saddr below the region-3 boundary are merged
		 * into the entry, so that the frame number taken from it at
		 * edat_applies includes the offset inside the large frame.
		 */
		if (table.pud.fc && sg->edat_level >= 2) {
			table.val = u64_replace_bits(table.val, saddr, ~_REGION3_MASK);
			goto edat_applies;
		}
		if (vaddr.sx01 < table.pud.fc0.tf || vaddr.sx01 > table.pud.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr = table.pud.fc0.sto * PAGE_SIZE;
		w->level--;
		fallthrough;
	case ASCE_TYPE_SEGMENT:
		w->last_addr = ptr + vaddr.sx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.pmd.i)
			return PGM_SEGMENT_TRANSLATION;
		if (table.pmd.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (table.pmd.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		/* Unlike the region levels, the p bit is accumulated unconditionally */
		w->p |= table.pmd.p;
		/* Same as for region 3 above, with the segment boundary */
		if (table.pmd.fc && sg->edat_level >= 1) {
			table.val = u64_replace_bits(table.val, saddr, ~_SEGMENT_MASK);
			goto edat_applies;
		}
		/* The page table origin is expressed in units of half a page */
		ptr = table.pmd.fc0.pto * (PAGE_SIZE / 2);
		w->level--;
	}
	/* Last level: the page table entry */
	w->last_addr = ptr + vaddr.px * 8;
	rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
						  w->last_addr, &table.val);
	if (rc)
		return rc;
	if (table.pte.i)
		return PGM_PAGE_TRANSLATION;
	if (table.pte.z)
		return PGM_TRANSLATION_SPEC;
	w->p |= table.pte.p;
edat_applies:
	/* A write through a translation marked as protected is not allowed */
	if (wr && w->p)
		return PGM_PROTECTION;

	/* Finally get the page that the translation points to */
	return kvm_s390_get_guest_page(kvm, entries + LEVEL_MEM, table.pte.pfra, wr);
}
1421
/*
 * _do_shadow_pte() - create a shadow mapping at page table entry level.
 * @sg: the shadow gmap
 * @raddr: address in the shadow gmap that is being mapped
 * @ptep_h: pte in the parent gmap (sg->parent) for the backing page
 * @ptep: pte in the shadow gmap that will receive the new mapping
 * @f: fault-in information (gfn, pfn, writable) of the backing page
 * @p: protection accumulated during the guest table walk
 *
 * Must be called with kvm->mmu_lock and the parent's children_lock held.
 *
 * First the reverse mapping from the parent's gfn to @raddr is recorded,
 * then the parent pte is rewritten and flagged with vsie_notif in its pgste,
 * and finally the shadow pte is installed. Both pgste locks are only tried,
 * not waited for.
 *
 * Return: 0 on success, -EAGAIN if a pgste lock could not be taken, or the
 * error reported by gmap_insert_rmap().
 */
static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pte *ptep,
			  struct guest_fault *f, bool p)
{
	union pgste pgste;
	union pte newpte;
	int rc;

	lockdep_assert_held(&sg->kvm->mmu_lock);
	lockdep_assert_held(&sg->parent->children_lock);

	scoped_guard(spinlock, &sg->host_to_rmap_lock)
		rc = gmap_insert_rmap(sg, f->gfn, gpa_to_gfn(raddr), TABLE_TYPE_PAGE_TABLE);
	if (rc)
		return rc;

	if (!pgste_get_trylock(ptep_h, &pgste))
		return -EAGAIN;
	newpte = _pte(f->pfn, f->writable, !p, 0);
	/*
	 * newpte replaces *ptep_h, so the d, sd and p bits that are carried
	 * over must come from the parent entry being replaced and not from
	 * the shadow entry; this mirrors what _do_shadow_crste() does with
	 * its host entry.
	 */
	newpte.s.d |= ptep_h->s.d;
	newpte.s.sd |= ptep_h->s.sd;
	newpte.h.p &= ptep_h->h.p;
	pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
	pgste.vsie_notif = 1;
	pgste_set_unlock(ptep_h, pgste);

	/* The shadow entry itself is built with the writable argument cleared */
	newpte = _pte(f->pfn, 0, !p, 0);
	if (!pgste_get_trylock(ptep, &pgste))
		return -EAGAIN;
	pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg));
	pgste_set_unlock(ptep, pgste);

	return 0;
}
1455
/*
 * _do_shadow_crste() - create a shadow mapping with a large (fc1) entry.
 * @sg: the shadow gmap
 * @raddr: address in the shadow gmap that is being mapped
 * @host: entry in the parent gmap (sg->parent) for the backing memory
 * @table: entry in the shadow gmap that will receive the new mapping
 * @f: fault-in information (gfn, pfn, writable) of the backing memory
 * @p: protection accumulated during the guest table walk
 *
 * Must be called with kvm->mmu_lock and the parent's children_lock held.
 *
 * Return: 0 on success, or the error reported by gmap_insert_rmap().
 */
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
			    struct guest_fault *f, bool p)
{
	union crste host_new, shadow_new;
	gfn_t base_gfn;
	int rc;

	lockdep_assert_held(&sg->kvm->mmu_lock);
	lockdep_assert_held(&sg->parent->children_lock);

	/* Round the gfn down to the start of the segment or region-3 frame */
	base_gfn = f->gfn & gpa_to_gfn(is_pmd(*table) ? _SEGMENT_MASK : _REGION3_MASK);
	scoped_guard(spinlock, &sg->host_to_rmap_lock) {
		rc = gmap_insert_rmap(sg, base_gfn, gpa_to_gfn(raddr), host->h.tt);
	}
	if (rc)
		return rc;

	/* New parent entry: keep d, sd, p and prefix_notif of the old one */
	host_new = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
	host_new.s.fc1.d |= host->s.fc1.d;
	host_new.s.fc1.sd |= host->s.fc1.sd;
	host_new.h.p &= host->h.p;
	host_new.s.fc1.vsie_notif = 1;
	host_new.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
	_gmap_crstep_xchg(sg->parent, host, host_new, f->gfn, false);

	/* The shadow entry itself is built with the writable argument cleared */
	shadow_new = _crste_fc1(f->pfn, host->h.tt, 0, !p);
	dat_crstep_xchg(table, shadow_new, gpa_to_gfn(raddr), sg->asce);
	return 0;
}
1484
/*
 * _gaccess_do_shadow() - create the shadow mapping for @saddr from a
 * completed guest table walk.
 * @mc: memory cache used by the allocating walks and gmap_protect_rmap()
 * @sg: the shadow gmap
 * @saddr: address in the shadow gmap
 * @w: result of walk_guest_tables()
 *
 * Must be called with kvm->mmu_lock and the parent's children_lock held.
 *
 * Return: 0 on success or if the mapping already exists, -EFAULT if the
 * resulting level is unexpected, otherwise the error of the failing helper.
 */
static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
			      unsigned long saddr, struct pgtwalk *w)
{
	struct guest_fault *entries;
	int flags, i, hl, gl, l, rc;
	union crste *table, *host;
	union pte *ptep, *ptep_h;

	lockdep_assert_held(&sg->kvm->mmu_lock);
	lockdep_assert_held(&sg->parent->children_lock);

	entries = get_entries(w);
	ptep_h = NULL;
	ptep = NULL;

	/* Look up the current state of the shadow tables, without allocating */
	rc = dat_entry_walk(NULL, gpa_to_gfn(saddr), sg->asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE,
			    &table, &ptep);
	if (rc)
		return rc;

	/* A race occurred. The shadow mapping is already valid, nothing to do */
	if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
		return 0;

	/* Level at which the shadow walk stopped */
	gl = get_level(table, ptep);

	/*
	 * Skip levels that are already protected. For each level, protect
	 * only the page containing the entry, not the whole table.
	 */
	for (i = gl ; i >= w->level; i--) {
		rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
				       entries[i - 1].pfn, i, entries[i - 1].writable);
		if (rc)
			return rc;
	}

	/* Find the entry in the parent gmap that maps the backing page */
	rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
			    TABLE_TYPE_PAGE_TABLE, &host, &ptep_h);
	if (rc)
		return rc;

	hl = get_level(host, ptep_h);
	/* Get the smallest granularity */
	l = min3(gl, hl, w->level);

	flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
	/* If necessary, create the shadow mapping */
	if (l < gl) {
		rc = dat_entry_walk(mc, gpa_to_gfn(saddr), sg->asce, flags, l, &table, &ptep);
		if (rc)
			return rc;
	}
	/* Likewise, bring the parent mapping down to level l if it is coarser */
	if (l < hl) {
		rc = dat_entry_walk(mc, entries[LEVEL_MEM].gfn, sg->parent->asce,
				    flags, l, &host, &ptep_h);
		if (rc)
			return rc;
	}

	if (KVM_BUG_ON(l > TABLE_TYPE_REGION3, sg->kvm))
		return -EFAULT;
	if (l == TABLE_TYPE_PAGE_TABLE)
		return _do_shadow_pte(sg, saddr, ptep_h, ptep, entries + LEVEL_MEM, w->p);
	return _do_shadow_crste(sg, saddr, host, table, entries + LEVEL_MEM, w->p);
}
1551
/*
 * Create the shadow mapping for @saddr based on a completed guest table walk.
 *
 * The mmu cache is topped up outside of the locks; the shadowing itself runs
 * under kvm->mmu_lock (read) and the parent's children_lock. If the shadowing
 * runs out of memory, the locks are dropped and the whole sequence is
 * repeated. On success the pages recorded in @walk are released here; on
 * failure they are left for the caller.
 *
 * Return: 0 on success, -EAGAIN if the walk is stale or the parent changed,
 * otherwise the error from the topup or from _gaccess_do_shadow().
 */
static inline int _gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
					unsigned long seq, struct pgtwalk *walk)
{
	struct gmap *parent;
	int rc;

	/* Cheap check without the lock first */
	if (kvm_s390_array_needs_retry_unsafe(vcpu->kvm, seq, walk->raw_entries))
		return -EAGAIN;

	do {
		rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
		if (rc)
			return rc;

		scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
			if (kvm_s390_array_needs_retry_safe(vcpu->kvm, seq, walk->raw_entries))
				return -EAGAIN;
			parent = READ_ONCE(sg->parent);
			if (!parent)
				return -EAGAIN;
			scoped_guard(spinlock, &parent->children_lock) {
				/* The parent must not have changed before the lock was taken */
				if (READ_ONCE(sg->parent) != parent)
					return -EAGAIN;
				rc = _gaccess_do_shadow(vcpu->arch.mc, sg, saddr, walk);
			}
			if (!rc)
				kvm_s390_release_faultin_array(vcpu->kvm, walk->raw_entries,
							       false);
		}
		/* Out of memory: refill the cache with the locks dropped and redo */
	} while (rc == -ENOMEM);

	return rc;
}
1582
1583 /**
1584 * __gaccess_shadow_fault() - Handle fault on a shadow page table.
1585 * @vcpu: Virtual cpu that triggered the action.
1586 * @sg: The shadow guest address space structure.
1587 * @saddr: Faulting address in the shadow gmap.
1588 * @datptr: Will contain the address of the faulting DAT table entry, or of
1589 * the valid leaf, plus some flags.
1590 * @wr: Whether this is a write access.
1591 *
1592 * Return:
1593 * * %0 if the shadow fault was successfully resolved
1594 * * > 0 (pgm exception code) on exceptions while faulting
1595 * * %-EAGAIN if the caller can retry immediately
1596 * * %-EFAULT when accessing invalid guest addresses
1597 * * %-ENOMEM if out of memory
1598 */
__gaccess_shadow_fault(struct kvm_vcpu * vcpu,struct gmap * sg,gpa_t saddr,union mvpg_pei * datptr,bool wr)1599 static int __gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
1600 union mvpg_pei *datptr, bool wr)
1601 {
1602 struct pgtwalk walk = { .p = false, };
1603 unsigned long seq;
1604 int rc;
1605
1606 seq = vcpu->kvm->mmu_invalidate_seq;
1607 /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
1608 smp_rmb();
1609
1610 rc = walk_guest_tables(sg, saddr, &walk, wr);
1611 if (datptr) {
1612 datptr->val = walk.last_addr;
1613 datptr->dat_prot = wr && walk.p;
1614 datptr->not_pte = walk.level > TABLE_TYPE_PAGE_TABLE;
1615 datptr->real = sg->guest_asce.r;
1616 }
1617 if (!rc)
1618 rc = _gaccess_shadow_fault(vcpu, sg, saddr, seq, &walk);
1619 if (rc)
1620 kvm_s390_release_faultin_array(vcpu->kvm, walk.raw_entries, true);
1621 return rc;
1622 }
1623
/*
 * Resolve a fault at @saddr in the shadow gmap @sg.
 *
 * @sg must be a shadow gmap (GMAP_FLAG_SHADOW), otherwise -EFAULT is
 * returned. The mmu cache is topped up first, then the fault is handled
 * with the IPTE lock held. If the r bit of the guest ASCE is set, the fault
 * is handled as a write regardless of @wr.
 *
 * Return: the result of __gaccess_shadow_fault(), or the error from the
 * initial checks.
 */
int gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
			 union mvpg_pei *datptr, bool wr)
{
	bool as_write;
	int rc;

	if (KVM_BUG_ON(!test_bit(GMAP_FLAG_SHADOW, &sg->flags), vcpu->kvm))
		return -EFAULT;

	rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
	if (rc)
		return rc;

	ipte_lock(vcpu->kvm);
	as_write = wr || sg->guest_asce.r;
	rc = __gaccess_shadow_fault(vcpu, sg, saddr, datptr, as_write);
	ipte_unlock(vcpu->kvm);

	return rc;
}
1642