xref: /linux/arch/s390/kvm/kvm-s390.c (revision 4c0c5bbc89cda1c57ce0fb36d917693396b8b065)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10  *               Jason J. Herne <jjherne@us.ibm.com>
11  */
12 
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15 
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 #include <linux/mmu_notifier.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 #include "pci.h"
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
57 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 
/*
 * Descriptors of the per-VM statistics: the generic KVM VM statistics
 * followed by the s390 specific counters. The number and size of these
 * entries feed into kvm_vm_stats_header below.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio),
	STATS_DESC_COUNTER(VM, aen_forward)
};
71 
/*
 * Layout description of the per-VM statistics. The offsets are computed so
 * that the header comes first, followed by KVM_STATS_NAME_SIZE bytes for the
 * id, then the descriptor table (kvm_vm_stats_desc) and finally the data.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
80 
/*
 * Descriptors of the per-VCPU statistics: the generic KVM VCPU statistics
 * followed by the s390 specific counters. The number and size of these
 * entries feed into kvm_vcpu_stats_header below.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};
177 
/*
 * Layout description of the per-VCPU statistics. The offsets are computed so
 * that the header comes first, followed by KVM_STATS_NAME_SIZE bytes for the
 * id, then the descriptor table (kvm_vcpu_stats_desc) and finally the data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
186 
187 /* allow nested virtualization in KVM (if enabled by user space) */
188 static int nested;
189 module_param(nested, int, S_IRUGO);
190 MODULE_PARM_DESC(nested, "Nested virtualization support");
191 
192 /* allow 1m huge page guest backing, if !nested */
193 static int hpage;
194 module_param(hpage, int, 0444);
195 MODULE_PARM_DESC(hpage, "1m huge page backing support");
196 
197 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
198 static u8 halt_poll_max_steal = 10;
199 module_param(halt_poll_max_steal, byte, 0644);
200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
201 
202 /* if set to true, the GISA will be initialized and used if available */
203 static bool use_gisa  = true;
204 module_param(use_gisa, bool, 0644);
205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
206 
207 /* maximum diag9c forwarding per second */
208 unsigned int diag9c_forwarding_hz;
209 module_param(diag9c_forwarding_hz, uint, 0644);
210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211 
/*
 * Number of double words (64 facility bits each) of the facility mask/list
 * that KVM handles internally.
 *
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Only the leading entries are spelled out by FACILITIES_KVM; the remaining
 * ones are zero-initialized.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
229 
230 static unsigned long kvm_s390_fac_size(void)
231 {
232 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
233 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
234 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
235 		sizeof(stfle_fac_list));
236 
237 	return SIZE_INTERNAL;
238 }
239 
/* available cpu features supported by kvm; bits get set via allow_cpu_feat() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/*
 * available subfunctions indicated via query / "test bit";
 * filled in by kvm_s390_cpu_feat_init()
 */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers, registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature areas "kvm-trace" and "kvm-uv", see kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
249 
250 /* Section: not file related */
/*
 * Nothing has to be switched on: every s390 is virtualization enabled.
 *
 * Return: always 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
256 
/*
 * Processor compatibility check; @opaque is not looked at and no check is
 * performed here.
 *
 * Return: always 0.
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
261 
262 /* forward declarations */
263 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
264 			      unsigned long end);
265 static int sca_switch_to_extended(struct kvm *kvm);
266 
267 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
268 {
269 	u8 delta_idx = 0;
270 
271 	/*
272 	 * The TOD jumps by delta, we have to compensate this by adding
273 	 * -delta to the epoch.
274 	 */
275 	delta = -delta;
276 
277 	/* sign-extension - we're adding to signed values below */
278 	if ((s64)delta < 0)
279 		delta_idx = -1;
280 
281 	scb->epoch += delta;
282 	if (scb->ecd & ECD_MEF) {
283 		scb->epdx += delta_idx;
284 		if (scb->epoch < delta)
285 			scb->epdx += 1;
286 	}
287 }
288 
289 /*
290  * This callback is executed during stop_machine(). All CPUs are therefore
291  * temporarily stopped. In order not to change guest behavior, we have to
292  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
293  * so a CPU won't be stopped while calculating with the epoch.
294  */
295 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
296 			  void *v)
297 {
298 	struct kvm *kvm;
299 	struct kvm_vcpu *vcpu;
300 	unsigned long i;
301 	unsigned long long *delta = v;
302 
303 	list_for_each_entry(kvm, &vm_list, vm_list) {
304 		kvm_for_each_vcpu(i, vcpu, kvm) {
305 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
306 			if (i == 0) {
307 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
308 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
309 			}
310 			if (vcpu->arch.cputm_enabled)
311 				vcpu->arch.cputm_start += *delta;
312 			if (vcpu->arch.vsie_block)
313 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
314 						   *delta);
315 		}
316 	}
317 	return NOTIFY_OK;
318 }
319 
/*
 * Hooks kvm_clock_sync() into s390_epoch_delta_notifier; (un)registered in
 * kvm_arch_hardware_setup() / kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
323 
324 int kvm_arch_hardware_setup(void *opaque)
325 {
326 	gmap_notifier.notifier_call = kvm_gmap_notifier;
327 	gmap_register_pte_notifier(&gmap_notifier);
328 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
329 	gmap_register_pte_notifier(&vsie_gmap_notifier);
330 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
331 				       &kvm_clock_notifier);
332 	return 0;
333 }
334 
/*
 * Counterpart of kvm_arch_hardware_setup(): unregister both gmap pte
 * notifiers and the TOD epoch delta notifier again.
 */
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
342 
/*
 * Mark cpu feature @nr (one of the KVM_S390_VM_CPU_FEAT_* values) as
 * available in kvm_s390_available_cpu_feat. The bitmap uses the inverted
 * bit numbering of set_bit_inv().
 */
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
347 
/*
 * Probe whether PLO function code @nr is available.
 *
 * The function code is or'ed with 0x100 (the "test bit" request, see the
 * comment in the asm below) and placed into general register 0 before PLO
 * is issued. The resulting condition code is extracted with ipm/srl.
 *
 * Return: 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* fetch the program mask / condition code ... */
		"	ipm	%0\n"
		/* ... and shift the condition code down to the low bits */
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}
364 
/*
 * Issue the RRF format instruction with opcode @opcode (a compile-time
 * constant, it is passed as an immediate) with general register 0 set to 0
 * and general register 1 holding the address of @query.
 *
 * NOTE(review): presumably the instruction stores its query result into the
 * buffer at @query (hence the "memory" clobber); the required buffer size is
 * not visible here - confirm against the instruction definition.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}
376 
377 #define INSN_SORTL 0xb938
378 #define INSN_DFLTCC 0xb939
379 
380 static void kvm_s390_cpu_feat_init(void)
381 {
382 	int i;
383 
384 	for (i = 0; i < 256; ++i) {
385 		if (plo_test_bit(i))
386 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
387 	}
388 
389 	if (test_facility(28)) /* TOD-clock steering */
390 		ptff(kvm_s390_available_subfunc.ptff,
391 		     sizeof(kvm_s390_available_subfunc.ptff),
392 		     PTFF_QAF);
393 
394 	if (test_facility(17)) { /* MSA */
395 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmac);
397 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.kmc);
399 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.km);
401 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.kimd);
403 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
404 			      kvm_s390_available_subfunc.klmd);
405 	}
406 	if (test_facility(76)) /* MSA3 */
407 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
408 			      kvm_s390_available_subfunc.pckmo);
409 	if (test_facility(77)) { /* MSA4 */
410 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kmctr);
412 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmf);
414 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.kmo);
416 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
417 			      kvm_s390_available_subfunc.pcc);
418 	}
419 	if (test_facility(57)) /* MSA5 */
420 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
421 			      kvm_s390_available_subfunc.ppno);
422 
423 	if (test_facility(146)) /* MSA8 */
424 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
425 			      kvm_s390_available_subfunc.kma);
426 
427 	if (test_facility(155)) /* MSA9 */
428 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
429 			      kvm_s390_available_subfunc.kdsa);
430 
431 	if (test_facility(150)) /* SORTL */
432 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
433 
434 	if (test_facility(151)) /* DFLTCC */
435 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
436 
437 	if (MACHINE_HAS_ESOP)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
439 	/*
440 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
441 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
442 	 */
443 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
444 	    !test_facility(3) || !nested)
445 		return;
446 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
447 	if (sclp.has_64bscao)
448 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
449 	if (sclp.has_siif)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
451 	if (sclp.has_gpere)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
453 	if (sclp.has_gsls)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
455 	if (sclp.has_ib)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
457 	if (sclp.has_cei)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
459 	if (sclp.has_ibs)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
461 	if (sclp.has_kss)
462 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
463 	/*
464 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
465 	 * all skey handling functions read/set the skey from the PGSTE
466 	 * instead of the real storage key.
467 	 *
468 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
469 	 * pages being detected as preserved although they are resident.
470 	 *
471 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
472 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
473 	 *
474 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
475 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
476 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
477 	 *
478 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
479 	 * cannot easily shadow the SCA because of the ipte lock.
480 	 */
481 }
482 
483 int kvm_arch_init(void *opaque)
484 {
485 	int rc = -ENOMEM;
486 
487 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
488 	if (!kvm_s390_dbf)
489 		return -ENOMEM;
490 
491 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
492 	if (!kvm_s390_dbf_uv)
493 		goto out;
494 
495 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
496 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
497 		goto out;
498 
499 	kvm_s390_cpu_feat_init();
500 
501 	/* Register floating interrupt controller interface. */
502 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
503 	if (rc) {
504 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
505 		goto out;
506 	}
507 
508 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
509 		rc = kvm_s390_pci_init();
510 		if (rc) {
511 			pr_err("Unable to allocate AIFT for PCI\n");
512 			goto out;
513 		}
514 	}
515 
516 	rc = kvm_s390_gib_init(GAL_ISC);
517 	if (rc)
518 		goto out;
519 
520 	return 0;
521 
522 out:
523 	kvm_arch_exit();
524 	return rc;
525 }
526 
/*
 * Undo kvm_arch_init(): destroy the GIB, tear down the PCI support (only
 * with CONFIG_VFIO_PCI_ZDEV_KVM) and unregister both debug areas.
 *
 * kvm_arch_init() also calls this from its error path, i.e. possibly before
 * the GIB and PCI setup steps were reached.
 * NOTE(review): presumably every teardown helper used here tolerates that
 * its setup counterpart did not run - confirm when changing the helpers.
 */
void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
		kvm_s390_pci_exit();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}
535 
536 /* Section: device related */
537 long kvm_arch_dev_ioctl(struct file *filp,
538 			unsigned int ioctl, unsigned long arg)
539 {
540 	if (ioctl == KVM_S390_ENABLE_SIE)
541 		return s390_enable_sie();
542 	return -EINVAL;
543 }
544 
545 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
546 {
547 	int r;
548 
549 	switch (ext) {
550 	case KVM_CAP_S390_PSW:
551 	case KVM_CAP_S390_GMAP:
552 	case KVM_CAP_SYNC_MMU:
553 #ifdef CONFIG_KVM_S390_UCONTROL
554 	case KVM_CAP_S390_UCONTROL:
555 #endif
556 	case KVM_CAP_ASYNC_PF:
557 	case KVM_CAP_SYNC_REGS:
558 	case KVM_CAP_ONE_REG:
559 	case KVM_CAP_ENABLE_CAP:
560 	case KVM_CAP_S390_CSS_SUPPORT:
561 	case KVM_CAP_IOEVENTFD:
562 	case KVM_CAP_DEVICE_CTRL:
563 	case KVM_CAP_S390_IRQCHIP:
564 	case KVM_CAP_VM_ATTRIBUTES:
565 	case KVM_CAP_MP_STATE:
566 	case KVM_CAP_IMMEDIATE_EXIT:
567 	case KVM_CAP_S390_INJECT_IRQ:
568 	case KVM_CAP_S390_USER_SIGP:
569 	case KVM_CAP_S390_USER_STSI:
570 	case KVM_CAP_S390_SKEYS:
571 	case KVM_CAP_S390_IRQ_STATE:
572 	case KVM_CAP_S390_USER_INSTR0:
573 	case KVM_CAP_S390_CMMA_MIGRATION:
574 	case KVM_CAP_S390_AIS:
575 	case KVM_CAP_S390_AIS_MIGRATION:
576 	case KVM_CAP_S390_VCPU_RESETS:
577 	case KVM_CAP_SET_GUEST_DEBUG:
578 	case KVM_CAP_S390_DIAG318:
579 	case KVM_CAP_S390_MEM_OP_EXTENSION:
580 		r = 1;
581 		break;
582 	case KVM_CAP_SET_GUEST_DEBUG2:
583 		r = KVM_GUESTDBG_VALID_MASK;
584 		break;
585 	case KVM_CAP_S390_HPAGE_1M:
586 		r = 0;
587 		if (hpage && !kvm_is_ucontrol(kvm))
588 			r = 1;
589 		break;
590 	case KVM_CAP_S390_MEM_OP:
591 		r = MEM_OP_MAX_SIZE;
592 		break;
593 	case KVM_CAP_NR_VCPUS:
594 	case KVM_CAP_MAX_VCPUS:
595 	case KVM_CAP_MAX_VCPU_ID:
596 		r = KVM_S390_BSCA_CPU_SLOTS;
597 		if (!kvm_s390_use_sca_entries())
598 			r = KVM_MAX_VCPUS;
599 		else if (sclp.has_esca && sclp.has_64bscao)
600 			r = KVM_S390_ESCA_CPU_SLOTS;
601 		if (ext == KVM_CAP_NR_VCPUS)
602 			r = min_t(unsigned int, num_online_cpus(), r);
603 		break;
604 	case KVM_CAP_S390_COW:
605 		r = MACHINE_HAS_ESOP;
606 		break;
607 	case KVM_CAP_S390_VECTOR_REGISTERS:
608 		r = MACHINE_HAS_VX;
609 		break;
610 	case KVM_CAP_S390_RI:
611 		r = test_facility(64);
612 		break;
613 	case KVM_CAP_S390_GS:
614 		r = test_facility(133);
615 		break;
616 	case KVM_CAP_S390_BPB:
617 		r = test_facility(82);
618 		break;
619 	case KVM_CAP_S390_PROTECTED:
620 		r = is_prot_virt_host();
621 		break;
622 	case KVM_CAP_S390_PROTECTED_DUMP: {
623 		u64 pv_cmds_dump[] = {
624 			BIT_UVC_CMD_DUMP_INIT,
625 			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
626 			BIT_UVC_CMD_DUMP_CPU,
627 			BIT_UVC_CMD_DUMP_COMPLETE,
628 		};
629 		int i;
630 
631 		r = is_prot_virt_host();
632 
633 		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
634 			if (!test_bit_inv(pv_cmds_dump[i],
635 					  (unsigned long *)&uv_info.inst_calls_list)) {
636 				r = 0;
637 				break;
638 			}
639 		}
640 		break;
641 	}
642 	case KVM_CAP_S390_ZPCI_OP:
643 		r = kvm_s390_pci_interp_allowed();
644 		break;
645 	case KVM_CAP_S390_CPU_TOPOLOGY:
646 		r = test_facility(11);
647 		break;
648 	default:
649 		r = 0;
650 	}
651 	return r;
652 }
653 
654 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
655 {
656 	int i;
657 	gfn_t cur_gfn, last_gfn;
658 	unsigned long gaddr, vmaddr;
659 	struct gmap *gmap = kvm->arch.gmap;
660 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
661 
662 	/* Loop over all guest segments */
663 	cur_gfn = memslot->base_gfn;
664 	last_gfn = memslot->base_gfn + memslot->npages;
665 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
666 		gaddr = gfn_to_gpa(cur_gfn);
667 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
668 		if (kvm_is_error_hva(vmaddr))
669 			continue;
670 
671 		bitmap_zero(bitmap, _PAGE_ENTRIES);
672 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
673 		for (i = 0; i < _PAGE_ENTRIES; i++) {
674 			if (test_bit(i, bitmap))
675 				mark_page_dirty(kvm, cur_gfn + i);
676 		}
677 
678 		if (fatal_signal_pending(current))
679 			return;
680 		cond_resched();
681 	}
682 }
683 
684 /* Section: vm related */
685 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
686 
687 /*
688  * Get (and clear) the dirty memory log for a memory slot.
689  */
690 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
691 			       struct kvm_dirty_log *log)
692 {
693 	int r;
694 	unsigned long n;
695 	struct kvm_memory_slot *memslot;
696 	int is_dirty;
697 
698 	if (kvm_is_ucontrol(kvm))
699 		return -EINVAL;
700 
701 	mutex_lock(&kvm->slots_lock);
702 
703 	r = -EINVAL;
704 	if (log->slot >= KVM_USER_MEM_SLOTS)
705 		goto out;
706 
707 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
708 	if (r)
709 		goto out;
710 
711 	/* Clear the dirty log */
712 	if (is_dirty) {
713 		n = kvm_dirty_bitmap_bytes(memslot);
714 		memset(memslot->dirty_bitmap, 0, n);
715 	}
716 	r = 0;
717 out:
718 	mutex_unlock(&kvm->slots_lock);
719 	return r;
720 }
721 
722 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
723 {
724 	unsigned long i;
725 	struct kvm_vcpu *vcpu;
726 
727 	kvm_for_each_vcpu(i, vcpu, kvm) {
728 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
729 	}
730 }
731 
732 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
733 {
734 	int r;
735 
736 	if (cap->flags)
737 		return -EINVAL;
738 
739 	switch (cap->cap) {
740 	case KVM_CAP_S390_IRQCHIP:
741 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
742 		kvm->arch.use_irqchip = 1;
743 		r = 0;
744 		break;
745 	case KVM_CAP_S390_USER_SIGP:
746 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
747 		kvm->arch.user_sigp = 1;
748 		r = 0;
749 		break;
750 	case KVM_CAP_S390_VECTOR_REGISTERS:
751 		mutex_lock(&kvm->lock);
752 		if (kvm->created_vcpus) {
753 			r = -EBUSY;
754 		} else if (MACHINE_HAS_VX) {
755 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
756 			set_kvm_facility(kvm->arch.model.fac_list, 129);
757 			if (test_facility(134)) {
758 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
759 				set_kvm_facility(kvm->arch.model.fac_list, 134);
760 			}
761 			if (test_facility(135)) {
762 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
763 				set_kvm_facility(kvm->arch.model.fac_list, 135);
764 			}
765 			if (test_facility(148)) {
766 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
767 				set_kvm_facility(kvm->arch.model.fac_list, 148);
768 			}
769 			if (test_facility(152)) {
770 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
771 				set_kvm_facility(kvm->arch.model.fac_list, 152);
772 			}
773 			if (test_facility(192)) {
774 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
775 				set_kvm_facility(kvm->arch.model.fac_list, 192);
776 			}
777 			r = 0;
778 		} else
779 			r = -EINVAL;
780 		mutex_unlock(&kvm->lock);
781 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
782 			 r ? "(not available)" : "(success)");
783 		break;
784 	case KVM_CAP_S390_RI:
785 		r = -EINVAL;
786 		mutex_lock(&kvm->lock);
787 		if (kvm->created_vcpus) {
788 			r = -EBUSY;
789 		} else if (test_facility(64)) {
790 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
791 			set_kvm_facility(kvm->arch.model.fac_list, 64);
792 			r = 0;
793 		}
794 		mutex_unlock(&kvm->lock);
795 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
796 			 r ? "(not available)" : "(success)");
797 		break;
798 	case KVM_CAP_S390_AIS:
799 		mutex_lock(&kvm->lock);
800 		if (kvm->created_vcpus) {
801 			r = -EBUSY;
802 		} else {
803 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
804 			set_kvm_facility(kvm->arch.model.fac_list, 72);
805 			r = 0;
806 		}
807 		mutex_unlock(&kvm->lock);
808 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
809 			 r ? "(not available)" : "(success)");
810 		break;
811 	case KVM_CAP_S390_GS:
812 		r = -EINVAL;
813 		mutex_lock(&kvm->lock);
814 		if (kvm->created_vcpus) {
815 			r = -EBUSY;
816 		} else if (test_facility(133)) {
817 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
818 			set_kvm_facility(kvm->arch.model.fac_list, 133);
819 			r = 0;
820 		}
821 		mutex_unlock(&kvm->lock);
822 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
823 			 r ? "(not available)" : "(success)");
824 		break;
825 	case KVM_CAP_S390_HPAGE_1M:
826 		mutex_lock(&kvm->lock);
827 		if (kvm->created_vcpus)
828 			r = -EBUSY;
829 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
830 			r = -EINVAL;
831 		else {
832 			r = 0;
833 			mmap_write_lock(kvm->mm);
834 			kvm->mm->context.allow_gmap_hpage_1m = 1;
835 			mmap_write_unlock(kvm->mm);
836 			/*
837 			 * We might have to create fake 4k page
838 			 * tables. To avoid that the hardware works on
839 			 * stale PGSTEs, we emulate these instructions.
840 			 */
841 			kvm->arch.use_skf = 0;
842 			kvm->arch.use_pfmfi = 0;
843 		}
844 		mutex_unlock(&kvm->lock);
845 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
846 			 r ? "(not available)" : "(success)");
847 		break;
848 	case KVM_CAP_S390_USER_STSI:
849 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
850 		kvm->arch.user_stsi = 1;
851 		r = 0;
852 		break;
853 	case KVM_CAP_S390_USER_INSTR0:
854 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
855 		kvm->arch.user_instr0 = 1;
856 		icpt_operexc_on_all_vcpus(kvm);
857 		r = 0;
858 		break;
859 	case KVM_CAP_S390_CPU_TOPOLOGY:
860 		r = -EINVAL;
861 		mutex_lock(&kvm->lock);
862 		if (kvm->created_vcpus) {
863 			r = -EBUSY;
864 		} else if (test_facility(11)) {
865 			set_kvm_facility(kvm->arch.model.fac_mask, 11);
866 			set_kvm_facility(kvm->arch.model.fac_list, 11);
867 			r = 0;
868 		}
869 		mutex_unlock(&kvm->lock);
870 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
871 			 r ? "(not available)" : "(success)");
872 		break;
873 	default:
874 		r = -EINVAL;
875 		break;
876 	}
877 	return r;
878 }
879 
880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
881 {
882 	int ret;
883 
884 	switch (attr->attr) {
885 	case KVM_S390_VM_MEM_LIMIT_SIZE:
886 		ret = 0;
887 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
888 			 kvm->arch.mem_limit);
889 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
890 			ret = -EFAULT;
891 		break;
892 	default:
893 		ret = -ENXIO;
894 		break;
895 	}
896 	return ret;
897 }
898 
899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
900 {
901 	int ret;
902 	unsigned int idx;
903 	switch (attr->attr) {
904 	case KVM_S390_VM_MEM_ENABLE_CMMA:
905 		ret = -ENXIO;
906 		if (!sclp.has_cmma)
907 			break;
908 
909 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
910 		mutex_lock(&kvm->lock);
911 		if (kvm->created_vcpus)
912 			ret = -EBUSY;
913 		else if (kvm->mm->context.allow_gmap_hpage_1m)
914 			ret = -EINVAL;
915 		else {
916 			kvm->arch.use_cmma = 1;
917 			/* Not compatible with cmma. */
918 			kvm->arch.use_pfmfi = 0;
919 			ret = 0;
920 		}
921 		mutex_unlock(&kvm->lock);
922 		break;
923 	case KVM_S390_VM_MEM_CLR_CMMA:
924 		ret = -ENXIO;
925 		if (!sclp.has_cmma)
926 			break;
927 		ret = -EINVAL;
928 		if (!kvm->arch.use_cmma)
929 			break;
930 
931 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
932 		mutex_lock(&kvm->lock);
933 		idx = srcu_read_lock(&kvm->srcu);
934 		s390_reset_cmma(kvm->arch.gmap->mm);
935 		srcu_read_unlock(&kvm->srcu, idx);
936 		mutex_unlock(&kvm->lock);
937 		ret = 0;
938 		break;
939 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
940 		unsigned long new_limit;
941 
942 		if (kvm_is_ucontrol(kvm))
943 			return -EINVAL;
944 
945 		if (get_user(new_limit, (u64 __user *)attr->addr))
946 			return -EFAULT;
947 
948 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
949 		    new_limit > kvm->arch.mem_limit)
950 			return -E2BIG;
951 
952 		if (!new_limit)
953 			return -EINVAL;
954 
955 		/* gmap_create takes last usable address */
956 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
957 			new_limit -= 1;
958 
959 		ret = -EBUSY;
960 		mutex_lock(&kvm->lock);
961 		if (!kvm->created_vcpus) {
962 			/* gmap_create will round the limit up */
963 			struct gmap *new = gmap_create(current->mm, new_limit);
964 
965 			if (!new) {
966 				ret = -ENOMEM;
967 			} else {
968 				gmap_remove(kvm->arch.gmap);
969 				new->private = kvm;
970 				kvm->arch.gmap = new;
971 				ret = 0;
972 			}
973 		}
974 		mutex_unlock(&kvm->lock);
975 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
976 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
977 			 (void *) kvm->arch.gmap->asce);
978 		break;
979 	}
980 	default:
981 		ret = -ENXIO;
982 		break;
983 	}
984 	return ret;
985 }
986 
987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
988 
989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
990 {
991 	struct kvm_vcpu *vcpu;
992 	unsigned long i;
993 
994 	kvm_s390_vcpu_block_all(kvm);
995 
996 	kvm_for_each_vcpu(i, vcpu, kvm) {
997 		kvm_s390_vcpu_crypto_setup(vcpu);
998 		/* recreate the shadow crycb by leaving the VSIE handler */
999 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1000 	}
1001 
1002 	kvm_s390_vcpu_unblock_all(kvm);
1003 }
1004 
1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007 	mutex_lock(&kvm->lock);
1008 	switch (attr->attr) {
1009 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010 		if (!test_kvm_facility(kvm, 76)) {
1011 			mutex_unlock(&kvm->lock);
1012 			return -EINVAL;
1013 		}
1014 		get_random_bytes(
1015 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1016 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1017 		kvm->arch.crypto.aes_kw = 1;
1018 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1019 		break;
1020 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1021 		if (!test_kvm_facility(kvm, 76)) {
1022 			mutex_unlock(&kvm->lock);
1023 			return -EINVAL;
1024 		}
1025 		get_random_bytes(
1026 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1027 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1028 		kvm->arch.crypto.dea_kw = 1;
1029 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1030 		break;
1031 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1032 		if (!test_kvm_facility(kvm, 76)) {
1033 			mutex_unlock(&kvm->lock);
1034 			return -EINVAL;
1035 		}
1036 		kvm->arch.crypto.aes_kw = 0;
1037 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1038 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1039 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1040 		break;
1041 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1042 		if (!test_kvm_facility(kvm, 76)) {
1043 			mutex_unlock(&kvm->lock);
1044 			return -EINVAL;
1045 		}
1046 		kvm->arch.crypto.dea_kw = 0;
1047 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1048 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1049 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1050 		break;
1051 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1052 		if (!ap_instructions_available()) {
1053 			mutex_unlock(&kvm->lock);
1054 			return -EOPNOTSUPP;
1055 		}
1056 		kvm->arch.crypto.apie = 1;
1057 		break;
1058 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1059 		if (!ap_instructions_available()) {
1060 			mutex_unlock(&kvm->lock);
1061 			return -EOPNOTSUPP;
1062 		}
1063 		kvm->arch.crypto.apie = 0;
1064 		break;
1065 	default:
1066 		mutex_unlock(&kvm->lock);
1067 		return -ENXIO;
1068 	}
1069 
1070 	kvm_s390_vcpu_crypto_reset_all(kvm);
1071 	mutex_unlock(&kvm->lock);
1072 	return 0;
1073 }
1074 
1075 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1076 {
1077 	/* Only set the ECB bits after guest requests zPCI interpretation */
1078 	if (!vcpu->kvm->arch.use_zpci_interp)
1079 		return;
1080 
1081 	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1082 	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1083 }
1084 
1085 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1086 {
1087 	struct kvm_vcpu *vcpu;
1088 	unsigned long i;
1089 
1090 	lockdep_assert_held(&kvm->lock);
1091 
1092 	if (!kvm_s390_pci_interp_allowed())
1093 		return;
1094 
1095 	/*
1096 	 * If host is configured for PCI and the necessary facilities are
1097 	 * available, turn on interpretation for the life of this guest
1098 	 */
1099 	kvm->arch.use_zpci_interp = 1;
1100 
1101 	kvm_s390_vcpu_block_all(kvm);
1102 
1103 	kvm_for_each_vcpu(i, vcpu, kvm) {
1104 		kvm_s390_vcpu_pci_setup(vcpu);
1105 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1106 	}
1107 
1108 	kvm_s390_vcpu_unblock_all(kvm);
1109 }
1110 
1111 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1112 {
1113 	unsigned long cx;
1114 	struct kvm_vcpu *vcpu;
1115 
1116 	kvm_for_each_vcpu(cx, vcpu, kvm)
1117 		kvm_s390_sync_request(req, vcpu);
1118 }
1119 
1120 /*
1121  * Must be called with kvm->srcu held to avoid races on memslots, and with
1122  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1123  */
1124 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1125 {
1126 	struct kvm_memory_slot *ms;
1127 	struct kvm_memslots *slots;
1128 	unsigned long ram_pages = 0;
1129 	int bkt;
1130 
1131 	/* migration mode already enabled */
1132 	if (kvm->arch.migration_mode)
1133 		return 0;
1134 	slots = kvm_memslots(kvm);
1135 	if (!slots || kvm_memslots_empty(slots))
1136 		return -EINVAL;
1137 
1138 	if (!kvm->arch.use_cmma) {
1139 		kvm->arch.migration_mode = 1;
1140 		return 0;
1141 	}
1142 	/* mark all the pages in active slots as dirty */
1143 	kvm_for_each_memslot(ms, bkt, slots) {
1144 		if (!ms->dirty_bitmap)
1145 			return -EINVAL;
1146 		/*
1147 		 * The second half of the bitmap is only used on x86,
1148 		 * and would be wasted otherwise, so we put it to good
1149 		 * use here to keep track of the state of the storage
1150 		 * attributes.
1151 		 */
1152 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1153 		ram_pages += ms->npages;
1154 	}
1155 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1156 	kvm->arch.migration_mode = 1;
1157 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1158 	return 0;
1159 }
1160 
1161 /*
1162  * Must be called with kvm->slots_lock to avoid races with ourselves and
1163  * kvm_s390_vm_start_migration.
1164  */
1165 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1166 {
1167 	/* migration mode already disabled */
1168 	if (!kvm->arch.migration_mode)
1169 		return 0;
1170 	kvm->arch.migration_mode = 0;
1171 	if (kvm->arch.use_cmma)
1172 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1173 	return 0;
1174 }
1175 
1176 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1177 				     struct kvm_device_attr *attr)
1178 {
1179 	int res = -ENXIO;
1180 
1181 	mutex_lock(&kvm->slots_lock);
1182 	switch (attr->attr) {
1183 	case KVM_S390_VM_MIGRATION_START:
1184 		res = kvm_s390_vm_start_migration(kvm);
1185 		break;
1186 	case KVM_S390_VM_MIGRATION_STOP:
1187 		res = kvm_s390_vm_stop_migration(kvm);
1188 		break;
1189 	default:
1190 		break;
1191 	}
1192 	mutex_unlock(&kvm->slots_lock);
1193 
1194 	return res;
1195 }
1196 
1197 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1198 				     struct kvm_device_attr *attr)
1199 {
1200 	u64 mig = kvm->arch.migration_mode;
1201 
1202 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1203 		return -ENXIO;
1204 
1205 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1206 		return -EFAULT;
1207 	return 0;
1208 }
1209 
1210 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 	struct kvm_s390_vm_tod_clock gtod;
1213 
1214 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1215 		return -EFAULT;
1216 
1217 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1218 		return -EINVAL;
1219 	kvm_s390_set_tod_clock(kvm, &gtod);
1220 
1221 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1222 		gtod.epoch_idx, gtod.tod);
1223 
1224 	return 0;
1225 }
1226 
1227 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228 {
1229 	u8 gtod_high;
1230 
1231 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1232 					   sizeof(gtod_high)))
1233 		return -EFAULT;
1234 
1235 	if (gtod_high != 0)
1236 		return -EINVAL;
1237 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1238 
1239 	return 0;
1240 }
1241 
1242 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1243 {
1244 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1245 
1246 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1247 			   sizeof(gtod.tod)))
1248 		return -EFAULT;
1249 
1250 	kvm_s390_set_tod_clock(kvm, &gtod);
1251 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1252 	return 0;
1253 }
1254 
1255 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1256 {
1257 	int ret;
1258 
1259 	if (attr->flags)
1260 		return -EINVAL;
1261 
1262 	switch (attr->attr) {
1263 	case KVM_S390_VM_TOD_EXT:
1264 		ret = kvm_s390_set_tod_ext(kvm, attr);
1265 		break;
1266 	case KVM_S390_VM_TOD_HIGH:
1267 		ret = kvm_s390_set_tod_high(kvm, attr);
1268 		break;
1269 	case KVM_S390_VM_TOD_LOW:
1270 		ret = kvm_s390_set_tod_low(kvm, attr);
1271 		break;
1272 	default:
1273 		ret = -ENXIO;
1274 		break;
1275 	}
1276 	return ret;
1277 }
1278 
1279 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1280 				   struct kvm_s390_vm_tod_clock *gtod)
1281 {
1282 	union tod_clock clk;
1283 
1284 	preempt_disable();
1285 
1286 	store_tod_clock_ext(&clk);
1287 
1288 	gtod->tod = clk.tod + kvm->arch.epoch;
1289 	gtod->epoch_idx = 0;
1290 	if (test_kvm_facility(kvm, 139)) {
1291 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1292 		if (gtod->tod < clk.tod)
1293 			gtod->epoch_idx += 1;
1294 	}
1295 
1296 	preempt_enable();
1297 }
1298 
1299 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1300 {
1301 	struct kvm_s390_vm_tod_clock gtod;
1302 
1303 	memset(&gtod, 0, sizeof(gtod));
1304 	kvm_s390_get_tod_clock(kvm, &gtod);
1305 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1306 		return -EFAULT;
1307 
1308 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1309 		gtod.epoch_idx, gtod.tod);
1310 	return 0;
1311 }
1312 
1313 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1314 {
1315 	u8 gtod_high = 0;
1316 
1317 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1318 					 sizeof(gtod_high)))
1319 		return -EFAULT;
1320 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1321 
1322 	return 0;
1323 }
1324 
1325 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1326 {
1327 	u64 gtod;
1328 
1329 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1330 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1331 		return -EFAULT;
1332 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1333 
1334 	return 0;
1335 }
1336 
1337 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1338 {
1339 	int ret;
1340 
1341 	if (attr->flags)
1342 		return -EINVAL;
1343 
1344 	switch (attr->attr) {
1345 	case KVM_S390_VM_TOD_EXT:
1346 		ret = kvm_s390_get_tod_ext(kvm, attr);
1347 		break;
1348 	case KVM_S390_VM_TOD_HIGH:
1349 		ret = kvm_s390_get_tod_high(kvm, attr);
1350 		break;
1351 	case KVM_S390_VM_TOD_LOW:
1352 		ret = kvm_s390_get_tod_low(kvm, attr);
1353 		break;
1354 	default:
1355 		ret = -ENXIO;
1356 		break;
1357 	}
1358 	return ret;
1359 }
1360 
1361 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1362 {
1363 	struct kvm_s390_vm_cpu_processor *proc;
1364 	u16 lowest_ibc, unblocked_ibc;
1365 	int ret = 0;
1366 
1367 	mutex_lock(&kvm->lock);
1368 	if (kvm->created_vcpus) {
1369 		ret = -EBUSY;
1370 		goto out;
1371 	}
1372 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1373 	if (!proc) {
1374 		ret = -ENOMEM;
1375 		goto out;
1376 	}
1377 	if (!copy_from_user(proc, (void __user *)attr->addr,
1378 			    sizeof(*proc))) {
1379 		kvm->arch.model.cpuid = proc->cpuid;
1380 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1381 		unblocked_ibc = sclp.ibc & 0xfff;
1382 		if (lowest_ibc && proc->ibc) {
1383 			if (proc->ibc > unblocked_ibc)
1384 				kvm->arch.model.ibc = unblocked_ibc;
1385 			else if (proc->ibc < lowest_ibc)
1386 				kvm->arch.model.ibc = lowest_ibc;
1387 			else
1388 				kvm->arch.model.ibc = proc->ibc;
1389 		}
1390 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1391 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1392 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1393 			 kvm->arch.model.ibc,
1394 			 kvm->arch.model.cpuid);
1395 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1396 			 kvm->arch.model.fac_list[0],
1397 			 kvm->arch.model.fac_list[1],
1398 			 kvm->arch.model.fac_list[2]);
1399 	} else
1400 		ret = -EFAULT;
1401 	kfree(proc);
1402 out:
1403 	mutex_unlock(&kvm->lock);
1404 	return ret;
1405 }
1406 
1407 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1408 				       struct kvm_device_attr *attr)
1409 {
1410 	struct kvm_s390_vm_cpu_feat data;
1411 
1412 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1413 		return -EFAULT;
1414 	if (!bitmap_subset((unsigned long *) data.feat,
1415 			   kvm_s390_available_cpu_feat,
1416 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1417 		return -EINVAL;
1418 
1419 	mutex_lock(&kvm->lock);
1420 	if (kvm->created_vcpus) {
1421 		mutex_unlock(&kvm->lock);
1422 		return -EBUSY;
1423 	}
1424 	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1425 	mutex_unlock(&kvm->lock);
1426 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1427 			 data.feat[0],
1428 			 data.feat[1],
1429 			 data.feat[2]);
1430 	return 0;
1431 }
1432 
1433 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1434 					  struct kvm_device_attr *attr)
1435 {
1436 	mutex_lock(&kvm->lock);
1437 	if (kvm->created_vcpus) {
1438 		mutex_unlock(&kvm->lock);
1439 		return -EBUSY;
1440 	}
1441 
1442 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1443 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1444 		mutex_unlock(&kvm->lock);
1445 		return -EFAULT;
1446 	}
1447 	mutex_unlock(&kvm->lock);
1448 
1449 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1450 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1451 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1452 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1453 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1454 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1455 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1456 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1457 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1458 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1459 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1460 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1461 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1462 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1463 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1464 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1465 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1466 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1467 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1468 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1469 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1470 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1471 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1472 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1473 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1474 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1475 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1476 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1477 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1478 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1479 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1480 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1481 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1482 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1483 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1484 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1485 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1486 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1487 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1488 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1489 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1490 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1491 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1492 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1493 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1494 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1495 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1496 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1497 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1498 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1499 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1500 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1501 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1502 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1503 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1504 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1505 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1506 
1507 	return 0;
1508 }
1509 
1510 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1511 {
1512 	int ret = -ENXIO;
1513 
1514 	switch (attr->attr) {
1515 	case KVM_S390_VM_CPU_PROCESSOR:
1516 		ret = kvm_s390_set_processor(kvm, attr);
1517 		break;
1518 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1519 		ret = kvm_s390_set_processor_feat(kvm, attr);
1520 		break;
1521 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1522 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1523 		break;
1524 	}
1525 	return ret;
1526 }
1527 
1528 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1529 {
1530 	struct kvm_s390_vm_cpu_processor *proc;
1531 	int ret = 0;
1532 
1533 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1534 	if (!proc) {
1535 		ret = -ENOMEM;
1536 		goto out;
1537 	}
1538 	proc->cpuid = kvm->arch.model.cpuid;
1539 	proc->ibc = kvm->arch.model.ibc;
1540 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1541 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1542 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1543 		 kvm->arch.model.ibc,
1544 		 kvm->arch.model.cpuid);
1545 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1546 		 kvm->arch.model.fac_list[0],
1547 		 kvm->arch.model.fac_list[1],
1548 		 kvm->arch.model.fac_list[2]);
1549 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1550 		ret = -EFAULT;
1551 	kfree(proc);
1552 out:
1553 	return ret;
1554 }
1555 
1556 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1557 {
1558 	struct kvm_s390_vm_cpu_machine *mach;
1559 	int ret = 0;
1560 
1561 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1562 	if (!mach) {
1563 		ret = -ENOMEM;
1564 		goto out;
1565 	}
1566 	get_cpu_id((struct cpuid *) &mach->cpuid);
1567 	mach->ibc = sclp.ibc;
1568 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1569 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1570 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1571 	       sizeof(stfle_fac_list));
1572 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1573 		 kvm->arch.model.ibc,
1574 		 kvm->arch.model.cpuid);
1575 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1576 		 mach->fac_mask[0],
1577 		 mach->fac_mask[1],
1578 		 mach->fac_mask[2]);
1579 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1580 		 mach->fac_list[0],
1581 		 mach->fac_list[1],
1582 		 mach->fac_list[2]);
1583 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1584 		ret = -EFAULT;
1585 	kfree(mach);
1586 out:
1587 	return ret;
1588 }
1589 
1590 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1591 				       struct kvm_device_attr *attr)
1592 {
1593 	struct kvm_s390_vm_cpu_feat data;
1594 
1595 	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1596 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1597 		return -EFAULT;
1598 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1599 			 data.feat[0],
1600 			 data.feat[1],
1601 			 data.feat[2]);
1602 	return 0;
1603 }
1604 
1605 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1606 				     struct kvm_device_attr *attr)
1607 {
1608 	struct kvm_s390_vm_cpu_feat data;
1609 
1610 	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1611 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1612 		return -EFAULT;
1613 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1614 			 data.feat[0],
1615 			 data.feat[1],
1616 			 data.feat[2]);
1617 	return 0;
1618 }
1619 
1620 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1621 					  struct kvm_device_attr *attr)
1622 {
1623 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1624 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1625 		return -EFAULT;
1626 
1627 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1629 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1630 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1631 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1632 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1633 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1634 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1635 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1636 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1637 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1638 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1639 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1640 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1641 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1642 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1643 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1644 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1645 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1646 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1647 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1648 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1649 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1650 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1651 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1652 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1653 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1654 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1655 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1656 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1657 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1658 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1659 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1660 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1661 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1662 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1663 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1664 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1665 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1666 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1667 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1668 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1669 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1670 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1671 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1672 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1673 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1674 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1675 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1676 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1677 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1678 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1679 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1680 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1681 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1682 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1683 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1684 
1685 	return 0;
1686 }
1687 
1688 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1689 					struct kvm_device_attr *attr)
1690 {
1691 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1692 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1693 		return -EFAULT;
1694 
1695 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1696 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1697 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1698 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1699 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1700 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1701 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1702 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1703 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1704 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1705 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1706 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1707 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1708 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1709 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1710 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1711 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1712 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1713 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1714 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1715 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1716 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1717 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1718 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1719 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1720 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1721 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1722 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1723 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1724 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1725 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1726 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1727 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1728 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1729 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1730 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1731 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1732 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1733 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1734 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1735 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1736 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1737 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1738 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1739 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1740 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1741 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1742 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1743 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1744 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1745 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1746 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1747 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1748 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1749 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1750 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1751 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1752 
1753 	return 0;
1754 }
1755 
1756 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1757 {
1758 	int ret = -ENXIO;
1759 
1760 	switch (attr->attr) {
1761 	case KVM_S390_VM_CPU_PROCESSOR:
1762 		ret = kvm_s390_get_processor(kvm, attr);
1763 		break;
1764 	case KVM_S390_VM_CPU_MACHINE:
1765 		ret = kvm_s390_get_machine(kvm, attr);
1766 		break;
1767 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1768 		ret = kvm_s390_get_processor_feat(kvm, attr);
1769 		break;
1770 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1771 		ret = kvm_s390_get_machine_feat(kvm, attr);
1772 		break;
1773 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1774 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1775 		break;
1776 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1778 		break;
1779 	}
1780 	return ret;
1781 }
1782 
1783 /**
1784  * kvm_s390_update_topology_change_report - update CPU topology change report
1785  * @kvm: guest KVM description
1786  * @val: set or clear the MTCR bit
1787  *
1788  * Updates the Multiprocessor Topology-Change-Report bit to signal
1789  * the guest with a topology change.
1790  * This is only relevant if the topology facility is present.
1791  *
1792  * The SCA version, bsca or esca, doesn't matter as offset is the same.
1793  */
1794 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1795 {
1796 	union sca_utility new, old;
1797 	struct bsca_block *sca;
1798 
1799 	read_lock(&kvm->arch.sca_lock);
1800 	sca = kvm->arch.sca;
1801 	do {
1802 		old = READ_ONCE(sca->utility);
1803 		new = old;
1804 		new.mtcr = val;
1805 	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1806 	read_unlock(&kvm->arch.sca_lock);
1807 }
1808 
1809 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1810 					       struct kvm_device_attr *attr)
1811 {
1812 	if (!test_kvm_facility(kvm, 11))
1813 		return -ENXIO;
1814 
1815 	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1816 	return 0;
1817 }
1818 
1819 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1820 					       struct kvm_device_attr *attr)
1821 {
1822 	u8 topo;
1823 
1824 	if (!test_kvm_facility(kvm, 11))
1825 		return -ENXIO;
1826 
1827 	read_lock(&kvm->arch.sca_lock);
1828 	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1829 	read_unlock(&kvm->arch.sca_lock);
1830 
1831 	return put_user(topo, (u8 __user *)attr->addr);
1832 }
1833 
1834 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1835 {
1836 	int ret;
1837 
1838 	switch (attr->group) {
1839 	case KVM_S390_VM_MEM_CTRL:
1840 		ret = kvm_s390_set_mem_control(kvm, attr);
1841 		break;
1842 	case KVM_S390_VM_TOD:
1843 		ret = kvm_s390_set_tod(kvm, attr);
1844 		break;
1845 	case KVM_S390_VM_CPU_MODEL:
1846 		ret = kvm_s390_set_cpu_model(kvm, attr);
1847 		break;
1848 	case KVM_S390_VM_CRYPTO:
1849 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1850 		break;
1851 	case KVM_S390_VM_MIGRATION:
1852 		ret = kvm_s390_vm_set_migration(kvm, attr);
1853 		break;
1854 	case KVM_S390_VM_CPU_TOPOLOGY:
1855 		ret = kvm_s390_set_topo_change_indication(kvm, attr);
1856 		break;
1857 	default:
1858 		ret = -ENXIO;
1859 		break;
1860 	}
1861 
1862 	return ret;
1863 }
1864 
1865 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1866 {
1867 	int ret;
1868 
1869 	switch (attr->group) {
1870 	case KVM_S390_VM_MEM_CTRL:
1871 		ret = kvm_s390_get_mem_control(kvm, attr);
1872 		break;
1873 	case KVM_S390_VM_TOD:
1874 		ret = kvm_s390_get_tod(kvm, attr);
1875 		break;
1876 	case KVM_S390_VM_CPU_MODEL:
1877 		ret = kvm_s390_get_cpu_model(kvm, attr);
1878 		break;
1879 	case KVM_S390_VM_MIGRATION:
1880 		ret = kvm_s390_vm_get_migration(kvm, attr);
1881 		break;
1882 	case KVM_S390_VM_CPU_TOPOLOGY:
1883 		ret = kvm_s390_get_topo_change_indication(kvm, attr);
1884 		break;
1885 	default:
1886 		ret = -ENXIO;
1887 		break;
1888 	}
1889 
1890 	return ret;
1891 }
1892 
1893 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1894 {
1895 	int ret;
1896 
1897 	switch (attr->group) {
1898 	case KVM_S390_VM_MEM_CTRL:
1899 		switch (attr->attr) {
1900 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1901 		case KVM_S390_VM_MEM_CLR_CMMA:
1902 			ret = sclp.has_cmma ? 0 : -ENXIO;
1903 			break;
1904 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1905 			ret = 0;
1906 			break;
1907 		default:
1908 			ret = -ENXIO;
1909 			break;
1910 		}
1911 		break;
1912 	case KVM_S390_VM_TOD:
1913 		switch (attr->attr) {
1914 		case KVM_S390_VM_TOD_LOW:
1915 		case KVM_S390_VM_TOD_HIGH:
1916 			ret = 0;
1917 			break;
1918 		default:
1919 			ret = -ENXIO;
1920 			break;
1921 		}
1922 		break;
1923 	case KVM_S390_VM_CPU_MODEL:
1924 		switch (attr->attr) {
1925 		case KVM_S390_VM_CPU_PROCESSOR:
1926 		case KVM_S390_VM_CPU_MACHINE:
1927 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1928 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1929 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1930 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1931 			ret = 0;
1932 			break;
1933 		default:
1934 			ret = -ENXIO;
1935 			break;
1936 		}
1937 		break;
1938 	case KVM_S390_VM_CRYPTO:
1939 		switch (attr->attr) {
1940 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1941 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1942 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1943 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1944 			ret = 0;
1945 			break;
1946 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1947 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1948 			ret = ap_instructions_available() ? 0 : -ENXIO;
1949 			break;
1950 		default:
1951 			ret = -ENXIO;
1952 			break;
1953 		}
1954 		break;
1955 	case KVM_S390_VM_MIGRATION:
1956 		ret = 0;
1957 		break;
1958 	case KVM_S390_VM_CPU_TOPOLOGY:
1959 		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1960 		break;
1961 	default:
1962 		ret = -ENXIO;
1963 		break;
1964 	}
1965 
1966 	return ret;
1967 }
1968 
1969 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1970 {
1971 	uint8_t *keys;
1972 	uint64_t hva;
1973 	int srcu_idx, i, r = 0;
1974 
1975 	if (args->flags != 0)
1976 		return -EINVAL;
1977 
1978 	/* Is this guest using storage keys? */
1979 	if (!mm_uses_skeys(current->mm))
1980 		return KVM_S390_GET_SKEYS_NONE;
1981 
1982 	/* Enforce sane limit on memory allocation */
1983 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1984 		return -EINVAL;
1985 
1986 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1987 	if (!keys)
1988 		return -ENOMEM;
1989 
1990 	mmap_read_lock(current->mm);
1991 	srcu_idx = srcu_read_lock(&kvm->srcu);
1992 	for (i = 0; i < args->count; i++) {
1993 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1994 		if (kvm_is_error_hva(hva)) {
1995 			r = -EFAULT;
1996 			break;
1997 		}
1998 
1999 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
2000 		if (r)
2001 			break;
2002 	}
2003 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2004 	mmap_read_unlock(current->mm);
2005 
2006 	if (!r) {
2007 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2008 				 sizeof(uint8_t) * args->count);
2009 		if (r)
2010 			r = -EFAULT;
2011 	}
2012 
2013 	kvfree(keys);
2014 	return r;
2015 }
2016 
2017 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2018 {
2019 	uint8_t *keys;
2020 	uint64_t hva;
2021 	int srcu_idx, i, r = 0;
2022 	bool unlocked;
2023 
2024 	if (args->flags != 0)
2025 		return -EINVAL;
2026 
2027 	/* Enforce sane limit on memory allocation */
2028 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2029 		return -EINVAL;
2030 
2031 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2032 	if (!keys)
2033 		return -ENOMEM;
2034 
2035 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2036 			   sizeof(uint8_t) * args->count);
2037 	if (r) {
2038 		r = -EFAULT;
2039 		goto out;
2040 	}
2041 
2042 	/* Enable storage key handling for the guest */
2043 	r = s390_enable_skey();
2044 	if (r)
2045 		goto out;
2046 
2047 	i = 0;
2048 	mmap_read_lock(current->mm);
2049 	srcu_idx = srcu_read_lock(&kvm->srcu);
2050         while (i < args->count) {
2051 		unlocked = false;
2052 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2053 		if (kvm_is_error_hva(hva)) {
2054 			r = -EFAULT;
2055 			break;
2056 		}
2057 
2058 		/* Lowest order bit is reserved */
2059 		if (keys[i] & 0x01) {
2060 			r = -EINVAL;
2061 			break;
2062 		}
2063 
2064 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2065 		if (r) {
2066 			r = fixup_user_fault(current->mm, hva,
2067 					     FAULT_FLAG_WRITE, &unlocked);
2068 			if (r)
2069 				break;
2070 		}
2071 		if (!r)
2072 			i++;
2073 	}
2074 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2075 	mmap_read_unlock(current->mm);
2076 out:
2077 	kvfree(keys);
2078 	return r;
2079 }
2080 
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 * The value is used as a distance between guest frame numbers when deciding
 * whether the next dirty page is too far away to stay in the same block.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Upper bound for a CMMA buffer; same limit as for storage keys, for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2089 
2090 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2091 			      u8 *res, unsigned long bufsize)
2092 {
2093 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2094 
2095 	args->count = 0;
2096 	while (args->count < bufsize) {
2097 		hva = gfn_to_hva(kvm, cur_gfn);
2098 		/*
2099 		 * We return an error if the first value was invalid, but we
2100 		 * return successfully if at least one value was copied.
2101 		 */
2102 		if (kvm_is_error_hva(hva))
2103 			return args->count ? 0 : -EFAULT;
2104 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2105 			pgstev = 0;
2106 		res[args->count++] = (pgstev >> 24) & 0x43;
2107 		cur_gfn++;
2108 	}
2109 
2110 	return 0;
2111 }
2112 
2113 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2114 						     gfn_t gfn)
2115 {
2116 	return ____gfn_to_memslot(slots, gfn, true);
2117 }
2118 
2119 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2120 					      unsigned long cur_gfn)
2121 {
2122 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2123 	unsigned long ofs = cur_gfn - ms->base_gfn;
2124 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2125 
2126 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2127 		mnode = rb_next(mnode);
2128 		/* If we are above the highest slot, wrap around */
2129 		if (!mnode)
2130 			mnode = rb_first(&slots->gfn_tree);
2131 
2132 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2133 		ofs = 0;
2134 	}
2135 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2136 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2137 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2138 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2139 	}
2140 	return ms->base_gfn + ofs;
2141 }
2142 
2143 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2144 			     u8 *res, unsigned long bufsize)
2145 {
2146 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2147 	struct kvm_memslots *slots = kvm_memslots(kvm);
2148 	struct kvm_memory_slot *ms;
2149 
2150 	if (unlikely(kvm_memslots_empty(slots)))
2151 		return 0;
2152 
2153 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2154 	ms = gfn_to_memslot(kvm, cur_gfn);
2155 	args->count = 0;
2156 	args->start_gfn = cur_gfn;
2157 	if (!ms)
2158 		return 0;
2159 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2160 	mem_end = kvm_s390_get_gfn_end(slots);
2161 
2162 	while (args->count < bufsize) {
2163 		hva = gfn_to_hva(kvm, cur_gfn);
2164 		if (kvm_is_error_hva(hva))
2165 			return 0;
2166 		/* Decrement only if we actually flipped the bit to 0 */
2167 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2168 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2169 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2170 			pgstev = 0;
2171 		/* Save the value */
2172 		res[args->count++] = (pgstev >> 24) & 0x43;
2173 		/* If the next bit is too far away, stop. */
2174 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2175 			return 0;
2176 		/* If we reached the previous "next", find the next one */
2177 		if (cur_gfn == next_gfn)
2178 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2179 		/* Reached the end of memory or of the buffer, stop */
2180 		if ((next_gfn >= mem_end) ||
2181 		    (next_gfn - args->start_gfn >= bufsize))
2182 			return 0;
2183 		cur_gfn++;
2184 		/* Reached the end of the current memslot, take the next one. */
2185 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2186 			ms = gfn_to_memslot(kvm, cur_gfn);
2187 			if (!ms)
2188 				return 0;
2189 		}
2190 	}
2191 	return 0;
2192 }
2193 
2194 /*
2195  * This function searches for the next page with dirty CMMA attributes, and
2196  * saves the attributes in the buffer up to either the end of the buffer or
2197  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2198  * no trailing clean bytes are saved.
2199  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2200  * output buffer will indicate 0 as length.
2201  */
2202 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2203 				  struct kvm_s390_cmma_log *args)
2204 {
2205 	unsigned long bufsize;
2206 	int srcu_idx, peek, ret;
2207 	u8 *values;
2208 
2209 	if (!kvm->arch.use_cmma)
2210 		return -ENXIO;
2211 	/* Invalid/unsupported flags were specified */
2212 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2213 		return -EINVAL;
2214 	/* Migration mode query, and we are not doing a migration */
2215 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2216 	if (!peek && !kvm->arch.migration_mode)
2217 		return -EINVAL;
2218 	/* CMMA is disabled or was not used, or the buffer has length zero */
2219 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2220 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2221 		memset(args, 0, sizeof(*args));
2222 		return 0;
2223 	}
2224 	/* We are not peeking, and there are no dirty pages */
2225 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2226 		memset(args, 0, sizeof(*args));
2227 		return 0;
2228 	}
2229 
2230 	values = vmalloc(bufsize);
2231 	if (!values)
2232 		return -ENOMEM;
2233 
2234 	mmap_read_lock(kvm->mm);
2235 	srcu_idx = srcu_read_lock(&kvm->srcu);
2236 	if (peek)
2237 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2238 	else
2239 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2240 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2241 	mmap_read_unlock(kvm->mm);
2242 
2243 	if (kvm->arch.migration_mode)
2244 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2245 	else
2246 		args->remaining = 0;
2247 
2248 	if (copy_to_user((void __user *)args->values, values, args->count))
2249 		ret = -EFAULT;
2250 
2251 	vfree(values);
2252 	return ret;
2253 }
2254 
2255 /*
2256  * This function sets the CMMA attributes for the given pages. If the input
2257  * buffer has zero length, no action is taken, otherwise the attributes are
2258  * set and the mm->context.uses_cmm flag is set.
2259  */
2260 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2261 				  const struct kvm_s390_cmma_log *args)
2262 {
2263 	unsigned long hva, mask, pgstev, i;
2264 	uint8_t *bits;
2265 	int srcu_idx, r = 0;
2266 
2267 	mask = args->mask;
2268 
2269 	if (!kvm->arch.use_cmma)
2270 		return -ENXIO;
2271 	/* invalid/unsupported flags */
2272 	if (args->flags != 0)
2273 		return -EINVAL;
2274 	/* Enforce sane limit on memory allocation */
2275 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2276 		return -EINVAL;
2277 	/* Nothing to do */
2278 	if (args->count == 0)
2279 		return 0;
2280 
2281 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2282 	if (!bits)
2283 		return -ENOMEM;
2284 
2285 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2286 	if (r) {
2287 		r = -EFAULT;
2288 		goto out;
2289 	}
2290 
2291 	mmap_read_lock(kvm->mm);
2292 	srcu_idx = srcu_read_lock(&kvm->srcu);
2293 	for (i = 0; i < args->count; i++) {
2294 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2295 		if (kvm_is_error_hva(hva)) {
2296 			r = -EFAULT;
2297 			break;
2298 		}
2299 
2300 		pgstev = bits[i];
2301 		pgstev = pgstev << 24;
2302 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2303 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2304 	}
2305 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2306 	mmap_read_unlock(kvm->mm);
2307 
2308 	if (!kvm->mm->context.uses_cmm) {
2309 		mmap_write_lock(kvm->mm);
2310 		kvm->mm->context.uses_cmm = 1;
2311 		mmap_write_unlock(kvm->mm);
2312 	}
2313 out:
2314 	vfree(bits);
2315 	return r;
2316 }
2317 
2318 /**
2319  * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2320  * non protected.
2321  * @kvm: the VM whose protected vCPUs are to be converted
2322  * @rc: return value for the RC field of the UVC (in case of error)
2323  * @rrc: return value for the RRC field of the UVC (in case of error)
2324  *
2325  * Does not stop in case of error, tries to convert as many
2326  * CPUs as possible. In case of error, the RC and RRC of the last error are
2327  * returned.
2328  *
2329  * Return: 0 in case of success, otherwise -EIO
2330  */
2331 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2332 {
2333 	struct kvm_vcpu *vcpu;
2334 	unsigned long i;
2335 	u16 _rc, _rrc;
2336 	int ret = 0;
2337 
2338 	/*
2339 	 * We ignore failures and try to destroy as many CPUs as possible.
2340 	 * At the same time we must not free the assigned resources when
2341 	 * this fails, as the ultravisor has still access to that memory.
2342 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2343 	 * behind.
2344 	 * We want to return the first failure rc and rrc, though.
2345 	 */
2346 	kvm_for_each_vcpu(i, vcpu, kvm) {
2347 		mutex_lock(&vcpu->mutex);
2348 		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2349 			*rc = _rc;
2350 			*rrc = _rrc;
2351 			ret = -EIO;
2352 		}
2353 		mutex_unlock(&vcpu->mutex);
2354 	}
2355 	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2356 	if (use_gisa)
2357 		kvm_s390_gisa_enable(kvm);
2358 	return ret;
2359 }
2360 
2361 /**
2362  * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2363  * to protected.
2364  * @kvm: the VM whose protected vCPUs are to be converted
2365  * @rc: return value for the RC field of the UVC (in case of error)
2366  * @rrc: return value for the RRC field of the UVC (in case of error)
2367  *
2368  * Tries to undo the conversion in case of error.
2369  *
2370  * Return: 0 in case of success, otherwise -EIO
2371  */
2372 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2373 {
2374 	unsigned long i;
2375 	int r = 0;
2376 	u16 dummy;
2377 
2378 	struct kvm_vcpu *vcpu;
2379 
2380 	/* Disable the GISA if the ultravisor does not support AIV. */
2381 	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2382 		kvm_s390_gisa_disable(kvm);
2383 
2384 	kvm_for_each_vcpu(i, vcpu, kvm) {
2385 		mutex_lock(&vcpu->mutex);
2386 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2387 		mutex_unlock(&vcpu->mutex);
2388 		if (r)
2389 			break;
2390 	}
2391 	if (r)
2392 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2393 	return r;
2394 }
2395 
2396 /*
2397  * Here we provide user space with a direct interface to query UV
2398  * related data like UV maxima and available features as well as
2399  * feature specific data.
2400  *
2401  * To facilitate future extension of the data structures we'll try to
2402  * write data up to the maximum requested length.
2403  */
2404 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2405 {
2406 	ssize_t len_min;
2407 
2408 	switch (info->header.id) {
2409 	case KVM_PV_INFO_VM: {
2410 		len_min =  sizeof(info->header) + sizeof(info->vm);
2411 
2412 		if (info->header.len_max < len_min)
2413 			return -EINVAL;
2414 
2415 		memcpy(info->vm.inst_calls_list,
2416 		       uv_info.inst_calls_list,
2417 		       sizeof(uv_info.inst_calls_list));
2418 
2419 		/* It's max cpuid not max cpus, so it's off by one */
2420 		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2421 		info->vm.max_guests = uv_info.max_num_sec_conf;
2422 		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2423 		info->vm.feature_indication = uv_info.uv_feature_indications;
2424 
2425 		return len_min;
2426 	}
2427 	case KVM_PV_INFO_DUMP: {
2428 		len_min =  sizeof(info->header) + sizeof(info->dump);
2429 
2430 		if (info->header.len_max < len_min)
2431 			return -EINVAL;
2432 
2433 		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2434 		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2435 		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2436 		return len_min;
2437 	}
2438 	default:
2439 		return -EINVAL;
2440 	}
2441 }
2442 
2443 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2444 			   struct kvm_s390_pv_dmp dmp)
2445 {
2446 	int r = -EINVAL;
2447 	void __user *result_buff = (void __user *)dmp.buff_addr;
2448 
2449 	switch (dmp.subcmd) {
2450 	case KVM_PV_DUMP_INIT: {
2451 		if (kvm->arch.pv.dumping)
2452 			break;
2453 
2454 		/*
2455 		 * Block SIE entry as concurrent dump UVCs could lead
2456 		 * to validities.
2457 		 */
2458 		kvm_s390_vcpu_block_all(kvm);
2459 
2460 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2461 				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2462 		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2463 			     cmd->rc, cmd->rrc);
2464 		if (!r) {
2465 			kvm->arch.pv.dumping = true;
2466 		} else {
2467 			kvm_s390_vcpu_unblock_all(kvm);
2468 			r = -EINVAL;
2469 		}
2470 		break;
2471 	}
2472 	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2473 		if (!kvm->arch.pv.dumping)
2474 			break;
2475 
2476 		/*
2477 		 * gaddr is an output parameter since we might stop
2478 		 * early. As dmp will be copied back in our caller, we
2479 		 * don't need to do it ourselves.
2480 		 */
2481 		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2482 						&cmd->rc, &cmd->rrc);
2483 		break;
2484 	}
2485 	case KVM_PV_DUMP_COMPLETE: {
2486 		if (!kvm->arch.pv.dumping)
2487 			break;
2488 
2489 		r = -EINVAL;
2490 		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2491 			break;
2492 
2493 		r = kvm_s390_pv_dump_complete(kvm, result_buff,
2494 					      &cmd->rc, &cmd->rrc);
2495 		break;
2496 	}
2497 	default:
2498 		r = -ENOTTY;
2499 		break;
2500 	}
2501 
2502 	return r;
2503 }
2504 
2505 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2506 {
2507 	int r = 0;
2508 	u16 dummy;
2509 	void __user *argp = (void __user *)cmd->data;
2510 
2511 	switch (cmd->cmd) {
2512 	case KVM_PV_ENABLE: {
2513 		r = -EINVAL;
2514 		if (kvm_s390_pv_is_protected(kvm))
2515 			break;
2516 
2517 		/*
2518 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2519 		 *  esca, we need no cleanup in the error cases below
2520 		 */
2521 		r = sca_switch_to_extended(kvm);
2522 		if (r)
2523 			break;
2524 
2525 		mmap_write_lock(current->mm);
2526 		r = gmap_mark_unmergeable();
2527 		mmap_write_unlock(current->mm);
2528 		if (r)
2529 			break;
2530 
2531 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2532 		if (r)
2533 			break;
2534 
2535 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2536 		if (r)
2537 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2538 
2539 		/* we need to block service interrupts from now on */
2540 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2541 		break;
2542 	}
2543 	case KVM_PV_DISABLE: {
2544 		r = -EINVAL;
2545 		if (!kvm_s390_pv_is_protected(kvm))
2546 			break;
2547 
2548 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2549 		/*
2550 		 * If a CPU could not be destroyed, destroy VM will also fail.
2551 		 * There is no point in trying to destroy it. Instead return
2552 		 * the rc and rrc from the first CPU that failed destroying.
2553 		 */
2554 		if (r)
2555 			break;
2556 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2557 
2558 		/* no need to block service interrupts any more */
2559 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2560 		break;
2561 	}
2562 	case KVM_PV_SET_SEC_PARMS: {
2563 		struct kvm_s390_pv_sec_parm parms = {};
2564 		void *hdr;
2565 
2566 		r = -EINVAL;
2567 		if (!kvm_s390_pv_is_protected(kvm))
2568 			break;
2569 
2570 		r = -EFAULT;
2571 		if (copy_from_user(&parms, argp, sizeof(parms)))
2572 			break;
2573 
2574 		/* Currently restricted to 8KB */
2575 		r = -EINVAL;
2576 		if (parms.length > PAGE_SIZE * 2)
2577 			break;
2578 
2579 		r = -ENOMEM;
2580 		hdr = vmalloc(parms.length);
2581 		if (!hdr)
2582 			break;
2583 
2584 		r = -EFAULT;
2585 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2586 				    parms.length))
2587 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2588 						      &cmd->rc, &cmd->rrc);
2589 
2590 		vfree(hdr);
2591 		break;
2592 	}
2593 	case KVM_PV_UNPACK: {
2594 		struct kvm_s390_pv_unp unp = {};
2595 
2596 		r = -EINVAL;
2597 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2598 			break;
2599 
2600 		r = -EFAULT;
2601 		if (copy_from_user(&unp, argp, sizeof(unp)))
2602 			break;
2603 
2604 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2605 				       &cmd->rc, &cmd->rrc);
2606 		break;
2607 	}
2608 	case KVM_PV_VERIFY: {
2609 		r = -EINVAL;
2610 		if (!kvm_s390_pv_is_protected(kvm))
2611 			break;
2612 
2613 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2614 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2615 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2616 			     cmd->rrc);
2617 		break;
2618 	}
2619 	case KVM_PV_PREP_RESET: {
2620 		r = -EINVAL;
2621 		if (!kvm_s390_pv_is_protected(kvm))
2622 			break;
2623 
2624 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2625 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2626 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2627 			     cmd->rc, cmd->rrc);
2628 		break;
2629 	}
2630 	case KVM_PV_UNSHARE_ALL: {
2631 		r = -EINVAL;
2632 		if (!kvm_s390_pv_is_protected(kvm))
2633 			break;
2634 
2635 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2636 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2637 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2638 			     cmd->rc, cmd->rrc);
2639 		break;
2640 	}
2641 	case KVM_PV_INFO: {
2642 		struct kvm_s390_pv_info info = {};
2643 		ssize_t data_len;
2644 
2645 		/*
2646 		 * No need to check the VM protection here.
2647 		 *
2648 		 * Maybe user space wants to query some of the data
2649 		 * when the VM is still unprotected. If we see the
2650 		 * need to fence a new data command we can still
2651 		 * return an error in the info handler.
2652 		 */
2653 
2654 		r = -EFAULT;
2655 		if (copy_from_user(&info, argp, sizeof(info.header)))
2656 			break;
2657 
2658 		r = -EINVAL;
2659 		if (info.header.len_max < sizeof(info.header))
2660 			break;
2661 
2662 		data_len = kvm_s390_handle_pv_info(&info);
2663 		if (data_len < 0) {
2664 			r = data_len;
2665 			break;
2666 		}
2667 		/*
2668 		 * If a data command struct is extended (multiple
2669 		 * times) this can be used to determine how much of it
2670 		 * is valid.
2671 		 */
2672 		info.header.len_written = data_len;
2673 
2674 		r = -EFAULT;
2675 		if (copy_to_user(argp, &info, data_len))
2676 			break;
2677 
2678 		r = 0;
2679 		break;
2680 	}
2681 	case KVM_PV_DUMP: {
2682 		struct kvm_s390_pv_dmp dmp;
2683 
2684 		r = -EINVAL;
2685 		if (!kvm_s390_pv_is_protected(kvm))
2686 			break;
2687 
2688 		r = -EFAULT;
2689 		if (copy_from_user(&dmp, argp, sizeof(dmp)))
2690 			break;
2691 
2692 		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2693 		if (r)
2694 			break;
2695 
2696 		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2697 			r = -EFAULT;
2698 			break;
2699 		}
2700 
2701 		break;
2702 	}
2703 	default:
2704 		r = -ENOTTY;
2705 	}
2706 	return r;
2707 }
2708 
2709 static bool access_key_invalid(u8 access_key)
2710 {
2711 	return access_key > 0xf;
2712 }
2713 
2714 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2715 {
2716 	void __user *uaddr = (void __user *)mop->buf;
2717 	u64 supported_flags;
2718 	void *tmpbuf = NULL;
2719 	int r, srcu_idx;
2720 
2721 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2722 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2723 	if (mop->flags & ~supported_flags || !mop->size)
2724 		return -EINVAL;
2725 	if (mop->size > MEM_OP_MAX_SIZE)
2726 		return -E2BIG;
2727 	/*
2728 	 * This is technically a heuristic only, if the kvm->lock is not
2729 	 * taken, it is not guaranteed that the vm is/remains non-protected.
2730 	 * This is ok from a kernel perspective, wrongdoing is detected
2731 	 * on the access, -EFAULT is returned and the vm may crash the
2732 	 * next time it accesses the memory in question.
2733 	 * There is no sane usecase to do switching and a memop on two
2734 	 * different CPUs at the same time.
2735 	 */
2736 	if (kvm_s390_pv_get_handle(kvm))
2737 		return -EINVAL;
2738 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2739 		if (access_key_invalid(mop->key))
2740 			return -EINVAL;
2741 	} else {
2742 		mop->key = 0;
2743 	}
2744 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2745 		tmpbuf = vmalloc(mop->size);
2746 		if (!tmpbuf)
2747 			return -ENOMEM;
2748 	}
2749 
2750 	srcu_idx = srcu_read_lock(&kvm->srcu);
2751 
2752 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2753 		r = PGM_ADDRESSING;
2754 		goto out_unlock;
2755 	}
2756 
2757 	switch (mop->op) {
2758 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2759 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2760 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2761 		} else {
2762 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2763 						      mop->size, GACC_FETCH, mop->key);
2764 			if (r == 0) {
2765 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2766 					r = -EFAULT;
2767 			}
2768 		}
2769 		break;
2770 	}
2771 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2772 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2773 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2774 		} else {
2775 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2776 				r = -EFAULT;
2777 				break;
2778 			}
2779 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2780 						      mop->size, GACC_STORE, mop->key);
2781 		}
2782 		break;
2783 	}
2784 	default:
2785 		r = -EINVAL;
2786 	}
2787 
2788 out_unlock:
2789 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2790 
2791 	vfree(tmpbuf);
2792 	return r;
2793 }
2794 
2795 long kvm_arch_vm_ioctl(struct file *filp,
2796 		       unsigned int ioctl, unsigned long arg)
2797 {
2798 	struct kvm *kvm = filp->private_data;
2799 	void __user *argp = (void __user *)arg;
2800 	struct kvm_device_attr attr;
2801 	int r;
2802 
2803 	switch (ioctl) {
2804 	case KVM_S390_INTERRUPT: {
2805 		struct kvm_s390_interrupt s390int;
2806 
2807 		r = -EFAULT;
2808 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2809 			break;
2810 		r = kvm_s390_inject_vm(kvm, &s390int);
2811 		break;
2812 	}
2813 	case KVM_CREATE_IRQCHIP: {
2814 		struct kvm_irq_routing_entry routing;
2815 
2816 		r = -EINVAL;
2817 		if (kvm->arch.use_irqchip) {
2818 			/* Set up dummy routing. */
2819 			memset(&routing, 0, sizeof(routing));
2820 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2821 		}
2822 		break;
2823 	}
2824 	case KVM_SET_DEVICE_ATTR: {
2825 		r = -EFAULT;
2826 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2827 			break;
2828 		r = kvm_s390_vm_set_attr(kvm, &attr);
2829 		break;
2830 	}
2831 	case KVM_GET_DEVICE_ATTR: {
2832 		r = -EFAULT;
2833 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2834 			break;
2835 		r = kvm_s390_vm_get_attr(kvm, &attr);
2836 		break;
2837 	}
2838 	case KVM_HAS_DEVICE_ATTR: {
2839 		r = -EFAULT;
2840 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2841 			break;
2842 		r = kvm_s390_vm_has_attr(kvm, &attr);
2843 		break;
2844 	}
2845 	case KVM_S390_GET_SKEYS: {
2846 		struct kvm_s390_skeys args;
2847 
2848 		r = -EFAULT;
2849 		if (copy_from_user(&args, argp,
2850 				   sizeof(struct kvm_s390_skeys)))
2851 			break;
2852 		r = kvm_s390_get_skeys(kvm, &args);
2853 		break;
2854 	}
2855 	case KVM_S390_SET_SKEYS: {
2856 		struct kvm_s390_skeys args;
2857 
2858 		r = -EFAULT;
2859 		if (copy_from_user(&args, argp,
2860 				   sizeof(struct kvm_s390_skeys)))
2861 			break;
2862 		r = kvm_s390_set_skeys(kvm, &args);
2863 		break;
2864 	}
2865 	case KVM_S390_GET_CMMA_BITS: {
2866 		struct kvm_s390_cmma_log args;
2867 
2868 		r = -EFAULT;
2869 		if (copy_from_user(&args, argp, sizeof(args)))
2870 			break;
2871 		mutex_lock(&kvm->slots_lock);
2872 		r = kvm_s390_get_cmma_bits(kvm, &args);
2873 		mutex_unlock(&kvm->slots_lock);
2874 		if (!r) {
2875 			r = copy_to_user(argp, &args, sizeof(args));
2876 			if (r)
2877 				r = -EFAULT;
2878 		}
2879 		break;
2880 	}
2881 	case KVM_S390_SET_CMMA_BITS: {
2882 		struct kvm_s390_cmma_log args;
2883 
2884 		r = -EFAULT;
2885 		if (copy_from_user(&args, argp, sizeof(args)))
2886 			break;
2887 		mutex_lock(&kvm->slots_lock);
2888 		r = kvm_s390_set_cmma_bits(kvm, &args);
2889 		mutex_unlock(&kvm->slots_lock);
2890 		break;
2891 	}
2892 	case KVM_S390_PV_COMMAND: {
2893 		struct kvm_pv_cmd args;
2894 
2895 		/* protvirt means user cpu state */
2896 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2897 		r = 0;
2898 		if (!is_prot_virt_host()) {
2899 			r = -EINVAL;
2900 			break;
2901 		}
2902 		if (copy_from_user(&args, argp, sizeof(args))) {
2903 			r = -EFAULT;
2904 			break;
2905 		}
2906 		if (args.flags) {
2907 			r = -EINVAL;
2908 			break;
2909 		}
2910 		mutex_lock(&kvm->lock);
2911 		r = kvm_s390_handle_pv(kvm, &args);
2912 		mutex_unlock(&kvm->lock);
2913 		if (copy_to_user(argp, &args, sizeof(args))) {
2914 			r = -EFAULT;
2915 			break;
2916 		}
2917 		break;
2918 	}
2919 	case KVM_S390_MEM_OP: {
2920 		struct kvm_s390_mem_op mem_op;
2921 
2922 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2923 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2924 		else
2925 			r = -EFAULT;
2926 		break;
2927 	}
2928 	case KVM_S390_ZPCI_OP: {
2929 		struct kvm_s390_zpci_op args;
2930 
2931 		r = -EINVAL;
2932 		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2933 			break;
2934 		if (copy_from_user(&args, argp, sizeof(args))) {
2935 			r = -EFAULT;
2936 			break;
2937 		}
2938 		r = kvm_s390_pci_zpci_op(kvm, &args);
2939 		break;
2940 	}
2941 	default:
2942 		r = -ENOTTY;
2943 	}
2944 
2945 	return r;
2946 }
2947 
2948 static int kvm_s390_apxa_installed(void)
2949 {
2950 	struct ap_config_info info;
2951 
2952 	if (ap_instructions_available()) {
2953 		if (ap_qci(&info) == 0)
2954 			return info.apxa;
2955 	}
2956 
2957 	return 0;
2958 }
2959 
2960 /*
2961  * The format of the crypto control block (CRYCB) is specified in the 3 low
2962  * order bits of the CRYCB designation (CRYCBD) field as follows:
2963  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2964  *	     AP extended addressing (APXA) facility are installed.
2965  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2966  * Format 2: Both the APXA and MSAX3 facilities are installed
2967  */
2968 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2969 {
2970 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2971 
2972 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2973 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2974 
2975 	/* Check whether MSAX3 is installed */
2976 	if (!test_kvm_facility(kvm, 76))
2977 		return;
2978 
2979 	if (kvm_s390_apxa_installed())
2980 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2981 	else
2982 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2983 }
2984 
2985 /*
2986  * kvm_arch_crypto_set_masks
2987  *
2988  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2989  *	 to be set.
2990  * @apm: the mask identifying the accessible AP adapters
2991  * @aqm: the mask identifying the accessible AP domains
2992  * @adm: the mask identifying the accessible AP control domains
2993  *
2994  * Set the masks that identify the adapters, domains and control domains to
2995  * which the KVM guest is granted access.
2996  *
2997  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2998  *	 function.
2999  */
3000 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3001 			       unsigned long *aqm, unsigned long *adm)
3002 {
3003 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3004 
3005 	kvm_s390_vcpu_block_all(kvm);
3006 
3007 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3008 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3009 		memcpy(crycb->apcb1.apm, apm, 32);
3010 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3011 			 apm[0], apm[1], apm[2], apm[3]);
3012 		memcpy(crycb->apcb1.aqm, aqm, 32);
3013 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3014 			 aqm[0], aqm[1], aqm[2], aqm[3]);
3015 		memcpy(crycb->apcb1.adm, adm, 32);
3016 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3017 			 adm[0], adm[1], adm[2], adm[3]);
3018 		break;
3019 	case CRYCB_FORMAT1:
3020 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3021 		memcpy(crycb->apcb0.apm, apm, 8);
3022 		memcpy(crycb->apcb0.aqm, aqm, 2);
3023 		memcpy(crycb->apcb0.adm, adm, 2);
3024 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3025 			 apm[0], *((unsigned short *)aqm),
3026 			 *((unsigned short *)adm));
3027 		break;
3028 	default:	/* Can not happen */
3029 		break;
3030 	}
3031 
3032 	/* recreate the shadow crycb for each vcpu */
3033 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3034 	kvm_s390_vcpu_unblock_all(kvm);
3035 }
3036 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3037 
3038 /*
3039  * kvm_arch_crypto_clear_masks
3040  *
3041  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3042  *	 to be cleared.
3043  *
3044  * Clear the masks that identify the adapters, domains and control domains to
3045  * which the KVM guest is granted access.
3046  *
3047  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3048  *	 function.
3049  */
3050 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3051 {
3052 	kvm_s390_vcpu_block_all(kvm);
3053 
3054 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
3055 	       sizeof(kvm->arch.crypto.crycb->apcb0));
3056 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
3057 	       sizeof(kvm->arch.crypto.crycb->apcb1));
3058 
3059 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3060 	/* recreate the shadow crycb for each vcpu */
3061 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3062 	kvm_s390_vcpu_unblock_all(kvm);
3063 }
3064 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3065 
3066 static u64 kvm_s390_get_initial_cpuid(void)
3067 {
3068 	struct cpuid cpuid;
3069 
3070 	get_cpu_id(&cpuid);
3071 	cpuid.version = 0xff;
3072 	return *((u64 *) &cpuid);
3073 }
3074 
3075 static void kvm_s390_crypto_init(struct kvm *kvm)
3076 {
3077 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3078 	kvm_s390_set_crycb_format(kvm);
3079 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3080 
3081 	if (!test_kvm_facility(kvm, 76))
3082 		return;
3083 
3084 	/* Enable AES/DEA protected key functions by default */
3085 	kvm->arch.crypto.aes_kw = 1;
3086 	kvm->arch.crypto.dea_kw = 1;
3087 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3088 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3089 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3090 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3091 }
3092 
3093 static void sca_dispose(struct kvm *kvm)
3094 {
3095 	if (kvm->arch.use_esca)
3096 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3097 	else
3098 		free_page((unsigned long)(kvm->arch.sca));
3099 	kvm->arch.sca = NULL;
3100 }
3101 
/*
 * Free the VM structure. With CONFIG_VFIO_PCI_ZDEV_KVM enabled the VM's zPCI
 * list is cleared first; the actual release is left to __kvm_arch_free_vm().
 */
void kvm_arch_free_vm(struct kvm *kvm)
{
	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
		kvm_s390_pci_clear_list(kvm);

	__kvm_arch_free_vm(kvm);
}
3109 
3110 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3111 {
3112 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3113 	int i, rc;
3114 	char debug_name[16];
3115 	static unsigned long sca_offset;
3116 
3117 	rc = -EINVAL;
3118 #ifdef CONFIG_KVM_S390_UCONTROL
3119 	if (type & ~KVM_VM_S390_UCONTROL)
3120 		goto out_err;
3121 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3122 		goto out_err;
3123 #else
3124 	if (type)
3125 		goto out_err;
3126 #endif
3127 
3128 	rc = s390_enable_sie();
3129 	if (rc)
3130 		goto out_err;
3131 
3132 	rc = -ENOMEM;
3133 
3134 	if (!sclp.has_64bscao)
3135 		alloc_flags |= GFP_DMA;
3136 	rwlock_init(&kvm->arch.sca_lock);
3137 	/* start with basic SCA */
3138 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3139 	if (!kvm->arch.sca)
3140 		goto out_err;
3141 	mutex_lock(&kvm_lock);
3142 	sca_offset += 16;
3143 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3144 		sca_offset = 0;
3145 	kvm->arch.sca = (struct bsca_block *)
3146 			((char *) kvm->arch.sca + sca_offset);
3147 	mutex_unlock(&kvm_lock);
3148 
3149 	sprintf(debug_name, "kvm-%u", current->pid);
3150 
3151 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3152 	if (!kvm->arch.dbf)
3153 		goto out_err;
3154 
3155 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3156 	kvm->arch.sie_page2 =
3157 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3158 	if (!kvm->arch.sie_page2)
3159 		goto out_err;
3160 
3161 	kvm->arch.sie_page2->kvm = kvm;
3162 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3163 
3164 	for (i = 0; i < kvm_s390_fac_size(); i++) {
3165 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3166 					      (kvm_s390_fac_base[i] |
3167 					       kvm_s390_fac_ext[i]);
3168 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3169 					      kvm_s390_fac_base[i];
3170 	}
3171 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3172 
3173 	/* we are always in czam mode - even on pre z14 machines */
3174 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
3175 	set_kvm_facility(kvm->arch.model.fac_list, 138);
3176 	/* we emulate STHYI in kvm */
3177 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
3178 	set_kvm_facility(kvm->arch.model.fac_list, 74);
3179 	if (MACHINE_HAS_TLB_GUEST) {
3180 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
3181 		set_kvm_facility(kvm->arch.model.fac_list, 147);
3182 	}
3183 
3184 	if (css_general_characteristics.aiv && test_facility(65))
3185 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
3186 
3187 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3188 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3189 
3190 	kvm_s390_crypto_init(kvm);
3191 
3192 	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3193 		mutex_lock(&kvm->lock);
3194 		kvm_s390_pci_init_list(kvm);
3195 		kvm_s390_vcpu_pci_enable_interp(kvm);
3196 		mutex_unlock(&kvm->lock);
3197 	}
3198 
3199 	mutex_init(&kvm->arch.float_int.ais_lock);
3200 	spin_lock_init(&kvm->arch.float_int.lock);
3201 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
3202 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3203 	init_waitqueue_head(&kvm->arch.ipte_wq);
3204 	mutex_init(&kvm->arch.ipte_mutex);
3205 
3206 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3207 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
3208 
3209 	if (type & KVM_VM_S390_UCONTROL) {
3210 		kvm->arch.gmap = NULL;
3211 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3212 	} else {
3213 		if (sclp.hamax == U64_MAX)
3214 			kvm->arch.mem_limit = TASK_SIZE_MAX;
3215 		else
3216 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3217 						    sclp.hamax + 1);
3218 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3219 		if (!kvm->arch.gmap)
3220 			goto out_err;
3221 		kvm->arch.gmap->private = kvm;
3222 		kvm->arch.gmap->pfault_enabled = 0;
3223 	}
3224 
3225 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
3226 	kvm->arch.use_skf = sclp.has_skey;
3227 	spin_lock_init(&kvm->arch.start_stop_lock);
3228 	kvm_s390_vsie_init(kvm);
3229 	if (use_gisa)
3230 		kvm_s390_gisa_init(kvm);
3231 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3232 
3233 	return 0;
3234 out_err:
3235 	free_page((unsigned long)kvm->arch.sie_page2);
3236 	debug_unregister(kvm->arch.dbf);
3237 	sca_dispose(kvm);
3238 	KVM_EVENT(3, "creation of vm failed: %d", rc);
3239 	return rc;
3240 }
3241 
3242 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3243 {
3244 	u16 rc, rrc;
3245 
3246 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3247 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3248 	kvm_s390_clear_local_irqs(vcpu);
3249 	kvm_clear_async_pf_completion_queue(vcpu);
3250 	if (!kvm_is_ucontrol(vcpu->kvm))
3251 		sca_del_vcpu(vcpu);
3252 	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3253 
3254 	if (kvm_is_ucontrol(vcpu->kvm))
3255 		gmap_remove(vcpu->arch.gmap);
3256 
3257 	if (vcpu->kvm->arch.use_cmma)
3258 		kvm_s390_vcpu_unsetup_cmma(vcpu);
3259 	/* We can not hold the vcpu mutex here, we are already dying */
3260 	if (kvm_s390_pv_cpu_get_handle(vcpu))
3261 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3262 	free_page((unsigned long)(vcpu->arch.sie_block));
3263 }
3264 
/*
 * Tear down the s390 specific state of a VM: all vcpus, the SCA, the GISA,
 * protected-virtualization state, the debug feature, sie_page2, the VM-wide
 * gmap (non-ucontrol only), adapters, floating interrupts and vSIE state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	/* return/reason codes of the deinit call; not evaluated here */
	u16 rc, rrc;

	kvm_destroy_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	/*
	 * Remove the mmu notifier only when the whole KVM VM is torn down,
	 * and only if one was registered to begin with. If the VM is
	 * currently not protected, but has previously been protected,
	 * then it's possible that the notifier is still registered.
	 */
	if (kvm->arch.pv.mmu_notifier.ops)
		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);

	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* ucontrol VMs have no VM-wide gmap */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
3298 
3299 /* Section: vcpu related */
3300 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3301 {
3302 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3303 	if (!vcpu->arch.gmap)
3304 		return -ENOMEM;
3305 	vcpu->arch.gmap->private = vcpu->kvm;
3306 
3307 	return 0;
3308 }
3309 
3310 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3311 {
3312 	if (!kvm_s390_use_sca_entries())
3313 		return;
3314 	read_lock(&vcpu->kvm->arch.sca_lock);
3315 	if (vcpu->kvm->arch.use_esca) {
3316 		struct esca_block *sca = vcpu->kvm->arch.sca;
3317 
3318 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3319 		sca->cpu[vcpu->vcpu_id].sda = 0;
3320 	} else {
3321 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3322 
3323 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3324 		sca->cpu[vcpu->vcpu_id].sda = 0;
3325 	}
3326 	read_unlock(&vcpu->kvm->arch.sca_lock);
3327 }
3328 
3329 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3330 {
3331 	if (!kvm_s390_use_sca_entries()) {
3332 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3333 
3334 		/* we still need the basic sca for the ipte control */
3335 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3336 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3337 		return;
3338 	}
3339 	read_lock(&vcpu->kvm->arch.sca_lock);
3340 	if (vcpu->kvm->arch.use_esca) {
3341 		struct esca_block *sca = vcpu->kvm->arch.sca;
3342 
3343 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3344 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3345 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
3346 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3347 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3348 	} else {
3349 		struct bsca_block *sca = vcpu->kvm->arch.sca;
3350 
3351 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3352 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3353 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3354 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3355 	}
3356 	read_unlock(&vcpu->kvm->arch.sca_lock);
3357 }
3358 
3359 /* Basic SCA to Extended SCA data copy routines */
3360 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3361 {
3362 	d->sda = s->sda;
3363 	d->sigp_ctrl.c = s->sigp_ctrl.c;
3364 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3365 }
3366 
3367 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3368 {
3369 	int i;
3370 
3371 	d->ipte_control = s->ipte_control;
3372 	d->mcn[0] = s->mcn;
3373 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3374 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3375 }
3376 
3377 static int sca_switch_to_extended(struct kvm *kvm)
3378 {
3379 	struct bsca_block *old_sca = kvm->arch.sca;
3380 	struct esca_block *new_sca;
3381 	struct kvm_vcpu *vcpu;
3382 	unsigned long vcpu_idx;
3383 	u32 scaol, scaoh;
3384 
3385 	if (kvm->arch.use_esca)
3386 		return 0;
3387 
3388 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3389 	if (!new_sca)
3390 		return -ENOMEM;
3391 
3392 	scaoh = (u32)((u64)(new_sca) >> 32);
3393 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
3394 
3395 	kvm_s390_vcpu_block_all(kvm);
3396 	write_lock(&kvm->arch.sca_lock);
3397 
3398 	sca_copy_b_to_e(new_sca, old_sca);
3399 
3400 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3401 		vcpu->arch.sie_block->scaoh = scaoh;
3402 		vcpu->arch.sie_block->scaol = scaol;
3403 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3404 	}
3405 	kvm->arch.sca = new_sca;
3406 	kvm->arch.use_esca = 1;
3407 
3408 	write_unlock(&kvm->arch.sca_lock);
3409 	kvm_s390_vcpu_unblock_all(kvm);
3410 
3411 	free_page((unsigned long)old_sca);
3412 
3413 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3414 		 old_sca, kvm->arch.sca);
3415 	return 0;
3416 }
3417 
3418 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3419 {
3420 	int rc;
3421 
3422 	if (!kvm_s390_use_sca_entries()) {
3423 		if (id < KVM_MAX_VCPUS)
3424 			return true;
3425 		return false;
3426 	}
3427 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3428 		return true;
3429 	if (!sclp.has_esca || !sclp.has_64bscao)
3430 		return false;
3431 
3432 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3433 
3434 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3435 }
3436 
/*
 * Record the current TOD clock as the start of a cpu timer accounting period.
 * The update is wrapped in a cputm_seqcount write section for the benefit of
 * kvm_s390_get_cpu_timer(). Warns once if accounting was already started.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
3445 
3446 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3447 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3448 {
3449 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3450 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3451 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3452 	vcpu->arch.cputm_start = 0;
3453 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3454 }
3455 
/*
 * Mark cpu timer accounting as enabled for this vcpu and start an accounting
 * period right away. Warns once if it was already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
3463 
/*
 * Stop the running accounting period and mark cpu timer accounting as
 * disabled for this vcpu. Warns once if it was not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
3471 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3478 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3485 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores @cputm in the SIE block inside a cputm_seqcount write section. If
 * accounting is enabled, the accounting period is restarted at the current
 * TOD clock so that the new value is not charged with time already elapsed.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
3497 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * With accounting disabled the value in the SIE block is returned as is.
 * Otherwise the stored value minus the time elapsed in the current accounting
 * period is returned; the read is repeated until it did not race with a
 * writer of cputm_seqcount.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * Bit 0 of seq is masked off for the retry check, so a
		 * sequence sampled while a write was in progress (odd) never
		 * matches and forces another round.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
3523 
/*
 * Called when the vcpu is loaded onto host cpu @cpu: re-enable the gmap that
 * was saved in enabled_gmap, flag the vcpu as running, resume cpu timer
 * accounting (if enabled and the vcpu is not idle) and record the host cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
3533 
/*
 * Counterpart of kvm_arch_vcpu_load(): forget the host cpu, pause cpu timer
 * accounting (if enabled and the vcpu is not idle), clear the running flag,
 * and remember the currently enabled gmap in enabled_gmap before disabling
 * it, so that the next load can re-enable the same one.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
3544 
3545 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3546 {
3547 	mutex_lock(&vcpu->kvm->lock);
3548 	preempt_disable();
3549 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3550 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3551 	preempt_enable();
3552 	mutex_unlock(&vcpu->kvm->lock);
3553 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3554 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3555 		sca_add_vcpu(vcpu);
3556 	}
3557 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3558 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3559 	/* make vcpu_load load the right gmap on the first trigger */
3560 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3561 }
3562 
3563 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3564 {
3565 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3566 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3567 		return true;
3568 	return false;
3569 }
3570 
3571 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3572 {
3573 	/* At least one ECC subfunction must be present */
3574 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3575 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3576 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3577 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3578 	       kvm_has_pckmo_subfunc(kvm, 41);
3579 
3580 }
3581 
3582 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3583 {
3584 	/*
3585 	 * If the AP instructions are not being interpreted and the MSAX3
3586 	 * facility is not configured for the guest, there is nothing to set up.
3587 	 */
3588 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3589 		return;
3590 
3591 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3592 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3593 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3594 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3595 
3596 	if (vcpu->kvm->arch.crypto.apie)
3597 		vcpu->arch.sie_block->eca |= ECA_APIE;
3598 
3599 	/* Set up protected key support */
3600 	if (vcpu->kvm->arch.crypto.aes_kw) {
3601 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3602 		/* ecc is also wrapped with AES key */
3603 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3604 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3605 	}
3606 
3607 	if (vcpu->kvm->arch.crypto.dea_kw)
3608 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3609 }
3610 
/*
 * Free the page referenced by cbrlo in the SIE block (allocated by
 * kvm_s390_vcpu_setup_cmma()) and reset cbrlo to 0.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
3616 
3617 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3618 {
3619 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3620 	if (!vcpu->arch.sie_block->cbrlo)
3621 		return -ENOMEM;
3622 	return 0;
3623 }
3624 
3625 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3626 {
3627 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3628 
3629 	vcpu->arch.sie_block->ibc = model->ibc;
3630 	if (test_kvm_facility(vcpu->kvm, 7))
3631 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3632 }
3633 
/*
 * Initialise the control fields of a new vcpu's SIE block from the guest's
 * facilities and the host's SCLP capabilities, set up CMMA, the clock
 * comparator timer, crypto and PCI state and, for a protected VM, create the
 * protected cpu.
 *
 * Returns 0 on success, or the error returned by kvm_s390_vcpu_setup_cmma()
 * or kvm_s390_pv_create_cpu().
 */
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	/* a new vcpu starts out in the stopped state */
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 11))
		vcpu->arch.sie_block->ecb |= ECB_PTF;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.sie_block->ecb |= ECB_SPECI;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	/* eca is assigned here; everything after this line only ORs bits in */
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	/* a non-zero GISA designation enables AIV for this vcpu */
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	/* sdnx and riccb live in the sync regs area of the kvm_run structure */
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	/* without KSS, intercept the ISKE/SSKE/RRBE instructions instead */
	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	kvm_s390_vcpu_pci_setup(vcpu);

	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		/* undo the CMMA setup from above; called even without CMMA */
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
3721 
3722 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3723 {
3724 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3725 		return -EINVAL;
3726 	return 0;
3727 }
3728 
/*
 * s390 specific part of vcpu creation: allocate the page that holds the SIE
 * block, initialise basic SIE block and vcpu state, advertise the register
 * sets that can be synchronised through kvm_run, create the private gmap for
 * ucontrol VMs and run kvm_s390_vcpu_setup().
 *
 * Returns 0 on success, -ENOMEM if the SIE page cannot be allocated, or the
 * error of __kvm_ucontrol_vcpu_init() / kvm_s390_vcpu_setup(); the resources
 * acquired here are released again on failure.
 */
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	/* register sets that are always valid for synchronisation */
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT |
				    KVM_SYNC_DIAG318;
	kvm_s390_set_prefix(vcpu, 0);
	/* further register sets depend on the facilities of the guest */
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;

	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
	return 0;

out_ucontrol_uninit:
	/* only ucontrol vcpus got a gmap of their own above */
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
3801 
3802 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3803 {
3804 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3805 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3806 }
3807 
3808 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3809 {
3810 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3811 }
3812 
3813 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3814 {
3815 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3816 	exit_sie(vcpu);
3817 }
3818 
3819 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3820 {
3821 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3822 }
3823 
3824 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3825 {
3826 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3827 	exit_sie(vcpu);
3828 }
3829 
3830 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3831 {
3832 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3833 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3834 }
3835 
3836 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3837 {
3838 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3839 }
3840 
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The kick consists of setting CPUSTAT_STOP_INT and kicking vSIE; the wait
 * is a busy loop on the PROG_IN_SIE bit of prog0c.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	/* spin until the vcpu has left SIE */
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
3852 
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request @req is queued on @vcpu first, then PROG_REQUEST is raised and
 * the vcpu is forced out of SIE via kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	__kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3859 
3860 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3861 			      unsigned long end)
3862 {
3863 	struct kvm *kvm = gmap->private;
3864 	struct kvm_vcpu *vcpu;
3865 	unsigned long prefix;
3866 	unsigned long i;
3867 
3868 	if (gmap_is_shadow(gmap))
3869 		return;
3870 	if (start >= 1UL << 31)
3871 		/* We are only interested in prefix pages */
3872 		return;
3873 	kvm_for_each_vcpu(i, vcpu, kvm) {
3874 		/* match against both prefix pages */
3875 		prefix = kvm_s390_get_prefix(vcpu);
3876 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3877 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3878 				   start, end);
3879 			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3880 		}
3881 	}
3882 }
3883 
3884 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3885 {
3886 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3887 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3888 	    READ_ONCE(halt_poll_max_steal)) {
3889 		vcpu->stat.halt_no_poll_steal++;
3890 		return true;
3891 	}
3892 	return false;
3893 }
3894 
/*
 * Not expected to be reached on s390: any call triggers BUG(). The return
 * statement only exists to satisfy the int return type.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
3901 
3902 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3903 					   struct kvm_one_reg *reg)
3904 {
3905 	int r = -EINVAL;
3906 
3907 	switch (reg->id) {
3908 	case KVM_REG_S390_TODPR:
3909 		r = put_user(vcpu->arch.sie_block->todpr,
3910 			     (u32 __user *)reg->addr);
3911 		break;
3912 	case KVM_REG_S390_EPOCHDIFF:
3913 		r = put_user(vcpu->arch.sie_block->epoch,
3914 			     (u64 __user *)reg->addr);
3915 		break;
3916 	case KVM_REG_S390_CPU_TIMER:
3917 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3918 			     (u64 __user *)reg->addr);
3919 		break;
3920 	case KVM_REG_S390_CLOCK_COMP:
3921 		r = put_user(vcpu->arch.sie_block->ckc,
3922 			     (u64 __user *)reg->addr);
3923 		break;
3924 	case KVM_REG_S390_PFTOKEN:
3925 		r = put_user(vcpu->arch.pfault_token,
3926 			     (u64 __user *)reg->addr);
3927 		break;
3928 	case KVM_REG_S390_PFCOMPARE:
3929 		r = put_user(vcpu->arch.pfault_compare,
3930 			     (u64 __user *)reg->addr);
3931 		break;
3932 	case KVM_REG_S390_PFSELECT:
3933 		r = put_user(vcpu->arch.pfault_select,
3934 			     (u64 __user *)reg->addr);
3935 		break;
3936 	case KVM_REG_S390_PP:
3937 		r = put_user(vcpu->arch.sie_block->pp,
3938 			     (u64 __user *)reg->addr);
3939 		break;
3940 	case KVM_REG_S390_GBEA:
3941 		r = put_user(vcpu->arch.sie_block->gbea,
3942 			     (u64 __user *)reg->addr);
3943 		break;
3944 	default:
3945 		break;
3946 	}
3947 
3948 	return r;
3949 }
3950 
3951 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3952 					   struct kvm_one_reg *reg)
3953 {
3954 	int r = -EINVAL;
3955 	__u64 val;
3956 
3957 	switch (reg->id) {
3958 	case KVM_REG_S390_TODPR:
3959 		r = get_user(vcpu->arch.sie_block->todpr,
3960 			     (u32 __user *)reg->addr);
3961 		break;
3962 	case KVM_REG_S390_EPOCHDIFF:
3963 		r = get_user(vcpu->arch.sie_block->epoch,
3964 			     (u64 __user *)reg->addr);
3965 		break;
3966 	case KVM_REG_S390_CPU_TIMER:
3967 		r = get_user(val, (u64 __user *)reg->addr);
3968 		if (!r)
3969 			kvm_s390_set_cpu_timer(vcpu, val);
3970 		break;
3971 	case KVM_REG_S390_CLOCK_COMP:
3972 		r = get_user(vcpu->arch.sie_block->ckc,
3973 			     (u64 __user *)reg->addr);
3974 		break;
3975 	case KVM_REG_S390_PFTOKEN:
3976 		r = get_user(vcpu->arch.pfault_token,
3977 			     (u64 __user *)reg->addr);
3978 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3979 			kvm_clear_async_pf_completion_queue(vcpu);
3980 		break;
3981 	case KVM_REG_S390_PFCOMPARE:
3982 		r = get_user(vcpu->arch.pfault_compare,
3983 			     (u64 __user *)reg->addr);
3984 		break;
3985 	case KVM_REG_S390_PFSELECT:
3986 		r = get_user(vcpu->arch.pfault_select,
3987 			     (u64 __user *)reg->addr);
3988 		break;
3989 	case KVM_REG_S390_PP:
3990 		r = get_user(vcpu->arch.sie_block->pp,
3991 			     (u64 __user *)reg->addr);
3992 		break;
3993 	case KVM_REG_S390_GBEA:
3994 		r = get_user(vcpu->arch.sie_block->gbea,
3995 			     (u64 __user *)reg->addr);
3996 		break;
3997 	default:
3998 		break;
3999 	}
4000 
4001 	return r;
4002 }
4003 
4004 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4005 {
4006 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4007 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4008 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4009 
4010 	kvm_clear_async_pf_completion_queue(vcpu);
4011 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4012 		kvm_s390_vcpu_stop(vcpu);
4013 	kvm_s390_clear_local_irqs(vcpu);
4014 }
4015 
4016 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4017 {
4018 	/* Initial reset is a superset of the normal reset */
4019 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4020 
4021 	/*
4022 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
4023 	 * We do not only reset the internal data, but also ...
4024 	 */
4025 	vcpu->arch.sie_block->gpsw.mask = 0;
4026 	vcpu->arch.sie_block->gpsw.addr = 0;
4027 	kvm_s390_set_prefix(vcpu, 0);
4028 	kvm_s390_set_cpu_timer(vcpu, 0);
4029 	vcpu->arch.sie_block->ckc = 0;
4030 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4031 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4032 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4033 
4034 	/* ... the data in sync regs */
4035 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4036 	vcpu->run->s.regs.ckc = 0;
4037 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4038 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4039 	vcpu->run->psw_addr = 0;
4040 	vcpu->run->psw_mask = 0;
4041 	vcpu->run->s.regs.todpr = 0;
4042 	vcpu->run->s.regs.cputm = 0;
4043 	vcpu->run->s.regs.ckc = 0;
4044 	vcpu->run->s.regs.pp = 0;
4045 	vcpu->run->s.regs.gbea = 1;
4046 	vcpu->run->s.regs.fpc = 0;
4047 	/*
4048 	 * Do not reset these registers in the protected case, as some of
4049 	 * them are overlayed and they are not accessible in this case
4050 	 * anyway.
4051 	 */
4052 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4053 		vcpu->arch.sie_block->gbea = 1;
4054 		vcpu->arch.sie_block->pp = 0;
4055 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4056 		vcpu->arch.sie_block->todpr = 0;
4057 	}
4058 }
4059 
4060 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4061 {
4062 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4063 
4064 	/* Clear reset is a superset of the initial reset */
4065 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4066 
4067 	memset(&regs->gprs, 0, sizeof(regs->gprs));
4068 	memset(&regs->vrs, 0, sizeof(regs->vrs));
4069 	memset(&regs->acrs, 0, sizeof(regs->acrs));
4070 	memset(&regs->gscb, 0, sizeof(regs->gscb));
4071 
4072 	regs->etoken = 0;
4073 	regs->etoken_extension = 0;
4074 }
4075 
4076 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4077 {
4078 	vcpu_load(vcpu);
4079 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4080 	vcpu_put(vcpu);
4081 	return 0;
4082 }
4083 
4084 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4085 {
4086 	vcpu_load(vcpu);
4087 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4088 	vcpu_put(vcpu);
4089 	return 0;
4090 }
4091 
4092 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4093 				  struct kvm_sregs *sregs)
4094 {
4095 	vcpu_load(vcpu);
4096 
4097 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4098 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4099 
4100 	vcpu_put(vcpu);
4101 	return 0;
4102 }
4103 
4104 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4105 				  struct kvm_sregs *sregs)
4106 {
4107 	vcpu_load(vcpu);
4108 
4109 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4110 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4111 
4112 	vcpu_put(vcpu);
4113 	return 0;
4114 }
4115 
4116 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4117 {
4118 	int ret = 0;
4119 
4120 	vcpu_load(vcpu);
4121 
4122 	if (test_fp_ctl(fpu->fpc)) {
4123 		ret = -EINVAL;
4124 		goto out;
4125 	}
4126 	vcpu->run->s.regs.fpc = fpu->fpc;
4127 	if (MACHINE_HAS_VX)
4128 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4129 				 (freg_t *) fpu->fprs);
4130 	else
4131 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4132 
4133 out:
4134 	vcpu_put(vcpu);
4135 	return ret;
4136 }
4137 
4138 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4139 {
4140 	vcpu_load(vcpu);
4141 
4142 	/* make sure we have the latest values */
4143 	save_fpu_regs();
4144 	if (MACHINE_HAS_VX)
4145 		convert_vx_to_fp((freg_t *) fpu->fprs,
4146 				 (__vector128 *) vcpu->run->s.regs.vrs);
4147 	else
4148 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4149 	fpu->fpc = vcpu->run->s.regs.fpc;
4150 
4151 	vcpu_put(vcpu);
4152 	return 0;
4153 }
4154 
4155 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4156 {
4157 	int rc = 0;
4158 
4159 	if (!is_vcpu_stopped(vcpu))
4160 		rc = -EBUSY;
4161 	else {
4162 		vcpu->run->psw_mask = psw.mask;
4163 		vcpu->run->psw_addr = psw.addr;
4164 	}
4165 	return rc;
4166 }
4167 
4168 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4169 				  struct kvm_translation *tr)
4170 {
4171 	return -EINVAL; /* not implemented yet */
4172 }
4173 
4174 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4175 			      KVM_GUESTDBG_USE_HW_BP | \
4176 			      KVM_GUESTDBG_ENABLE)
4177 
4178 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4179 					struct kvm_guest_debug *dbg)
4180 {
4181 	int rc = 0;
4182 
4183 	vcpu_load(vcpu);
4184 
4185 	vcpu->guest_debug = 0;
4186 	kvm_s390_clear_bp_data(vcpu);
4187 
4188 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4189 		rc = -EINVAL;
4190 		goto out;
4191 	}
4192 	if (!sclp.has_gpere) {
4193 		rc = -EINVAL;
4194 		goto out;
4195 	}
4196 
4197 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
4198 		vcpu->guest_debug = dbg->control;
4199 		/* enforce guest PER */
4200 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4201 
4202 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4203 			rc = kvm_s390_import_bp_data(vcpu, dbg);
4204 	} else {
4205 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4206 		vcpu->arch.guestdbg.last_bp = 0;
4207 	}
4208 
4209 	if (rc) {
4210 		vcpu->guest_debug = 0;
4211 		kvm_s390_clear_bp_data(vcpu);
4212 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4213 	}
4214 
4215 out:
4216 	vcpu_put(vcpu);
4217 	return rc;
4218 }
4219 
4220 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4221 				    struct kvm_mp_state *mp_state)
4222 {
4223 	int ret;
4224 
4225 	vcpu_load(vcpu);
4226 
4227 	/* CHECK_STOP and LOAD are not supported yet */
4228 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4229 				      KVM_MP_STATE_OPERATING;
4230 
4231 	vcpu_put(vcpu);
4232 	return ret;
4233 }
4234 
4235 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4236 				    struct kvm_mp_state *mp_state)
4237 {
4238 	int rc = 0;
4239 
4240 	vcpu_load(vcpu);
4241 
4242 	/* user space knows about this interface - let it control the state */
4243 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4244 
4245 	switch (mp_state->mp_state) {
4246 	case KVM_MP_STATE_STOPPED:
4247 		rc = kvm_s390_vcpu_stop(vcpu);
4248 		break;
4249 	case KVM_MP_STATE_OPERATING:
4250 		rc = kvm_s390_vcpu_start(vcpu);
4251 		break;
4252 	case KVM_MP_STATE_LOAD:
4253 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4254 			rc = -ENXIO;
4255 			break;
4256 		}
4257 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4258 		break;
4259 	case KVM_MP_STATE_CHECK_STOP:
4260 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
4261 	default:
4262 		rc = -ENXIO;
4263 	}
4264 
4265 	vcpu_put(vcpu);
4266 	return rc;
4267 }
4268 
/* True when the CPUSTAT_IBS cpuflag is set for @vcpu. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
4273 
/*
 * Process all pending vcpu requests before (re-)entering SIE.
 *
 * Each handled request jumps back to "retry", which first clears
 * PROG_REQUEST and then re-checks whether anything is still pending, so
 * requests raised while handling another one are picked up as well.
 *
 * Returns 0 when no request is left, or the error of
 * gmap_mprotect_notify() if re-arming the prefix notifier failed (the
 * request is re-queued in that case).
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * If the guest prefix changed, re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			/* keep the request pending so it is retried later */
			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		/* NOTE(review): presumably 0xffff invalidates ihcpu to force a flush - confirm */
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
4351 
/*
 * Set the guest TOD clock of @kvm to the value described by @gtod.
 *
 * The epoch (and, with facility 139, the epoch index) is computed as the
 * difference between the requested guest TOD and the current host TOD and
 * then copied into the SIE block of every vcpu while all vcpus are blocked.
 * Both public callers in this file take kvm->lock around this function.
 */
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	union tod_clock clk;
	unsigned long i;

	preempt_disable();

	store_tod_clock_ext(&clk);

	kvm->arch.epoch = gtod->tod - clk.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
		/*
		 * NOTE(review): looks like a borrow out of the (presumably
		 * unsigned) tod subtraction above - confirm the field types.
		 */
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	/* update every vcpu's SIE block while no vcpu is allowed into SIE */
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
}
4379 
/*
 * Set the guest TOD clock under kvm->lock; sleeps until the mutex is
 * available.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	mutex_lock(&kvm->lock);
	__kvm_s390_set_tod_clock(kvm, gtod);
	mutex_unlock(&kvm->lock);
}
4386 
4387 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4388 {
4389 	if (!mutex_trylock(&kvm->lock))
4390 		return 0;
4391 	__kvm_s390_set_tod_clock(kvm, gtod);
4392 	mutex_unlock(&kvm->lock);
4393 	return 1;
4394 }
4395 
4396 /**
4397  * kvm_arch_fault_in_page - fault-in guest page if necessary
4398  * @vcpu: The corresponding virtual cpu
4399  * @gpa: Guest physical address
4400  * @writable: Whether the page should be writable or not
4401  *
4402  * Make sure that a guest page has been faulted-in on the host.
4403  *
4404  * Return: Zero on success, negative error code otherwise.
4405  */
4406 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4407 {
4408 	return gmap_fault(vcpu->arch.gmap, gpa,
4409 			  writable ? FAULT_FLAG_WRITE : 0);
4410 }
4411 
4412 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4413 				      unsigned long token)
4414 {
4415 	struct kvm_s390_interrupt inti;
4416 	struct kvm_s390_irq irq;
4417 
4418 	if (start_token) {
4419 		irq.u.ext.ext_params2 = token;
4420 		irq.type = KVM_S390_INT_PFAULT_INIT;
4421 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4422 	} else {
4423 		inti.type = KVM_S390_INT_PFAULT_DONE;
4424 		inti.parm64 = token;
4425 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4426 	}
4427 }
4428 
4429 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4430 				     struct kvm_async_pf *work)
4431 {
4432 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4433 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4434 
4435 	return true;
4436 }
4437 
4438 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4439 				 struct kvm_async_pf *work)
4440 {
4441 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4442 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4443 }
4444 
/* Intentionally empty on s390. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
4450 
/* Always returns true on s390. */
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
4459 
/*
 * Try to set up an asynchronous page fault for the gmap address recorded
 * in current->thread.gmap_addr.
 *
 * Returns false if any precondition below is not met or the pfault token
 * cannot be read from guest memory; otherwise returns the result of
 * kvm_setup_async_pf().
 */
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;

	/* no pfault token registered for this vcpu */
	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return false;
	/* guest PSW mask does not match the select/compare pair */
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return false;
	/* external interrupts are disabled in the guest PSW */
	if (psw_extint_disabled(vcpu))
		return false;
	/* an interrupt is already pending for this vcpu */
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return false;
	/* service signal submask bit is not set in guest CR0 */
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return false;
	/* pfault handling is not enabled on the gmap */
	if (!vcpu->arch.gmap->pfault_enabled)
		return false;

	/* host virtual address of the faulting guest address, offset included */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	/* read 8 bytes at the guest real address held in pfault_token */
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return false;

	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
4486 
/*
 * Prepare @vcpu for the next SIE entry.
 *
 * Completes async pfault housekeeping, copies gprs 14/15 into the SIE
 * block, gives the scheduler a chance to run, delivers pending interrupts
 * (not for ucontrol VMs), handles pending requests and patches the PER
 * registers when guest debugging is enabled.
 *
 * Returns 0 when SIE may be entered, otherwise the non-zero result of
 * interrupt delivery or request handling.
 */
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	/* gprs 14 and 15 are handed over via the SIE block */
	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);

	/* start with a clean intercept code for the upcoming SIE run */
	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
4528 
4529 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4530 {
4531 	struct kvm_s390_pgm_info pgm_info = {
4532 		.code = PGM_ADDRESSING,
4533 	};
4534 	u8 opcode, ilen;
4535 	int rc;
4536 
4537 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4538 	trace_kvm_s390_sie_fault(vcpu);
4539 
4540 	/*
4541 	 * We want to inject an addressing exception, which is defined as a
4542 	 * suppressing or terminating exception. However, since we came here
4543 	 * by a DAT access exception, the PSW still points to the faulting
4544 	 * instruction since DAT exceptions are nullifying. So we've got
4545 	 * to look up the current opcode to get the length of the instruction
4546 	 * to be able to forward the PSW.
4547 	 */
4548 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4549 	ilen = insn_length(opcode);
4550 	if (rc < 0) {
4551 		return rc;
4552 	} else if (rc) {
4553 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4554 		 * Forward by arbitrary ilc, injection will take care of
4555 		 * nullification if necessary.
4556 		 */
4557 		pgm_info = vcpu->arch.pgm;
4558 		ilen = 4;
4559 	}
4560 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4561 	kvm_s390_forward_psw(vcpu, ilen);
4562 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4563 }
4564 
4565 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4566 {
4567 	struct mcck_volatile_info *mcck_info;
4568 	struct sie_page *sie_page;
4569 
4570 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4571 		   vcpu->arch.sie_block->icptcode);
4572 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4573 
4574 	if (guestdbg_enabled(vcpu))
4575 		kvm_s390_restore_guest_per_regs(vcpu);
4576 
4577 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4578 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4579 
4580 	if (exit_reason == -EINTR) {
4581 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4582 		sie_page = container_of(vcpu->arch.sie_block,
4583 					struct sie_page, sie_block);
4584 		mcck_info = &sie_page->mcck_info;
4585 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4586 		return 0;
4587 	}
4588 
4589 	if (vcpu->arch.sie_block->icptcode > 0) {
4590 		int rc = kvm_handle_sie_intercept(vcpu);
4591 
4592 		if (rc != -EOPNOTSUPP)
4593 			return rc;
4594 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4595 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4596 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4597 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4598 		return -EREMOTE;
4599 	} else if (exit_reason != -EFAULT) {
4600 		vcpu->stat.exit_null++;
4601 		return 0;
4602 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4603 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4604 		vcpu->run->s390_ucontrol.trans_exc_code =
4605 						current->thread.gmap_addr;
4606 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4607 		return -EREMOTE;
4608 	} else if (current->thread.gmap_pfault) {
4609 		trace_kvm_s390_major_guest_pfault(vcpu);
4610 		current->thread.gmap_pfault = 0;
4611 		if (kvm_arch_setup_async_pf(vcpu))
4612 			return 0;
4613 		vcpu->stat.pfault_sync++;
4614 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4615 	}
4616 	return vcpu_post_run_fault_in_sie(vcpu);
4617 }
4618 
/* PSW mask bits for external, I/O and machine check interrupts */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Main run loop of a vcpu: prepare, enter SIE, evaluate the exit, repeat.
 *
 * The loop ends when vcpu_pre_run() or vcpu_post_run() returns non-zero,
 * a signal is pending for the current task, or a guest debug exit is
 * pending. Returns the last rc of vcpu_pre_run()/vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	kvm_vcpu_srcu_read_lock(vcpu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		kvm_vcpu_srcu_read_unlock(vcpu);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		/* protected vcpus exchange gprs through the sie_page copy */
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		kvm_vcpu_srcu_read_lock(vcpu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	kvm_vcpu_srcu_read_unlock(vcpu);
	return rc;
}
4681 
/*
 * Transfer the register state that sync_regs() only syncs for
 * non-protected vcpus from kvm_run into the vcpu / SIE block.
 *
 * The PSW is always taken over; the remaining fields are only copied when
 * the matching KVM_SYNC_* bit is set in kvm_dirty_regs. On machines with
 * guarded storage the host gs control block is saved and, if GS is enabled
 * for the guest, the guest's one is loaded.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* an invalid token also drops the async pf completion queue */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		/* NOTE(review): presumably CR2 bit 4 enables guarded storage - confirm */
		__ctl_set_bit(2, 4);
		/* remember and save the host gs control block, if any */
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		/* switch the thread over to the guest gs control block */
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
4754 
/*
 * Transfer register state from kvm_run into the vcpu before running it.
 *
 * Fields guarded by a KVM_SYNC_* bit are only copied when that bit is set
 * in kvm_dirty_regs. Host access registers and host fpu state are saved
 * and replaced by the guest's. kvm_dirty_regs is cleared at the end.
 */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	/* swap host access registers for the guest's */
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	/* point the thread's fpu state at the guest registers in kvm_run */
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
4805 
/*
 * Counterpart of sync_regs_fmt2(): copy the state that store_regs() only
 * stores for non-protected vcpus back into kvm_run and, on machines with
 * guarded storage, switch the thread back to the host gs control block.
 */
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* save the guest gs control block if GS was in use */
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		/* hand the thread back its host gs control block (may be NULL) */
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		/* host did not use GS: clear the control register bit again */
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
4829 
/*
 * Counterpart of sync_regs(): copy the vcpu register state back into
 * kvm_run after running and restore the host access registers and the
 * host fpu state pointers saved by sync_regs().
 */
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	/* swap guest access registers for the host's */
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
4854 
/*
 * Run the vcpu until something needs to be handed back to the caller.
 *
 * Returns 0 when kvm_run has been prepared for userspace (including the
 * -EREMOTE case from __vcpu_run() and a pending guest debug exit),
 * -EINTR when immediate_exit is set or a signal is pending after an
 * otherwise successful run, -EINVAL while a PV dump is in progress, for
 * unsupported sync register bits or for a stopped vcpu under user
 * controlled cpu state, and otherwise the error from __vcpu_run().
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	/*
	 * Running a VM while dumping always has the potential to
	 * produce inconsistent dump data. But for PV vcpus a SIE
	 * entry while dumping could also lead to a fatal validity
	 * intercept which we absolutely want to avoid.
	 */
	if (vcpu->kvm->arch.pv.dumping)
		return -EINVAL;

	if (kvm_run->immediate_exit)
		return -EINTR;

	/* Reject sync register bits outside KVM_SYNC_S390_VALID_FIELDS */
	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	/* A debug exit that is already pending is reported without running */
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	/* A pending signal only turns a successful run into -EINTR */
	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc)  {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	/* The early exits above skip the register store and the statistics */
	vcpu_put(vcpu);
	return rc;
}
4930 
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
4937 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4938 {
4939 	unsigned char archmode = 1;
4940 	freg_t fprs[NUM_FPRS];
4941 	unsigned int px;
4942 	u64 clkcomp, cputm;
4943 	int rc;
4944 
4945 	px = kvm_s390_get_prefix(vcpu);
4946 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4947 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4948 			return -EFAULT;
4949 		gpa = 0;
4950 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4951 		if (write_guest_real(vcpu, 163, &archmode, 1))
4952 			return -EFAULT;
4953 		gpa = px;
4954 	} else
4955 		gpa -= __LC_FPREGS_SAVE_AREA;
4956 
4957 	/* manually convert vector registers if necessary */
4958 	if (MACHINE_HAS_VX) {
4959 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4960 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4961 				     fprs, 128);
4962 	} else {
4963 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4964 				     vcpu->run->s.regs.fprs, 128);
4965 	}
4966 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4967 			      vcpu->run->s.regs.gprs, 128);
4968 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4969 			      &vcpu->arch.sie_block->gpsw, 16);
4970 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4971 			      &px, 4);
4972 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4973 			      &vcpu->run->s.regs.fpc, 4);
4974 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4975 			      &vcpu->arch.sie_block->todpr, 4);
4976 	cputm = kvm_s390_get_cpu_timer(vcpu);
4977 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4978 			      &cputm, 8);
4979 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4980 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4981 			      &clkcomp, 8);
4982 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4983 			      &vcpu->run->s.regs.acrs, 64);
4984 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4985 			      &vcpu->arch.sie_block->gcr, 128);
4986 	return rc ? -EFAULT : 0;
4987 }
4988 
4989 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4990 {
4991 	/*
4992 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4993 	 * switch in the run ioctl. Let's update our copies before we save
4994 	 * it into the save area
4995 	 */
4996 	save_fpu_regs();
4997 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4998 	save_access_regs(vcpu->run->s.regs.acrs);
4999 
5000 	return kvm_s390_store_status_unloaded(vcpu, addr);
5001 }
5002 
/*
 * Request that IBS be disabled on one vcpu.
 * The KVM_REQ_ENABLE_IBS check comes first; presumably it consumes a
 * still pending enable request so that it cannot take effect after the
 * disable - TODO confirm against kvm_check_request().
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
5008 
5009 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5010 {
5011 	unsigned long i;
5012 	struct kvm_vcpu *vcpu;
5013 
5014 	kvm_for_each_vcpu(i, vcpu, kvm) {
5015 		__disable_ibs_on_vcpu(vcpu);
5016 	}
5017 }
5018 
5019 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5020 {
5021 	if (!sclp.has_ibs)
5022 		return;
5023 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5024 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5025 }
5026 
5027 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5028 {
5029 	int i, online_vcpus, r = 0, started_vcpus = 0;
5030 
5031 	if (!is_vcpu_stopped(vcpu))
5032 		return 0;
5033 
5034 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5035 	/* Only one cpu at a time may enter/leave the STOPPED state. */
5036 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5037 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5038 
5039 	/* Let's tell the UV that we want to change into the operating state */
5040 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5041 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5042 		if (r) {
5043 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5044 			return r;
5045 		}
5046 	}
5047 
5048 	for (i = 0; i < online_vcpus; i++) {
5049 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5050 			started_vcpus++;
5051 	}
5052 
5053 	if (started_vcpus == 0) {
5054 		/* we're the only active VCPU -> speed it up */
5055 		__enable_ibs_on_vcpu(vcpu);
5056 	} else if (started_vcpus == 1) {
5057 		/*
5058 		 * As we are starting a second VCPU, we have to disable
5059 		 * the IBS facility on all VCPUs to remove potentially
5060 		 * outstanding ENABLE requests.
5061 		 */
5062 		__disable_ibs_on_all_vcpus(vcpu->kvm);
5063 	}
5064 
5065 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5066 	/*
5067 	 * The real PSW might have changed due to a RESTART interpreted by the
5068 	 * ultravisor. We block all interrupts and let the next sie exit
5069 	 * refresh our view.
5070 	 */
5071 	if (kvm_s390_pv_cpu_is_protected(vcpu))
5072 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5073 	/*
5074 	 * Another VCPU might have used IBS while we were offline.
5075 	 * Let's play safe and flush the VCPU at startup.
5076 	 */
5077 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5078 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5079 	return 0;
5080 }
5081 
5082 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5083 {
5084 	int i, online_vcpus, r = 0, started_vcpus = 0;
5085 	struct kvm_vcpu *started_vcpu = NULL;
5086 
5087 	if (is_vcpu_stopped(vcpu))
5088 		return 0;
5089 
5090 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5091 	/* Only one cpu at a time may enter/leave the STOPPED state. */
5092 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
5093 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5094 
5095 	/* Let's tell the UV that we want to change into the stopped state */
5096 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5097 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5098 		if (r) {
5099 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5100 			return r;
5101 		}
5102 	}
5103 
5104 	/*
5105 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5106 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5107 	 * have been fully processed. This will ensure that the VCPU
5108 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5109 	 */
5110 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5111 	kvm_s390_clear_stop_irq(vcpu);
5112 
5113 	__disable_ibs_on_vcpu(vcpu);
5114 
5115 	for (i = 0; i < online_vcpus; i++) {
5116 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5117 
5118 		if (!is_vcpu_stopped(tmp)) {
5119 			started_vcpus++;
5120 			started_vcpu = tmp;
5121 		}
5122 	}
5123 
5124 	if (started_vcpus == 1) {
5125 		/*
5126 		 * As we only have one VCPU left, we want to enable the
5127 		 * IBS facility for that VCPU to speed it up.
5128 		 */
5129 		__enable_ibs_on_vcpu(started_vcpu);
5130 	}
5131 
5132 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5133 	return 0;
5134 }
5135 
5136 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5137 				     struct kvm_enable_cap *cap)
5138 {
5139 	int r;
5140 
5141 	if (cap->flags)
5142 		return -EINVAL;
5143 
5144 	switch (cap->cap) {
5145 	case KVM_CAP_S390_CSS_SUPPORT:
5146 		if (!vcpu->kvm->arch.css_support) {
5147 			vcpu->kvm->arch.css_support = 1;
5148 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5149 			trace_kvm_s390_enable_css(vcpu->kvm);
5150 		}
5151 		r = 0;
5152 		break;
5153 	default:
5154 		r = -EINVAL;
5155 		break;
5156 	}
5157 	return r;
5158 }
5159 
5160 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5161 				  struct kvm_s390_mem_op *mop)
5162 {
5163 	void __user *uaddr = (void __user *)mop->buf;
5164 	int r = 0;
5165 
5166 	if (mop->flags || !mop->size)
5167 		return -EINVAL;
5168 	if (mop->size + mop->sida_offset < mop->size)
5169 		return -EINVAL;
5170 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5171 		return -E2BIG;
5172 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
5173 		return -EINVAL;
5174 
5175 	switch (mop->op) {
5176 	case KVM_S390_MEMOP_SIDA_READ:
5177 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
5178 				 mop->sida_offset), mop->size))
5179 			r = -EFAULT;
5180 
5181 		break;
5182 	case KVM_S390_MEMOP_SIDA_WRITE:
5183 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
5184 				   mop->sida_offset), uaddr, mop->size))
5185 			r = -EFAULT;
5186 		break;
5187 	}
5188 	return r;
5189 }
5190 
5191 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5192 				 struct kvm_s390_mem_op *mop)
5193 {
5194 	void __user *uaddr = (void __user *)mop->buf;
5195 	void *tmpbuf = NULL;
5196 	int r = 0;
5197 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5198 				    | KVM_S390_MEMOP_F_CHECK_ONLY
5199 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5200 
5201 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5202 		return -EINVAL;
5203 	if (mop->size > MEM_OP_MAX_SIZE)
5204 		return -E2BIG;
5205 	if (kvm_s390_pv_cpu_is_protected(vcpu))
5206 		return -EINVAL;
5207 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5208 		if (access_key_invalid(mop->key))
5209 			return -EINVAL;
5210 	} else {
5211 		mop->key = 0;
5212 	}
5213 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5214 		tmpbuf = vmalloc(mop->size);
5215 		if (!tmpbuf)
5216 			return -ENOMEM;
5217 	}
5218 
5219 	switch (mop->op) {
5220 	case KVM_S390_MEMOP_LOGICAL_READ:
5221 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5222 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5223 					    GACC_FETCH, mop->key);
5224 			break;
5225 		}
5226 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5227 					mop->size, mop->key);
5228 		if (r == 0) {
5229 			if (copy_to_user(uaddr, tmpbuf, mop->size))
5230 				r = -EFAULT;
5231 		}
5232 		break;
5233 	case KVM_S390_MEMOP_LOGICAL_WRITE:
5234 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5235 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5236 					    GACC_STORE, mop->key);
5237 			break;
5238 		}
5239 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5240 			r = -EFAULT;
5241 			break;
5242 		}
5243 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5244 					 mop->size, mop->key);
5245 		break;
5246 	}
5247 
5248 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5249 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5250 
5251 	vfree(tmpbuf);
5252 	return r;
5253 }
5254 
5255 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5256 				     struct kvm_s390_mem_op *mop)
5257 {
5258 	int r, srcu_idx;
5259 
5260 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5261 
5262 	switch (mop->op) {
5263 	case KVM_S390_MEMOP_LOGICAL_READ:
5264 	case KVM_S390_MEMOP_LOGICAL_WRITE:
5265 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
5266 		break;
5267 	case KVM_S390_MEMOP_SIDA_READ:
5268 	case KVM_S390_MEMOP_SIDA_WRITE:
5269 		/* we are locked against sida going away by the vcpu->mutex */
5270 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
5271 		break;
5272 	default:
5273 		r = -EINVAL;
5274 	}
5275 
5276 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5277 	return r;
5278 }
5279 
5280 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5281 			       unsigned int ioctl, unsigned long arg)
5282 {
5283 	struct kvm_vcpu *vcpu = filp->private_data;
5284 	void __user *argp = (void __user *)arg;
5285 
5286 	switch (ioctl) {
5287 	case KVM_S390_IRQ: {
5288 		struct kvm_s390_irq s390irq;
5289 
5290 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5291 			return -EFAULT;
5292 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5293 	}
5294 	case KVM_S390_INTERRUPT: {
5295 		struct kvm_s390_interrupt s390int;
5296 		struct kvm_s390_irq s390irq = {};
5297 
5298 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
5299 			return -EFAULT;
5300 		if (s390int_to_s390irq(&s390int, &s390irq))
5301 			return -EINVAL;
5302 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
5303 	}
5304 	}
5305 	return -ENOIOCTLCMD;
5306 }
5307 
5308 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5309 					struct kvm_pv_cmd *cmd)
5310 {
5311 	struct kvm_s390_pv_dmp dmp;
5312 	void *data;
5313 	int ret;
5314 
5315 	/* Dump initialization is a prerequisite */
5316 	if (!vcpu->kvm->arch.pv.dumping)
5317 		return -EINVAL;
5318 
5319 	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5320 		return -EFAULT;
5321 
5322 	/* We only handle this subcmd right now */
5323 	if (dmp.subcmd != KVM_PV_DUMP_CPU)
5324 		return -EINVAL;
5325 
5326 	/* CPU dump length is the same as create cpu storage donation. */
5327 	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5328 		return -EINVAL;
5329 
5330 	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5331 	if (!data)
5332 		return -ENOMEM;
5333 
5334 	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5335 
5336 	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5337 		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
5338 
5339 	if (ret)
5340 		ret = -EINVAL;
5341 
5342 	/* On success copy over the dump data */
5343 	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5344 		ret = -EFAULT;
5345 
5346 	kvfree(data);
5347 	return ret;
5348 }
5349 
5350 long kvm_arch_vcpu_ioctl(struct file *filp,
5351 			 unsigned int ioctl, unsigned long arg)
5352 {
5353 	struct kvm_vcpu *vcpu = filp->private_data;
5354 	void __user *argp = (void __user *)arg;
5355 	int idx;
5356 	long r;
5357 	u16 rc, rrc;
5358 
5359 	vcpu_load(vcpu);
5360 
5361 	switch (ioctl) {
5362 	case KVM_S390_STORE_STATUS:
5363 		idx = srcu_read_lock(&vcpu->kvm->srcu);
5364 		r = kvm_s390_store_status_unloaded(vcpu, arg);
5365 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
5366 		break;
5367 	case KVM_S390_SET_INITIAL_PSW: {
5368 		psw_t psw;
5369 
5370 		r = -EFAULT;
5371 		if (copy_from_user(&psw, argp, sizeof(psw)))
5372 			break;
5373 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5374 		break;
5375 	}
5376 	case KVM_S390_CLEAR_RESET:
5377 		r = 0;
5378 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5379 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5380 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5381 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5382 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5383 				   rc, rrc);
5384 		}
5385 		break;
5386 	case KVM_S390_INITIAL_RESET:
5387 		r = 0;
5388 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5389 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5390 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5391 					  UVC_CMD_CPU_RESET_INITIAL,
5392 					  &rc, &rrc);
5393 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5394 				   rc, rrc);
5395 		}
5396 		break;
5397 	case KVM_S390_NORMAL_RESET:
5398 		r = 0;
5399 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5400 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5401 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5402 					  UVC_CMD_CPU_RESET, &rc, &rrc);
5403 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5404 				   rc, rrc);
5405 		}
5406 		break;
5407 	case KVM_SET_ONE_REG:
5408 	case KVM_GET_ONE_REG: {
5409 		struct kvm_one_reg reg;
5410 		r = -EINVAL;
5411 		if (kvm_s390_pv_cpu_is_protected(vcpu))
5412 			break;
5413 		r = -EFAULT;
5414 		if (copy_from_user(&reg, argp, sizeof(reg)))
5415 			break;
5416 		if (ioctl == KVM_SET_ONE_REG)
5417 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5418 		else
5419 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5420 		break;
5421 	}
5422 #ifdef CONFIG_KVM_S390_UCONTROL
5423 	case KVM_S390_UCAS_MAP: {
5424 		struct kvm_s390_ucas_mapping ucasmap;
5425 
5426 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5427 			r = -EFAULT;
5428 			break;
5429 		}
5430 
5431 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5432 			r = -EINVAL;
5433 			break;
5434 		}
5435 
5436 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5437 				     ucasmap.vcpu_addr, ucasmap.length);
5438 		break;
5439 	}
5440 	case KVM_S390_UCAS_UNMAP: {
5441 		struct kvm_s390_ucas_mapping ucasmap;
5442 
5443 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5444 			r = -EFAULT;
5445 			break;
5446 		}
5447 
5448 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5449 			r = -EINVAL;
5450 			break;
5451 		}
5452 
5453 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5454 			ucasmap.length);
5455 		break;
5456 	}
5457 #endif
5458 	case KVM_S390_VCPU_FAULT: {
5459 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5460 		break;
5461 	}
5462 	case KVM_ENABLE_CAP:
5463 	{
5464 		struct kvm_enable_cap cap;
5465 		r = -EFAULT;
5466 		if (copy_from_user(&cap, argp, sizeof(cap)))
5467 			break;
5468 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5469 		break;
5470 	}
5471 	case KVM_S390_MEM_OP: {
5472 		struct kvm_s390_mem_op mem_op;
5473 
5474 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5475 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5476 		else
5477 			r = -EFAULT;
5478 		break;
5479 	}
5480 	case KVM_S390_SET_IRQ_STATE: {
5481 		struct kvm_s390_irq_state irq_state;
5482 
5483 		r = -EFAULT;
5484 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5485 			break;
5486 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5487 		    irq_state.len == 0 ||
5488 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5489 			r = -EINVAL;
5490 			break;
5491 		}
5492 		/* do not use irq_state.flags, it will break old QEMUs */
5493 		r = kvm_s390_set_irq_state(vcpu,
5494 					   (void __user *) irq_state.buf,
5495 					   irq_state.len);
5496 		break;
5497 	}
5498 	case KVM_S390_GET_IRQ_STATE: {
5499 		struct kvm_s390_irq_state irq_state;
5500 
5501 		r = -EFAULT;
5502 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5503 			break;
5504 		if (irq_state.len == 0) {
5505 			r = -EINVAL;
5506 			break;
5507 		}
5508 		/* do not use irq_state.flags, it will break old QEMUs */
5509 		r = kvm_s390_get_irq_state(vcpu,
5510 					   (__u8 __user *)  irq_state.buf,
5511 					   irq_state.len);
5512 		break;
5513 	}
5514 	case KVM_S390_PV_CPU_COMMAND: {
5515 		struct kvm_pv_cmd cmd;
5516 
5517 		r = -EINVAL;
5518 		if (!is_prot_virt_host())
5519 			break;
5520 
5521 		r = -EFAULT;
5522 		if (copy_from_user(&cmd, argp, sizeof(cmd)))
5523 			break;
5524 
5525 		r = -EINVAL;
5526 		if (cmd.flags)
5527 			break;
5528 
5529 		/* We only handle this cmd right now */
5530 		if (cmd.cmd != KVM_PV_DUMP)
5531 			break;
5532 
5533 		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5534 
5535 		/* Always copy over UV rc / rrc data */
5536 		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5537 				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
5538 			r = -EFAULT;
5539 		break;
5540 	}
5541 	default:
5542 		r = -ENOTTY;
5543 	}
5544 
5545 	vcpu_put(vcpu);
5546 	return r;
5547 }
5548 
5549 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5550 {
5551 #ifdef CONFIG_KVM_S390_UCONTROL
5552 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5553 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5554 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5555 		get_page(vmf->page);
5556 		return 0;
5557 	}
5558 #endif
5559 	return VM_FAULT_SIGBUS;
5560 }
5561 
5562 /* Section: memory related */
5563 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5564 				   const struct kvm_memory_slot *old,
5565 				   struct kvm_memory_slot *new,
5566 				   enum kvm_mr_change change)
5567 {
5568 	gpa_t size;
5569 
5570 	/* When we are protected, we should not change the memory slots */
5571 	if (kvm_s390_pv_get_handle(kvm))
5572 		return -EINVAL;
5573 
5574 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5575 		return 0;
5576 
5577 	/* A few sanity checks. We can have memory slots which have to be
5578 	   located/ended at a segment boundary (1MB). The memory in userland is
5579 	   ok to be fragmented into various different vmas. It is okay to mmap()
5580 	   and munmap() stuff in this slot after doing this call at any time */
5581 
5582 	if (new->userspace_addr & 0xffffful)
5583 		return -EINVAL;
5584 
5585 	size = new->npages * PAGE_SIZE;
5586 	if (size & 0xffffful)
5587 		return -EINVAL;
5588 
5589 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5590 		return -EINVAL;
5591 
5592 	return 0;
5593 }
5594 
5595 void kvm_arch_commit_memory_region(struct kvm *kvm,
5596 				struct kvm_memory_slot *old,
5597 				const struct kvm_memory_slot *new,
5598 				enum kvm_mr_change change)
5599 {
5600 	int rc = 0;
5601 
5602 	switch (change) {
5603 	case KVM_MR_DELETE:
5604 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5605 					old->npages * PAGE_SIZE);
5606 		break;
5607 	case KVM_MR_MOVE:
5608 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5609 					old->npages * PAGE_SIZE);
5610 		if (rc)
5611 			break;
5612 		fallthrough;
5613 	case KVM_MR_CREATE:
5614 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5615 				      new->base_gfn * PAGE_SIZE,
5616 				      new->npages * PAGE_SIZE);
5617 		break;
5618 	case KVM_MR_FLAGS_ONLY:
5619 		break;
5620 	default:
5621 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5622 	}
5623 	if (rc)
5624 		pr_warn("failed to commit memory region\n");
5625 	return;
5626 }
5627 
5628 static inline unsigned long nonhyp_mask(int i)
5629 {
5630 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5631 
5632 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5633 }
5634 
5635 static int __init kvm_s390_init(void)
5636 {
5637 	int i;
5638 
5639 	if (!sclp.has_sief2) {
5640 		pr_info("SIE is not available\n");
5641 		return -ENODEV;
5642 	}
5643 
5644 	if (nested && hpage) {
5645 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5646 		return -EINVAL;
5647 	}
5648 
5649 	for (i = 0; i < 16; i++)
5650 		kvm_s390_fac_base[i] |=
5651 			stfle_fac_list[i] & nonhyp_mask(i);
5652 
5653 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5654 }
5655 
/* Module exit: hand teardown over to the common KVM code. */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
5660 
/* Register the module entry and exit points defined above. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
5663 
5664 /*
5665  * Enable autoloading of the kvm module.
5666  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5667  * since x86 takes a different approach.
5668  */
5669 #include <linux/miscdevice.h>
5670 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5671 MODULE_ALIAS("devname:kvm");
5672