xref: /linux/arch/s390/kvm/kvm-s390.c (revision a4721ced760684d1776bf31f7925aa41bb3f4846)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
60 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
61 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
62 
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
65 	{ "exit_null", VCPU_STAT(exit_null) },
66 	{ "exit_validity", VCPU_STAT(exit_validity) },
67 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
68 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
69 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
70 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
72 	{ "exit_pei", VCPU_STAT(exit_pei) },
73 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
80 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
81 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
82 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
83 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
84 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
85 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
86 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
87 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
88 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
89 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
90 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
91 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
92 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
93 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
94 	{ "deliver_program", VCPU_STAT(deliver_program) },
95 	{ "deliver_io", VCPU_STAT(deliver_io) },
96 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
97 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
98 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
99 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
100 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
101 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
102 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
103 	{ "inject_io", VM_STAT(inject_io) },
104 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
105 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
106 	{ "inject_program", VCPU_STAT(inject_program) },
107 	{ "inject_restart", VCPU_STAT(inject_restart) },
108 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
109 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
110 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
111 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
112 	{ "inject_virtio", VM_STAT(inject_virtio) },
113 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
114 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
115 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
116 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
117 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
118 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
119 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
120 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
121 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
122 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
123 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
124 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
125 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
126 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
127 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
128 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
129 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
130 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
131 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
132 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
133 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
134 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
135 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
136 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
137 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
138 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
139 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
140 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
141 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
142 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
143 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
144 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
145 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
146 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
147 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
148 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
149 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
150 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
151 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
152 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
153 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
154 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
155 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
156 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
157 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
158 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
159 	{ "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
160 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
161 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
162 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
163 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
164 	{ NULL }
165 };
166 
167 struct kvm_s390_tod_clock_ext {
168 	__u8 epoch_idx;
169 	__u64 tod;
170 	__u8 reserved[7];
171 } __packed;
172 
173 /* allow nested virtualization in KVM (if enabled by user space) */
174 static int nested;
175 module_param(nested, int, S_IRUGO);
176 MODULE_PARM_DESC(nested, "Nested virtualization support");
177 
178 /* allow 1m huge page guest backing, if !nested */
179 static int hpage;
180 module_param(hpage, int, 0444);
181 MODULE_PARM_DESC(hpage, "1m huge page backing support");
182 
183 /* maximum percentage of steal time for polling; >100 is treated as 100 */
184 static u8 halt_poll_max_steal = 10;
185 module_param(halt_poll_max_steal, byte, 0644);
186 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
187 
188 /* if set to true, the GISA will be initialized and used if available */
189 static bool use_gisa = true;
190 module_param(use_gisa, bool, 0644);
191 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
192 
193 /*
194  * For now we handle at most 16 double words as this is what the s390 base
195  * kernel handles and stores in the prefix page. If we ever need to go beyond
196  * this, this requires changes to code, but the external uapi can stay.
197  */
198 #define SIZE_INTERNAL 16
199 
200 /*
201  * Base feature mask that defines default mask for facilities. Consists of the
202  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
203  */
204 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
205 /*
206  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
207  * and defines the facilities that can be enabled via a cpu model.
208  */
209 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
210 
211 static unsigned long kvm_s390_fac_size(void)
212 {
213 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
214 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
215 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
216 		sizeof(S390_lowcore.stfle_fac_list));
217 
218 	return SIZE_INTERNAL;
219 }
220 
221 /* available cpu features supported by kvm */
222 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
223 /* available subfunctions indicated via query / "test bit" */
224 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
225 
226 static struct gmap_notifier gmap_notifier;
227 static struct gmap_notifier vsie_gmap_notifier;
228 debug_info_t *kvm_s390_dbf;
229 debug_info_t *kvm_s390_dbf_uv;
230 
231 /* Section: not file related */
232 int kvm_arch_hardware_enable(void)
233 {
234 	/* every s390 is virtualization enabled ;-) */
235 	return 0;
236 }
237 
238 int kvm_arch_check_processor_compat(void *opaque)
239 {
240 	return 0;
241 }
242 
243 /* forward declarations */
244 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
245 			      unsigned long end);
246 static int sca_switch_to_extended(struct kvm *kvm);
247 
248 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
249 {
250 	u8 delta_idx = 0;
251 
252 	/*
253 	 * The TOD jumps by delta; we have to compensate for this by adding
254 	 * -delta to the epoch.
255 	 */
256 	delta = -delta;
257 
258 	/* sign-extension - we're adding to signed values below */
259 	if ((s64)delta < 0)
260 		delta_idx = -1;
261 
262 	scb->epoch += delta;
263 	if (scb->ecd & ECD_MEF) {
264 		scb->epdx += delta_idx;
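		/* carry out of the low 64 bits of the 128-bit epoch into the epoch index */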
265 		if (scb->epoch < delta)
266 			scb->epdx += 1;
267 	}
268 }
269 
270 /*
271  * This callback is executed during stop_machine(). All CPUs are therefore
272  * temporarily stopped. In order not to change guest behavior, we have to
273  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
274  * so a CPU won't be stopped while calculating with the epoch.
275  */
276 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
277 			  void *v)
278 {
279 	struct kvm *kvm;
280 	struct kvm_vcpu *vcpu;
281 	int i;
282 	unsigned long long *delta = v;
283 
284 	list_for_each_entry(kvm, &vm_list, vm_list) {
285 		kvm_for_each_vcpu(i, vcpu, kvm) {
286 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
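			/* the VM-wide epoch mirrors the epoch of vcpu 0 */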
287 			if (i == 0) {
288 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
289 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
290 			}
291 			if (vcpu->arch.cputm_enabled)
292 				vcpu->arch.cputm_start += *delta;
293 			if (vcpu->arch.vsie_block)
294 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
295 						   *delta);
296 		}
297 	}
298 	return NOTIFY_OK;
299 }
300 
301 static struct notifier_block kvm_clock_notifier = {
302 	.notifier_call = kvm_clock_sync,
303 };
304 
305 int kvm_arch_hardware_setup(void *opaque)
306 {
307 	gmap_notifier.notifier_call = kvm_gmap_notifier;
308 	gmap_register_pte_notifier(&gmap_notifier);
309 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
310 	gmap_register_pte_notifier(&vsie_gmap_notifier);
311 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
312 				       &kvm_clock_notifier);
313 	return 0;
314 }
315 
316 void kvm_arch_hardware_unsetup(void)
317 {
318 	gmap_unregister_pte_notifier(&gmap_notifier);
319 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
320 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
321 					 &kvm_clock_notifier);
322 }
323 
324 static void allow_cpu_feat(unsigned long nr)
325 {
326 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
327 }
328 
329 static inline int plo_test_bit(unsigned char nr)
330 {
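	/* GR0 bit 0x100 selects the PLO "test bit" function: cc 0 means function code nr is installed. */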
331 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
332 	int cc;
333 
334 	asm volatile(
335 		/* Parameter registers are ignored for "test bit" */
336 		"	plo	0,0,0,0(0)\n"
337 		"	ipm	%0\n"
338 		"	srl	%0,28\n"
339 		: "=d" (cc)
340 		: "d" (r0)
341 		: "cc");
342 	return cc == 0;
343 }
344 
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 	register unsigned long r0 asm("0") = 0;	/* query function */
348 	register unsigned long r1 asm("1") = (unsigned long) query;
349 
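	/* Run the query function (GR0 == 0) of the given opcode and store the result at *query (address in GR1). */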
350 	asm volatile(
351 		/* Parameter regs are ignored */
352 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
353 		:
354 		: "d" (r0), "a" (r1), [opc] "i" (opcode)
355 		: "cc", "memory");
356 }
357 
358 #define INSN_SORTL 0xb938
359 #define INSN_DFLTCC 0xb939
360 
361 static void kvm_s390_cpu_feat_init(void)
362 {
363 	int i;
364 
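	/* Probe all 256 PLO function codes; record available ones query-style (bit 0 is the MSB of byte 0). */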
365 	for (i = 0; i < 256; ++i) {
366 		if (plo_test_bit(i))
367 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
368 	}
369 
370 	if (test_facility(28)) /* TOD-clock steering */
371 		ptff(kvm_s390_available_subfunc.ptff,
372 		     sizeof(kvm_s390_available_subfunc.ptff),
373 		     PTFF_QAF);
374 
375 	if (test_facility(17)) { /* MSA */
376 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
377 			      kvm_s390_available_subfunc.kmac);
378 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
379 			      kvm_s390_available_subfunc.kmc);
380 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
381 			      kvm_s390_available_subfunc.km);
382 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
383 			      kvm_s390_available_subfunc.kimd);
384 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
385 			      kvm_s390_available_subfunc.klmd);
386 	}
387 	if (test_facility(76)) /* MSA3 */
388 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
389 			      kvm_s390_available_subfunc.pckmo);
390 	if (test_facility(77)) { /* MSA4 */
391 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
392 			      kvm_s390_available_subfunc.kmctr);
393 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
394 			      kvm_s390_available_subfunc.kmf);
395 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmo);
397 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.pcc);
399 	}
400 	if (test_facility(57)) /* MSA5 */
401 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.ppno);
403 
404 	if (test_facility(146)) /* MSA8 */
405 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.kma);
407 
408 	if (test_facility(155)) /* MSA9 */
409 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.kdsa);
411 
412 	if (test_facility(150)) /* SORTL */
413 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
414 
415 	if (test_facility(151)) /* DFLTCC */
416 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
417 
418 	if (MACHINE_HAS_ESOP)
419 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
420 	/*
421 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
422 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
423 	 */
424 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
425 	    !test_facility(3) || !nested)
426 		return;
427 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
428 	if (sclp.has_64bscao)
429 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
430 	if (sclp.has_siif)
431 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
432 	if (sclp.has_gpere)
433 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
434 	if (sclp.has_gsls)
435 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
436 	if (sclp.has_ib)
437 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
438 	if (sclp.has_cei)
439 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
440 	if (sclp.has_ibs)
441 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
442 	if (sclp.has_kss)
443 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
444 	/*
445 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
446 	 * all skey handling functions read/set the skey from the PGSTE
447 	 * instead of the real storage key.
448 	 *
449 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
450 	 * pages to be detected as preserved although they are resident.
451 	 *
452 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
453 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
454 	 *
455 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
456 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
457 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
458 	 *
459 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
460 	 * cannot easily shadow the SCA because of the ipte lock.
461 	 */
462 }
463 
464 int kvm_arch_init(void *opaque)
465 {
466 	int rc = -ENOMEM;
467 
468 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
469 	if (!kvm_s390_dbf)
470 		return -ENOMEM;
471 
472 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
473 	if (!kvm_s390_dbf_uv)
474 		goto out;
475 
476 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
477 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
478 		goto out;
479 
480 	kvm_s390_cpu_feat_init();
481 
482 	/* Register floating interrupt controller interface. */
483 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
484 	if (rc) {
485 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
486 		goto out;
487 	}
488 
489 	rc = kvm_s390_gib_init(GAL_ISC);
490 	if (rc)
491 		goto out;
492 
493 	return 0;
494 
495 out:
496 	kvm_arch_exit();
497 	return rc;
498 }
499 
500 void kvm_arch_exit(void)
501 {
502 	kvm_s390_gib_destroy();
503 	debug_unregister(kvm_s390_dbf);
504 	debug_unregister(kvm_s390_dbf_uv);
505 }
506 
507 /* Section: device related */
508 long kvm_arch_dev_ioctl(struct file *filp,
509 			unsigned int ioctl, unsigned long arg)
510 {
511 	if (ioctl == KVM_S390_ENABLE_SIE)
512 		return s390_enable_sie();
513 	return -EINVAL;
514 }
515 
516 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
517 {
518 	int r;
519 
520 	switch (ext) {
521 	case KVM_CAP_S390_PSW:
522 	case KVM_CAP_S390_GMAP:
523 	case KVM_CAP_SYNC_MMU:
524 #ifdef CONFIG_KVM_S390_UCONTROL
525 	case KVM_CAP_S390_UCONTROL:
526 #endif
527 	case KVM_CAP_ASYNC_PF:
528 	case KVM_CAP_SYNC_REGS:
529 	case KVM_CAP_ONE_REG:
530 	case KVM_CAP_ENABLE_CAP:
531 	case KVM_CAP_S390_CSS_SUPPORT:
532 	case KVM_CAP_IOEVENTFD:
533 	case KVM_CAP_DEVICE_CTRL:
534 	case KVM_CAP_S390_IRQCHIP:
535 	case KVM_CAP_VM_ATTRIBUTES:
536 	case KVM_CAP_MP_STATE:
537 	case KVM_CAP_IMMEDIATE_EXIT:
538 	case KVM_CAP_S390_INJECT_IRQ:
539 	case KVM_CAP_S390_USER_SIGP:
540 	case KVM_CAP_S390_USER_STSI:
541 	case KVM_CAP_S390_SKEYS:
542 	case KVM_CAP_S390_IRQ_STATE:
543 	case KVM_CAP_S390_USER_INSTR0:
544 	case KVM_CAP_S390_CMMA_MIGRATION:
545 	case KVM_CAP_S390_AIS:
546 	case KVM_CAP_S390_AIS_MIGRATION:
547 	case KVM_CAP_S390_VCPU_RESETS:
548 		r = 1;
549 		break;
550 	case KVM_CAP_S390_HPAGE_1M:
551 		r = 0;
552 		if (hpage && !kvm_is_ucontrol(kvm))
553 			r = 1;
554 		break;
555 	case KVM_CAP_S390_MEM_OP:
556 		r = MEM_OP_MAX_SIZE;
557 		break;
558 	case KVM_CAP_NR_VCPUS:
559 	case KVM_CAP_MAX_VCPUS:
560 	case KVM_CAP_MAX_VCPU_ID:
561 		r = KVM_S390_BSCA_CPU_SLOTS;
562 		if (!kvm_s390_use_sca_entries())
563 			r = KVM_MAX_VCPUS;
564 		else if (sclp.has_esca && sclp.has_64bscao)
565 			r = KVM_S390_ESCA_CPU_SLOTS;
566 		break;
567 	case KVM_CAP_S390_COW:
568 		r = MACHINE_HAS_ESOP;
569 		break;
570 	case KVM_CAP_S390_VECTOR_REGISTERS:
571 		r = MACHINE_HAS_VX;
572 		break;
573 	case KVM_CAP_S390_RI:
574 		r = test_facility(64);
575 		break;
576 	case KVM_CAP_S390_GS:
577 		r = test_facility(133);
578 		break;
579 	case KVM_CAP_S390_BPB:
580 		r = test_facility(82);
581 		break;
582 	case KVM_CAP_S390_PROTECTED:
583 		r = is_prot_virt_host();
584 		break;
585 	default:
586 		r = 0;
587 	}
588 	return r;
589 }
590 
591 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
592 {
593 	int i;
594 	gfn_t cur_gfn, last_gfn;
595 	unsigned long gaddr, vmaddr;
596 	struct gmap *gmap = kvm->arch.gmap;
597 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
598 
599 	/* Loop over all guest segments */
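	/* A segment covers _PAGE_ENTRIES (256) 4k pages, i.e. 1 MB of guest memory. */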
600 	cur_gfn = memslot->base_gfn;
601 	last_gfn = memslot->base_gfn + memslot->npages;
602 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
603 		gaddr = gfn_to_gpa(cur_gfn);
604 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
605 		if (kvm_is_error_hva(vmaddr))
606 			continue;
607 
608 		bitmap_zero(bitmap, _PAGE_ENTRIES);
609 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
610 		for (i = 0; i < _PAGE_ENTRIES; i++) {
611 			if (test_bit(i, bitmap))
612 				mark_page_dirty(kvm, cur_gfn + i);
613 		}
614 
615 		if (fatal_signal_pending(current))
616 			return;
617 		cond_resched();
618 	}
619 }
620 
621 /* Section: vm related */
622 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
623 
624 /*
625  * Get (and clear) the dirty memory log for a memory slot.
626  */
627 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
628 			       struct kvm_dirty_log *log)
629 {
630 	int r;
631 	unsigned long n;
632 	struct kvm_memory_slot *memslot;
633 	int is_dirty;
634 
635 	if (kvm_is_ucontrol(kvm))
636 		return -EINVAL;
637 
638 	mutex_lock(&kvm->slots_lock);
639 
640 	r = -EINVAL;
641 	if (log->slot >= KVM_USER_MEM_SLOTS)
642 		goto out;
643 
644 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
645 	if (r)
646 		goto out;
647 
648 	/* Clear the dirty log */
649 	if (is_dirty) {
650 		n = kvm_dirty_bitmap_bytes(memslot);
651 		memset(memslot->dirty_bitmap, 0, n);
652 	}
653 	r = 0;
654 out:
655 	mutex_unlock(&kvm->slots_lock);
656 	return r;
657 }
658 
659 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
660 {
661 	unsigned int i;
662 	struct kvm_vcpu *vcpu;
663 
664 	kvm_for_each_vcpu(i, vcpu, kvm) {
665 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
666 	}
667 }
668 
669 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
670 {
671 	int r;
672 
673 	if (cap->flags)
674 		return -EINVAL;
675 
676 	switch (cap->cap) {
677 	case KVM_CAP_S390_IRQCHIP:
678 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
679 		kvm->arch.use_irqchip = 1;
680 		r = 0;
681 		break;
682 	case KVM_CAP_S390_USER_SIGP:
683 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
684 		kvm->arch.user_sigp = 1;
685 		r = 0;
686 		break;
687 	case KVM_CAP_S390_VECTOR_REGISTERS:
688 		mutex_lock(&kvm->lock);
689 		if (kvm->created_vcpus) {
690 			r = -EBUSY;
691 		} else if (MACHINE_HAS_VX) {
692 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
693 			set_kvm_facility(kvm->arch.model.fac_list, 129);
694 			if (test_facility(134)) {
695 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
696 				set_kvm_facility(kvm->arch.model.fac_list, 134);
697 			}
698 			if (test_facility(135)) {
699 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
700 				set_kvm_facility(kvm->arch.model.fac_list, 135);
701 			}
702 			if (test_facility(148)) {
703 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
704 				set_kvm_facility(kvm->arch.model.fac_list, 148);
705 			}
706 			if (test_facility(152)) {
707 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
708 				set_kvm_facility(kvm->arch.model.fac_list, 152);
709 			}
710 			r = 0;
711 		} else
712 			r = -EINVAL;
713 		mutex_unlock(&kvm->lock);
714 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
715 			 r ? "(not available)" : "(success)");
716 		break;
717 	case KVM_CAP_S390_RI:
718 		r = -EINVAL;
719 		mutex_lock(&kvm->lock);
720 		if (kvm->created_vcpus) {
721 			r = -EBUSY;
722 		} else if (test_facility(64)) {
723 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
724 			set_kvm_facility(kvm->arch.model.fac_list, 64);
725 			r = 0;
726 		}
727 		mutex_unlock(&kvm->lock);
728 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
729 			 r ? "(not available)" : "(success)");
730 		break;
731 	case KVM_CAP_S390_AIS:
732 		mutex_lock(&kvm->lock);
733 		if (kvm->created_vcpus) {
734 			r = -EBUSY;
735 		} else {
736 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
737 			set_kvm_facility(kvm->arch.model.fac_list, 72);
738 			r = 0;
739 		}
740 		mutex_unlock(&kvm->lock);
741 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
742 			 r ? "(not available)" : "(success)");
743 		break;
744 	case KVM_CAP_S390_GS:
745 		r = -EINVAL;
746 		mutex_lock(&kvm->lock);
747 		if (kvm->created_vcpus) {
748 			r = -EBUSY;
749 		} else if (test_facility(133)) {
750 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
751 			set_kvm_facility(kvm->arch.model.fac_list, 133);
752 			r = 0;
753 		}
754 		mutex_unlock(&kvm->lock);
755 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
756 			 r ? "(not available)" : "(success)");
757 		break;
758 	case KVM_CAP_S390_HPAGE_1M:
759 		mutex_lock(&kvm->lock);
760 		if (kvm->created_vcpus)
761 			r = -EBUSY;
762 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
763 			r = -EINVAL;
764 		else {
765 			r = 0;
766 			down_write(&kvm->mm->mmap_sem);
767 			kvm->mm->context.allow_gmap_hpage_1m = 1;
768 			up_write(&kvm->mm->mmap_sem);
769 			/*
770 			 * We might have to create fake 4k page
771 			 * tables. To prevent the hardware from working on
772 			 * stale PGSTEs, we emulate these instructions.
773 			 */
774 			kvm->arch.use_skf = 0;
775 			kvm->arch.use_pfmfi = 0;
776 		}
777 		mutex_unlock(&kvm->lock);
778 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
779 			 r ? "(not available)" : "(success)");
780 		break;
781 	case KVM_CAP_S390_USER_STSI:
782 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
783 		kvm->arch.user_stsi = 1;
784 		r = 0;
785 		break;
786 	case KVM_CAP_S390_USER_INSTR0:
787 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
788 		kvm->arch.user_instr0 = 1;
789 		icpt_operexc_on_all_vcpus(kvm);
790 		r = 0;
791 		break;
792 	default:
793 		r = -EINVAL;
794 		break;
795 	}
796 	return r;
797 }
798 
799 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
800 {
801 	int ret;
802 
803 	switch (attr->attr) {
804 	case KVM_S390_VM_MEM_LIMIT_SIZE:
805 		ret = 0;
806 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
807 			 kvm->arch.mem_limit);
808 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
809 			ret = -EFAULT;
810 		break;
811 	default:
812 		ret = -ENXIO;
813 		break;
814 	}
815 	return ret;
816 }
817 
818 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
819 {
820 	int ret;
821 	unsigned int idx;
822 	switch (attr->attr) {
823 	case KVM_S390_VM_MEM_ENABLE_CMMA:
824 		ret = -ENXIO;
825 		if (!sclp.has_cmma)
826 			break;
827 
828 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
829 		mutex_lock(&kvm->lock);
830 		if (kvm->created_vcpus)
831 			ret = -EBUSY;
832 		else if (kvm->mm->context.allow_gmap_hpage_1m)
833 			ret = -EINVAL;
834 		else {
835 			kvm->arch.use_cmma = 1;
836 			/* Not compatible with cmma. */
837 			kvm->arch.use_pfmfi = 0;
838 			ret = 0;
839 		}
840 		mutex_unlock(&kvm->lock);
841 		break;
842 	case KVM_S390_VM_MEM_CLR_CMMA:
843 		ret = -ENXIO;
844 		if (!sclp.has_cmma)
845 			break;
846 		ret = -EINVAL;
847 		if (!kvm->arch.use_cmma)
848 			break;
849 
850 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
851 		mutex_lock(&kvm->lock);
852 		idx = srcu_read_lock(&kvm->srcu);
853 		s390_reset_cmma(kvm->arch.gmap->mm);
854 		srcu_read_unlock(&kvm->srcu, idx);
855 		mutex_unlock(&kvm->lock);
856 		ret = 0;
857 		break;
858 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
859 		unsigned long new_limit;
860 
861 		if (kvm_is_ucontrol(kvm))
862 			return -EINVAL;
863 
864 		if (get_user(new_limit, (u64 __user *)attr->addr))
865 			return -EFAULT;
866 
867 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
868 		    new_limit > kvm->arch.mem_limit)
869 			return -E2BIG;
870 
871 		if (!new_limit)
872 			return -EINVAL;
873 
874 		/* gmap_create takes last usable address */
875 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
876 			new_limit -= 1;
877 
878 		ret = -EBUSY;
879 		mutex_lock(&kvm->lock);
880 		if (!kvm->created_vcpus) {
881 			/* gmap_create will round the limit up */
882 			struct gmap *new = gmap_create(current->mm, new_limit);
883 
884 			if (!new) {
885 				ret = -ENOMEM;
886 			} else {
887 				gmap_remove(kvm->arch.gmap);
888 				new->private = kvm;
889 				kvm->arch.gmap = new;
890 				ret = 0;
891 			}
892 		}
893 		mutex_unlock(&kvm->lock);
894 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
895 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
896 			 (void *) kvm->arch.gmap->asce);
897 		break;
898 	}
899 	default:
900 		ret = -ENXIO;
901 		break;
902 	}
903 	return ret;
904 }
905 
906 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
907 
908 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
909 {
910 	struct kvm_vcpu *vcpu;
911 	int i;
912 
913 	kvm_s390_vcpu_block_all(kvm);
914 
915 	kvm_for_each_vcpu(i, vcpu, kvm) {
916 		kvm_s390_vcpu_crypto_setup(vcpu);
917 		/* recreate the shadow crycb by leaving the VSIE handler */
918 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
919 	}
920 
921 	kvm_s390_vcpu_unblock_all(kvm);
922 }
923 
924 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
925 {
926 	mutex_lock(&kvm->lock);
927 	switch (attr->attr) {
928 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
929 		if (!test_kvm_facility(kvm, 76)) {
930 			mutex_unlock(&kvm->lock);
931 			return -EINVAL;
932 		}
933 		get_random_bytes(
934 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
935 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
936 		kvm->arch.crypto.aes_kw = 1;
937 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
938 		break;
939 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
940 		if (!test_kvm_facility(kvm, 76)) {
941 			mutex_unlock(&kvm->lock);
942 			return -EINVAL;
943 		}
944 		get_random_bytes(
945 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
946 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
947 		kvm->arch.crypto.dea_kw = 1;
948 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
949 		break;
950 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
951 		if (!test_kvm_facility(kvm, 76)) {
952 			mutex_unlock(&kvm->lock);
953 			return -EINVAL;
954 		}
955 		kvm->arch.crypto.aes_kw = 0;
956 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
957 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
958 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
959 		break;
960 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
961 		if (!test_kvm_facility(kvm, 76)) {
962 			mutex_unlock(&kvm->lock);
963 			return -EINVAL;
964 		}
965 		kvm->arch.crypto.dea_kw = 0;
966 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
967 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
968 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
969 		break;
970 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
971 		if (!ap_instructions_available()) {
972 			mutex_unlock(&kvm->lock);
973 			return -EOPNOTSUPP;
974 		}
975 		kvm->arch.crypto.apie = 1;
976 		break;
977 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
978 		if (!ap_instructions_available()) {
979 			mutex_unlock(&kvm->lock);
980 			return -EOPNOTSUPP;
981 		}
982 		kvm->arch.crypto.apie = 0;
983 		break;
984 	default:
985 		mutex_unlock(&kvm->lock);
986 		return -ENXIO;
987 	}
988 
989 	kvm_s390_vcpu_crypto_reset_all(kvm);
990 	mutex_unlock(&kvm->lock);
991 	return 0;
992 }
993 
994 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
995 {
996 	int cx;
997 	struct kvm_vcpu *vcpu;
998 
999 	kvm_for_each_vcpu(cx, vcpu, kvm)
1000 		kvm_s390_sync_request(req, vcpu);
1001 }
1002 
1003 /*
1004  * Must be called with kvm->srcu held to avoid races on memslots, and with
1005  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1006  */
1007 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1008 {
1009 	struct kvm_memory_slot *ms;
1010 	struct kvm_memslots *slots;
1011 	unsigned long ram_pages = 0;
1012 	int slotnr;
1013 
1014 	/* migration mode already enabled */
1015 	if (kvm->arch.migration_mode)
1016 		return 0;
1017 	slots = kvm_memslots(kvm);
1018 	if (!slots || !slots->used_slots)
1019 		return -EINVAL;
1020 
1021 	if (!kvm->arch.use_cmma) {
1022 		kvm->arch.migration_mode = 1;
1023 		return 0;
1024 	}
1025 	/* mark all the pages in active slots as dirty */
1026 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1027 		ms = slots->memslots + slotnr;
1028 		if (!ms->dirty_bitmap)
1029 			return -EINVAL;
1030 		/*
1031 		 * The second half of the bitmap is only used on x86,
1032 		 * and would be wasted otherwise, so we put it to good
1033 		 * use here to keep track of the state of the storage
1034 		 * attributes.
1035 		 */
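		/* 0xff: start out with every page marked dirty in the CMMA (second) bitmap half */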
1036 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1037 		ram_pages += ms->npages;
1038 	}
1039 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1040 	kvm->arch.migration_mode = 1;
1041 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1042 	return 0;
1043 }
1044 
1045 /*
1046  * Must be called with kvm->slots_lock to avoid races with ourselves and
1047  * kvm_s390_vm_start_migration.
1048  */
1049 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1050 {
1051 	/* migration mode already disabled */
1052 	if (!kvm->arch.migration_mode)
1053 		return 0;
1054 	kvm->arch.migration_mode = 0;
1055 	if (kvm->arch.use_cmma)
1056 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1057 	return 0;
1058 }
1059 
1060 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1061 				     struct kvm_device_attr *attr)
1062 {
1063 	int res = -ENXIO;
1064 
1065 	mutex_lock(&kvm->slots_lock);
1066 	switch (attr->attr) {
1067 	case KVM_S390_VM_MIGRATION_START:
1068 		res = kvm_s390_vm_start_migration(kvm);
1069 		break;
1070 	case KVM_S390_VM_MIGRATION_STOP:
1071 		res = kvm_s390_vm_stop_migration(kvm);
1072 		break;
1073 	default:
1074 		break;
1075 	}
1076 	mutex_unlock(&kvm->slots_lock);
1077 
1078 	return res;
1079 }
1080 
1081 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1082 				     struct kvm_device_attr *attr)
1083 {
1084 	u64 mig = kvm->arch.migration_mode;
1085 
1086 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1087 		return -ENXIO;
1088 
1089 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1090 		return -EFAULT;
1091 	return 0;
1092 }
1093 
1094 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1095 {
1096 	struct kvm_s390_vm_tod_clock gtod;
1097 
1098 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1099 		return -EFAULT;
1100 
1101 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1102 		return -EINVAL;
1103 	kvm_s390_set_tod_clock(kvm, &gtod);
1104 
1105 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1106 		gtod.epoch_idx, gtod.tod);
1107 
1108 	return 0;
1109 }
1110 
1111 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1112 {
1113 	u8 gtod_high;
1114 
1115 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1116 					   sizeof(gtod_high)))
1117 		return -EFAULT;
1118 
1119 	if (gtod_high != 0)
1120 		return -EINVAL;
1121 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1122 
1123 	return 0;
1124 }
1125 
1126 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1127 {
1128 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1129 
1130 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1131 			   sizeof(gtod.tod)))
1132 		return -EFAULT;
1133 
1134 	kvm_s390_set_tod_clock(kvm, &gtod);
1135 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1136 	return 0;
1137 }
1138 
1139 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141 	int ret;
1142 
1143 	if (attr->flags)
1144 		return -EINVAL;
1145 
1146 	switch (attr->attr) {
1147 	case KVM_S390_VM_TOD_EXT:
1148 		ret = kvm_s390_set_tod_ext(kvm, attr);
1149 		break;
1150 	case KVM_S390_VM_TOD_HIGH:
1151 		ret = kvm_s390_set_tod_high(kvm, attr);
1152 		break;
1153 	case KVM_S390_VM_TOD_LOW:
1154 		ret = kvm_s390_set_tod_low(kvm, attr);
1155 		break;
1156 	default:
1157 		ret = -ENXIO;
1158 		break;
1159 	}
1160 	return ret;
1161 }
1162 
1163 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1164 				   struct kvm_s390_vm_tod_clock *gtod)
1165 {
1166 	struct kvm_s390_tod_clock_ext htod;
1167 
1168 	preempt_disable();
1169 
1170 	get_tod_clock_ext((char *)&htod);
1171 
1172 	gtod->tod = htod.tod + kvm->arch.epoch;
1173 	gtod->epoch_idx = 0;
1174 	if (test_kvm_facility(kvm, 139)) {
1175 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
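		/* propagate the carry if adding the epoch wrapped the 64-bit TOD value */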
1176 		if (gtod->tod < htod.tod)
1177 			gtod->epoch_idx += 1;
1178 	}
1179 
1180 	preempt_enable();
1181 }
1182 
1183 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1184 {
1185 	struct kvm_s390_vm_tod_clock gtod;
1186 
1187 	memset(&gtod, 0, sizeof(gtod));
1188 	kvm_s390_get_tod_clock(kvm, &gtod);
1189 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1190 		return -EFAULT;
1191 
1192 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1193 		gtod.epoch_idx, gtod.tod);
1194 	return 0;
1195 }
1196 
1197 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1198 {
1199 	u8 gtod_high = 0;
1200 
1201 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1202 					 sizeof(gtod_high)))
1203 		return -EFAULT;
1204 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1205 
1206 	return 0;
1207 }
1208 
1209 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1210 {
1211 	u64 gtod;
1212 
1213 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1214 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1215 		return -EFAULT;
1216 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1217 
1218 	return 0;
1219 }
1220 
1221 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1222 {
1223 	int ret;
1224 
1225 	if (attr->flags)
1226 		return -EINVAL;
1227 
1228 	switch (attr->attr) {
1229 	case KVM_S390_VM_TOD_EXT:
1230 		ret = kvm_s390_get_tod_ext(kvm, attr);
1231 		break;
1232 	case KVM_S390_VM_TOD_HIGH:
1233 		ret = kvm_s390_get_tod_high(kvm, attr);
1234 		break;
1235 	case KVM_S390_VM_TOD_LOW:
1236 		ret = kvm_s390_get_tod_low(kvm, attr);
1237 		break;
1238 	default:
1239 		ret = -ENXIO;
1240 		break;
1241 	}
1242 	return ret;
1243 }
1244 
1245 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1246 {
1247 	struct kvm_s390_vm_cpu_processor *proc;
1248 	u16 lowest_ibc, unblocked_ibc;
1249 	int ret = 0;
1250 
1251 	mutex_lock(&kvm->lock);
1252 	if (kvm->created_vcpus) {
1253 		ret = -EBUSY;
1254 		goto out;
1255 	}
1256 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1257 	if (!proc) {
1258 		ret = -ENOMEM;
1259 		goto out;
1260 	}
1261 	if (!copy_from_user(proc, (void __user *)attr->addr,
1262 			    sizeof(*proc))) {
1263 		kvm->arch.model.cpuid = proc->cpuid;
1264 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1265 		unblocked_ibc = sclp.ibc & 0xfff;
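		/* clamp the requested ibc to the host's supported range [lowest_ibc, unblocked_ibc] */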
1266 		if (lowest_ibc && proc->ibc) {
1267 			if (proc->ibc > unblocked_ibc)
1268 				kvm->arch.model.ibc = unblocked_ibc;
1269 			else if (proc->ibc < lowest_ibc)
1270 				kvm->arch.model.ibc = lowest_ibc;
1271 			else
1272 				kvm->arch.model.ibc = proc->ibc;
1273 		}
1274 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1275 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1276 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1277 			 kvm->arch.model.ibc,
1278 			 kvm->arch.model.cpuid);
1279 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1280 			 kvm->arch.model.fac_list[0],
1281 			 kvm->arch.model.fac_list[1],
1282 			 kvm->arch.model.fac_list[2]);
1283 	} else
1284 		ret = -EFAULT;
1285 	kfree(proc);
1286 out:
1287 	mutex_unlock(&kvm->lock);
1288 	return ret;
1289 }
1290 
1291 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1292 				       struct kvm_device_attr *attr)
1293 {
1294 	struct kvm_s390_vm_cpu_feat data;
1295 
1296 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1297 		return -EFAULT;
1298 	if (!bitmap_subset((unsigned long *) data.feat,
1299 			   kvm_s390_available_cpu_feat,
1300 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1301 		return -EINVAL;
1302 
1303 	mutex_lock(&kvm->lock);
1304 	if (kvm->created_vcpus) {
1305 		mutex_unlock(&kvm->lock);
1306 		return -EBUSY;
1307 	}
1308 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1309 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1310 	mutex_unlock(&kvm->lock);
1311 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1312 			 data.feat[0],
1313 			 data.feat[1],
1314 			 data.feat[2]);
1315 	return 0;
1316 }
1317 
1318 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1319 					  struct kvm_device_attr *attr)
1320 {
1321 	mutex_lock(&kvm->lock);
1322 	if (kvm->created_vcpus) {
1323 		mutex_unlock(&kvm->lock);
1324 		return -EBUSY;
1325 	}
1326 
1327 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1328 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1329 		mutex_unlock(&kvm->lock);
1330 		return -EFAULT;
1331 	}
1332 	mutex_unlock(&kvm->lock);
1333 
1334 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1335 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1336 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1339 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1342 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1345 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1386 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1391 
1392 	return 0;
1393 }
1394 
1395 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1396 {
1397 	int ret = -ENXIO;
1398 
1399 	switch (attr->attr) {
1400 	case KVM_S390_VM_CPU_PROCESSOR:
1401 		ret = kvm_s390_set_processor(kvm, attr);
1402 		break;
1403 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1404 		ret = kvm_s390_set_processor_feat(kvm, attr);
1405 		break;
1406 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1407 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1408 		break;
1409 	}
1410 	return ret;
1411 }
1412 
1413 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1414 {
1415 	struct kvm_s390_vm_cpu_processor *proc;
1416 	int ret = 0;
1417 
1418 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1419 	if (!proc) {
1420 		ret = -ENOMEM;
1421 		goto out;
1422 	}
1423 	proc->cpuid = kvm->arch.model.cpuid;
1424 	proc->ibc = kvm->arch.model.ibc;
1425 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1426 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1427 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1428 		 kvm->arch.model.ibc,
1429 		 kvm->arch.model.cpuid);
1430 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1431 		 kvm->arch.model.fac_list[0],
1432 		 kvm->arch.model.fac_list[1],
1433 		 kvm->arch.model.fac_list[2]);
1434 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1435 		ret = -EFAULT;
1436 	kfree(proc);
1437 out:
1438 	return ret;
1439 }
1440 
1441 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443 	struct kvm_s390_vm_cpu_machine *mach;
1444 	int ret = 0;
1445 
1446 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1447 	if (!mach) {
1448 		ret = -ENOMEM;
1449 		goto out;
1450 	}
1451 	get_cpu_id((struct cpuid *) &mach->cpuid);
1452 	mach->ibc = sclp.ibc;
1453 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1454 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1455 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1456 	       sizeof(S390_lowcore.stfle_fac_list));
1457 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1458 		 kvm->arch.model.ibc,
1459 		 kvm->arch.model.cpuid);
1460 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1461 		 mach->fac_mask[0],
1462 		 mach->fac_mask[1],
1463 		 mach->fac_mask[2]);
1464 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1465 		 mach->fac_list[0],
1466 		 mach->fac_list[1],
1467 		 mach->fac_list[2]);
1468 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1469 		ret = -EFAULT;
1470 	kfree(mach);
1471 out:
1472 	return ret;
1473 }
1474 
1475 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1476 				       struct kvm_device_attr *attr)
1477 {
1478 	struct kvm_s390_vm_cpu_feat data;
1479 
1480 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1481 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1482 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1483 		return -EFAULT;
1484 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1485 			 data.feat[0],
1486 			 data.feat[1],
1487 			 data.feat[2]);
1488 	return 0;
1489 }
1490 
1491 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1492 				     struct kvm_device_attr *attr)
1493 {
1494 	struct kvm_s390_vm_cpu_feat data;
1495 
1496 	bitmap_copy((unsigned long *) data.feat,
1497 		    kvm_s390_available_cpu_feat,
1498 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1499 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1500 		return -EFAULT;
1501 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1502 			 data.feat[0],
1503 			 data.feat[1],
1504 			 data.feat[2]);
1505 	return 0;
1506 }
1507 
1508 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1509 					  struct kvm_device_attr *attr)
1510 {
1511 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1512 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1513 		return -EFAULT;
1514 
1515 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1516 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1517 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1520 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1523 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1526 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1567 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1572 
1573 	return 0;
1574 }
1575 
1576 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1577 					struct kvm_device_attr *attr)
1578 {
1579 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1580 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1581 		return -EFAULT;
1582 
1583 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1585 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1588 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1591 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1594 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1635 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1640 
1641 	return 0;
1642 }
1643 
1644 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1645 {
1646 	int ret = -ENXIO;
1647 
1648 	switch (attr->attr) {
1649 	case KVM_S390_VM_CPU_PROCESSOR:
1650 		ret = kvm_s390_get_processor(kvm, attr);
1651 		break;
1652 	case KVM_S390_VM_CPU_MACHINE:
1653 		ret = kvm_s390_get_machine(kvm, attr);
1654 		break;
1655 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1656 		ret = kvm_s390_get_processor_feat(kvm, attr);
1657 		break;
1658 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1659 		ret = kvm_s390_get_machine_feat(kvm, attr);
1660 		break;
1661 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1662 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1663 		break;
1664 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1665 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1666 		break;
1667 	}
1668 	return ret;
1669 }
1670 
1671 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673 	int ret;
1674 
1675 	switch (attr->group) {
1676 	case KVM_S390_VM_MEM_CTRL:
1677 		ret = kvm_s390_set_mem_control(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_TOD:
1680 		ret = kvm_s390_set_tod(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_MODEL:
1683 		ret = kvm_s390_set_cpu_model(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CRYPTO:
1686 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_MIGRATION:
1689 		ret = kvm_s390_vm_set_migration(kvm, attr);
1690 		break;
1691 	default:
1692 		ret = -ENXIO;
1693 		break;
1694 	}
1695 
1696 	return ret;
1697 }
1698 
1699 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701 	int ret;
1702 
1703 	switch (attr->group) {
1704 	case KVM_S390_VM_MEM_CTRL:
1705 		ret = kvm_s390_get_mem_control(kvm, attr);
1706 		break;
1707 	case KVM_S390_VM_TOD:
1708 		ret = kvm_s390_get_tod(kvm, attr);
1709 		break;
1710 	case KVM_S390_VM_CPU_MODEL:
1711 		ret = kvm_s390_get_cpu_model(kvm, attr);
1712 		break;
1713 	case KVM_S390_VM_MIGRATION:
1714 		ret = kvm_s390_vm_get_migration(kvm, attr);
1715 		break;
1716 	default:
1717 		ret = -ENXIO;
1718 		break;
1719 	}
1720 
1721 	return ret;
1722 }
1723 
1724 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1725 {
1726 	int ret;
1727 
1728 	switch (attr->group) {
1729 	case KVM_S390_VM_MEM_CTRL:
1730 		switch (attr->attr) {
1731 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1732 		case KVM_S390_VM_MEM_CLR_CMMA:
1733 			ret = sclp.has_cmma ? 0 : -ENXIO;
1734 			break;
1735 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1736 			ret = 0;
1737 			break;
1738 		default:
1739 			ret = -ENXIO;
1740 			break;
1741 		}
1742 		break;
1743 	case KVM_S390_VM_TOD:
1744 		switch (attr->attr) {
1745 		case KVM_S390_VM_TOD_LOW:
1746 		case KVM_S390_VM_TOD_HIGH:
1747 			ret = 0;
1748 			break;
1749 		default:
1750 			ret = -ENXIO;
1751 			break;
1752 		}
1753 		break;
1754 	case KVM_S390_VM_CPU_MODEL:
1755 		switch (attr->attr) {
1756 		case KVM_S390_VM_CPU_PROCESSOR:
1757 		case KVM_S390_VM_CPU_MACHINE:
1758 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1759 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1760 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1761 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1762 			ret = 0;
1763 			break;
1764 		default:
1765 			ret = -ENXIO;
1766 			break;
1767 		}
1768 		break;
1769 	case KVM_S390_VM_CRYPTO:
1770 		switch (attr->attr) {
1771 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1772 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1773 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1774 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1775 			ret = 0;
1776 			break;
1777 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1778 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1779 			ret = ap_instructions_available() ? 0 : -ENXIO;
1780 			break;
1781 		default:
1782 			ret = -ENXIO;
1783 			break;
1784 		}
1785 		break;
1786 	case KVM_S390_VM_MIGRATION:
1787 		ret = 0;
1788 		break;
1789 	default:
1790 		ret = -ENXIO;
1791 		break;
1792 	}
1793 
1794 	return ret;
1795 }
1796 
1797 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1798 {
1799 	uint8_t *keys;
1800 	uint64_t hva;
1801 	int srcu_idx, i, r = 0;
1802 
1803 	if (args->flags != 0)
1804 		return -EINVAL;
1805 
1806 	/* Is this guest using storage keys? */
1807 	if (!mm_uses_skeys(current->mm))
1808 		return KVM_S390_GET_SKEYS_NONE;
1809 
1810 	/* Enforce sane limit on memory allocation */
1811 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1812 		return -EINVAL;
1813 
1814 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1815 	if (!keys)
1816 		return -ENOMEM;
1817 
1818 	down_read(&current->mm->mmap_sem);
1819 	srcu_idx = srcu_read_lock(&kvm->srcu);
1820 	for (i = 0; i < args->count; i++) {
1821 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1822 		if (kvm_is_error_hva(hva)) {
1823 			r = -EFAULT;
1824 			break;
1825 		}
1826 
1827 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1828 		if (r)
1829 			break;
1830 	}
1831 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1832 	up_read(&current->mm->mmap_sem);
1833 
1834 	if (!r) {
1835 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1836 				 sizeof(uint8_t) * args->count);
1837 		if (r)
1838 			r = -EFAULT;
1839 	}
1840 
1841 	kvfree(keys);
1842 	return r;
1843 }
1844 
1845 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1846 {
1847 	uint8_t *keys;
1848 	uint64_t hva;
1849 	int srcu_idx, i, r = 0;
1850 	bool unlocked;
1851 
1852 	if (args->flags != 0)
1853 		return -EINVAL;
1854 
1855 	/* Enforce sane limit on memory allocation */
1856 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1857 		return -EINVAL;
1858 
1859 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1860 	if (!keys)
1861 		return -ENOMEM;
1862 
1863 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1864 			   sizeof(uint8_t) * args->count);
1865 	if (r) {
1866 		r = -EFAULT;
1867 		goto out;
1868 	}
1869 
1870 	/* Enable storage key handling for the guest */
1871 	r = s390_enable_skey();
1872 	if (r)
1873 		goto out;
1874 
1875 	i = 0;
1876 	down_read(&current->mm->mmap_sem);
1877 	srcu_idx = srcu_read_lock(&kvm->srcu);
1878 	while (i < args->count) {
1879 		unlocked = false;
1880 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1881 		if (kvm_is_error_hva(hva)) {
1882 			r = -EFAULT;
1883 			break;
1884 		}
1885 
1886 		/* Lowest order bit is reserved */
1887 		if (keys[i] & 0x01) {
1888 			r = -EINVAL;
1889 			break;
1890 		}
1891 
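		/*
		 * If setting the key fails, try to fault the page in writably
		 * and retry the same gfn; i is only advanced once the key was
		 * set successfully.
		 */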
1892 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1893 		if (r) {
1894 			r = fixup_user_fault(current, current->mm, hva,
1895 					     FAULT_FLAG_WRITE, &unlocked);
1896 			if (r)
1897 				break;
1898 		}
1899 		if (!r)
1900 			i++;
1901 	}
1902 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1903 	up_read(&current->mm->mmap_sem);
1904 out:
1905 	kvfree(keys);
1906 	return r;
1907 }
1908 
1909 /*
1910  * Base address and length must be sent at the start of each block; therefore
1911  * it is cheaper to send some clean data, as long as it is less than the size
1912  * of two longs.
1913  */
1914 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
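/*
 * With 8-byte longs this evaluates to 16: a run of up to 16 clean pages costs
 * at most 16 bytes of buffer, i.e. no more than starting a new block (base
 * address plus length).
 */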
1915 /* for consistency */
1916 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1917 
1918 /*
1919  * Similar to gfn_to_memslot, but returns the index of a memslot even when the
1920  * address falls in a hole. In that case the index of one of the memslots
1921  * bordering the hole is returned.
1922  */
1923 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1924 {
1925 	int start = 0, end = slots->used_slots;
1926 	int slot = atomic_read(&slots->lru_slot);
1927 	struct kvm_memory_slot *memslots = slots->memslots;
1928 
1929 	if (gfn >= memslots[slot].base_gfn &&
1930 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1931 		return slot;
1932 
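	/*
	 * Binary search over the memslot array, which (as in common KVM code)
	 * is sorted by descending base_gfn: find the lowest index whose
	 * base_gfn is still <= gfn.
	 */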
1933 	while (start < end) {
1934 		slot = start + (end - start) / 2;
1935 
1936 		if (gfn >= memslots[slot].base_gfn)
1937 			end = slot;
1938 		else
1939 			start = slot + 1;
1940 	}
1941 
1942 	if (gfn >= memslots[start].base_gfn &&
1943 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1944 		atomic_set(&slots->lru_slot, start);
1945 	}
1946 
1947 	return start;
1948 }
1949 
1950 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1951 			      u8 *res, unsigned long bufsize)
1952 {
1953 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1954 
1955 	args->count = 0;
1956 	while (args->count < bufsize) {
1957 		hva = gfn_to_hva(kvm, cur_gfn);
1958 		/*
1959 		 * We return an error if the first value was invalid, but we
1960 		 * return successfully if at least one value was copied.
1961 		 */
1962 		if (kvm_is_error_hva(hva))
1963 			return args->count ? 0 : -EFAULT;
1964 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1965 			pgstev = 0;
1966 		res[args->count++] = (pgstev >> 24) & 0x43;
1967 		cur_gfn++;
1968 	}
1969 
1970 	return 0;
1971 }
1972 
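/*
 * Find the guest frame number of the next page whose CMMA dirty bit is set,
 * searching upward from cur_gfn (and wrapping around to the lowest memslot
 * if cur_gfn lies above the highest one). If no dirty bit is found at all,
 * the result lies past the end of guest memory, which the caller treats as
 * the stop condition.
 */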
1973 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1974 					      unsigned long cur_gfn)
1975 {
1976 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1977 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1978 	unsigned long ofs = cur_gfn - ms->base_gfn;
1979 
1980 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1981 		slotidx--;
1982 		/* If we are above the highest slot, wrap around */
1983 		if (slotidx < 0)
1984 			slotidx = slots->used_slots - 1;
1985 
1986 		ms = slots->memslots + slotidx;
1987 		ofs = 0;
1988 	}
1989 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1990 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1991 		slotidx--;
1992 		ms = slots->memslots + slotidx;
1993 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1994 	}
1995 	return ms->base_gfn + ofs;
1996 }
1997 
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999 			     u8 *res, unsigned long bufsize)
2000 {
2001 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002 	struct kvm_memslots *slots = kvm_memslots(kvm);
2003 	struct kvm_memory_slot *ms;
2004 
2005 	if (unlikely(!slots->used_slots))
2006 		return 0;
2007 
2008 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009 	ms = gfn_to_memslot(kvm, cur_gfn);
2010 	args->count = 0;
2011 	args->start_gfn = cur_gfn;
2012 	if (!ms)
2013 		return 0;
2014 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
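	/*
	 * memslots are sorted by descending base_gfn, so memslot 0 covers the
	 * highest addresses and its end marks the end of guest memory.
	 */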
2015 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2016 
2017 	while (args->count < bufsize) {
2018 		hva = gfn_to_hva(kvm, cur_gfn);
2019 		if (kvm_is_error_hva(hva))
2020 			return 0;
2021 		/* Decrement only if we actually flipped the bit to 0 */
2022 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025 			pgstev = 0;
2026 		/* Save the value */
2027 		res[args->count++] = (pgstev >> 24) & 0x43;
2028 		/* If the next bit is too far away, stop. */
2029 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030 			return 0;
2031 		/* If we reached the previous "next", find the next one */
2032 		if (cur_gfn == next_gfn)
2033 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 		/* Reached the end of memory or of the buffer, stop */
2035 		if ((next_gfn >= mem_end) ||
2036 		    (next_gfn - args->start_gfn >= bufsize))
2037 			return 0;
2038 		cur_gfn++;
2039 		/* Reached the end of the current memslot, take the next one. */
2040 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2041 			ms = gfn_to_memslot(kvm, cur_gfn);
2042 			if (!ms)
2043 				return 0;
2044 		}
2045 	}
2046 	return 0;
2047 }
2048 
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 				  struct kvm_s390_cmma_log *args)
2059 {
2060 	unsigned long bufsize;
2061 	int srcu_idx, peek, ret;
2062 	u8 *values;
2063 
2064 	if (!kvm->arch.use_cmma)
2065 		return -ENXIO;
2066 	/* Invalid/unsupported flags were specified */
2067 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2068 		return -EINVAL;
2069 	/* Migration mode query, and we are not doing a migration */
2070 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 	if (!peek && !kvm->arch.migration_mode)
2072 		return -EINVAL;
2073 	/* CMMA is disabled or was not used, or the buffer has length zero */
2074 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2076 		memset(args, 0, sizeof(*args));
2077 		return 0;
2078 	}
2079 	/* We are not peeking, and there are no dirty pages */
2080 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 
2085 	values = vmalloc(bufsize);
2086 	if (!values)
2087 		return -ENOMEM;
2088 
2089 	down_read(&kvm->mm->mmap_sem);
2090 	srcu_idx = srcu_read_lock(&kvm->srcu);
2091 	if (peek)
2092 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093 	else
2094 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 	up_read(&kvm->mm->mmap_sem);
2097 
2098 	if (kvm->arch.migration_mode)
2099 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100 	else
2101 		args->remaining = 0;
2102 
2103 	if (copy_to_user((void __user *)args->values, values, args->count))
2104 		ret = -EFAULT;
2105 
2106 	vfree(values);
2107 	return ret;
2108 }
2109 
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 				  const struct kvm_s390_cmma_log *args)
2117 {
2118 	unsigned long hva, mask, pgstev, i;
2119 	uint8_t *bits;
2120 	int srcu_idx, r = 0;
2121 
2122 	mask = args->mask;
2123 
2124 	if (!kvm->arch.use_cmma)
2125 		return -ENXIO;
2126 	/* invalid/unsupported flags */
2127 	if (args->flags != 0)
2128 		return -EINVAL;
2129 	/* Enforce sane limit on memory allocation */
2130 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131 		return -EINVAL;
2132 	/* Nothing to do */
2133 	if (args->count == 0)
2134 		return 0;
2135 
2136 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2137 	if (!bits)
2138 		return -ENOMEM;
2139 
2140 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2141 	if (r) {
2142 		r = -EFAULT;
2143 		goto out;
2144 	}
2145 
2146 	down_read(&kvm->mm->mmap_sem);
2147 	srcu_idx = srcu_read_lock(&kvm->srcu);
2148 	for (i = 0; i < args->count; i++) {
2149 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 		if (kvm_is_error_hva(hva)) {
2151 			r = -EFAULT;
2152 			break;
2153 		}
2154 
2155 		pgstev = bits[i];
2156 		pgstev = pgstev << 24;
2157 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159 	}
2160 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 	up_read(&kvm->mm->mmap_sem);
2162 
2163 	if (!kvm->mm->context.uses_cmm) {
2164 		down_write(&kvm->mm->mmap_sem);
2165 		kvm->mm->context.uses_cmm = 1;
2166 		up_write(&kvm->mm->mmap_sem);
2167 	}
2168 out:
2169 	vfree(bits);
2170 	return r;
2171 }
2172 
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175 	struct kvm_vcpu *vcpu;
2176 	u16 rc, rrc;
2177 	int ret = 0;
2178 	int i;
2179 
2180 	/*
2181 	 * We ignore failures and try to destroy as many CPUs as possible.
2182 	 * At the same time we must not free the assigned resources when
2183 	 * this fails, as the ultravisor still has access to that memory.
2184 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185 	 * behind.
2186 	 * We want to return the first failure rc and rrc, though.
2187 	 */
2188 	kvm_for_each_vcpu(i, vcpu, kvm) {
2189 		mutex_lock(&vcpu->mutex);
2190 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191 			*rcp = rc;
2192 			*rrcp = rrc;
2193 			ret = -EIO;
2194 		}
2195 		mutex_unlock(&vcpu->mutex);
2196 	}
2197 	return ret;
2198 }
2199 
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202 	int i, r = 0;
2203 	u16 dummy;
2204 
2205 	struct kvm_vcpu *vcpu;
2206 
2207 	kvm_for_each_vcpu(i, vcpu, kvm) {
2208 		mutex_lock(&vcpu->mutex);
2209 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2210 		mutex_unlock(&vcpu->mutex);
2211 		if (r)
2212 			break;
2213 	}
2214 	if (r)
2215 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2216 	return r;
2217 }
2218 
2219 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2220 {
2221 	int r = 0;
2222 	u16 dummy;
2223 	void __user *argp = (void __user *)cmd->data;
2224 
2225 	switch (cmd->cmd) {
2226 	case KVM_PV_ENABLE: {
2227 		r = -EINVAL;
2228 		if (kvm_s390_pv_is_protected(kvm))
2229 			break;
2230 
2231 		/*
2232 		 * FMT 4 SIE needs esca. As we never switch back to bsca from
2233 		 * esca, we need no cleanup in the error cases below.
2234 		 */
2235 		r = sca_switch_to_extended(kvm);
2236 		if (r)
2237 			break;
2238 
2239 		down_write(&current->mm->mmap_sem);
2240 		r = gmap_mark_unmergeable();
2241 		up_write(&current->mm->mmap_sem);
2242 		if (r)
2243 			break;
2244 
2245 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2246 		if (r)
2247 			break;
2248 
2249 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2250 		if (r)
2251 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2252 
2253 		/* we need to block service interrupts from now on */
2254 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2255 		break;
2256 	}
2257 	case KVM_PV_DISABLE: {
2258 		r = -EINVAL;
2259 		if (!kvm_s390_pv_is_protected(kvm))
2260 			break;
2261 
2262 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2263 		/*
2264 		 * If a CPU could not be destroyed, destroy VM will also fail.
2265 		 * There is no point in trying to destroy it. Instead return
2266 		 * the rc and rrc from the first CPU whose destruction failed.
2267 		 */
2268 		if (r)
2269 			break;
2270 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2271 
2272 		/* no need to block service interrupts any more */
2273 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2274 		break;
2275 	}
2276 	case KVM_PV_SET_SEC_PARMS: {
2277 		struct kvm_s390_pv_sec_parm parms = {};
2278 		void *hdr;
2279 
2280 		r = -EINVAL;
2281 		if (!kvm_s390_pv_is_protected(kvm))
2282 			break;
2283 
2284 		r = -EFAULT;
2285 		if (copy_from_user(&parms, argp, sizeof(parms)))
2286 			break;
2287 
2288 		/* Currently restricted to 8KB */
2289 		r = -EINVAL;
2290 		if (parms.length > PAGE_SIZE * 2)
2291 			break;
2292 
2293 		r = -ENOMEM;
2294 		hdr = vmalloc(parms.length);
2295 		if (!hdr)
2296 			break;
2297 
2298 		r = -EFAULT;
2299 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2300 				    parms.length))
2301 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2302 						      &cmd->rc, &cmd->rrc);
2303 
2304 		vfree(hdr);
2305 		break;
2306 	}
2307 	case KVM_PV_UNPACK: {
2308 		struct kvm_s390_pv_unp unp = {};
2309 
2310 		r = -EINVAL;
2311 		if (!kvm_s390_pv_is_protected(kvm))
2312 			break;
2313 
2314 		r = -EFAULT;
2315 		if (copy_from_user(&unp, argp, sizeof(unp)))
2316 			break;
2317 
2318 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2319 				       &cmd->rc, &cmd->rrc);
2320 		break;
2321 	}
2322 	case KVM_PV_VERIFY: {
2323 		r = -EINVAL;
2324 		if (!kvm_s390_pv_is_protected(kvm))
2325 			break;
2326 
2327 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2328 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2329 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2330 			     cmd->rrc);
2331 		break;
2332 	}
2333 	case KVM_PV_PREP_RESET: {
2334 		r = -EINVAL;
2335 		if (!kvm_s390_pv_is_protected(kvm))
2336 			break;
2337 
2338 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2339 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2340 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2341 			     cmd->rc, cmd->rrc);
2342 		break;
2343 	}
2344 	case KVM_PV_UNSHARE_ALL: {
2345 		r = -EINVAL;
2346 		if (!kvm_s390_pv_is_protected(kvm))
2347 			break;
2348 
2349 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2350 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2351 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2352 			     cmd->rc, cmd->rrc);
2353 		break;
2354 	}
2355 	default:
2356 		r = -ENOTTY;
2357 	}
2358 	return r;
2359 }
2360 
2361 long kvm_arch_vm_ioctl(struct file *filp,
2362 		       unsigned int ioctl, unsigned long arg)
2363 {
2364 	struct kvm *kvm = filp->private_data;
2365 	void __user *argp = (void __user *)arg;
2366 	struct kvm_device_attr attr;
2367 	int r;
2368 
2369 	switch (ioctl) {
2370 	case KVM_S390_INTERRUPT: {
2371 		struct kvm_s390_interrupt s390int;
2372 
2373 		r = -EFAULT;
2374 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2375 			break;
2376 		r = kvm_s390_inject_vm(kvm, &s390int);
2377 		break;
2378 	}
2379 	case KVM_CREATE_IRQCHIP: {
2380 		struct kvm_irq_routing_entry routing;
2381 
2382 		r = -EINVAL;
2383 		if (kvm->arch.use_irqchip) {
2384 			/* Set up dummy routing. */
2385 			memset(&routing, 0, sizeof(routing));
2386 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2387 		}
2388 		break;
2389 	}
2390 	case KVM_SET_DEVICE_ATTR: {
2391 		r = -EFAULT;
2392 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2393 			break;
2394 		r = kvm_s390_vm_set_attr(kvm, &attr);
2395 		break;
2396 	}
2397 	case KVM_GET_DEVICE_ATTR: {
2398 		r = -EFAULT;
2399 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2400 			break;
2401 		r = kvm_s390_vm_get_attr(kvm, &attr);
2402 		break;
2403 	}
2404 	case KVM_HAS_DEVICE_ATTR: {
2405 		r = -EFAULT;
2406 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2407 			break;
2408 		r = kvm_s390_vm_has_attr(kvm, &attr);
2409 		break;
2410 	}
2411 	case KVM_S390_GET_SKEYS: {
2412 		struct kvm_s390_skeys args;
2413 
2414 		r = -EFAULT;
2415 		if (copy_from_user(&args, argp,
2416 				   sizeof(struct kvm_s390_skeys)))
2417 			break;
2418 		r = kvm_s390_get_skeys(kvm, &args);
2419 		break;
2420 	}
2421 	case KVM_S390_SET_SKEYS: {
2422 		struct kvm_s390_skeys args;
2423 
2424 		r = -EFAULT;
2425 		if (copy_from_user(&args, argp,
2426 				   sizeof(struct kvm_s390_skeys)))
2427 			break;
2428 		r = kvm_s390_set_skeys(kvm, &args);
2429 		break;
2430 	}
2431 	case KVM_S390_GET_CMMA_BITS: {
2432 		struct kvm_s390_cmma_log args;
2433 
2434 		r = -EFAULT;
2435 		if (copy_from_user(&args, argp, sizeof(args)))
2436 			break;
2437 		mutex_lock(&kvm->slots_lock);
2438 		r = kvm_s390_get_cmma_bits(kvm, &args);
2439 		mutex_unlock(&kvm->slots_lock);
2440 		if (!r) {
2441 			r = copy_to_user(argp, &args, sizeof(args));
2442 			if (r)
2443 				r = -EFAULT;
2444 		}
2445 		break;
2446 	}
2447 	case KVM_S390_SET_CMMA_BITS: {
2448 		struct kvm_s390_cmma_log args;
2449 
2450 		r = -EFAULT;
2451 		if (copy_from_user(&args, argp, sizeof(args)))
2452 			break;
2453 		mutex_lock(&kvm->slots_lock);
2454 		r = kvm_s390_set_cmma_bits(kvm, &args);
2455 		mutex_unlock(&kvm->slots_lock);
2456 		break;
2457 	}
2458 	case KVM_S390_PV_COMMAND: {
2459 		struct kvm_pv_cmd args;
2460 
2461 		/* protvirt means user sigp */
2462 		kvm->arch.user_cpu_state_ctrl = 1;
2463 		r = 0;
2464 		if (!is_prot_virt_host()) {
2465 			r = -EINVAL;
2466 			break;
2467 		}
2468 		if (copy_from_user(&args, argp, sizeof(args))) {
2469 			r = -EFAULT;
2470 			break;
2471 		}
2472 		if (args.flags) {
2473 			r = -EINVAL;
2474 			break;
2475 		}
2476 		mutex_lock(&kvm->lock);
2477 		r = kvm_s390_handle_pv(kvm, &args);
2478 		mutex_unlock(&kvm->lock);
2479 		if (copy_to_user(argp, &args, sizeof(args))) {
2480 			r = -EFAULT;
2481 			break;
2482 		}
2483 		break;
2484 	}
2485 	default:
2486 		r = -ENOTTY;
2487 	}
2488 
2489 	return r;
2490 }
2491 
2492 static int kvm_s390_apxa_installed(void)
2493 {
2494 	struct ap_config_info info;
2495 
2496 	if (ap_instructions_available()) {
2497 		if (ap_qci(&info) == 0)
2498 			return info.apxa;
2499 	}
2500 
2501 	return 0;
2502 }
2503 
2504 /*
2505  * The format of the crypto control block (CRYCB) is specified in the 3 low
2506  * order bits of the CRYCB designation (CRYCBD) field as follows:
2507  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2508  *	     AP extended addressing (APXA) facility are installed.
2509  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2510  * Format 2: Both the APXA and MSAX3 facilities are installed.
2511  */
2512 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2513 {
2514 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2515 
2516 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2517 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2518 
2519 	/* Check whether MSAX3 is installed */
2520 	if (!test_kvm_facility(kvm, 76))
2521 		return;
2522 
2523 	if (kvm_s390_apxa_installed())
2524 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2525 	else
2526 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2527 }
2528 
2529 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2530 			       unsigned long *aqm, unsigned long *adm)
2531 {
2532 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2533 
2534 	mutex_lock(&kvm->lock);
2535 	kvm_s390_vcpu_block_all(kvm);
2536 
2537 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2538 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2539 		memcpy(crycb->apcb1.apm, apm, 32);
2540 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2541 			 apm[0], apm[1], apm[2], apm[3]);
2542 		memcpy(crycb->apcb1.aqm, aqm, 32);
2543 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2544 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2545 		memcpy(crycb->apcb1.adm, adm, 32);
2546 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2547 			 adm[0], adm[1], adm[2], adm[3]);
2548 		break;
2549 	case CRYCB_FORMAT1:
2550 	case CRYCB_FORMAT0: /* Fall through: both use APCB0 */
2551 		memcpy(crycb->apcb0.apm, apm, 8);
2552 		memcpy(crycb->apcb0.aqm, aqm, 2);
2553 		memcpy(crycb->apcb0.adm, adm, 2);
2554 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2555 			 apm[0], *((unsigned short *)aqm),
2556 			 *((unsigned short *)adm));
2557 		break;
2558 	default:	/* Cannot happen */
2559 		break;
2560 	}
2561 
2562 	/* recreate the shadow crycb for each vcpu */
2563 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2564 	kvm_s390_vcpu_unblock_all(kvm);
2565 	mutex_unlock(&kvm->lock);
2566 }
2567 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2568 
2569 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2570 {
2571 	mutex_lock(&kvm->lock);
2572 	kvm_s390_vcpu_block_all(kvm);
2573 
2574 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2575 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2576 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2577 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2578 
2579 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2580 	/* recreate the shadow crycb for each vcpu */
2581 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2582 	kvm_s390_vcpu_unblock_all(kvm);
2583 	mutex_unlock(&kvm->lock);
2584 }
2585 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2586 
2587 static u64 kvm_s390_get_initial_cpuid(void)
2588 {
2589 	struct cpuid cpuid;
2590 
2591 	get_cpu_id(&cpuid);
2592 	cpuid.version = 0xff;
2593 	return *((u64 *) &cpuid);
2594 }
2595 
2596 static void kvm_s390_crypto_init(struct kvm *kvm)
2597 {
2598 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2599 	kvm_s390_set_crycb_format(kvm);
2600 
2601 	if (!test_kvm_facility(kvm, 76))
2602 		return;
2603 
2604 	/* Enable AES/DEA protected key functions by default */
2605 	kvm->arch.crypto.aes_kw = 1;
2606 	kvm->arch.crypto.dea_kw = 1;
2607 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2608 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2609 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2610 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2611 }
2612 
2613 static void sca_dispose(struct kvm *kvm)
2614 {
2615 	if (kvm->arch.use_esca)
2616 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2617 	else
2618 		free_page((unsigned long)(kvm->arch.sca));
2619 	kvm->arch.sca = NULL;
2620 }
2621 
2622 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2623 {
2624 	gfp_t alloc_flags = GFP_KERNEL;
2625 	int i, rc;
2626 	char debug_name[16];
2627 	static unsigned long sca_offset;
2628 
2629 	rc = -EINVAL;
2630 #ifdef CONFIG_KVM_S390_UCONTROL
2631 	if (type & ~KVM_VM_S390_UCONTROL)
2632 		goto out_err;
2633 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2634 		goto out_err;
2635 #else
2636 	if (type)
2637 		goto out_err;
2638 #endif
2639 
2640 	rc = s390_enable_sie();
2641 	if (rc)
2642 		goto out_err;
2643 
2644 	rc = -ENOMEM;
2645 
2646 	if (!sclp.has_64bscao)
2647 		alloc_flags |= GFP_DMA;
2648 	rwlock_init(&kvm->arch.sca_lock);
2649 	/* start with basic SCA */
2650 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2651 	if (!kvm->arch.sca)
2652 		goto out_err;
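	/*
	 * Stagger the SCA origin within its page by 16 bytes from VM to VM;
	 * presumably this spreads the heavily used SCA lines across different
	 * cache sets when many VMs run on the same host.
	 */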
2653 	mutex_lock(&kvm_lock);
2654 	sca_offset += 16;
2655 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2656 		sca_offset = 0;
2657 	kvm->arch.sca = (struct bsca_block *)
2658 			((char *) kvm->arch.sca + sca_offset);
2659 	mutex_unlock(&kvm_lock);
2660 
2661 	sprintf(debug_name, "kvm-%u", current->pid);
2662 
2663 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2664 	if (!kvm->arch.dbf)
2665 		goto out_err;
2666 
2667 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2668 	kvm->arch.sie_page2 =
2669 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2670 	if (!kvm->arch.sie_page2)
2671 		goto out_err;
2672 
2673 	kvm->arch.sie_page2->kvm = kvm;
2674 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2675 
2676 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2677 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2678 					      (kvm_s390_fac_base[i] |
2679 					       kvm_s390_fac_ext[i]);
2680 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2681 					      kvm_s390_fac_base[i];
2682 	}
2683 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2684 
2685 	/* we are always in czam mode - even on pre-z14 machines */
2686 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2687 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2688 	/* we emulate STHYI in kvm */
2689 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2690 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2691 	if (MACHINE_HAS_TLB_GUEST) {
2692 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2693 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2694 	}
2695 
2696 	if (css_general_characteristics.aiv && test_facility(65))
2697 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2698 
2699 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2700 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2701 
2702 	kvm_s390_crypto_init(kvm);
2703 
2704 	mutex_init(&kvm->arch.float_int.ais_lock);
2705 	spin_lock_init(&kvm->arch.float_int.lock);
2706 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2707 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2708 	init_waitqueue_head(&kvm->arch.ipte_wq);
2709 	mutex_init(&kvm->arch.ipte_mutex);
2710 
2711 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2712 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2713 
2714 	if (type & KVM_VM_S390_UCONTROL) {
2715 		kvm->arch.gmap = NULL;
2716 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2717 	} else {
2718 		if (sclp.hamax == U64_MAX)
2719 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2720 		else
2721 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2722 						    sclp.hamax + 1);
2723 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2724 		if (!kvm->arch.gmap)
2725 			goto out_err;
2726 		kvm->arch.gmap->private = kvm;
2727 		kvm->arch.gmap->pfault_enabled = 0;
2728 	}
2729 
2730 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2731 	kvm->arch.use_skf = sclp.has_skey;
2732 	spin_lock_init(&kvm->arch.start_stop_lock);
2733 	kvm_s390_vsie_init(kvm);
2734 	if (use_gisa)
2735 		kvm_s390_gisa_init(kvm);
2736 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2737 
2738 	return 0;
2739 out_err:
2740 	free_page((unsigned long)kvm->arch.sie_page2);
2741 	debug_unregister(kvm->arch.dbf);
2742 	sca_dispose(kvm);
2743 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2744 	return rc;
2745 }
2746 
2747 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2748 {
2749 	u16 rc, rrc;
2750 
2751 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2752 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2753 	kvm_s390_clear_local_irqs(vcpu);
2754 	kvm_clear_async_pf_completion_queue(vcpu);
2755 	if (!kvm_is_ucontrol(vcpu->kvm))
2756 		sca_del_vcpu(vcpu);
2757 
2758 	if (kvm_is_ucontrol(vcpu->kvm))
2759 		gmap_remove(vcpu->arch.gmap);
2760 
2761 	if (vcpu->kvm->arch.use_cmma)
2762 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2763 	/* We cannot hold the vcpu mutex here; we are already dying */
2764 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2765 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2766 	free_page((unsigned long)(vcpu->arch.sie_block));
2767 }
2768 
2769 static void kvm_free_vcpus(struct kvm *kvm)
2770 {
2771 	unsigned int i;
2772 	struct kvm_vcpu *vcpu;
2773 
2774 	kvm_for_each_vcpu(i, vcpu, kvm)
2775 		kvm_vcpu_destroy(vcpu);
2776 
2777 	mutex_lock(&kvm->lock);
2778 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2779 		kvm->vcpus[i] = NULL;
2780 
2781 	atomic_set(&kvm->online_vcpus, 0);
2782 	mutex_unlock(&kvm->lock);
2783 }
2784 
2785 void kvm_arch_destroy_vm(struct kvm *kvm)
2786 {
2787 	u16 rc, rrc;
2788 
2789 	kvm_free_vcpus(kvm);
2790 	sca_dispose(kvm);
2791 	kvm_s390_gisa_destroy(kvm);
2792 	/*
2793 	 * We are already at the end of life and kvm->lock is not taken.
2794 	 * This is ok as the file descriptor is closed by now and nobody
2795 	 * can mess with the pv state. To avoid lockdep_assert_held from
2796 	 * complaining we do not use kvm_s390_pv_is_protected.
2797 	 */
2798 	if (kvm_s390_pv_get_handle(kvm))
2799 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2800 	debug_unregister(kvm->arch.dbf);
2801 	free_page((unsigned long)kvm->arch.sie_page2);
2802 	if (!kvm_is_ucontrol(kvm))
2803 		gmap_remove(kvm->arch.gmap);
2804 	kvm_s390_destroy_adapters(kvm);
2805 	kvm_s390_clear_float_irqs(kvm);
2806 	kvm_s390_vsie_destroy(kvm);
2807 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2808 }
2809 
2810 /* Section: vcpu related */
2811 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2812 {
2813 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2814 	if (!vcpu->arch.gmap)
2815 		return -ENOMEM;
2816 	vcpu->arch.gmap->private = vcpu->kvm;
2817 
2818 	return 0;
2819 }
2820 
2821 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2822 {
2823 	if (!kvm_s390_use_sca_entries())
2824 		return;
2825 	read_lock(&vcpu->kvm->arch.sca_lock);
2826 	if (vcpu->kvm->arch.use_esca) {
2827 		struct esca_block *sca = vcpu->kvm->arch.sca;
2828 
2829 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2830 		sca->cpu[vcpu->vcpu_id].sda = 0;
2831 	} else {
2832 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2833 
2834 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2835 		sca->cpu[vcpu->vcpu_id].sda = 0;
2836 	}
2837 	read_unlock(&vcpu->kvm->arch.sca_lock);
2838 }
2839 
2840 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2841 {
2842 	if (!kvm_s390_use_sca_entries()) {
2843 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2844 
2845 		/* we still need the basic sca for the ipte control */
2846 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2847 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2848 		return;
2849 	}
2850 	read_lock(&vcpu->kvm->arch.sca_lock);
2851 	if (vcpu->kvm->arch.use_esca) {
2852 		struct esca_block *sca = vcpu->kvm->arch.sca;
2853 
2854 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2855 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2857 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2858 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2859 	} else {
2860 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2861 
2862 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2863 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2864 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2865 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2866 	}
2867 	read_unlock(&vcpu->kvm->arch.sca_lock);
2868 }
2869 
2870 /* Basic SCA to Extended SCA data copy routines */
2871 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2872 {
2873 	d->sda = s->sda;
2874 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2875 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2876 }
2877 
2878 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2879 {
2880 	int i;
2881 
2882 	d->ipte_control = s->ipte_control;
2883 	d->mcn[0] = s->mcn;
2884 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2885 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2886 }
2887 
2888 static int sca_switch_to_extended(struct kvm *kvm)
2889 {
2890 	struct bsca_block *old_sca = kvm->arch.sca;
2891 	struct esca_block *new_sca;
2892 	struct kvm_vcpu *vcpu;
2893 	unsigned int vcpu_idx;
2894 	u32 scaol, scaoh;
2895 
2896 	if (kvm->arch.use_esca)
2897 		return 0;
2898 
2899 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2900 	if (!new_sca)
2901 		return -ENOMEM;
2902 
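	/*
	 * Split the new ESCA origin into the high and low halves expected by
	 * the SIE block; the low 6 bits are masked off, as the block is at
	 * least 64-byte aligned (it comes from alloc_pages_exact).
	 */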
2903 	scaoh = (u32)((u64)(new_sca) >> 32);
2904 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2905 
2906 	kvm_s390_vcpu_block_all(kvm);
2907 	write_lock(&kvm->arch.sca_lock);
2908 
2909 	sca_copy_b_to_e(new_sca, old_sca);
2910 
2911 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2912 		vcpu->arch.sie_block->scaoh = scaoh;
2913 		vcpu->arch.sie_block->scaol = scaol;
2914 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2915 	}
2916 	kvm->arch.sca = new_sca;
2917 	kvm->arch.use_esca = 1;
2918 
2919 	write_unlock(&kvm->arch.sca_lock);
2920 	kvm_s390_vcpu_unblock_all(kvm);
2921 
2922 	free_page((unsigned long)old_sca);
2923 
2924 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2925 		 old_sca, kvm->arch.sca);
2926 	return 0;
2927 }
2928 
2929 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2930 {
2931 	int rc;
2932 
2933 	if (!kvm_s390_use_sca_entries()) {
2934 		if (id < KVM_MAX_VCPUS)
2935 			return true;
2936 		return false;
2937 	}
2938 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2939 		return true;
2940 	if (!sclp.has_esca || !sclp.has_64bscao)
2941 		return false;
2942 
2943 	mutex_lock(&kvm->lock);
2944 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2945 	mutex_unlock(&kvm->lock);
2946 
2947 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2948 }
2949 
2950 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2951 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2952 {
2953 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2954 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2955 	vcpu->arch.cputm_start = get_tod_clock_fast();
2956 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2957 }
2958 
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2960 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2961 {
2962 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2963 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2965 	vcpu->arch.cputm_start = 0;
2966 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2967 }
2968 
2969 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2970 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 {
2972 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2973 	vcpu->arch.cputm_enabled = true;
2974 	__start_cpu_timer_accounting(vcpu);
2975 }
2976 
2977 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2978 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2979 {
2980 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2981 	__stop_cpu_timer_accounting(vcpu);
2982 	vcpu->arch.cputm_enabled = false;
2983 }
2984 
2985 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2986 {
2987 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2988 	__enable_cpu_timer_accounting(vcpu);
2989 	preempt_enable();
2990 }
2991 
2992 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2993 {
2994 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2995 	__disable_cpu_timer_accounting(vcpu);
2996 	preempt_enable();
2997 }
2998 
2999 /* set the cpu timer - may only be called from the VCPU thread itself */
3000 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3001 {
3002 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3003 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3004 	if (vcpu->arch.cputm_enabled)
3005 		vcpu->arch.cputm_start = get_tod_clock_fast();
3006 	vcpu->arch.sie_block->cputm = cputm;
3007 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3008 	preempt_enable();
3009 }
3010 
3011 /* update and get the cpu timer - can also be called from other VCPU threads */
3012 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3013 {
3014 	unsigned int seq;
3015 	__u64 value;
3016 
3017 	if (unlikely(!vcpu->arch.cputm_enabled))
3018 		return vcpu->arch.sie_block->cputm;
3019 
3020 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3021 	do {
3022 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3023 		/*
3024 		 * If the writer would ever execute a read in the critical
3025 		 * section, e.g. in irq context, we have a deadlock.
3026 		 */
3027 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3028 		value = vcpu->arch.sie_block->cputm;
3029 		/* if cputm_start is 0, accounting is being started/stopped */
3030 		if (likely(vcpu->arch.cputm_start))
3031 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3032 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3033 	preempt_enable();
3034 	return value;
3035 }
3036 
3037 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3038 {
3039 
3040 	gmap_enable(vcpu->arch.enabled_gmap);
3041 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3042 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3043 		__start_cpu_timer_accounting(vcpu);
3044 	vcpu->cpu = cpu;
3045 }
3046 
3047 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3048 {
3049 	vcpu->cpu = -1;
3050 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3051 		__stop_cpu_timer_accounting(vcpu);
3052 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3053 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3054 	gmap_disable(vcpu->arch.enabled_gmap);
3055 
3056 }
3057 
3058 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3059 {
3060 	mutex_lock(&vcpu->kvm->lock);
3061 	preempt_disable();
3062 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3063 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3064 	preempt_enable();
3065 	mutex_unlock(&vcpu->kvm->lock);
3066 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3067 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3068 		sca_add_vcpu(vcpu);
3069 	}
3070 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3071 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3072 	/* make vcpu_load load the right gmap on the first trigger */
3073 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3074 }
3075 
3076 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3077 {
3078 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3079 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3080 		return true;
3081 	return false;
3082 }
3083 
3084 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3085 {
3086 	/* At least one ECC subfunction must be present */
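	/*
	 * Subfunction bits 32-34 and 40-41 correspond to the PCKMO
	 * encrypted-ECC (P256/P384/P521) and encrypted-EdDSA (Ed25519/Ed448)
	 * key functions, per the CPACF function code definitions.
	 */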
3087 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3088 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3089 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3090 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3091 	       kvm_has_pckmo_subfunc(kvm, 41);
3092 
3093 }
3094 
3095 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3096 {
3097 	/*
3098 	 * If the AP instructions are not being interpreted and the MSAX3
3099 	 * facility is not configured for the guest, there is nothing to set up.
3100 	 */
3101 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3102 		return;
3103 
3104 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3105 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3106 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3107 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3108 
3109 	if (vcpu->kvm->arch.crypto.apie)
3110 		vcpu->arch.sie_block->eca |= ECA_APIE;
3111 
3112 	/* Set up protected key support */
3113 	if (vcpu->kvm->arch.crypto.aes_kw) {
3114 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3115 		/* ECC keys are also wrapped with the AES wrapping key */
3116 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3117 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3118 	}
3119 
3120 	if (vcpu->kvm->arch.crypto.dea_kw)
3121 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3122 }
3123 
3124 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3125 {
3126 	free_page(vcpu->arch.sie_block->cbrlo);
3127 	vcpu->arch.sie_block->cbrlo = 0;
3128 }
3129 
3130 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3131 {
3132 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3133 	if (!vcpu->arch.sie_block->cbrlo)
3134 		return -ENOMEM;
3135 	return 0;
3136 }
3137 
3138 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3139 {
3140 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3141 
3142 	vcpu->arch.sie_block->ibc = model->ibc;
3143 	if (test_kvm_facility(vcpu->kvm, 7))
3144 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3145 }
3146 
3147 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3148 {
3149 	int rc = 0;
3150 	u16 uvrc, uvrrc;
3151 
3152 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3153 						    CPUSTAT_SM |
3154 						    CPUSTAT_STOPPED);
3155 
3156 	if (test_kvm_facility(vcpu->kvm, 78))
3157 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3158 	else if (test_kvm_facility(vcpu->kvm, 8))
3159 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3160 
3161 	kvm_s390_vcpu_setup_model(vcpu);
3162 
3163 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3164 	if (MACHINE_HAS_ESOP)
3165 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3166 	if (test_kvm_facility(vcpu->kvm, 9))
3167 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3168 	if (test_kvm_facility(vcpu->kvm, 73))
3169 		vcpu->arch.sie_block->ecb |= ECB_TE;
3170 
3171 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3172 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3173 	if (test_kvm_facility(vcpu->kvm, 130))
3174 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3175 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3176 	if (sclp.has_cei)
3177 		vcpu->arch.sie_block->eca |= ECA_CEI;
3178 	if (sclp.has_ib)
3179 		vcpu->arch.sie_block->eca |= ECA_IB;
3180 	if (sclp.has_siif)
3181 		vcpu->arch.sie_block->eca |= ECA_SII;
3182 	if (sclp.has_sigpif)
3183 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3184 	if (test_kvm_facility(vcpu->kvm, 129)) {
3185 		vcpu->arch.sie_block->eca |= ECA_VX;
3186 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3187 	}
3188 	if (test_kvm_facility(vcpu->kvm, 139))
3189 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3190 	if (test_kvm_facility(vcpu->kvm, 156))
3191 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3192 	if (vcpu->arch.sie_block->gd) {
3193 		vcpu->arch.sie_block->eca |= ECA_AIV;
3194 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3195 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3196 	}
3197 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3198 					| SDNXC;
3199 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3200 
3201 	if (sclp.has_kss)
3202 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3203 	else
3204 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3205 
3206 	if (vcpu->kvm->arch.use_cmma) {
3207 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3208 		if (rc)
3209 			return rc;
3210 	}
3211 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3212 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3213 
3214 	vcpu->arch.sie_block->hpid = HPID_KVM;
3215 
3216 	kvm_s390_vcpu_crypto_setup(vcpu);
3217 
3218 	mutex_lock(&vcpu->kvm->lock);
3219 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3220 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3221 		if (rc)
3222 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3223 	}
3224 	mutex_unlock(&vcpu->kvm->lock);
3225 
3226 	return rc;
3227 }
3228 
3229 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3230 {
3231 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3232 		return -EINVAL;
3233 	return 0;
3234 }
3235 
3236 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3237 {
3238 	struct sie_page *sie_page;
3239 	int rc;
3240 
3241 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3242 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3243 	if (!sie_page)
3244 		return -ENOMEM;
3245 
3246 	vcpu->arch.sie_block = &sie_page->sie_block;
3247 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3248 
3249 	/* the real guest size will always be smaller than msl */
3250 	vcpu->arch.sie_block->mso = 0;
3251 	vcpu->arch.sie_block->msl = sclp.hamax;
3252 
3253 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3254 	spin_lock_init(&vcpu->arch.local_int.lock);
3255 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3256 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3257 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3258 	seqcount_init(&vcpu->arch.cputm_seqcount);
3259 
3260 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3261 	kvm_clear_async_pf_completion_queue(vcpu);
3262 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3263 				    KVM_SYNC_GPRS |
3264 				    KVM_SYNC_ACRS |
3265 				    KVM_SYNC_CRS |
3266 				    KVM_SYNC_ARCH0 |
3267 				    KVM_SYNC_PFAULT;
3268 	kvm_s390_set_prefix(vcpu, 0);
3269 	if (test_kvm_facility(vcpu->kvm, 64))
3270 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3271 	if (test_kvm_facility(vcpu->kvm, 82))
3272 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3273 	if (test_kvm_facility(vcpu->kvm, 133))
3274 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3275 	if (test_kvm_facility(vcpu->kvm, 156))
3276 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3277 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3278 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3279 	 */
3280 	if (MACHINE_HAS_VX)
3281 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3282 	else
3283 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3284 
3285 	if (kvm_is_ucontrol(vcpu->kvm)) {
3286 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3287 		if (rc)
3288 			goto out_free_sie_block;
3289 	}
3290 
3291 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3292 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3293 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3294 
3295 	rc = kvm_s390_vcpu_setup(vcpu);
3296 	if (rc)
3297 		goto out_ucontrol_uninit;
3298 	return 0;
3299 
3300 out_ucontrol_uninit:
3301 	if (kvm_is_ucontrol(vcpu->kvm))
3302 		gmap_remove(vcpu->arch.gmap);
3303 out_free_sie_block:
3304 	free_page((unsigned long)(vcpu->arch.sie_block));
3305 	return rc;
3306 }
3307 
3308 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3309 {
3310 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3311 }
3312 
3313 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3314 {
3315 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3316 }
3317 
3318 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3319 {
3320 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3321 	exit_sie(vcpu);
3322 }
3323 
3324 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3325 {
3326 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3327 }
3328 
3329 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3330 {
3331 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3332 	exit_sie(vcpu);
3333 }
3334 
3335 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3336 {
3337 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3338 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3339 }
3340 
3341 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3342 {
3343 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3344 }
3345 
3346 /*
3347  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3348  * If the CPU is not running (e.g. waiting as idle) the function will
3349  * return immediately. */
3350 void exit_sie(struct kvm_vcpu *vcpu)
3351 {
3352 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3353 	kvm_s390_vsie_kick(vcpu);
3354 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3355 		cpu_relax();
3356 }
3357 
3358 /* Kick a guest cpu out of SIE to process a request synchronously */
3359 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3360 {
3361 	kvm_make_request(req, vcpu);
3362 	kvm_s390_vcpu_request(vcpu);
3363 }
3364 
3365 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3366 			      unsigned long end)
3367 {
3368 	struct kvm *kvm = gmap->private;
3369 	struct kvm_vcpu *vcpu;
3370 	unsigned long prefix;
3371 	int i;
3372 
3373 	if (gmap_is_shadow(gmap))
3374 		return;
3375 	if (start >= 1UL << 31)
3376 		/* We are only interested in prefix pages */
3377 		return;
3378 	kvm_for_each_vcpu(i, vcpu, kvm) {
3379 		/* match against both prefix pages */
3380 		prefix = kvm_s390_get_prefix(vcpu);
3381 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3382 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3383 				   start, end);
3384 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3385 		}
3386 	}
3387 }
3388 
3389 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3390 {
3391 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3392 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3393 	    halt_poll_max_steal) {
3394 		vcpu->stat.halt_no_poll_steal++;
3395 		return true;
3396 	}
3397 	return false;
3398 }
3399 
3400 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3401 {
3402 	/* kvm common code refers to this, but never calls it */
3403 	BUG();
3404 	return 0;
3405 }
3406 
3407 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3408 					   struct kvm_one_reg *reg)
3409 {
3410 	int r = -EINVAL;
3411 
3412 	switch (reg->id) {
3413 	case KVM_REG_S390_TODPR:
3414 		r = put_user(vcpu->arch.sie_block->todpr,
3415 			     (u32 __user *)reg->addr);
3416 		break;
3417 	case KVM_REG_S390_EPOCHDIFF:
3418 		r = put_user(vcpu->arch.sie_block->epoch,
3419 			     (u64 __user *)reg->addr);
3420 		break;
3421 	case KVM_REG_S390_CPU_TIMER:
3422 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3423 			     (u64 __user *)reg->addr);
3424 		break;
3425 	case KVM_REG_S390_CLOCK_COMP:
3426 		r = put_user(vcpu->arch.sie_block->ckc,
3427 			     (u64 __user *)reg->addr);
3428 		break;
3429 	case KVM_REG_S390_PFTOKEN:
3430 		r = put_user(vcpu->arch.pfault_token,
3431 			     (u64 __user *)reg->addr);
3432 		break;
3433 	case KVM_REG_S390_PFCOMPARE:
3434 		r = put_user(vcpu->arch.pfault_compare,
3435 			     (u64 __user *)reg->addr);
3436 		break;
3437 	case KVM_REG_S390_PFSELECT:
3438 		r = put_user(vcpu->arch.pfault_select,
3439 			     (u64 __user *)reg->addr);
3440 		break;
3441 	case KVM_REG_S390_PP:
3442 		r = put_user(vcpu->arch.sie_block->pp,
3443 			     (u64 __user *)reg->addr);
3444 		break;
3445 	case KVM_REG_S390_GBEA:
3446 		r = put_user(vcpu->arch.sie_block->gbea,
3447 			     (u64 __user *)reg->addr);
3448 		break;
3449 	default:
3450 		break;
3451 	}
3452 
3453 	return r;
3454 }
3455 
3456 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3457 					   struct kvm_one_reg *reg)
3458 {
3459 	int r = -EINVAL;
3460 	__u64 val;
3461 
3462 	switch (reg->id) {
3463 	case KVM_REG_S390_TODPR:
3464 		r = get_user(vcpu->arch.sie_block->todpr,
3465 			     (u32 __user *)reg->addr);
3466 		break;
3467 	case KVM_REG_S390_EPOCHDIFF:
3468 		r = get_user(vcpu->arch.sie_block->epoch,
3469 			     (u64 __user *)reg->addr);
3470 		break;
3471 	case KVM_REG_S390_CPU_TIMER:
3472 		r = get_user(val, (u64 __user *)reg->addr);
3473 		if (!r)
3474 			kvm_s390_set_cpu_timer(vcpu, val);
3475 		break;
3476 	case KVM_REG_S390_CLOCK_COMP:
3477 		r = get_user(vcpu->arch.sie_block->ckc,
3478 			     (u64 __user *)reg->addr);
3479 		break;
3480 	case KVM_REG_S390_PFTOKEN:
3481 		r = get_user(vcpu->arch.pfault_token,
3482 			     (u64 __user *)reg->addr);
3483 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3484 			kvm_clear_async_pf_completion_queue(vcpu);
3485 		break;
3486 	case KVM_REG_S390_PFCOMPARE:
3487 		r = get_user(vcpu->arch.pfault_compare,
3488 			     (u64 __user *)reg->addr);
3489 		break;
3490 	case KVM_REG_S390_PFSELECT:
3491 		r = get_user(vcpu->arch.pfault_select,
3492 			     (u64 __user *)reg->addr);
3493 		break;
3494 	case KVM_REG_S390_PP:
3495 		r = get_user(vcpu->arch.sie_block->pp,
3496 			     (u64 __user *)reg->addr);
3497 		break;
3498 	case KVM_REG_S390_GBEA:
3499 		r = get_user(vcpu->arch.sie_block->gbea,
3500 			     (u64 __user *)reg->addr);
3501 		break;
3502 	default:
3503 		break;
3504 	}
3505 
3506 	return r;
3507 }
3508 
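/*
 * Normal reset: clear the pfault handshake and runtime instrumentation
 * state, cancel local interrupts and stop the vcpu unless userspace
 * controls the cpu states itself.
 */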
3509 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3510 {
3511 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3512 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3513 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3514 
3515 	kvm_clear_async_pf_completion_queue(vcpu);
3516 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3517 		kvm_s390_vcpu_stop(vcpu);
3518 	kvm_s390_clear_local_irqs(vcpu);
3519 }
3520 
3521 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3522 {
3523 	/* Initial reset is a superset of the normal reset */
3524 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3525 
3526 	/*
3527 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
	 * We not only reset the internal data, but also ...
3529 	 */
3530 	vcpu->arch.sie_block->gpsw.mask = 0;
3531 	vcpu->arch.sie_block->gpsw.addr = 0;
3532 	kvm_s390_set_prefix(vcpu, 0);
3533 	kvm_s390_set_cpu_timer(vcpu, 0);
3534 	vcpu->arch.sie_block->ckc = 0;
3535 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3536 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3537 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3538 
3539 	/* ... the data in sync regs */
3540 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3541 	vcpu->run->s.regs.ckc = 0;
3542 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3543 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3544 	vcpu->run->psw_addr = 0;
3545 	vcpu->run->psw_mask = 0;
3546 	vcpu->run->s.regs.todpr = 0;
3547 	vcpu->run->s.regs.cputm = 0;
3548 	vcpu->run->s.regs.ckc = 0;
3549 	vcpu->run->s.regs.pp = 0;
3550 	vcpu->run->s.regs.gbea = 1;
3551 	vcpu->run->s.regs.fpc = 0;
3552 	/*
3553 	 * Do not reset these registers in the protected case, as some of
	 * them are overlaid and they are not accessible in this case
3555 	 * anyway.
3556 	 */
3557 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3558 		vcpu->arch.sie_block->gbea = 1;
3559 		vcpu->arch.sie_block->pp = 0;
3560 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3561 		vcpu->arch.sie_block->todpr = 0;
3562 	}
3563 }
3564 
3565 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3566 {
3567 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3568 
3569 	/* Clear reset is a superset of the initial reset */
3570 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3571 
3572 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3573 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3574 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3575 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3576 
3577 	regs->etoken = 0;
3578 	regs->etoken_extension = 0;
3579 }
3580 
3581 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3582 {
3583 	vcpu_load(vcpu);
3584 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3585 	vcpu_put(vcpu);
3586 	return 0;
3587 }
3588 
3589 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3590 {
3591 	vcpu_load(vcpu);
3592 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3593 	vcpu_put(vcpu);
3594 	return 0;
3595 }
3596 
3597 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3598 				  struct kvm_sregs *sregs)
3599 {
3600 	vcpu_load(vcpu);
3601 
3602 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3603 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3604 
3605 	vcpu_put(vcpu);
3606 	return 0;
3607 }
3608 
3609 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3610 				  struct kvm_sregs *sregs)
3611 {
3612 	vcpu_load(vcpu);
3613 
3614 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3615 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3616 
3617 	vcpu_put(vcpu);
3618 	return 0;
3619 }
3620 
3621 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3622 {
3623 	int ret = 0;
3624 
3625 	vcpu_load(vcpu);
3626 
3627 	if (test_fp_ctl(fpu->fpc)) {
3628 		ret = -EINVAL;
3629 		goto out;
3630 	}
3631 	vcpu->run->s.regs.fpc = fpu->fpc;
3632 	if (MACHINE_HAS_VX)
3633 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3634 				 (freg_t *) fpu->fprs);
3635 	else
3636 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3637 
3638 out:
3639 	vcpu_put(vcpu);
3640 	return ret;
3641 }
3642 
3643 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3644 {
3645 	vcpu_load(vcpu);
3646 
3647 	/* make sure we have the latest values */
3648 	save_fpu_regs();
3649 	if (MACHINE_HAS_VX)
3650 		convert_vx_to_fp((freg_t *) fpu->fprs,
3651 				 (__vector128 *) vcpu->run->s.regs.vrs);
3652 	else
3653 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3654 	fpu->fpc = vcpu->run->s.regs.fpc;
3655 
3656 	vcpu_put(vcpu);
3657 	return 0;
3658 }
3659 
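/* Setting the initial PSW is only allowed while the vcpu is stopped */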
3660 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3661 {
3662 	int rc = 0;
3663 
3664 	if (!is_vcpu_stopped(vcpu))
3665 		rc = -EBUSY;
3666 	else {
3667 		vcpu->run->psw_mask = psw.mask;
3668 		vcpu->run->psw_addr = psw.addr;
3669 	}
3670 	return rc;
3671 }
3672 
3673 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3674 				  struct kvm_translation *tr)
3675 {
3676 	return -EINVAL; /* not implemented yet */
3677 }
3678 
3679 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3680 			      KVM_GUESTDBG_USE_HW_BP | \
3681 			      KVM_GUESTDBG_ENABLE)
3682 
3683 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3684 					struct kvm_guest_debug *dbg)
3685 {
3686 	int rc = 0;
3687 
3688 	vcpu_load(vcpu);
3689 
3690 	vcpu->guest_debug = 0;
3691 	kvm_s390_clear_bp_data(vcpu);
3692 
3693 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3694 		rc = -EINVAL;
3695 		goto out;
3696 	}
3697 	if (!sclp.has_gpere) {
3698 		rc = -EINVAL;
3699 		goto out;
3700 	}
3701 
3702 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3703 		vcpu->guest_debug = dbg->control;
3704 		/* enforce guest PER */
3705 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3706 
3707 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3708 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3709 	} else {
3710 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3711 		vcpu->arch.guestdbg.last_bp = 0;
3712 	}
3713 
3714 	if (rc) {
3715 		vcpu->guest_debug = 0;
3716 		kvm_s390_clear_bp_data(vcpu);
3717 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3718 	}
3719 
3720 out:
3721 	vcpu_put(vcpu);
3722 	return rc;
3723 }
3724 
3725 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3726 				    struct kvm_mp_state *mp_state)
3727 {
3728 	int ret;
3729 
3730 	vcpu_load(vcpu);
3731 
3732 	/* CHECK_STOP and LOAD are not supported yet */
3733 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3734 				      KVM_MP_STATE_OPERATING;
3735 
3736 	vcpu_put(vcpu);
3737 	return ret;
3738 }
3739 
3740 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3741 				    struct kvm_mp_state *mp_state)
3742 {
3743 	int rc = 0;
3744 
3745 	vcpu_load(vcpu);
3746 
3747 	/* user space knows about this interface - let it control the state */
3748 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3749 
3750 	switch (mp_state->mp_state) {
3751 	case KVM_MP_STATE_STOPPED:
3752 		rc = kvm_s390_vcpu_stop(vcpu);
3753 		break;
3754 	case KVM_MP_STATE_OPERATING:
3755 		rc = kvm_s390_vcpu_start(vcpu);
3756 		break;
3757 	case KVM_MP_STATE_LOAD:
3758 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3759 			rc = -ENXIO;
3760 			break;
3761 		}
3762 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3763 		break;
3764 	case KVM_MP_STATE_CHECK_STOP:
3765 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3766 	default:
3767 		rc = -ENXIO;
3768 	}
3769 
3770 	vcpu_put(vcpu);
3771 	return rc;
3772 }
3773 
3774 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3775 {
3776 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3777 }
3778 
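/* Process all requests pending for this vcpu before (re-)entering SIE */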
3779 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3780 {
3781 retry:
3782 	kvm_s390_vcpu_request_handled(vcpu);
3783 	if (!kvm_request_pending(vcpu))
3784 		return 0;
3785 	/*
3786 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3787 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3788 	 * This ensures that the ipte instruction for this request has
3789 	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
3791 	 */
3792 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3793 		int rc;
3794 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3795 					  kvm_s390_get_prefix(vcpu),
3796 					  PAGE_SIZE * 2, PROT_WRITE);
3797 		if (rc) {
3798 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3799 			return rc;
3800 		}
3801 		goto retry;
3802 	}
3803 
3804 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3805 		vcpu->arch.sie_block->ihcpu = 0xffff;
3806 		goto retry;
3807 	}
3808 
3809 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3810 		if (!ibs_enabled(vcpu)) {
3811 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3812 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3813 		}
3814 		goto retry;
3815 	}
3816 
3817 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3818 		if (ibs_enabled(vcpu)) {
3819 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3820 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3821 		}
3822 		goto retry;
3823 	}
3824 
3825 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3826 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3827 		goto retry;
3828 	}
3829 
3830 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3831 		/*
3832 		 * Disable CMM virtualization; we will emulate the ESSA
3833 		 * instruction manually, in order to provide additional
3834 		 * functionalities needed for live migration.
3835 		 */
3836 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3837 		goto retry;
3838 	}
3839 
3840 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3841 		/*
3842 		 * Re-enable CMM virtualization if CMMA is available and
3843 		 * CMM has been used.
3844 		 */
3845 		if ((vcpu->kvm->arch.use_cmma) &&
3846 		    (vcpu->kvm->mm->context.uses_cmm))
3847 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3848 		goto retry;
3849 	}
3850 
3851 	/* nothing to do, just clear the request */
3852 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3853 	/* we left the vsie handler, nothing to do, just clear the request */
3854 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3855 
3856 	return 0;
3857 }
3858 
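/*
 * Set the guest TOD clock epoch (and the epoch index if the multiple
 * epoch facility is available) for all vcpus while they are blocked
 * out of SIE.
 */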
3859 void kvm_s390_set_tod_clock(struct kvm *kvm,
3860 			    const struct kvm_s390_vm_tod_clock *gtod)
3861 {
3862 	struct kvm_vcpu *vcpu;
3863 	struct kvm_s390_tod_clock_ext htod;
3864 	int i;
3865 
3866 	mutex_lock(&kvm->lock);
3867 	preempt_disable();
3868 
3869 	get_tod_clock_ext((char *)&htod);
3870 
3871 	kvm->arch.epoch = gtod->tod - htod.tod;
3872 	kvm->arch.epdx = 0;
3873 	if (test_kvm_facility(kvm, 139)) {
3874 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3875 		if (kvm->arch.epoch > gtod->tod)
3876 			kvm->arch.epdx -= 1;
3877 	}
3878 
3879 	kvm_s390_vcpu_block_all(kvm);
3880 	kvm_for_each_vcpu(i, vcpu, kvm) {
3881 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3882 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3883 	}
3884 
3885 	kvm_s390_vcpu_unblock_all(kvm);
3886 	preempt_enable();
3887 	mutex_unlock(&kvm->lock);
3888 }
3889 
3890 /**
3891  * kvm_arch_fault_in_page - fault-in guest page if necessary
3892  * @vcpu: The corresponding virtual cpu
3893  * @gpa: Guest physical address
3894  * @writable: Whether the page should be writable or not
3895  *
3896  * Make sure that a guest page has been faulted-in on the host.
3897  *
3898  * Return: Zero on success, negative error code otherwise.
3899  */
3900 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3901 {
3902 	return gmap_fault(vcpu->arch.gmap, gpa,
3903 			  writable ? FAULT_FLAG_WRITE : 0);
3904 }
3905 
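/*
 * Inject the pfault handshake interrupt for the given token: a pfault
 * init interrupt on the vcpu when handling of the fault starts, or a
 * pfault done interrupt on the VM once it has completed.
 */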
3906 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3907 				      unsigned long token)
3908 {
3909 	struct kvm_s390_interrupt inti;
3910 	struct kvm_s390_irq irq;
3911 
3912 	if (start_token) {
3913 		irq.u.ext.ext_params2 = token;
3914 		irq.type = KVM_S390_INT_PFAULT_INIT;
3915 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3916 	} else {
3917 		inti.type = KVM_S390_INT_PFAULT_DONE;
3918 		inti.parm64 = token;
3919 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3920 	}
3921 }
3922 
3923 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3924 				     struct kvm_async_pf *work)
3925 {
3926 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3927 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3928 }
3929 
3930 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3931 				 struct kvm_async_pf *work)
3932 {
3933 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3934 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3935 }
3936 
3937 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3938 			       struct kvm_async_pf *work)
3939 {
3940 	/* s390 will always inject the page directly */
3941 }
3942 
3943 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3944 {
3945 	/*
3946 	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
3948 	 */
3949 	return true;
3950 }
3951 
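/*
 * Try to set up an async pfault for the current host fault. Bail out
 * (return 0) if the guest has not enabled the pfault handshake or
 * cannot take the pfault init interrupt right now.
 */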
3952 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3953 {
3954 	hva_t hva;
3955 	struct kvm_arch_async_pf arch;
3956 	int rc;
3957 
3958 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3959 		return 0;
3960 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3961 	    vcpu->arch.pfault_compare)
3962 		return 0;
3963 	if (psw_extint_disabled(vcpu))
3964 		return 0;
3965 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3966 		return 0;
3967 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3968 		return 0;
3969 	if (!vcpu->arch.gmap->pfault_enabled)
3970 		return 0;
3971 
3972 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3973 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3974 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3975 		return 0;
3976 
3977 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3978 	return rc;
3979 }
3980 
3981 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3982 {
3983 	int rc, cpuflags;
3984 
3985 	/*
3986 	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
3988 	 * handled outside the worker.
3989 	 */
3990 	kvm_check_async_pf_completion(vcpu);
3991 
3992 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3993 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3994 
3995 	if (need_resched())
3996 		schedule();
3997 
3998 	if (test_cpu_flag(CIF_MCCK_PENDING))
3999 		s390_handle_mcck();
4000 
4001 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4002 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4003 		if (rc)
4004 			return rc;
4005 	}
4006 
4007 	rc = kvm_s390_handle_requests(vcpu);
4008 	if (rc)
4009 		return rc;
4010 
4011 	if (guestdbg_enabled(vcpu)) {
4012 		kvm_s390_backup_guest_per_regs(vcpu);
4013 		kvm_s390_patch_guest_per_regs(vcpu);
4014 	}
4015 
4016 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4017 
4018 	vcpu->arch.sie_block->icptcode = 0;
4019 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4020 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4021 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4022 
4023 	return 0;
4024 }
4025 
4026 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4027 {
4028 	struct kvm_s390_pgm_info pgm_info = {
4029 		.code = PGM_ADDRESSING,
4030 	};
4031 	u8 opcode, ilen;
4032 	int rc;
4033 
4034 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4035 	trace_kvm_s390_sie_fault(vcpu);
4036 
4037 	/*
4038 	 * We want to inject an addressing exception, which is defined as a
4039 	 * suppressing or terminating exception. However, since we came here
4040 	 * by a DAT access exception, the PSW still points to the faulting
4041 	 * instruction since DAT exceptions are nullifying. So we've got
4042 	 * to look up the current opcode to get the length of the instruction
4043 	 * to be able to forward the PSW.
4044 	 */
4045 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4046 	ilen = insn_length(opcode);
4047 	if (rc < 0) {
4048 		return rc;
4049 	} else if (rc) {
		/*
		 * Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
4054 		pgm_info = vcpu->arch.pgm;
4055 		ilen = 4;
4056 	}
4057 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4058 	kvm_s390_forward_psw(vcpu, ilen);
4059 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4060 }
4061 
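/*
 * Post-process a SIE exit: forward machine checks, handle intercepts
 * and host page faults. Returns -EREMOTE when the exit has to be
 * completed by userspace.
 */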
4062 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4063 {
4064 	struct mcck_volatile_info *mcck_info;
4065 	struct sie_page *sie_page;
4066 
4067 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4068 		   vcpu->arch.sie_block->icptcode);
4069 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4070 
4071 	if (guestdbg_enabled(vcpu))
4072 		kvm_s390_restore_guest_per_regs(vcpu);
4073 
4074 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4075 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4076 
4077 	if (exit_reason == -EINTR) {
4078 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4079 		sie_page = container_of(vcpu->arch.sie_block,
4080 					struct sie_page, sie_block);
4081 		mcck_info = &sie_page->mcck_info;
4082 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4083 		return 0;
4084 	}
4085 
4086 	if (vcpu->arch.sie_block->icptcode > 0) {
4087 		int rc = kvm_handle_sie_intercept(vcpu);
4088 
4089 		if (rc != -EOPNOTSUPP)
4090 			return rc;
4091 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4092 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4093 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4094 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4095 		return -EREMOTE;
4096 	} else if (exit_reason != -EFAULT) {
4097 		vcpu->stat.exit_null++;
4098 		return 0;
4099 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4100 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4101 		vcpu->run->s390_ucontrol.trans_exc_code =
4102 						current->thread.gmap_addr;
4103 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4104 		return -EREMOTE;
4105 	} else if (current->thread.gmap_pfault) {
4106 		trace_kvm_s390_major_guest_pfault(vcpu);
4107 		current->thread.gmap_pfault = 0;
4108 		if (kvm_arch_setup_async_pf(vcpu))
4109 			return 0;
4110 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4111 	}
4112 	return vcpu_post_run_fault_in_sie(vcpu);
4113 }
4114 
4115 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4116 static int __vcpu_run(struct kvm_vcpu *vcpu)
4117 {
4118 	int rc, exit_reason;
4119 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4120 
4121 	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
4124 	 */
4125 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4126 
4127 	do {
4128 		rc = vcpu_pre_run(vcpu);
4129 		if (rc)
4130 			break;
4131 
4132 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4133 		/*
		 * As PF_VCPU will be used in the fault handler, there should be
		 * no uaccess between guest_enter and guest_exit.
4136 		 */
4137 		local_irq_disable();
4138 		guest_enter_irqoff();
4139 		__disable_cpu_timer_accounting(vcpu);
4140 		local_irq_enable();
4141 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4142 			memcpy(sie_page->pv_grregs,
4143 			       vcpu->run->s.regs.gprs,
4144 			       sizeof(sie_page->pv_grregs));
4145 		}
4146 		exit_reason = sie64a(vcpu->arch.sie_block,
4147 				     vcpu->run->s.regs.gprs);
4148 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4149 			memcpy(vcpu->run->s.regs.gprs,
4150 			       sie_page->pv_grregs,
4151 			       sizeof(sie_page->pv_grregs));
4152 			/*
4153 			 * We're not allowed to inject interrupts on intercepts
4154 			 * that leave the guest state in an "in-between" state
4155 			 * where the next SIE entry will do a continuation.
4156 			 * Fence interrupts in our "internal" PSW.
4157 			 */
4158 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4159 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4160 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4161 			}
4162 		}
4163 		local_irq_disable();
4164 		__enable_cpu_timer_accounting(vcpu);
4165 		guest_exit_irqoff();
4166 		local_irq_enable();
4167 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4168 
4169 		rc = vcpu_post_run(vcpu, exit_reason);
4170 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4171 
4172 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4173 	return rc;
4174 }
4175 
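/*
 * Sync the parts of the register state that are only accessible while
 * the vcpu is not protected.
 */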
4176 static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4177 {
4178 	struct runtime_instr_cb *riccb;
4179 	struct gs_cb *gscb;
4180 
4181 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4182 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4183 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4184 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4185 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4186 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4187 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4188 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4189 	}
4190 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4191 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4192 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4193 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4194 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4195 			kvm_clear_async_pf_completion_queue(vcpu);
4196 	}
4197 	/*
4198 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4199 	 * we should enable RI here instead of doing the lazy enablement.
4200 	 */
4201 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4202 	    test_kvm_facility(vcpu->kvm, 64) &&
4203 	    riccb->v &&
4204 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4205 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4206 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4207 	}
4208 	/*
4209 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4210 	 * we should enable GS here instead of doing the lazy enablement.
4211 	 */
4212 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4213 	    test_kvm_facility(vcpu->kvm, 133) &&
4214 	    gscb->gssm &&
4215 	    !vcpu->arch.gs_enabled) {
4216 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4217 		vcpu->arch.sie_block->ecb |= ECB_GS;
4218 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4219 		vcpu->arch.gs_enabled = 1;
4220 	}
4221 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4222 	    test_kvm_facility(vcpu->kvm, 82)) {
4223 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4224 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4225 	}
4226 	if (MACHINE_HAS_GS) {
4227 		preempt_disable();
4228 		__ctl_set_bit(2, 4);
4229 		if (current->thread.gs_cb) {
4230 			vcpu->arch.host_gscb = current->thread.gs_cb;
4231 			save_gs_cb(vcpu->arch.host_gscb);
4232 		}
4233 		if (vcpu->arch.gs_enabled) {
4234 			current->thread.gs_cb = (struct gs_cb *)
4235 						&vcpu->run->s.regs.gscb;
4236 			restore_gs_cb(current->thread.gs_cb);
4237 		}
4238 		preempt_enable();
4239 	}
4240 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4241 }
4242 
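/*
 * Transfer the register state that userspace marked dirty in kvm_run
 * into the vcpu before entering SIE.
 */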
4243 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4244 {
4245 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4246 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4247 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4248 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4249 		/* some control register changes require a tlb flush */
4250 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4251 	}
4252 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4253 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4254 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4255 	}
4256 	save_access_regs(vcpu->arch.host_acrs);
4257 	restore_access_regs(vcpu->run->s.regs.acrs);
4258 	/* save host (userspace) fprs/vrs */
4259 	save_fpu_regs();
4260 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4261 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4262 	if (MACHINE_HAS_VX)
4263 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4264 	else
4265 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4266 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4267 	if (test_fp_ctl(current->thread.fpu.fpc))
4268 		/* User space provided an invalid FPC, let's clear it */
4269 		current->thread.fpu.fpc = 0;
4270 
4271 	/* Sync fmt2 only data */
4272 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4273 		sync_regs_fmt2(vcpu, kvm_run);
4274 	} else {
4275 		/*
4276 		 * In several places we have to modify our internal view to
4277 		 * not do things that are disallowed by the ultravisor. For
4278 		 * example we must not inject interrupts after specific exits
4279 		 * (e.g. 112 prefix page not secure). We do this by turning
4280 		 * off the machine check, external and I/O interrupt bits
4281 		 * of our PSW copy. To avoid getting validity intercepts, we
		 * only accept the condition code from userspace.
4283 		 */
4284 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4285 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4286 						   PSW_MASK_CC;
4287 	}
4288 
4289 	kvm_run->kvm_dirty_regs = 0;
4290 }
4291 
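/*
 * Store the parts of the register state into kvm_run that are only
 * accessible while the vcpu is not protected.
 */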
4292 static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4293 {
4294 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4295 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4296 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4297 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4298 	if (MACHINE_HAS_GS) {
4299 		__ctl_set_bit(2, 4);
4300 		if (vcpu->arch.gs_enabled)
4301 			save_gs_cb(current->thread.gs_cb);
4302 		preempt_disable();
4303 		current->thread.gs_cb = vcpu->arch.host_gscb;
4304 		restore_gs_cb(vcpu->arch.host_gscb);
4305 		preempt_enable();
4306 		if (!vcpu->arch.host_gscb)
4307 			__ctl_clear_bit(2, 4);
4308 		vcpu->arch.host_gscb = NULL;
4309 	}
4310 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4311 }
4312 
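/* Transfer the current vcpu register state back into kvm_run for userspace */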
4313 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4314 {
4315 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4316 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4317 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4318 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4319 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4320 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4321 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4322 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4323 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4324 	save_access_regs(vcpu->run->s.regs.acrs);
4325 	restore_access_regs(vcpu->arch.host_acrs);
4326 	/* Save guest register state */
4327 	save_fpu_regs();
4328 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4329 	/* Restore will be done lazily at return */
4330 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4331 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4332 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4333 		store_regs_fmt2(vcpu, kvm_run);
4334 }
4335 
4336 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4337 {
4338 	int rc;
4339 
4340 	if (kvm_run->immediate_exit)
4341 		return -EINTR;
4342 
4343 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4344 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4345 		return -EINVAL;
4346 
4347 	vcpu_load(vcpu);
4348 
4349 	if (guestdbg_exit_pending(vcpu)) {
4350 		kvm_s390_prepare_debug_exit(vcpu);
4351 		rc = 0;
4352 		goto out;
4353 	}
4354 
4355 	kvm_sigset_activate(vcpu);
4356 
4357 	/*
4358 	 * no need to check the return value of vcpu_start as it can only have
4359 	 * an error for protvirt, but protvirt means user cpu state
4360 	 */
4361 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4362 		kvm_s390_vcpu_start(vcpu);
4363 	} else if (is_vcpu_stopped(vcpu)) {
4364 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4365 				   vcpu->vcpu_id);
4366 		rc = -EINVAL;
4367 		goto out;
4368 	}
4369 
4370 	sync_regs(vcpu, kvm_run);
4371 	enable_cpu_timer_accounting(vcpu);
4372 
4373 	might_fault();
4374 	rc = __vcpu_run(vcpu);
4375 
4376 	if (signal_pending(current) && !rc) {
4377 		kvm_run->exit_reason = KVM_EXIT_INTR;
4378 		rc = -EINTR;
4379 	}
4380 
4381 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4382 		kvm_s390_prepare_debug_exit(vcpu);
4383 		rc = 0;
4384 	}
4385 
4386 	if (rc == -EREMOTE) {
4387 		/* userspace support is needed, kvm_run has been prepared */
4388 		rc = 0;
4389 	}
4390 
4391 	disable_cpu_timer_accounting(vcpu);
4392 	store_regs(vcpu, kvm_run);
4393 
4394 	kvm_sigset_deactivate(vcpu);
4395 
4396 	vcpu->stat.exit_userspace++;
4397 out:
4398 	vcpu_put(vcpu);
4399 	return rc;
4400 }
4401 
4402 /*
4403  * store status at address
 * we have two special cases:
4405  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4406  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4407  */
4408 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4409 {
4410 	unsigned char archmode = 1;
4411 	freg_t fprs[NUM_FPRS];
4412 	unsigned int px;
4413 	u64 clkcomp, cputm;
4414 	int rc;
4415 
4416 	px = kvm_s390_get_prefix(vcpu);
4417 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4418 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4419 			return -EFAULT;
4420 		gpa = 0;
4421 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4422 		if (write_guest_real(vcpu, 163, &archmode, 1))
4423 			return -EFAULT;
4424 		gpa = px;
4425 	} else
4426 		gpa -= __LC_FPREGS_SAVE_AREA;
4427 
4428 	/* manually convert vector registers if necessary */
4429 	if (MACHINE_HAS_VX) {
4430 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4431 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4432 				     fprs, 128);
4433 	} else {
4434 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4435 				     vcpu->run->s.regs.fprs, 128);
4436 	}
4437 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4438 			      vcpu->run->s.regs.gprs, 128);
4439 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4440 			      &vcpu->arch.sie_block->gpsw, 16);
4441 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4442 			      &px, 4);
4443 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4444 			      &vcpu->run->s.regs.fpc, 4);
4445 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4446 			      &vcpu->arch.sie_block->todpr, 4);
4447 	cputm = kvm_s390_get_cpu_timer(vcpu);
4448 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4449 			      &cputm, 8);
4450 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4451 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4452 			      &clkcomp, 8);
4453 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4454 			      &vcpu->run->s.regs.acrs, 64);
4455 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4456 			      &vcpu->arch.sie_block->gcr, 128);
4457 	return rc ? -EFAULT : 0;
4458 }
4459 
4460 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4461 {
4462 	/*
4463 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4464 	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
4466 	 */
4467 	save_fpu_regs();
4468 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4469 	save_access_regs(vcpu->run->s.regs.acrs);
4470 
4471 	return kvm_s390_store_status_unloaded(vcpu, addr);
4472 }
4473 
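/*
 * Withdraw a possibly pending ENABLE_IBS request and ask the vcpu to
 * disable IBS synchronously.
 */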
4474 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4475 {
4476 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4477 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4478 }
4479 
4480 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4481 {
4482 	unsigned int i;
4483 	struct kvm_vcpu *vcpu;
4484 
4485 	kvm_for_each_vcpu(i, vcpu, kvm) {
4486 		__disable_ibs_on_vcpu(vcpu);
4487 	}
4488 }
4489 
4490 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4491 {
4492 	if (!sclp.has_ibs)
4493 		return;
4494 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4495 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4496 }
4497 
4498 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4499 {
4500 	int i, online_vcpus, r = 0, started_vcpus = 0;
4501 
4502 	if (!is_vcpu_stopped(vcpu))
4503 		return 0;
4504 
4505 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4506 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4507 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4508 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4509 
4510 	/* Let's tell the UV that we want to change into the operating state */
4511 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4512 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4513 		if (r) {
4514 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4515 			return r;
4516 		}
4517 	}
4518 
4519 	for (i = 0; i < online_vcpus; i++) {
4520 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4521 			started_vcpus++;
4522 	}
4523 
4524 	if (started_vcpus == 0) {
4525 		/* we're the only active VCPU -> speed it up */
4526 		__enable_ibs_on_vcpu(vcpu);
4527 	} else if (started_vcpus == 1) {
4528 		/*
4529 		 * As we are starting a second VCPU, we have to disable
4530 		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
4532 		 */
4533 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4534 	}
4535 
4536 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4537 	/*
4538 	 * The real PSW might have changed due to a RESTART interpreted by the
4539 	 * ultravisor. We block all interrupts and let the next sie exit
4540 	 * refresh our view.
4541 	 */
4542 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4543 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4544 	/*
4545 	 * Another VCPU might have used IBS while we were offline.
4546 	 * Let's play safe and flush the VCPU at startup.
4547 	 */
4548 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4549 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4550 	return 0;
4551 }
4552 
4553 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4554 {
4555 	int i, online_vcpus, r = 0, started_vcpus = 0;
4556 	struct kvm_vcpu *started_vcpu = NULL;
4557 
4558 	if (is_vcpu_stopped(vcpu))
4559 		return 0;
4560 
4561 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4562 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4563 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4564 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4565 
4566 	/* Let's tell the UV that we want to change into the stopped state */
4567 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4568 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4569 		if (r) {
4570 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4571 			return r;
4572 		}
4573 	}
4574 
	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4576 	kvm_s390_clear_stop_irq(vcpu);
4577 
4578 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4579 	__disable_ibs_on_vcpu(vcpu);
4580 
4581 	for (i = 0; i < online_vcpus; i++) {
4582 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4583 			started_vcpus++;
4584 			started_vcpu = vcpu->kvm->vcpus[i];
4585 		}
4586 	}
4587 
4588 	if (started_vcpus == 1) {
4589 		/*
4590 		 * As we only have one VCPU left, we want to enable the
4591 		 * IBS facility for that VCPU to speed it up.
4592 		 */
4593 		__enable_ibs_on_vcpu(started_vcpu);
4594 	}
4595 
4596 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4597 	return 0;
4598 }
4599 
4600 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4601 				     struct kvm_enable_cap *cap)
4602 {
4603 	int r;
4604 
4605 	if (cap->flags)
4606 		return -EINVAL;
4607 
4608 	switch (cap->cap) {
4609 	case KVM_CAP_S390_CSS_SUPPORT:
4610 		if (!vcpu->kvm->arch.css_support) {
4611 			vcpu->kvm->arch.css_support = 1;
4612 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4613 			trace_kvm_s390_enable_css(vcpu->kvm);
4614 		}
4615 		r = 0;
4616 		break;
4617 	default:
4618 		r = -EINVAL;
4619 		break;
4620 	}
4621 	return r;
4622 }
4623 
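/*
 * Access the secure instruction data area (sida) of a protected vcpu
 * on behalf of userspace.
 */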
4624 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4625 				   struct kvm_s390_mem_op *mop)
4626 {
4627 	void __user *uaddr = (void __user *)mop->buf;
4628 	int r = 0;
4629 
4630 	if (mop->flags || !mop->size)
4631 		return -EINVAL;
4632 	if (mop->size + mop->sida_offset < mop->size)
4633 		return -EINVAL;
4634 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4635 		return -E2BIG;
4636 
4637 	switch (mop->op) {
4638 	case KVM_S390_MEMOP_SIDA_READ:
4639 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4640 				 mop->sida_offset), mop->size))
4641 			r = -EFAULT;
4642 
4643 		break;
4644 	case KVM_S390_MEMOP_SIDA_WRITE:
4645 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4646 				   mop->sida_offset), uaddr, mop->size))
4647 			r = -EFAULT;
4648 		break;
4649 	}
4650 	return r;
4651 }
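
/*
 * Read or write guest logical memory on behalf of userspace, or just
 * check whether the access would succeed (KVM_S390_MEMOP_F_CHECK_ONLY).
 */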
4652 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4653 				  struct kvm_s390_mem_op *mop)
4654 {
4655 	void __user *uaddr = (void __user *)mop->buf;
4656 	void *tmpbuf = NULL;
4657 	int r = 0;
4658 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4659 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4660 
4661 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4662 		return -EINVAL;
4663 
4664 	if (mop->size > MEM_OP_MAX_SIZE)
4665 		return -E2BIG;
4666 
4667 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4668 		return -EINVAL;
4669 
4670 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4671 		tmpbuf = vmalloc(mop->size);
4672 		if (!tmpbuf)
4673 			return -ENOMEM;
4674 	}
4675 
4676 	switch (mop->op) {
4677 	case KVM_S390_MEMOP_LOGICAL_READ:
4678 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4679 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4680 					    mop->size, GACC_FETCH);
4681 			break;
4682 		}
4683 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4684 		if (r == 0) {
4685 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4686 				r = -EFAULT;
4687 		}
4688 		break;
4689 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4690 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4691 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4692 					    mop->size, GACC_STORE);
4693 			break;
4694 		}
4695 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4696 			r = -EFAULT;
4697 			break;
4698 		}
4699 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4700 		break;
4701 	}
4702 
4703 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4704 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4705 
4706 	vfree(tmpbuf);
4707 	return r;
4708 }
4709 
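/*
 * Dispatch a KVM_S390_MEM_OP to the memory or sida handler while
 * holding the kvm->srcu read lock.
 */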
4710 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4711 				      struct kvm_s390_mem_op *mop)
4712 {
4713 	int r, srcu_idx;
4714 
4715 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4716 
4717 	switch (mop->op) {
4718 	case KVM_S390_MEMOP_LOGICAL_READ:
4719 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4720 		r = kvm_s390_guest_mem_op(vcpu, mop);
4721 		break;
4722 	case KVM_S390_MEMOP_SIDA_READ:
4723 	case KVM_S390_MEMOP_SIDA_WRITE:
4724 		/* we are locked against sida going away by the vcpu->mutex */
4725 		r = kvm_s390_guest_sida_op(vcpu, mop);
4726 		break;
4727 	default:
4728 		r = -EINVAL;
4729 	}
4730 
4731 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4732 	return r;
4733 }
4734 
4735 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4736 			       unsigned int ioctl, unsigned long arg)
4737 {
4738 	struct kvm_vcpu *vcpu = filp->private_data;
4739 	void __user *argp = (void __user *)arg;
4740 
4741 	switch (ioctl) {
4742 	case KVM_S390_IRQ: {
4743 		struct kvm_s390_irq s390irq;
4744 
4745 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4746 			return -EFAULT;
4747 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4748 	}
4749 	case KVM_S390_INTERRUPT: {
4750 		struct kvm_s390_interrupt s390int;
4751 		struct kvm_s390_irq s390irq = {};
4752 
4753 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4754 			return -EFAULT;
4755 		if (s390int_to_s390irq(&s390int, &s390irq))
4756 			return -EINVAL;
4757 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4758 	}
4759 	}
4760 	return -ENOIOCTLCMD;
4761 }
4762 
4763 long kvm_arch_vcpu_ioctl(struct file *filp,
4764 			 unsigned int ioctl, unsigned long arg)
4765 {
4766 	struct kvm_vcpu *vcpu = filp->private_data;
4767 	void __user *argp = (void __user *)arg;
4768 	int idx;
4769 	long r;
4770 	u16 rc, rrc;
4771 
4772 	vcpu_load(vcpu);
4773 
4774 	switch (ioctl) {
4775 	case KVM_S390_STORE_STATUS:
4776 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4777 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4778 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4779 		break;
4780 	case KVM_S390_SET_INITIAL_PSW: {
4781 		psw_t psw;
4782 
4783 		r = -EFAULT;
4784 		if (copy_from_user(&psw, argp, sizeof(psw)))
4785 			break;
4786 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4787 		break;
4788 	}
4789 	case KVM_S390_CLEAR_RESET:
4790 		r = 0;
4791 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4792 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4793 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4794 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4795 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4796 				   rc, rrc);
4797 		}
4798 		break;
4799 	case KVM_S390_INITIAL_RESET:
4800 		r = 0;
4801 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4802 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4803 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4804 					  UVC_CMD_CPU_RESET_INITIAL,
4805 					  &rc, &rrc);
4806 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4807 				   rc, rrc);
4808 		}
4809 		break;
4810 	case KVM_S390_NORMAL_RESET:
4811 		r = 0;
4812 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4813 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4814 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4815 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4816 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4817 				   rc, rrc);
4818 		}
4819 		break;
4820 	case KVM_SET_ONE_REG:
4821 	case KVM_GET_ONE_REG: {
4822 		struct kvm_one_reg reg;
4823 		r = -EINVAL;
4824 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4825 			break;
4826 		r = -EFAULT;
4827 		if (copy_from_user(&reg, argp, sizeof(reg)))
4828 			break;
4829 		if (ioctl == KVM_SET_ONE_REG)
4830 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4831 		else
4832 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4833 		break;
4834 	}
4835 #ifdef CONFIG_KVM_S390_UCONTROL
4836 	case KVM_S390_UCAS_MAP: {
4837 		struct kvm_s390_ucas_mapping ucasmap;
4838 
4839 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4840 			r = -EFAULT;
4841 			break;
4842 		}
4843 
4844 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4845 			r = -EINVAL;
4846 			break;
4847 		}
4848 
4849 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4850 				     ucasmap.vcpu_addr, ucasmap.length);
4851 		break;
4852 	}
4853 	case KVM_S390_UCAS_UNMAP: {
4854 		struct kvm_s390_ucas_mapping ucasmap;
4855 
4856 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857 			r = -EFAULT;
4858 			break;
4859 		}
4860 
4861 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4862 			r = -EINVAL;
4863 			break;
4864 		}
4865 
4866 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4867 			ucasmap.length);
4868 		break;
4869 	}
4870 #endif
4871 	case KVM_S390_VCPU_FAULT: {
4872 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4873 		break;
4874 	}
4875 	case KVM_ENABLE_CAP:
4876 	{
4877 		struct kvm_enable_cap cap;
4878 		r = -EFAULT;
4879 		if (copy_from_user(&cap, argp, sizeof(cap)))
4880 			break;
4881 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4882 		break;
4883 	}
4884 	case KVM_S390_MEM_OP: {
4885 		struct kvm_s390_mem_op mem_op;
4886 
4887 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4888 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4889 		else
4890 			r = -EFAULT;
4891 		break;
4892 	}
4893 	case KVM_S390_SET_IRQ_STATE: {
4894 		struct kvm_s390_irq_state irq_state;
4895 
4896 		r = -EFAULT;
4897 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4898 			break;
4899 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4900 		    irq_state.len == 0 ||
4901 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4902 			r = -EINVAL;
4903 			break;
4904 		}
4905 		/* do not use irq_state.flags, it will break old QEMUs */
4906 		r = kvm_s390_set_irq_state(vcpu,
4907 					   (void __user *) irq_state.buf,
4908 					   irq_state.len);
4909 		break;
4910 	}
4911 	case KVM_S390_GET_IRQ_STATE: {
4912 		struct kvm_s390_irq_state irq_state;
4913 
4914 		r = -EFAULT;
4915 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4916 			break;
4917 		if (irq_state.len == 0) {
4918 			r = -EINVAL;
4919 			break;
4920 		}
4921 		/* do not use irq_state.flags, it will break old QEMUs */
4922 		r = kvm_s390_get_irq_state(vcpu,
4923 					   (__u8 __user *)  irq_state.buf,
4924 					   irq_state.len);
4925 		break;
4926 	}
4927 	default:
4928 		r = -ENOTTY;
4929 	}
4930 
4931 	vcpu_put(vcpu);
4932 	return r;
4933 }
4934 
4935 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4936 {
4937 #ifdef CONFIG_KVM_S390_UCONTROL
4938 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4939 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4940 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4941 		get_page(vmf->page);
4942 		return 0;
4943 	}
4944 #endif
4945 	return VM_FAULT_SIGBUS;
4946 }
4947 
4948 /* Section: memory related */
4949 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4950 				   struct kvm_memory_slot *memslot,
4951 				   const struct kvm_userspace_memory_region *mem,
4952 				   enum kvm_mr_change change)
4953 {
	/*
	 * A few sanity checks. Memory slots have to start and end at a
	 * segment boundary (1MB). The memory in userland may be fragmented
	 * into various different vmas. It is okay to mmap() and munmap()
	 * stuff in this slot after doing this call at any time.
	 */
4958 
4959 	if (mem->userspace_addr & 0xffffful)
4960 		return -EINVAL;
4961 
4962 	if (mem->memory_size & 0xffffful)
4963 		return -EINVAL;
4964 
4965 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4966 		return -EINVAL;
4967 
4968 	/* When we are protected, we should not change the memory slots */
4969 	if (kvm_s390_pv_get_handle(kvm))
4970 		return -EINVAL;
4971 	return 0;
4972 }
4973 
4974 void kvm_arch_commit_memory_region(struct kvm *kvm,
4975 				const struct kvm_userspace_memory_region *mem,
4976 				struct kvm_memory_slot *old,
4977 				const struct kvm_memory_slot *new,
4978 				enum kvm_mr_change change)
4979 {
4980 	int rc = 0;
4981 
4982 	switch (change) {
4983 	case KVM_MR_DELETE:
4984 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4985 					old->npages * PAGE_SIZE);
4986 		break;
4987 	case KVM_MR_MOVE:
4988 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4989 					old->npages * PAGE_SIZE);
4990 		if (rc)
4991 			break;
4992 		fallthrough;
4993 	case KVM_MR_CREATE:
4994 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4995 				      mem->guest_phys_addr, mem->memory_size);
4996 		break;
4997 	case KVM_MR_FLAGS_ONLY:
4998 		break;
4999 	default:
5000 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5001 	}
5002 	if (rc)
5003 		pr_warn("failed to commit memory region\n");
5004 	return;
5005 }
5006 
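/*
 * Mask for facility list word i, derived from sclp.hmfai; used below
 * when populating kvm_s390_fac_base from the host facility list.
 */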
5007 static inline unsigned long nonhyp_mask(int i)
5008 {
5009 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5010 
5011 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5012 }
5013 
5014 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5015 {
5016 	vcpu->valid_wakeup = false;
5017 }
5018 
5019 static int __init kvm_s390_init(void)
5020 {
5021 	int i;
5022 
5023 	if (!sclp.has_sief2) {
5024 		pr_info("SIE is not available\n");
5025 		return -ENODEV;
5026 	}
5027 
5028 	if (nested && hpage) {
5029 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5030 		return -EINVAL;
5031 	}
5032 
5033 	for (i = 0; i < 16; i++)
5034 		kvm_s390_fac_base[i] |=
5035 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5036 
5037 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5038 }
5039 
5040 static void __exit kvm_s390_exit(void)
5041 {
5042 	kvm_exit();
5043 }
5044 
5045 module_init(kvm_s390_init);
5046 module_exit(kvm_s390_exit);
5047 
5048 /*
5049  * Enable autoloading of the kvm module.
5050  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5051  * since x86 takes a different approach.
5052  */
5053 #include <linux/miscdevice.h>
5054 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5055 MODULE_ALIAS("devname:kvm");
5056