xref: /linux/arch/s390/kvm/kvm-s390.c (revision 1f2367a39f17bd553a75e179a747f9b257bc9478)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
/* Size in bytes of (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries. */
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
/*
 * Each of these expands to TWO comma-separated initializer values: the offset
 * of counter 'x' within the stat member of the vcpu / vm structure, followed
 * by the matching KVM_STAT_* kind.
 */
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
/*
 * Table of named statistics counters. Every entry pairs a name with the
 * offset/kind produced by VCPU_STAT() (per-vcpu counter) or VM_STAT()
 * (per-VM counter); the list is terminated by a { NULL } entry.
 * NOTE(review): presumably walked by the generic KVM code to create the
 * debugfs files - the consumer is not visible in this file.
 */
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 	{ "deliver_program", VCPU_STAT(deliver_program) },
93 	{ "deliver_io", VCPU_STAT(deliver_io) },
94 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
97 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
98 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
99 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 	{ "inject_io", VM_STAT(inject_io) },
102 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
103 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 	{ "inject_program", VCPU_STAT(inject_program) },
105 	{ "inject_restart", VCPU_STAT(inject_restart) },
106 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
107 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 	{ "inject_virtio", VM_STAT(inject_virtio) },
111 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
113 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
120 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
122 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
124 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
125 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
126 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
128 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
130 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
133 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
138 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
161 	{ NULL }
162 };
163 
/*
 * Packed 16-byte layout: a one-byte epoch index, an 8-byte TOD value and
 * seven reserved bytes. __packed keeps 'tod' directly behind 'epoch_idx'.
 * NOTE(review): presumably the operand format of an extended store-clock
 * operation; the users of this struct are outside this excerpt - confirm.
 */
164 struct kvm_s390_tod_clock_ext {
165 	__u8 epoch_idx;
166 	__u64 tod;
167 	__u8 reserved[7];
168 } __packed;
169 
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174 
175 /* allow 1m huge page guest backing, if !nested */
176 static int hpage;
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179 
180 /*
181  * For now we handle at most 16 double words as this is what the s390 base
182  * kernel handles and stores in the prefix page. If we ever need to go beyond
183  * this, this requires changes to code, but the external uapi can stay.
184  */
185 #define SIZE_INTERNAL 16
186 
187 /*
188  * Base feature mask that defines default mask for facilities. Consists of the
189  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 *
 * Array elements that FACILITIES_KVM does not provide an initializer for
 * are zero, as for any partially initialized static array.
190  */
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
192 /*
193  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194  * and defines the facilities that can be enabled via a cpu model.
 *
 * As above, elements without an initializer are zero.
195  */
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
197 
198 static unsigned long kvm_s390_fac_size(void)
199 {
200 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203 		sizeof(S390_lowcore.stfle_fac_list));
204 
205 	return SIZE_INTERNAL;
206 }
207 
/* Both are filled in once by kvm_s390_cpu_feat_init(). */
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
212 
/* gmap pte notifiers; callbacks are installed by kvm_arch_hardware_setup() */
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle, registered as "kvm-trace" by kvm_arch_init() */
215 debug_info_t *kvm_s390_dbf;
216 
/* Section: not file related */

/*
 * Per-CPU enable hook for the generic KVM code. There is nothing to switch
 * on here, so this always reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
223 
/*
 * Forward declaration: installed as gmap_notifier.notifier_call in
 * kvm_arch_hardware_setup() before the definition appears in this file.
 */
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
225 			      unsigned long end);
226 
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
228 {
229 	u8 delta_idx = 0;
230 
231 	/*
232 	 * The TOD jumps by delta, we have to compensate this by adding
233 	 * -delta to the epoch.
234 	 */
235 	delta = -delta;
236 
237 	/* sign-extension - we're adding to signed values below */
238 	if ((s64)delta < 0)
239 		delta_idx = -1;
240 
241 	scb->epoch += delta;
242 	if (scb->ecd & ECD_MEF) {
243 		scb->epdx += delta_idx;
244 		if (scb->epoch < delta)
245 			scb->epdx += 1;
246 	}
247 }
248 
249 /*
250  * This callback is executed during stop_machine(). All CPUs are therefore
251  * temporarily stopped. In order not to change guest behavior, we have to
252  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253  * so a CPU won't be stopped while calculating with the epoch.
254  */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
256 			  void *v)
257 {
258 	struct kvm *kvm;
259 	struct kvm_vcpu *vcpu;
260 	int i;
261 	unsigned long long *delta = v;
262 
263 	list_for_each_entry(kvm, &vm_list, vm_list) {
264 		kvm_for_each_vcpu(i, vcpu, kvm) {
265 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
266 			if (i == 0) {
267 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
269 			}
270 			if (vcpu->arch.cputm_enabled)
271 				vcpu->arch.cputm_start += *delta;
272 			if (vcpu->arch.vsie_block)
273 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
274 						   *delta);
275 		}
276 	}
277 	return NOTIFY_OK;
278 }
279 
/*
 * Hooks kvm_clock_sync() into s390_epoch_delta_notifier; registered in
 * kvm_arch_hardware_setup() and removed in kvm_arch_hardware_unsetup().
 */
280 static struct notifier_block kvm_clock_notifier = {
281 	.notifier_call = kvm_clock_sync,
282 };
283 
284 int kvm_arch_hardware_setup(void)
285 {
286 	gmap_notifier.notifier_call = kvm_gmap_notifier;
287 	gmap_register_pte_notifier(&gmap_notifier);
288 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289 	gmap_register_pte_notifier(&vsie_gmap_notifier);
290 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291 				       &kvm_clock_notifier);
292 	return 0;
293 }
294 
295 void kvm_arch_hardware_unsetup(void)
296 {
297 	gmap_unregister_pte_notifier(&gmap_notifier);
298 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300 					 &kvm_clock_notifier);
301 }
302 
303 static void allow_cpu_feat(unsigned long nr)
304 {
305 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
306 }
307 
/*
 * Ask the PLO instruction whether function code @nr is available.
 *
 * Register 0 is loaded with @nr combined with 0x100 (the "test bit"); the
 * condition code left behind by PLO is then extracted with ipm/srl.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
308 static inline int plo_test_bit(unsigned char nr)
309 {
	/* bound to general register 0, which the asm below relies on */
310 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
311 	int cc;
312 
313 	asm volatile(
314 		/* Parameter registers are ignored for "test bit" */
315 		"	plo	0,0,0,0(0)\n"
316 		"	ipm	%0\n"
317 		"	srl	%0,28\n"
318 		: "=d" (cc)
319 		: "d" (r0)
320 		: "cc");
321 	return cc == 0;
322 }
323 
/*
 * Probe the host once and record what KVM may offer to guests:
 *  - the PLO, PTFF and CPACF subfunctions (kvm_s390_available_subfunc), each
 *    queried only if the corresponding facility is installed;
 *  - the cpu features (kvm_s390_available_cpu_feat).
 *
 * ESOP is advertised whenever the machine has it. All other features are
 * only advertised when the prerequisites checked below are present and the
 * 'nested' module parameter is set.
 */
324 static void kvm_s390_cpu_feat_init(void)
325 {
326 	int i;
327 
	/*
	 * Test all 256 PLO function codes; code i is stored MSB-first, i.e.
	 * as bit (0x80 >> (i & 7)) of byte i / 8.
	 */
328 	for (i = 0; i < 256; ++i) {
329 		if (plo_test_bit(i))
330 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
331 	}
332 
333 	if (test_facility(28)) /* TOD-clock steering */
334 		ptff(kvm_s390_available_subfunc.ptff,
335 		     sizeof(kvm_s390_available_subfunc.ptff),
336 		     PTFF_QAF);
337 
338 	if (test_facility(17)) { /* MSA */
339 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340 			      kvm_s390_available_subfunc.kmac);
341 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342 			      kvm_s390_available_subfunc.kmc);
343 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
344 			      kvm_s390_available_subfunc.km);
345 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346 			      kvm_s390_available_subfunc.kimd);
347 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348 			      kvm_s390_available_subfunc.klmd);
349 	}
350 	if (test_facility(76)) /* MSA3 */
351 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352 			      kvm_s390_available_subfunc.pckmo);
353 	if (test_facility(77)) { /* MSA4 */
354 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355 			      kvm_s390_available_subfunc.kmctr);
356 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357 			      kvm_s390_available_subfunc.kmf);
358 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359 			      kvm_s390_available_subfunc.kmo);
360 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361 			      kvm_s390_available_subfunc.pcc);
362 	}
363 	if (test_facility(57)) /* MSA5 */
364 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365 			      kvm_s390_available_subfunc.ppno);
366 
367 	if (test_facility(146)) /* MSA8 */
368 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369 			      kvm_s390_available_subfunc.kma);
370 
371 	if (MACHINE_HAS_ESOP)
372 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
373 	/*
374 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
376 	 */
377 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378 	    !test_facility(3) || !nested)
379 		return;
380 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	/* has_64bscao was already required by the early return above */
381 	if (sclp.has_64bscao)
382 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
383 	if (sclp.has_siif)
384 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
385 	if (sclp.has_gpere)
386 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
387 	if (sclp.has_gsls)
388 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
389 	if (sclp.has_ib)
390 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
391 	if (sclp.has_cei)
392 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
393 	if (sclp.has_ibs)
394 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
395 	if (sclp.has_kss)
396 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * The features named in the comment below are never passed to
	 * allow_cpu_feat() in this function; the comment gives the reasons.
	 */
397 	/*
398 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399 	 * all skey handling functions read/set the skey from the PGSTE
400 	 * instead of the real storage key.
401 	 *
402 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
403 	 * pages being detected as preserved although they are resident.
404 	 *
405 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
407 	 *
408 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
411 	 *
412 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413 	 * cannot easily shadow the SCA because of the ipte lock.
414 	 */
415 }
416 
417 int kvm_arch_init(void *opaque)
418 {
419 	int rc;
420 
421 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
422 	if (!kvm_s390_dbf)
423 		return -ENOMEM;
424 
425 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
426 		rc = -ENOMEM;
427 		goto out_debug_unreg;
428 	}
429 
430 	kvm_s390_cpu_feat_init();
431 
432 	/* Register floating interrupt controller interface. */
433 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
434 	if (rc) {
435 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
436 		goto out_debug_unreg;
437 	}
438 
439 	rc = kvm_s390_gib_init(GAL_ISC);
440 	if (rc)
441 		goto out_gib_destroy;
442 
443 	return 0;
444 
445 out_gib_destroy:
446 	kvm_s390_gib_destroy();
447 out_debug_unreg:
448 	debug_unregister(kvm_s390_dbf);
449 	return rc;
450 }
451 
452 void kvm_arch_exit(void)
453 {
454 	kvm_s390_gib_destroy();
455 	debug_unregister(kvm_s390_dbf);
456 }
457 
458 /* Section: device related */
459 long kvm_arch_dev_ioctl(struct file *filp,
460 			unsigned int ioctl, unsigned long arg)
461 {
462 	if (ioctl == KVM_S390_ENABLE_SIE)
463 		return s390_enable_sie();
464 	return -EINVAL;
465 }
466 
467 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
468 {
469 	int r;
470 
471 	switch (ext) {
472 	case KVM_CAP_S390_PSW:
473 	case KVM_CAP_S390_GMAP:
474 	case KVM_CAP_SYNC_MMU:
475 #ifdef CONFIG_KVM_S390_UCONTROL
476 	case KVM_CAP_S390_UCONTROL:
477 #endif
478 	case KVM_CAP_ASYNC_PF:
479 	case KVM_CAP_SYNC_REGS:
480 	case KVM_CAP_ONE_REG:
481 	case KVM_CAP_ENABLE_CAP:
482 	case KVM_CAP_S390_CSS_SUPPORT:
483 	case KVM_CAP_IOEVENTFD:
484 	case KVM_CAP_DEVICE_CTRL:
485 	case KVM_CAP_S390_IRQCHIP:
486 	case KVM_CAP_VM_ATTRIBUTES:
487 	case KVM_CAP_MP_STATE:
488 	case KVM_CAP_IMMEDIATE_EXIT:
489 	case KVM_CAP_S390_INJECT_IRQ:
490 	case KVM_CAP_S390_USER_SIGP:
491 	case KVM_CAP_S390_USER_STSI:
492 	case KVM_CAP_S390_SKEYS:
493 	case KVM_CAP_S390_IRQ_STATE:
494 	case KVM_CAP_S390_USER_INSTR0:
495 	case KVM_CAP_S390_CMMA_MIGRATION:
496 	case KVM_CAP_S390_AIS:
497 	case KVM_CAP_S390_AIS_MIGRATION:
498 		r = 1;
499 		break;
500 	case KVM_CAP_S390_HPAGE_1M:
501 		r = 0;
502 		if (hpage && !kvm_is_ucontrol(kvm))
503 			r = 1;
504 		break;
505 	case KVM_CAP_S390_MEM_OP:
506 		r = MEM_OP_MAX_SIZE;
507 		break;
508 	case KVM_CAP_NR_VCPUS:
509 	case KVM_CAP_MAX_VCPUS:
510 		r = KVM_S390_BSCA_CPU_SLOTS;
511 		if (!kvm_s390_use_sca_entries())
512 			r = KVM_MAX_VCPUS;
513 		else if (sclp.has_esca && sclp.has_64bscao)
514 			r = KVM_S390_ESCA_CPU_SLOTS;
515 		break;
516 	case KVM_CAP_NR_MEMSLOTS:
517 		r = KVM_USER_MEM_SLOTS;
518 		break;
519 	case KVM_CAP_S390_COW:
520 		r = MACHINE_HAS_ESOP;
521 		break;
522 	case KVM_CAP_S390_VECTOR_REGISTERS:
523 		r = MACHINE_HAS_VX;
524 		break;
525 	case KVM_CAP_S390_RI:
526 		r = test_facility(64);
527 		break;
528 	case KVM_CAP_S390_GS:
529 		r = test_facility(133);
530 		break;
531 	case KVM_CAP_S390_BPB:
532 		r = test_facility(82);
533 		break;
534 	default:
535 		r = 0;
536 	}
537 	return r;
538 }
539 
540 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
541 				    struct kvm_memory_slot *memslot)
542 {
543 	int i;
544 	gfn_t cur_gfn, last_gfn;
545 	unsigned long gaddr, vmaddr;
546 	struct gmap *gmap = kvm->arch.gmap;
547 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
548 
549 	/* Loop over all guest segments */
550 	cur_gfn = memslot->base_gfn;
551 	last_gfn = memslot->base_gfn + memslot->npages;
552 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
553 		gaddr = gfn_to_gpa(cur_gfn);
554 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
555 		if (kvm_is_error_hva(vmaddr))
556 			continue;
557 
558 		bitmap_zero(bitmap, _PAGE_ENTRIES);
559 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
560 		for (i = 0; i < _PAGE_ENTRIES; i++) {
561 			if (test_bit(i, bitmap))
562 				mark_page_dirty(kvm, cur_gfn + i);
563 		}
564 
565 		if (fatal_signal_pending(current))
566 			return;
567 		cond_resched();
568 	}
569 }
570 
571 /* Section: vm related */
572 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
573 
574 /*
575  * Get (and clear) the dirty memory log for a memory slot.
576  */
577 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
578 			       struct kvm_dirty_log *log)
579 {
580 	int r;
581 	unsigned long n;
582 	struct kvm_memslots *slots;
583 	struct kvm_memory_slot *memslot;
584 	int is_dirty = 0;
585 
586 	if (kvm_is_ucontrol(kvm))
587 		return -EINVAL;
588 
589 	mutex_lock(&kvm->slots_lock);
590 
591 	r = -EINVAL;
592 	if (log->slot >= KVM_USER_MEM_SLOTS)
593 		goto out;
594 
595 	slots = kvm_memslots(kvm);
596 	memslot = id_to_memslot(slots, log->slot);
597 	r = -ENOENT;
598 	if (!memslot->dirty_bitmap)
599 		goto out;
600 
601 	kvm_s390_sync_dirty_log(kvm, memslot);
602 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
603 	if (r)
604 		goto out;
605 
606 	/* Clear the dirty log */
607 	if (is_dirty) {
608 		n = kvm_dirty_bitmap_bytes(memslot);
609 		memset(memslot->dirty_bitmap, 0, n);
610 	}
611 	r = 0;
612 out:
613 	mutex_unlock(&kvm->slots_lock);
614 	return r;
615 }
616 
617 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
618 {
619 	unsigned int i;
620 	struct kvm_vcpu *vcpu;
621 
622 	kvm_for_each_vcpu(i, vcpu, kvm) {
623 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
624 	}
625 }
626 
/*
 * Enable a VM-wide capability on behalf of userspace.
 *
 * Capabilities that alter the facility lists (vector registers, RI, AIS, GS)
 * or the memory backing (1M huge pages) are handled under kvm->lock and are
 * refused with -EBUSY once vcpus have been created.
 *
 * Returns 0 on success, -EINVAL for non-zero cap->flags, for unknown
 * capabilities and for capabilities the host or VM configuration does not
 * allow, and -EBUSY as described above.
 */
627 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
628 {
629 	int r;
630 
631 	if (cap->flags)
632 		return -EINVAL;
633 
634 	switch (cap->cap) {
635 	case KVM_CAP_S390_IRQCHIP:
636 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
637 		kvm->arch.use_irqchip = 1;
638 		r = 0;
639 		break;
640 	case KVM_CAP_S390_USER_SIGP:
641 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
642 		kvm->arch.user_sigp = 1;
643 		r = 0;
644 		break;
645 	case KVM_CAP_S390_VECTOR_REGISTERS:
646 		mutex_lock(&kvm->lock);
647 		if (kvm->created_vcpus) {
648 			r = -EBUSY;
649 		} else if (MACHINE_HAS_VX) {
			/* 129 always; 134 and 135 only if the host has them */
650 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
651 			set_kvm_facility(kvm->arch.model.fac_list, 129);
652 			if (test_facility(134)) {
653 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
654 				set_kvm_facility(kvm->arch.model.fac_list, 134);
655 			}
656 			if (test_facility(135)) {
657 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
658 				set_kvm_facility(kvm->arch.model.fac_list, 135);
659 			}
660 			r = 0;
661 		} else
662 			r = -EINVAL;
663 		mutex_unlock(&kvm->lock);
		/* note: any non-zero r, including -EBUSY, logs "(not available)" */
664 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
665 			 r ? "(not available)" : "(success)");
666 		break;
667 	case KVM_CAP_S390_RI:
668 		r = -EINVAL;
669 		mutex_lock(&kvm->lock);
670 		if (kvm->created_vcpus) {
671 			r = -EBUSY;
672 		} else if (test_facility(64)) {
673 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
674 			set_kvm_facility(kvm->arch.model.fac_list, 64);
675 			r = 0;
676 		}
677 		mutex_unlock(&kvm->lock);
678 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
679 			 r ? "(not available)" : "(success)");
680 		break;
681 	case KVM_CAP_S390_AIS:
		/* no host facility test here: facility 72 is set unconditionally */
682 		mutex_lock(&kvm->lock);
683 		if (kvm->created_vcpus) {
684 			r = -EBUSY;
685 		} else {
686 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
687 			set_kvm_facility(kvm->arch.model.fac_list, 72);
688 			r = 0;
689 		}
690 		mutex_unlock(&kvm->lock);
691 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
692 			 r ? "(not available)" : "(success)");
693 		break;
694 	case KVM_CAP_S390_GS:
695 		r = -EINVAL;
696 		mutex_lock(&kvm->lock);
697 		if (kvm->created_vcpus) {
698 			r = -EBUSY;
699 		} else if (test_facility(133)) {
700 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
701 			set_kvm_facility(kvm->arch.model.fac_list, 133);
702 			r = 0;
703 		}
704 		mutex_unlock(&kvm->lock);
705 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
706 			 r ? "(not available)" : "(success)");
707 		break;
708 	case KVM_CAP_S390_HPAGE_1M:
709 		mutex_lock(&kvm->lock);
710 		if (kvm->created_vcpus)
711 			r = -EBUSY;
		/* refused without the module parameter, with CMMA, or for ucontrol */
712 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
713 			r = -EINVAL;
714 		else {
715 			r = 0;
			/* the mm context flag is changed under mmap_sem (write) */
716 			down_write(&kvm->mm->mmap_sem);
717 			kvm->mm->context.allow_gmap_hpage_1m = 1;
718 			up_write(&kvm->mm->mmap_sem);
719 			/*
720 			 * We might have to create fake 4k page
721 			 * tables. To avoid that the hardware works on
722 			 * stale PGSTEs, we emulate these instructions.
723 			 */
724 			kvm->arch.use_skf = 0;
725 			kvm->arch.use_pfmfi = 0;
726 		}
727 		mutex_unlock(&kvm->lock);
728 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
729 			 r ? "(not available)" : "(success)");
730 		break;
731 	case KVM_CAP_S390_USER_STSI:
732 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
733 		kvm->arch.user_stsi = 1;
734 		r = 0;
735 		break;
736 	case KVM_CAP_S390_USER_INSTR0:
737 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
738 		kvm->arch.user_instr0 = 1;
		/* push the changed setting to all existing vcpus */
739 		icpt_operexc_on_all_vcpus(kvm);
740 		r = 0;
741 		break;
742 	default:
743 		r = -EINVAL;
744 		break;
745 	}
746 	return r;
747 }
748 
749 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
750 {
751 	int ret;
752 
753 	switch (attr->attr) {
754 	case KVM_S390_VM_MEM_LIMIT_SIZE:
755 		ret = 0;
756 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
757 			 kvm->arch.mem_limit);
758 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
759 			ret = -EFAULT;
760 		break;
761 	default:
762 		ret = -ENXIO;
763 		break;
764 	}
765 	return ret;
766 }
767 
/*
 * Change a memory control attribute of the VM.
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA: turn on CMMA (and turn off pfmfi). -ENXIO
 *	without host CMMA support, -EBUSY once vcpus exist, -EINVAL if 1M huge
 *	page backing was enabled for the mm.
 * KVM_S390_VM_MEM_CLR_CMMA: reset the CMMA state of the guest memory.
 *	-ENXIO without host CMMA support, -EINVAL if CMMA is not in use.
 * KVM_S390_VM_MEM_LIMIT_SIZE: read a new limit from attr->addr and replace
 *	the VM's gmap by one created for that limit. -EINVAL for ucontrol VMs
 *	or a zero limit, -EFAULT if the value cannot be read, -E2BIG if it
 *	exceeds the current limit, -EBUSY once vcpus exist, -ENOMEM if the new
 *	gmap cannot be created.
 * Any other attribute: -ENXIO.
 *
 * Returns 0 on success.
 */
768 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
769 {
770 	int ret;
771 	unsigned int idx;
772 	switch (attr->attr) {
773 	case KVM_S390_VM_MEM_ENABLE_CMMA:
774 		ret = -ENXIO;
775 		if (!sclp.has_cmma)
776 			break;
777 
778 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
779 		mutex_lock(&kvm->lock);
780 		if (kvm->created_vcpus)
781 			ret = -EBUSY;
782 		else if (kvm->mm->context.allow_gmap_hpage_1m)
783 			ret = -EINVAL;
784 		else {
785 			kvm->arch.use_cmma = 1;
786 			/* Not compatible with cmma. */
787 			kvm->arch.use_pfmfi = 0;
788 			ret = 0;
789 		}
790 		mutex_unlock(&kvm->lock);
791 		break;
792 	case KVM_S390_VM_MEM_CLR_CMMA:
793 		ret = -ENXIO;
794 		if (!sclp.has_cmma)
795 			break;
796 		ret = -EINVAL;
797 		if (!kvm->arch.use_cmma)
798 			break;
799 
800 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		/* the reset runs under kvm->lock inside an srcu read section */
801 		mutex_lock(&kvm->lock);
802 		idx = srcu_read_lock(&kvm->srcu);
803 		s390_reset_cmma(kvm->arch.gmap->mm);
804 		srcu_read_unlock(&kvm->srcu, idx);
805 		mutex_unlock(&kvm->lock);
806 		ret = 0;
807 		break;
808 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
809 		unsigned long new_limit;
810 
		/* the checks below return directly: no lock is held yet */
811 		if (kvm_is_ucontrol(kvm))
812 			return -EINVAL;
813 
814 		if (get_user(new_limit, (u64 __user *)attr->addr))
815 			return -EFAULT;
816 
		/* an existing limit may be lowered or kept, never raised */
817 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
818 		    new_limit > kvm->arch.mem_limit)
819 			return -E2BIG;
820 
821 		if (!new_limit)
822 			return -EINVAL;
823 
824 		/* gmap_create takes last usable address */
825 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
826 			new_limit -= 1;
827 
828 		ret = -EBUSY;
829 		mutex_lock(&kvm->lock);
830 		if (!kvm->created_vcpus) {
831 			/* gmap_create will round the limit up */
832 			struct gmap *new = gmap_create(current->mm, new_limit);
833 
834 			if (!new) {
835 				ret = -ENOMEM;
836 			} else {
				/* swap gmaps: the old one is removed, the new one installed */
837 				gmap_remove(kvm->arch.gmap);
838 				new->private = kvm;
839 				kvm->arch.gmap = new;
840 				ret = 0;
841 			}
842 		}
843 		mutex_unlock(&kvm->lock);
		/* both events are logged regardless of the value of ret */
844 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
845 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
846 			 (void *) kvm->arch.gmap->asce);
847 		break;
848 	}
849 	default:
850 		ret = -ENXIO;
851 		break;
852 	}
853 	return ret;
854 }
855 
856 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
857 
858 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
859 {
860 	struct kvm_vcpu *vcpu;
861 	int i;
862 
863 	kvm_s390_vcpu_block_all(kvm);
864 
865 	kvm_for_each_vcpu(i, vcpu, kvm) {
866 		kvm_s390_vcpu_crypto_setup(vcpu);
867 		/* recreate the shadow crycb by leaving the VSIE handler */
868 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
869 	}
870 
871 	kvm_s390_vcpu_unblock_all(kvm);
872 }
873 
874 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
875 {
876 	mutex_lock(&kvm->lock);
877 	switch (attr->attr) {
878 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
879 		if (!test_kvm_facility(kvm, 76)) {
880 			mutex_unlock(&kvm->lock);
881 			return -EINVAL;
882 		}
883 		get_random_bytes(
884 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
885 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
886 		kvm->arch.crypto.aes_kw = 1;
887 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
888 		break;
889 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
890 		if (!test_kvm_facility(kvm, 76)) {
891 			mutex_unlock(&kvm->lock);
892 			return -EINVAL;
893 		}
894 		get_random_bytes(
895 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
896 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
897 		kvm->arch.crypto.dea_kw = 1;
898 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
899 		break;
900 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
901 		if (!test_kvm_facility(kvm, 76)) {
902 			mutex_unlock(&kvm->lock);
903 			return -EINVAL;
904 		}
905 		kvm->arch.crypto.aes_kw = 0;
906 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
907 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
908 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
909 		break;
910 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
911 		if (!test_kvm_facility(kvm, 76)) {
912 			mutex_unlock(&kvm->lock);
913 			return -EINVAL;
914 		}
915 		kvm->arch.crypto.dea_kw = 0;
916 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
917 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
918 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
919 		break;
920 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
921 		if (!ap_instructions_available()) {
922 			mutex_unlock(&kvm->lock);
923 			return -EOPNOTSUPP;
924 		}
925 		kvm->arch.crypto.apie = 1;
926 		break;
927 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
928 		if (!ap_instructions_available()) {
929 			mutex_unlock(&kvm->lock);
930 			return -EOPNOTSUPP;
931 		}
932 		kvm->arch.crypto.apie = 0;
933 		break;
934 	default:
935 		mutex_unlock(&kvm->lock);
936 		return -ENXIO;
937 	}
938 
939 	kvm_s390_vcpu_crypto_reset_all(kvm);
940 	mutex_unlock(&kvm->lock);
941 	return 0;
942 }
943 
944 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
945 {
946 	int cx;
947 	struct kvm_vcpu *vcpu;
948 
949 	kvm_for_each_vcpu(cx, vcpu, kvm)
950 		kvm_s390_sync_request(req, vcpu);
951 }
952 
953 /*
954  * Must be called with kvm->srcu held to avoid races on memslots, and with
955  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
956  */
957 static int kvm_s390_vm_start_migration(struct kvm *kvm)
958 {
959 	struct kvm_memory_slot *ms;
960 	struct kvm_memslots *slots;
961 	unsigned long ram_pages = 0;
962 	int slotnr;
963 
964 	/* migration mode already enabled */
965 	if (kvm->arch.migration_mode)
966 		return 0;
967 	slots = kvm_memslots(kvm);
968 	if (!slots || !slots->used_slots)
969 		return -EINVAL;
970 
971 	if (!kvm->arch.use_cmma) {
972 		kvm->arch.migration_mode = 1;
973 		return 0;
974 	}
975 	/* mark all the pages in active slots as dirty */
976 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
977 		ms = slots->memslots + slotnr;
978 		/*
979 		 * The second half of the bitmap is only used on x86,
980 		 * and would be wasted otherwise, so we put it to good
981 		 * use here to keep track of the state of the storage
982 		 * attributes.
983 		 */
984 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
985 		ram_pages += ms->npages;
986 	}
987 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
988 	kvm->arch.migration_mode = 1;
989 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
990 	return 0;
991 }
992 
993 /*
994  * Must be called with kvm->slots_lock to avoid races with ourselves and
995  * kvm_s390_vm_start_migration.
996  */
997 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
998 {
999 	/* migration mode already disabled */
1000 	if (!kvm->arch.migration_mode)
1001 		return 0;
1002 	kvm->arch.migration_mode = 0;
1003 	if (kvm->arch.use_cmma)
1004 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1005 	return 0;
1006 }
1007 
1008 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1009 				     struct kvm_device_attr *attr)
1010 {
1011 	int res = -ENXIO;
1012 
1013 	mutex_lock(&kvm->slots_lock);
1014 	switch (attr->attr) {
1015 	case KVM_S390_VM_MIGRATION_START:
1016 		res = kvm_s390_vm_start_migration(kvm);
1017 		break;
1018 	case KVM_S390_VM_MIGRATION_STOP:
1019 		res = kvm_s390_vm_stop_migration(kvm);
1020 		break;
1021 	default:
1022 		break;
1023 	}
1024 	mutex_unlock(&kvm->slots_lock);
1025 
1026 	return res;
1027 }
1028 
1029 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1030 				     struct kvm_device_attr *attr)
1031 {
1032 	u64 mig = kvm->arch.migration_mode;
1033 
1034 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1035 		return -ENXIO;
1036 
1037 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1038 		return -EFAULT;
1039 	return 0;
1040 }
1041 
1042 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1043 {
1044 	struct kvm_s390_vm_tod_clock gtod;
1045 
1046 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1047 		return -EFAULT;
1048 
1049 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1050 		return -EINVAL;
1051 	kvm_s390_set_tod_clock(kvm, &gtod);
1052 
1053 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1054 		gtod.epoch_idx, gtod.tod);
1055 
1056 	return 0;
1057 }
1058 
1059 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1060 {
1061 	u8 gtod_high;
1062 
1063 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1064 					   sizeof(gtod_high)))
1065 		return -EFAULT;
1066 
1067 	if (gtod_high != 0)
1068 		return -EINVAL;
1069 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1070 
1071 	return 0;
1072 }
1073 
1074 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1075 {
1076 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1077 
1078 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1079 			   sizeof(gtod.tod)))
1080 		return -EFAULT;
1081 
1082 	kvm_s390_set_tod_clock(kvm, &gtod);
1083 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1084 	return 0;
1085 }
1086 
1087 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1088 {
1089 	int ret;
1090 
1091 	if (attr->flags)
1092 		return -EINVAL;
1093 
1094 	switch (attr->attr) {
1095 	case KVM_S390_VM_TOD_EXT:
1096 		ret = kvm_s390_set_tod_ext(kvm, attr);
1097 		break;
1098 	case KVM_S390_VM_TOD_HIGH:
1099 		ret = kvm_s390_set_tod_high(kvm, attr);
1100 		break;
1101 	case KVM_S390_VM_TOD_LOW:
1102 		ret = kvm_s390_set_tod_low(kvm, attr);
1103 		break;
1104 	default:
1105 		ret = -ENXIO;
1106 		break;
1107 	}
1108 	return ret;
1109 }
1110 
1111 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1112 				   struct kvm_s390_vm_tod_clock *gtod)
1113 {
1114 	struct kvm_s390_tod_clock_ext htod;
1115 
1116 	preempt_disable();
1117 
1118 	get_tod_clock_ext((char *)&htod);
1119 
1120 	gtod->tod = htod.tod + kvm->arch.epoch;
1121 	gtod->epoch_idx = 0;
1122 	if (test_kvm_facility(kvm, 139)) {
1123 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1124 		if (gtod->tod < htod.tod)
1125 			gtod->epoch_idx += 1;
1126 	}
1127 
1128 	preempt_enable();
1129 }
1130 
1131 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1132 {
1133 	struct kvm_s390_vm_tod_clock gtod;
1134 
1135 	memset(&gtod, 0, sizeof(gtod));
1136 	kvm_s390_get_tod_clock(kvm, &gtod);
1137 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1138 		return -EFAULT;
1139 
1140 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1141 		gtod.epoch_idx, gtod.tod);
1142 	return 0;
1143 }
1144 
1145 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147 	u8 gtod_high = 0;
1148 
1149 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1150 					 sizeof(gtod_high)))
1151 		return -EFAULT;
1152 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1153 
1154 	return 0;
1155 }
1156 
1157 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1158 {
1159 	u64 gtod;
1160 
1161 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1162 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1163 		return -EFAULT;
1164 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1165 
1166 	return 0;
1167 }
1168 
1169 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1170 {
1171 	int ret;
1172 
1173 	if (attr->flags)
1174 		return -EINVAL;
1175 
1176 	switch (attr->attr) {
1177 	case KVM_S390_VM_TOD_EXT:
1178 		ret = kvm_s390_get_tod_ext(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_HIGH:
1181 		ret = kvm_s390_get_tod_high(kvm, attr);
1182 		break;
1183 	case KVM_S390_VM_TOD_LOW:
1184 		ret = kvm_s390_get_tod_low(kvm, attr);
1185 		break;
1186 	default:
1187 		ret = -ENXIO;
1188 		break;
1189 	}
1190 	return ret;
1191 }
1192 
1193 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1194 {
1195 	struct kvm_s390_vm_cpu_processor *proc;
1196 	u16 lowest_ibc, unblocked_ibc;
1197 	int ret = 0;
1198 
1199 	mutex_lock(&kvm->lock);
1200 	if (kvm->created_vcpus) {
1201 		ret = -EBUSY;
1202 		goto out;
1203 	}
1204 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1205 	if (!proc) {
1206 		ret = -ENOMEM;
1207 		goto out;
1208 	}
1209 	if (!copy_from_user(proc, (void __user *)attr->addr,
1210 			    sizeof(*proc))) {
1211 		kvm->arch.model.cpuid = proc->cpuid;
1212 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1213 		unblocked_ibc = sclp.ibc & 0xfff;
1214 		if (lowest_ibc && proc->ibc) {
1215 			if (proc->ibc > unblocked_ibc)
1216 				kvm->arch.model.ibc = unblocked_ibc;
1217 			else if (proc->ibc < lowest_ibc)
1218 				kvm->arch.model.ibc = lowest_ibc;
1219 			else
1220 				kvm->arch.model.ibc = proc->ibc;
1221 		}
1222 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1223 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1224 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1225 			 kvm->arch.model.ibc,
1226 			 kvm->arch.model.cpuid);
1227 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1228 			 kvm->arch.model.fac_list[0],
1229 			 kvm->arch.model.fac_list[1],
1230 			 kvm->arch.model.fac_list[2]);
1231 	} else
1232 		ret = -EFAULT;
1233 	kfree(proc);
1234 out:
1235 	mutex_unlock(&kvm->lock);
1236 	return ret;
1237 }
1238 
1239 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1240 				       struct kvm_device_attr *attr)
1241 {
1242 	struct kvm_s390_vm_cpu_feat data;
1243 
1244 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1245 		return -EFAULT;
1246 	if (!bitmap_subset((unsigned long *) data.feat,
1247 			   kvm_s390_available_cpu_feat,
1248 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1249 		return -EINVAL;
1250 
1251 	mutex_lock(&kvm->lock);
1252 	if (kvm->created_vcpus) {
1253 		mutex_unlock(&kvm->lock);
1254 		return -EBUSY;
1255 	}
1256 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1257 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1258 	mutex_unlock(&kvm->lock);
1259 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1260 			 data.feat[0],
1261 			 data.feat[1],
1262 			 data.feat[2]);
1263 	return 0;
1264 }
1265 
1266 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1267 					  struct kvm_device_attr *attr)
1268 {
1269 	mutex_lock(&kvm->lock);
1270 	if (kvm->created_vcpus) {
1271 		mutex_unlock(&kvm->lock);
1272 		return -EBUSY;
1273 	}
1274 
1275 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1276 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1277 		mutex_unlock(&kvm->lock);
1278 		return -EFAULT;
1279 	}
1280 	mutex_unlock(&kvm->lock);
1281 
1282 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1283 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1284 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1285 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1286 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1287 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1288 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1289 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1290 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1291 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1292 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1293 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1294 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1295 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1296 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1297 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1298 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1299 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1300 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1301 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1302 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1303 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1304 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1305 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1306 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1307 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1308 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1309 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1310 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1311 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1312 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1313 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1314 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1315 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1316 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1317 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1318 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1319 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1320 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1321 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1322 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1323 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1324 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1325 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1326 
1327 	return 0;
1328 }
1329 
1330 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1331 {
1332 	int ret = -ENXIO;
1333 
1334 	switch (attr->attr) {
1335 	case KVM_S390_VM_CPU_PROCESSOR:
1336 		ret = kvm_s390_set_processor(kvm, attr);
1337 		break;
1338 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1339 		ret = kvm_s390_set_processor_feat(kvm, attr);
1340 		break;
1341 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1342 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1343 		break;
1344 	}
1345 	return ret;
1346 }
1347 
1348 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1349 {
1350 	struct kvm_s390_vm_cpu_processor *proc;
1351 	int ret = 0;
1352 
1353 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1354 	if (!proc) {
1355 		ret = -ENOMEM;
1356 		goto out;
1357 	}
1358 	proc->cpuid = kvm->arch.model.cpuid;
1359 	proc->ibc = kvm->arch.model.ibc;
1360 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1361 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1362 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1363 		 kvm->arch.model.ibc,
1364 		 kvm->arch.model.cpuid);
1365 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1366 		 kvm->arch.model.fac_list[0],
1367 		 kvm->arch.model.fac_list[1],
1368 		 kvm->arch.model.fac_list[2]);
1369 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1370 		ret = -EFAULT;
1371 	kfree(proc);
1372 out:
1373 	return ret;
1374 }
1375 
1376 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1377 {
1378 	struct kvm_s390_vm_cpu_machine *mach;
1379 	int ret = 0;
1380 
1381 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1382 	if (!mach) {
1383 		ret = -ENOMEM;
1384 		goto out;
1385 	}
1386 	get_cpu_id((struct cpuid *) &mach->cpuid);
1387 	mach->ibc = sclp.ibc;
1388 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1389 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1390 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1391 	       sizeof(S390_lowcore.stfle_fac_list));
1392 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1393 		 kvm->arch.model.ibc,
1394 		 kvm->arch.model.cpuid);
1395 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1396 		 mach->fac_mask[0],
1397 		 mach->fac_mask[1],
1398 		 mach->fac_mask[2]);
1399 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1400 		 mach->fac_list[0],
1401 		 mach->fac_list[1],
1402 		 mach->fac_list[2]);
1403 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1404 		ret = -EFAULT;
1405 	kfree(mach);
1406 out:
1407 	return ret;
1408 }
1409 
1410 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1411 				       struct kvm_device_attr *attr)
1412 {
1413 	struct kvm_s390_vm_cpu_feat data;
1414 
1415 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1416 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1417 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1418 		return -EFAULT;
1419 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1420 			 data.feat[0],
1421 			 data.feat[1],
1422 			 data.feat[2]);
1423 	return 0;
1424 }
1425 
1426 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1427 				     struct kvm_device_attr *attr)
1428 {
1429 	struct kvm_s390_vm_cpu_feat data;
1430 
1431 	bitmap_copy((unsigned long *) data.feat,
1432 		    kvm_s390_available_cpu_feat,
1433 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1434 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1435 		return -EFAULT;
1436 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1437 			 data.feat[0],
1438 			 data.feat[1],
1439 			 data.feat[2]);
1440 	return 0;
1441 }
1442 
1443 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1444 					  struct kvm_device_attr *attr)
1445 {
1446 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1447 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1448 		return -EFAULT;
1449 
1450 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1451 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1452 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1453 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1454 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1455 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1456 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1457 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1458 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1459 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1460 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1461 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1462 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1463 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1464 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1465 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1466 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1467 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1468 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1469 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1470 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1471 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1472 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1473 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1474 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1475 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1476 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1477 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1478 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1479 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1480 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1481 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1482 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1483 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1484 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1485 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1486 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1487 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1488 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1489 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1490 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1491 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1492 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1493 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1494 
1495 	return 0;
1496 }
1497 
1498 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1499 					struct kvm_device_attr *attr)
1500 {
1501 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1502 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503 		return -EFAULT;
1504 
1505 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1507 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1508 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1509 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1510 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1511 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1512 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1513 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1514 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1515 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1516 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1517 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1518 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1519 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1520 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1521 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1522 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1523 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1524 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1525 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1526 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1527 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1528 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1530 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1531 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1533 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1534 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1535 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1536 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1537 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1538 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1539 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1540 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1541 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1542 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1543 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1544 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1545 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1546 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1547 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1548 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1549 
1550 	return 0;
1551 }
1552 
1553 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1554 {
1555 	int ret = -ENXIO;
1556 
1557 	switch (attr->attr) {
1558 	case KVM_S390_VM_CPU_PROCESSOR:
1559 		ret = kvm_s390_get_processor(kvm, attr);
1560 		break;
1561 	case KVM_S390_VM_CPU_MACHINE:
1562 		ret = kvm_s390_get_machine(kvm, attr);
1563 		break;
1564 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1565 		ret = kvm_s390_get_processor_feat(kvm, attr);
1566 		break;
1567 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1568 		ret = kvm_s390_get_machine_feat(kvm, attr);
1569 		break;
1570 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1571 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1572 		break;
1573 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1574 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1575 		break;
1576 	}
1577 	return ret;
1578 }
1579 
1580 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1581 {
1582 	int ret;
1583 
1584 	switch (attr->group) {
1585 	case KVM_S390_VM_MEM_CTRL:
1586 		ret = kvm_s390_set_mem_control(kvm, attr);
1587 		break;
1588 	case KVM_S390_VM_TOD:
1589 		ret = kvm_s390_set_tod(kvm, attr);
1590 		break;
1591 	case KVM_S390_VM_CPU_MODEL:
1592 		ret = kvm_s390_set_cpu_model(kvm, attr);
1593 		break;
1594 	case KVM_S390_VM_CRYPTO:
1595 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1596 		break;
1597 	case KVM_S390_VM_MIGRATION:
1598 		ret = kvm_s390_vm_set_migration(kvm, attr);
1599 		break;
1600 	default:
1601 		ret = -ENXIO;
1602 		break;
1603 	}
1604 
1605 	return ret;
1606 }
1607 
1608 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1609 {
1610 	int ret;
1611 
1612 	switch (attr->group) {
1613 	case KVM_S390_VM_MEM_CTRL:
1614 		ret = kvm_s390_get_mem_control(kvm, attr);
1615 		break;
1616 	case KVM_S390_VM_TOD:
1617 		ret = kvm_s390_get_tod(kvm, attr);
1618 		break;
1619 	case KVM_S390_VM_CPU_MODEL:
1620 		ret = kvm_s390_get_cpu_model(kvm, attr);
1621 		break;
1622 	case KVM_S390_VM_MIGRATION:
1623 		ret = kvm_s390_vm_get_migration(kvm, attr);
1624 		break;
1625 	default:
1626 		ret = -ENXIO;
1627 		break;
1628 	}
1629 
1630 	return ret;
1631 }
1632 
1633 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1634 {
1635 	int ret;
1636 
1637 	switch (attr->group) {
1638 	case KVM_S390_VM_MEM_CTRL:
1639 		switch (attr->attr) {
1640 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1641 		case KVM_S390_VM_MEM_CLR_CMMA:
1642 			ret = sclp.has_cmma ? 0 : -ENXIO;
1643 			break;
1644 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1645 			ret = 0;
1646 			break;
1647 		default:
1648 			ret = -ENXIO;
1649 			break;
1650 		}
1651 		break;
1652 	case KVM_S390_VM_TOD:
1653 		switch (attr->attr) {
1654 		case KVM_S390_VM_TOD_LOW:
1655 		case KVM_S390_VM_TOD_HIGH:
1656 			ret = 0;
1657 			break;
1658 		default:
1659 			ret = -ENXIO;
1660 			break;
1661 		}
1662 		break;
1663 	case KVM_S390_VM_CPU_MODEL:
1664 		switch (attr->attr) {
1665 		case KVM_S390_VM_CPU_PROCESSOR:
1666 		case KVM_S390_VM_CPU_MACHINE:
1667 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1668 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1669 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1670 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1671 			ret = 0;
1672 			break;
1673 		default:
1674 			ret = -ENXIO;
1675 			break;
1676 		}
1677 		break;
1678 	case KVM_S390_VM_CRYPTO:
1679 		switch (attr->attr) {
1680 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1681 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1682 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1683 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1684 			ret = 0;
1685 			break;
1686 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1687 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1688 			ret = ap_instructions_available() ? 0 : -ENXIO;
1689 			break;
1690 		default:
1691 			ret = -ENXIO;
1692 			break;
1693 		}
1694 		break;
1695 	case KVM_S390_VM_MIGRATION:
1696 		ret = 0;
1697 		break;
1698 	default:
1699 		ret = -ENXIO;
1700 		break;
1701 	}
1702 
1703 	return ret;
1704 }
1705 
1706 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1707 {
1708 	uint8_t *keys;
1709 	uint64_t hva;
1710 	int srcu_idx, i, r = 0;
1711 
1712 	if (args->flags != 0)
1713 		return -EINVAL;
1714 
1715 	/* Is this guest using storage keys? */
1716 	if (!mm_uses_skeys(current->mm))
1717 		return KVM_S390_GET_SKEYS_NONE;
1718 
1719 	/* Enforce sane limit on memory allocation */
1720 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1721 		return -EINVAL;
1722 
1723 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1724 	if (!keys)
1725 		return -ENOMEM;
1726 
1727 	down_read(&current->mm->mmap_sem);
1728 	srcu_idx = srcu_read_lock(&kvm->srcu);
1729 	for (i = 0; i < args->count; i++) {
1730 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1731 		if (kvm_is_error_hva(hva)) {
1732 			r = -EFAULT;
1733 			break;
1734 		}
1735 
1736 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1737 		if (r)
1738 			break;
1739 	}
1740 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1741 	up_read(&current->mm->mmap_sem);
1742 
1743 	if (!r) {
1744 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1745 				 sizeof(uint8_t) * args->count);
1746 		if (r)
1747 			r = -EFAULT;
1748 	}
1749 
1750 	kvfree(keys);
1751 	return r;
1752 }
1753 
1754 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1755 {
1756 	uint8_t *keys;
1757 	uint64_t hva;
1758 	int srcu_idx, i, r = 0;
1759 	bool unlocked;
1760 
1761 	if (args->flags != 0)
1762 		return -EINVAL;
1763 
1764 	/* Enforce sane limit on memory allocation */
1765 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1766 		return -EINVAL;
1767 
1768 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1769 	if (!keys)
1770 		return -ENOMEM;
1771 
1772 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1773 			   sizeof(uint8_t) * args->count);
1774 	if (r) {
1775 		r = -EFAULT;
1776 		goto out;
1777 	}
1778 
1779 	/* Enable storage key handling for the guest */
1780 	r = s390_enable_skey();
1781 	if (r)
1782 		goto out;
1783 
1784 	i = 0;
1785 	down_read(&current->mm->mmap_sem);
1786 	srcu_idx = srcu_read_lock(&kvm->srcu);
1787         while (i < args->count) {
1788 		unlocked = false;
1789 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1790 		if (kvm_is_error_hva(hva)) {
1791 			r = -EFAULT;
1792 			break;
1793 		}
1794 
1795 		/* Lowest order bit is reserved */
1796 		if (keys[i] & 0x01) {
1797 			r = -EINVAL;
1798 			break;
1799 		}
1800 
1801 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1802 		if (r) {
1803 			r = fixup_user_fault(current, current->mm, hva,
1804 					     FAULT_FLAG_WRITE, &unlocked);
1805 			if (r)
1806 				break;
1807 		}
1808 		if (!r)
1809 			i++;
1810 	}
1811 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1812 	up_read(&current->mm->mmap_sem);
1813 out:
1814 	kvfree(keys);
1815 	return r;
1816 }
1817 
1818 /*
1819  * Base address and length must be sent at the start of each block, therefore
1820  * it's cheaper to send some clean data, as long as it's less than the size of
1821  * two longs.
1822  */
1823 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Same upper bound as the storage key interface, for consistency */
1825 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1826 
1827 /*
1828  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1829  * address falls in a hole. In that case the index of one of the memslots
1830  * bordering the hole is returned.
1831  */
1832 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1833 {
1834 	int start = 0, end = slots->used_slots;
1835 	int slot = atomic_read(&slots->lru_slot);
1836 	struct kvm_memory_slot *memslots = slots->memslots;
1837 
1838 	if (gfn >= memslots[slot].base_gfn &&
1839 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1840 		return slot;
1841 
1842 	while (start < end) {
1843 		slot = start + (end - start) / 2;
1844 
1845 		if (gfn >= memslots[slot].base_gfn)
1846 			end = slot;
1847 		else
1848 			start = slot + 1;
1849 	}
1850 
1851 	if (gfn >= memslots[start].base_gfn &&
1852 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1853 		atomic_set(&slots->lru_slot, start);
1854 	}
1855 
1856 	return start;
1857 }
1858 
1859 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1860 			      u8 *res, unsigned long bufsize)
1861 {
1862 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1863 
1864 	args->count = 0;
1865 	while (args->count < bufsize) {
1866 		hva = gfn_to_hva(kvm, cur_gfn);
1867 		/*
1868 		 * We return an error if the first value was invalid, but we
1869 		 * return successfully if at least one value was copied.
1870 		 */
1871 		if (kvm_is_error_hva(hva))
1872 			return args->count ? 0 : -EFAULT;
1873 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1874 			pgstev = 0;
1875 		res[args->count++] = (pgstev >> 24) & 0x43;
1876 		cur_gfn++;
1877 	}
1878 
1879 	return 0;
1880 }
1881 
1882 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1883 					      unsigned long cur_gfn)
1884 {
1885 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1886 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1887 	unsigned long ofs = cur_gfn - ms->base_gfn;
1888 
1889 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1890 		slotidx--;
1891 		/* If we are above the highest slot, wrap around */
1892 		if (slotidx < 0)
1893 			slotidx = slots->used_slots - 1;
1894 
1895 		ms = slots->memslots + slotidx;
1896 		ofs = 0;
1897 	}
1898 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1899 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1900 		slotidx--;
1901 		ms = slots->memslots + slotidx;
1902 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1903 	}
1904 	return ms->base_gfn + ofs;
1905 }
1906 
1907 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1908 			     u8 *res, unsigned long bufsize)
1909 {
1910 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1911 	struct kvm_memslots *slots = kvm_memslots(kvm);
1912 	struct kvm_memory_slot *ms;
1913 
1914 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1915 	ms = gfn_to_memslot(kvm, cur_gfn);
1916 	args->count = 0;
1917 	args->start_gfn = cur_gfn;
1918 	if (!ms)
1919 		return 0;
1920 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1921 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1922 
1923 	while (args->count < bufsize) {
1924 		hva = gfn_to_hva(kvm, cur_gfn);
1925 		if (kvm_is_error_hva(hva))
1926 			return 0;
1927 		/* Decrement only if we actually flipped the bit to 0 */
1928 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1929 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
1930 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1931 			pgstev = 0;
1932 		/* Save the value */
1933 		res[args->count++] = (pgstev >> 24) & 0x43;
1934 		/* If the next bit is too far away, stop. */
1935 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1936 			return 0;
1937 		/* If we reached the previous "next", find the next one */
1938 		if (cur_gfn == next_gfn)
1939 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1940 		/* Reached the end of memory or of the buffer, stop */
1941 		if ((next_gfn >= mem_end) ||
1942 		    (next_gfn - args->start_gfn >= bufsize))
1943 			return 0;
1944 		cur_gfn++;
1945 		/* Reached the end of the current memslot, take the next one. */
1946 		if (cur_gfn - ms->base_gfn >= ms->npages) {
1947 			ms = gfn_to_memslot(kvm, cur_gfn);
1948 			if (!ms)
1949 				return 0;
1950 		}
1951 	}
1952 	return 0;
1953 }
1954 
1955 /*
1956  * This function searches for the next page with dirty CMMA attributes, and
1957  * saves the attributes in the buffer up to either the end of the buffer or
1958  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1959  * no trailing clean bytes are saved.
1960  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1961  * output buffer will indicate 0 as length.
1962  */
1963 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1964 				  struct kvm_s390_cmma_log *args)
1965 {
1966 	unsigned long bufsize;
1967 	int srcu_idx, peek, ret;
1968 	u8 *values;
1969 
1970 	if (!kvm->arch.use_cmma)
1971 		return -ENXIO;
1972 	/* Invalid/unsupported flags were specified */
1973 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1974 		return -EINVAL;
1975 	/* Migration mode query, and we are not doing a migration */
1976 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1977 	if (!peek && !kvm->arch.migration_mode)
1978 		return -EINVAL;
1979 	/* CMMA is disabled or was not used, or the buffer has length zero */
1980 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1981 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1982 		memset(args, 0, sizeof(*args));
1983 		return 0;
1984 	}
1985 	/* We are not peeking, and there are no dirty pages */
1986 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1987 		memset(args, 0, sizeof(*args));
1988 		return 0;
1989 	}
1990 
1991 	values = vmalloc(bufsize);
1992 	if (!values)
1993 		return -ENOMEM;
1994 
1995 	down_read(&kvm->mm->mmap_sem);
1996 	srcu_idx = srcu_read_lock(&kvm->srcu);
1997 	if (peek)
1998 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1999 	else
2000 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2001 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2002 	up_read(&kvm->mm->mmap_sem);
2003 
2004 	if (kvm->arch.migration_mode)
2005 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2006 	else
2007 		args->remaining = 0;
2008 
2009 	if (copy_to_user((void __user *)args->values, values, args->count))
2010 		ret = -EFAULT;
2011 
2012 	vfree(values);
2013 	return ret;
2014 }
2015 
2016 /*
2017  * This function sets the CMMA attributes for the given pages. If the input
2018  * buffer has zero length, no action is taken, otherwise the attributes are
2019  * set and the mm->context.uses_cmm flag is set.
2020  */
2021 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2022 				  const struct kvm_s390_cmma_log *args)
2023 {
2024 	unsigned long hva, mask, pgstev, i;
2025 	uint8_t *bits;
2026 	int srcu_idx, r = 0;
2027 
2028 	mask = args->mask;
2029 
2030 	if (!kvm->arch.use_cmma)
2031 		return -ENXIO;
2032 	/* invalid/unsupported flags */
2033 	if (args->flags != 0)
2034 		return -EINVAL;
2035 	/* Enforce sane limit on memory allocation */
2036 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2037 		return -EINVAL;
2038 	/* Nothing to do */
2039 	if (args->count == 0)
2040 		return 0;
2041 
2042 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2043 	if (!bits)
2044 		return -ENOMEM;
2045 
2046 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2047 	if (r) {
2048 		r = -EFAULT;
2049 		goto out;
2050 	}
2051 
2052 	down_read(&kvm->mm->mmap_sem);
2053 	srcu_idx = srcu_read_lock(&kvm->srcu);
2054 	for (i = 0; i < args->count; i++) {
2055 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2056 		if (kvm_is_error_hva(hva)) {
2057 			r = -EFAULT;
2058 			break;
2059 		}
2060 
2061 		pgstev = bits[i];
2062 		pgstev = pgstev << 24;
2063 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2064 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2065 	}
2066 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2067 	up_read(&kvm->mm->mmap_sem);
2068 
2069 	if (!kvm->mm->context.uses_cmm) {
2070 		down_write(&kvm->mm->mmap_sem);
2071 		kvm->mm->context.uses_cmm = 1;
2072 		up_write(&kvm->mm->mmap_sem);
2073 	}
2074 out:
2075 	vfree(bits);
2076 	return r;
2077 }
2078 
2079 long kvm_arch_vm_ioctl(struct file *filp,
2080 		       unsigned int ioctl, unsigned long arg)
2081 {
2082 	struct kvm *kvm = filp->private_data;
2083 	void __user *argp = (void __user *)arg;
2084 	struct kvm_device_attr attr;
2085 	int r;
2086 
2087 	switch (ioctl) {
2088 	case KVM_S390_INTERRUPT: {
2089 		struct kvm_s390_interrupt s390int;
2090 
2091 		r = -EFAULT;
2092 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2093 			break;
2094 		r = kvm_s390_inject_vm(kvm, &s390int);
2095 		break;
2096 	}
2097 	case KVM_CREATE_IRQCHIP: {
2098 		struct kvm_irq_routing_entry routing;
2099 
2100 		r = -EINVAL;
2101 		if (kvm->arch.use_irqchip) {
2102 			/* Set up dummy routing. */
2103 			memset(&routing, 0, sizeof(routing));
2104 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2105 		}
2106 		break;
2107 	}
2108 	case KVM_SET_DEVICE_ATTR: {
2109 		r = -EFAULT;
2110 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2111 			break;
2112 		r = kvm_s390_vm_set_attr(kvm, &attr);
2113 		break;
2114 	}
2115 	case KVM_GET_DEVICE_ATTR: {
2116 		r = -EFAULT;
2117 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2118 			break;
2119 		r = kvm_s390_vm_get_attr(kvm, &attr);
2120 		break;
2121 	}
2122 	case KVM_HAS_DEVICE_ATTR: {
2123 		r = -EFAULT;
2124 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2125 			break;
2126 		r = kvm_s390_vm_has_attr(kvm, &attr);
2127 		break;
2128 	}
2129 	case KVM_S390_GET_SKEYS: {
2130 		struct kvm_s390_skeys args;
2131 
2132 		r = -EFAULT;
2133 		if (copy_from_user(&args, argp,
2134 				   sizeof(struct kvm_s390_skeys)))
2135 			break;
2136 		r = kvm_s390_get_skeys(kvm, &args);
2137 		break;
2138 	}
2139 	case KVM_S390_SET_SKEYS: {
2140 		struct kvm_s390_skeys args;
2141 
2142 		r = -EFAULT;
2143 		if (copy_from_user(&args, argp,
2144 				   sizeof(struct kvm_s390_skeys)))
2145 			break;
2146 		r = kvm_s390_set_skeys(kvm, &args);
2147 		break;
2148 	}
2149 	case KVM_S390_GET_CMMA_BITS: {
2150 		struct kvm_s390_cmma_log args;
2151 
2152 		r = -EFAULT;
2153 		if (copy_from_user(&args, argp, sizeof(args)))
2154 			break;
2155 		mutex_lock(&kvm->slots_lock);
2156 		r = kvm_s390_get_cmma_bits(kvm, &args);
2157 		mutex_unlock(&kvm->slots_lock);
2158 		if (!r) {
2159 			r = copy_to_user(argp, &args, sizeof(args));
2160 			if (r)
2161 				r = -EFAULT;
2162 		}
2163 		break;
2164 	}
2165 	case KVM_S390_SET_CMMA_BITS: {
2166 		struct kvm_s390_cmma_log args;
2167 
2168 		r = -EFAULT;
2169 		if (copy_from_user(&args, argp, sizeof(args)))
2170 			break;
2171 		mutex_lock(&kvm->slots_lock);
2172 		r = kvm_s390_set_cmma_bits(kvm, &args);
2173 		mutex_unlock(&kvm->slots_lock);
2174 		break;
2175 	}
2176 	default:
2177 		r = -ENOTTY;
2178 	}
2179 
2180 	return r;
2181 }
2182 
2183 static int kvm_s390_apxa_installed(void)
2184 {
2185 	struct ap_config_info info;
2186 
2187 	if (ap_instructions_available()) {
2188 		if (ap_qci(&info) == 0)
2189 			return info.apxa;
2190 	}
2191 
2192 	return 0;
2193 }
2194 
2195 /*
2196  * The format of the crypto control block (CRYCB) is specified in the 3 low
2197  * order bits of the CRYCB designation (CRYCBD) field as follows:
2198  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2199  *	     AP extended addressing (APXA) facility are installed.
2200  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2201  * Format 2: Both the APXA and MSAX3 facilities are installed
2202  */
2203 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2204 {
2205 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2206 
2207 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2208 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2209 
2210 	/* Check whether MSAX3 is installed */
2211 	if (!test_kvm_facility(kvm, 76))
2212 		return;
2213 
2214 	if (kvm_s390_apxa_installed())
2215 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2216 	else
2217 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2218 }
2219 
2220 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2221 			       unsigned long *aqm, unsigned long *adm)
2222 {
2223 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2224 
2225 	mutex_lock(&kvm->lock);
2226 	kvm_s390_vcpu_block_all(kvm);
2227 
2228 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2229 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2230 		memcpy(crycb->apcb1.apm, apm, 32);
2231 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2232 			 apm[0], apm[1], apm[2], apm[3]);
2233 		memcpy(crycb->apcb1.aqm, aqm, 32);
2234 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2235 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2236 		memcpy(crycb->apcb1.adm, adm, 32);
2237 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2238 			 adm[0], adm[1], adm[2], adm[3]);
2239 		break;
2240 	case CRYCB_FORMAT1:
2241 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2242 		memcpy(crycb->apcb0.apm, apm, 8);
2243 		memcpy(crycb->apcb0.aqm, aqm, 2);
2244 		memcpy(crycb->apcb0.adm, adm, 2);
2245 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2246 			 apm[0], *((unsigned short *)aqm),
2247 			 *((unsigned short *)adm));
2248 		break;
2249 	default:	/* Can not happen */
2250 		break;
2251 	}
2252 
2253 	/* recreate the shadow crycb for each vcpu */
2254 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2255 	kvm_s390_vcpu_unblock_all(kvm);
2256 	mutex_unlock(&kvm->lock);
2257 }
2258 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2259 
2260 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2261 {
2262 	mutex_lock(&kvm->lock);
2263 	kvm_s390_vcpu_block_all(kvm);
2264 
2265 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2266 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2267 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2268 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2269 
2270 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2271 	/* recreate the shadow crycb for each vcpu */
2272 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2273 	kvm_s390_vcpu_unblock_all(kvm);
2274 	mutex_unlock(&kvm->lock);
2275 }
2276 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2277 
2278 static u64 kvm_s390_get_initial_cpuid(void)
2279 {
2280 	struct cpuid cpuid;
2281 
2282 	get_cpu_id(&cpuid);
2283 	cpuid.version = 0xff;
2284 	return *((u64 *) &cpuid);
2285 }
2286 
2287 static void kvm_s390_crypto_init(struct kvm *kvm)
2288 {
2289 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2290 	kvm_s390_set_crycb_format(kvm);
2291 
2292 	if (!test_kvm_facility(kvm, 76))
2293 		return;
2294 
2295 	/* Enable AES/DEA protected key functions by default */
2296 	kvm->arch.crypto.aes_kw = 1;
2297 	kvm->arch.crypto.dea_kw = 1;
2298 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2299 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2300 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2301 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2302 }
2303 
2304 static void sca_dispose(struct kvm *kvm)
2305 {
2306 	if (kvm->arch.use_esca)
2307 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2308 	else
2309 		free_page((unsigned long)(kvm->arch.sca));
2310 	kvm->arch.sca = NULL;
2311 }
2312 
/*
 * Architecture specific part of VM creation.
 *
 * @type: VM type flags; only KVM_VM_S390_UCONTROL is accepted, and only if
 *	  CONFIG_KVM_S390_UCONTROL is set and the caller has CAP_SYS_ADMIN.
 *
 * Allocates the basic SCA, the per-VM debug feature and sie_page2, derives
 * the facility mask/list and CPU model defaults, initializes crypto and
 * floating interrupt state, and - for non-ucontrol VMs - creates the guest
 * address space (gmap).
 *
 * Returns 0 on success, -EINVAL for an unsupported @type, the error of
 * s390_enable_sie(), or -ENOMEM if one of the allocations fails. On failure
 * everything allocated so far is released again.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	/* shared by all VMs, modified under kvm_lock below */
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	/* every failure from here on is an allocation failure */
	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	/*
	 * Place the basic SCA at an offset inside its page that advances by
	 * 16 bytes per created VM and restarts at 0 once the block would no
	 * longer fit into the page.
	 */
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	/* the debug feature is named after the pid of the creating task */
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.sie_page2->kvm = kvm;
	/* the facility list of the guest model is stored inside sie_page2 */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	/*
	 * mask: host facilities that are in the base or the extended set,
	 * list: host facilities that are in the base set only.
	 */
	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}
	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		/* ucontrol VMs have no VM-wide gmap and no memory limit */
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		/* limit guest memory to what sclp.hamax and TASK_SIZE_MAX allow */
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	/*
	 * Common error exit: it is reached with any subset of sie_page2, dbf
	 * and sca allocated, so each release below also runs for members that
	 * were never set up.
	 */
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
2433 
/* Always false: this architecture reports no per-vcpu debugfs support. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
2438 
/* Nothing is created for @vcpu here; always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
2443 
2444 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2445 {
2446 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2447 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2448 	kvm_s390_clear_local_irqs(vcpu);
2449 	kvm_clear_async_pf_completion_queue(vcpu);
2450 	if (!kvm_is_ucontrol(vcpu->kvm))
2451 		sca_del_vcpu(vcpu);
2452 
2453 	if (kvm_is_ucontrol(vcpu->kvm))
2454 		gmap_remove(vcpu->arch.gmap);
2455 
2456 	if (vcpu->kvm->arch.use_cmma)
2457 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2458 	free_page((unsigned long)(vcpu->arch.sie_block));
2459 
2460 	kvm_vcpu_uninit(vcpu);
2461 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2462 }
2463 
2464 static void kvm_free_vcpus(struct kvm *kvm)
2465 {
2466 	unsigned int i;
2467 	struct kvm_vcpu *vcpu;
2468 
2469 	kvm_for_each_vcpu(i, vcpu, kvm)
2470 		kvm_arch_vcpu_destroy(vcpu);
2471 
2472 	mutex_lock(&kvm->lock);
2473 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2474 		kvm->vcpus[i] = NULL;
2475 
2476 	atomic_set(&kvm->online_vcpus, 0);
2477 	mutex_unlock(&kvm->lock);
2478 }
2479 
/*
 * Architecture specific part of VM destruction: frees all vcpus, the SCA,
 * the per-VM debug feature, the GISA state and sie_page2, removes the
 * VM-wide gmap (non-ucontrol VMs only) and finally cleans up adapters,
 * floating interrupts and the vSIE state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* ucontrol VMs have no VM-wide gmap; theirs are per vcpu */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2494 
2495 /* Section: vcpu related */
2496 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2497 {
2498 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2499 	if (!vcpu->arch.gmap)
2500 		return -ENOMEM;
2501 	vcpu->arch.gmap->private = vcpu->kvm;
2502 
2503 	return 0;
2504 }
2505 
2506 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2507 {
2508 	if (!kvm_s390_use_sca_entries())
2509 		return;
2510 	read_lock(&vcpu->kvm->arch.sca_lock);
2511 	if (vcpu->kvm->arch.use_esca) {
2512 		struct esca_block *sca = vcpu->kvm->arch.sca;
2513 
2514 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2515 		sca->cpu[vcpu->vcpu_id].sda = 0;
2516 	} else {
2517 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2518 
2519 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2520 		sca->cpu[vcpu->vcpu_id].sda = 0;
2521 	}
2522 	read_unlock(&vcpu->kvm->arch.sca_lock);
2523 }
2524 
2525 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2526 {
2527 	if (!kvm_s390_use_sca_entries()) {
2528 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2529 
2530 		/* we still need the basic sca for the ipte control */
2531 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2532 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2533 		return;
2534 	}
2535 	read_lock(&vcpu->kvm->arch.sca_lock);
2536 	if (vcpu->kvm->arch.use_esca) {
2537 		struct esca_block *sca = vcpu->kvm->arch.sca;
2538 
2539 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2540 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2541 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2542 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2543 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2544 	} else {
2545 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2546 
2547 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2548 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2549 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2550 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2551 	}
2552 	read_unlock(&vcpu->kvm->arch.sca_lock);
2553 }
2554 
2555 /* Basic SCA to Extended SCA data copy routines */
2556 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2557 {
2558 	d->sda = s->sda;
2559 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2560 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2561 }
2562 
2563 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2564 {
2565 	int i;
2566 
2567 	d->ipte_control = s->ipte_control;
2568 	d->mcn[0] = s->mcn;
2569 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2570 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2571 }
2572 
/*
 * Replace the basic SCA of a VM by a newly allocated extended SCA.
 *
 * All vcpus are blocked and sca_lock is held for writing while the old
 * contents are copied and every vcpu's SIE block is pointed to the new
 * block. The old basic SCA page is freed afterwards.
 *
 * The visible caller (sca_can_add_vcpu) invokes this with kvm->lock held.
 *
 * Returns 0 on success, -ENOMEM if the extended SCA cannot be allocated.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	/* origin split into high/low word; low 6 bits of the low word cleared */
	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	/* repoint every existing vcpu to the extended SCA */
	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	/* kvm->arch.sca no longer points to the old block, so it can go */
	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
2610 
2611 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2612 {
2613 	int rc;
2614 
2615 	if (!kvm_s390_use_sca_entries()) {
2616 		if (id < KVM_MAX_VCPUS)
2617 			return true;
2618 		return false;
2619 	}
2620 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2621 		return true;
2622 	if (!sclp.has_esca || !sclp.has_64bscao)
2623 		return false;
2624 
2625 	mutex_lock(&kvm->lock);
2626 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2627 	mutex_unlock(&kvm->lock);
2628 
2629 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2630 }
2631 
2632 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2633 {
2634 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2635 	kvm_clear_async_pf_completion_queue(vcpu);
2636 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2637 				    KVM_SYNC_GPRS |
2638 				    KVM_SYNC_ACRS |
2639 				    KVM_SYNC_CRS |
2640 				    KVM_SYNC_ARCH0 |
2641 				    KVM_SYNC_PFAULT;
2642 	kvm_s390_set_prefix(vcpu, 0);
2643 	if (test_kvm_facility(vcpu->kvm, 64))
2644 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2645 	if (test_kvm_facility(vcpu->kvm, 82))
2646 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2647 	if (test_kvm_facility(vcpu->kvm, 133))
2648 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2649 	if (test_kvm_facility(vcpu->kvm, 156))
2650 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2651 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2652 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2653 	 */
2654 	if (MACHINE_HAS_VX)
2655 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2656 	else
2657 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2658 
2659 	if (kvm_is_ucontrol(vcpu->kvm))
2660 		return __kvm_ucontrol_vcpu_init(vcpu);
2661 
2662 	return 0;
2663 }
2664 
/*
 * Record the current TOD clock value as the start of a CPU timer accounting
 * period. The update is wrapped in a cputm_seqcount write section so that
 * readers can detect it. Warns (once) if a period is already running.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2673 
/*
 * End the running CPU timer accounting period: subtract the TOD time that
 * elapsed since cputm_start from the CPU timer in the SIE block and reset
 * cputm_start to 0. Warns (once) if no period is running.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2683 
/*
 * Mark CPU timer accounting as enabled for the vcpu and start an accounting
 * period. Warns (once) if accounting is already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2691 
/*
 * Stop the running accounting period and mark CPU timer accounting as
 * disabled for the vcpu. Warns (once) if accounting is not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2699 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2706 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2713 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores @cputm in the SIE block. If accounting is enabled, the accounting
 * period is restarted at the current TOD clock value so that the new timer
 * value is not reduced by time that passed before it was set.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
2725 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * If accounting is disabled, the value stored in the SIE block is returned
 * as is. Otherwise the stored value is reduced by the TOD time that elapsed
 * in the running accounting period; the read is repeated until it did not
 * overlap with a writer of cputm_seqcount.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * The low bit is masked off before the retry check, so a
		 * sequence count that was odd when read never compares equal
		 * and always causes a retry.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
2751 
/*
 * Load a vcpu: enable the gmap that was saved in enabled_gmap, flag the vcpu
 * as running, resume CPU timer accounting (if it is enabled and the vcpu is
 * not idle) and remember the cpu number passed in @cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
2761 
/*
 * Counterpart of kvm_arch_vcpu_load(): invalidate the recorded cpu number,
 * pause CPU timer accounting (if it is enabled and the vcpu is not idle),
 * clear the running flag, and save and disable the currently enabled gmap.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	/* remember which gmap was active so that the next load restores it */
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
2772 
/*
 * Bring a vcpu into its initial reset state: PSW, prefix, CPU timer, clock
 * comparator, TOD programmable register, control registers, floating point
 * control and several SIE block fields are reset, the pfault token is
 * invalidated and pending async page faults and local interrupts are
 * dropped. Unless userspace controls the cpu state, the vcpu is stopped.
 */
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	/* clear all 16 control registers, then set the reset values of 0 and 14 */
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
					CR0_INTERRUPT_KEY_SUBMASK |
					CR0_MEASUREMENT_ALERT_SUBMASK;
	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
					CR14_UNUSED_33 |
					CR14_EXTERNAL_DAMAGE_SUBMASK;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	/* only stop the vcpu if userspace does not manage the cpu state */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
2801 
2802 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2803 {
2804 	mutex_lock(&vcpu->kvm->lock);
2805 	preempt_disable();
2806 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2807 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2808 	preempt_enable();
2809 	mutex_unlock(&vcpu->kvm->lock);
2810 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2811 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2812 		sca_add_vcpu(vcpu);
2813 	}
2814 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2815 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2816 	/* make vcpu_load load the right gmap on the first trigger */
2817 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2818 }
2819 
2820 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2821 {
2822 	/*
2823 	 * If the AP instructions are not being interpreted and the MSAX3
2824 	 * facility is not configured for the guest, there is nothing to set up.
2825 	 */
2826 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2827 		return;
2828 
2829 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2830 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2831 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2832 
2833 	if (vcpu->kvm->arch.crypto.apie)
2834 		vcpu->arch.sie_block->eca |= ECA_APIE;
2835 
2836 	/* Set up protected key support */
2837 	if (vcpu->kvm->arch.crypto.aes_kw)
2838 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2839 	if (vcpu->kvm->arch.crypto.dea_kw)
2840 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2841 }
2842 
/*
 * Free the page referenced by the cbrlo field of the vcpu's SIE block (set
 * up by kvm_s390_vcpu_setup_cmma()) and clear the field.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
2848 
2849 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2850 {
2851 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2852 	if (!vcpu->arch.sie_block->cbrlo)
2853 		return -ENOMEM;
2854 	return 0;
2855 }
2856 
2857 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2858 {
2859 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2860 
2861 	vcpu->arch.sie_block->ibc = model->ibc;
2862 	if (test_kvm_facility(vcpu->kvm, 7))
2863 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2864 }
2865 
2866 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2867 {
2868 	int rc = 0;
2869 
2870 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2871 						    CPUSTAT_SM |
2872 						    CPUSTAT_STOPPED);
2873 
2874 	if (test_kvm_facility(vcpu->kvm, 78))
2875 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2876 	else if (test_kvm_facility(vcpu->kvm, 8))
2877 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2878 
2879 	kvm_s390_vcpu_setup_model(vcpu);
2880 
2881 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2882 	if (MACHINE_HAS_ESOP)
2883 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2884 	if (test_kvm_facility(vcpu->kvm, 9))
2885 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2886 	if (test_kvm_facility(vcpu->kvm, 73))
2887 		vcpu->arch.sie_block->ecb |= ECB_TE;
2888 
2889 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2890 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2891 	if (test_kvm_facility(vcpu->kvm, 130))
2892 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2893 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2894 	if (sclp.has_cei)
2895 		vcpu->arch.sie_block->eca |= ECA_CEI;
2896 	if (sclp.has_ib)
2897 		vcpu->arch.sie_block->eca |= ECA_IB;
2898 	if (sclp.has_siif)
2899 		vcpu->arch.sie_block->eca |= ECA_SII;
2900 	if (sclp.has_sigpif)
2901 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2902 	if (test_kvm_facility(vcpu->kvm, 129)) {
2903 		vcpu->arch.sie_block->eca |= ECA_VX;
2904 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2905 	}
2906 	if (test_kvm_facility(vcpu->kvm, 139))
2907 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2908 	if (test_kvm_facility(vcpu->kvm, 156))
2909 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2910 	if (vcpu->arch.sie_block->gd) {
2911 		vcpu->arch.sie_block->eca |= ECA_AIV;
2912 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2913 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2914 	}
2915 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2916 					| SDNXC;
2917 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2918 
2919 	if (sclp.has_kss)
2920 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2921 	else
2922 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2923 
2924 	if (vcpu->kvm->arch.use_cmma) {
2925 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2926 		if (rc)
2927 			return rc;
2928 	}
2929 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2930 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2931 
2932 	vcpu->arch.sie_block->hpid = HPID_KVM;
2933 
2934 	kvm_s390_vcpu_crypto_setup(vcpu);
2935 
2936 	return rc;
2937 }
2938 
2939 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2940 				      unsigned int id)
2941 {
2942 	struct kvm_vcpu *vcpu;
2943 	struct sie_page *sie_page;
2944 	int rc = -EINVAL;
2945 
2946 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2947 		goto out;
2948 
2949 	rc = -ENOMEM;
2950 
2951 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2952 	if (!vcpu)
2953 		goto out;
2954 
2955 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2956 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2957 	if (!sie_page)
2958 		goto out_free_cpu;
2959 
2960 	vcpu->arch.sie_block = &sie_page->sie_block;
2961 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2962 
2963 	/* the real guest size will always be smaller than msl */
2964 	vcpu->arch.sie_block->mso = 0;
2965 	vcpu->arch.sie_block->msl = sclp.hamax;
2966 
2967 	vcpu->arch.sie_block->icpua = id;
2968 	spin_lock_init(&vcpu->arch.local_int.lock);
2969 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
2970 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2971 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2972 	seqcount_init(&vcpu->arch.cputm_seqcount);
2973 
2974 	rc = kvm_vcpu_init(vcpu, kvm, id);
2975 	if (rc)
2976 		goto out_free_sie_block;
2977 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2978 		 vcpu->arch.sie_block);
2979 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2980 
2981 	return vcpu;
2982 out_free_sie_block:
2983 	free_page((unsigned long)(vcpu->arch.sie_block));
2984 out_free_cpu:
2985 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2986 out:
2987 	return ERR_PTR(rc);
2988 }
2989 
/* Report the result of kvm_s390_vcpu_has_irq(vcpu, 0) as runnable state. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2994 
2995 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2996 {
2997 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2998 }
2999 
3000 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3001 {
3002 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3003 	exit_sie(vcpu);
3004 }
3005 
3006 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3007 {
3008 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3009 }
3010 
3011 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3012 {
3013 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3014 	exit_sie(vcpu);
3015 }
3016 
3017 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3018 {
3019 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3020 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3021 }
3022 
3023 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3024 {
3025 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3026 }
3027 
3028 /*
3029  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3030  * If the CPU is not running (e.g. waiting as idle) the function will
3031  * return immediately. */
3032 void exit_sie(struct kvm_vcpu *vcpu)
3033 {
3034 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3035 	kvm_s390_vsie_kick(vcpu);
3036 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3037 		cpu_relax();
3038 }
3039 
/*
 * Post request @req for @vcpu and kick the vcpu out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request must be visible before the vcpu is kicked */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3046 
3047 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3048 			      unsigned long end)
3049 {
3050 	struct kvm *kvm = gmap->private;
3051 	struct kvm_vcpu *vcpu;
3052 	unsigned long prefix;
3053 	int i;
3054 
3055 	if (gmap_is_shadow(gmap))
3056 		return;
3057 	if (start >= 1UL << 31)
3058 		/* We are only interested in prefix pages */
3059 		return;
3060 	kvm_for_each_vcpu(i, vcpu, kvm) {
3061 		/* match against both prefix pages */
3062 		prefix = kvm_s390_get_prefix(vcpu);
3063 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3064 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3065 				   start, end);
3066 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3067 		}
3068 	}
3069 }
3070 
/*
 * Only present because kvm common code refers to it; it is never expected
 * to be called, hence the BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3077 
3078 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3079 					   struct kvm_one_reg *reg)
3080 {
3081 	int r = -EINVAL;
3082 
3083 	switch (reg->id) {
3084 	case KVM_REG_S390_TODPR:
3085 		r = put_user(vcpu->arch.sie_block->todpr,
3086 			     (u32 __user *)reg->addr);
3087 		break;
3088 	case KVM_REG_S390_EPOCHDIFF:
3089 		r = put_user(vcpu->arch.sie_block->epoch,
3090 			     (u64 __user *)reg->addr);
3091 		break;
3092 	case KVM_REG_S390_CPU_TIMER:
3093 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3094 			     (u64 __user *)reg->addr);
3095 		break;
3096 	case KVM_REG_S390_CLOCK_COMP:
3097 		r = put_user(vcpu->arch.sie_block->ckc,
3098 			     (u64 __user *)reg->addr);
3099 		break;
3100 	case KVM_REG_S390_PFTOKEN:
3101 		r = put_user(vcpu->arch.pfault_token,
3102 			     (u64 __user *)reg->addr);
3103 		break;
3104 	case KVM_REG_S390_PFCOMPARE:
3105 		r = put_user(vcpu->arch.pfault_compare,
3106 			     (u64 __user *)reg->addr);
3107 		break;
3108 	case KVM_REG_S390_PFSELECT:
3109 		r = put_user(vcpu->arch.pfault_select,
3110 			     (u64 __user *)reg->addr);
3111 		break;
3112 	case KVM_REG_S390_PP:
3113 		r = put_user(vcpu->arch.sie_block->pp,
3114 			     (u64 __user *)reg->addr);
3115 		break;
3116 	case KVM_REG_S390_GBEA:
3117 		r = put_user(vcpu->arch.sie_block->gbea,
3118 			     (u64 __user *)reg->addr);
3119 		break;
3120 	default:
3121 		break;
3122 	}
3123 
3124 	return r;
3125 }
3126 
3127 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3128 					   struct kvm_one_reg *reg)
3129 {
3130 	int r = -EINVAL;
3131 	__u64 val;
3132 
3133 	switch (reg->id) {
3134 	case KVM_REG_S390_TODPR:
3135 		r = get_user(vcpu->arch.sie_block->todpr,
3136 			     (u32 __user *)reg->addr);
3137 		break;
3138 	case KVM_REG_S390_EPOCHDIFF:
3139 		r = get_user(vcpu->arch.sie_block->epoch,
3140 			     (u64 __user *)reg->addr);
3141 		break;
3142 	case KVM_REG_S390_CPU_TIMER:
3143 		r = get_user(val, (u64 __user *)reg->addr);
3144 		if (!r)
3145 			kvm_s390_set_cpu_timer(vcpu, val);
3146 		break;
3147 	case KVM_REG_S390_CLOCK_COMP:
3148 		r = get_user(vcpu->arch.sie_block->ckc,
3149 			     (u64 __user *)reg->addr);
3150 		break;
3151 	case KVM_REG_S390_PFTOKEN:
3152 		r = get_user(vcpu->arch.pfault_token,
3153 			     (u64 __user *)reg->addr);
3154 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3155 			kvm_clear_async_pf_completion_queue(vcpu);
3156 		break;
3157 	case KVM_REG_S390_PFCOMPARE:
3158 		r = get_user(vcpu->arch.pfault_compare,
3159 			     (u64 __user *)reg->addr);
3160 		break;
3161 	case KVM_REG_S390_PFSELECT:
3162 		r = get_user(vcpu->arch.pfault_select,
3163 			     (u64 __user *)reg->addr);
3164 		break;
3165 	case KVM_REG_S390_PP:
3166 		r = get_user(vcpu->arch.sie_block->pp,
3167 			     (u64 __user *)reg->addr);
3168 		break;
3169 	case KVM_REG_S390_GBEA:
3170 		r = get_user(vcpu->arch.sie_block->gbea,
3171 			     (u64 __user *)reg->addr);
3172 		break;
3173 	default:
3174 		break;
3175 	}
3176 
3177 	return r;
3178 }
3179 
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	kvm_s390_vcpu_initial_reset(vcpu);
	return rc;
}
3185 
3186 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3187 {
3188 	vcpu_load(vcpu);
3189 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3190 	vcpu_put(vcpu);
3191 	return 0;
3192 }
3193 
3194 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3195 {
3196 	vcpu_load(vcpu);
3197 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3198 	vcpu_put(vcpu);
3199 	return 0;
3200 }
3201 
3202 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3203 				  struct kvm_sregs *sregs)
3204 {
3205 	vcpu_load(vcpu);
3206 
3207 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3208 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3209 
3210 	vcpu_put(vcpu);
3211 	return 0;
3212 }
3213 
3214 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3215 				  struct kvm_sregs *sregs)
3216 {
3217 	vcpu_load(vcpu);
3218 
3219 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3220 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3221 
3222 	vcpu_put(vcpu);
3223 	return 0;
3224 }
3225 
3226 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3227 {
3228 	int ret = 0;
3229 
3230 	vcpu_load(vcpu);
3231 
3232 	if (test_fp_ctl(fpu->fpc)) {
3233 		ret = -EINVAL;
3234 		goto out;
3235 	}
3236 	vcpu->run->s.regs.fpc = fpu->fpc;
3237 	if (MACHINE_HAS_VX)
3238 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3239 				 (freg_t *) fpu->fprs);
3240 	else
3241 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3242 
3243 out:
3244 	vcpu_put(vcpu);
3245 	return ret;
3246 }
3247 
3248 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3249 {
3250 	vcpu_load(vcpu);
3251 
3252 	/* make sure we have the latest values */
3253 	save_fpu_regs();
3254 	if (MACHINE_HAS_VX)
3255 		convert_vx_to_fp((freg_t *) fpu->fprs,
3256 				 (__vector128 *) vcpu->run->s.regs.vrs);
3257 	else
3258 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3259 	fpu->fpc = vcpu->run->s.regs.fpc;
3260 
3261 	vcpu_put(vcpu);
3262 	return 0;
3263 }
3264 
3265 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3266 {
3267 	int rc = 0;
3268 
3269 	if (!is_vcpu_stopped(vcpu))
3270 		rc = -EBUSY;
3271 	else {
3272 		vcpu->run->psw_mask = psw.mask;
3273 		vcpu->run->psw_addr = psw.addr;
3274 	}
3275 	return rc;
3276 }
3277 
/* Address translation is not implemented yet: always fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	const int not_implemented = -EINVAL;

	return not_implemented;
}
3283 
3284 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3285 			      KVM_GUESTDBG_USE_HW_BP | \
3286 			      KVM_GUESTDBG_ENABLE)
3287 
3288 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3289 					struct kvm_guest_debug *dbg)
3290 {
3291 	int rc = 0;
3292 
3293 	vcpu_load(vcpu);
3294 
3295 	vcpu->guest_debug = 0;
3296 	kvm_s390_clear_bp_data(vcpu);
3297 
3298 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3299 		rc = -EINVAL;
3300 		goto out;
3301 	}
3302 	if (!sclp.has_gpere) {
3303 		rc = -EINVAL;
3304 		goto out;
3305 	}
3306 
3307 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3308 		vcpu->guest_debug = dbg->control;
3309 		/* enforce guest PER */
3310 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3311 
3312 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3313 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3314 	} else {
3315 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3316 		vcpu->arch.guestdbg.last_bp = 0;
3317 	}
3318 
3319 	if (rc) {
3320 		vcpu->guest_debug = 0;
3321 		kvm_s390_clear_bp_data(vcpu);
3322 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3323 	}
3324 
3325 out:
3326 	vcpu_put(vcpu);
3327 	return rc;
3328 }
3329 
3330 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3331 				    struct kvm_mp_state *mp_state)
3332 {
3333 	int ret;
3334 
3335 	vcpu_load(vcpu);
3336 
3337 	/* CHECK_STOP and LOAD are not supported yet */
3338 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3339 				      KVM_MP_STATE_OPERATING;
3340 
3341 	vcpu_put(vcpu);
3342 	return ret;
3343 }
3344 
3345 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3346 				    struct kvm_mp_state *mp_state)
3347 {
3348 	int rc = 0;
3349 
3350 	vcpu_load(vcpu);
3351 
3352 	/* user space knows about this interface - let it control the state */
3353 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3354 
3355 	switch (mp_state->mp_state) {
3356 	case KVM_MP_STATE_STOPPED:
3357 		kvm_s390_vcpu_stop(vcpu);
3358 		break;
3359 	case KVM_MP_STATE_OPERATING:
3360 		kvm_s390_vcpu_start(vcpu);
3361 		break;
3362 	case KVM_MP_STATE_LOAD:
3363 	case KVM_MP_STATE_CHECK_STOP:
3364 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3365 	default:
3366 		rc = -ENXIO;
3367 	}
3368 
3369 	vcpu_put(vcpu);
3370 	return rc;
3371 }
3372 
3373 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3374 {
3375 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3376 }
3377 
3378 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3379 {
3380 retry:
3381 	kvm_s390_vcpu_request_handled(vcpu);
3382 	if (!kvm_request_pending(vcpu))
3383 		return 0;
3384 	/*
3385 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3386 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3387 	 * This ensures that the ipte instruction for this request has
3388 	 * already finished. We might race against a second unmapper that
3389 	 * wants to set the blocking bit. Lets just retry the request loop.
3390 	 */
3391 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3392 		int rc;
3393 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3394 					  kvm_s390_get_prefix(vcpu),
3395 					  PAGE_SIZE * 2, PROT_WRITE);
3396 		if (rc) {
3397 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3398 			return rc;
3399 		}
3400 		goto retry;
3401 	}
3402 
3403 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3404 		vcpu->arch.sie_block->ihcpu = 0xffff;
3405 		goto retry;
3406 	}
3407 
3408 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3409 		if (!ibs_enabled(vcpu)) {
3410 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3411 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3412 		}
3413 		goto retry;
3414 	}
3415 
3416 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3417 		if (ibs_enabled(vcpu)) {
3418 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3419 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3420 		}
3421 		goto retry;
3422 	}
3423 
3424 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3425 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3426 		goto retry;
3427 	}
3428 
3429 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3430 		/*
3431 		 * Disable CMM virtualization; we will emulate the ESSA
3432 		 * instruction manually, in order to provide additional
3433 		 * functionalities needed for live migration.
3434 		 */
3435 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3436 		goto retry;
3437 	}
3438 
3439 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3440 		/*
3441 		 * Re-enable CMM virtualization if CMMA is available and
3442 		 * CMM has been used.
3443 		 */
3444 		if ((vcpu->kvm->arch.use_cmma) &&
3445 		    (vcpu->kvm->mm->context.uses_cmm))
3446 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3447 		goto retry;
3448 	}
3449 
3450 	/* nothing to do, just clear the request */
3451 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3452 	/* we left the vsie handler, nothing to do, just clear the request */
3453 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3454 
3455 	return 0;
3456 }
3457 
3458 void kvm_s390_set_tod_clock(struct kvm *kvm,
3459 			    const struct kvm_s390_vm_tod_clock *gtod)
3460 {
3461 	struct kvm_vcpu *vcpu;
3462 	struct kvm_s390_tod_clock_ext htod;
3463 	int i;
3464 
3465 	mutex_lock(&kvm->lock);
3466 	preempt_disable();
3467 
3468 	get_tod_clock_ext((char *)&htod);
3469 
3470 	kvm->arch.epoch = gtod->tod - htod.tod;
3471 	kvm->arch.epdx = 0;
3472 	if (test_kvm_facility(kvm, 139)) {
3473 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3474 		if (kvm->arch.epoch > gtod->tod)
3475 			kvm->arch.epdx -= 1;
3476 	}
3477 
3478 	kvm_s390_vcpu_block_all(kvm);
3479 	kvm_for_each_vcpu(i, vcpu, kvm) {
3480 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3481 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3482 	}
3483 
3484 	kvm_s390_vcpu_unblock_all(kvm);
3485 	preempt_enable();
3486 	mutex_unlock(&kvm->lock);
3487 }
3488 
3489 /**
3490  * kvm_arch_fault_in_page - fault-in guest page if necessary
3491  * @vcpu: The corresponding virtual cpu
3492  * @gpa: Guest physical address
3493  * @writable: Whether the page should be writable or not
3494  *
3495  * Make sure that a guest page has been faulted-in on the host.
3496  *
3497  * Return: Zero on success, negative error code otherwise.
3498  */
3499 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3500 {
3501 	return gmap_fault(vcpu->arch.gmap, gpa,
3502 			  writable ? FAULT_FLAG_WRITE : 0);
3503 }
3504 
3505 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3506 				      unsigned long token)
3507 {
3508 	struct kvm_s390_interrupt inti;
3509 	struct kvm_s390_irq irq;
3510 
3511 	if (start_token) {
3512 		irq.u.ext.ext_params2 = token;
3513 		irq.type = KVM_S390_INT_PFAULT_INIT;
3514 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3515 	} else {
3516 		inti.type = KVM_S390_INT_PFAULT_DONE;
3517 		inti.parm64 = token;
3518 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3519 	}
3520 }
3521 
3522 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3523 				     struct kvm_async_pf *work)
3524 {
3525 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3526 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3527 }
3528 
3529 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3530 				 struct kvm_async_pf *work)
3531 {
3532 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3533 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3534 }
3535 
/*
 * Intentionally empty: s390 will always inject the page directly, so there
 * is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3541 
/*
 * Always true: s390 will always inject the page directly, but we still want
 * check_async_completion to cleanup.
 */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	const bool can_inject = true;

	return can_inject;
}
3550 
3551 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3552 {
3553 	hva_t hva;
3554 	struct kvm_arch_async_pf arch;
3555 	int rc;
3556 
3557 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3558 		return 0;
3559 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3560 	    vcpu->arch.pfault_compare)
3561 		return 0;
3562 	if (psw_extint_disabled(vcpu))
3563 		return 0;
3564 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3565 		return 0;
3566 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3567 		return 0;
3568 	if (!vcpu->arch.gmap->pfault_enabled)
3569 		return 0;
3570 
3571 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3572 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3573 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3574 		return 0;
3575 
3576 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3577 	return rc;
3578 }
3579 
3580 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3581 {
3582 	int rc, cpuflags;
3583 
3584 	/*
3585 	 * On s390 notifications for arriving pages will be delivered directly
3586 	 * to the guest but the house keeping for completed pfaults is
3587 	 * handled outside the worker.
3588 	 */
3589 	kvm_check_async_pf_completion(vcpu);
3590 
3591 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3592 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3593 
3594 	if (need_resched())
3595 		schedule();
3596 
3597 	if (test_cpu_flag(CIF_MCCK_PENDING))
3598 		s390_handle_mcck();
3599 
3600 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3601 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3602 		if (rc)
3603 			return rc;
3604 	}
3605 
3606 	rc = kvm_s390_handle_requests(vcpu);
3607 	if (rc)
3608 		return rc;
3609 
3610 	if (guestdbg_enabled(vcpu)) {
3611 		kvm_s390_backup_guest_per_regs(vcpu);
3612 		kvm_s390_patch_guest_per_regs(vcpu);
3613 	}
3614 
3615 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3616 
3617 	vcpu->arch.sie_block->icptcode = 0;
3618 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3619 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3620 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3621 
3622 	return 0;
3623 }
3624 
3625 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3626 {
3627 	struct kvm_s390_pgm_info pgm_info = {
3628 		.code = PGM_ADDRESSING,
3629 	};
3630 	u8 opcode, ilen;
3631 	int rc;
3632 
3633 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3634 	trace_kvm_s390_sie_fault(vcpu);
3635 
3636 	/*
3637 	 * We want to inject an addressing exception, which is defined as a
3638 	 * suppressing or terminating exception. However, since we came here
3639 	 * by a DAT access exception, the PSW still points to the faulting
3640 	 * instruction since DAT exceptions are nullifying. So we've got
3641 	 * to look up the current opcode to get the length of the instruction
3642 	 * to be able to forward the PSW.
3643 	 */
3644 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3645 	ilen = insn_length(opcode);
3646 	if (rc < 0) {
3647 		return rc;
3648 	} else if (rc) {
3649 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3650 		 * Forward by arbitrary ilc, injection will take care of
3651 		 * nullification if necessary.
3652 		 */
3653 		pgm_info = vcpu->arch.pgm;
3654 		ilen = 4;
3655 	}
3656 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3657 	kvm_s390_forward_psw(vcpu, ilen);
3658 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3659 }
3660 
3661 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3662 {
3663 	struct mcck_volatile_info *mcck_info;
3664 	struct sie_page *sie_page;
3665 
3666 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3667 		   vcpu->arch.sie_block->icptcode);
3668 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3669 
3670 	if (guestdbg_enabled(vcpu))
3671 		kvm_s390_restore_guest_per_regs(vcpu);
3672 
3673 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3674 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3675 
3676 	if (exit_reason == -EINTR) {
3677 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3678 		sie_page = container_of(vcpu->arch.sie_block,
3679 					struct sie_page, sie_block);
3680 		mcck_info = &sie_page->mcck_info;
3681 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3682 		return 0;
3683 	}
3684 
3685 	if (vcpu->arch.sie_block->icptcode > 0) {
3686 		int rc = kvm_handle_sie_intercept(vcpu);
3687 
3688 		if (rc != -EOPNOTSUPP)
3689 			return rc;
3690 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3691 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3692 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3693 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3694 		return -EREMOTE;
3695 	} else if (exit_reason != -EFAULT) {
3696 		vcpu->stat.exit_null++;
3697 		return 0;
3698 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3699 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3700 		vcpu->run->s390_ucontrol.trans_exc_code =
3701 						current->thread.gmap_addr;
3702 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3703 		return -EREMOTE;
3704 	} else if (current->thread.gmap_pfault) {
3705 		trace_kvm_s390_major_guest_pfault(vcpu);
3706 		current->thread.gmap_pfault = 0;
3707 		if (kvm_arch_setup_async_pf(vcpu))
3708 			return 0;
3709 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3710 	}
3711 	return vcpu_post_run_fault_in_sie(vcpu);
3712 }
3713 
3714 static int __vcpu_run(struct kvm_vcpu *vcpu)
3715 {
3716 	int rc, exit_reason;
3717 
3718 	/*
3719 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3720 	 * ning the guest), so that memslots (and other stuff) are protected
3721 	 */
3722 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3723 
3724 	do {
3725 		rc = vcpu_pre_run(vcpu);
3726 		if (rc)
3727 			break;
3728 
3729 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3730 		/*
3731 		 * As PF_VCPU will be used in fault handler, between
3732 		 * guest_enter and guest_exit should be no uaccess.
3733 		 */
3734 		local_irq_disable();
3735 		guest_enter_irqoff();
3736 		__disable_cpu_timer_accounting(vcpu);
3737 		local_irq_enable();
3738 		exit_reason = sie64a(vcpu->arch.sie_block,
3739 				     vcpu->run->s.regs.gprs);
3740 		local_irq_disable();
3741 		__enable_cpu_timer_accounting(vcpu);
3742 		guest_exit_irqoff();
3743 		local_irq_enable();
3744 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3745 
3746 		rc = vcpu_post_run(vcpu, exit_reason);
3747 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3748 
3749 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3750 	return rc;
3751 }
3752 
3753 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3754 {
3755 	struct runtime_instr_cb *riccb;
3756 	struct gs_cb *gscb;
3757 
3758 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3759 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3760 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3761 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3762 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3763 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3764 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3765 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3766 		/* some control register changes require a tlb flush */
3767 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3768 	}
3769 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3770 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3771 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3772 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3773 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3774 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3775 	}
3776 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3777 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3778 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3779 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3780 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3781 			kvm_clear_async_pf_completion_queue(vcpu);
3782 	}
3783 	/*
3784 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3785 	 * we should enable RI here instead of doing the lazy enablement.
3786 	 */
3787 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3788 	    test_kvm_facility(vcpu->kvm, 64) &&
3789 	    riccb->v &&
3790 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3791 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3792 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3793 	}
3794 	/*
3795 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3796 	 * we should enable GS here instead of doing the lazy enablement.
3797 	 */
3798 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3799 	    test_kvm_facility(vcpu->kvm, 133) &&
3800 	    gscb->gssm &&
3801 	    !vcpu->arch.gs_enabled) {
3802 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3803 		vcpu->arch.sie_block->ecb |= ECB_GS;
3804 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3805 		vcpu->arch.gs_enabled = 1;
3806 	}
3807 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3808 	    test_kvm_facility(vcpu->kvm, 82)) {
3809 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3810 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3811 	}
3812 	save_access_regs(vcpu->arch.host_acrs);
3813 	restore_access_regs(vcpu->run->s.regs.acrs);
3814 	/* save host (userspace) fprs/vrs */
3815 	save_fpu_regs();
3816 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3817 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3818 	if (MACHINE_HAS_VX)
3819 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3820 	else
3821 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3822 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3823 	if (test_fp_ctl(current->thread.fpu.fpc))
3824 		/* User space provided an invalid FPC, let's clear it */
3825 		current->thread.fpu.fpc = 0;
3826 	if (MACHINE_HAS_GS) {
3827 		preempt_disable();
3828 		__ctl_set_bit(2, 4);
3829 		if (current->thread.gs_cb) {
3830 			vcpu->arch.host_gscb = current->thread.gs_cb;
3831 			save_gs_cb(vcpu->arch.host_gscb);
3832 		}
3833 		if (vcpu->arch.gs_enabled) {
3834 			current->thread.gs_cb = (struct gs_cb *)
3835 						&vcpu->run->s.regs.gscb;
3836 			restore_gs_cb(current->thread.gs_cb);
3837 		}
3838 		preempt_enable();
3839 	}
3840 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3841 
3842 	kvm_run->kvm_dirty_regs = 0;
3843 }
3844 
3845 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3846 {
3847 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3848 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3849 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3850 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3851 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3852 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3853 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3854 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3855 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3856 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3857 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3858 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3859 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3860 	save_access_regs(vcpu->run->s.regs.acrs);
3861 	restore_access_regs(vcpu->arch.host_acrs);
3862 	/* Save guest register state */
3863 	save_fpu_regs();
3864 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3865 	/* Restore will be done lazily at return */
3866 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3867 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3868 	if (MACHINE_HAS_GS) {
3869 		__ctl_set_bit(2, 4);
3870 		if (vcpu->arch.gs_enabled)
3871 			save_gs_cb(current->thread.gs_cb);
3872 		preempt_disable();
3873 		current->thread.gs_cb = vcpu->arch.host_gscb;
3874 		restore_gs_cb(vcpu->arch.host_gscb);
3875 		preempt_enable();
3876 		if (!vcpu->arch.host_gscb)
3877 			__ctl_clear_bit(2, 4);
3878 		vcpu->arch.host_gscb = NULL;
3879 	}
3880 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3881 }
3882 
3883 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3884 {
3885 	int rc;
3886 
3887 	if (kvm_run->immediate_exit)
3888 		return -EINTR;
3889 
3890 	vcpu_load(vcpu);
3891 
3892 	if (guestdbg_exit_pending(vcpu)) {
3893 		kvm_s390_prepare_debug_exit(vcpu);
3894 		rc = 0;
3895 		goto out;
3896 	}
3897 
3898 	kvm_sigset_activate(vcpu);
3899 
3900 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3901 		kvm_s390_vcpu_start(vcpu);
3902 	} else if (is_vcpu_stopped(vcpu)) {
3903 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3904 				   vcpu->vcpu_id);
3905 		rc = -EINVAL;
3906 		goto out;
3907 	}
3908 
3909 	sync_regs(vcpu, kvm_run);
3910 	enable_cpu_timer_accounting(vcpu);
3911 
3912 	might_fault();
3913 	rc = __vcpu_run(vcpu);
3914 
3915 	if (signal_pending(current) && !rc) {
3916 		kvm_run->exit_reason = KVM_EXIT_INTR;
3917 		rc = -EINTR;
3918 	}
3919 
3920 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3921 		kvm_s390_prepare_debug_exit(vcpu);
3922 		rc = 0;
3923 	}
3924 
3925 	if (rc == -EREMOTE) {
3926 		/* userspace support is needed, kvm_run has been prepared */
3927 		rc = 0;
3928 	}
3929 
3930 	disable_cpu_timer_accounting(vcpu);
3931 	store_regs(vcpu, kvm_run);
3932 
3933 	kvm_sigset_deactivate(vcpu);
3934 
3935 	vcpu->stat.exit_userspace++;
3936 out:
3937 	vcpu_put(vcpu);
3938 	return rc;
3939 }
3940 
3941 /*
3942  * store status at address
3943  * we use have two special cases:
3944  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3945  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3946  */
3947 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3948 {
3949 	unsigned char archmode = 1;
3950 	freg_t fprs[NUM_FPRS];
3951 	unsigned int px;
3952 	u64 clkcomp, cputm;
3953 	int rc;
3954 
3955 	px = kvm_s390_get_prefix(vcpu);
3956 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3957 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3958 			return -EFAULT;
3959 		gpa = 0;
3960 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3961 		if (write_guest_real(vcpu, 163, &archmode, 1))
3962 			return -EFAULT;
3963 		gpa = px;
3964 	} else
3965 		gpa -= __LC_FPREGS_SAVE_AREA;
3966 
3967 	/* manually convert vector registers if necessary */
3968 	if (MACHINE_HAS_VX) {
3969 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3970 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3971 				     fprs, 128);
3972 	} else {
3973 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3974 				     vcpu->run->s.regs.fprs, 128);
3975 	}
3976 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3977 			      vcpu->run->s.regs.gprs, 128);
3978 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3979 			      &vcpu->arch.sie_block->gpsw, 16);
3980 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3981 			      &px, 4);
3982 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3983 			      &vcpu->run->s.regs.fpc, 4);
3984 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3985 			      &vcpu->arch.sie_block->todpr, 4);
3986 	cputm = kvm_s390_get_cpu_timer(vcpu);
3987 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3988 			      &cputm, 8);
3989 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3990 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3991 			      &clkcomp, 8);
3992 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3993 			      &vcpu->run->s.regs.acrs, 64);
3994 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3995 			      &vcpu->arch.sie_block->gcr, 128);
3996 	return rc ? -EFAULT : 0;
3997 }
3998 
3999 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4000 {
4001 	/*
4002 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4003 	 * switch in the run ioctl. Let's update our copies before we save
4004 	 * it into the save area
4005 	 */
4006 	save_fpu_regs();
4007 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4008 	save_access_regs(vcpu->run->s.regs.acrs);
4009 
4010 	return kvm_s390_store_status_unloaded(vcpu, addr);
4011 }
4012 
/*
 * Request that IBS be disabled on @vcpu.
 *
 * The kvm_check_request() result is deliberately unused; the call is
 * presumably there to get rid of an ENABLE_IBS request that is still
 * outstanding, before the DISABLE_IBS request is issued via
 * kvm_s390_sync_request().
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
4018 
4019 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4020 {
4021 	unsigned int i;
4022 	struct kvm_vcpu *vcpu;
4023 
4024 	kvm_for_each_vcpu(i, vcpu, kvm) {
4025 		__disable_ibs_on_vcpu(vcpu);
4026 	}
4027 }
4028 
4029 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4030 {
4031 	if (!sclp.has_ibs)
4032 		return;
4033 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4034 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4035 }
4036 
4037 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4038 {
4039 	int i, online_vcpus, started_vcpus = 0;
4040 
4041 	if (!is_vcpu_stopped(vcpu))
4042 		return;
4043 
4044 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4045 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4046 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4047 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4048 
4049 	for (i = 0; i < online_vcpus; i++) {
4050 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4051 			started_vcpus++;
4052 	}
4053 
4054 	if (started_vcpus == 0) {
4055 		/* we're the only active VCPU -> speed it up */
4056 		__enable_ibs_on_vcpu(vcpu);
4057 	} else if (started_vcpus == 1) {
4058 		/*
4059 		 * As we are starting a second VCPU, we have to disable
4060 		 * the IBS facility on all VCPUs to remove potentially
4061 		 * oustanding ENABLE requests.
4062 		 */
4063 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4064 	}
4065 
4066 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4067 	/*
4068 	 * Another VCPU might have used IBS while we were offline.
4069 	 * Let's play safe and flush the VCPU at startup.
4070 	 */
4071 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4072 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4073 	return;
4074 }
4075 
4076 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4077 {
4078 	int i, online_vcpus, started_vcpus = 0;
4079 	struct kvm_vcpu *started_vcpu = NULL;
4080 
4081 	if (is_vcpu_stopped(vcpu))
4082 		return;
4083 
4084 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4085 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4086 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4087 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4088 
4089 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4090 	kvm_s390_clear_stop_irq(vcpu);
4091 
4092 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4093 	__disable_ibs_on_vcpu(vcpu);
4094 
4095 	for (i = 0; i < online_vcpus; i++) {
4096 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4097 			started_vcpus++;
4098 			started_vcpu = vcpu->kvm->vcpus[i];
4099 		}
4100 	}
4101 
4102 	if (started_vcpus == 1) {
4103 		/*
4104 		 * As we only have one VCPU left, we want to enable the
4105 		 * IBS facility for that VCPU to speed it up.
4106 		 */
4107 		__enable_ibs_on_vcpu(started_vcpu);
4108 	}
4109 
4110 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4111 	return;
4112 }
4113 
4114 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4115 				     struct kvm_enable_cap *cap)
4116 {
4117 	int r;
4118 
4119 	if (cap->flags)
4120 		return -EINVAL;
4121 
4122 	switch (cap->cap) {
4123 	case KVM_CAP_S390_CSS_SUPPORT:
4124 		if (!vcpu->kvm->arch.css_support) {
4125 			vcpu->kvm->arch.css_support = 1;
4126 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4127 			trace_kvm_s390_enable_css(vcpu->kvm);
4128 		}
4129 		r = 0;
4130 		break;
4131 	default:
4132 		r = -EINVAL;
4133 		break;
4134 	}
4135 	return r;
4136 }
4137 
4138 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4139 				  struct kvm_s390_mem_op *mop)
4140 {
4141 	void __user *uaddr = (void __user *)mop->buf;
4142 	void *tmpbuf = NULL;
4143 	int r, srcu_idx;
4144 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4145 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4146 
4147 	if (mop->flags & ~supported_flags)
4148 		return -EINVAL;
4149 
4150 	if (mop->size > MEM_OP_MAX_SIZE)
4151 		return -E2BIG;
4152 
4153 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4154 		tmpbuf = vmalloc(mop->size);
4155 		if (!tmpbuf)
4156 			return -ENOMEM;
4157 	}
4158 
4159 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4160 
4161 	switch (mop->op) {
4162 	case KVM_S390_MEMOP_LOGICAL_READ:
4163 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4164 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4165 					    mop->size, GACC_FETCH);
4166 			break;
4167 		}
4168 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4169 		if (r == 0) {
4170 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4171 				r = -EFAULT;
4172 		}
4173 		break;
4174 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4175 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4176 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4177 					    mop->size, GACC_STORE);
4178 			break;
4179 		}
4180 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4181 			r = -EFAULT;
4182 			break;
4183 		}
4184 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4185 		break;
4186 	default:
4187 		r = -EINVAL;
4188 	}
4189 
4190 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4191 
4192 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4193 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4194 
4195 	vfree(tmpbuf);
4196 	return r;
4197 }
4198 
4199 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4200 			       unsigned int ioctl, unsigned long arg)
4201 {
4202 	struct kvm_vcpu *vcpu = filp->private_data;
4203 	void __user *argp = (void __user *)arg;
4204 
4205 	switch (ioctl) {
4206 	case KVM_S390_IRQ: {
4207 		struct kvm_s390_irq s390irq;
4208 
4209 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4210 			return -EFAULT;
4211 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4212 	}
4213 	case KVM_S390_INTERRUPT: {
4214 		struct kvm_s390_interrupt s390int;
4215 		struct kvm_s390_irq s390irq;
4216 
4217 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4218 			return -EFAULT;
4219 		if (s390int_to_s390irq(&s390int, &s390irq))
4220 			return -EINVAL;
4221 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4222 	}
4223 	}
4224 	return -ENOIOCTLCMD;
4225 }
4226 
4227 long kvm_arch_vcpu_ioctl(struct file *filp,
4228 			 unsigned int ioctl, unsigned long arg)
4229 {
4230 	struct kvm_vcpu *vcpu = filp->private_data;
4231 	void __user *argp = (void __user *)arg;
4232 	int idx;
4233 	long r;
4234 
4235 	vcpu_load(vcpu);
4236 
4237 	switch (ioctl) {
4238 	case KVM_S390_STORE_STATUS:
4239 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4240 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4241 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4242 		break;
4243 	case KVM_S390_SET_INITIAL_PSW: {
4244 		psw_t psw;
4245 
4246 		r = -EFAULT;
4247 		if (copy_from_user(&psw, argp, sizeof(psw)))
4248 			break;
4249 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4250 		break;
4251 	}
4252 	case KVM_S390_INITIAL_RESET:
4253 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4254 		break;
4255 	case KVM_SET_ONE_REG:
4256 	case KVM_GET_ONE_REG: {
4257 		struct kvm_one_reg reg;
4258 		r = -EFAULT;
4259 		if (copy_from_user(&reg, argp, sizeof(reg)))
4260 			break;
4261 		if (ioctl == KVM_SET_ONE_REG)
4262 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4263 		else
4264 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4265 		break;
4266 	}
4267 #ifdef CONFIG_KVM_S390_UCONTROL
4268 	case KVM_S390_UCAS_MAP: {
4269 		struct kvm_s390_ucas_mapping ucasmap;
4270 
4271 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4272 			r = -EFAULT;
4273 			break;
4274 		}
4275 
4276 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4277 			r = -EINVAL;
4278 			break;
4279 		}
4280 
4281 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4282 				     ucasmap.vcpu_addr, ucasmap.length);
4283 		break;
4284 	}
4285 	case KVM_S390_UCAS_UNMAP: {
4286 		struct kvm_s390_ucas_mapping ucasmap;
4287 
4288 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4289 			r = -EFAULT;
4290 			break;
4291 		}
4292 
4293 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4294 			r = -EINVAL;
4295 			break;
4296 		}
4297 
4298 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4299 			ucasmap.length);
4300 		break;
4301 	}
4302 #endif
4303 	case KVM_S390_VCPU_FAULT: {
4304 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4305 		break;
4306 	}
4307 	case KVM_ENABLE_CAP:
4308 	{
4309 		struct kvm_enable_cap cap;
4310 		r = -EFAULT;
4311 		if (copy_from_user(&cap, argp, sizeof(cap)))
4312 			break;
4313 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4314 		break;
4315 	}
4316 	case KVM_S390_MEM_OP: {
4317 		struct kvm_s390_mem_op mem_op;
4318 
4319 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4320 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4321 		else
4322 			r = -EFAULT;
4323 		break;
4324 	}
4325 	case KVM_S390_SET_IRQ_STATE: {
4326 		struct kvm_s390_irq_state irq_state;
4327 
4328 		r = -EFAULT;
4329 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4330 			break;
4331 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4332 		    irq_state.len == 0 ||
4333 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4334 			r = -EINVAL;
4335 			break;
4336 		}
4337 		/* do not use irq_state.flags, it will break old QEMUs */
4338 		r = kvm_s390_set_irq_state(vcpu,
4339 					   (void __user *) irq_state.buf,
4340 					   irq_state.len);
4341 		break;
4342 	}
4343 	case KVM_S390_GET_IRQ_STATE: {
4344 		struct kvm_s390_irq_state irq_state;
4345 
4346 		r = -EFAULT;
4347 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4348 			break;
4349 		if (irq_state.len == 0) {
4350 			r = -EINVAL;
4351 			break;
4352 		}
4353 		/* do not use irq_state.flags, it will break old QEMUs */
4354 		r = kvm_s390_get_irq_state(vcpu,
4355 					   (__u8 __user *)  irq_state.buf,
4356 					   irq_state.len);
4357 		break;
4358 	}
4359 	default:
4360 		r = -ENOTTY;
4361 	}
4362 
4363 	vcpu_put(vcpu);
4364 	return r;
4365 }
4366 
4367 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4368 {
4369 #ifdef CONFIG_KVM_S390_UCONTROL
4370 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4371 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4372 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4373 		get_page(vmf->page);
4374 		return 0;
4375 	}
4376 #endif
4377 	return VM_FAULT_SIGBUS;
4378 }
4379 
/* Nothing to do here for a new memslot; always returns 0. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
4385 
4386 /* Section: memory related */
4387 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4388 				   struct kvm_memory_slot *memslot,
4389 				   const struct kvm_userspace_memory_region *mem,
4390 				   enum kvm_mr_change change)
4391 {
4392 	/* A few sanity checks. We can have memory slots which have to be
4393 	   located/ended at a segment boundary (1MB). The memory in userland is
4394 	   ok to be fragmented into various different vmas. It is okay to mmap()
4395 	   and munmap() stuff in this slot after doing this call at any time */
4396 
4397 	if (mem->userspace_addr & 0xffffful)
4398 		return -EINVAL;
4399 
4400 	if (mem->memory_size & 0xffffful)
4401 		return -EINVAL;
4402 
4403 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4404 		return -EINVAL;
4405 
4406 	return 0;
4407 }
4408 
4409 void kvm_arch_commit_memory_region(struct kvm *kvm,
4410 				const struct kvm_userspace_memory_region *mem,
4411 				const struct kvm_memory_slot *old,
4412 				const struct kvm_memory_slot *new,
4413 				enum kvm_mr_change change)
4414 {
4415 	int rc;
4416 
4417 	/* If the basics of the memslot do not change, we do not want
4418 	 * to update the gmap. Every update causes several unnecessary
4419 	 * segment translation exceptions. This is usually handled just
4420 	 * fine by the normal fault handler + gmap, but it will also
4421 	 * cause faults on the prefix page of running guest CPUs.
4422 	 */
4423 	if (old->userspace_addr == mem->userspace_addr &&
4424 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4425 	    old->npages * PAGE_SIZE == mem->memory_size)
4426 		return;
4427 
4428 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4429 		mem->guest_phys_addr, mem->memory_size);
4430 	if (rc)
4431 		pr_warn("failed to commit memory region\n");
4432 	return;
4433 }
4434 
4435 static inline unsigned long nonhyp_mask(int i)
4436 {
4437 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4438 
4439 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4440 }
4441 
/* Reset the vcpu's valid_wakeup flag to false. */
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
4446 
4447 static int __init kvm_s390_init(void)
4448 {
4449 	int i;
4450 
4451 	if (!sclp.has_sief2) {
4452 		pr_info("SIE is not available\n");
4453 		return -ENODEV;
4454 	}
4455 
4456 	if (nested && hpage) {
4457 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4458 		return -EINVAL;
4459 	}
4460 
4461 	for (i = 0; i < 16; i++)
4462 		kvm_s390_fac_base[i] |=
4463 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4464 
4465 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4466 }
4467 
/* Module exit: just calls kvm_exit(). */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
4472 
/* Register kvm_s390_init()/kvm_s390_exit() as the module entry and exit points */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
4475 
4476 /*
4477  * Enable autoloading of the kvm module.
4478  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4479  * since x86 takes a different approach.
4480  */
4481 #include <linux/miscdevice.h>
4482 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4483 MODULE_ALIAS("devname:kvm");
4484