xref: /linux/arch/s390/kvm/kvm-s390.c (revision bfd5bb6f90af092aa345b15cd78143956a13c2a8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
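/*
 * Worst-case buffer for the vcpu irq state ioctls: the local interrupt types
 * plus, potentially, one pending emergency signal per possible source VCPU
 * (rationale inferred from the sizing below).
 */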
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
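/*
 * Each debugfs entry below pairs a counter name with the offset of that
 * counter in struct kvm_vcpu or struct kvm plus its type, so the generic
 * KVM code can expose it via debugfs.
 */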
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 	{ "deliver_program", VCPU_STAT(deliver_program) },
93 	{ "deliver_io", VCPU_STAT(deliver_io) },
94 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
97 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
98 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
99 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 	{ "inject_io", VM_STAT(inject_io) },
102 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
103 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 	{ "inject_program", VCPU_STAT(inject_program) },
105 	{ "inject_restart", VCPU_STAT(inject_restart) },
106 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
107 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 	{ "inject_virtio", VM_STAT(inject_virtio) },
111 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
113 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
120 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
122 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
124 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
125 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
126 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
128 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
130 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
133 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
138 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
161 	{ NULL }
162 };
163 
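/* clock value as returned by get_tod_clock_ext(): epoch index, 64-bit TOD, remaining clock bits */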
164 struct kvm_s390_tod_clock_ext {
165 	__u8 epoch_idx;
166 	__u64 tod;
167 	__u8 reserved[7];
168 } __packed;
169 
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174 
175 
176 /*
177  * For now we handle at most 16 double words as this is what the s390 base
178  * kernel handles and stores in the prefix page. If we ever need to go beyond
179  * this, the code needs to change, but the external uapi can stay.
180  */
181 #define SIZE_INTERNAL 16
182 
183 /*
184  * Base feature mask that defines the default mask for facilities. It consists
185  * of the defines in FACILITIES_KVM and the non-hypervisor-managed bits.
186  */
187 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
188 /*
189  * Extended feature mask. It consists of the defines in FACILITIES_KVM_CPUMODEL
190  * and lists the facilities that can be enabled via a cpu model.
191  */
192 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
193 
194 static unsigned long kvm_s390_fac_size(void)
195 {
196 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
197 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
198 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
199 		sizeof(S390_lowcore.stfle_fac_list));
200 
201 	return SIZE_INTERNAL;
202 }
203 
204 /* available cpu features supported by kvm */
205 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
206 /* available subfunctions indicated via query / "test bit" */
207 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
208 
209 static struct gmap_notifier gmap_notifier;
210 static struct gmap_notifier vsie_gmap_notifier;
211 debug_info_t *kvm_s390_dbf;
212 
213 /* Section: not file related */
214 int kvm_arch_hardware_enable(void)
215 {
216 	/* every s390 is virtualization enabled ;-) */
217 	return 0;
218 }
219 
220 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
221 			      unsigned long end);
222 
223 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
224 {
225 	u8 delta_idx = 0;
226 
227 	/*
228 	 * The TOD jumps by delta, we have to compensate this by adding
229 	 * -delta to the epoch.
230 	 */
231 	delta = -delta;
232 
233 	/* sign-extension - we're adding to signed values below */
234 	if ((s64)delta < 0)
235 		delta_idx = -1;
236 
237 	scb->epoch += delta;
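	/* with the multiple-epoch facility, propagate the sign/carry into the epoch index */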
238 	if (scb->ecd & ECD_MEF) {
239 		scb->epdx += delta_idx;
240 		if (scb->epoch < delta)
241 			scb->epdx += 1;
242 	}
243 }
244 
245 /*
246  * This callback is executed during stop_machine(). All CPUs are therefore
247  * temporarily stopped. In order not to change guest behavior, we have to
248  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
249  * so a CPU won't be stopped while calculating with the epoch.
250  */
251 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
252 			  void *v)
253 {
254 	struct kvm *kvm;
255 	struct kvm_vcpu *vcpu;
256 	int i;
257 	unsigned long long *delta = v;
258 
259 	list_for_each_entry(kvm, &vm_list, vm_list) {
260 		kvm_for_each_vcpu(i, vcpu, kvm) {
261 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
262 			if (i == 0) {
263 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
264 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
265 			}
266 			if (vcpu->arch.cputm_enabled)
267 				vcpu->arch.cputm_start += *delta;
268 			if (vcpu->arch.vsie_block)
269 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
270 						   *delta);
271 		}
272 	}
273 	return NOTIFY_OK;
274 }
275 
276 static struct notifier_block kvm_clock_notifier = {
277 	.notifier_call = kvm_clock_sync,
278 };
279 
280 int kvm_arch_hardware_setup(void)
281 {
282 	gmap_notifier.notifier_call = kvm_gmap_notifier;
283 	gmap_register_pte_notifier(&gmap_notifier);
284 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
285 	gmap_register_pte_notifier(&vsie_gmap_notifier);
286 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
287 				       &kvm_clock_notifier);
288 	return 0;
289 }
290 
291 void kvm_arch_hardware_unsetup(void)
292 {
293 	gmap_unregister_pte_notifier(&gmap_notifier);
294 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
295 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
296 					 &kvm_clock_notifier);
297 }
298 
299 static void allow_cpu_feat(unsigned long nr)
300 {
301 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
302 }
303 
304 static inline int plo_test_bit(unsigned char nr)
305 {
306 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
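	/* 0x100 is the PLO test bit: ask whether function code 'nr' is installed instead of executing it */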
307 	int cc;
308 
309 	asm volatile(
310 		/* Parameter registers are ignored for "test bit" */
311 		"	plo	0,0,0,0(0)\n"
312 		"	ipm	%0\n"
313 		"	srl	%0,28\n"
314 		: "=d" (cc)
315 		: "d" (r0)
316 		: "cc");
317 	return cc == 0;
318 }
319 
320 static void kvm_s390_cpu_feat_init(void)
321 {
322 	int i;
323 
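	/* probe all 256 PERFORM LOCKED OPERATION (PLO) function codes and record the installed ones */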
324 	for (i = 0; i < 256; ++i) {
325 		if (plo_test_bit(i))
326 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
327 	}
328 
329 	if (test_facility(28)) /* TOD-clock steering */
330 		ptff(kvm_s390_available_subfunc.ptff,
331 		     sizeof(kvm_s390_available_subfunc.ptff),
332 		     PTFF_QAF);
333 
334 	if (test_facility(17)) { /* MSA */
335 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
336 			      kvm_s390_available_subfunc.kmac);
337 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
338 			      kvm_s390_available_subfunc.kmc);
339 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
340 			      kvm_s390_available_subfunc.km);
341 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
342 			      kvm_s390_available_subfunc.kimd);
343 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
344 			      kvm_s390_available_subfunc.klmd);
345 	}
346 	if (test_facility(76)) /* MSA3 */
347 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
348 			      kvm_s390_available_subfunc.pckmo);
349 	if (test_facility(77)) { /* MSA4 */
350 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
351 			      kvm_s390_available_subfunc.kmctr);
352 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
353 			      kvm_s390_available_subfunc.kmf);
354 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
355 			      kvm_s390_available_subfunc.kmo);
356 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
357 			      kvm_s390_available_subfunc.pcc);
358 	}
359 	if (test_facility(57)) /* MSA5 */
360 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
361 			      kvm_s390_available_subfunc.ppno);
362 
363 	if (test_facility(146)) /* MSA8 */
364 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
365 			      kvm_s390_available_subfunc.kma);
366 
367 	if (MACHINE_HAS_ESOP)
368 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
369 	/*
370 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
371 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
372 	 */
373 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
374 	    !test_facility(3) || !nested)
375 		return;
376 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
377 	if (sclp.has_64bscao)
378 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
379 	if (sclp.has_siif)
380 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
381 	if (sclp.has_gpere)
382 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
383 	if (sclp.has_gsls)
384 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
385 	if (sclp.has_ib)
386 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
387 	if (sclp.has_cei)
388 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
389 	if (sclp.has_ibs)
390 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
391 	if (sclp.has_kss)
392 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
393 	/*
394 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
395 	 * all skey handling functions read/set the skey from the PGSTE
396 	 * instead of the real storage key.
397 	 *
398 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
399 	 * pages being detected as preserved although they are resident.
400 	 *
401 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
402 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
403 	 *
404 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
405 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
406 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
407 	 *
408 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
409 	 * cannot easily shadow the SCA because of the ipte lock.
410 	 */
411 }
412 
413 int kvm_arch_init(void *opaque)
414 {
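	/* register the "kvm-trace" s390 debug feature: 32 pages per area, one area, 7 longs per entry */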
415 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
416 	if (!kvm_s390_dbf)
417 		return -ENOMEM;
418 
419 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
420 		debug_unregister(kvm_s390_dbf);
421 		return -ENOMEM;
422 	}
423 
424 	kvm_s390_cpu_feat_init();
425 
426 	/* Register floating interrupt controller interface. */
427 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
428 }
429 
430 void kvm_arch_exit(void)
431 {
432 	debug_unregister(kvm_s390_dbf);
433 }
434 
435 /* Section: device related */
436 long kvm_arch_dev_ioctl(struct file *filp,
437 			unsigned int ioctl, unsigned long arg)
438 {
439 	if (ioctl == KVM_S390_ENABLE_SIE)
440 		return s390_enable_sie();
441 	return -EINVAL;
442 }
443 
444 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
445 {
446 	int r;
447 
448 	switch (ext) {
449 	case KVM_CAP_S390_PSW:
450 	case KVM_CAP_S390_GMAP:
451 	case KVM_CAP_SYNC_MMU:
452 #ifdef CONFIG_KVM_S390_UCONTROL
453 	case KVM_CAP_S390_UCONTROL:
454 #endif
455 	case KVM_CAP_ASYNC_PF:
456 	case KVM_CAP_SYNC_REGS:
457 	case KVM_CAP_ONE_REG:
458 	case KVM_CAP_ENABLE_CAP:
459 	case KVM_CAP_S390_CSS_SUPPORT:
460 	case KVM_CAP_IOEVENTFD:
461 	case KVM_CAP_DEVICE_CTRL:
462 	case KVM_CAP_ENABLE_CAP_VM:
463 	case KVM_CAP_S390_IRQCHIP:
464 	case KVM_CAP_VM_ATTRIBUTES:
465 	case KVM_CAP_MP_STATE:
466 	case KVM_CAP_IMMEDIATE_EXIT:
467 	case KVM_CAP_S390_INJECT_IRQ:
468 	case KVM_CAP_S390_USER_SIGP:
469 	case KVM_CAP_S390_USER_STSI:
470 	case KVM_CAP_S390_SKEYS:
471 	case KVM_CAP_S390_IRQ_STATE:
472 	case KVM_CAP_S390_USER_INSTR0:
473 	case KVM_CAP_S390_CMMA_MIGRATION:
474 	case KVM_CAP_S390_AIS:
475 	case KVM_CAP_S390_AIS_MIGRATION:
476 		r = 1;
477 		break;
478 	case KVM_CAP_S390_MEM_OP:
479 		r = MEM_OP_MAX_SIZE;
480 		break;
481 	case KVM_CAP_NR_VCPUS:
482 	case KVM_CAP_MAX_VCPUS:
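		/* the number of supported VCPUs depends on the SCA format in use (basic vs. extended) */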
483 		r = KVM_S390_BSCA_CPU_SLOTS;
484 		if (!kvm_s390_use_sca_entries())
485 			r = KVM_MAX_VCPUS;
486 		else if (sclp.has_esca && sclp.has_64bscao)
487 			r = KVM_S390_ESCA_CPU_SLOTS;
488 		break;
489 	case KVM_CAP_NR_MEMSLOTS:
490 		r = KVM_USER_MEM_SLOTS;
491 		break;
492 	case KVM_CAP_S390_COW:
493 		r = MACHINE_HAS_ESOP;
494 		break;
495 	case KVM_CAP_S390_VECTOR_REGISTERS:
496 		r = MACHINE_HAS_VX;
497 		break;
498 	case KVM_CAP_S390_RI:
499 		r = test_facility(64);
500 		break;
501 	case KVM_CAP_S390_GS:
502 		r = test_facility(133);
503 		break;
504 	case KVM_CAP_S390_BPB:
505 		r = test_facility(82);
506 		break;
507 	default:
508 		r = 0;
509 	}
510 	return r;
511 }
512 
513 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
514 					struct kvm_memory_slot *memslot)
515 {
516 	gfn_t cur_gfn, last_gfn;
517 	unsigned long address;
518 	struct gmap *gmap = kvm->arch.gmap;
519 
520 	/* Loop over all guest pages */
521 	last_gfn = memslot->base_gfn + memslot->npages;
522 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
523 		address = gfn_to_hva_memslot(memslot, cur_gfn);
524 
525 		if (test_and_clear_guest_dirty(gmap->mm, address))
526 			mark_page_dirty(kvm, cur_gfn);
527 		if (fatal_signal_pending(current))
528 			return;
529 		cond_resched();
530 	}
531 }
532 
533 /* Section: vm related */
534 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
535 
536 /*
537  * Get (and clear) the dirty memory log for a memory slot.
538  */
539 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
540 			       struct kvm_dirty_log *log)
541 {
542 	int r;
543 	unsigned long n;
544 	struct kvm_memslots *slots;
545 	struct kvm_memory_slot *memslot;
546 	int is_dirty = 0;
547 
548 	if (kvm_is_ucontrol(kvm))
549 		return -EINVAL;
550 
551 	mutex_lock(&kvm->slots_lock);
552 
553 	r = -EINVAL;
554 	if (log->slot >= KVM_USER_MEM_SLOTS)
555 		goto out;
556 
557 	slots = kvm_memslots(kvm);
558 	memslot = id_to_memslot(slots, log->slot);
559 	r = -ENOENT;
560 	if (!memslot->dirty_bitmap)
561 		goto out;
562 
563 	kvm_s390_sync_dirty_log(kvm, memslot);
564 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
565 	if (r)
566 		goto out;
567 
568 	/* Clear the dirty log */
569 	if (is_dirty) {
570 		n = kvm_dirty_bitmap_bytes(memslot);
571 		memset(memslot->dirty_bitmap, 0, n);
572 	}
573 	r = 0;
574 out:
575 	mutex_unlock(&kvm->slots_lock);
576 	return r;
577 }
578 
579 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
580 {
581 	unsigned int i;
582 	struct kvm_vcpu *vcpu;
583 
584 	kvm_for_each_vcpu(i, vcpu, kvm) {
585 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
586 	}
587 }
588 
589 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
590 {
591 	int r;
592 
593 	if (cap->flags)
594 		return -EINVAL;
595 
596 	switch (cap->cap) {
597 	case KVM_CAP_S390_IRQCHIP:
598 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
599 		kvm->arch.use_irqchip = 1;
600 		r = 0;
601 		break;
602 	case KVM_CAP_S390_USER_SIGP:
603 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
604 		kvm->arch.user_sigp = 1;
605 		r = 0;
606 		break;
607 	case KVM_CAP_S390_VECTOR_REGISTERS:
608 		mutex_lock(&kvm->lock);
609 		if (kvm->created_vcpus) {
610 			r = -EBUSY;
611 		} else if (MACHINE_HAS_VX) {
612 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
613 			set_kvm_facility(kvm->arch.model.fac_list, 129);
614 			if (test_facility(134)) {
615 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
616 				set_kvm_facility(kvm->arch.model.fac_list, 134);
617 			}
618 			if (test_facility(135)) {
619 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
620 				set_kvm_facility(kvm->arch.model.fac_list, 135);
621 			}
622 			r = 0;
623 		} else
624 			r = -EINVAL;
625 		mutex_unlock(&kvm->lock);
626 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
627 			 r ? "(not available)" : "(success)");
628 		break;
629 	case KVM_CAP_S390_RI:
630 		r = -EINVAL;
631 		mutex_lock(&kvm->lock);
632 		if (kvm->created_vcpus) {
633 			r = -EBUSY;
634 		} else if (test_facility(64)) {
635 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
636 			set_kvm_facility(kvm->arch.model.fac_list, 64);
637 			r = 0;
638 		}
639 		mutex_unlock(&kvm->lock);
640 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
641 			 r ? "(not available)" : "(success)");
642 		break;
643 	case KVM_CAP_S390_AIS:
644 		mutex_lock(&kvm->lock);
645 		if (kvm->created_vcpus) {
646 			r = -EBUSY;
647 		} else {
648 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
649 			set_kvm_facility(kvm->arch.model.fac_list, 72);
650 			r = 0;
651 		}
652 		mutex_unlock(&kvm->lock);
653 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
654 			 r ? "(not available)" : "(success)");
655 		break;
656 	case KVM_CAP_S390_GS:
657 		r = -EINVAL;
658 		mutex_lock(&kvm->lock);
659 		if (kvm->created_vcpus) {
660 			r = -EBUSY;
661 		} else if (test_facility(133)) {
662 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
663 			set_kvm_facility(kvm->arch.model.fac_list, 133);
664 			r = 0;
665 		}
666 		mutex_unlock(&kvm->lock);
667 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
668 			 r ? "(not available)" : "(success)");
669 		break;
670 	case KVM_CAP_S390_USER_STSI:
671 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
672 		kvm->arch.user_stsi = 1;
673 		r = 0;
674 		break;
675 	case KVM_CAP_S390_USER_INSTR0:
676 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
677 		kvm->arch.user_instr0 = 1;
678 		icpt_operexc_on_all_vcpus(kvm);
679 		r = 0;
680 		break;
681 	default:
682 		r = -EINVAL;
683 		break;
684 	}
685 	return r;
686 }
687 
688 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
689 {
690 	int ret;
691 
692 	switch (attr->attr) {
693 	case KVM_S390_VM_MEM_LIMIT_SIZE:
694 		ret = 0;
695 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
696 			 kvm->arch.mem_limit);
697 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
698 			ret = -EFAULT;
699 		break;
700 	default:
701 		ret = -ENXIO;
702 		break;
703 	}
704 	return ret;
705 }
706 
707 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709 	int ret;
710 	unsigned int idx;
711 	switch (attr->attr) {
712 	case KVM_S390_VM_MEM_ENABLE_CMMA:
713 		ret = -ENXIO;
714 		if (!sclp.has_cmma)
715 			break;
716 
717 		ret = -EBUSY;
718 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
719 		mutex_lock(&kvm->lock);
720 		if (!kvm->created_vcpus) {
721 			kvm->arch.use_cmma = 1;
722 			/* Not compatible with cmma. */
723 			kvm->arch.use_pfmfi = 0;
724 			ret = 0;
725 		}
726 		mutex_unlock(&kvm->lock);
727 		break;
728 	case KVM_S390_VM_MEM_CLR_CMMA:
729 		ret = -ENXIO;
730 		if (!sclp.has_cmma)
731 			break;
732 		ret = -EINVAL;
733 		if (!kvm->arch.use_cmma)
734 			break;
735 
736 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
737 		mutex_lock(&kvm->lock);
738 		idx = srcu_read_lock(&kvm->srcu);
739 		s390_reset_cmma(kvm->arch.gmap->mm);
740 		srcu_read_unlock(&kvm->srcu, idx);
741 		mutex_unlock(&kvm->lock);
742 		ret = 0;
743 		break;
744 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
745 		unsigned long new_limit;
746 
747 		if (kvm_is_ucontrol(kvm))
748 			return -EINVAL;
749 
750 		if (get_user(new_limit, (u64 __user *)attr->addr))
751 			return -EFAULT;
752 
753 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
754 		    new_limit > kvm->arch.mem_limit)
755 			return -E2BIG;
756 
757 		if (!new_limit)
758 			return -EINVAL;
759 
760 		/* gmap_create takes last usable address */
761 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
762 			new_limit -= 1;
763 
764 		ret = -EBUSY;
765 		mutex_lock(&kvm->lock);
766 		if (!kvm->created_vcpus) {
767 			/* gmap_create will round the limit up */
768 			struct gmap *new = gmap_create(current->mm, new_limit);
769 
770 			if (!new) {
771 				ret = -ENOMEM;
772 			} else {
773 				gmap_remove(kvm->arch.gmap);
774 				new->private = kvm;
775 				kvm->arch.gmap = new;
776 				ret = 0;
777 			}
778 		}
779 		mutex_unlock(&kvm->lock);
780 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
781 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
782 			 (void *) kvm->arch.gmap->asce);
783 		break;
784 	}
785 	default:
786 		ret = -ENXIO;
787 		break;
788 	}
789 	return ret;
790 }
791 
792 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
793 
794 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
795 {
796 	struct kvm_vcpu *vcpu;
797 	int i;
798 
799 	kvm_s390_vcpu_block_all(kvm);
800 
801 	kvm_for_each_vcpu(i, vcpu, kvm)
802 		kvm_s390_vcpu_crypto_setup(vcpu);
803 
804 	kvm_s390_vcpu_unblock_all(kvm);
805 }
806 
807 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
808 {
809 	if (!test_kvm_facility(kvm, 76))
810 		return -EINVAL;
811 
812 	mutex_lock(&kvm->lock);
813 	switch (attr->attr) {
814 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
815 		get_random_bytes(
816 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
817 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
818 		kvm->arch.crypto.aes_kw = 1;
819 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
820 		break;
821 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
822 		get_random_bytes(
823 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
824 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
825 		kvm->arch.crypto.dea_kw = 1;
826 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
827 		break;
828 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
829 		kvm->arch.crypto.aes_kw = 0;
830 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
831 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
832 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
833 		break;
834 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
835 		kvm->arch.crypto.dea_kw = 0;
836 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
837 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
838 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
839 		break;
840 	default:
841 		mutex_unlock(&kvm->lock);
842 		return -ENXIO;
843 	}
844 
845 	kvm_s390_vcpu_crypto_reset_all(kvm);
846 	mutex_unlock(&kvm->lock);
847 	return 0;
848 }
849 
850 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
851 {
852 	int cx;
853 	struct kvm_vcpu *vcpu;
854 
855 	kvm_for_each_vcpu(cx, vcpu, kvm)
856 		kvm_s390_sync_request(req, vcpu);
857 }
858 
859 /*
860  * Must be called with kvm->srcu held to avoid races on memslots, and with
861  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
862  */
863 static int kvm_s390_vm_start_migration(struct kvm *kvm)
864 {
865 	struct kvm_s390_migration_state *mgs;
866 	struct kvm_memory_slot *ms;
867 	/* should be the only one */
868 	struct kvm_memslots *slots;
869 	unsigned long ram_pages;
870 	int slotnr;
871 
872 	/* migration mode already enabled */
873 	if (kvm->arch.migration_state)
874 		return 0;
875 
876 	slots = kvm_memslots(kvm);
877 	if (!slots || !slots->used_slots)
878 		return -EINVAL;
879 
880 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
881 	if (!mgs)
882 		return -ENOMEM;
883 	kvm->arch.migration_state = mgs;
884 
885 	if (kvm->arch.use_cmma) {
886 		/*
887 		 * Get the first slot. They are reverse sorted by base_gfn, so
888 		 * the first slot is also the one at the end of the address
889 		 * space. We have verified above that at least one slot is
890 		 * present.
891 		 */
892 		ms = slots->memslots;
893 		/* round up so we only use full longs */
894 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
895 		/* allocate enough bytes to store all the bits */
896 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
897 		if (!mgs->pgste_bitmap) {
898 			kfree(mgs);
899 			kvm->arch.migration_state = NULL;
900 			return -ENOMEM;
901 		}
902 
903 		mgs->bitmap_size = ram_pages;
904 		atomic64_set(&mgs->dirty_pages, ram_pages);
905 		/* mark all the pages in active slots as dirty */
906 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
907 			ms = slots->memslots + slotnr;
908 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
909 		}
910 
911 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
912 	}
913 	return 0;
914 }
915 
916 /*
917  * Must be called with kvm->slots_lock to avoid races with ourselves and
918  * kvm_s390_vm_start_migration.
919  */
920 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
921 {
922 	struct kvm_s390_migration_state *mgs;
923 
924 	/* migration mode already disabled */
925 	if (!kvm->arch.migration_state)
926 		return 0;
927 	mgs = kvm->arch.migration_state;
928 	kvm->arch.migration_state = NULL;
929 
930 	if (kvm->arch.use_cmma) {
931 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
932 		/* We have to wait for the essa emulation to finish */
933 		synchronize_srcu(&kvm->srcu);
934 		vfree(mgs->pgste_bitmap);
935 	}
936 	kfree(mgs);
937 	return 0;
938 }
939 
940 static int kvm_s390_vm_set_migration(struct kvm *kvm,
941 				     struct kvm_device_attr *attr)
942 {
943 	int res = -ENXIO;
944 
945 	mutex_lock(&kvm->slots_lock);
946 	switch (attr->attr) {
947 	case KVM_S390_VM_MIGRATION_START:
948 		res = kvm_s390_vm_start_migration(kvm);
949 		break;
950 	case KVM_S390_VM_MIGRATION_STOP:
951 		res = kvm_s390_vm_stop_migration(kvm);
952 		break;
953 	default:
954 		break;
955 	}
956 	mutex_unlock(&kvm->slots_lock);
957 
958 	return res;
959 }
960 
961 static int kvm_s390_vm_get_migration(struct kvm *kvm,
962 				     struct kvm_device_attr *attr)
963 {
964 	u64 mig = (kvm->arch.migration_state != NULL);
965 
966 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
967 		return -ENXIO;
968 
969 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
970 		return -EFAULT;
971 	return 0;
972 }
973 
974 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
975 {
976 	struct kvm_s390_vm_tod_clock gtod;
977 
978 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
979 		return -EFAULT;
980 
981 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
982 		return -EINVAL;
983 	kvm_s390_set_tod_clock(kvm, &gtod);
984 
985 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
986 		gtod.epoch_idx, gtod.tod);
987 
988 	return 0;
989 }
990 
991 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
992 {
993 	u8 gtod_high;
994 
995 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
996 					   sizeof(gtod_high)))
997 		return -EFAULT;
998 
999 	if (gtod_high != 0)
1000 		return -EINVAL;
1001 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1002 
1003 	return 0;
1004 }
1005 
1006 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1007 {
1008 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1009 
1010 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1011 			   sizeof(gtod.tod)))
1012 		return -EFAULT;
1013 
1014 	kvm_s390_set_tod_clock(kvm, &gtod);
1015 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1016 	return 0;
1017 }
1018 
1019 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1020 {
1021 	int ret;
1022 
1023 	if (attr->flags)
1024 		return -EINVAL;
1025 
1026 	switch (attr->attr) {
1027 	case KVM_S390_VM_TOD_EXT:
1028 		ret = kvm_s390_set_tod_ext(kvm, attr);
1029 		break;
1030 	case KVM_S390_VM_TOD_HIGH:
1031 		ret = kvm_s390_set_tod_high(kvm, attr);
1032 		break;
1033 	case KVM_S390_VM_TOD_LOW:
1034 		ret = kvm_s390_set_tod_low(kvm, attr);
1035 		break;
1036 	default:
1037 		ret = -ENXIO;
1038 		break;
1039 	}
1040 	return ret;
1041 }
1042 
1043 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1044 				   struct kvm_s390_vm_tod_clock *gtod)
1045 {
1046 	struct kvm_s390_tod_clock_ext htod;
1047 
1048 	preempt_disable();
1049 
1050 	get_tod_clock_ext((char *)&htod);
1051 
1052 	gtod->tod = htod.tod + kvm->arch.epoch;
1053 	gtod->epoch_idx = 0;
1054 	if (test_kvm_facility(kvm, 139)) {
1055 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
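		/* carry into the epoch index if adding the epoch wrapped the 64-bit TOD above */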
1056 		if (gtod->tod < htod.tod)
1057 			gtod->epoch_idx += 1;
1058 	}
1059 
1060 	preempt_enable();
1061 }
1062 
1063 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1064 {
1065 	struct kvm_s390_vm_tod_clock gtod;
1066 
1067 	memset(&gtod, 0, sizeof(gtod));
1068 	kvm_s390_get_tod_clock(kvm, &gtod);
1069 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1070 		return -EFAULT;
1071 
1072 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1073 		gtod.epoch_idx, gtod.tod);
1074 	return 0;
1075 }
1076 
1077 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1078 {
1079 	u8 gtod_high = 0;
1080 
1081 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1082 					 sizeof(gtod_high)))
1083 		return -EFAULT;
1084 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1085 
1086 	return 0;
1087 }
1088 
1089 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1090 {
1091 	u64 gtod;
1092 
1093 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1094 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1095 		return -EFAULT;
1096 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1097 
1098 	return 0;
1099 }
1100 
1101 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103 	int ret;
1104 
1105 	if (attr->flags)
1106 		return -EINVAL;
1107 
1108 	switch (attr->attr) {
1109 	case KVM_S390_VM_TOD_EXT:
1110 		ret = kvm_s390_get_tod_ext(kvm, attr);
1111 		break;
1112 	case KVM_S390_VM_TOD_HIGH:
1113 		ret = kvm_s390_get_tod_high(kvm, attr);
1114 		break;
1115 	case KVM_S390_VM_TOD_LOW:
1116 		ret = kvm_s390_get_tod_low(kvm, attr);
1117 		break;
1118 	default:
1119 		ret = -ENXIO;
1120 		break;
1121 	}
1122 	return ret;
1123 }
1124 
1125 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1126 {
1127 	struct kvm_s390_vm_cpu_processor *proc;
1128 	u16 lowest_ibc, unblocked_ibc;
1129 	int ret = 0;
1130 
1131 	mutex_lock(&kvm->lock);
1132 	if (kvm->created_vcpus) {
1133 		ret = -EBUSY;
1134 		goto out;
1135 	}
1136 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1137 	if (!proc) {
1138 		ret = -ENOMEM;
1139 		goto out;
1140 	}
1141 	if (!copy_from_user(proc, (void __user *)attr->addr,
1142 			    sizeof(*proc))) {
1143 		kvm->arch.model.cpuid = proc->cpuid;
1144 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1145 		unblocked_ibc = sclp.ibc & 0xfff;
1146 		if (lowest_ibc && proc->ibc) {
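			/* clamp the requested IBC value to the range the machine supports */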
1147 			if (proc->ibc > unblocked_ibc)
1148 				kvm->arch.model.ibc = unblocked_ibc;
1149 			else if (proc->ibc < lowest_ibc)
1150 				kvm->arch.model.ibc = lowest_ibc;
1151 			else
1152 				kvm->arch.model.ibc = proc->ibc;
1153 		}
1154 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1155 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1156 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1157 			 kvm->arch.model.ibc,
1158 			 kvm->arch.model.cpuid);
1159 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1160 			 kvm->arch.model.fac_list[0],
1161 			 kvm->arch.model.fac_list[1],
1162 			 kvm->arch.model.fac_list[2]);
1163 	} else
1164 		ret = -EFAULT;
1165 	kfree(proc);
1166 out:
1167 	mutex_unlock(&kvm->lock);
1168 	return ret;
1169 }
1170 
1171 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1172 				       struct kvm_device_attr *attr)
1173 {
1174 	struct kvm_s390_vm_cpu_feat data;
1175 
1176 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1177 		return -EFAULT;
1178 	if (!bitmap_subset((unsigned long *) data.feat,
1179 			   kvm_s390_available_cpu_feat,
1180 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1181 		return -EINVAL;
1182 
1183 	mutex_lock(&kvm->lock);
1184 	if (kvm->created_vcpus) {
1185 		mutex_unlock(&kvm->lock);
1186 		return -EBUSY;
1187 	}
1188 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1189 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1190 	mutex_unlock(&kvm->lock);
1191 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1192 			 data.feat[0],
1193 			 data.feat[1],
1194 			 data.feat[2]);
1195 	return 0;
1196 }
1197 
1198 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1199 					  struct kvm_device_attr *attr)
1200 {
1201 	/*
1202 	 * Once supported by kernel + hw, we have to store the subfunctions
1203 	 * in kvm->arch and remember that user space configured them.
1204 	 */
1205 	return -ENXIO;
1206 }
1207 
1208 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1209 {
1210 	int ret = -ENXIO;
1211 
1212 	switch (attr->attr) {
1213 	case KVM_S390_VM_CPU_PROCESSOR:
1214 		ret = kvm_s390_set_processor(kvm, attr);
1215 		break;
1216 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1217 		ret = kvm_s390_set_processor_feat(kvm, attr);
1218 		break;
1219 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1220 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1221 		break;
1222 	}
1223 	return ret;
1224 }
1225 
1226 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1227 {
1228 	struct kvm_s390_vm_cpu_processor *proc;
1229 	int ret = 0;
1230 
1231 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1232 	if (!proc) {
1233 		ret = -ENOMEM;
1234 		goto out;
1235 	}
1236 	proc->cpuid = kvm->arch.model.cpuid;
1237 	proc->ibc = kvm->arch.model.ibc;
1238 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1239 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1240 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1241 		 kvm->arch.model.ibc,
1242 		 kvm->arch.model.cpuid);
1243 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1244 		 kvm->arch.model.fac_list[0],
1245 		 kvm->arch.model.fac_list[1],
1246 		 kvm->arch.model.fac_list[2]);
1247 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1248 		ret = -EFAULT;
1249 	kfree(proc);
1250 out:
1251 	return ret;
1252 }
1253 
1254 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1255 {
1256 	struct kvm_s390_vm_cpu_machine *mach;
1257 	int ret = 0;
1258 
1259 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1260 	if (!mach) {
1261 		ret = -ENOMEM;
1262 		goto out;
1263 	}
1264 	get_cpu_id((struct cpuid *) &mach->cpuid);
1265 	mach->ibc = sclp.ibc;
1266 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1267 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1268 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1269 	       sizeof(S390_lowcore.stfle_fac_list));
1270 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1271 		 kvm->arch.model.ibc,
1272 		 kvm->arch.model.cpuid);
1273 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1274 		 mach->fac_mask[0],
1275 		 mach->fac_mask[1],
1276 		 mach->fac_mask[2]);
1277 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1278 		 mach->fac_list[0],
1279 		 mach->fac_list[1],
1280 		 mach->fac_list[2]);
1281 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1282 		ret = -EFAULT;
1283 	kfree(mach);
1284 out:
1285 	return ret;
1286 }
1287 
1288 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1289 				       struct kvm_device_attr *attr)
1290 {
1291 	struct kvm_s390_vm_cpu_feat data;
1292 
1293 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1294 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1295 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1296 		return -EFAULT;
1297 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1298 			 data.feat[0],
1299 			 data.feat[1],
1300 			 data.feat[2]);
1301 	return 0;
1302 }
1303 
1304 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1305 				     struct kvm_device_attr *attr)
1306 {
1307 	struct kvm_s390_vm_cpu_feat data;
1308 
1309 	bitmap_copy((unsigned long *) data.feat,
1310 		    kvm_s390_available_cpu_feat,
1311 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1312 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1313 		return -EFAULT;
1314 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1315 			 data.feat[0],
1316 			 data.feat[1],
1317 			 data.feat[2]);
1318 	return 0;
1319 }
1320 
1321 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1322 					  struct kvm_device_attr *attr)
1323 {
1324 	/*
1325 	 * Once we can actually configure subfunctions (kernel + hw support),
1326 	 * we have to check if they were already set by user space, if so copy
1327 	 * them from kvm->arch.
1328 	 */
1329 	return -ENXIO;
1330 }
1331 
1332 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1333 					struct kvm_device_attr *attr)
1334 {
1335 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1336 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1337 		return -EFAULT;
1338 	return 0;
1339 }
1340 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1341 {
1342 	int ret = -ENXIO;
1343 
1344 	switch (attr->attr) {
1345 	case KVM_S390_VM_CPU_PROCESSOR:
1346 		ret = kvm_s390_get_processor(kvm, attr);
1347 		break;
1348 	case KVM_S390_VM_CPU_MACHINE:
1349 		ret = kvm_s390_get_machine(kvm, attr);
1350 		break;
1351 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1352 		ret = kvm_s390_get_processor_feat(kvm, attr);
1353 		break;
1354 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1355 		ret = kvm_s390_get_machine_feat(kvm, attr);
1356 		break;
1357 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1358 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1359 		break;
1360 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1361 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1362 		break;
1363 	}
1364 	return ret;
1365 }
1366 
1367 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1368 {
1369 	int ret;
1370 
1371 	switch (attr->group) {
1372 	case KVM_S390_VM_MEM_CTRL:
1373 		ret = kvm_s390_set_mem_control(kvm, attr);
1374 		break;
1375 	case KVM_S390_VM_TOD:
1376 		ret = kvm_s390_set_tod(kvm, attr);
1377 		break;
1378 	case KVM_S390_VM_CPU_MODEL:
1379 		ret = kvm_s390_set_cpu_model(kvm, attr);
1380 		break;
1381 	case KVM_S390_VM_CRYPTO:
1382 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1383 		break;
1384 	case KVM_S390_VM_MIGRATION:
1385 		ret = kvm_s390_vm_set_migration(kvm, attr);
1386 		break;
1387 	default:
1388 		ret = -ENXIO;
1389 		break;
1390 	}
1391 
1392 	return ret;
1393 }
1394 
1395 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1396 {
1397 	int ret;
1398 
1399 	switch (attr->group) {
1400 	case KVM_S390_VM_MEM_CTRL:
1401 		ret = kvm_s390_get_mem_control(kvm, attr);
1402 		break;
1403 	case KVM_S390_VM_TOD:
1404 		ret = kvm_s390_get_tod(kvm, attr);
1405 		break;
1406 	case KVM_S390_VM_CPU_MODEL:
1407 		ret = kvm_s390_get_cpu_model(kvm, attr);
1408 		break;
1409 	case KVM_S390_VM_MIGRATION:
1410 		ret = kvm_s390_vm_get_migration(kvm, attr);
1411 		break;
1412 	default:
1413 		ret = -ENXIO;
1414 		break;
1415 	}
1416 
1417 	return ret;
1418 }
1419 
1420 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1421 {
1422 	int ret;
1423 
1424 	switch (attr->group) {
1425 	case KVM_S390_VM_MEM_CTRL:
1426 		switch (attr->attr) {
1427 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1428 		case KVM_S390_VM_MEM_CLR_CMMA:
1429 			ret = sclp.has_cmma ? 0 : -ENXIO;
1430 			break;
1431 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1432 			ret = 0;
1433 			break;
1434 		default:
1435 			ret = -ENXIO;
1436 			break;
1437 		}
1438 		break;
1439 	case KVM_S390_VM_TOD:
1440 		switch (attr->attr) {
1441 		case KVM_S390_VM_TOD_LOW:
1442 		case KVM_S390_VM_TOD_HIGH:
1443 			ret = 0;
1444 			break;
1445 		default:
1446 			ret = -ENXIO;
1447 			break;
1448 		}
1449 		break;
1450 	case KVM_S390_VM_CPU_MODEL:
1451 		switch (attr->attr) {
1452 		case KVM_S390_VM_CPU_PROCESSOR:
1453 		case KVM_S390_VM_CPU_MACHINE:
1454 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1455 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1456 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1457 			ret = 0;
1458 			break;
1459 		/* configuring subfunctions is not supported yet */
1460 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1461 		default:
1462 			ret = -ENXIO;
1463 			break;
1464 		}
1465 		break;
1466 	case KVM_S390_VM_CRYPTO:
1467 		switch (attr->attr) {
1468 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1469 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1470 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1471 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1472 			ret = 0;
1473 			break;
1474 		default:
1475 			ret = -ENXIO;
1476 			break;
1477 		}
1478 		break;
1479 	case KVM_S390_VM_MIGRATION:
1480 		ret = 0;
1481 		break;
1482 	default:
1483 		ret = -ENXIO;
1484 		break;
1485 	}
1486 
1487 	return ret;
1488 }
1489 
1490 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1491 {
1492 	uint8_t *keys;
1493 	uint64_t hva;
1494 	int srcu_idx, i, r = 0;
1495 
1496 	if (args->flags != 0)
1497 		return -EINVAL;
1498 
1499 	/* Is this guest using storage keys? */
1500 	if (!mm_uses_skeys(current->mm))
1501 		return KVM_S390_GET_SKEYS_NONE;
1502 
1503 	/* Enforce sane limit on memory allocation */
1504 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1505 		return -EINVAL;
1506 
1507 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1508 	if (!keys)
1509 		return -ENOMEM;
1510 
1511 	down_read(&current->mm->mmap_sem);
1512 	srcu_idx = srcu_read_lock(&kvm->srcu);
1513 	for (i = 0; i < args->count; i++) {
1514 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1515 		if (kvm_is_error_hva(hva)) {
1516 			r = -EFAULT;
1517 			break;
1518 		}
1519 
1520 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1521 		if (r)
1522 			break;
1523 	}
1524 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1525 	up_read(&current->mm->mmap_sem);
1526 
1527 	if (!r) {
1528 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1529 				 sizeof(uint8_t) * args->count);
1530 		if (r)
1531 			r = -EFAULT;
1532 	}
1533 
1534 	kvfree(keys);
1535 	return r;
1536 }
1537 
1538 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1539 {
1540 	uint8_t *keys;
1541 	uint64_t hva;
1542 	int srcu_idx, i, r = 0;
1543 
1544 	if (args->flags != 0)
1545 		return -EINVAL;
1546 
1547 	/* Enforce sane limit on memory allocation */
1548 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1549 		return -EINVAL;
1550 
1551 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1552 	if (!keys)
1553 		return -ENOMEM;
1554 
1555 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1556 			   sizeof(uint8_t) * args->count);
1557 	if (r) {
1558 		r = -EFAULT;
1559 		goto out;
1560 	}
1561 
1562 	/* Enable storage key handling for the guest */
1563 	r = s390_enable_skey();
1564 	if (r)
1565 		goto out;
1566 
1567 	down_read(&current->mm->mmap_sem);
1568 	srcu_idx = srcu_read_lock(&kvm->srcu);
1569 	for (i = 0; i < args->count; i++) {
1570 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1571 		if (kvm_is_error_hva(hva)) {
1572 			r = -EFAULT;
1573 			break;
1574 		}
1575 
1576 		/* Lowest order bit is reserved */
1577 		if (keys[i] & 0x01) {
1578 			r = -EINVAL;
1579 			break;
1580 		}
1581 
1582 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1583 		if (r)
1584 			break;
1585 	}
1586 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1587 	up_read(&current->mm->mmap_sem);
1588 out:
1589 	kvfree(keys);
1590 	return r;
1591 }
1592 
1593 /*
1594  * Base address and length must be sent at the start of each block, therefore
1595  * it's cheaper to send some clean data, as long as it's less than the size of
1596  * two longs.
1597  */
1598 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1599 /* for consistency */
1600 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1601 
1602 /*
1603  * This function searches for the next page with dirty CMMA attributes, and
1604  * saves the attributes in the buffer up to either the end of the buffer or
1605  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1606  * no trailing clean bytes are saved.
1607  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1608  * output buffer will indicate 0 as length.
1609  */
1610 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1611 				  struct kvm_s390_cmma_log *args)
1612 {
1613 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1614 	unsigned long bufsize, hva, pgstev, i, next, cur;
1615 	int srcu_idx, peek, r = 0, rr;
1616 	u8 *res;
1617 
1618 	cur = args->start_gfn;
1619 	i = next = pgstev = 0;
1620 
1621 	if (unlikely(!kvm->arch.use_cmma))
1622 		return -ENXIO;
1623 	/* Invalid/unsupported flags were specified */
1624 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1625 		return -EINVAL;
1626 	/* Migration mode query, and we are not doing a migration */
1627 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1628 	if (!peek && !s)
1629 		return -EINVAL;
1630 	/* CMMA is disabled or was not used, or the buffer has length zero */
1631 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1632 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1633 		memset(args, 0, sizeof(*args));
1634 		return 0;
1635 	}
1636 
1637 	if (!peek) {
1638 		/* We are not peeking, and there are no dirty pages */
1639 		if (!atomic64_read(&s->dirty_pages)) {
1640 			memset(args, 0, sizeof(*args));
1641 			return 0;
1642 		}
1643 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1644 				    args->start_gfn);
1645 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1646 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1647 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1648 			memset(args, 0, sizeof(*args));
1649 			return 0;
1650 		}
1651 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1652 	}
1653 
1654 	res = vmalloc(bufsize);
1655 	if (!res)
1656 		return -ENOMEM;
1657 
1658 	args->start_gfn = cur;
1659 
1660 	down_read(&kvm->mm->mmap_sem);
1661 	srcu_idx = srcu_read_lock(&kvm->srcu);
1662 	while (i < bufsize) {
1663 		hva = gfn_to_hva(kvm, cur);
1664 		if (kvm_is_error_hva(hva)) {
1665 			r = -EFAULT;
1666 			break;
1667 		}
1668 		/* decrement only if we actually flipped the bit to 0 */
1669 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1670 			atomic64_dec(&s->dirty_pages);
1671 		r = get_pgste(kvm->mm, hva, &pgstev);
1672 		if (r < 0)
1673 			pgstev = 0;
1674 		/* save the value */
1675 		res[i++] = (pgstev >> 24) & 0x43;
1676 		/*
1677 		 * if the next bit is too far away, stop.
1678 		 * if we reached the previous "next", find the next one
1679 		 */
1680 		if (!peek) {
1681 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1682 				break;
1683 			if (cur == next)
1684 				next = find_next_bit(s->pgste_bitmap,
1685 						     s->bitmap_size, cur + 1);
1686 		/* reached the end of the bitmap or of the buffer, stop */
1687 			if ((next >= s->bitmap_size) ||
1688 			    (next >= args->start_gfn + bufsize))
1689 				break;
1690 		}
1691 		cur++;
1692 	}
1693 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1694 	up_read(&kvm->mm->mmap_sem);
1695 	args->count = i;
1696 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1697 
1698 	rr = copy_to_user((void __user *)args->values, res, args->count);
1699 	if (rr)
1700 		r = -EFAULT;
1701 
1702 	vfree(res);
1703 	return r;
1704 }
1705 
1706 /*
1707  * This function sets the CMMA attributes for the given pages. If the input
1708  * buffer has zero length, no action is taken, otherwise the attributes are
1709  * set and the mm->context.uses_cmm flag is set.
1710  */
1711 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1712 				  const struct kvm_s390_cmma_log *args)
1713 {
1714 	unsigned long hva, mask, pgstev, i;
1715 	uint8_t *bits;
1716 	int srcu_idx, r = 0;
1717 
1718 	mask = args->mask;
1719 
1720 	if (!kvm->arch.use_cmma)
1721 		return -ENXIO;
1722 	/* invalid/unsupported flags */
1723 	if (args->flags != 0)
1724 		return -EINVAL;
1725 	/* Enforce sane limit on memory allocation */
1726 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1727 		return -EINVAL;
1728 	/* Nothing to do */
1729 	if (args->count == 0)
1730 		return 0;
1731 
1732 	bits = vmalloc(array_size(sizeof(*bits), args->count));
1733 	if (!bits)
1734 		return -ENOMEM;
1735 
1736 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1737 	if (r) {
1738 		r = -EFAULT;
1739 		goto out;
1740 	}
1741 
1742 	down_read(&kvm->mm->mmap_sem);
1743 	srcu_idx = srcu_read_lock(&kvm->srcu);
1744 	for (i = 0; i < args->count; i++) {
1745 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1746 		if (kvm_is_error_hva(hva)) {
1747 			r = -EFAULT;
1748 			break;
1749 		}
1750 
1751 		pgstev = bits[i];
1752 		pgstev = pgstev << 24;
1753 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1754 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1755 	}
1756 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1757 	up_read(&kvm->mm->mmap_sem);
1758 
1759 	if (!kvm->mm->context.uses_cmm) {
1760 		down_write(&kvm->mm->mmap_sem);
1761 		kvm->mm->context.uses_cmm = 1;
1762 		up_write(&kvm->mm->mmap_sem);
1763 	}
1764 out:
1765 	vfree(bits);
1766 	return r;
1767 }
1768 
1769 long kvm_arch_vm_ioctl(struct file *filp,
1770 		       unsigned int ioctl, unsigned long arg)
1771 {
1772 	struct kvm *kvm = filp->private_data;
1773 	void __user *argp = (void __user *)arg;
1774 	struct kvm_device_attr attr;
1775 	int r;
1776 
1777 	switch (ioctl) {
1778 	case KVM_S390_INTERRUPT: {
1779 		struct kvm_s390_interrupt s390int;
1780 
1781 		r = -EFAULT;
1782 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1783 			break;
1784 		r = kvm_s390_inject_vm(kvm, &s390int);
1785 		break;
1786 	}
1787 	case KVM_ENABLE_CAP: {
1788 		struct kvm_enable_cap cap;
1789 		r = -EFAULT;
1790 		if (copy_from_user(&cap, argp, sizeof(cap)))
1791 			break;
1792 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1793 		break;
1794 	}
1795 	case KVM_CREATE_IRQCHIP: {
1796 		struct kvm_irq_routing_entry routing;
1797 
1798 		r = -EINVAL;
1799 		if (kvm->arch.use_irqchip) {
1800 			/* Set up dummy routing. */
1801 			memset(&routing, 0, sizeof(routing));
1802 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1803 		}
1804 		break;
1805 	}
1806 	case KVM_SET_DEVICE_ATTR: {
1807 		r = -EFAULT;
1808 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1809 			break;
1810 		r = kvm_s390_vm_set_attr(kvm, &attr);
1811 		break;
1812 	}
1813 	case KVM_GET_DEVICE_ATTR: {
1814 		r = -EFAULT;
1815 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1816 			break;
1817 		r = kvm_s390_vm_get_attr(kvm, &attr);
1818 		break;
1819 	}
1820 	case KVM_HAS_DEVICE_ATTR: {
1821 		r = -EFAULT;
1822 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1823 			break;
1824 		r = kvm_s390_vm_has_attr(kvm, &attr);
1825 		break;
1826 	}
1827 	case KVM_S390_GET_SKEYS: {
1828 		struct kvm_s390_skeys args;
1829 
1830 		r = -EFAULT;
1831 		if (copy_from_user(&args, argp,
1832 				   sizeof(struct kvm_s390_skeys)))
1833 			break;
1834 		r = kvm_s390_get_skeys(kvm, &args);
1835 		break;
1836 	}
1837 	case KVM_S390_SET_SKEYS: {
1838 		struct kvm_s390_skeys args;
1839 
1840 		r = -EFAULT;
1841 		if (copy_from_user(&args, argp,
1842 				   sizeof(struct kvm_s390_skeys)))
1843 			break;
1844 		r = kvm_s390_set_skeys(kvm, &args);
1845 		break;
1846 	}
1847 	case KVM_S390_GET_CMMA_BITS: {
1848 		struct kvm_s390_cmma_log args;
1849 
1850 		r = -EFAULT;
1851 		if (copy_from_user(&args, argp, sizeof(args)))
1852 			break;
1853 		mutex_lock(&kvm->slots_lock);
1854 		r = kvm_s390_get_cmma_bits(kvm, &args);
1855 		mutex_unlock(&kvm->slots_lock);
1856 		if (!r) {
1857 			r = copy_to_user(argp, &args, sizeof(args));
1858 			if (r)
1859 				r = -EFAULT;
1860 		}
1861 		break;
1862 	}
1863 	case KVM_S390_SET_CMMA_BITS: {
1864 		struct kvm_s390_cmma_log args;
1865 
1866 		r = -EFAULT;
1867 		if (copy_from_user(&args, argp, sizeof(args)))
1868 			break;
1869 		mutex_lock(&kvm->slots_lock);
1870 		r = kvm_s390_set_cmma_bits(kvm, &args);
1871 		mutex_unlock(&kvm->slots_lock);
1872 		break;
1873 	}
1874 	default:
1875 		r = -ENOTTY;
1876 	}
1877 
1878 	return r;
1879 }
1880 
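/*
 * Query the AP configuration via the PQAP instruction with the QCI
 * function code. The 128-byte result is stored in @config; the return
 * value is the condition code of the instruction.
 */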
1881 static int kvm_s390_query_ap_config(u8 *config)
1882 {
1883 	u32 fcn_code = 0x04000000UL;
1884 	u32 cc = 0;
1885 
1886 	memset(config, 0, 128);
1887 	asm volatile(
1888 		"lgr 0,%1\n"
1889 		"lgr 2,%2\n"
1890 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1891 		"0: ipm %0\n"
1892 		"srl %0,28\n"
1893 		"1:\n"
1894 		EX_TABLE(0b, 1b)
1895 		: "+r" (cc)
1896 		: "r" (fcn_code), "r" (config)
1897 		: "cc", "0", "2", "memory"
1898 	);
1899 
1900 	return cc;
1901 }
1902 
1903 static int kvm_s390_apxa_installed(void)
1904 {
1905 	u8 config[128];
1906 	int cc;
1907 
1908 	if (test_facility(12)) {
1909 		cc = kvm_s390_query_ap_config(config);
1910 
1911 		if (cc)
1912 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1913 		else
1914 			return config[0] & 0x40;
1915 	}
1916 
1917 	return 0;
1918 }
1919 
1920 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1921 {
1922 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1923 
1924 	if (kvm_s390_apxa_installed())
1925 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1926 	else
1927 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1928 }
1929 
1930 static u64 kvm_s390_get_initial_cpuid(void)
1931 {
1932 	struct cpuid cpuid;
1933 
1934 	get_cpu_id(&cpuid);
1935 	cpuid.version = 0xff;
1936 	return *((u64 *) &cpuid);
1937 }
1938 
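/*
 * Set up the guest crypto control block: select the CRYCB format,
 * enable AES/DEA protected key handling by default and generate random
 * wrapping key masks.
 */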
1939 static void kvm_s390_crypto_init(struct kvm *kvm)
1940 {
1941 	if (!test_kvm_facility(kvm, 76))
1942 		return;
1943 
1944 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1945 	kvm_s390_set_crycb_format(kvm);
1946 
1947 	/* Enable AES/DEA protected key functions by default */
1948 	kvm->arch.crypto.aes_kw = 1;
1949 	kvm->arch.crypto.dea_kw = 1;
1950 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1951 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1952 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1953 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1954 }
1955 
1956 static void sca_dispose(struct kvm *kvm)
1957 {
1958 	if (kvm->arch.use_esca)
1959 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1960 	else
1961 		free_page((unsigned long)(kvm->arch.sca));
1962 	kvm->arch.sca = NULL;
1963 }
1964 
1965 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1966 {
1967 	gfp_t alloc_flags = GFP_KERNEL;
1968 	int i, rc;
1969 	char debug_name[16];
1970 	static unsigned long sca_offset;
1971 
1972 	rc = -EINVAL;
1973 #ifdef CONFIG_KVM_S390_UCONTROL
1974 	if (type & ~KVM_VM_S390_UCONTROL)
1975 		goto out_err;
1976 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1977 		goto out_err;
1978 #else
1979 	if (type)
1980 		goto out_err;
1981 #endif
1982 
1983 	rc = s390_enable_sie();
1984 	if (rc)
1985 		goto out_err;
1986 
1987 	rc = -ENOMEM;
1988 
1989 	if (!sclp.has_64bscao)
1990 		alloc_flags |= GFP_DMA;
1991 	rwlock_init(&kvm->arch.sca_lock);
1992 	/* start with basic SCA */
1993 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1994 	if (!kvm->arch.sca)
1995 		goto out_err;
1996 	spin_lock(&kvm_lock);
1997 	sca_offset += 16;
1998 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1999 		sca_offset = 0;
2000 	kvm->arch.sca = (struct bsca_block *)
2001 			((char *) kvm->arch.sca + sca_offset);
2002 	spin_unlock(&kvm_lock);
2003 
2004 	sprintf(debug_name, "kvm-%u", current->pid);
2005 
2006 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2007 	if (!kvm->arch.dbf)
2008 		goto out_err;
2009 
2010 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2011 	kvm->arch.sie_page2 =
2012 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2013 	if (!kvm->arch.sie_page2)
2014 		goto out_err;
2015 
2016 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2017 
2018 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2019 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2020 					      (kvm_s390_fac_base[i] |
2021 					       kvm_s390_fac_ext[i]);
2022 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2023 					      kvm_s390_fac_base[i];
2024 	}
2025 
2026 	/* we are always in czam mode - even on pre z14 machines */
2027 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2028 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2029 	/* we emulate STHYI in kvm */
2030 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2031 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2032 	if (MACHINE_HAS_TLB_GUEST) {
2033 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2034 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2035 	}
2036 
2037 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2038 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2039 
2040 	kvm_s390_crypto_init(kvm);
2041 
2042 	mutex_init(&kvm->arch.float_int.ais_lock);
2043 	spin_lock_init(&kvm->arch.float_int.lock);
2044 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2045 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2046 	init_waitqueue_head(&kvm->arch.ipte_wq);
2047 	mutex_init(&kvm->arch.ipte_mutex);
2048 
2049 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2050 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2051 
2052 	if (type & KVM_VM_S390_UCONTROL) {
2053 		kvm->arch.gmap = NULL;
2054 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2055 	} else {
2056 		if (sclp.hamax == U64_MAX)
2057 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2058 		else
2059 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2060 						    sclp.hamax + 1);
2061 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2062 		if (!kvm->arch.gmap)
2063 			goto out_err;
2064 		kvm->arch.gmap->private = kvm;
2065 		kvm->arch.gmap->pfault_enabled = 0;
2066 	}
2067 
2068 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2069 	kvm->arch.use_skf = sclp.has_skey;
2070 	spin_lock_init(&kvm->arch.start_stop_lock);
2071 	kvm_s390_vsie_init(kvm);
2072 	kvm_s390_gisa_init(kvm);
2073 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2074 
2075 	return 0;
2076 out_err:
2077 	free_page((unsigned long)kvm->arch.sie_page2);
2078 	debug_unregister(kvm->arch.dbf);
2079 	sca_dispose(kvm);
2080 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2081 	return rc;
2082 }
2083 
2084 bool kvm_arch_has_vcpu_debugfs(void)
2085 {
2086 	return false;
2087 }
2088 
2089 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2090 {
2091 	return 0;
2092 }
2093 
2094 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2095 {
2096 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2097 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2098 	kvm_s390_clear_local_irqs(vcpu);
2099 	kvm_clear_async_pf_completion_queue(vcpu);
2100 	if (!kvm_is_ucontrol(vcpu->kvm))
2101 		sca_del_vcpu(vcpu);
2102 
2103 	if (kvm_is_ucontrol(vcpu->kvm))
2104 		gmap_remove(vcpu->arch.gmap);
2105 
2106 	if (vcpu->kvm->arch.use_cmma)
2107 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2108 	free_page((unsigned long)(vcpu->arch.sie_block));
2109 
2110 	kvm_vcpu_uninit(vcpu);
2111 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2112 }
2113 
2114 static void kvm_free_vcpus(struct kvm *kvm)
2115 {
2116 	unsigned int i;
2117 	struct kvm_vcpu *vcpu;
2118 
2119 	kvm_for_each_vcpu(i, vcpu, kvm)
2120 		kvm_arch_vcpu_destroy(vcpu);
2121 
2122 	mutex_lock(&kvm->lock);
2123 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2124 		kvm->vcpus[i] = NULL;
2125 
2126 	atomic_set(&kvm->online_vcpus, 0);
2127 	mutex_unlock(&kvm->lock);
2128 }
2129 
2130 void kvm_arch_destroy_vm(struct kvm *kvm)
2131 {
2132 	kvm_free_vcpus(kvm);
2133 	sca_dispose(kvm);
2134 	debug_unregister(kvm->arch.dbf);
2135 	kvm_s390_gisa_destroy(kvm);
2136 	free_page((unsigned long)kvm->arch.sie_page2);
2137 	if (!kvm_is_ucontrol(kvm))
2138 		gmap_remove(kvm->arch.gmap);
2139 	kvm_s390_destroy_adapters(kvm);
2140 	kvm_s390_clear_float_irqs(kvm);
2141 	kvm_s390_vsie_destroy(kvm);
2142 	if (kvm->arch.migration_state) {
2143 		vfree(kvm->arch.migration_state->pgste_bitmap);
2144 		kfree(kvm->arch.migration_state);
2145 	}
2146 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2147 }
2148 
2149 /* Section: vcpu related */
2150 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2151 {
2152 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2153 	if (!vcpu->arch.gmap)
2154 		return -ENOMEM;
2155 	vcpu->arch.gmap->private = vcpu->kvm;
2156 
2157 	return 0;
2158 }
2159 
2160 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2161 {
2162 	if (!kvm_s390_use_sca_entries())
2163 		return;
2164 	read_lock(&vcpu->kvm->arch.sca_lock);
2165 	if (vcpu->kvm->arch.use_esca) {
2166 		struct esca_block *sca = vcpu->kvm->arch.sca;
2167 
2168 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2169 		sca->cpu[vcpu->vcpu_id].sda = 0;
2170 	} else {
2171 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2172 
2173 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2174 		sca->cpu[vcpu->vcpu_id].sda = 0;
2175 	}
2176 	read_unlock(&vcpu->kvm->arch.sca_lock);
2177 }
2178 
2179 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2180 {
2181 	if (!kvm_s390_use_sca_entries()) {
2182 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2183 
2184 		/* we still need the basic sca for the ipte control */
2185 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2186 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2187 		return;
2188 	}
2189 	read_lock(&vcpu->kvm->arch.sca_lock);
2190 	if (vcpu->kvm->arch.use_esca) {
2191 		struct esca_block *sca = vcpu->kvm->arch.sca;
2192 
2193 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2194 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2195 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2196 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2197 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2198 	} else {
2199 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2200 
2201 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2202 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2203 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2204 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2205 	}
2206 	read_unlock(&vcpu->kvm->arch.sca_lock);
2207 }
2208 
2209 /* Basic SCA to Extended SCA data copy routines */
2210 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2211 {
2212 	d->sda = s->sda;
2213 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2214 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2215 }
2216 
2217 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2218 {
2219 	int i;
2220 
2221 	d->ipte_control = s->ipte_control;
2222 	d->mcn[0] = s->mcn;
2223 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2224 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2225 }
2226 
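/*
 * Switch the VM from the basic SCA to the extended SCA, which has room
 * for more VCPU entries. All VCPUs are blocked while the SCA pointers in
 * their SIE blocks are rewritten; the old basic SCA is freed afterwards.
 */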
2227 static int sca_switch_to_extended(struct kvm *kvm)
2228 {
2229 	struct bsca_block *old_sca = kvm->arch.sca;
2230 	struct esca_block *new_sca;
2231 	struct kvm_vcpu *vcpu;
2232 	unsigned int vcpu_idx;
2233 	u32 scaol, scaoh;
2234 
2235 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2236 	if (!new_sca)
2237 		return -ENOMEM;
2238 
2239 	scaoh = (u32)((u64)(new_sca) >> 32);
2240 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2241 
2242 	kvm_s390_vcpu_block_all(kvm);
2243 	write_lock(&kvm->arch.sca_lock);
2244 
2245 	sca_copy_b_to_e(new_sca, old_sca);
2246 
2247 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2248 		vcpu->arch.sie_block->scaoh = scaoh;
2249 		vcpu->arch.sie_block->scaol = scaol;
2250 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2251 	}
2252 	kvm->arch.sca = new_sca;
2253 	kvm->arch.use_esca = 1;
2254 
2255 	write_unlock(&kvm->arch.sca_lock);
2256 	kvm_s390_vcpu_unblock_all(kvm);
2257 
2258 	free_page((unsigned long)old_sca);
2259 
2260 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2261 		 old_sca, kvm->arch.sca);
2262 	return 0;
2263 }
2264 
2265 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2266 {
2267 	int rc;
2268 
2269 	if (!kvm_s390_use_sca_entries()) {
2270 		if (id < KVM_MAX_VCPUS)
2271 			return true;
2272 		return false;
2273 	}
2274 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2275 		return true;
2276 	if (!sclp.has_esca || !sclp.has_64bscao)
2277 		return false;
2278 
2279 	mutex_lock(&kvm->lock);
2280 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2281 	mutex_unlock(&kvm->lock);
2282 
2283 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2284 }
2285 
2286 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2287 {
2288 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2289 	kvm_clear_async_pf_completion_queue(vcpu);
2290 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2291 				    KVM_SYNC_GPRS |
2292 				    KVM_SYNC_ACRS |
2293 				    KVM_SYNC_CRS |
2294 				    KVM_SYNC_ARCH0 |
2295 				    KVM_SYNC_PFAULT;
2296 	kvm_s390_set_prefix(vcpu, 0);
2297 	if (test_kvm_facility(vcpu->kvm, 64))
2298 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2299 	if (test_kvm_facility(vcpu->kvm, 82))
2300 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2301 	if (test_kvm_facility(vcpu->kvm, 133))
2302 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2303 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2304 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2305 	 */
2306 	if (MACHINE_HAS_VX)
2307 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2308 	else
2309 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2310 
2311 	if (kvm_is_ucontrol(vcpu->kvm))
2312 		return __kvm_ucontrol_vcpu_init(vcpu);
2313 
2314 	return 0;
2315 }
2316 
2317 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2318 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2319 {
2320 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2321 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2322 	vcpu->arch.cputm_start = get_tod_clock_fast();
2323 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2324 }
2325 
2326 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2327 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2328 {
2329 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2330 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2331 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2332 	vcpu->arch.cputm_start = 0;
2333 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2334 }
2335 
2336 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2337 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2338 {
2339 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2340 	vcpu->arch.cputm_enabled = true;
2341 	__start_cpu_timer_accounting(vcpu);
2342 }
2343 
2344 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2345 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2346 {
2347 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2348 	__stop_cpu_timer_accounting(vcpu);
2349 	vcpu->arch.cputm_enabled = false;
2350 }
2351 
2352 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2353 {
2354 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2355 	__enable_cpu_timer_accounting(vcpu);
2356 	preempt_enable();
2357 }
2358 
2359 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2360 {
2361 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2362 	__disable_cpu_timer_accounting(vcpu);
2363 	preempt_enable();
2364 }
2365 
2366 /* set the cpu timer - may only be called from the VCPU thread itself */
2367 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2368 {
2369 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2370 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2371 	if (vcpu->arch.cputm_enabled)
2372 		vcpu->arch.cputm_start = get_tod_clock_fast();
2373 	vcpu->arch.sie_block->cputm = cputm;
2374 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2375 	preempt_enable();
2376 }
2377 
2378 /* update and get the cpu timer - can also be called from other VCPU threads */
2379 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2380 {
2381 	unsigned int seq;
2382 	__u64 value;
2383 
2384 	if (unlikely(!vcpu->arch.cputm_enabled))
2385 		return vcpu->arch.sie_block->cputm;
2386 
2387 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2388 	do {
2389 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2390 		/*
2391 		 * If the writer would ever execute a read in the critical
2392 		 * section, e.g. in irq context, we have a deadlock.
2393 		 */
2394 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2395 		value = vcpu->arch.sie_block->cputm;
2396 		/* if cputm_start is 0, accounting is being started/stopped */
2397 		if (likely(vcpu->arch.cputm_start))
2398 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2399 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2400 	preempt_enable();
2401 	return value;
2402 }
2403 
2404 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2405 {
2406 
2407 	gmap_enable(vcpu->arch.enabled_gmap);
2408 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2409 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2410 		__start_cpu_timer_accounting(vcpu);
2411 	vcpu->cpu = cpu;
2412 }
2413 
2414 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2415 {
2416 	vcpu->cpu = -1;
2417 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2418 		__stop_cpu_timer_accounting(vcpu);
2419 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2420 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2421 	gmap_disable(vcpu->arch.enabled_gmap);
2422 
2423 }
2424 
2425 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2426 {
2427 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2428 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2429 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2430 	kvm_s390_set_prefix(vcpu, 0);
2431 	kvm_s390_set_cpu_timer(vcpu, 0);
2432 	vcpu->arch.sie_block->ckc       = 0UL;
2433 	vcpu->arch.sie_block->todpr     = 0;
2434 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2435 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2436 					CR0_INTERRUPT_KEY_SUBMASK |
2437 					CR0_MEASUREMENT_ALERT_SUBMASK;
2438 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2439 					CR14_UNUSED_33 |
2440 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2441 	/* make sure the new fpc will be lazily loaded */
2442 	save_fpu_regs();
2443 	current->thread.fpu.fpc = 0;
2444 	vcpu->arch.sie_block->gbea = 1;
2445 	vcpu->arch.sie_block->pp = 0;
2446 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2447 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2448 	kvm_clear_async_pf_completion_queue(vcpu);
2449 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2450 		kvm_s390_vcpu_stop(vcpu);
2451 	kvm_s390_clear_local_irqs(vcpu);
2452 }
2453 
2454 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2455 {
2456 	mutex_lock(&vcpu->kvm->lock);
2457 	preempt_disable();
2458 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2459 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2460 	preempt_enable();
2461 	mutex_unlock(&vcpu->kvm->lock);
2462 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2463 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2464 		sca_add_vcpu(vcpu);
2465 	}
2466 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2467 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2468 	/* make vcpu_load load the right gmap on the first trigger */
2469 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2470 }
2471 
2472 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2473 {
2474 	if (!test_kvm_facility(vcpu->kvm, 76))
2475 		return;
2476 
2477 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2478 
2479 	if (vcpu->kvm->arch.crypto.aes_kw)
2480 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2481 	if (vcpu->kvm->arch.crypto.dea_kw)
2482 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2483 
2484 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2485 }
2486 
2487 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2488 {
2489 	free_page(vcpu->arch.sie_block->cbrlo);
2490 	vcpu->arch.sie_block->cbrlo = 0;
2491 }
2492 
2493 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2494 {
2495 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2496 	if (!vcpu->arch.sie_block->cbrlo)
2497 		return -ENOMEM;
2498 	return 0;
2499 }
2500 
2501 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2502 {
2503 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2504 
2505 	vcpu->arch.sie_block->ibc = model->ibc;
2506 	if (test_kvm_facility(vcpu->kvm, 7))
2507 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2508 }
2509 
2510 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2511 {
2512 	int rc = 0;
2513 
2514 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2515 						    CPUSTAT_SM |
2516 						    CPUSTAT_STOPPED);
2517 
2518 	if (test_kvm_facility(vcpu->kvm, 78))
2519 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2520 	else if (test_kvm_facility(vcpu->kvm, 8))
2521 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2522 
2523 	kvm_s390_vcpu_setup_model(vcpu);
2524 
2525 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2526 	if (MACHINE_HAS_ESOP)
2527 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2528 	if (test_kvm_facility(vcpu->kvm, 9))
2529 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2530 	if (test_kvm_facility(vcpu->kvm, 73))
2531 		vcpu->arch.sie_block->ecb |= ECB_TE;
2532 
2533 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2534 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2535 	if (test_kvm_facility(vcpu->kvm, 130))
2536 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2537 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2538 	if (sclp.has_cei)
2539 		vcpu->arch.sie_block->eca |= ECA_CEI;
2540 	if (sclp.has_ib)
2541 		vcpu->arch.sie_block->eca |= ECA_IB;
2542 	if (sclp.has_siif)
2543 		vcpu->arch.sie_block->eca |= ECA_SII;
2544 	if (sclp.has_sigpif)
2545 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2546 	if (test_kvm_facility(vcpu->kvm, 129)) {
2547 		vcpu->arch.sie_block->eca |= ECA_VX;
2548 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2549 	}
2550 	if (test_kvm_facility(vcpu->kvm, 139))
2551 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2552 
2553 	if (vcpu->arch.sie_block->gd) {
2554 		vcpu->arch.sie_block->eca |= ECA_AIV;
2555 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2556 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2557 	}
2558 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2559 					| SDNXC;
2560 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2561 
2562 	if (sclp.has_kss)
2563 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2564 	else
2565 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2566 
2567 	if (vcpu->kvm->arch.use_cmma) {
2568 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2569 		if (rc)
2570 			return rc;
2571 	}
2572 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2573 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2574 
2575 	kvm_s390_vcpu_crypto_setup(vcpu);
2576 
2577 	return rc;
2578 }
2579 
2580 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2581 				      unsigned int id)
2582 {
2583 	struct kvm_vcpu *vcpu;
2584 	struct sie_page *sie_page;
2585 	int rc = -EINVAL;
2586 
2587 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2588 		goto out;
2589 
2590 	rc = -ENOMEM;
2591 
2592 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2593 	if (!vcpu)
2594 		goto out;
2595 
2596 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2597 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2598 	if (!sie_page)
2599 		goto out_free_cpu;
2600 
2601 	vcpu->arch.sie_block = &sie_page->sie_block;
2602 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2603 
2604 	/* the real guest size will always be smaller than msl */
2605 	vcpu->arch.sie_block->mso = 0;
2606 	vcpu->arch.sie_block->msl = sclp.hamax;
2607 
2608 	vcpu->arch.sie_block->icpua = id;
2609 	spin_lock_init(&vcpu->arch.local_int.lock);
2610 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2611 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2612 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2613 	seqcount_init(&vcpu->arch.cputm_seqcount);
2614 
2615 	rc = kvm_vcpu_init(vcpu, kvm, id);
2616 	if (rc)
2617 		goto out_free_sie_block;
2618 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2619 		 vcpu->arch.sie_block);
2620 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2621 
2622 	return vcpu;
2623 out_free_sie_block:
2624 	free_page((unsigned long)(vcpu->arch.sie_block));
2625 out_free_cpu:
2626 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2627 out:
2628 	return ERR_PTR(rc);
2629 }
2630 
2631 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2632 {
2633 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2634 }
2635 
2636 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2637 {
2638 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2639 }
2640 
2641 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2642 {
2643 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2644 	exit_sie(vcpu);
2645 }
2646 
2647 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2648 {
2649 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2650 }
2651 
2652 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2653 {
2654 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2655 	exit_sie(vcpu);
2656 }
2657 
2658 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2659 {
2660 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2661 }
2662 
2663 /*
2664  * Kick a guest cpu out of SIE and wait until SIE is not running.
2665  * If the CPU is not running (e.g. waiting as idle) the function will
2666  * return immediately. */
2667 void exit_sie(struct kvm_vcpu *vcpu)
2668 {
2669 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2670 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2671 		cpu_relax();
2672 }
2673 
2674 /* Kick a guest cpu out of SIE to process a request synchronously */
2675 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2676 {
2677 	kvm_make_request(req, vcpu);
2678 	kvm_s390_vcpu_request(vcpu);
2679 }
2680 
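/*
 * Gmap invalidation notifier: if the invalidated range overlaps a VCPU's
 * prefix pages, request an MMU reload so that the notification is
 * re-armed for the new mapping.
 */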
2681 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2682 			      unsigned long end)
2683 {
2684 	struct kvm *kvm = gmap->private;
2685 	struct kvm_vcpu *vcpu;
2686 	unsigned long prefix;
2687 	int i;
2688 
2689 	if (gmap_is_shadow(gmap))
2690 		return;
2691 	if (start >= 1UL << 31)
2692 		/* We are only interested in prefix pages */
2693 		return;
2694 	kvm_for_each_vcpu(i, vcpu, kvm) {
2695 		/* match against both prefix pages */
2696 		prefix = kvm_s390_get_prefix(vcpu);
2697 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2698 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2699 				   start, end);
2700 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2701 		}
2702 	}
2703 }
2704 
2705 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2706 {
2707 	/* kvm common code refers to this, but never calls it */
2708 	BUG();
2709 	return 0;
2710 }
2711 
2712 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2713 					   struct kvm_one_reg *reg)
2714 {
2715 	int r = -EINVAL;
2716 
2717 	switch (reg->id) {
2718 	case KVM_REG_S390_TODPR:
2719 		r = put_user(vcpu->arch.sie_block->todpr,
2720 			     (u32 __user *)reg->addr);
2721 		break;
2722 	case KVM_REG_S390_EPOCHDIFF:
2723 		r = put_user(vcpu->arch.sie_block->epoch,
2724 			     (u64 __user *)reg->addr);
2725 		break;
2726 	case KVM_REG_S390_CPU_TIMER:
2727 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2728 			     (u64 __user *)reg->addr);
2729 		break;
2730 	case KVM_REG_S390_CLOCK_COMP:
2731 		r = put_user(vcpu->arch.sie_block->ckc,
2732 			     (u64 __user *)reg->addr);
2733 		break;
2734 	case KVM_REG_S390_PFTOKEN:
2735 		r = put_user(vcpu->arch.pfault_token,
2736 			     (u64 __user *)reg->addr);
2737 		break;
2738 	case KVM_REG_S390_PFCOMPARE:
2739 		r = put_user(vcpu->arch.pfault_compare,
2740 			     (u64 __user *)reg->addr);
2741 		break;
2742 	case KVM_REG_S390_PFSELECT:
2743 		r = put_user(vcpu->arch.pfault_select,
2744 			     (u64 __user *)reg->addr);
2745 		break;
2746 	case KVM_REG_S390_PP:
2747 		r = put_user(vcpu->arch.sie_block->pp,
2748 			     (u64 __user *)reg->addr);
2749 		break;
2750 	case KVM_REG_S390_GBEA:
2751 		r = put_user(vcpu->arch.sie_block->gbea,
2752 			     (u64 __user *)reg->addr);
2753 		break;
2754 	default:
2755 		break;
2756 	}
2757 
2758 	return r;
2759 }
2760 
2761 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2762 					   struct kvm_one_reg *reg)
2763 {
2764 	int r = -EINVAL;
2765 	__u64 val;
2766 
2767 	switch (reg->id) {
2768 	case KVM_REG_S390_TODPR:
2769 		r = get_user(vcpu->arch.sie_block->todpr,
2770 			     (u32 __user *)reg->addr);
2771 		break;
2772 	case KVM_REG_S390_EPOCHDIFF:
2773 		r = get_user(vcpu->arch.sie_block->epoch,
2774 			     (u64 __user *)reg->addr);
2775 		break;
2776 	case KVM_REG_S390_CPU_TIMER:
2777 		r = get_user(val, (u64 __user *)reg->addr);
2778 		if (!r)
2779 			kvm_s390_set_cpu_timer(vcpu, val);
2780 		break;
2781 	case KVM_REG_S390_CLOCK_COMP:
2782 		r = get_user(vcpu->arch.sie_block->ckc,
2783 			     (u64 __user *)reg->addr);
2784 		break;
2785 	case KVM_REG_S390_PFTOKEN:
2786 		r = get_user(vcpu->arch.pfault_token,
2787 			     (u64 __user *)reg->addr);
2788 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2789 			kvm_clear_async_pf_completion_queue(vcpu);
2790 		break;
2791 	case KVM_REG_S390_PFCOMPARE:
2792 		r = get_user(vcpu->arch.pfault_compare,
2793 			     (u64 __user *)reg->addr);
2794 		break;
2795 	case KVM_REG_S390_PFSELECT:
2796 		r = get_user(vcpu->arch.pfault_select,
2797 			     (u64 __user *)reg->addr);
2798 		break;
2799 	case KVM_REG_S390_PP:
2800 		r = get_user(vcpu->arch.sie_block->pp,
2801 			     (u64 __user *)reg->addr);
2802 		break;
2803 	case KVM_REG_S390_GBEA:
2804 		r = get_user(vcpu->arch.sie_block->gbea,
2805 			     (u64 __user *)reg->addr);
2806 		break;
2807 	default:
2808 		break;
2809 	}
2810 
2811 	return r;
2812 }
2813 
2814 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2815 {
2816 	kvm_s390_vcpu_initial_reset(vcpu);
2817 	return 0;
2818 }
2819 
2820 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2821 {
2822 	vcpu_load(vcpu);
2823 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2824 	vcpu_put(vcpu);
2825 	return 0;
2826 }
2827 
2828 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2829 {
2830 	vcpu_load(vcpu);
2831 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2832 	vcpu_put(vcpu);
2833 	return 0;
2834 }
2835 
2836 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2837 				  struct kvm_sregs *sregs)
2838 {
2839 	vcpu_load(vcpu);
2840 
2841 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2842 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2843 
2844 	vcpu_put(vcpu);
2845 	return 0;
2846 }
2847 
2848 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2849 				  struct kvm_sregs *sregs)
2850 {
2851 	vcpu_load(vcpu);
2852 
2853 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2854 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2855 
2856 	vcpu_put(vcpu);
2857 	return 0;
2858 }
2859 
2860 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2861 {
2862 	int ret = 0;
2863 
2864 	vcpu_load(vcpu);
2865 
2866 	if (test_fp_ctl(fpu->fpc)) {
2867 		ret = -EINVAL;
2868 		goto out;
2869 	}
2870 	vcpu->run->s.regs.fpc = fpu->fpc;
2871 	if (MACHINE_HAS_VX)
2872 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2873 				 (freg_t *) fpu->fprs);
2874 	else
2875 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2876 
2877 out:
2878 	vcpu_put(vcpu);
2879 	return ret;
2880 }
2881 
2882 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2883 {
2884 	vcpu_load(vcpu);
2885 
2886 	/* make sure we have the latest values */
2887 	save_fpu_regs();
2888 	if (MACHINE_HAS_VX)
2889 		convert_vx_to_fp((freg_t *) fpu->fprs,
2890 				 (__vector128 *) vcpu->run->s.regs.vrs);
2891 	else
2892 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2893 	fpu->fpc = vcpu->run->s.regs.fpc;
2894 
2895 	vcpu_put(vcpu);
2896 	return 0;
2897 }
2898 
2899 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2900 {
2901 	int rc = 0;
2902 
2903 	if (!is_vcpu_stopped(vcpu))
2904 		rc = -EBUSY;
2905 	else {
2906 		vcpu->run->psw_mask = psw.mask;
2907 		vcpu->run->psw_addr = psw.addr;
2908 	}
2909 	return rc;
2910 }
2911 
2912 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2913 				  struct kvm_translation *tr)
2914 {
2915 	return -EINVAL; /* not implemented yet */
2916 }
2917 
2918 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2919 			      KVM_GUESTDBG_USE_HW_BP | \
2920 			      KVM_GUESTDBG_ENABLE)
2921 
2922 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2923 					struct kvm_guest_debug *dbg)
2924 {
2925 	int rc = 0;
2926 
2927 	vcpu_load(vcpu);
2928 
2929 	vcpu->guest_debug = 0;
2930 	kvm_s390_clear_bp_data(vcpu);
2931 
2932 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2933 		rc = -EINVAL;
2934 		goto out;
2935 	}
2936 	if (!sclp.has_gpere) {
2937 		rc = -EINVAL;
2938 		goto out;
2939 	}
2940 
2941 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2942 		vcpu->guest_debug = dbg->control;
2943 		/* enforce guest PER */
2944 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2945 
2946 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2947 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2948 	} else {
2949 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2950 		vcpu->arch.guestdbg.last_bp = 0;
2951 	}
2952 
2953 	if (rc) {
2954 		vcpu->guest_debug = 0;
2955 		kvm_s390_clear_bp_data(vcpu);
2956 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2957 	}
2958 
2959 out:
2960 	vcpu_put(vcpu);
2961 	return rc;
2962 }
2963 
2964 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2965 				    struct kvm_mp_state *mp_state)
2966 {
2967 	int ret;
2968 
2969 	vcpu_load(vcpu);
2970 
2971 	/* CHECK_STOP and LOAD are not supported yet */
2972 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2973 				      KVM_MP_STATE_OPERATING;
2974 
2975 	vcpu_put(vcpu);
2976 	return ret;
2977 }
2978 
2979 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2980 				    struct kvm_mp_state *mp_state)
2981 {
2982 	int rc = 0;
2983 
2984 	vcpu_load(vcpu);
2985 
2986 	/* user space knows about this interface - let it control the state */
2987 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2988 
2989 	switch (mp_state->mp_state) {
2990 	case KVM_MP_STATE_STOPPED:
2991 		kvm_s390_vcpu_stop(vcpu);
2992 		break;
2993 	case KVM_MP_STATE_OPERATING:
2994 		kvm_s390_vcpu_start(vcpu);
2995 		break;
2996 	case KVM_MP_STATE_LOAD:
2997 	case KVM_MP_STATE_CHECK_STOP:
2998 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2999 	default:
3000 		rc = -ENXIO;
3001 	}
3002 
3003 	vcpu_put(vcpu);
3004 	return rc;
3005 }
3006 
3007 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3008 {
3009 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3010 }
3011 
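/* Process pending VCPU requests before (re-)entering SIE. */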
3012 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3013 {
3014 retry:
3015 	kvm_s390_vcpu_request_handled(vcpu);
3016 	if (!kvm_request_pending(vcpu))
3017 		return 0;
3018 	/*
3019 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3020 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3021 	 * This ensures that the ipte instruction for this request has
3022 	 * already finished. We might race against a second unmapper that
3023 	 * wants to set the blocking bit. Let's just retry the request loop.
3024 	 */
3025 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3026 		int rc;
3027 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3028 					  kvm_s390_get_prefix(vcpu),
3029 					  PAGE_SIZE * 2, PROT_WRITE);
3030 		if (rc) {
3031 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3032 			return rc;
3033 		}
3034 		goto retry;
3035 	}
3036 
3037 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3038 		vcpu->arch.sie_block->ihcpu = 0xffff;
3039 		goto retry;
3040 	}
3041 
3042 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3043 		if (!ibs_enabled(vcpu)) {
3044 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3045 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3046 		}
3047 		goto retry;
3048 	}
3049 
3050 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3051 		if (ibs_enabled(vcpu)) {
3052 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3053 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3054 		}
3055 		goto retry;
3056 	}
3057 
3058 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3059 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3060 		goto retry;
3061 	}
3062 
3063 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3064 		/*
3065 		 * Disable CMM virtualization; we will emulate the ESSA
3066 		 * instruction manually, in order to provide additional
3067 		 * functionalities needed for live migration.
3068 		 */
3069 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3070 		goto retry;
3071 	}
3072 
3073 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3074 		/*
3075 		 * Re-enable CMM virtualization if CMMA is available and
3076 		 * CMM has been used.
3077 		 */
3078 		if ((vcpu->kvm->arch.use_cmma) &&
3079 		    (vcpu->kvm->mm->context.uses_cmm))
3080 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3081 		goto retry;
3082 	}
3083 
3084 	/* nothing to do, just clear the request */
3085 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3086 
3087 	return 0;
3088 }
3089 
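/*
 * Set the guest TOD clock: recompute the epoch (and the epoch index if
 * the multiple-epoch facility is available) relative to the host TOD and
 * propagate it to all VCPUs while they are blocked.
 */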
3090 void kvm_s390_set_tod_clock(struct kvm *kvm,
3091 			    const struct kvm_s390_vm_tod_clock *gtod)
3092 {
3093 	struct kvm_vcpu *vcpu;
3094 	struct kvm_s390_tod_clock_ext htod;
3095 	int i;
3096 
3097 	mutex_lock(&kvm->lock);
3098 	preempt_disable();
3099 
3100 	get_tod_clock_ext((char *)&htod);
3101 
3102 	kvm->arch.epoch = gtod->tod - htod.tod;
3103 	kvm->arch.epdx = 0;
3104 	if (test_kvm_facility(kvm, 139)) {
3105 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3106 		if (kvm->arch.epoch > gtod->tod)
3107 			kvm->arch.epdx -= 1;
3108 	}
3109 
3110 	kvm_s390_vcpu_block_all(kvm);
3111 	kvm_for_each_vcpu(i, vcpu, kvm) {
3112 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3113 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3114 	}
3115 
3116 	kvm_s390_vcpu_unblock_all(kvm);
3117 	preempt_enable();
3118 	mutex_unlock(&kvm->lock);
3119 }
3120 
3121 /**
3122  * kvm_arch_fault_in_page - fault-in guest page if necessary
3123  * @vcpu: The corresponding virtual cpu
3124  * @gpa: Guest physical address
3125  * @writable: Whether the page should be writable or not
3126  *
3127  * Make sure that a guest page has been faulted-in on the host.
3128  *
3129  * Return: Zero on success, negative error code otherwise.
3130  */
3131 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3132 {
3133 	return gmap_fault(vcpu->arch.gmap, gpa,
3134 			  writable ? FAULT_FLAG_WRITE : 0);
3135 }
3136 
3137 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3138 				      unsigned long token)
3139 {
3140 	struct kvm_s390_interrupt inti;
3141 	struct kvm_s390_irq irq;
3142 
3143 	if (start_token) {
3144 		irq.u.ext.ext_params2 = token;
3145 		irq.type = KVM_S390_INT_PFAULT_INIT;
3146 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3147 	} else {
3148 		inti.type = KVM_S390_INT_PFAULT_DONE;
3149 		inti.parm64 = token;
3150 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3151 	}
3152 }
3153 
3154 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3155 				     struct kvm_async_pf *work)
3156 {
3157 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3158 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3159 }
3160 
3161 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3162 				 struct kvm_async_pf *work)
3163 {
3164 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3165 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3166 }
3167 
3168 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3169 			       struct kvm_async_pf *work)
3170 {
3171 	/* s390 will always inject the page directly */
3172 }
3173 
3174 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3175 {
3176 	/*
3177 	 * s390 will always inject the page directly,
3178 	 * but we still want check_async_completion to cleanup
3179 	 * but we still want check_async_completion to clean up
3180 	return true;
3181 }
3182 
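/*
 * Try to set up an async page fault for the current host fault address.
 * Returns 0 if pfault handling is disabled or not possible in the current
 * VCPU state, so that the caller falls back to a synchronous fault-in.
 */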
3183 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3184 {
3185 	hva_t hva;
3186 	struct kvm_arch_async_pf arch;
3187 	int rc;
3188 
3189 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3190 		return 0;
3191 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3192 	    vcpu->arch.pfault_compare)
3193 		return 0;
3194 	if (psw_extint_disabled(vcpu))
3195 		return 0;
3196 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3197 		return 0;
3198 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3199 		return 0;
3200 	if (!vcpu->arch.gmap->pfault_enabled)
3201 		return 0;
3202 
3203 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3204 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3205 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3206 		return 0;
3207 
3208 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3209 	return rc;
3210 }
3211 
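/*
 * Prepare for entering SIE: handle completed pfaults, deliver pending
 * interrupts, process VCPU requests and arm guest debugging if enabled.
 */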
3212 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3213 {
3214 	int rc, cpuflags;
3215 
3216 	/*
3217 	 * On s390 notifications for arriving pages will be delivered directly
3218 	 * to the guest but the house keeping for completed pfaults is
3219 	 * to the guest, but the housekeeping for completed pfaults is
3220 	 */
3221 	kvm_check_async_pf_completion(vcpu);
3222 
3223 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3224 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3225 
3226 	if (need_resched())
3227 		schedule();
3228 
3229 	if (test_cpu_flag(CIF_MCCK_PENDING))
3230 		s390_handle_mcck();
3231 
3232 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3233 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3234 		if (rc)
3235 			return rc;
3236 	}
3237 
3238 	rc = kvm_s390_handle_requests(vcpu);
3239 	if (rc)
3240 		return rc;
3241 
3242 	if (guestdbg_enabled(vcpu)) {
3243 		kvm_s390_backup_guest_per_regs(vcpu);
3244 		kvm_s390_patch_guest_per_regs(vcpu);
3245 	}
3246 
3247 	vcpu->arch.sie_block->icptcode = 0;
3248 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3249 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3250 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3251 
3252 	return 0;
3253 }
3254 
3255 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3256 {
3257 	struct kvm_s390_pgm_info pgm_info = {
3258 		.code = PGM_ADDRESSING,
3259 	};
3260 	u8 opcode, ilen;
3261 	int rc;
3262 
3263 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3264 	trace_kvm_s390_sie_fault(vcpu);
3265 
3266 	/*
3267 	 * We want to inject an addressing exception, which is defined as a
3268 	 * suppressing or terminating exception. However, since we came here
3269 	 * by a DAT access exception, the PSW still points to the faulting
3270 	 * instruction since DAT exceptions are nullifying. So we've got
3271 	 * to look up the current opcode to get the length of the instruction
3272 	 * to be able to forward the PSW.
3273 	 */
3274 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3275 	ilen = insn_length(opcode);
3276 	if (rc < 0) {
3277 		return rc;
3278 	} else if (rc) {
3279 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3280 		 * Forward by arbitrary ilc, injection will take care of
3281 		 * nullification if necessary.
3282 		 */
3283 		pgm_info = vcpu->arch.pgm;
3284 		ilen = 4;
3285 	}
3286 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3287 	kvm_s390_forward_psw(vcpu, ilen);
3288 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3289 }
3290 
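/*
 * Evaluate the SIE exit: reinject host machine checks, handle intercepts
 * as well as ucontrol and guest page fault exits. Returns -EREMOTE when
 * the exit has to be handled by userspace.
 */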
3291 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3292 {
3293 	struct mcck_volatile_info *mcck_info;
3294 	struct sie_page *sie_page;
3295 
3296 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3297 		   vcpu->arch.sie_block->icptcode);
3298 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3299 
3300 	if (guestdbg_enabled(vcpu))
3301 		kvm_s390_restore_guest_per_regs(vcpu);
3302 
3303 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3304 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3305 
3306 	if (exit_reason == -EINTR) {
3307 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3308 		sie_page = container_of(vcpu->arch.sie_block,
3309 					struct sie_page, sie_block);
3310 		mcck_info = &sie_page->mcck_info;
3311 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3312 		return 0;
3313 	}
3314 
3315 	if (vcpu->arch.sie_block->icptcode > 0) {
3316 		int rc = kvm_handle_sie_intercept(vcpu);
3317 
3318 		if (rc != -EOPNOTSUPP)
3319 			return rc;
3320 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3321 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3322 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3323 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3324 		return -EREMOTE;
3325 	} else if (exit_reason != -EFAULT) {
3326 		vcpu->stat.exit_null++;
3327 		return 0;
3328 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3329 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3330 		vcpu->run->s390_ucontrol.trans_exc_code =
3331 						current->thread.gmap_addr;
3332 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3333 		return -EREMOTE;
3334 	} else if (current->thread.gmap_pfault) {
3335 		trace_kvm_s390_major_guest_pfault(vcpu);
3336 		current->thread.gmap_pfault = 0;
3337 		if (kvm_arch_setup_async_pf(vcpu))
3338 			return 0;
3339 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3340 	}
3341 	return vcpu_post_run_fault_in_sie(vcpu);
3342 }
3343 
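/*
 * Run loop: enter SIE repeatedly until a signal, a guestdbg exit or an
 * error is pending.
 */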
3344 static int __vcpu_run(struct kvm_vcpu *vcpu)
3345 {
3346 	int rc, exit_reason;
3347 
3348 	/*
3349 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3350 	 * running the guest), so that memslots (and other stuff) are protected
3351 	 */
3352 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3353 
3354 	do {
3355 		rc = vcpu_pre_run(vcpu);
3356 		if (rc)
3357 			break;
3358 
3359 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3360 		/*
3361 		 * As PF_VCPU will be used in fault handler, between
3362 		 * guest_enter and guest_exit should be no uaccess.
3363 		 */
3364 		local_irq_disable();
3365 		guest_enter_irqoff();
3366 		__disable_cpu_timer_accounting(vcpu);
3367 		local_irq_enable();
3368 		exit_reason = sie64a(vcpu->arch.sie_block,
3369 				     vcpu->run->s.regs.gprs);
3370 		local_irq_disable();
3371 		__enable_cpu_timer_accounting(vcpu);
3372 		guest_exit_irqoff();
3373 		local_irq_enable();
3374 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3375 
3376 		rc = vcpu_post_run(vcpu, exit_reason);
3377 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3378 
3379 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3380 	return rc;
3381 }
3382 
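/*
 * Transfer the register state that userspace marked dirty in kvm_run into
 * the SIE block and the host registers before entering the guest.
 */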
3383 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3384 {
3385 	struct runtime_instr_cb *riccb;
3386 	struct gs_cb *gscb;
3387 
3388 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3389 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3390 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3391 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3392 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3393 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3394 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3395 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3396 		/* some control register changes require a tlb flush */
3397 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3398 	}
3399 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3400 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3401 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3402 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3403 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3404 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3405 	}
3406 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3407 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3408 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3409 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3410 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3411 			kvm_clear_async_pf_completion_queue(vcpu);
3412 	}
3413 	/*
3414 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3415 	 * we should enable RI here instead of doing the lazy enablement.
3416 	 */
3417 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3418 	    test_kvm_facility(vcpu->kvm, 64) &&
3419 	    riccb->v &&
3420 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3421 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3422 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3423 	}
3424 	/*
3425 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3426 	 * we should enable GS here instead of doing the lazy enablement.
3427 	 */
3428 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3429 	    test_kvm_facility(vcpu->kvm, 133) &&
3430 	    gscb->gssm &&
3431 	    !vcpu->arch.gs_enabled) {
3432 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3433 		vcpu->arch.sie_block->ecb |= ECB_GS;
3434 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3435 		vcpu->arch.gs_enabled = 1;
3436 	}
3437 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3438 	    test_kvm_facility(vcpu->kvm, 82)) {
3439 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3440 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3441 	}
3442 	save_access_regs(vcpu->arch.host_acrs);
3443 	restore_access_regs(vcpu->run->s.regs.acrs);
3444 	/* save host (userspace) fprs/vrs */
3445 	save_fpu_regs();
3446 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3447 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3448 	if (MACHINE_HAS_VX)
3449 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3450 	else
3451 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3452 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3453 	if (test_fp_ctl(current->thread.fpu.fpc))
3454 		/* User space provided an invalid FPC, let's clear it */
3455 		current->thread.fpu.fpc = 0;
3456 	if (MACHINE_HAS_GS) {
3457 		preempt_disable();
3458 		__ctl_set_bit(2, 4);
3459 		if (current->thread.gs_cb) {
3460 			vcpu->arch.host_gscb = current->thread.gs_cb;
3461 			save_gs_cb(vcpu->arch.host_gscb);
3462 		}
3463 		if (vcpu->arch.gs_enabled) {
3464 			current->thread.gs_cb = (struct gs_cb *)
3465 						&vcpu->run->s.regs.gscb;
3466 			restore_gs_cb(current->thread.gs_cb);
3467 		}
3468 		preempt_enable();
3469 	}
3470 
3471 	kvm_run->kvm_dirty_regs = 0;
3472 }
3473 
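/* Store the guest register state back into kvm_run after the run loop. */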
3474 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3475 {
3476 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3477 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3478 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3479 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3480 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3481 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3482 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3483 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3484 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3485 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3486 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3487 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3488 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3489 	save_access_regs(vcpu->run->s.regs.acrs);
3490 	restore_access_regs(vcpu->arch.host_acrs);
3491 	/* Save guest register state */
3492 	save_fpu_regs();
3493 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3494 	/* Restore will be done lazily at return */
3495 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3496 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3497 	if (MACHINE_HAS_GS) {
3498 		__ctl_set_bit(2, 4);
3499 		if (vcpu->arch.gs_enabled)
3500 			save_gs_cb(current->thread.gs_cb);
3501 		preempt_disable();
3502 		current->thread.gs_cb = vcpu->arch.host_gscb;
3503 		restore_gs_cb(vcpu->arch.host_gscb);
3504 		preempt_enable();
3505 		if (!vcpu->arch.host_gscb)
3506 			__ctl_clear_bit(2, 4);
3507 		vcpu->arch.host_gscb = NULL;
3508 	}
3509 
3510 }
3511 
3512 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3513 {
3514 	int rc;
3515 
3516 	if (kvm_run->immediate_exit)
3517 		return -EINTR;
3518 
3519 	vcpu_load(vcpu);
3520 
3521 	if (guestdbg_exit_pending(vcpu)) {
3522 		kvm_s390_prepare_debug_exit(vcpu);
3523 		rc = 0;
3524 		goto out;
3525 	}
3526 
3527 	kvm_sigset_activate(vcpu);
3528 
3529 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3530 		kvm_s390_vcpu_start(vcpu);
3531 	} else if (is_vcpu_stopped(vcpu)) {
3532 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3533 				   vcpu->vcpu_id);
3534 		rc = -EINVAL;
3535 		goto out;
3536 	}
3537 
3538 	sync_regs(vcpu, kvm_run);
3539 	enable_cpu_timer_accounting(vcpu);
3540 
3541 	might_fault();
3542 	rc = __vcpu_run(vcpu);
3543 
3544 	if (signal_pending(current) && !rc) {
3545 		kvm_run->exit_reason = KVM_EXIT_INTR;
3546 		rc = -EINTR;
3547 	}
3548 
3549 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3550 		kvm_s390_prepare_debug_exit(vcpu);
3551 		rc = 0;
3552 	}
3553 
3554 	if (rc == -EREMOTE) {
3555 		/* userspace support is needed, kvm_run has been prepared */
3556 		rc = 0;
3557 	}
3558 
3559 	disable_cpu_timer_accounting(vcpu);
3560 	store_regs(vcpu, kvm_run);
3561 
3562 	kvm_sigset_deactivate(vcpu);
3563 
3564 	vcpu->stat.exit_userspace++;
3565 out:
3566 	vcpu_put(vcpu);
3567 	return rc;
3568 }
3569 
3570 /*
3571  * store status at address
3572  * we have two special cases:
3573  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3574  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3575  */
3576 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3577 {
3578 	unsigned char archmode = 1;
3579 	freg_t fprs[NUM_FPRS];
3580 	unsigned int px;
3581 	u64 clkcomp, cputm;
3582 	int rc;
3583 
3584 	px = kvm_s390_get_prefix(vcpu);
3585 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3586 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3587 			return -EFAULT;
3588 		gpa = 0;
3589 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3590 		if (write_guest_real(vcpu, 163, &archmode, 1))
3591 			return -EFAULT;
3592 		gpa = px;
3593 	} else
3594 		gpa -= __LC_FPREGS_SAVE_AREA;
3595 
3596 	/* manually convert vector registers if necessary */
3597 	if (MACHINE_HAS_VX) {
3598 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3599 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3600 				     fprs, 128);
3601 	} else {
3602 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3603 				     vcpu->run->s.regs.fprs, 128);
3604 	}
3605 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3606 			      vcpu->run->s.regs.gprs, 128);
3607 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3608 			      &vcpu->arch.sie_block->gpsw, 16);
3609 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3610 			      &px, 4);
3611 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3612 			      &vcpu->run->s.regs.fpc, 4);
3613 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3614 			      &vcpu->arch.sie_block->todpr, 4);
3615 	cputm = kvm_s390_get_cpu_timer(vcpu);
3616 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3617 			      &cputm, 8);
3618 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3619 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3620 			      &clkcomp, 8);
3621 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3622 			      &vcpu->run->s.regs.acrs, 64);
3623 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3624 			      &vcpu->arch.sie_block->gcr, 128);
3625 	return rc ? -EFAULT : 0;
3626 }
3627 
3628 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3629 {
3630 	/*
3631 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3632 	 * switch in the run ioctl. Let's update our copies before we save
3633 	 * them into the save area.
3634 	 */
3635 	save_fpu_regs();
3636 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3637 	save_access_regs(vcpu->run->s.regs.acrs);
3638 
3639 	return kvm_s390_store_status_unloaded(vcpu, addr);
3640 }
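
/*
 * Userspace usage sketch (illustrative only, vcpu_fd assumed): the ioctl
 * argument is the guest absolute address of the save area, or one of the
 * two special values handled in kvm_s390_store_status_unloaded() above.
 * The vcpu is typically stopped by the VMM before storing its status.
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */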
3641 
3642 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3643 {
3644 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3645 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3646 }
3647 
3648 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3649 {
3650 	unsigned int i;
3651 	struct kvm_vcpu *vcpu;
3652 
3653 	kvm_for_each_vcpu(i, vcpu, kvm) {
3654 		__disable_ibs_on_vcpu(vcpu);
3655 	}
3656 }
3657 
3658 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3659 {
3660 	if (!sclp.has_ibs)
3661 		return;
3662 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3663 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3664 }
3665 
3666 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3667 {
3668 	int i, online_vcpus, started_vcpus = 0;
3669 
3670 	if (!is_vcpu_stopped(vcpu))
3671 		return;
3672 
3673 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3674 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3675 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3676 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3677 
3678 	for (i = 0; i < online_vcpus; i++) {
3679 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3680 			started_vcpus++;
3681 	}
3682 
3683 	if (started_vcpus == 0) {
3684 		/* we're the only active VCPU -> speed it up */
3685 		__enable_ibs_on_vcpu(vcpu);
3686 	} else if (started_vcpus == 1) {
3687 		/*
3688 		 * As we are starting a second VCPU, we have to disable
3689 		 * the IBS facility on all VCPUs to remove potentially
3690 		 * outstanding ENABLE requests.
3691 		 */
3692 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3693 	}
3694 
3695 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3696 	/*
3697 	 * Another VCPU might have used IBS while we were offline.
3698 	 * Let's play safe and flush the VCPU at startup.
3699 	 */
3700 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3701 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3703 }
3704 
3705 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3706 {
3707 	int i, online_vcpus, started_vcpus = 0;
3708 	struct kvm_vcpu *started_vcpu = NULL;
3709 
3710 	if (is_vcpu_stopped(vcpu))
3711 		return;
3712 
3713 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3714 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3715 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3716 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3717 
3718 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3719 	kvm_s390_clear_stop_irq(vcpu);
3720 
3721 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3722 	__disable_ibs_on_vcpu(vcpu);
3723 
3724 	for (i = 0; i < online_vcpus; i++) {
3725 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3726 			started_vcpus++;
3727 			started_vcpu = vcpu->kvm->vcpus[i];
3728 		}
3729 	}
3730 
3731 	if (started_vcpus == 1) {
3732 		/*
3733 		 * As we only have one VCPU left, we want to enable the
3734 		 * IBS facility for that VCPU to speed it up.
3735 		 */
3736 		__enable_ibs_on_vcpu(started_vcpu);
3737 	}
3738 
3739 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3741 }
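
/*
 * Userspace usage sketch (illustrative only, vcpu_fd assumed): one way user
 * space can drive the start/stop transitions above is KVM_SET_MP_STATE,
 * which also switches the VM to user-controlled CPU state handling.
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 *	mp.mp_state = KVM_MP_STATE_OPERATING;
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */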
3742 
3743 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3744 				     struct kvm_enable_cap *cap)
3745 {
3746 	int r;
3747 
3748 	if (cap->flags)
3749 		return -EINVAL;
3750 
3751 	switch (cap->cap) {
3752 	case KVM_CAP_S390_CSS_SUPPORT:
3753 		if (!vcpu->kvm->arch.css_support) {
3754 			vcpu->kvm->arch.css_support = 1;
3755 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3756 			trace_kvm_s390_enable_css(vcpu->kvm);
3757 		}
3758 		r = 0;
3759 		break;
3760 	default:
3761 		r = -EINVAL;
3762 		break;
3763 	}
3764 	return r;
3765 }
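
/*
 * Userspace usage sketch (illustrative only, vcpu_fd assumed): enabling the
 * channel subsystem support handled above; flags must be zero.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */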
3766 
3767 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3768 				  struct kvm_s390_mem_op *mop)
3769 {
3770 	void __user *uaddr = (void __user *)mop->buf;
3771 	void *tmpbuf = NULL;
3772 	int r, srcu_idx;
3773 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3774 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3775 
3776 	if (mop->flags & ~supported_flags)
3777 		return -EINVAL;
3778 
3779 	if (mop->size > MEM_OP_MAX_SIZE)
3780 		return -E2BIG;
3781 
3782 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3783 		tmpbuf = vmalloc(mop->size);
3784 		if (!tmpbuf)
3785 			return -ENOMEM;
3786 	}
3787 
3788 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3789 
3790 	switch (mop->op) {
3791 	case KVM_S390_MEMOP_LOGICAL_READ:
3792 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3793 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3794 					    mop->size, GACC_FETCH);
3795 			break;
3796 		}
3797 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3798 		if (r == 0) {
3799 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3800 				r = -EFAULT;
3801 		}
3802 		break;
3803 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3804 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3805 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3806 					    mop->size, GACC_STORE);
3807 			break;
3808 		}
3809 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3810 			r = -EFAULT;
3811 			break;
3812 		}
3813 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3814 		break;
3815 	default:
3816 		r = -EINVAL;
3817 	}
3818 
3819 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3820 
3821 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3822 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3823 
3824 	vfree(tmpbuf);
3825 	return r;
3826 }
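
/*
 * Userspace usage sketch (illustrative only, vcpu_fd assumed): reading 512
 * bytes from guest logical address 0x1000 through access register 0, using
 * the kvm_s390_mem_op layout consumed above. The guest address and size are
 * arbitrary example values.
 *
 *	char data[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = sizeof(data),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)data,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */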
3827 
3828 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3829 			       unsigned int ioctl, unsigned long arg)
3830 {
3831 	struct kvm_vcpu *vcpu = filp->private_data;
3832 	void __user *argp = (void __user *)arg;
3833 
3834 	switch (ioctl) {
3835 	case KVM_S390_IRQ: {
3836 		struct kvm_s390_irq s390irq;
3837 
3838 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3839 			return -EFAULT;
3840 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3841 	}
3842 	case KVM_S390_INTERRUPT: {
3843 		struct kvm_s390_interrupt s390int;
3844 		struct kvm_s390_irq s390irq;
3845 
3846 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3847 			return -EFAULT;
3848 		if (s390int_to_s390irq(&s390int, &s390irq))
3849 			return -EINVAL;
3850 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3851 	}
3852 	}
3853 	return -ENOIOCTLCMD;
3854 }
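
/*
 * Userspace usage sketch (illustrative only, vcpu_fd assumed): injecting an
 * emergency signal through the KVM_S390_IRQ path above. u.emerg.code is the
 * CPU address of the signalling CPU (0 here as an example).
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */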
3855 
3856 long kvm_arch_vcpu_ioctl(struct file *filp,
3857 			 unsigned int ioctl, unsigned long arg)
3858 {
3859 	struct kvm_vcpu *vcpu = filp->private_data;
3860 	void __user *argp = (void __user *)arg;
3861 	int idx;
3862 	long r;
3863 
3864 	vcpu_load(vcpu);
3865 
3866 	switch (ioctl) {
3867 	case KVM_S390_STORE_STATUS:
3868 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3869 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3870 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3871 		break;
3872 	case KVM_S390_SET_INITIAL_PSW: {
3873 		psw_t psw;
3874 
3875 		r = -EFAULT;
3876 		if (copy_from_user(&psw, argp, sizeof(psw)))
3877 			break;
3878 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3879 		break;
3880 	}
3881 	case KVM_S390_INITIAL_RESET:
3882 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3883 		break;
3884 	case KVM_SET_ONE_REG:
3885 	case KVM_GET_ONE_REG: {
3886 		struct kvm_one_reg reg;
3887 		r = -EFAULT;
3888 		if (copy_from_user(&reg, argp, sizeof(reg)))
3889 			break;
3890 		if (ioctl == KVM_SET_ONE_REG)
3891 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3892 		else
3893 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3894 		break;
3895 	}
3896 #ifdef CONFIG_KVM_S390_UCONTROL
3897 	case KVM_S390_UCAS_MAP: {
3898 		struct kvm_s390_ucas_mapping ucasmap;
3899 
3900 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3901 			r = -EFAULT;
3902 			break;
3903 		}
3904 
3905 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3906 			r = -EINVAL;
3907 			break;
3908 		}
3909 
3910 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3911 				     ucasmap.vcpu_addr, ucasmap.length);
3912 		break;
3913 	}
3914 	case KVM_S390_UCAS_UNMAP: {
3915 		struct kvm_s390_ucas_mapping ucasmap;
3916 
3917 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3918 			r = -EFAULT;
3919 			break;
3920 		}
3921 
3922 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3923 			r = -EINVAL;
3924 			break;
3925 		}
3926 
3927 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3928 			ucasmap.length);
3929 		break;
3930 	}
3931 #endif
3932 	case KVM_S390_VCPU_FAULT: {
3933 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3934 		break;
3935 	}
3936 	case KVM_ENABLE_CAP:
3937 	{
3938 		struct kvm_enable_cap cap;
3939 		r = -EFAULT;
3940 		if (copy_from_user(&cap, argp, sizeof(cap)))
3941 			break;
3942 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3943 		break;
3944 	}
3945 	case KVM_S390_MEM_OP: {
3946 		struct kvm_s390_mem_op mem_op;
3947 
3948 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3949 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3950 		else
3951 			r = -EFAULT;
3952 		break;
3953 	}
3954 	case KVM_S390_SET_IRQ_STATE: {
3955 		struct kvm_s390_irq_state irq_state;
3956 
3957 		r = -EFAULT;
3958 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3959 			break;
3960 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3961 		    irq_state.len == 0 ||
3962 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3963 			r = -EINVAL;
3964 			break;
3965 		}
3966 		/* do not use irq_state.flags, it will break old QEMUs */
3967 		r = kvm_s390_set_irq_state(vcpu,
3968 					   (void __user *) irq_state.buf,
3969 					   irq_state.len);
3970 		break;
3971 	}
3972 	case KVM_S390_GET_IRQ_STATE: {
3973 		struct kvm_s390_irq_state irq_state;
3974 
3975 		r = -EFAULT;
3976 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3977 			break;
3978 		if (irq_state.len == 0) {
3979 			r = -EINVAL;
3980 			break;
3981 		}
3982 		/* do not use irq_state.flags, it will break old QEMUs */
3983 		r = kvm_s390_get_irq_state(vcpu,
3984 					   (__u8 __user *)  irq_state.buf,
3985 					   irq_state.len);
3986 		break;
3987 	}
3988 	default:
3989 		r = -ENOTTY;
3990 	}
3991 
3992 	vcpu_put(vcpu);
3993 	return r;
3994 }
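
/*
 * Userspace usage sketch (illustrative only, vcpu_fd assumed): setting the
 * initial guest PSW via the KVM_S390_SET_INITIAL_PSW case above. The mask
 * selects 64-bit addressing mode (EA and BA bits); the address is a
 * hypothetical guest entry point.
 *
 *	struct kvm_s390_psw psw = {
 *		.mask = 0x0000000180000000UL,
 *		.addr = 0x10000,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw);
 */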
3995 
3996 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3997 {
3998 #ifdef CONFIG_KVM_S390_UCONTROL
3999 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4000 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4001 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4002 		get_page(vmf->page);
4003 		return 0;
4004 	}
4005 #endif
4006 	return VM_FAULT_SIGBUS;
4007 }
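
/*
 * Userspace usage sketch (illustrative only, for user-controlled VMs): the
 * SIE control block can be mapped from the vcpu fd at the fixed page offset
 * checked above.
 *
 *	long pgsz = sysconf(_SC_PAGESIZE);
 *	void *sie = mmap(NULL, pgsz, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * pgsz);
 */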
4008 
4009 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4010 			    unsigned long npages)
4011 {
4012 	return 0;
4013 }
4014 
4015 /* Section: memory related */
4016 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4017 				   struct kvm_memory_slot *memslot,
4018 				   const struct kvm_userspace_memory_region *mem,
4019 				   enum kvm_mr_change change)
4020 {
4021 	/* A few sanity checks. Memory slots have to start and end on a
4022 	   segment boundary (1 MB). The memory in userland may be fragmented
4023 	   into various different vmas. It is okay to mmap() and munmap()
4024 	   stuff in this slot after doing this call at any time. */
4025 
4026 	if (mem->userspace_addr & 0xffffful)
4027 		return -EINVAL;
4028 
4029 	if (mem->memory_size & 0xffffful)
4030 		return -EINVAL;
4031 
4032 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4033 		return -EINVAL;
4034 
4035 	return 0;
4036 }
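
/*
 * Userspace usage sketch (illustrative only; vm_fd assumed, "backing"
 * assumed to be a 1 MB aligned anonymous mapping): a memslot that satisfies
 * the checks above, i.e. userspace_addr is 1 MB aligned and memory_size is
 * a multiple of 1 MB.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256 << 20,
 *		.userspace_addr  = (__u64)(unsigned long)backing,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */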
4037 
4038 void kvm_arch_commit_memory_region(struct kvm *kvm,
4039 				const struct kvm_userspace_memory_region *mem,
4040 				const struct kvm_memory_slot *old,
4041 				const struct kvm_memory_slot *new,
4042 				enum kvm_mr_change change)
4043 {
4044 	int rc;
4045 
4046 	/* If the basics of the memslot do not change, we do not want
4047 	 * to update the gmap. Every update causes several unnecessary
4048 	 * segment translation exceptions. This is usually handled just
4049 	 * fine by the normal fault handler + gmap, but it will also
4050 	 * cause faults on the prefix page of running guest CPUs.
4051 	 */
4052 	if (old->userspace_addr == mem->userspace_addr &&
4053 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4054 	    old->npages * PAGE_SIZE == mem->memory_size)
4055 		return;
4056 
4057 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4058 		mem->guest_phys_addr, mem->memory_size);
4059 	if (rc)
4060 		pr_warn("failed to commit memory region\n");
4061 	return;
4062 }
4063 
4064 static inline unsigned long nonhyp_mask(int i)
4065 {
4066 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4067 
4068 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4069 }
4070 
4071 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4072 {
4073 	vcpu->valid_wakeup = false;
4074 }
4075 
4076 static int __init kvm_s390_init(void)
4077 {
4078 	int i;
4079 
4080 	if (!sclp.has_sief2) {
4081 		pr_info("SIE not available\n");
4082 		return -ENODEV;
4083 	}
4084 
4085 	for (i = 0; i < 16; i++)
4086 		kvm_s390_fac_base[i] |=
4087 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4088 
4089 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4090 }
4091 
4092 static void __exit kvm_s390_exit(void)
4093 {
4094 	kvm_exit();
4095 }
4096 
4097 module_init(kvm_s390_init);
4098 module_exit(kvm_s390_exit);
4099 
4100 /*
4101  * Enable autoloading of the kvm module.
4102  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4103  * since x86 takes a different approach.
4104  */
4105 #include <linux/miscdevice.h>
4106 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4107 MODULE_ALIAS("devname:kvm");
4108