xref: /linux/arch/s390/kvm/kvm-s390.c (revision 8a8d6bbe1d3bc7137c777ba06246d7e9c08dde4d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
/* Maximum transfer size, in bytes, for a single KVM_S390_MEM_OP */
#define MEM_OP_MAX_SIZE 65536
#define LOCAL_IRQS 32
/* Bytes needed for KVM_MAX_VCPUS + LOCAL_IRQS entries of struct kvm_s390_irq */
#define VCPU_IRQS_MAX_BUF \
	(sizeof(struct kvm_s390_irq) * (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 	KVM_GENERIC_VM_STATS(),
63 	STATS_DESC_COUNTER(VM, inject_io),
64 	STATS_DESC_COUNTER(VM, inject_float_mchk),
65 	STATS_DESC_COUNTER(VM, inject_pfault_done),
66 	STATS_DESC_COUNTER(VM, inject_service_signal),
67 	STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69 
70 const struct kvm_stats_header kvm_vm_stats_header = {
71 	.name_size = KVM_STATS_NAME_SIZE,
72 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73 	.id_offset = sizeof(struct kvm_stats_header),
74 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76 		       sizeof(kvm_vm_stats_desc),
77 };
78 
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80 	KVM_GENERIC_VCPU_STATS(),
81 	STATS_DESC_COUNTER(VCPU, exit_userspace),
82 	STATS_DESC_COUNTER(VCPU, exit_null),
83 	STATS_DESC_COUNTER(VCPU, exit_external_request),
84 	STATS_DESC_COUNTER(VCPU, exit_io_request),
85 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
87 	STATS_DESC_COUNTER(VCPU, exit_validity),
88 	STATS_DESC_COUNTER(VCPU, exit_instruction),
89 	STATS_DESC_COUNTER(VCPU, exit_pei),
90 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
92 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
94 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
95 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
99 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
100 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
101 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
104 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107 	STATS_DESC_COUNTER(VCPU, deliver_program),
108 	STATS_DESC_COUNTER(VCPU, deliver_io),
109 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
111 	STATS_DESC_COUNTER(VCPU, inject_ckc),
112 	STATS_DESC_COUNTER(VCPU, inject_cputm),
113 	STATS_DESC_COUNTER(VCPU, inject_external_call),
114 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115 	STATS_DESC_COUNTER(VCPU, inject_mchk),
116 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117 	STATS_DESC_COUNTER(VCPU, inject_program),
118 	STATS_DESC_COUNTER(VCPU, inject_restart),
119 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
122 	STATS_DESC_COUNTER(VCPU, instruction_gs),
123 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
124 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
128 	STATS_DESC_COUNTER(VCPU, instruction_sck),
129 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
131 	STATS_DESC_COUNTER(VCPU, instruction_spx),
132 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
133 	STATS_DESC_COUNTER(VCPU, instruction_stap),
134 	STATS_DESC_COUNTER(VCPU, instruction_iske),
135 	STATS_DESC_COUNTER(VCPU, instruction_ri),
136 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137 	STATS_DESC_COUNTER(VCPU, instruction_sske),
138 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
140 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
141 	STATS_DESC_COUNTER(VCPU, instruction_tb),
142 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
143 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
144 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
145 	STATS_DESC_COUNTER(VCPU, instruction_sie),
146 	STATS_DESC_COUNTER(VCPU, instruction_essa),
147 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173 	STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175 
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177 	.name_size = KVM_STATS_NAME_SIZE,
178 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179 	.id_offset = sizeof(struct kvm_stats_header),
180 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182 		       sizeof(kvm_vcpu_stats_desc),
183 };
184 
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189 
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194 
195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199 
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa  = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204 
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209 
210 /*
211  * For now we handle at most 16 double words as this is what the s390 base
212  * kernel handles and stores in the prefix page. If we ever need to go beyond
213  * this, this requires changes to code, but the external uapi can stay.
214  */
215 #define SIZE_INTERNAL 16
216 
217 /*
218  * Base feature mask that defines default mask for facilities. Consists of the
219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220  */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224  * and defines the facilities that can be enabled via a cpu model.
225  */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227 
228 static unsigned long kvm_s390_fac_size(void)
229 {
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 		sizeof(stfle_fac_list));
234 
235 	return SIZE_INTERNAL;
236 }
237 
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242 
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247 
/* Section: not file related */

/*
 * Nothing has to be switched on here: every s390 is virtualization
 * enabled, so this always reports success.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
254 
/* No compatibility check is performed; @opaque is unused and 0 is returned. */
int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
259 
/* forward declarations of helpers used before their definition */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
264 
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267 	u8 delta_idx = 0;
268 
269 	/*
270 	 * The TOD jumps by delta, we have to compensate this by adding
271 	 * -delta to the epoch.
272 	 */
273 	delta = -delta;
274 
275 	/* sign-extension - we're adding to signed values below */
276 	if ((s64)delta < 0)
277 		delta_idx = -1;
278 
279 	scb->epoch += delta;
280 	if (scb->ecd & ECD_MEF) {
281 		scb->epdx += delta_idx;
282 		if (scb->epoch < delta)
283 			scb->epdx += 1;
284 	}
285 }
286 
287 /*
288  * This callback is executed during stop_machine(). All CPUs are therefore
289  * temporarily stopped. In order not to change guest behavior, we have to
290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291  * so a CPU won't be stopped while calculating with the epoch.
292  */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294 			  void *v)
295 {
296 	struct kvm *kvm;
297 	struct kvm_vcpu *vcpu;
298 	int i;
299 	unsigned long long *delta = v;
300 
301 	list_for_each_entry(kvm, &vm_list, vm_list) {
302 		kvm_for_each_vcpu(i, vcpu, kvm) {
303 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304 			if (i == 0) {
305 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307 			}
308 			if (vcpu->arch.cputm_enabled)
309 				vcpu->arch.cputm_start += *delta;
310 			if (vcpu->arch.vsie_block)
311 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
312 						   *delta);
313 		}
314 	}
315 	return NOTIFY_OK;
316 }
317 
318 static struct notifier_block kvm_clock_notifier = {
319 	.notifier_call = kvm_clock_sync,
320 };
321 
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324 	gmap_notifier.notifier_call = kvm_gmap_notifier;
325 	gmap_register_pte_notifier(&gmap_notifier);
326 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 	gmap_register_pte_notifier(&vsie_gmap_notifier);
328 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 				       &kvm_clock_notifier);
330 	return 0;
331 }
332 
333 void kvm_arch_hardware_unsetup(void)
334 {
335 	gmap_unregister_pte_notifier(&gmap_notifier);
336 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 					 &kvm_clock_notifier);
339 }
340 
341 static void allow_cpu_feat(unsigned long nr)
342 {
343 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345 
346 static inline int plo_test_bit(unsigned char nr)
347 {
348 	unsigned long function = (unsigned long)nr | 0x100;
349 	int cc;
350 
351 	asm volatile(
352 		"	lgr	0,%[function]\n"
353 		/* Parameter registers are ignored for "test bit" */
354 		"	plo	0,0,0,0(0)\n"
355 		"	ipm	%0\n"
356 		"	srl	%0,28\n"
357 		: "=d" (cc)
358 		: [function] "d" (function)
359 		: "cc", "0");
360 	return cc == 0;
361 }
362 
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365 	asm volatile(
366 		"	lghi	0,0\n"
367 		"	lgr	1,%[query]\n"
368 		/* Parameter registers are ignored */
369 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
370 		:
371 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372 		: "cc", "memory", "0", "1");
373 }
374 
/* opcodes passed to __insn32_query() */
#define INSN_SORTL	0xb938
#define INSN_DFLTCC	0xb939
377 
378 static void kvm_s390_cpu_feat_init(void)
379 {
380 	int i;
381 
382 	for (i = 0; i < 256; ++i) {
383 		if (plo_test_bit(i))
384 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385 	}
386 
387 	if (test_facility(28)) /* TOD-clock steering */
388 		ptff(kvm_s390_available_subfunc.ptff,
389 		     sizeof(kvm_s390_available_subfunc.ptff),
390 		     PTFF_QAF);
391 
392 	if (test_facility(17)) { /* MSA */
393 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 			      kvm_s390_available_subfunc.kmac);
395 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmc);
397 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.km);
399 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kimd);
401 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.klmd);
403 	}
404 	if (test_facility(76)) /* MSA3 */
405 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.pckmo);
407 	if (test_facility(77)) { /* MSA4 */
408 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kmctr);
410 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kmf);
412 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmo);
414 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.pcc);
416 	}
417 	if (test_facility(57)) /* MSA5 */
418 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.ppno);
420 
421 	if (test_facility(146)) /* MSA8 */
422 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.kma);
424 
425 	if (test_facility(155)) /* MSA9 */
426 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 			      kvm_s390_available_subfunc.kdsa);
428 
429 	if (test_facility(150)) /* SORTL */
430 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431 
432 	if (test_facility(151)) /* DFLTCC */
433 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434 
435 	if (MACHINE_HAS_ESOP)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437 	/*
438 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440 	 */
441 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 	    !test_facility(3) || !nested)
443 		return;
444 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 	if (sclp.has_64bscao)
446 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447 	if (sclp.has_siif)
448 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449 	if (sclp.has_gpere)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451 	if (sclp.has_gsls)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453 	if (sclp.has_ib)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455 	if (sclp.has_cei)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457 	if (sclp.has_ibs)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459 	if (sclp.has_kss)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461 	/*
462 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 	 * all skey handling functions read/set the skey from the PGSTE
464 	 * instead of the real storage key.
465 	 *
466 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 	 * pages being detected as preserved although they are resident.
468 	 *
469 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471 	 *
472 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475 	 *
476 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 	 * cannot easily shadow the SCA because of the ipte lock.
478 	 */
479 }
480 
481 int kvm_arch_init(void *opaque)
482 {
483 	int rc = -ENOMEM;
484 
485 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486 	if (!kvm_s390_dbf)
487 		return -ENOMEM;
488 
489 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 	if (!kvm_s390_dbf_uv)
491 		goto out;
492 
493 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495 		goto out;
496 
497 	kvm_s390_cpu_feat_init();
498 
499 	/* Register floating interrupt controller interface. */
500 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501 	if (rc) {
502 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
503 		goto out;
504 	}
505 
506 	rc = kvm_s390_gib_init(GAL_ISC);
507 	if (rc)
508 		goto out;
509 
510 	return 0;
511 
512 out:
513 	kvm_arch_exit();
514 	return rc;
515 }
516 
517 void kvm_arch_exit(void)
518 {
519 	kvm_s390_gib_destroy();
520 	debug_unregister(kvm_s390_dbf);
521 	debug_unregister(kvm_s390_dbf_uv);
522 }
523 
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526 			unsigned int ioctl, unsigned long arg)
527 {
528 	if (ioctl == KVM_S390_ENABLE_SIE)
529 		return s390_enable_sie();
530 	return -EINVAL;
531 }
532 
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535 	int r;
536 
537 	switch (ext) {
538 	case KVM_CAP_S390_PSW:
539 	case KVM_CAP_S390_GMAP:
540 	case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 	case KVM_CAP_S390_UCONTROL:
543 #endif
544 	case KVM_CAP_ASYNC_PF:
545 	case KVM_CAP_SYNC_REGS:
546 	case KVM_CAP_ONE_REG:
547 	case KVM_CAP_ENABLE_CAP:
548 	case KVM_CAP_S390_CSS_SUPPORT:
549 	case KVM_CAP_IOEVENTFD:
550 	case KVM_CAP_DEVICE_CTRL:
551 	case KVM_CAP_S390_IRQCHIP:
552 	case KVM_CAP_VM_ATTRIBUTES:
553 	case KVM_CAP_MP_STATE:
554 	case KVM_CAP_IMMEDIATE_EXIT:
555 	case KVM_CAP_S390_INJECT_IRQ:
556 	case KVM_CAP_S390_USER_SIGP:
557 	case KVM_CAP_S390_USER_STSI:
558 	case KVM_CAP_S390_SKEYS:
559 	case KVM_CAP_S390_IRQ_STATE:
560 	case KVM_CAP_S390_USER_INSTR0:
561 	case KVM_CAP_S390_CMMA_MIGRATION:
562 	case KVM_CAP_S390_AIS:
563 	case KVM_CAP_S390_AIS_MIGRATION:
564 	case KVM_CAP_S390_VCPU_RESETS:
565 	case KVM_CAP_SET_GUEST_DEBUG:
566 	case KVM_CAP_S390_DIAG318:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		if (ext == KVM_CAP_NR_VCPUS)
589 			r = min_t(unsigned int, num_online_cpus(), r);
590 		break;
591 	case KVM_CAP_S390_COW:
592 		r = MACHINE_HAS_ESOP;
593 		break;
594 	case KVM_CAP_S390_VECTOR_REGISTERS:
595 		r = MACHINE_HAS_VX;
596 		break;
597 	case KVM_CAP_S390_RI:
598 		r = test_facility(64);
599 		break;
600 	case KVM_CAP_S390_GS:
601 		r = test_facility(133);
602 		break;
603 	case KVM_CAP_S390_BPB:
604 		r = test_facility(82);
605 		break;
606 	case KVM_CAP_S390_PROTECTED:
607 		r = is_prot_virt_host();
608 		break;
609 	default:
610 		r = 0;
611 	}
612 	return r;
613 }
614 
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617 	int i;
618 	gfn_t cur_gfn, last_gfn;
619 	unsigned long gaddr, vmaddr;
620 	struct gmap *gmap = kvm->arch.gmap;
621 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622 
623 	/* Loop over all guest segments */
624 	cur_gfn = memslot->base_gfn;
625 	last_gfn = memslot->base_gfn + memslot->npages;
626 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 		gaddr = gfn_to_gpa(cur_gfn);
628 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 		if (kvm_is_error_hva(vmaddr))
630 			continue;
631 
632 		bitmap_zero(bitmap, _PAGE_ENTRIES);
633 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 		for (i = 0; i < _PAGE_ENTRIES; i++) {
635 			if (test_bit(i, bitmap))
636 				mark_page_dirty(kvm, cur_gfn + i);
637 		}
638 
639 		if (fatal_signal_pending(current))
640 			return;
641 		cond_resched();
642 	}
643 }
644 
/* Section: vm related */

/* forward declaration, used before its definition */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647 
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 			       struct kvm_dirty_log *log)
653 {
654 	int r;
655 	unsigned long n;
656 	struct kvm_memory_slot *memslot;
657 	int is_dirty;
658 
659 	if (kvm_is_ucontrol(kvm))
660 		return -EINVAL;
661 
662 	mutex_lock(&kvm->slots_lock);
663 
664 	r = -EINVAL;
665 	if (log->slot >= KVM_USER_MEM_SLOTS)
666 		goto out;
667 
668 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669 	if (r)
670 		goto out;
671 
672 	/* Clear the dirty log */
673 	if (is_dirty) {
674 		n = kvm_dirty_bitmap_bytes(memslot);
675 		memset(memslot->dirty_bitmap, 0, n);
676 	}
677 	r = 0;
678 out:
679 	mutex_unlock(&kvm->slots_lock);
680 	return r;
681 }
682 
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685 	unsigned int i;
686 	struct kvm_vcpu *vcpu;
687 
688 	kvm_for_each_vcpu(i, vcpu, kvm) {
689 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690 	}
691 }
692 
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695 	int r;
696 
697 	if (cap->flags)
698 		return -EINVAL;
699 
700 	switch (cap->cap) {
701 	case KVM_CAP_S390_IRQCHIP:
702 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 		kvm->arch.use_irqchip = 1;
704 		r = 0;
705 		break;
706 	case KVM_CAP_S390_USER_SIGP:
707 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 		kvm->arch.user_sigp = 1;
709 		r = 0;
710 		break;
711 	case KVM_CAP_S390_VECTOR_REGISTERS:
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (MACHINE_HAS_VX) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 			set_kvm_facility(kvm->arch.model.fac_list, 129);
718 			if (test_facility(134)) {
719 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 				set_kvm_facility(kvm->arch.model.fac_list, 134);
721 			}
722 			if (test_facility(135)) {
723 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 				set_kvm_facility(kvm->arch.model.fac_list, 135);
725 			}
726 			if (test_facility(148)) {
727 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 				set_kvm_facility(kvm->arch.model.fac_list, 148);
729 			}
730 			if (test_facility(152)) {
731 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 				set_kvm_facility(kvm->arch.model.fac_list, 152);
733 			}
734 			if (test_facility(192)) {
735 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 				set_kvm_facility(kvm->arch.model.fac_list, 192);
737 			}
738 			r = 0;
739 		} else
740 			r = -EINVAL;
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_RI:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(64)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 			set_kvm_facility(kvm->arch.model.fac_list, 64);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_AIS:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus) {
762 			r = -EBUSY;
763 		} else {
764 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 			set_kvm_facility(kvm->arch.model.fac_list, 72);
766 			r = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_GS:
773 		r = -EINVAL;
774 		mutex_lock(&kvm->lock);
775 		if (kvm->created_vcpus) {
776 			r = -EBUSY;
777 		} else if (test_facility(133)) {
778 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 			set_kvm_facility(kvm->arch.model.fac_list, 133);
780 			r = 0;
781 		}
782 		mutex_unlock(&kvm->lock);
783 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 			 r ? "(not available)" : "(success)");
785 		break;
786 	case KVM_CAP_S390_HPAGE_1M:
787 		mutex_lock(&kvm->lock);
788 		if (kvm->created_vcpus)
789 			r = -EBUSY;
790 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791 			r = -EINVAL;
792 		else {
793 			r = 0;
794 			mmap_write_lock(kvm->mm);
795 			kvm->mm->context.allow_gmap_hpage_1m = 1;
796 			mmap_write_unlock(kvm->mm);
797 			/*
798 			 * We might have to create fake 4k page
799 			 * tables. To avoid that the hardware works on
800 			 * stale PGSTEs, we emulate these instructions.
801 			 */
802 			kvm->arch.use_skf = 0;
803 			kvm->arch.use_pfmfi = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_USER_STSI:
810 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 		kvm->arch.user_stsi = 1;
812 		r = 0;
813 		break;
814 	case KVM_CAP_S390_USER_INSTR0:
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 		kvm->arch.user_instr0 = 1;
817 		icpt_operexc_on_all_vcpus(kvm);
818 		r = 0;
819 		break;
820 	default:
821 		r = -EINVAL;
822 		break;
823 	}
824 	return r;
825 }
826 
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829 	int ret;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_MEM_LIMIT_SIZE:
833 		ret = 0;
834 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 			 kvm->arch.mem_limit);
836 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837 			ret = -EFAULT;
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	int ret;
849 	unsigned int idx;
850 	switch (attr->attr) {
851 	case KVM_S390_VM_MEM_ENABLE_CMMA:
852 		ret = -ENXIO;
853 		if (!sclp.has_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 		mutex_lock(&kvm->lock);
858 		if (kvm->created_vcpus)
859 			ret = -EBUSY;
860 		else if (kvm->mm->context.allow_gmap_hpage_1m)
861 			ret = -EINVAL;
862 		else {
863 			kvm->arch.use_cmma = 1;
864 			/* Not compatible with cmma. */
865 			kvm->arch.use_pfmfi = 0;
866 			ret = 0;
867 		}
868 		mutex_unlock(&kvm->lock);
869 		break;
870 	case KVM_S390_VM_MEM_CLR_CMMA:
871 		ret = -ENXIO;
872 		if (!sclp.has_cmma)
873 			break;
874 		ret = -EINVAL;
875 		if (!kvm->arch.use_cmma)
876 			break;
877 
878 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 		mutex_lock(&kvm->lock);
880 		idx = srcu_read_lock(&kvm->srcu);
881 		s390_reset_cmma(kvm->arch.gmap->mm);
882 		srcu_read_unlock(&kvm->srcu, idx);
883 		mutex_unlock(&kvm->lock);
884 		ret = 0;
885 		break;
886 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 		unsigned long new_limit;
888 
889 		if (kvm_is_ucontrol(kvm))
890 			return -EINVAL;
891 
892 		if (get_user(new_limit, (u64 __user *)attr->addr))
893 			return -EFAULT;
894 
895 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 		    new_limit > kvm->arch.mem_limit)
897 			return -E2BIG;
898 
899 		if (!new_limit)
900 			return -EINVAL;
901 
902 		/* gmap_create takes last usable address */
903 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
904 			new_limit -= 1;
905 
906 		ret = -EBUSY;
907 		mutex_lock(&kvm->lock);
908 		if (!kvm->created_vcpus) {
909 			/* gmap_create will round the limit up */
910 			struct gmap *new = gmap_create(current->mm, new_limit);
911 
912 			if (!new) {
913 				ret = -ENOMEM;
914 			} else {
915 				gmap_remove(kvm->arch.gmap);
916 				new->private = kvm;
917 				kvm->arch.gmap = new;
918 				ret = 0;
919 			}
920 		}
921 		mutex_unlock(&kvm->lock);
922 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 			 (void *) kvm->arch.gmap->asce);
925 		break;
926 	}
927 	default:
928 		ret = -ENXIO;
929 		break;
930 	}
931 	return ret;
932 }
933 
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935 
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938 	struct kvm_vcpu *vcpu;
939 	int i;
940 
941 	kvm_s390_vcpu_block_all(kvm);
942 
943 	kvm_for_each_vcpu(i, vcpu, kvm) {
944 		kvm_s390_vcpu_crypto_setup(vcpu);
945 		/* recreate the shadow crycb by leaving the VSIE handler */
946 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947 	}
948 
949 	kvm_s390_vcpu_unblock_all(kvm);
950 }
951 
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954 	mutex_lock(&kvm->lock);
955 	switch (attr->attr) {
956 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		get_random_bytes(
962 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		kvm->arch.crypto.aes_kw = 1;
965 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966 		break;
967 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 		if (!test_kvm_facility(kvm, 76)) {
969 			mutex_unlock(&kvm->lock);
970 			return -EINVAL;
971 		}
972 		get_random_bytes(
973 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 		kvm->arch.crypto.dea_kw = 1;
976 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 		if (!test_kvm_facility(kvm, 76)) {
980 			mutex_unlock(&kvm->lock);
981 			return -EINVAL;
982 		}
983 		kvm->arch.crypto.aes_kw = 0;
984 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987 		break;
988 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 		if (!test_kvm_facility(kvm, 76)) {
990 			mutex_unlock(&kvm->lock);
991 			return -EINVAL;
992 		}
993 		kvm->arch.crypto.dea_kw = 0;
994 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997 		break;
998 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 		if (!ap_instructions_available()) {
1000 			mutex_unlock(&kvm->lock);
1001 			return -EOPNOTSUPP;
1002 		}
1003 		kvm->arch.crypto.apie = 1;
1004 		break;
1005 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 		if (!ap_instructions_available()) {
1007 			mutex_unlock(&kvm->lock);
1008 			return -EOPNOTSUPP;
1009 		}
1010 		kvm->arch.crypto.apie = 0;
1011 		break;
1012 	default:
1013 		mutex_unlock(&kvm->lock);
1014 		return -ENXIO;
1015 	}
1016 
1017 	kvm_s390_vcpu_crypto_reset_all(kvm);
1018 	mutex_unlock(&kvm->lock);
1019 	return 0;
1020 }
1021 
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024 	int cx;
1025 	struct kvm_vcpu *vcpu;
1026 
1027 	kvm_for_each_vcpu(cx, vcpu, kvm)
1028 		kvm_s390_sync_request(req, vcpu);
1029 }
1030 
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037 	struct kvm_memory_slot *ms;
1038 	struct kvm_memslots *slots;
1039 	unsigned long ram_pages = 0;
1040 	int slotnr;
1041 
1042 	/* migration mode already enabled */
1043 	if (kvm->arch.migration_mode)
1044 		return 0;
1045 	slots = kvm_memslots(kvm);
1046 	if (!slots || !slots->used_slots)
1047 		return -EINVAL;
1048 
1049 	if (!kvm->arch.use_cmma) {
1050 		kvm->arch.migration_mode = 1;
1051 		return 0;
1052 	}
1053 	/* mark all the pages in active slots as dirty */
1054 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1055 		ms = slots->memslots + slotnr;
1056 		if (!ms->dirty_bitmap)
1057 			return -EINVAL;
1058 		/*
1059 		 * The second half of the bitmap is only used on x86,
1060 		 * and would be wasted otherwise, so we put it to good
1061 		 * use here to keep track of the state of the storage
1062 		 * attributes.
1063 		 */
1064 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1065 		ram_pages += ms->npages;
1066 	}
1067 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1068 	kvm->arch.migration_mode = 1;
1069 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1070 	return 0;
1071 }
1072 
1073 /*
1074  * Must be called with kvm->slots_lock to avoid races with ourselves and
1075  * kvm_s390_vm_start_migration.
1076  */
1077 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1078 {
1079 	/* migration mode already disabled */
1080 	if (!kvm->arch.migration_mode)
1081 		return 0;
1082 	kvm->arch.migration_mode = 0;
1083 	if (kvm->arch.use_cmma)
1084 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1085 	return 0;
1086 }
1087 
1088 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1089 				     struct kvm_device_attr *attr)
1090 {
1091 	int res = -ENXIO;
1092 
1093 	mutex_lock(&kvm->slots_lock);
1094 	switch (attr->attr) {
1095 	case KVM_S390_VM_MIGRATION_START:
1096 		res = kvm_s390_vm_start_migration(kvm);
1097 		break;
1098 	case KVM_S390_VM_MIGRATION_STOP:
1099 		res = kvm_s390_vm_stop_migration(kvm);
1100 		break;
1101 	default:
1102 		break;
1103 	}
1104 	mutex_unlock(&kvm->slots_lock);
1105 
1106 	return res;
1107 }
1108 
1109 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1110 				     struct kvm_device_attr *attr)
1111 {
1112 	u64 mig = kvm->arch.migration_mode;
1113 
1114 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1115 		return -ENXIO;
1116 
1117 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1118 		return -EFAULT;
1119 	return 0;
1120 }
1121 
1122 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124 	struct kvm_s390_vm_tod_clock gtod;
1125 
1126 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1127 		return -EFAULT;
1128 
1129 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130 		return -EINVAL;
1131 	kvm_s390_set_tod_clock(kvm, &gtod);
1132 
1133 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1134 		gtod.epoch_idx, gtod.tod);
1135 
1136 	return 0;
1137 }
1138 
1139 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141 	u8 gtod_high;
1142 
1143 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1144 					   sizeof(gtod_high)))
1145 		return -EFAULT;
1146 
1147 	if (gtod_high != 0)
1148 		return -EINVAL;
1149 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1150 
1151 	return 0;
1152 }
1153 
1154 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 {
1156 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1157 
1158 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1159 			   sizeof(gtod.tod)))
1160 		return -EFAULT;
1161 
1162 	kvm_s390_set_tod_clock(kvm, &gtod);
1163 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1164 	return 0;
1165 }
1166 
1167 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 {
1169 	int ret;
1170 
1171 	if (attr->flags)
1172 		return -EINVAL;
1173 
1174 	switch (attr->attr) {
1175 	case KVM_S390_VM_TOD_EXT:
1176 		ret = kvm_s390_set_tod_ext(kvm, attr);
1177 		break;
1178 	case KVM_S390_VM_TOD_HIGH:
1179 		ret = kvm_s390_set_tod_high(kvm, attr);
1180 		break;
1181 	case KVM_S390_VM_TOD_LOW:
1182 		ret = kvm_s390_set_tod_low(kvm, attr);
1183 		break;
1184 	default:
1185 		ret = -ENXIO;
1186 		break;
1187 	}
1188 	return ret;
1189 }
1190 
1191 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1192 				   struct kvm_s390_vm_tod_clock *gtod)
1193 {
1194 	union tod_clock clk;
1195 
1196 	preempt_disable();
1197 
1198 	store_tod_clock_ext(&clk);
1199 
1200 	gtod->tod = clk.tod + kvm->arch.epoch;
1201 	gtod->epoch_idx = 0;
1202 	if (test_kvm_facility(kvm, 139)) {
1203 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1204 		if (gtod->tod < clk.tod)
1205 			gtod->epoch_idx += 1;
1206 	}
1207 
1208 	preempt_enable();
1209 }
1210 
1211 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213 	struct kvm_s390_vm_tod_clock gtod;
1214 
1215 	memset(&gtod, 0, sizeof(gtod));
1216 	kvm_s390_get_tod_clock(kvm, &gtod);
1217 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1218 		return -EFAULT;
1219 
1220 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1221 		gtod.epoch_idx, gtod.tod);
1222 	return 0;
1223 }
1224 
1225 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227 	u8 gtod_high = 0;
1228 
1229 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1230 					 sizeof(gtod_high)))
1231 		return -EFAULT;
1232 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1233 
1234 	return 0;
1235 }
1236 
1237 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239 	u64 gtod;
1240 
1241 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1242 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1243 		return -EFAULT;
1244 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1245 
1246 	return 0;
1247 }
1248 
1249 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1250 {
1251 	int ret;
1252 
1253 	if (attr->flags)
1254 		return -EINVAL;
1255 
1256 	switch (attr->attr) {
1257 	case KVM_S390_VM_TOD_EXT:
1258 		ret = kvm_s390_get_tod_ext(kvm, attr);
1259 		break;
1260 	case KVM_S390_VM_TOD_HIGH:
1261 		ret = kvm_s390_get_tod_high(kvm, attr);
1262 		break;
1263 	case KVM_S390_VM_TOD_LOW:
1264 		ret = kvm_s390_get_tod_low(kvm, attr);
1265 		break;
1266 	default:
1267 		ret = -ENXIO;
1268 		break;
1269 	}
1270 	return ret;
1271 }
1272 
1273 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1274 {
1275 	struct kvm_s390_vm_cpu_processor *proc;
1276 	u16 lowest_ibc, unblocked_ibc;
1277 	int ret = 0;
1278 
1279 	mutex_lock(&kvm->lock);
1280 	if (kvm->created_vcpus) {
1281 		ret = -EBUSY;
1282 		goto out;
1283 	}
1284 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1285 	if (!proc) {
1286 		ret = -ENOMEM;
1287 		goto out;
1288 	}
1289 	if (!copy_from_user(proc, (void __user *)attr->addr,
1290 			    sizeof(*proc))) {
1291 		kvm->arch.model.cpuid = proc->cpuid;
1292 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1293 		unblocked_ibc = sclp.ibc & 0xfff;
1294 		if (lowest_ibc && proc->ibc) {
1295 			if (proc->ibc > unblocked_ibc)
1296 				kvm->arch.model.ibc = unblocked_ibc;
1297 			else if (proc->ibc < lowest_ibc)
1298 				kvm->arch.model.ibc = lowest_ibc;
1299 			else
1300 				kvm->arch.model.ibc = proc->ibc;
1301 		}
1302 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1303 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1304 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1305 			 kvm->arch.model.ibc,
1306 			 kvm->arch.model.cpuid);
1307 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1308 			 kvm->arch.model.fac_list[0],
1309 			 kvm->arch.model.fac_list[1],
1310 			 kvm->arch.model.fac_list[2]);
1311 	} else
1312 		ret = -EFAULT;
1313 	kfree(proc);
1314 out:
1315 	mutex_unlock(&kvm->lock);
1316 	return ret;
1317 }
1318 
1319 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1320 				       struct kvm_device_attr *attr)
1321 {
1322 	struct kvm_s390_vm_cpu_feat data;
1323 
1324 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1325 		return -EFAULT;
1326 	if (!bitmap_subset((unsigned long *) data.feat,
1327 			   kvm_s390_available_cpu_feat,
1328 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1329 		return -EINVAL;
1330 
1331 	mutex_lock(&kvm->lock);
1332 	if (kvm->created_vcpus) {
1333 		mutex_unlock(&kvm->lock);
1334 		return -EBUSY;
1335 	}
1336 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1337 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1338 	mutex_unlock(&kvm->lock);
1339 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1340 			 data.feat[0],
1341 			 data.feat[1],
1342 			 data.feat[2]);
1343 	return 0;
1344 }
1345 
1346 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1347 					  struct kvm_device_attr *attr)
1348 {
1349 	mutex_lock(&kvm->lock);
1350 	if (kvm->created_vcpus) {
1351 		mutex_unlock(&kvm->lock);
1352 		return -EBUSY;
1353 	}
1354 
1355 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1356 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1357 		mutex_unlock(&kvm->lock);
1358 		return -EFAULT;
1359 	}
1360 	mutex_unlock(&kvm->lock);
1361 
1362 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1367 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1370 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1373 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1376 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1379 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1382 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1385 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1388 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1391 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1394 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1397 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1399 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1400 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1402 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1403 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1405 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1406 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1407 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1408 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1409 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1413 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1414 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1416 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1418 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1419 
1420 	return 0;
1421 }
1422 
1423 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1424 {
1425 	int ret = -ENXIO;
1426 
1427 	switch (attr->attr) {
1428 	case KVM_S390_VM_CPU_PROCESSOR:
1429 		ret = kvm_s390_set_processor(kvm, attr);
1430 		break;
1431 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1432 		ret = kvm_s390_set_processor_feat(kvm, attr);
1433 		break;
1434 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1435 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1436 		break;
1437 	}
1438 	return ret;
1439 }
1440 
1441 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443 	struct kvm_s390_vm_cpu_processor *proc;
1444 	int ret = 0;
1445 
1446 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1447 	if (!proc) {
1448 		ret = -ENOMEM;
1449 		goto out;
1450 	}
1451 	proc->cpuid = kvm->arch.model.cpuid;
1452 	proc->ibc = kvm->arch.model.ibc;
1453 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1454 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1455 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1456 		 kvm->arch.model.ibc,
1457 		 kvm->arch.model.cpuid);
1458 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1459 		 kvm->arch.model.fac_list[0],
1460 		 kvm->arch.model.fac_list[1],
1461 		 kvm->arch.model.fac_list[2]);
1462 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1463 		ret = -EFAULT;
1464 	kfree(proc);
1465 out:
1466 	return ret;
1467 }
1468 
1469 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1470 {
1471 	struct kvm_s390_vm_cpu_machine *mach;
1472 	int ret = 0;
1473 
1474 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1475 	if (!mach) {
1476 		ret = -ENOMEM;
1477 		goto out;
1478 	}
1479 	get_cpu_id((struct cpuid *) &mach->cpuid);
1480 	mach->ibc = sclp.ibc;
1481 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1482 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1483 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1484 	       sizeof(stfle_fac_list));
1485 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1486 		 kvm->arch.model.ibc,
1487 		 kvm->arch.model.cpuid);
1488 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1489 		 mach->fac_mask[0],
1490 		 mach->fac_mask[1],
1491 		 mach->fac_mask[2]);
1492 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1493 		 mach->fac_list[0],
1494 		 mach->fac_list[1],
1495 		 mach->fac_list[2]);
1496 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1497 		ret = -EFAULT;
1498 	kfree(mach);
1499 out:
1500 	return ret;
1501 }
1502 
1503 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1504 				       struct kvm_device_attr *attr)
1505 {
1506 	struct kvm_s390_vm_cpu_feat data;
1507 
1508 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1509 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1510 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1511 		return -EFAULT;
1512 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1513 			 data.feat[0],
1514 			 data.feat[1],
1515 			 data.feat[2]);
1516 	return 0;
1517 }
1518 
1519 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1520 				     struct kvm_device_attr *attr)
1521 {
1522 	struct kvm_s390_vm_cpu_feat data;
1523 
1524 	bitmap_copy((unsigned long *) data.feat,
1525 		    kvm_s390_available_cpu_feat,
1526 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1527 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1528 		return -EFAULT;
1529 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1530 			 data.feat[0],
1531 			 data.feat[1],
1532 			 data.feat[2]);
1533 	return 0;
1534 }
1535 
1536 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1537 					  struct kvm_device_attr *attr)
1538 {
1539 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1540 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1541 		return -EFAULT;
1542 
1543 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1548 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1551 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1554 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1557 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1560 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1563 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1566 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1569 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1572 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1575 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1578 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1581 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1583 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1584 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1587 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1589 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1590 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1594 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1595 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1597 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1599 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1600 
1601 	return 0;
1602 }
1603 
1604 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1605 					struct kvm_device_attr *attr)
1606 {
1607 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1608 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1609 		return -EFAULT;
1610 
1611 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1616 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1619 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1622 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1625 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1628 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1631 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1634 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1637 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1640 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1643 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1646 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1649 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1651 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1652 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1655 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1657 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1658 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1662 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1663 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1665 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1667 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1668 
1669 	return 0;
1670 }
1671 
1672 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674 	int ret = -ENXIO;
1675 
1676 	switch (attr->attr) {
1677 	case KVM_S390_VM_CPU_PROCESSOR:
1678 		ret = kvm_s390_get_processor(kvm, attr);
1679 		break;
1680 	case KVM_S390_VM_CPU_MACHINE:
1681 		ret = kvm_s390_get_machine(kvm, attr);
1682 		break;
1683 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1684 		ret = kvm_s390_get_processor_feat(kvm, attr);
1685 		break;
1686 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1687 		ret = kvm_s390_get_machine_feat(kvm, attr);
1688 		break;
1689 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1690 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1691 		break;
1692 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1693 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1694 		break;
1695 	}
1696 	return ret;
1697 }
1698 
1699 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701 	int ret;
1702 
1703 	switch (attr->group) {
1704 	case KVM_S390_VM_MEM_CTRL:
1705 		ret = kvm_s390_set_mem_control(kvm, attr);
1706 		break;
1707 	case KVM_S390_VM_TOD:
1708 		ret = kvm_s390_set_tod(kvm, attr);
1709 		break;
1710 	case KVM_S390_VM_CPU_MODEL:
1711 		ret = kvm_s390_set_cpu_model(kvm, attr);
1712 		break;
1713 	case KVM_S390_VM_CRYPTO:
1714 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1715 		break;
1716 	case KVM_S390_VM_MIGRATION:
1717 		ret = kvm_s390_vm_set_migration(kvm, attr);
1718 		break;
1719 	default:
1720 		ret = -ENXIO;
1721 		break;
1722 	}
1723 
1724 	return ret;
1725 }
1726 
1727 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1728 {
1729 	int ret;
1730 
1731 	switch (attr->group) {
1732 	case KVM_S390_VM_MEM_CTRL:
1733 		ret = kvm_s390_get_mem_control(kvm, attr);
1734 		break;
1735 	case KVM_S390_VM_TOD:
1736 		ret = kvm_s390_get_tod(kvm, attr);
1737 		break;
1738 	case KVM_S390_VM_CPU_MODEL:
1739 		ret = kvm_s390_get_cpu_model(kvm, attr);
1740 		break;
1741 	case KVM_S390_VM_MIGRATION:
1742 		ret = kvm_s390_vm_get_migration(kvm, attr);
1743 		break;
1744 	default:
1745 		ret = -ENXIO;
1746 		break;
1747 	}
1748 
1749 	return ret;
1750 }
1751 
1752 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1753 {
1754 	int ret;
1755 
1756 	switch (attr->group) {
1757 	case KVM_S390_VM_MEM_CTRL:
1758 		switch (attr->attr) {
1759 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1760 		case KVM_S390_VM_MEM_CLR_CMMA:
1761 			ret = sclp.has_cmma ? 0 : -ENXIO;
1762 			break;
1763 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1764 			ret = 0;
1765 			break;
1766 		default:
1767 			ret = -ENXIO;
1768 			break;
1769 		}
1770 		break;
1771 	case KVM_S390_VM_TOD:
1772 		switch (attr->attr) {
1773 		case KVM_S390_VM_TOD_LOW:
1774 		case KVM_S390_VM_TOD_HIGH:
1775 			ret = 0;
1776 			break;
1777 		default:
1778 			ret = -ENXIO;
1779 			break;
1780 		}
1781 		break;
1782 	case KVM_S390_VM_CPU_MODEL:
1783 		switch (attr->attr) {
1784 		case KVM_S390_VM_CPU_PROCESSOR:
1785 		case KVM_S390_VM_CPU_MACHINE:
1786 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1787 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1788 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1789 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1790 			ret = 0;
1791 			break;
1792 		default:
1793 			ret = -ENXIO;
1794 			break;
1795 		}
1796 		break;
1797 	case KVM_S390_VM_CRYPTO:
1798 		switch (attr->attr) {
1799 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1800 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1802 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1803 			ret = 0;
1804 			break;
1805 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1806 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1807 			ret = ap_instructions_available() ? 0 : -ENXIO;
1808 			break;
1809 		default:
1810 			ret = -ENXIO;
1811 			break;
1812 		}
1813 		break;
1814 	case KVM_S390_VM_MIGRATION:
1815 		ret = 0;
1816 		break;
1817 	default:
1818 		ret = -ENXIO;
1819 		break;
1820 	}
1821 
1822 	return ret;
1823 }
1824 
1825 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1826 {
1827 	uint8_t *keys;
1828 	uint64_t hva;
1829 	int srcu_idx, i, r = 0;
1830 
1831 	if (args->flags != 0)
1832 		return -EINVAL;
1833 
1834 	/* Is this guest using storage keys? */
1835 	if (!mm_uses_skeys(current->mm))
1836 		return KVM_S390_GET_SKEYS_NONE;
1837 
1838 	/* Enforce sane limit on memory allocation */
1839 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1840 		return -EINVAL;
1841 
1842 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1843 	if (!keys)
1844 		return -ENOMEM;
1845 
1846 	mmap_read_lock(current->mm);
1847 	srcu_idx = srcu_read_lock(&kvm->srcu);
1848 	for (i = 0; i < args->count; i++) {
1849 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1850 		if (kvm_is_error_hva(hva)) {
1851 			r = -EFAULT;
1852 			break;
1853 		}
1854 
1855 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1856 		if (r)
1857 			break;
1858 	}
1859 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1860 	mmap_read_unlock(current->mm);
1861 
1862 	if (!r) {
1863 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1864 				 sizeof(uint8_t) * args->count);
1865 		if (r)
1866 			r = -EFAULT;
1867 	}
1868 
1869 	kvfree(keys);
1870 	return r;
1871 }
1872 
1873 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1874 {
1875 	uint8_t *keys;
1876 	uint64_t hva;
1877 	int srcu_idx, i, r = 0;
1878 	bool unlocked;
1879 
1880 	if (args->flags != 0)
1881 		return -EINVAL;
1882 
1883 	/* Enforce sane limit on memory allocation */
1884 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1885 		return -EINVAL;
1886 
1887 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1888 	if (!keys)
1889 		return -ENOMEM;
1890 
1891 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1892 			   sizeof(uint8_t) * args->count);
1893 	if (r) {
1894 		r = -EFAULT;
1895 		goto out;
1896 	}
1897 
1898 	/* Enable storage key handling for the guest */
1899 	r = s390_enable_skey();
1900 	if (r)
1901 		goto out;
1902 
1903 	i = 0;
1904 	mmap_read_lock(current->mm);
1905 	srcu_idx = srcu_read_lock(&kvm->srcu);
1906         while (i < args->count) {
1907 		unlocked = false;
1908 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1909 		if (kvm_is_error_hva(hva)) {
1910 			r = -EFAULT;
1911 			break;
1912 		}
1913 
1914 		/* Lowest order bit is reserved */
1915 		if (keys[i] & 0x01) {
1916 			r = -EINVAL;
1917 			break;
1918 		}
1919 
1920 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1921 		if (r) {
1922 			r = fixup_user_fault(current->mm, hva,
1923 					     FAULT_FLAG_WRITE, &unlocked);
1924 			if (r)
1925 				break;
1926 		}
1927 		if (!r)
1928 			i++;
1929 	}
1930 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1931 	mmap_read_unlock(current->mm);
1932 out:
1933 	kvfree(keys);
1934 	return r;
1935 }
1936 
1937 /*
1938  * Base address and length must be sent at the start of each block, therefore
1939  * it's cheaper to send some clean data, as long as it's less than the size of
1940  * two longs.
1941  */
1942 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1943 /* for consistency */
1944 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1945 
1946 /*
1947  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1948  * address falls in a hole. In that case the index of one of the memslots
1949  * bordering the hole is returned.
1950  */
1951 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1952 {
1953 	int start = 0, end = slots->used_slots;
1954 	int slot = atomic_read(&slots->last_used_slot);
1955 	struct kvm_memory_slot *memslots = slots->memslots;
1956 
1957 	if (gfn >= memslots[slot].base_gfn &&
1958 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1959 		return slot;
1960 
1961 	while (start < end) {
1962 		slot = start + (end - start) / 2;
1963 
1964 		if (gfn >= memslots[slot].base_gfn)
1965 			end = slot;
1966 		else
1967 			start = slot + 1;
1968 	}
1969 
1970 	if (start >= slots->used_slots)
1971 		return slots->used_slots - 1;
1972 
1973 	if (gfn >= memslots[start].base_gfn &&
1974 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1975 		atomic_set(&slots->last_used_slot, start);
1976 	}
1977 
1978 	return start;
1979 }
1980 
1981 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1982 			      u8 *res, unsigned long bufsize)
1983 {
1984 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1985 
1986 	args->count = 0;
1987 	while (args->count < bufsize) {
1988 		hva = gfn_to_hva(kvm, cur_gfn);
1989 		/*
1990 		 * We return an error if the first value was invalid, but we
1991 		 * return successfully if at least one value was copied.
1992 		 */
1993 		if (kvm_is_error_hva(hva))
1994 			return args->count ? 0 : -EFAULT;
1995 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1996 			pgstev = 0;
1997 		res[args->count++] = (pgstev >> 24) & 0x43;
1998 		cur_gfn++;
1999 	}
2000 
2001 	return 0;
2002 }
2003 
/*
 * Find the guest frame number of the next page whose bit is set in a
 * memslot's second dirty bitmap, starting the search at cur_gfn.
 *
 * The memslot to start in is picked with gfn_to_memslot_approx(). If cur_gfn
 * lies at or beyond the end of that slot, the search continues in the slot
 * with the next lower index (wrapping around to the last used slot) from
 * offset 0. Slots are then walked towards index 0 until a set bit is found.
 *
 * Returns base_gfn + offset of the slot the walk stopped in. If no bit was
 * found the walk stops in slot 0 with an offset that is not below that
 * slot's npages (presumably exactly npages, depending on find_next_bit()).
 */
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	/* cur_gfn is not inside the approximated slot but past its end */
	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	/* Nothing found in this slot: try the slots with lower indices */
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
2028 
/*
 * Collect the CMMA attributes of a run of guest pages, beginning with the
 * first dirty page found by kvm_s390_next_dirty_cmma() for args->start_gfn,
 * and clear the dirty bits of the pages that are reported.
 *
 * @kvm:     the VM to operate on
 * @args:    start_gfn is updated to the first gfn reported, count to the
 *	     number of values stored in res
 * @res:     output buffer for the attribute bytes
 * @bufsize: capacity of res in bytes
 *
 * Always returns 0. If there are no used memslots, args is left untouched.
 */
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(!slots->used_slots))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	/* The gfn found is not backed by a memslot: report an empty result */
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	/* End of slot 0, used below as the end-of-memory marker */
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		/* A page without readable PGSTE is reported as all zeroes */
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
2079 
2080 /*
2081  * This function searches for the next page with dirty CMMA attributes, and
2082  * saves the attributes in the buffer up to either the end of the buffer or
2083  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2084  * no trailing clean bytes are saved.
2085  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2086  * output buffer will indicate 0 as length.
2087  */
2088 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2089 				  struct kvm_s390_cmma_log *args)
2090 {
2091 	unsigned long bufsize;
2092 	int srcu_idx, peek, ret;
2093 	u8 *values;
2094 
2095 	if (!kvm->arch.use_cmma)
2096 		return -ENXIO;
2097 	/* Invalid/unsupported flags were specified */
2098 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2099 		return -EINVAL;
2100 	/* Migration mode query, and we are not doing a migration */
2101 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2102 	if (!peek && !kvm->arch.migration_mode)
2103 		return -EINVAL;
2104 	/* CMMA is disabled or was not used, or the buffer has length zero */
2105 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2106 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2107 		memset(args, 0, sizeof(*args));
2108 		return 0;
2109 	}
2110 	/* We are not peeking, and there are no dirty pages */
2111 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2112 		memset(args, 0, sizeof(*args));
2113 		return 0;
2114 	}
2115 
2116 	values = vmalloc(bufsize);
2117 	if (!values)
2118 		return -ENOMEM;
2119 
2120 	mmap_read_lock(kvm->mm);
2121 	srcu_idx = srcu_read_lock(&kvm->srcu);
2122 	if (peek)
2123 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2124 	else
2125 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2126 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2127 	mmap_read_unlock(kvm->mm);
2128 
2129 	if (kvm->arch.migration_mode)
2130 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2131 	else
2132 		args->remaining = 0;
2133 
2134 	if (copy_to_user((void __user *)args->values, values, args->count))
2135 		ret = -EFAULT;
2136 
2137 	vfree(values);
2138 	return ret;
2139 }
2140 
2141 /*
2142  * This function sets the CMMA attributes for the given pages. If the input
2143  * buffer has zero length, no action is taken, otherwise the attributes are
2144  * set and the mm->context.uses_cmm flag is set.
2145  */
2146 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2147 				  const struct kvm_s390_cmma_log *args)
2148 {
2149 	unsigned long hva, mask, pgstev, i;
2150 	uint8_t *bits;
2151 	int srcu_idx, r = 0;
2152 
2153 	mask = args->mask;
2154 
2155 	if (!kvm->arch.use_cmma)
2156 		return -ENXIO;
2157 	/* invalid/unsupported flags */
2158 	if (args->flags != 0)
2159 		return -EINVAL;
2160 	/* Enforce sane limit on memory allocation */
2161 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2162 		return -EINVAL;
2163 	/* Nothing to do */
2164 	if (args->count == 0)
2165 		return 0;
2166 
2167 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2168 	if (!bits)
2169 		return -ENOMEM;
2170 
2171 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2172 	if (r) {
2173 		r = -EFAULT;
2174 		goto out;
2175 	}
2176 
2177 	mmap_read_lock(kvm->mm);
2178 	srcu_idx = srcu_read_lock(&kvm->srcu);
2179 	for (i = 0; i < args->count; i++) {
2180 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2181 		if (kvm_is_error_hva(hva)) {
2182 			r = -EFAULT;
2183 			break;
2184 		}
2185 
2186 		pgstev = bits[i];
2187 		pgstev = pgstev << 24;
2188 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2189 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2190 	}
2191 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2192 	mmap_read_unlock(kvm->mm);
2193 
2194 	if (!kvm->mm->context.uses_cmm) {
2195 		mmap_write_lock(kvm->mm);
2196 		kvm->mm->context.uses_cmm = 1;
2197 		mmap_write_unlock(kvm->mm);
2198 	}
2199 out:
2200 	vfree(bits);
2201 	return r;
2202 }
2203 
2204 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2205 {
2206 	struct kvm_vcpu *vcpu;
2207 	u16 rc, rrc;
2208 	int ret = 0;
2209 	int i;
2210 
2211 	/*
2212 	 * We ignore failures and try to destroy as many CPUs as possible.
2213 	 * At the same time we must not free the assigned resources when
2214 	 * this fails, as the ultravisor has still access to that memory.
2215 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2216 	 * behind.
2217 	 * We want to return the first failure rc and rrc, though.
2218 	 */
2219 	kvm_for_each_vcpu(i, vcpu, kvm) {
2220 		mutex_lock(&vcpu->mutex);
2221 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2222 			*rcp = rc;
2223 			*rrcp = rrc;
2224 			ret = -EIO;
2225 		}
2226 		mutex_unlock(&vcpu->mutex);
2227 	}
2228 	return ret;
2229 }
2230 
2231 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2232 {
2233 	int i, r = 0;
2234 	u16 dummy;
2235 
2236 	struct kvm_vcpu *vcpu;
2237 
2238 	kvm_for_each_vcpu(i, vcpu, kvm) {
2239 		mutex_lock(&vcpu->mutex);
2240 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2241 		mutex_unlock(&vcpu->mutex);
2242 		if (r)
2243 			break;
2244 	}
2245 	if (r)
2246 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2247 	return r;
2248 }
2249 
/*
 * Execute one protected virtualization (PV) sub-command for a VM.
 *
 * @kvm: the VM the command applies to
 * @cmd: command descriptor; cmd->cmd selects the operation, cmd->data is the
 *	 user space address of a command specific payload, and cmd->rc and
 *	 cmd->rrc are handed to the operations that report such codes
 *
 * KVM_PV_ENABLE fails with -EINVAL if the VM is already protected; all other
 * known commands fail with -EINVAL if it is not. Unknown commands yield
 * -ENOTTY.
 *
 * NOTE(review): the caller visible in this file holds kvm->lock around this
 * call - confirm for any new caller.
 */
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
	int r = 0;
	u16 dummy;
	void __user *argp = (void __user *)cmd->data;

	switch (cmd->cmd) {
	case KVM_PV_ENABLE: {
		r = -EINVAL;
		if (kvm_s390_pv_is_protected(kvm))
			break;

		/*
		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
		 *  esca, we need no cleanup in the error cases below
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		mmap_write_lock(current->mm);
		r = gmap_mark_unmergeable();
		mmap_write_unlock(current->mm);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		/* Undo the VM init if the vcpus cannot be converted */
		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/* we need to block service interrupts from now on */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/* no need to block service interrupts any more */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		/* r stays -EFAULT if the header cannot be copied in */
		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	case KVM_PV_PREP_RESET: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	case KVM_PV_UNSHARE_ALL: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
2391 
/*
 * s390 specific VM ioctl dispatcher.
 *
 * @filp:  file of the VM; its private_data is the struct kvm
 * @ioctl: ioctl number
 * @arg:   ioctl argument, used as a user space pointer by every command
 *	   handled here that takes an argument
 *
 * Returns the result of the selected handler, -EFAULT if an argument
 * structure cannot be copied from or to user space, or -ENOTTY for an
 * ioctl that is not handled here.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		/* Only valid if the in-kernel irqchip was enabled for the VM */
		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		/* On success hand the updated descriptor back to user space */
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	case KVM_S390_PV_COMMAND: {
		struct kvm_pv_cmd args;

		/* protvirt means user cpu state */
		kvm_s390_set_user_cpu_state_ctrl(kvm);
		r = 0;
		if (!is_prot_virt_host()) {
			r = -EINVAL;
			break;
		}
		if (copy_from_user(&args, argp, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		/* No flags are defined for PV commands */
		if (args.flags) {
			r = -EINVAL;
			break;
		}
		mutex_lock(&kvm->lock);
		r = kvm_s390_handle_pv(kvm, &args);
		mutex_unlock(&kvm->lock);
		/*
		 * The descriptor is copied back even if the command failed;
		 * a failing copy replaces the command result with -EFAULT.
		 */
		if (copy_to_user(argp, &args, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
2522 
2523 static int kvm_s390_apxa_installed(void)
2524 {
2525 	struct ap_config_info info;
2526 
2527 	if (ap_instructions_available()) {
2528 		if (ap_qci(&info) == 0)
2529 			return info.apxa;
2530 	}
2531 
2532 	return 0;
2533 }
2534 
2535 /*
2536  * The format of the crypto control block (CRYCB) is specified in the 3 low
2537  * order bits of the CRYCB designation (CRYCBD) field as follows:
2538  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2539  *	     AP extended addressing (APXA) facility are installed.
2540  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2541  * Format 2: Both the APXA and MSAX3 facilities are installed
2542  */
2543 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2544 {
2545 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2546 
2547 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2548 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2549 
2550 	/* Check whether MSAX3 is installed */
2551 	if (!test_kvm_facility(kvm, 76))
2552 		return;
2553 
2554 	if (kvm_s390_apxa_installed())
2555 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2556 	else
2557 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2558 }
2559 
2560 /*
2561  * kvm_arch_crypto_set_masks
2562  *
2563  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2564  *	 to be set.
2565  * @apm: the mask identifying the accessible AP adapters
2566  * @aqm: the mask identifying the accessible AP domains
2567  * @adm: the mask identifying the accessible AP control domains
2568  *
2569  * Set the masks that identify the adapters, domains and control domains to
2570  * which the KVM guest is granted access.
2571  *
2572  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2573  *	 function.
2574  */
2575 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2576 			       unsigned long *aqm, unsigned long *adm)
2577 {
2578 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2579 
2580 	kvm_s390_vcpu_block_all(kvm);
2581 
2582 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2583 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2584 		memcpy(crycb->apcb1.apm, apm, 32);
2585 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2586 			 apm[0], apm[1], apm[2], apm[3]);
2587 		memcpy(crycb->apcb1.aqm, aqm, 32);
2588 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2589 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2590 		memcpy(crycb->apcb1.adm, adm, 32);
2591 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2592 			 adm[0], adm[1], adm[2], adm[3]);
2593 		break;
2594 	case CRYCB_FORMAT1:
2595 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2596 		memcpy(crycb->apcb0.apm, apm, 8);
2597 		memcpy(crycb->apcb0.aqm, aqm, 2);
2598 		memcpy(crycb->apcb0.adm, adm, 2);
2599 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2600 			 apm[0], *((unsigned short *)aqm),
2601 			 *((unsigned short *)adm));
2602 		break;
2603 	default:	/* Can not happen */
2604 		break;
2605 	}
2606 
2607 	/* recreate the shadow crycb for each vcpu */
2608 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2609 	kvm_s390_vcpu_unblock_all(kvm);
2610 }
2611 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2612 
2613 /*
2614  * kvm_arch_crypto_clear_masks
2615  *
2616  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2617  *	 to be cleared.
2618  *
2619  * Clear the masks that identify the adapters, domains and control domains to
2620  * which the KVM guest is granted access.
2621  *
2622  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2623  *	 function.
2624  */
2625 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2626 {
2627 	kvm_s390_vcpu_block_all(kvm);
2628 
2629 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2630 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2631 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2632 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2633 
2634 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2635 	/* recreate the shadow crycb for each vcpu */
2636 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2637 	kvm_s390_vcpu_unblock_all(kvm);
2638 }
2639 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2640 
2641 static u64 kvm_s390_get_initial_cpuid(void)
2642 {
2643 	struct cpuid cpuid;
2644 
2645 	get_cpu_id(&cpuid);
2646 	cpuid.version = 0xff;
2647 	return *((u64 *) &cpuid);
2648 }
2649 
2650 static void kvm_s390_crypto_init(struct kvm *kvm)
2651 {
2652 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2653 	kvm_s390_set_crycb_format(kvm);
2654 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2655 
2656 	if (!test_kvm_facility(kvm, 76))
2657 		return;
2658 
2659 	/* Enable AES/DEA protected key functions by default */
2660 	kvm->arch.crypto.aes_kw = 1;
2661 	kvm->arch.crypto.dea_kw = 1;
2662 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2663 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2664 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2665 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2666 }
2667 
2668 static void sca_dispose(struct kvm *kvm)
2669 {
2670 	if (kvm->arch.use_esca)
2671 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2672 	else
2673 		free_page((unsigned long)(kvm->arch.sca));
2674 	kvm->arch.sca = NULL;
2675 }
2676 
2677 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2678 {
2679 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2680 	int i, rc;
2681 	char debug_name[16];
2682 	static unsigned long sca_offset;
2683 
2684 	rc = -EINVAL;
2685 #ifdef CONFIG_KVM_S390_UCONTROL
2686 	if (type & ~KVM_VM_S390_UCONTROL)
2687 		goto out_err;
2688 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2689 		goto out_err;
2690 #else
2691 	if (type)
2692 		goto out_err;
2693 #endif
2694 
2695 	rc = s390_enable_sie();
2696 	if (rc)
2697 		goto out_err;
2698 
2699 	rc = -ENOMEM;
2700 
2701 	if (!sclp.has_64bscao)
2702 		alloc_flags |= GFP_DMA;
2703 	rwlock_init(&kvm->arch.sca_lock);
2704 	/* start with basic SCA */
2705 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2706 	if (!kvm->arch.sca)
2707 		goto out_err;
2708 	mutex_lock(&kvm_lock);
2709 	sca_offset += 16;
2710 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2711 		sca_offset = 0;
2712 	kvm->arch.sca = (struct bsca_block *)
2713 			((char *) kvm->arch.sca + sca_offset);
2714 	mutex_unlock(&kvm_lock);
2715 
2716 	sprintf(debug_name, "kvm-%u", current->pid);
2717 
2718 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2719 	if (!kvm->arch.dbf)
2720 		goto out_err;
2721 
2722 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2723 	kvm->arch.sie_page2 =
2724 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2725 	if (!kvm->arch.sie_page2)
2726 		goto out_err;
2727 
2728 	kvm->arch.sie_page2->kvm = kvm;
2729 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2730 
2731 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2732 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2733 					      (kvm_s390_fac_base[i] |
2734 					       kvm_s390_fac_ext[i]);
2735 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2736 					      kvm_s390_fac_base[i];
2737 	}
2738 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2739 
2740 	/* we are always in czam mode - even on pre z14 machines */
2741 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2742 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2743 	/* we emulate STHYI in kvm */
2744 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2745 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2746 	if (MACHINE_HAS_TLB_GUEST) {
2747 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2748 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2749 	}
2750 
2751 	if (css_general_characteristics.aiv && test_facility(65))
2752 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2753 
2754 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2755 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2756 
2757 	kvm_s390_crypto_init(kvm);
2758 
2759 	mutex_init(&kvm->arch.float_int.ais_lock);
2760 	spin_lock_init(&kvm->arch.float_int.lock);
2761 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2762 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2763 	init_waitqueue_head(&kvm->arch.ipte_wq);
2764 	mutex_init(&kvm->arch.ipte_mutex);
2765 
2766 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2767 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2768 
2769 	if (type & KVM_VM_S390_UCONTROL) {
2770 		kvm->arch.gmap = NULL;
2771 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2772 	} else {
2773 		if (sclp.hamax == U64_MAX)
2774 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2775 		else
2776 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2777 						    sclp.hamax + 1);
2778 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2779 		if (!kvm->arch.gmap)
2780 			goto out_err;
2781 		kvm->arch.gmap->private = kvm;
2782 		kvm->arch.gmap->pfault_enabled = 0;
2783 	}
2784 
2785 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2786 	kvm->arch.use_skf = sclp.has_skey;
2787 	spin_lock_init(&kvm->arch.start_stop_lock);
2788 	kvm_s390_vsie_init(kvm);
2789 	if (use_gisa)
2790 		kvm_s390_gisa_init(kvm);
2791 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2792 
2793 	return 0;
2794 out_err:
2795 	free_page((unsigned long)kvm->arch.sie_page2);
2796 	debug_unregister(kvm->arch.dbf);
2797 	sca_dispose(kvm);
2798 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2799 	return rc;
2800 }
2801 
2802 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2803 {
2804 	u16 rc, rrc;
2805 
2806 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2807 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2808 	kvm_s390_clear_local_irqs(vcpu);
2809 	kvm_clear_async_pf_completion_queue(vcpu);
2810 	if (!kvm_is_ucontrol(vcpu->kvm))
2811 		sca_del_vcpu(vcpu);
2812 
2813 	if (kvm_is_ucontrol(vcpu->kvm))
2814 		gmap_remove(vcpu->arch.gmap);
2815 
2816 	if (vcpu->kvm->arch.use_cmma)
2817 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2818 	/* We can not hold the vcpu mutex here, we are already dying */
2819 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2820 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2821 	free_page((unsigned long)(vcpu->arch.sie_block));
2822 }
2823 
2824 static void kvm_free_vcpus(struct kvm *kvm)
2825 {
2826 	unsigned int i;
2827 	struct kvm_vcpu *vcpu;
2828 
2829 	kvm_for_each_vcpu(i, vcpu, kvm)
2830 		kvm_vcpu_destroy(vcpu);
2831 
2832 	mutex_lock(&kvm->lock);
2833 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2834 		kvm->vcpus[i] = NULL;
2835 
2836 	atomic_set(&kvm->online_vcpus, 0);
2837 	mutex_unlock(&kvm->lock);
2838 }
2839 
/*
 * Architecture specific part of VM destruction: frees the vcpus, the SCA,
 * the GISA, a still existing protected VM configuration, the debug feature
 * area, sie_page2, the VM wide gmap (not present for user controlled VMs),
 * the adapters, pending floating interrupts and the vsie state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	u16 rc, rrc;

	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* User controlled VMs keep their gmaps per vcpu, not per VM */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2864 
2865 /* Section: vcpu related */
2866 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2867 {
2868 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2869 	if (!vcpu->arch.gmap)
2870 		return -ENOMEM;
2871 	vcpu->arch.gmap->private = vcpu->kvm;
2872 
2873 	return 0;
2874 }
2875 
2876 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2877 {
2878 	if (!kvm_s390_use_sca_entries())
2879 		return;
2880 	read_lock(&vcpu->kvm->arch.sca_lock);
2881 	if (vcpu->kvm->arch.use_esca) {
2882 		struct esca_block *sca = vcpu->kvm->arch.sca;
2883 
2884 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2885 		sca->cpu[vcpu->vcpu_id].sda = 0;
2886 	} else {
2887 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2888 
2889 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2890 		sca->cpu[vcpu->vcpu_id].sda = 0;
2891 	}
2892 	read_unlock(&vcpu->kvm->arch.sca_lock);
2893 }
2894 
2895 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2896 {
2897 	if (!kvm_s390_use_sca_entries()) {
2898 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2899 
2900 		/* we still need the basic sca for the ipte control */
2901 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2902 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2903 		return;
2904 	}
2905 	read_lock(&vcpu->kvm->arch.sca_lock);
2906 	if (vcpu->kvm->arch.use_esca) {
2907 		struct esca_block *sca = vcpu->kvm->arch.sca;
2908 
2909 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2910 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2911 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2912 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2913 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2914 	} else {
2915 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2916 
2917 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2918 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2919 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2920 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2921 	}
2922 	read_unlock(&vcpu->kvm->arch.sca_lock);
2923 }
2924 
2925 /* Basic SCA to Extended SCA data copy routines */
2926 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2927 {
2928 	d->sda = s->sda;
2929 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2930 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2931 }
2932 
2933 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2934 {
2935 	int i;
2936 
2937 	d->ipte_control = s->ipte_control;
2938 	d->mcn[0] = s->mcn;
2939 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2940 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2941 }
2942 
2943 static int sca_switch_to_extended(struct kvm *kvm)
2944 {
2945 	struct bsca_block *old_sca = kvm->arch.sca;
2946 	struct esca_block *new_sca;
2947 	struct kvm_vcpu *vcpu;
2948 	unsigned int vcpu_idx;
2949 	u32 scaol, scaoh;
2950 
2951 	if (kvm->arch.use_esca)
2952 		return 0;
2953 
2954 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2955 	if (!new_sca)
2956 		return -ENOMEM;
2957 
2958 	scaoh = (u32)((u64)(new_sca) >> 32);
2959 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2960 
2961 	kvm_s390_vcpu_block_all(kvm);
2962 	write_lock(&kvm->arch.sca_lock);
2963 
2964 	sca_copy_b_to_e(new_sca, old_sca);
2965 
2966 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2967 		vcpu->arch.sie_block->scaoh = scaoh;
2968 		vcpu->arch.sie_block->scaol = scaol;
2969 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2970 	}
2971 	kvm->arch.sca = new_sca;
2972 	kvm->arch.use_esca = 1;
2973 
2974 	write_unlock(&kvm->arch.sca_lock);
2975 	kvm_s390_vcpu_unblock_all(kvm);
2976 
2977 	free_page((unsigned long)old_sca);
2978 
2979 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2980 		 old_sca, kvm->arch.sca);
2981 	return 0;
2982 }
2983 
2984 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2985 {
2986 	int rc;
2987 
2988 	if (!kvm_s390_use_sca_entries()) {
2989 		if (id < KVM_MAX_VCPUS)
2990 			return true;
2991 		return false;
2992 	}
2993 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2994 		return true;
2995 	if (!sclp.has_esca || !sclp.has_64bscao)
2996 		return false;
2997 
2998 	mutex_lock(&kvm->lock);
2999 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3000 	mutex_unlock(&kvm->lock);
3001 
3002 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3003 }
3004 
3005 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3006 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3007 {
3008 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3009 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3010 	vcpu->arch.cputm_start = get_tod_clock_fast();
3011 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012 }
3013 
3014 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3015 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016 {
3017 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3018 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3019 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3020 	vcpu->arch.cputm_start = 0;
3021 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3022 }
3023 
3024 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3025 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3026 {
3027 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3028 	vcpu->arch.cputm_enabled = true;
3029 	__start_cpu_timer_accounting(vcpu);
3030 }
3031 
3032 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3033 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3034 {
3035 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3036 	__stop_cpu_timer_accounting(vcpu);
3037 	vcpu->arch.cputm_enabled = false;
3038 }
3039 
/* Preemption safe wrapper around __enable_cpu_timer_accounting() */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* keep TOD sync and vcpu_load/put away while accounting is set up */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3046 
/* Preemption safe wrapper around __disable_cpu_timer_accounting() */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* keep TOD sync and vcpu_load/put away while accounting is torn down */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3053 
3054 /* set the cpu timer - may only be called from the VCPU thread itself */
3055 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3056 {
3057 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3058 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3059 	if (vcpu->arch.cputm_enabled)
3060 		vcpu->arch.cputm_start = get_tod_clock_fast();
3061 	vcpu->arch.sie_block->cputm = cputm;
3062 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3063 	preempt_enable();
3064 }
3065 
3066 /* update and get the cpu timer - can also be called from other VCPU threads */
3067 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3068 {
3069 	unsigned int seq;
3070 	__u64 value;
3071 
3072 	if (unlikely(!vcpu->arch.cputm_enabled))
3073 		return vcpu->arch.sie_block->cputm;
3074 
3075 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3076 	do {
3077 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3078 		/*
3079 		 * If the writer would ever execute a read in the critical
3080 		 * section, e.g. in irq context, we have a deadlock.
3081 		 */
3082 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3083 		value = vcpu->arch.sie_block->cputm;
3084 		/* if cputm_start is 0, accounting is being started/stopped */
3085 		if (likely(vcpu->arch.cputm_start))
3086 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3087 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3088 	preempt_enable();
3089 	return value;
3090 }
3091 
3092 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3093 {
3094 
3095 	gmap_enable(vcpu->arch.enabled_gmap);
3096 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3097 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3098 		__start_cpu_timer_accounting(vcpu);
3099 	vcpu->cpu = cpu;
3100 }
3101 
3102 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3103 {
3104 	vcpu->cpu = -1;
3105 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3106 		__stop_cpu_timer_accounting(vcpu);
3107 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3108 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3109 	gmap_disable(vcpu->arch.enabled_gmap);
3110 
3111 }
3112 
3113 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3114 {
3115 	mutex_lock(&vcpu->kvm->lock);
3116 	preempt_disable();
3117 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3118 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3119 	preempt_enable();
3120 	mutex_unlock(&vcpu->kvm->lock);
3121 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3122 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3123 		sca_add_vcpu(vcpu);
3124 	}
3125 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3126 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3127 	/* make vcpu_load load the right gmap on the first trigger */
3128 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3129 }
3130 
3131 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3132 {
3133 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3134 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3135 		return true;
3136 	return false;
3137 }
3138 
3139 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3140 {
3141 	/* At least one ECC subfunction must be present */
3142 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3143 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3144 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3145 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3146 	       kvm_has_pckmo_subfunc(kvm, 41);
3147 
3148 }
3149 
3150 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3151 {
3152 	/*
3153 	 * If the AP instructions are not being interpreted and the MSAX3
3154 	 * facility is not configured for the guest, there is nothing to set up.
3155 	 */
3156 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3157 		return;
3158 
3159 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3160 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3161 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3162 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3163 
3164 	if (vcpu->kvm->arch.crypto.apie)
3165 		vcpu->arch.sie_block->eca |= ECA_APIE;
3166 
3167 	/* Set up protected key support */
3168 	if (vcpu->kvm->arch.crypto.aes_kw) {
3169 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3170 		/* ecc is also wrapped with AES key */
3171 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3172 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3173 	}
3174 
3175 	if (vcpu->kvm->arch.crypto.dea_kw)
3176 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3177 }
3178 
3179 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3180 {
3181 	free_page(vcpu->arch.sie_block->cbrlo);
3182 	vcpu->arch.sie_block->cbrlo = 0;
3183 }
3184 
3185 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3186 {
3187 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3188 	if (!vcpu->arch.sie_block->cbrlo)
3189 		return -ENOMEM;
3190 	return 0;
3191 }
3192 
3193 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3194 {
3195 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3196 
3197 	vcpu->arch.sie_block->ibc = model->ibc;
3198 	if (test_kvm_facility(vcpu->kvm, 7))
3199 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3200 }
3201 
3202 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3203 {
3204 	int rc = 0;
3205 	u16 uvrc, uvrrc;
3206 
3207 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3208 						    CPUSTAT_SM |
3209 						    CPUSTAT_STOPPED);
3210 
3211 	if (test_kvm_facility(vcpu->kvm, 78))
3212 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3213 	else if (test_kvm_facility(vcpu->kvm, 8))
3214 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3215 
3216 	kvm_s390_vcpu_setup_model(vcpu);
3217 
3218 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3219 	if (MACHINE_HAS_ESOP)
3220 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3221 	if (test_kvm_facility(vcpu->kvm, 9))
3222 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3223 	if (test_kvm_facility(vcpu->kvm, 73))
3224 		vcpu->arch.sie_block->ecb |= ECB_TE;
3225 	if (!kvm_is_ucontrol(vcpu->kvm))
3226 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3227 
3228 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3229 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3230 	if (test_kvm_facility(vcpu->kvm, 130))
3231 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3232 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3233 	if (sclp.has_cei)
3234 		vcpu->arch.sie_block->eca |= ECA_CEI;
3235 	if (sclp.has_ib)
3236 		vcpu->arch.sie_block->eca |= ECA_IB;
3237 	if (sclp.has_siif)
3238 		vcpu->arch.sie_block->eca |= ECA_SII;
3239 	if (sclp.has_sigpif)
3240 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3241 	if (test_kvm_facility(vcpu->kvm, 129)) {
3242 		vcpu->arch.sie_block->eca |= ECA_VX;
3243 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3244 	}
3245 	if (test_kvm_facility(vcpu->kvm, 139))
3246 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3247 	if (test_kvm_facility(vcpu->kvm, 156))
3248 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3249 	if (vcpu->arch.sie_block->gd) {
3250 		vcpu->arch.sie_block->eca |= ECA_AIV;
3251 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3252 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3253 	}
3254 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3255 					| SDNXC;
3256 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3257 
3258 	if (sclp.has_kss)
3259 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3260 	else
3261 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3262 
3263 	if (vcpu->kvm->arch.use_cmma) {
3264 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3265 		if (rc)
3266 			return rc;
3267 	}
3268 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3269 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3270 
3271 	vcpu->arch.sie_block->hpid = HPID_KVM;
3272 
3273 	kvm_s390_vcpu_crypto_setup(vcpu);
3274 
3275 	mutex_lock(&vcpu->kvm->lock);
3276 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3277 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3278 		if (rc)
3279 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3280 	}
3281 	mutex_unlock(&vcpu->kvm->lock);
3282 
3283 	return rc;
3284 }
3285 
3286 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3287 {
3288 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3289 		return -EINVAL;
3290 	return 0;
3291 }
3292 
3293 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3294 {
3295 	struct sie_page *sie_page;
3296 	int rc;
3297 
3298 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3299 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3300 	if (!sie_page)
3301 		return -ENOMEM;
3302 
3303 	vcpu->arch.sie_block = &sie_page->sie_block;
3304 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3305 
3306 	/* the real guest size will always be smaller than msl */
3307 	vcpu->arch.sie_block->mso = 0;
3308 	vcpu->arch.sie_block->msl = sclp.hamax;
3309 
3310 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3311 	spin_lock_init(&vcpu->arch.local_int.lock);
3312 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3313 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3314 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3315 	seqcount_init(&vcpu->arch.cputm_seqcount);
3316 
3317 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3318 	kvm_clear_async_pf_completion_queue(vcpu);
3319 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3320 				    KVM_SYNC_GPRS |
3321 				    KVM_SYNC_ACRS |
3322 				    KVM_SYNC_CRS |
3323 				    KVM_SYNC_ARCH0 |
3324 				    KVM_SYNC_PFAULT |
3325 				    KVM_SYNC_DIAG318;
3326 	kvm_s390_set_prefix(vcpu, 0);
3327 	if (test_kvm_facility(vcpu->kvm, 64))
3328 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3329 	if (test_kvm_facility(vcpu->kvm, 82))
3330 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3331 	if (test_kvm_facility(vcpu->kvm, 133))
3332 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3333 	if (test_kvm_facility(vcpu->kvm, 156))
3334 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3335 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3336 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3337 	 */
3338 	if (MACHINE_HAS_VX)
3339 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3340 	else
3341 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3342 
3343 	if (kvm_is_ucontrol(vcpu->kvm)) {
3344 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3345 		if (rc)
3346 			goto out_free_sie_block;
3347 	}
3348 
3349 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3350 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3351 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3352 
3353 	rc = kvm_s390_vcpu_setup(vcpu);
3354 	if (rc)
3355 		goto out_ucontrol_uninit;
3356 	return 0;
3357 
3358 out_ucontrol_uninit:
3359 	if (kvm_is_ucontrol(vcpu->kvm))
3360 		gmap_remove(vcpu->arch.gmap);
3361 out_free_sie_block:
3362 	free_page((unsigned long)(vcpu->arch.sie_block));
3363 	return rc;
3364 }
3365 
3366 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3367 {
3368 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3369 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3370 }
3371 
3372 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3373 {
3374 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3375 }
3376 
3377 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3378 {
3379 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3380 	exit_sie(vcpu);
3381 }
3382 
3383 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3384 {
3385 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3386 }
3387 
3388 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3389 {
3390 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3391 	exit_sie(vcpu);
3392 }
3393 
3394 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3395 {
3396 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3397 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3398 }
3399 
3400 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3401 {
3402 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3403 }
3404 
3405 /*
3406  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3407  * If the CPU is not running (e.g. waiting as idle) the function will
3408  * return immediately. */
3409 void exit_sie(struct kvm_vcpu *vcpu)
3410 {
3411 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3412 	kvm_s390_vsie_kick(vcpu);
3413 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3414 		cpu_relax();
3415 }
3416 
/*
 * Queue request @req for the vcpu and kick the vcpu out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3423 
3424 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3425 			      unsigned long end)
3426 {
3427 	struct kvm *kvm = gmap->private;
3428 	struct kvm_vcpu *vcpu;
3429 	unsigned long prefix;
3430 	int i;
3431 
3432 	if (gmap_is_shadow(gmap))
3433 		return;
3434 	if (start >= 1UL << 31)
3435 		/* We are only interested in prefix pages */
3436 		return;
3437 	kvm_for_each_vcpu(i, vcpu, kvm) {
3438 		/* match against both prefix pages */
3439 		prefix = kvm_s390_get_prefix(vcpu);
3440 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3441 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3442 				   start, end);
3443 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3444 		}
3445 	}
3446 }
3447 
3448 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3449 {
3450 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3451 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3452 	    halt_poll_max_steal) {
3453 		vcpu->stat.halt_no_poll_steal++;
3454 		return true;
3455 	}
3456 	return false;
3457 }
3458 
/*
 * kvm common code refers to this function but never calls it, so reaching
 * it is a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
3465 
3466 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3467 					   struct kvm_one_reg *reg)
3468 {
3469 	int r = -EINVAL;
3470 
3471 	switch (reg->id) {
3472 	case KVM_REG_S390_TODPR:
3473 		r = put_user(vcpu->arch.sie_block->todpr,
3474 			     (u32 __user *)reg->addr);
3475 		break;
3476 	case KVM_REG_S390_EPOCHDIFF:
3477 		r = put_user(vcpu->arch.sie_block->epoch,
3478 			     (u64 __user *)reg->addr);
3479 		break;
3480 	case KVM_REG_S390_CPU_TIMER:
3481 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3482 			     (u64 __user *)reg->addr);
3483 		break;
3484 	case KVM_REG_S390_CLOCK_COMP:
3485 		r = put_user(vcpu->arch.sie_block->ckc,
3486 			     (u64 __user *)reg->addr);
3487 		break;
3488 	case KVM_REG_S390_PFTOKEN:
3489 		r = put_user(vcpu->arch.pfault_token,
3490 			     (u64 __user *)reg->addr);
3491 		break;
3492 	case KVM_REG_S390_PFCOMPARE:
3493 		r = put_user(vcpu->arch.pfault_compare,
3494 			     (u64 __user *)reg->addr);
3495 		break;
3496 	case KVM_REG_S390_PFSELECT:
3497 		r = put_user(vcpu->arch.pfault_select,
3498 			     (u64 __user *)reg->addr);
3499 		break;
3500 	case KVM_REG_S390_PP:
3501 		r = put_user(vcpu->arch.sie_block->pp,
3502 			     (u64 __user *)reg->addr);
3503 		break;
3504 	case KVM_REG_S390_GBEA:
3505 		r = put_user(vcpu->arch.sie_block->gbea,
3506 			     (u64 __user *)reg->addr);
3507 		break;
3508 	default:
3509 		break;
3510 	}
3511 
3512 	return r;
3513 }
3514 
3515 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3516 					   struct kvm_one_reg *reg)
3517 {
3518 	int r = -EINVAL;
3519 	__u64 val;
3520 
3521 	switch (reg->id) {
3522 	case KVM_REG_S390_TODPR:
3523 		r = get_user(vcpu->arch.sie_block->todpr,
3524 			     (u32 __user *)reg->addr);
3525 		break;
3526 	case KVM_REG_S390_EPOCHDIFF:
3527 		r = get_user(vcpu->arch.sie_block->epoch,
3528 			     (u64 __user *)reg->addr);
3529 		break;
3530 	case KVM_REG_S390_CPU_TIMER:
3531 		r = get_user(val, (u64 __user *)reg->addr);
3532 		if (!r)
3533 			kvm_s390_set_cpu_timer(vcpu, val);
3534 		break;
3535 	case KVM_REG_S390_CLOCK_COMP:
3536 		r = get_user(vcpu->arch.sie_block->ckc,
3537 			     (u64 __user *)reg->addr);
3538 		break;
3539 	case KVM_REG_S390_PFTOKEN:
3540 		r = get_user(vcpu->arch.pfault_token,
3541 			     (u64 __user *)reg->addr);
3542 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3543 			kvm_clear_async_pf_completion_queue(vcpu);
3544 		break;
3545 	case KVM_REG_S390_PFCOMPARE:
3546 		r = get_user(vcpu->arch.pfault_compare,
3547 			     (u64 __user *)reg->addr);
3548 		break;
3549 	case KVM_REG_S390_PFSELECT:
3550 		r = get_user(vcpu->arch.pfault_select,
3551 			     (u64 __user *)reg->addr);
3552 		break;
3553 	case KVM_REG_S390_PP:
3554 		r = get_user(vcpu->arch.sie_block->pp,
3555 			     (u64 __user *)reg->addr);
3556 		break;
3557 	case KVM_REG_S390_GBEA:
3558 		r = get_user(vcpu->arch.sie_block->gbea,
3559 			     (u64 __user *)reg->addr);
3560 		break;
3561 	default:
3562 		break;
3563 	}
3564 
3565 	return r;
3566 }
3567 
3568 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3569 {
3570 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3571 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3572 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3573 
3574 	kvm_clear_async_pf_completion_queue(vcpu);
3575 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3576 		kvm_s390_vcpu_stop(vcpu);
3577 	kvm_s390_clear_local_irqs(vcpu);
3578 }
3579 
3580 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3581 {
3582 	/* Initial reset is a superset of the normal reset */
3583 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3584 
3585 	/*
3586 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3587 	 * We do not only reset the internal data, but also ...
3588 	 */
3589 	vcpu->arch.sie_block->gpsw.mask = 0;
3590 	vcpu->arch.sie_block->gpsw.addr = 0;
3591 	kvm_s390_set_prefix(vcpu, 0);
3592 	kvm_s390_set_cpu_timer(vcpu, 0);
3593 	vcpu->arch.sie_block->ckc = 0;
3594 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3595 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3596 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3597 
3598 	/* ... the data in sync regs */
3599 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3600 	vcpu->run->s.regs.ckc = 0;
3601 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3602 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3603 	vcpu->run->psw_addr = 0;
3604 	vcpu->run->psw_mask = 0;
3605 	vcpu->run->s.regs.todpr = 0;
3606 	vcpu->run->s.regs.cputm = 0;
3607 	vcpu->run->s.regs.ckc = 0;
3608 	vcpu->run->s.regs.pp = 0;
3609 	vcpu->run->s.regs.gbea = 1;
3610 	vcpu->run->s.regs.fpc = 0;
3611 	/*
3612 	 * Do not reset these registers in the protected case, as some of
3613 	 * them are overlayed and they are not accessible in this case
3614 	 * anyway.
3615 	 */
3616 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3617 		vcpu->arch.sie_block->gbea = 1;
3618 		vcpu->arch.sie_block->pp = 0;
3619 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3620 		vcpu->arch.sie_block->todpr = 0;
3621 	}
3622 }
3623 
3624 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3625 {
3626 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3627 
3628 	/* Clear reset is a superset of the initial reset */
3629 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3630 
3631 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3632 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3633 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3634 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3635 
3636 	regs->etoken = 0;
3637 	regs->etoken_extension = 0;
3638 }
3639 
3640 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3641 {
3642 	vcpu_load(vcpu);
3643 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3644 	vcpu_put(vcpu);
3645 	return 0;
3646 }
3647 
3648 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3649 {
3650 	vcpu_load(vcpu);
3651 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3652 	vcpu_put(vcpu);
3653 	return 0;
3654 }
3655 
3656 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3657 				  struct kvm_sregs *sregs)
3658 {
3659 	vcpu_load(vcpu);
3660 
3661 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3662 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3663 
3664 	vcpu_put(vcpu);
3665 	return 0;
3666 }
3667 
3668 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3669 				  struct kvm_sregs *sregs)
3670 {
3671 	vcpu_load(vcpu);
3672 
3673 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3674 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3675 
3676 	vcpu_put(vcpu);
3677 	return 0;
3678 }
3679 
3680 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3681 {
3682 	int ret = 0;
3683 
3684 	vcpu_load(vcpu);
3685 
3686 	if (test_fp_ctl(fpu->fpc)) {
3687 		ret = -EINVAL;
3688 		goto out;
3689 	}
3690 	vcpu->run->s.regs.fpc = fpu->fpc;
3691 	if (MACHINE_HAS_VX)
3692 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3693 				 (freg_t *) fpu->fprs);
3694 	else
3695 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3696 
3697 out:
3698 	vcpu_put(vcpu);
3699 	return ret;
3700 }
3701 
3702 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3703 {
3704 	vcpu_load(vcpu);
3705 
3706 	/* make sure we have the latest values */
3707 	save_fpu_regs();
3708 	if (MACHINE_HAS_VX)
3709 		convert_vx_to_fp((freg_t *) fpu->fprs,
3710 				 (__vector128 *) vcpu->run->s.regs.vrs);
3711 	else
3712 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3713 	fpu->fpc = vcpu->run->s.regs.fpc;
3714 
3715 	vcpu_put(vcpu);
3716 	return 0;
3717 }
3718 
3719 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3720 {
3721 	int rc = 0;
3722 
3723 	if (!is_vcpu_stopped(vcpu))
3724 		rc = -EBUSY;
3725 	else {
3726 		vcpu->run->psw_mask = psw.mask;
3727 		vcpu->run->psw_addr = psw.addr;
3728 	}
3729 	return rc;
3730 }
3731 
3732 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3733 				  struct kvm_translation *tr)
3734 {
3735 	return -EINVAL; /* not implemented yet */
3736 }
3737 
3738 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3739 			      KVM_GUESTDBG_USE_HW_BP | \
3740 			      KVM_GUESTDBG_ENABLE)
3741 
3742 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3743 					struct kvm_guest_debug *dbg)
3744 {
3745 	int rc = 0;
3746 
3747 	vcpu_load(vcpu);
3748 
3749 	vcpu->guest_debug = 0;
3750 	kvm_s390_clear_bp_data(vcpu);
3751 
3752 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3753 		rc = -EINVAL;
3754 		goto out;
3755 	}
3756 	if (!sclp.has_gpere) {
3757 		rc = -EINVAL;
3758 		goto out;
3759 	}
3760 
3761 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3762 		vcpu->guest_debug = dbg->control;
3763 		/* enforce guest PER */
3764 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3765 
3766 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3767 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3768 	} else {
3769 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3770 		vcpu->arch.guestdbg.last_bp = 0;
3771 	}
3772 
3773 	if (rc) {
3774 		vcpu->guest_debug = 0;
3775 		kvm_s390_clear_bp_data(vcpu);
3776 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3777 	}
3778 
3779 out:
3780 	vcpu_put(vcpu);
3781 	return rc;
3782 }
3783 
3784 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3785 				    struct kvm_mp_state *mp_state)
3786 {
3787 	int ret;
3788 
3789 	vcpu_load(vcpu);
3790 
3791 	/* CHECK_STOP and LOAD are not supported yet */
3792 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3793 				      KVM_MP_STATE_OPERATING;
3794 
3795 	vcpu_put(vcpu);
3796 	return ret;
3797 }
3798 
3799 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3800 				    struct kvm_mp_state *mp_state)
3801 {
3802 	int rc = 0;
3803 
3804 	vcpu_load(vcpu);
3805 
3806 	/* user space knows about this interface - let it control the state */
3807 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3808 
3809 	switch (mp_state->mp_state) {
3810 	case KVM_MP_STATE_STOPPED:
3811 		rc = kvm_s390_vcpu_stop(vcpu);
3812 		break;
3813 	case KVM_MP_STATE_OPERATING:
3814 		rc = kvm_s390_vcpu_start(vcpu);
3815 		break;
3816 	case KVM_MP_STATE_LOAD:
3817 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3818 			rc = -ENXIO;
3819 			break;
3820 		}
3821 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3822 		break;
3823 	case KVM_MP_STATE_CHECK_STOP:
3824 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3825 	default:
3826 		rc = -ENXIO;
3827 	}
3828 
3829 	vcpu_put(vcpu);
3830 	return rc;
3831 }
3832 
3833 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3834 {
3835 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3836 }
3837 
3838 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3839 {
3840 retry:
3841 	kvm_s390_vcpu_request_handled(vcpu);
3842 	if (!kvm_request_pending(vcpu))
3843 		return 0;
3844 	/*
3845 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3846 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3847 	 * This ensures that the ipte instruction for this request has
3848 	 * already finished. We might race against a second unmapper that
3849 	 * wants to set the blocking bit. Lets just retry the request loop.
3850 	 */
3851 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3852 		int rc;
3853 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3854 					  kvm_s390_get_prefix(vcpu),
3855 					  PAGE_SIZE * 2, PROT_WRITE);
3856 		if (rc) {
3857 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3858 			return rc;
3859 		}
3860 		goto retry;
3861 	}
3862 
3863 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3864 		vcpu->arch.sie_block->ihcpu = 0xffff;
3865 		goto retry;
3866 	}
3867 
3868 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3869 		if (!ibs_enabled(vcpu)) {
3870 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3871 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3872 		}
3873 		goto retry;
3874 	}
3875 
3876 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3877 		if (ibs_enabled(vcpu)) {
3878 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3879 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3880 		}
3881 		goto retry;
3882 	}
3883 
3884 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3885 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3886 		goto retry;
3887 	}
3888 
3889 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3890 		/*
3891 		 * Disable CMM virtualization; we will emulate the ESSA
3892 		 * instruction manually, in order to provide additional
3893 		 * functionalities needed for live migration.
3894 		 */
3895 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3896 		goto retry;
3897 	}
3898 
3899 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3900 		/*
3901 		 * Re-enable CMM virtualization if CMMA is available and
3902 		 * CMM has been used.
3903 		 */
3904 		if ((vcpu->kvm->arch.use_cmma) &&
3905 		    (vcpu->kvm->mm->context.uses_cmm))
3906 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3907 		goto retry;
3908 	}
3909 
3910 	/* nothing to do, just clear the request */
3911 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3912 	/* we left the vsie handler, nothing to do, just clear the request */
3913 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3914 
3915 	return 0;
3916 }
3917 
3918 void kvm_s390_set_tod_clock(struct kvm *kvm,
3919 			    const struct kvm_s390_vm_tod_clock *gtod)
3920 {
3921 	struct kvm_vcpu *vcpu;
3922 	union tod_clock clk;
3923 	int i;
3924 
3925 	mutex_lock(&kvm->lock);
3926 	preempt_disable();
3927 
3928 	store_tod_clock_ext(&clk);
3929 
3930 	kvm->arch.epoch = gtod->tod - clk.tod;
3931 	kvm->arch.epdx = 0;
3932 	if (test_kvm_facility(kvm, 139)) {
3933 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3934 		if (kvm->arch.epoch > gtod->tod)
3935 			kvm->arch.epdx -= 1;
3936 	}
3937 
3938 	kvm_s390_vcpu_block_all(kvm);
3939 	kvm_for_each_vcpu(i, vcpu, kvm) {
3940 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3941 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3942 	}
3943 
3944 	kvm_s390_vcpu_unblock_all(kvm);
3945 	preempt_enable();
3946 	mutex_unlock(&kvm->lock);
3947 }
3948 
3949 /**
3950  * kvm_arch_fault_in_page - fault-in guest page if necessary
3951  * @vcpu: The corresponding virtual cpu
3952  * @gpa: Guest physical address
3953  * @writable: Whether the page should be writable or not
3954  *
3955  * Make sure that a guest page has been faulted-in on the host.
3956  *
3957  * Return: Zero on success, negative error code otherwise.
3958  */
3959 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3960 {
3961 	return gmap_fault(vcpu->arch.gmap, gpa,
3962 			  writable ? FAULT_FLAG_WRITE : 0);
3963 }
3964 
3965 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3966 				      unsigned long token)
3967 {
3968 	struct kvm_s390_interrupt inti;
3969 	struct kvm_s390_irq irq;
3970 
3971 	if (start_token) {
3972 		irq.u.ext.ext_params2 = token;
3973 		irq.type = KVM_S390_INT_PFAULT_INIT;
3974 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3975 	} else {
3976 		inti.type = KVM_S390_INT_PFAULT_DONE;
3977 		inti.parm64 = token;
3978 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3979 	}
3980 }
3981 
3982 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3983 				     struct kvm_async_pf *work)
3984 {
3985 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3986 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3987 
3988 	return true;
3989 }
3990 
3991 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3992 				 struct kvm_async_pf *work)
3993 {
3994 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3995 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3996 }
3997 
/*
 * Async page fault "page ready" hook. Intentionally empty on s390: neither
 * @vcpu nor @work is touched here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
4003 
/*
 * Tell the async page fault code whether completed work may be dequeued for
 * @vcpu. This implementation unconditionally answers true.
 */
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
4012 
4013 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4014 {
4015 	hva_t hva;
4016 	struct kvm_arch_async_pf arch;
4017 
4018 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4019 		return false;
4020 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4021 	    vcpu->arch.pfault_compare)
4022 		return false;
4023 	if (psw_extint_disabled(vcpu))
4024 		return false;
4025 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4026 		return false;
4027 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4028 		return false;
4029 	if (!vcpu->arch.gmap->pfault_enabled)
4030 		return false;
4031 
4032 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4033 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4034 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4035 		return false;
4036 
4037 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4038 }
4039 
4040 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4041 {
4042 	int rc, cpuflags;
4043 
4044 	/*
4045 	 * On s390 notifications for arriving pages will be delivered directly
4046 	 * to the guest but the house keeping for completed pfaults is
4047 	 * handled outside the worker.
4048 	 */
4049 	kvm_check_async_pf_completion(vcpu);
4050 
4051 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4052 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4053 
4054 	if (need_resched())
4055 		schedule();
4056 
4057 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4058 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4059 		if (rc)
4060 			return rc;
4061 	}
4062 
4063 	rc = kvm_s390_handle_requests(vcpu);
4064 	if (rc)
4065 		return rc;
4066 
4067 	if (guestdbg_enabled(vcpu)) {
4068 		kvm_s390_backup_guest_per_regs(vcpu);
4069 		kvm_s390_patch_guest_per_regs(vcpu);
4070 	}
4071 
4072 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4073 
4074 	vcpu->arch.sie_block->icptcode = 0;
4075 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4076 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4077 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4078 
4079 	return 0;
4080 }
4081 
4082 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4083 {
4084 	struct kvm_s390_pgm_info pgm_info = {
4085 		.code = PGM_ADDRESSING,
4086 	};
4087 	u8 opcode, ilen;
4088 	int rc;
4089 
4090 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4091 	trace_kvm_s390_sie_fault(vcpu);
4092 
4093 	/*
4094 	 * We want to inject an addressing exception, which is defined as a
4095 	 * suppressing or terminating exception. However, since we came here
4096 	 * by a DAT access exception, the PSW still points to the faulting
4097 	 * instruction since DAT exceptions are nullifying. So we've got
4098 	 * to look up the current opcode to get the length of the instruction
4099 	 * to be able to forward the PSW.
4100 	 */
4101 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4102 	ilen = insn_length(opcode);
4103 	if (rc < 0) {
4104 		return rc;
4105 	} else if (rc) {
4106 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4107 		 * Forward by arbitrary ilc, injection will take care of
4108 		 * nullification if necessary.
4109 		 */
4110 		pgm_info = vcpu->arch.pgm;
4111 		ilen = 4;
4112 	}
4113 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4114 	kvm_s390_forward_psw(vcpu, ilen);
4115 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4116 }
4117 
4118 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4119 {
4120 	struct mcck_volatile_info *mcck_info;
4121 	struct sie_page *sie_page;
4122 
4123 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4124 		   vcpu->arch.sie_block->icptcode);
4125 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4126 
4127 	if (guestdbg_enabled(vcpu))
4128 		kvm_s390_restore_guest_per_regs(vcpu);
4129 
4130 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4131 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4132 
4133 	if (exit_reason == -EINTR) {
4134 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4135 		sie_page = container_of(vcpu->arch.sie_block,
4136 					struct sie_page, sie_block);
4137 		mcck_info = &sie_page->mcck_info;
4138 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4139 		return 0;
4140 	}
4141 
4142 	if (vcpu->arch.sie_block->icptcode > 0) {
4143 		int rc = kvm_handle_sie_intercept(vcpu);
4144 
4145 		if (rc != -EOPNOTSUPP)
4146 			return rc;
4147 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4148 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4149 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4150 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4151 		return -EREMOTE;
4152 	} else if (exit_reason != -EFAULT) {
4153 		vcpu->stat.exit_null++;
4154 		return 0;
4155 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4156 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4157 		vcpu->run->s390_ucontrol.trans_exc_code =
4158 						current->thread.gmap_addr;
4159 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4160 		return -EREMOTE;
4161 	} else if (current->thread.gmap_pfault) {
4162 		trace_kvm_s390_major_guest_pfault(vcpu);
4163 		current->thread.gmap_pfault = 0;
4164 		if (kvm_arch_setup_async_pf(vcpu))
4165 			return 0;
4166 		vcpu->stat.pfault_sync++;
4167 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4168 	}
4169 	return vcpu_post_run_fault_in_sie(vcpu);
4170 }
4171 
/* PSW mask bits for external, I/O and machine check interruptions */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Main run loop of a vcpu: repeat vcpu_pre_run() -> sie64a() ->
 * vcpu_post_run() until a signal is pending for the current task, a guest
 * debug exit is pending, or one of the helpers returns a non-zero value.
 *
 * Return: the value of the last vcpu_pre_run()/vcpu_post_run() call.
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	/*
	 * The SIE block address is reinterpreted as the address of its
	 * struct sie_page (presumably sie_block is the first member of
	 * struct sie_page - confirm against the struct definition).
	 */
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		/* srcu is dropped while the guest is running */
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		/* protected vcpus take their gprs from the sie_page copy */
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			/* copy the gprs back from the sie_page copy */
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		/* back in host context: take srcu again before post processing */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
4234 
/*
 * Copy the register state provided by userspace in vcpu->run into the SIE
 * block and the vcpu arch state. The PSW is always taken over; the other
 * register groups are only synced if their KVM_SYNC_* bit is set in
 * kvm_dirty_regs. Called from sync_regs() for vcpus that are not protected.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		/* an invalid token also drops the queued async pf completions */
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	/* only bpbc being non-zero matters; the FPF_BPBC bit is set or cleared */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		/* remember and save the host gs_cb, if the task has one */
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		/* switch the task over to the guest gscb located in kvm_run */
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
4307 
/*
 * Take over the register state from vcpu->run before running the guest:
 * sync the groups flagged in kvm_dirty_regs, switch access registers and
 * floating point state from host to guest, and clear kvm_dirty_regs at the
 * end. The host values saved here are restored again by store_regs().
 */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	/* swap the access registers: keep the host copy, load the guest's */
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	/* point the task's fpu register save area at the guest registers */
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	/* everything has been consumed, nothing is dirty any more */
	kvm_run->kvm_dirty_regs = 0;
}
4358 
4359 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4360 {
4361 	struct kvm_run *kvm_run = vcpu->run;
4362 
4363 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4364 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4365 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4366 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4367 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4368 	if (MACHINE_HAS_GS) {
4369 		preempt_disable();
4370 		__ctl_set_bit(2, 4);
4371 		if (vcpu->arch.gs_enabled)
4372 			save_gs_cb(current->thread.gs_cb);
4373 		current->thread.gs_cb = vcpu->arch.host_gscb;
4374 		restore_gs_cb(vcpu->arch.host_gscb);
4375 		if (!vcpu->arch.host_gscb)
4376 			__ctl_clear_bit(2, 4);
4377 		vcpu->arch.host_gscb = NULL;
4378 		preempt_enable();
4379 	}
4380 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4381 }
4382 
4383 static void store_regs(struct kvm_vcpu *vcpu)
4384 {
4385 	struct kvm_run *kvm_run = vcpu->run;
4386 
4387 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4388 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4389 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4390 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4391 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4392 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4393 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4394 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4395 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4396 	save_access_regs(vcpu->run->s.regs.acrs);
4397 	restore_access_regs(vcpu->arch.host_acrs);
4398 	/* Save guest register state */
4399 	save_fpu_regs();
4400 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4401 	/* Restore will be done lazily at return */
4402 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4403 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4404 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4405 		store_regs_fmt2(vcpu);
4406 }
4407 
4408 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4409 {
4410 	struct kvm_run *kvm_run = vcpu->run;
4411 	int rc;
4412 
4413 	if (kvm_run->immediate_exit)
4414 		return -EINTR;
4415 
4416 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4417 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4418 		return -EINVAL;
4419 
4420 	vcpu_load(vcpu);
4421 
4422 	if (guestdbg_exit_pending(vcpu)) {
4423 		kvm_s390_prepare_debug_exit(vcpu);
4424 		rc = 0;
4425 		goto out;
4426 	}
4427 
4428 	kvm_sigset_activate(vcpu);
4429 
4430 	/*
4431 	 * no need to check the return value of vcpu_start as it can only have
4432 	 * an error for protvirt, but protvirt means user cpu state
4433 	 */
4434 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4435 		kvm_s390_vcpu_start(vcpu);
4436 	} else if (is_vcpu_stopped(vcpu)) {
4437 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4438 				   vcpu->vcpu_id);
4439 		rc = -EINVAL;
4440 		goto out;
4441 	}
4442 
4443 	sync_regs(vcpu);
4444 	enable_cpu_timer_accounting(vcpu);
4445 
4446 	might_fault();
4447 	rc = __vcpu_run(vcpu);
4448 
4449 	if (signal_pending(current) && !rc) {
4450 		kvm_run->exit_reason = KVM_EXIT_INTR;
4451 		rc = -EINTR;
4452 	}
4453 
4454 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4455 		kvm_s390_prepare_debug_exit(vcpu);
4456 		rc = 0;
4457 	}
4458 
4459 	if (rc == -EREMOTE) {
4460 		/* userspace support is needed, kvm_run has been prepared */
4461 		rc = 0;
4462 	}
4463 
4464 	disable_cpu_timer_accounting(vcpu);
4465 	store_regs(vcpu);
4466 
4467 	kvm_sigset_deactivate(vcpu);
4468 
4469 	vcpu->stat.exit_userspace++;
4470 out:
4471 	vcpu_put(vcpu);
4472 	return rc;
4473 }
4474 
4475 /*
4476  * store status at address
4477  * we use have two special cases:
4478  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4479  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4480  */
4481 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4482 {
4483 	unsigned char archmode = 1;
4484 	freg_t fprs[NUM_FPRS];
4485 	unsigned int px;
4486 	u64 clkcomp, cputm;
4487 	int rc;
4488 
4489 	px = kvm_s390_get_prefix(vcpu);
4490 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4491 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4492 			return -EFAULT;
4493 		gpa = 0;
4494 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4495 		if (write_guest_real(vcpu, 163, &archmode, 1))
4496 			return -EFAULT;
4497 		gpa = px;
4498 	} else
4499 		gpa -= __LC_FPREGS_SAVE_AREA;
4500 
4501 	/* manually convert vector registers if necessary */
4502 	if (MACHINE_HAS_VX) {
4503 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4504 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4505 				     fprs, 128);
4506 	} else {
4507 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4508 				     vcpu->run->s.regs.fprs, 128);
4509 	}
4510 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4511 			      vcpu->run->s.regs.gprs, 128);
4512 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4513 			      &vcpu->arch.sie_block->gpsw, 16);
4514 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4515 			      &px, 4);
4516 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4517 			      &vcpu->run->s.regs.fpc, 4);
4518 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4519 			      &vcpu->arch.sie_block->todpr, 4);
4520 	cputm = kvm_s390_get_cpu_timer(vcpu);
4521 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4522 			      &cputm, 8);
4523 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4524 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4525 			      &clkcomp, 8);
4526 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4527 			      &vcpu->run->s.regs.acrs, 64);
4528 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4529 			      &vcpu->arch.sie_block->gcr, 128);
4530 	return rc ? -EFAULT : 0;
4531 }
4532 
4533 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4534 {
4535 	/*
4536 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4537 	 * switch in the run ioctl. Let's update our copies before we save
4538 	 * it into the save area
4539 	 */
4540 	save_fpu_regs();
4541 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4542 	save_access_regs(vcpu->run->s.regs.acrs);
4543 
4544 	return kvm_s390_store_status_unloaded(vcpu, addr);
4545 }
4546 
/*
 * Request that IBS gets disabled on @vcpu: KVM_REQ_ENABLE_IBS is checked
 * first (the result is ignored; presumably this discards a still pending
 * ENABLE request), then KVM_REQ_DISABLE_IBS is issued via
 * kvm_s390_sync_request().
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
4552 
4553 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4554 {
4555 	unsigned int i;
4556 	struct kvm_vcpu *vcpu;
4557 
4558 	kvm_for_each_vcpu(i, vcpu, kvm) {
4559 		__disable_ibs_on_vcpu(vcpu);
4560 	}
4561 }
4562 
4563 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4564 {
4565 	if (!sclp.has_ibs)
4566 		return;
4567 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4568 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4569 }
4570 
4571 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4572 {
4573 	int i, online_vcpus, r = 0, started_vcpus = 0;
4574 
4575 	if (!is_vcpu_stopped(vcpu))
4576 		return 0;
4577 
4578 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4579 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4580 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4582 
4583 	/* Let's tell the UV that we want to change into the operating state */
4584 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4586 		if (r) {
4587 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588 			return r;
4589 		}
4590 	}
4591 
4592 	for (i = 0; i < online_vcpus; i++) {
4593 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4594 			started_vcpus++;
4595 	}
4596 
4597 	if (started_vcpus == 0) {
4598 		/* we're the only active VCPU -> speed it up */
4599 		__enable_ibs_on_vcpu(vcpu);
4600 	} else if (started_vcpus == 1) {
4601 		/*
4602 		 * As we are starting a second VCPU, we have to disable
4603 		 * the IBS facility on all VCPUs to remove potentially
4604 		 * outstanding ENABLE requests.
4605 		 */
4606 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4607 	}
4608 
4609 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4610 	/*
4611 	 * The real PSW might have changed due to a RESTART interpreted by the
4612 	 * ultravisor. We block all interrupts and let the next sie exit
4613 	 * refresh our view.
4614 	 */
4615 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4616 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4617 	/*
4618 	 * Another VCPU might have used IBS while we were offline.
4619 	 * Let's play safe and flush the VCPU at startup.
4620 	 */
4621 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4622 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4623 	return 0;
4624 }
4625 
4626 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4627 {
4628 	int i, online_vcpus, r = 0, started_vcpus = 0;
4629 	struct kvm_vcpu *started_vcpu = NULL;
4630 
4631 	if (is_vcpu_stopped(vcpu))
4632 		return 0;
4633 
4634 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4635 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4636 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4637 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4638 
4639 	/* Let's tell the UV that we want to change into the stopped state */
4640 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4641 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4642 		if (r) {
4643 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4644 			return r;
4645 		}
4646 	}
4647 
4648 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4649 	kvm_s390_clear_stop_irq(vcpu);
4650 
4651 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4652 	__disable_ibs_on_vcpu(vcpu);
4653 
4654 	for (i = 0; i < online_vcpus; i++) {
4655 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4656 			started_vcpus++;
4657 			started_vcpu = vcpu->kvm->vcpus[i];
4658 		}
4659 	}
4660 
4661 	if (started_vcpus == 1) {
4662 		/*
4663 		 * As we only have one VCPU left, we want to enable the
4664 		 * IBS facility for that VCPU to speed it up.
4665 		 */
4666 		__enable_ibs_on_vcpu(started_vcpu);
4667 	}
4668 
4669 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4670 	return 0;
4671 }
4672 
4673 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4674 				     struct kvm_enable_cap *cap)
4675 {
4676 	int r;
4677 
4678 	if (cap->flags)
4679 		return -EINVAL;
4680 
4681 	switch (cap->cap) {
4682 	case KVM_CAP_S390_CSS_SUPPORT:
4683 		if (!vcpu->kvm->arch.css_support) {
4684 			vcpu->kvm->arch.css_support = 1;
4685 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4686 			trace_kvm_s390_enable_css(vcpu->kvm);
4687 		}
4688 		r = 0;
4689 		break;
4690 	default:
4691 		r = -EINVAL;
4692 		break;
4693 	}
4694 	return r;
4695 }
4696 
4697 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4698 				   struct kvm_s390_mem_op *mop)
4699 {
4700 	void __user *uaddr = (void __user *)mop->buf;
4701 	int r = 0;
4702 
4703 	if (mop->flags || !mop->size)
4704 		return -EINVAL;
4705 	if (mop->size + mop->sida_offset < mop->size)
4706 		return -EINVAL;
4707 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4708 		return -E2BIG;
4709 
4710 	switch (mop->op) {
4711 	case KVM_S390_MEMOP_SIDA_READ:
4712 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4713 				 mop->sida_offset), mop->size))
4714 			r = -EFAULT;
4715 
4716 		break;
4717 	case KVM_S390_MEMOP_SIDA_WRITE:
4718 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4719 				   mop->sida_offset), uaddr, mop->size))
4720 			r = -EFAULT;
4721 		break;
4722 	}
4723 	return r;
4724 }
4725 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4726 				  struct kvm_s390_mem_op *mop)
4727 {
4728 	void __user *uaddr = (void __user *)mop->buf;
4729 	void *tmpbuf = NULL;
4730 	int r = 0;
4731 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4732 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4733 
4734 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4735 		return -EINVAL;
4736 
4737 	if (mop->size > MEM_OP_MAX_SIZE)
4738 		return -E2BIG;
4739 
4740 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4741 		return -EINVAL;
4742 
4743 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4744 		tmpbuf = vmalloc(mop->size);
4745 		if (!tmpbuf)
4746 			return -ENOMEM;
4747 	}
4748 
4749 	switch (mop->op) {
4750 	case KVM_S390_MEMOP_LOGICAL_READ:
4751 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4752 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4753 					    mop->size, GACC_FETCH);
4754 			break;
4755 		}
4756 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4757 		if (r == 0) {
4758 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4759 				r = -EFAULT;
4760 		}
4761 		break;
4762 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4763 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4764 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4765 					    mop->size, GACC_STORE);
4766 			break;
4767 		}
4768 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4769 			r = -EFAULT;
4770 			break;
4771 		}
4772 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4773 		break;
4774 	}
4775 
4776 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4777 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4778 
4779 	vfree(tmpbuf);
4780 	return r;
4781 }
4782 
4783 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4784 				      struct kvm_s390_mem_op *mop)
4785 {
4786 	int r, srcu_idx;
4787 
4788 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4789 
4790 	switch (mop->op) {
4791 	case KVM_S390_MEMOP_LOGICAL_READ:
4792 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4793 		r = kvm_s390_guest_mem_op(vcpu, mop);
4794 		break;
4795 	case KVM_S390_MEMOP_SIDA_READ:
4796 	case KVM_S390_MEMOP_SIDA_WRITE:
4797 		/* we are locked against sida going away by the vcpu->mutex */
4798 		r = kvm_s390_guest_sida_op(vcpu, mop);
4799 		break;
4800 	default:
4801 		r = -EINVAL;
4802 	}
4803 
4804 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4805 	return r;
4806 }
4807 
4808 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4809 			       unsigned int ioctl, unsigned long arg)
4810 {
4811 	struct kvm_vcpu *vcpu = filp->private_data;
4812 	void __user *argp = (void __user *)arg;
4813 
4814 	switch (ioctl) {
4815 	case KVM_S390_IRQ: {
4816 		struct kvm_s390_irq s390irq;
4817 
4818 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4819 			return -EFAULT;
4820 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4821 	}
4822 	case KVM_S390_INTERRUPT: {
4823 		struct kvm_s390_interrupt s390int;
4824 		struct kvm_s390_irq s390irq = {};
4825 
4826 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4827 			return -EFAULT;
4828 		if (s390int_to_s390irq(&s390int, &s390irq))
4829 			return -EINVAL;
4830 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4831 	}
4832 	}
4833 	return -ENOIOCTLCMD;
4834 }
4835 
4836 long kvm_arch_vcpu_ioctl(struct file *filp,
4837 			 unsigned int ioctl, unsigned long arg)
4838 {
4839 	struct kvm_vcpu *vcpu = filp->private_data;
4840 	void __user *argp = (void __user *)arg;
4841 	int idx;
4842 	long r;
4843 	u16 rc, rrc;
4844 
4845 	vcpu_load(vcpu);
4846 
4847 	switch (ioctl) {
4848 	case KVM_S390_STORE_STATUS:
4849 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4850 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4851 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4852 		break;
4853 	case KVM_S390_SET_INITIAL_PSW: {
4854 		psw_t psw;
4855 
4856 		r = -EFAULT;
4857 		if (copy_from_user(&psw, argp, sizeof(psw)))
4858 			break;
4859 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4860 		break;
4861 	}
4862 	case KVM_S390_CLEAR_RESET:
4863 		r = 0;
4864 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4865 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4866 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4867 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4868 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4869 				   rc, rrc);
4870 		}
4871 		break;
4872 	case KVM_S390_INITIAL_RESET:
4873 		r = 0;
4874 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4875 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4876 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4877 					  UVC_CMD_CPU_RESET_INITIAL,
4878 					  &rc, &rrc);
4879 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4880 				   rc, rrc);
4881 		}
4882 		break;
4883 	case KVM_S390_NORMAL_RESET:
4884 		r = 0;
4885 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4886 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4887 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4888 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4889 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4890 				   rc, rrc);
4891 		}
4892 		break;
4893 	case KVM_SET_ONE_REG:
4894 	case KVM_GET_ONE_REG: {
4895 		struct kvm_one_reg reg;
4896 		r = -EINVAL;
4897 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4898 			break;
4899 		r = -EFAULT;
4900 		if (copy_from_user(&reg, argp, sizeof(reg)))
4901 			break;
4902 		if (ioctl == KVM_SET_ONE_REG)
4903 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4904 		else
4905 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4906 		break;
4907 	}
4908 #ifdef CONFIG_KVM_S390_UCONTROL
4909 	case KVM_S390_UCAS_MAP: {
4910 		struct kvm_s390_ucas_mapping ucasmap;
4911 
4912 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4913 			r = -EFAULT;
4914 			break;
4915 		}
4916 
4917 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4918 			r = -EINVAL;
4919 			break;
4920 		}
4921 
4922 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4923 				     ucasmap.vcpu_addr, ucasmap.length);
4924 		break;
4925 	}
4926 	case KVM_S390_UCAS_UNMAP: {
4927 		struct kvm_s390_ucas_mapping ucasmap;
4928 
4929 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4930 			r = -EFAULT;
4931 			break;
4932 		}
4933 
4934 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4935 			r = -EINVAL;
4936 			break;
4937 		}
4938 
4939 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4940 			ucasmap.length);
4941 		break;
4942 	}
4943 #endif
4944 	case KVM_S390_VCPU_FAULT: {
4945 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4946 		break;
4947 	}
4948 	case KVM_ENABLE_CAP:
4949 	{
4950 		struct kvm_enable_cap cap;
4951 		r = -EFAULT;
4952 		if (copy_from_user(&cap, argp, sizeof(cap)))
4953 			break;
4954 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4955 		break;
4956 	}
4957 	case KVM_S390_MEM_OP: {
4958 		struct kvm_s390_mem_op mem_op;
4959 
4960 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4961 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4962 		else
4963 			r = -EFAULT;
4964 		break;
4965 	}
4966 	case KVM_S390_SET_IRQ_STATE: {
4967 		struct kvm_s390_irq_state irq_state;
4968 
4969 		r = -EFAULT;
4970 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4971 			break;
4972 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4973 		    irq_state.len == 0 ||
4974 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4975 			r = -EINVAL;
4976 			break;
4977 		}
4978 		/* do not use irq_state.flags, it will break old QEMUs */
4979 		r = kvm_s390_set_irq_state(vcpu,
4980 					   (void __user *) irq_state.buf,
4981 					   irq_state.len);
4982 		break;
4983 	}
4984 	case KVM_S390_GET_IRQ_STATE: {
4985 		struct kvm_s390_irq_state irq_state;
4986 
4987 		r = -EFAULT;
4988 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4989 			break;
4990 		if (irq_state.len == 0) {
4991 			r = -EINVAL;
4992 			break;
4993 		}
4994 		/* do not use irq_state.flags, it will break old QEMUs */
4995 		r = kvm_s390_get_irq_state(vcpu,
4996 					   (__u8 __user *)  irq_state.buf,
4997 					   irq_state.len);
4998 		break;
4999 	}
5000 	default:
5001 		r = -ENOTTY;
5002 	}
5003 
5004 	vcpu_put(vcpu);
5005 	return r;
5006 }
5007 
5008 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5009 {
5010 #ifdef CONFIG_KVM_S390_UCONTROL
5011 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5012 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5013 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5014 		get_page(vmf->page);
5015 		return 0;
5016 	}
5017 #endif
5018 	return VM_FAULT_SIGBUS;
5019 }
5020 
5021 /* Section: memory related */
5022 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5023 				   struct kvm_memory_slot *memslot,
5024 				   const struct kvm_userspace_memory_region *mem,
5025 				   enum kvm_mr_change change)
5026 {
5027 	/* A few sanity checks. We can have memory slots which have to be
5028 	   located/ended at a segment boundary (1MB). The memory in userland is
5029 	   ok to be fragmented into various different vmas. It is okay to mmap()
5030 	   and munmap() stuff in this slot after doing this call at any time */
5031 
5032 	if (mem->userspace_addr & 0xffffful)
5033 		return -EINVAL;
5034 
5035 	if (mem->memory_size & 0xffffful)
5036 		return -EINVAL;
5037 
5038 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5039 		return -EINVAL;
5040 
5041 	/* When we are protected, we should not change the memory slots */
5042 	if (kvm_s390_pv_get_handle(kvm))
5043 		return -EINVAL;
5044 	return 0;
5045 }
5046 
5047 void kvm_arch_commit_memory_region(struct kvm *kvm,
5048 				const struct kvm_userspace_memory_region *mem,
5049 				struct kvm_memory_slot *old,
5050 				const struct kvm_memory_slot *new,
5051 				enum kvm_mr_change change)
5052 {
5053 	int rc = 0;
5054 
5055 	switch (change) {
5056 	case KVM_MR_DELETE:
5057 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5058 					old->npages * PAGE_SIZE);
5059 		break;
5060 	case KVM_MR_MOVE:
5061 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5062 					old->npages * PAGE_SIZE);
5063 		if (rc)
5064 			break;
5065 		fallthrough;
5066 	case KVM_MR_CREATE:
5067 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5068 				      mem->guest_phys_addr, mem->memory_size);
5069 		break;
5070 	case KVM_MR_FLAGS_ONLY:
5071 		break;
5072 	default:
5073 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5074 	}
5075 	if (rc)
5076 		pr_warn("failed to commit memory region\n");
5077 	return;
5078 }
5079 
5080 static inline unsigned long nonhyp_mask(int i)
5081 {
5082 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5083 
5084 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5085 }
5086 
5087 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5088 {
5089 	vcpu->valid_wakeup = false;
5090 }
5091 
5092 static int __init kvm_s390_init(void)
5093 {
5094 	int i;
5095 
5096 	if (!sclp.has_sief2) {
5097 		pr_info("SIE is not available\n");
5098 		return -ENODEV;
5099 	}
5100 
5101 	if (nested && hpage) {
5102 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5103 		return -EINVAL;
5104 	}
5105 
5106 	for (i = 0; i < 16; i++)
5107 		kvm_s390_fac_base[i] |=
5108 			stfle_fac_list[i] & nonhyp_mask(i);
5109 
5110 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5111 }
5112 
5113 static void __exit kvm_s390_exit(void)
5114 {
5115 	kvm_exit();
5116 }
5117 
/* Register the module entry and exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
5120 
5121 /*
5122  * Enable autoloading of the kvm module.
5123  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5124  * since x86 takes a different approach.
5125  */
5126 #include <linux/miscdevice.h>
5127 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5128 MODULE_ALIAS("devname:kvm");
5129