xref: /linux/arch/s390/kvm/kvm-s390.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
10  *               Jason J. Herne <jjherne@us.ibm.com>
11  */
12 
13 #define KMSG_COMPONENT "kvm-s390"
14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
15 
16 #include <linux/compiler.h>
17 #include <linux/err.h>
18 #include <linux/fs.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/mman.h>
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/pgtable.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include <asm/ap.h>
46 #include <asm/uv.h>
47 #include <asm/fpu/api.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
60 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
61 	KVM_GENERIC_VM_STATS(),
62 	STATS_DESC_COUNTER(VM, inject_io),
63 	STATS_DESC_COUNTER(VM, inject_float_mchk),
64 	STATS_DESC_COUNTER(VM, inject_pfault_done),
65 	STATS_DESC_COUNTER(VM, inject_service_signal),
66 	STATS_DESC_COUNTER(VM, inject_virtio)
67 };
68 
69 const struct kvm_stats_header kvm_vm_stats_header = {
70 	.name_size = KVM_STATS_NAME_SIZE,
71 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
72 	.id_offset = sizeof(struct kvm_stats_header),
73 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
74 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
75 		       sizeof(kvm_vm_stats_desc),
76 };
77 
78 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
79 	KVM_GENERIC_VCPU_STATS(),
80 	STATS_DESC_COUNTER(VCPU, exit_userspace),
81 	STATS_DESC_COUNTER(VCPU, exit_null),
82 	STATS_DESC_COUNTER(VCPU, exit_external_request),
83 	STATS_DESC_COUNTER(VCPU, exit_io_request),
84 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
85 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
86 	STATS_DESC_COUNTER(VCPU, exit_validity),
87 	STATS_DESC_COUNTER(VCPU, exit_instruction),
88 	STATS_DESC_COUNTER(VCPU, exit_pei),
89 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
90 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
91 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
92 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
93 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
94 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
95 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
96 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
97 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
98 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
99 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
100 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
101 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
102 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
103 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
104 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
106 	STATS_DESC_COUNTER(VCPU, deliver_program),
107 	STATS_DESC_COUNTER(VCPU, deliver_io),
108 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
109 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
110 	STATS_DESC_COUNTER(VCPU, inject_ckc),
111 	STATS_DESC_COUNTER(VCPU, inject_cputm),
112 	STATS_DESC_COUNTER(VCPU, inject_external_call),
113 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
114 	STATS_DESC_COUNTER(VCPU, inject_mchk),
115 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
116 	STATS_DESC_COUNTER(VCPU, inject_program),
117 	STATS_DESC_COUNTER(VCPU, inject_restart),
118 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
119 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
120 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
121 	STATS_DESC_COUNTER(VCPU, instruction_gs),
122 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
123 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
124 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
125 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
126 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
127 	STATS_DESC_COUNTER(VCPU, instruction_sck),
128 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
129 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
130 	STATS_DESC_COUNTER(VCPU, instruction_spx),
131 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
132 	STATS_DESC_COUNTER(VCPU, instruction_stap),
133 	STATS_DESC_COUNTER(VCPU, instruction_iske),
134 	STATS_DESC_COUNTER(VCPU, instruction_ri),
135 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
136 	STATS_DESC_COUNTER(VCPU, instruction_sske),
137 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
138 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
139 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
140 	STATS_DESC_COUNTER(VCPU, instruction_tb),
141 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
142 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
143 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
144 	STATS_DESC_COUNTER(VCPU, instruction_sie),
145 	STATS_DESC_COUNTER(VCPU, instruction_essa),
146 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
147 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
148 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
149 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
163 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
164 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
165 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
166 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
167 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
168 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
169 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
170 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
172 	STATS_DESC_COUNTER(VCPU, pfault_sync)
173 };
174 
175 const struct kvm_stats_header kvm_vcpu_stats_header = {
176 	.name_size = KVM_STATS_NAME_SIZE,
177 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
178 	.id_offset = sizeof(struct kvm_stats_header),
179 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
180 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
181 		       sizeof(kvm_vcpu_stats_desc),
182 };
183 
184 /* allow nested virtualization in KVM (if enabled by user space) */
185 static int nested;
186 module_param(nested, int, S_IRUGO);
187 MODULE_PARM_DESC(nested, "Nested virtualization support");
188 
189 /* allow 1m huge page guest backing, if !nested */
190 static int hpage;
191 module_param(hpage, int, 0444);
192 MODULE_PARM_DESC(hpage, "1m huge page backing support");
193 
194 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
195 static u8 halt_poll_max_steal = 10;
196 module_param(halt_poll_max_steal, byte, 0644);
197 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
198 
199 /* if set to true, the GISA will be initialized and used if available */
200 static bool use_gisa  = true;
201 module_param(use_gisa, bool, 0644);
202 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
203 
204 /* maximum diag9c forwarding per second */
205 unsigned int diag9c_forwarding_hz;
206 module_param(diag9c_forwarding_hz, uint, 0644);
207 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
208 
209 /*
210  * For now we handle at most 16 double words as this is what the s390 base
211  * kernel handles and stores in the prefix page. If we ever need to go beyond
212  * this, this requires changes to code, but the external uapi can stay.
213  */
214 #define SIZE_INTERNAL 16
215 
216 /*
217  * Base feature mask that defines default mask for facilities. Consists of the
218  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
219  */
220 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
221 /*
222  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
223  * and defines the facilities that can be enabled via a cpu model.
224  */
225 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
226 
227 static unsigned long kvm_s390_fac_size(void)
228 {
229 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
232 		sizeof(stfle_fac_list));
233 
234 	return SIZE_INTERNAL;
235 }
236 
237 /* available cpu features supported by kvm */
238 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
239 /* available subfunctions indicated via query / "test bit" */
240 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
241 
242 static struct gmap_notifier gmap_notifier;
243 static struct gmap_notifier vsie_gmap_notifier;
244 debug_info_t *kvm_s390_dbf;
245 debug_info_t *kvm_s390_dbf_uv;
246 
247 /* Section: not file related */
248 int kvm_arch_hardware_enable(void)
249 {
250 	/* every s390 is virtualization enabled ;-) */
251 	return 0;
252 }
253 
254 int kvm_arch_check_processor_compat(void *opaque)
255 {
256 	return 0;
257 }
258 
259 /* forward declarations */
260 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
261 			      unsigned long end);
262 static int sca_switch_to_extended(struct kvm *kvm);
263 
264 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
265 {
266 	u8 delta_idx = 0;
267 
268 	/*
269 	 * The TOD jumps by delta, we have to compensate this by adding
270 	 * -delta to the epoch.
271 	 */
272 	delta = -delta;
273 
274 	/* sign-extension - we're adding to signed values below */
275 	if ((s64)delta < 0)
276 		delta_idx = -1;
277 
278 	scb->epoch += delta;
279 	if (scb->ecd & ECD_MEF) {
280 		scb->epdx += delta_idx;
281 		if (scb->epoch < delta)
282 			scb->epdx += 1;
283 	}
284 }
285 
286 /*
287  * This callback is executed during stop_machine(). All CPUs are therefore
288  * temporarily stopped. In order not to change guest behavior, we have to
289  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
290  * so a CPU won't be stopped while calculating with the epoch.
291  */
292 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
293 			  void *v)
294 {
295 	struct kvm *kvm;
296 	struct kvm_vcpu *vcpu;
297 	unsigned long i;
298 	unsigned long long *delta = v;
299 
300 	list_for_each_entry(kvm, &vm_list, vm_list) {
301 		kvm_for_each_vcpu(i, vcpu, kvm) {
302 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
303 			if (i == 0) {
304 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
305 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
306 			}
307 			if (vcpu->arch.cputm_enabled)
308 				vcpu->arch.cputm_start += *delta;
309 			if (vcpu->arch.vsie_block)
310 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
311 						   *delta);
312 		}
313 	}
314 	return NOTIFY_OK;
315 }
316 
317 static struct notifier_block kvm_clock_notifier = {
318 	.notifier_call = kvm_clock_sync,
319 };
320 
321 int kvm_arch_hardware_setup(void *opaque)
322 {
323 	gmap_notifier.notifier_call = kvm_gmap_notifier;
324 	gmap_register_pte_notifier(&gmap_notifier);
325 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
326 	gmap_register_pte_notifier(&vsie_gmap_notifier);
327 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
328 				       &kvm_clock_notifier);
329 	return 0;
330 }
331 
332 void kvm_arch_hardware_unsetup(void)
333 {
334 	gmap_unregister_pte_notifier(&gmap_notifier);
335 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
336 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
337 					 &kvm_clock_notifier);
338 }
339 
340 static void allow_cpu_feat(unsigned long nr)
341 {
342 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
343 }
344 
345 static inline int plo_test_bit(unsigned char nr)
346 {
347 	unsigned long function = (unsigned long)nr | 0x100;
348 	int cc;
349 
350 	asm volatile(
351 		"	lgr	0,%[function]\n"
352 		/* Parameter registers are ignored for "test bit" */
353 		"	plo	0,0,0,0(0)\n"
354 		"	ipm	%0\n"
355 		"	srl	%0,28\n"
356 		: "=d" (cc)
357 		: [function] "d" (function)
358 		: "cc", "0");
359 	return cc == 0;
360 }
361 
362 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
363 {
364 	asm volatile(
365 		"	lghi	0,0\n"
366 		"	lgr	1,%[query]\n"
367 		/* Parameter registers are ignored */
368 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
369 		:
370 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
371 		: "cc", "memory", "0", "1");
372 }
373 
374 #define INSN_SORTL 0xb938
375 #define INSN_DFLTCC 0xb939
376 
377 static void kvm_s390_cpu_feat_init(void)
378 {
379 	int i;
380 
381 	for (i = 0; i < 256; ++i) {
382 		if (plo_test_bit(i))
383 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
384 	}
385 
386 	if (test_facility(28)) /* TOD-clock steering */
387 		ptff(kvm_s390_available_subfunc.ptff,
388 		     sizeof(kvm_s390_available_subfunc.ptff),
389 		     PTFF_QAF);
390 
391 	if (test_facility(17)) { /* MSA */
392 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmac);
394 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmc);
396 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.km);
398 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
399 			      kvm_s390_available_subfunc.kimd);
400 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.klmd);
402 	}
403 	if (test_facility(76)) /* MSA3 */
404 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.pckmo);
406 	if (test_facility(77)) { /* MSA4 */
407 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
408 			      kvm_s390_available_subfunc.kmctr);
409 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.kmf);
411 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
412 			      kvm_s390_available_subfunc.kmo);
413 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
414 			      kvm_s390_available_subfunc.pcc);
415 	}
416 	if (test_facility(57)) /* MSA5 */
417 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
418 			      kvm_s390_available_subfunc.ppno);
419 
420 	if (test_facility(146)) /* MSA8 */
421 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
422 			      kvm_s390_available_subfunc.kma);
423 
424 	if (test_facility(155)) /* MSA9 */
425 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
426 			      kvm_s390_available_subfunc.kdsa);
427 
428 	if (test_facility(150)) /* SORTL */
429 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
430 
431 	if (test_facility(151)) /* DFLTCC */
432 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
433 
434 	if (MACHINE_HAS_ESOP)
435 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
436 	/*
437 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
438 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
439 	 */
440 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
441 	    !test_facility(3) || !nested)
442 		return;
443 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
444 	if (sclp.has_64bscao)
445 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
446 	if (sclp.has_siif)
447 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
448 	if (sclp.has_gpere)
449 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
450 	if (sclp.has_gsls)
451 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
452 	if (sclp.has_ib)
453 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
454 	if (sclp.has_cei)
455 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
456 	if (sclp.has_ibs)
457 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
458 	if (sclp.has_kss)
459 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
460 	/*
461 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
462 	 * all skey handling functions read/set the skey from the PGSTE
463 	 * instead of the real storage key.
464 	 *
465 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
466 	 * pages being detected as preserved although they are resident.
467 	 *
468 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
469 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
470 	 *
471 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
472 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
473 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
474 	 *
475 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
476 	 * cannot easily shadow the SCA because of the ipte lock.
477 	 */
478 }
479 
480 int kvm_arch_init(void *opaque)
481 {
482 	int rc = -ENOMEM;
483 
484 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
485 	if (!kvm_s390_dbf)
486 		return -ENOMEM;
487 
488 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
489 	if (!kvm_s390_dbf_uv)
490 		goto out;
491 
492 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
493 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
494 		goto out;
495 
496 	kvm_s390_cpu_feat_init();
497 
498 	/* Register floating interrupt controller interface. */
499 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
500 	if (rc) {
501 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
502 		goto out;
503 	}
504 
505 	rc = kvm_s390_gib_init(GAL_ISC);
506 	if (rc)
507 		goto out;
508 
509 	return 0;
510 
511 out:
512 	kvm_arch_exit();
513 	return rc;
514 }
515 
516 void kvm_arch_exit(void)
517 {
518 	kvm_s390_gib_destroy();
519 	debug_unregister(kvm_s390_dbf);
520 	debug_unregister(kvm_s390_dbf_uv);
521 }
522 
523 /* Section: device related */
524 long kvm_arch_dev_ioctl(struct file *filp,
525 			unsigned int ioctl, unsigned long arg)
526 {
527 	if (ioctl == KVM_S390_ENABLE_SIE)
528 		return s390_enable_sie();
529 	return -EINVAL;
530 }
531 
532 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
533 {
534 	int r;
535 
536 	switch (ext) {
537 	case KVM_CAP_S390_PSW:
538 	case KVM_CAP_S390_GMAP:
539 	case KVM_CAP_SYNC_MMU:
540 #ifdef CONFIG_KVM_S390_UCONTROL
541 	case KVM_CAP_S390_UCONTROL:
542 #endif
543 	case KVM_CAP_ASYNC_PF:
544 	case KVM_CAP_SYNC_REGS:
545 	case KVM_CAP_ONE_REG:
546 	case KVM_CAP_ENABLE_CAP:
547 	case KVM_CAP_S390_CSS_SUPPORT:
548 	case KVM_CAP_IOEVENTFD:
549 	case KVM_CAP_DEVICE_CTRL:
550 	case KVM_CAP_S390_IRQCHIP:
551 	case KVM_CAP_VM_ATTRIBUTES:
552 	case KVM_CAP_MP_STATE:
553 	case KVM_CAP_IMMEDIATE_EXIT:
554 	case KVM_CAP_S390_INJECT_IRQ:
555 	case KVM_CAP_S390_USER_SIGP:
556 	case KVM_CAP_S390_USER_STSI:
557 	case KVM_CAP_S390_SKEYS:
558 	case KVM_CAP_S390_IRQ_STATE:
559 	case KVM_CAP_S390_USER_INSTR0:
560 	case KVM_CAP_S390_CMMA_MIGRATION:
561 	case KVM_CAP_S390_AIS:
562 	case KVM_CAP_S390_AIS_MIGRATION:
563 	case KVM_CAP_S390_VCPU_RESETS:
564 	case KVM_CAP_SET_GUEST_DEBUG:
565 	case KVM_CAP_S390_DIAG318:
566 	case KVM_CAP_S390_MEM_OP_EXTENSION:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		if (ext == KVM_CAP_NR_VCPUS)
589 			r = min_t(unsigned int, num_online_cpus(), r);
590 		break;
591 	case KVM_CAP_S390_COW:
592 		r = MACHINE_HAS_ESOP;
593 		break;
594 	case KVM_CAP_S390_VECTOR_REGISTERS:
595 		r = MACHINE_HAS_VX;
596 		break;
597 	case KVM_CAP_S390_RI:
598 		r = test_facility(64);
599 		break;
600 	case KVM_CAP_S390_GS:
601 		r = test_facility(133);
602 		break;
603 	case KVM_CAP_S390_BPB:
604 		r = test_facility(82);
605 		break;
606 	case KVM_CAP_S390_PROTECTED:
607 		r = is_prot_virt_host();
608 		break;
609 	default:
610 		r = 0;
611 	}
612 	return r;
613 }
614 
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617 	int i;
618 	gfn_t cur_gfn, last_gfn;
619 	unsigned long gaddr, vmaddr;
620 	struct gmap *gmap = kvm->arch.gmap;
621 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622 
623 	/* Loop over all guest segments */
624 	cur_gfn = memslot->base_gfn;
625 	last_gfn = memslot->base_gfn + memslot->npages;
626 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 		gaddr = gfn_to_gpa(cur_gfn);
628 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 		if (kvm_is_error_hva(vmaddr))
630 			continue;
631 
632 		bitmap_zero(bitmap, _PAGE_ENTRIES);
633 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 		for (i = 0; i < _PAGE_ENTRIES; i++) {
635 			if (test_bit(i, bitmap))
636 				mark_page_dirty(kvm, cur_gfn + i);
637 		}
638 
639 		if (fatal_signal_pending(current))
640 			return;
641 		cond_resched();
642 	}
643 }
644 
645 /* Section: vm related */
646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647 
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 			       struct kvm_dirty_log *log)
653 {
654 	int r;
655 	unsigned long n;
656 	struct kvm_memory_slot *memslot;
657 	int is_dirty;
658 
659 	if (kvm_is_ucontrol(kvm))
660 		return -EINVAL;
661 
662 	mutex_lock(&kvm->slots_lock);
663 
664 	r = -EINVAL;
665 	if (log->slot >= KVM_USER_MEM_SLOTS)
666 		goto out;
667 
668 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669 	if (r)
670 		goto out;
671 
672 	/* Clear the dirty log */
673 	if (is_dirty) {
674 		n = kvm_dirty_bitmap_bytes(memslot);
675 		memset(memslot->dirty_bitmap, 0, n);
676 	}
677 	r = 0;
678 out:
679 	mutex_unlock(&kvm->slots_lock);
680 	return r;
681 }
682 
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685 	unsigned long i;
686 	struct kvm_vcpu *vcpu;
687 
688 	kvm_for_each_vcpu(i, vcpu, kvm) {
689 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690 	}
691 }
692 
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695 	int r;
696 
697 	if (cap->flags)
698 		return -EINVAL;
699 
700 	switch (cap->cap) {
701 	case KVM_CAP_S390_IRQCHIP:
702 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 		kvm->arch.use_irqchip = 1;
704 		r = 0;
705 		break;
706 	case KVM_CAP_S390_USER_SIGP:
707 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 		kvm->arch.user_sigp = 1;
709 		r = 0;
710 		break;
711 	case KVM_CAP_S390_VECTOR_REGISTERS:
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (MACHINE_HAS_VX) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 			set_kvm_facility(kvm->arch.model.fac_list, 129);
718 			if (test_facility(134)) {
719 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 				set_kvm_facility(kvm->arch.model.fac_list, 134);
721 			}
722 			if (test_facility(135)) {
723 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 				set_kvm_facility(kvm->arch.model.fac_list, 135);
725 			}
726 			if (test_facility(148)) {
727 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 				set_kvm_facility(kvm->arch.model.fac_list, 148);
729 			}
730 			if (test_facility(152)) {
731 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 				set_kvm_facility(kvm->arch.model.fac_list, 152);
733 			}
734 			if (test_facility(192)) {
735 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 				set_kvm_facility(kvm->arch.model.fac_list, 192);
737 			}
738 			r = 0;
739 		} else
740 			r = -EINVAL;
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_RI:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(64)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 			set_kvm_facility(kvm->arch.model.fac_list, 64);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_AIS:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus) {
762 			r = -EBUSY;
763 		} else {
764 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 			set_kvm_facility(kvm->arch.model.fac_list, 72);
766 			r = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_GS:
773 		r = -EINVAL;
774 		mutex_lock(&kvm->lock);
775 		if (kvm->created_vcpus) {
776 			r = -EBUSY;
777 		} else if (test_facility(133)) {
778 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 			set_kvm_facility(kvm->arch.model.fac_list, 133);
780 			r = 0;
781 		}
782 		mutex_unlock(&kvm->lock);
783 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 			 r ? "(not available)" : "(success)");
785 		break;
786 	case KVM_CAP_S390_HPAGE_1M:
787 		mutex_lock(&kvm->lock);
788 		if (kvm->created_vcpus)
789 			r = -EBUSY;
790 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791 			r = -EINVAL;
792 		else {
793 			r = 0;
794 			mmap_write_lock(kvm->mm);
795 			kvm->mm->context.allow_gmap_hpage_1m = 1;
796 			mmap_write_unlock(kvm->mm);
797 			/*
798 			 * We might have to create fake 4k page
799 			 * tables. To avoid that the hardware works on
800 			 * stale PGSTEs, we emulate these instructions.
801 			 */
802 			kvm->arch.use_skf = 0;
803 			kvm->arch.use_pfmfi = 0;
804 		}
805 		mutex_unlock(&kvm->lock);
806 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 			 r ? "(not available)" : "(success)");
808 		break;
809 	case KVM_CAP_S390_USER_STSI:
810 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 		kvm->arch.user_stsi = 1;
812 		r = 0;
813 		break;
814 	case KVM_CAP_S390_USER_INSTR0:
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 		kvm->arch.user_instr0 = 1;
817 		icpt_operexc_on_all_vcpus(kvm);
818 		r = 0;
819 		break;
820 	default:
821 		r = -EINVAL;
822 		break;
823 	}
824 	return r;
825 }
826 
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829 	int ret;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_MEM_LIMIT_SIZE:
833 		ret = 0;
834 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 			 kvm->arch.mem_limit);
836 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837 			ret = -EFAULT;
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	int ret;
849 	unsigned int idx;
850 	switch (attr->attr) {
851 	case KVM_S390_VM_MEM_ENABLE_CMMA:
852 		ret = -ENXIO;
853 		if (!sclp.has_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 		mutex_lock(&kvm->lock);
858 		if (kvm->created_vcpus)
859 			ret = -EBUSY;
860 		else if (kvm->mm->context.allow_gmap_hpage_1m)
861 			ret = -EINVAL;
862 		else {
863 			kvm->arch.use_cmma = 1;
864 			/* Not compatible with cmma. */
865 			kvm->arch.use_pfmfi = 0;
866 			ret = 0;
867 		}
868 		mutex_unlock(&kvm->lock);
869 		break;
870 	case KVM_S390_VM_MEM_CLR_CMMA:
871 		ret = -ENXIO;
872 		if (!sclp.has_cmma)
873 			break;
874 		ret = -EINVAL;
875 		if (!kvm->arch.use_cmma)
876 			break;
877 
878 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 		mutex_lock(&kvm->lock);
880 		idx = srcu_read_lock(&kvm->srcu);
881 		s390_reset_cmma(kvm->arch.gmap->mm);
882 		srcu_read_unlock(&kvm->srcu, idx);
883 		mutex_unlock(&kvm->lock);
884 		ret = 0;
885 		break;
886 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 		unsigned long new_limit;
888 
889 		if (kvm_is_ucontrol(kvm))
890 			return -EINVAL;
891 
892 		if (get_user(new_limit, (u64 __user *)attr->addr))
893 			return -EFAULT;
894 
895 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 		    new_limit > kvm->arch.mem_limit)
897 			return -E2BIG;
898 
899 		if (!new_limit)
900 			return -EINVAL;
901 
902 		/* gmap_create takes last usable address */
903 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
904 			new_limit -= 1;
905 
906 		ret = -EBUSY;
907 		mutex_lock(&kvm->lock);
908 		if (!kvm->created_vcpus) {
909 			/* gmap_create will round the limit up */
910 			struct gmap *new = gmap_create(current->mm, new_limit);
911 
912 			if (!new) {
913 				ret = -ENOMEM;
914 			} else {
915 				gmap_remove(kvm->arch.gmap);
916 				new->private = kvm;
917 				kvm->arch.gmap = new;
918 				ret = 0;
919 			}
920 		}
921 		mutex_unlock(&kvm->lock);
922 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 			 (void *) kvm->arch.gmap->asce);
925 		break;
926 	}
927 	default:
928 		ret = -ENXIO;
929 		break;
930 	}
931 	return ret;
932 }
933 
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935 
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938 	struct kvm_vcpu *vcpu;
939 	unsigned long i;
940 
941 	kvm_s390_vcpu_block_all(kvm);
942 
943 	kvm_for_each_vcpu(i, vcpu, kvm) {
944 		kvm_s390_vcpu_crypto_setup(vcpu);
945 		/* recreate the shadow crycb by leaving the VSIE handler */
946 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947 	}
948 
949 	kvm_s390_vcpu_unblock_all(kvm);
950 }
951 
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954 	mutex_lock(&kvm->lock);
955 	switch (attr->attr) {
956 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		get_random_bytes(
962 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		kvm->arch.crypto.aes_kw = 1;
965 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966 		break;
967 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 		if (!test_kvm_facility(kvm, 76)) {
969 			mutex_unlock(&kvm->lock);
970 			return -EINVAL;
971 		}
972 		get_random_bytes(
973 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 		kvm->arch.crypto.dea_kw = 1;
976 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 		if (!test_kvm_facility(kvm, 76)) {
980 			mutex_unlock(&kvm->lock);
981 			return -EINVAL;
982 		}
983 		kvm->arch.crypto.aes_kw = 0;
984 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987 		break;
988 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 		if (!test_kvm_facility(kvm, 76)) {
990 			mutex_unlock(&kvm->lock);
991 			return -EINVAL;
992 		}
993 		kvm->arch.crypto.dea_kw = 0;
994 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997 		break;
998 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 		if (!ap_instructions_available()) {
1000 			mutex_unlock(&kvm->lock);
1001 			return -EOPNOTSUPP;
1002 		}
1003 		kvm->arch.crypto.apie = 1;
1004 		break;
1005 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 		if (!ap_instructions_available()) {
1007 			mutex_unlock(&kvm->lock);
1008 			return -EOPNOTSUPP;
1009 		}
1010 		kvm->arch.crypto.apie = 0;
1011 		break;
1012 	default:
1013 		mutex_unlock(&kvm->lock);
1014 		return -ENXIO;
1015 	}
1016 
1017 	kvm_s390_vcpu_crypto_reset_all(kvm);
1018 	mutex_unlock(&kvm->lock);
1019 	return 0;
1020 }
1021 
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024 	unsigned long cx;
1025 	struct kvm_vcpu *vcpu;
1026 
1027 	kvm_for_each_vcpu(cx, vcpu, kvm)
1028 		kvm_s390_sync_request(req, vcpu);
1029 }
1030 
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037 	struct kvm_memory_slot *ms;
1038 	struct kvm_memslots *slots;
1039 	unsigned long ram_pages = 0;
1040 	int bkt;
1041 
1042 	/* migration mode already enabled */
1043 	if (kvm->arch.migration_mode)
1044 		return 0;
1045 	slots = kvm_memslots(kvm);
1046 	if (!slots || kvm_memslots_empty(slots))
1047 		return -EINVAL;
1048 
1049 	if (!kvm->arch.use_cmma) {
1050 		kvm->arch.migration_mode = 1;
1051 		return 0;
1052 	}
1053 	/* mark all the pages in active slots as dirty */
1054 	kvm_for_each_memslot(ms, bkt, slots) {
1055 		if (!ms->dirty_bitmap)
1056 			return -EINVAL;
1057 		/*
1058 		 * The second half of the bitmap is only used on x86,
1059 		 * and would be wasted otherwise, so we put it to good
1060 		 * use here to keep track of the state of the storage
1061 		 * attributes.
1062 		 */
1063 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064 		ram_pages += ms->npages;
1065 	}
1066 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067 	kvm->arch.migration_mode = 1;
1068 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069 	return 0;
1070 }
1071 
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078 	/* migration mode already disabled */
1079 	if (!kvm->arch.migration_mode)
1080 		return 0;
1081 	kvm->arch.migration_mode = 0;
1082 	if (kvm->arch.use_cmma)
1083 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084 	return 0;
1085 }
1086 
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088 				     struct kvm_device_attr *attr)
1089 {
1090 	int res = -ENXIO;
1091 
1092 	mutex_lock(&kvm->slots_lock);
1093 	switch (attr->attr) {
1094 	case KVM_S390_VM_MIGRATION_START:
1095 		res = kvm_s390_vm_start_migration(kvm);
1096 		break;
1097 	case KVM_S390_VM_MIGRATION_STOP:
1098 		res = kvm_s390_vm_stop_migration(kvm);
1099 		break;
1100 	default:
1101 		break;
1102 	}
1103 	mutex_unlock(&kvm->slots_lock);
1104 
1105 	return res;
1106 }
1107 
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109 				     struct kvm_device_attr *attr)
1110 {
1111 	u64 mig = kvm->arch.migration_mode;
1112 
1113 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114 		return -ENXIO;
1115 
1116 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117 		return -EFAULT;
1118 	return 0;
1119 }
1120 
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 	struct kvm_s390_vm_tod_clock gtod;
1124 
1125 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126 		return -EFAULT;
1127 
1128 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129 		return -EINVAL;
1130 	kvm_s390_set_tod_clock(kvm, &gtod);
1131 
1132 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 		gtod.epoch_idx, gtod.tod);
1134 
1135 	return 0;
1136 }
1137 
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 	u8 gtod_high;
1141 
1142 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143 					   sizeof(gtod_high)))
1144 		return -EFAULT;
1145 
1146 	if (gtod_high != 0)
1147 		return -EINVAL;
1148 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149 
1150 	return 0;
1151 }
1152 
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1156 
1157 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158 			   sizeof(gtod.tod)))
1159 		return -EFAULT;
1160 
1161 	kvm_s390_set_tod_clock(kvm, &gtod);
1162 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163 	return 0;
1164 }
1165 
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 	int ret;
1169 
1170 	if (attr->flags)
1171 		return -EINVAL;
1172 
1173 	switch (attr->attr) {
1174 	case KVM_S390_VM_TOD_EXT:
1175 		ret = kvm_s390_set_tod_ext(kvm, attr);
1176 		break;
1177 	case KVM_S390_VM_TOD_HIGH:
1178 		ret = kvm_s390_set_tod_high(kvm, attr);
1179 		break;
1180 	case KVM_S390_VM_TOD_LOW:
1181 		ret = kvm_s390_set_tod_low(kvm, attr);
1182 		break;
1183 	default:
1184 		ret = -ENXIO;
1185 		break;
1186 	}
1187 	return ret;
1188 }
1189 
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191 				   struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193 	union tod_clock clk;
1194 
1195 	preempt_disable();
1196 
1197 	store_tod_clock_ext(&clk);
1198 
1199 	gtod->tod = clk.tod + kvm->arch.epoch;
1200 	gtod->epoch_idx = 0;
1201 	if (test_kvm_facility(kvm, 139)) {
1202 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203 		if (gtod->tod < clk.tod)
1204 			gtod->epoch_idx += 1;
1205 	}
1206 
1207 	preempt_enable();
1208 }
1209 
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 	struct kvm_s390_vm_tod_clock gtod;
1213 
1214 	memset(&gtod, 0, sizeof(gtod));
1215 	kvm_s390_get_tod_clock(kvm, &gtod);
1216 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 		return -EFAULT;
1218 
1219 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 		gtod.epoch_idx, gtod.tod);
1221 	return 0;
1222 }
1223 
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 	u8 gtod_high = 0;
1227 
1228 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229 					 sizeof(gtod_high)))
1230 		return -EFAULT;
1231 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232 
1233 	return 0;
1234 }
1235 
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	u64 gtod;
1239 
1240 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1241 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242 		return -EFAULT;
1243 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244 
1245 	return 0;
1246 }
1247 
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250 	int ret;
1251 
1252 	if (attr->flags)
1253 		return -EINVAL;
1254 
1255 	switch (attr->attr) {
1256 	case KVM_S390_VM_TOD_EXT:
1257 		ret = kvm_s390_get_tod_ext(kvm, attr);
1258 		break;
1259 	case KVM_S390_VM_TOD_HIGH:
1260 		ret = kvm_s390_get_tod_high(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_TOD_LOW:
1263 		ret = kvm_s390_get_tod_low(kvm, attr);
1264 		break;
1265 	default:
1266 		ret = -ENXIO;
1267 		break;
1268 	}
1269 	return ret;
1270 }
1271 
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274 	struct kvm_s390_vm_cpu_processor *proc;
1275 	u16 lowest_ibc, unblocked_ibc;
1276 	int ret = 0;
1277 
1278 	mutex_lock(&kvm->lock);
1279 	if (kvm->created_vcpus) {
1280 		ret = -EBUSY;
1281 		goto out;
1282 	}
1283 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284 	if (!proc) {
1285 		ret = -ENOMEM;
1286 		goto out;
1287 	}
1288 	if (!copy_from_user(proc, (void __user *)attr->addr,
1289 			    sizeof(*proc))) {
1290 		kvm->arch.model.cpuid = proc->cpuid;
1291 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292 		unblocked_ibc = sclp.ibc & 0xfff;
1293 		if (lowest_ibc && proc->ibc) {
1294 			if (proc->ibc > unblocked_ibc)
1295 				kvm->arch.model.ibc = unblocked_ibc;
1296 			else if (proc->ibc < lowest_ibc)
1297 				kvm->arch.model.ibc = lowest_ibc;
1298 			else
1299 				kvm->arch.model.ibc = proc->ibc;
1300 		}
1301 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1303 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 			 kvm->arch.model.ibc,
1305 			 kvm->arch.model.cpuid);
1306 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 			 kvm->arch.model.fac_list[0],
1308 			 kvm->arch.model.fac_list[1],
1309 			 kvm->arch.model.fac_list[2]);
1310 	} else
1311 		ret = -EFAULT;
1312 	kfree(proc);
1313 out:
1314 	mutex_unlock(&kvm->lock);
1315 	return ret;
1316 }
1317 
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319 				       struct kvm_device_attr *attr)
1320 {
1321 	struct kvm_s390_vm_cpu_feat data;
1322 
1323 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324 		return -EFAULT;
1325 	if (!bitmap_subset((unsigned long *) data.feat,
1326 			   kvm_s390_available_cpu_feat,
1327 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1328 		return -EINVAL;
1329 
1330 	mutex_lock(&kvm->lock);
1331 	if (kvm->created_vcpus) {
1332 		mutex_unlock(&kvm->lock);
1333 		return -EBUSY;
1334 	}
1335 	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1336 	mutex_unlock(&kvm->lock);
1337 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1338 			 data.feat[0],
1339 			 data.feat[1],
1340 			 data.feat[2]);
1341 	return 0;
1342 }
1343 
1344 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1345 					  struct kvm_device_attr *attr)
1346 {
1347 	mutex_lock(&kvm->lock);
1348 	if (kvm->created_vcpus) {
1349 		mutex_unlock(&kvm->lock);
1350 		return -EBUSY;
1351 	}
1352 
1353 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1354 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1355 		mutex_unlock(&kvm->lock);
1356 		return -EFAULT;
1357 	}
1358 	mutex_unlock(&kvm->lock);
1359 
1360 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1365 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1368 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1371 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1374 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1377 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1380 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1383 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1386 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1389 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1392 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1395 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1398 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1399 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1401 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1402 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1404 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1405 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1407 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1408 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1410 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1412 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1413 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1416 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1417 
1418 	return 0;
1419 }
1420 
1421 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1422 {
1423 	int ret = -ENXIO;
1424 
1425 	switch (attr->attr) {
1426 	case KVM_S390_VM_CPU_PROCESSOR:
1427 		ret = kvm_s390_set_processor(kvm, attr);
1428 		break;
1429 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1430 		ret = kvm_s390_set_processor_feat(kvm, attr);
1431 		break;
1432 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1433 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1434 		break;
1435 	}
1436 	return ret;
1437 }
1438 
1439 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1440 {
1441 	struct kvm_s390_vm_cpu_processor *proc;
1442 	int ret = 0;
1443 
1444 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1445 	if (!proc) {
1446 		ret = -ENOMEM;
1447 		goto out;
1448 	}
1449 	proc->cpuid = kvm->arch.model.cpuid;
1450 	proc->ibc = kvm->arch.model.ibc;
1451 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1452 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1453 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1454 		 kvm->arch.model.ibc,
1455 		 kvm->arch.model.cpuid);
1456 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1457 		 kvm->arch.model.fac_list[0],
1458 		 kvm->arch.model.fac_list[1],
1459 		 kvm->arch.model.fac_list[2]);
1460 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1461 		ret = -EFAULT;
1462 	kfree(proc);
1463 out:
1464 	return ret;
1465 }
1466 
1467 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1468 {
1469 	struct kvm_s390_vm_cpu_machine *mach;
1470 	int ret = 0;
1471 
1472 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1473 	if (!mach) {
1474 		ret = -ENOMEM;
1475 		goto out;
1476 	}
1477 	get_cpu_id((struct cpuid *) &mach->cpuid);
1478 	mach->ibc = sclp.ibc;
1479 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1480 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1481 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1482 	       sizeof(stfle_fac_list));
1483 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1484 		 kvm->arch.model.ibc,
1485 		 kvm->arch.model.cpuid);
1486 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1487 		 mach->fac_mask[0],
1488 		 mach->fac_mask[1],
1489 		 mach->fac_mask[2]);
1490 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1491 		 mach->fac_list[0],
1492 		 mach->fac_list[1],
1493 		 mach->fac_list[2]);
1494 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1495 		ret = -EFAULT;
1496 	kfree(mach);
1497 out:
1498 	return ret;
1499 }
1500 
1501 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1502 				       struct kvm_device_attr *attr)
1503 {
1504 	struct kvm_s390_vm_cpu_feat data;
1505 
1506 	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1507 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1508 		return -EFAULT;
1509 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1510 			 data.feat[0],
1511 			 data.feat[1],
1512 			 data.feat[2]);
1513 	return 0;
1514 }
1515 
1516 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1517 				     struct kvm_device_attr *attr)
1518 {
1519 	struct kvm_s390_vm_cpu_feat data;
1520 
1521 	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1522 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1523 		return -EFAULT;
1524 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1525 			 data.feat[0],
1526 			 data.feat[1],
1527 			 data.feat[2]);
1528 	return 0;
1529 }
1530 
1531 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1532 					  struct kvm_device_attr *attr)
1533 {
1534 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1535 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1536 		return -EFAULT;
1537 
1538 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1541 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1543 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1546 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1549 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1552 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1555 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1558 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1561 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1564 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1567 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1570 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1573 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1576 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1579 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1582 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1585 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1588 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1589 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1590 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1591 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1594 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1595 
1596 	return 0;
1597 }
1598 
1599 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1600 					struct kvm_device_attr *attr)
1601 {
1602 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1603 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1604 		return -EFAULT;
1605 
1606 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1609 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1611 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1614 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1617 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1620 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1623 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1626 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1629 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1632 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1635 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1638 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1641 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1644 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1647 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1650 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1651 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1653 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1656 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1657 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1658 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1659 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1662 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1663 
1664 	return 0;
1665 }
1666 
1667 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1668 {
1669 	int ret = -ENXIO;
1670 
1671 	switch (attr->attr) {
1672 	case KVM_S390_VM_CPU_PROCESSOR:
1673 		ret = kvm_s390_get_processor(kvm, attr);
1674 		break;
1675 	case KVM_S390_VM_CPU_MACHINE:
1676 		ret = kvm_s390_get_machine(kvm, attr);
1677 		break;
1678 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1679 		ret = kvm_s390_get_processor_feat(kvm, attr);
1680 		break;
1681 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1682 		ret = kvm_s390_get_machine_feat(kvm, attr);
1683 		break;
1684 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1685 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1686 		break;
1687 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1688 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1689 		break;
1690 	}
1691 	return ret;
1692 }
1693 
1694 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1695 {
1696 	int ret;
1697 
1698 	switch (attr->group) {
1699 	case KVM_S390_VM_MEM_CTRL:
1700 		ret = kvm_s390_set_mem_control(kvm, attr);
1701 		break;
1702 	case KVM_S390_VM_TOD:
1703 		ret = kvm_s390_set_tod(kvm, attr);
1704 		break;
1705 	case KVM_S390_VM_CPU_MODEL:
1706 		ret = kvm_s390_set_cpu_model(kvm, attr);
1707 		break;
1708 	case KVM_S390_VM_CRYPTO:
1709 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1710 		break;
1711 	case KVM_S390_VM_MIGRATION:
1712 		ret = kvm_s390_vm_set_migration(kvm, attr);
1713 		break;
1714 	default:
1715 		ret = -ENXIO;
1716 		break;
1717 	}
1718 
1719 	return ret;
1720 }
1721 
1722 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1723 {
1724 	int ret;
1725 
1726 	switch (attr->group) {
1727 	case KVM_S390_VM_MEM_CTRL:
1728 		ret = kvm_s390_get_mem_control(kvm, attr);
1729 		break;
1730 	case KVM_S390_VM_TOD:
1731 		ret = kvm_s390_get_tod(kvm, attr);
1732 		break;
1733 	case KVM_S390_VM_CPU_MODEL:
1734 		ret = kvm_s390_get_cpu_model(kvm, attr);
1735 		break;
1736 	case KVM_S390_VM_MIGRATION:
1737 		ret = kvm_s390_vm_get_migration(kvm, attr);
1738 		break;
1739 	default:
1740 		ret = -ENXIO;
1741 		break;
1742 	}
1743 
1744 	return ret;
1745 }
1746 
1747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1748 {
1749 	int ret;
1750 
1751 	switch (attr->group) {
1752 	case KVM_S390_VM_MEM_CTRL:
1753 		switch (attr->attr) {
1754 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1755 		case KVM_S390_VM_MEM_CLR_CMMA:
1756 			ret = sclp.has_cmma ? 0 : -ENXIO;
1757 			break;
1758 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1759 			ret = 0;
1760 			break;
1761 		default:
1762 			ret = -ENXIO;
1763 			break;
1764 		}
1765 		break;
1766 	case KVM_S390_VM_TOD:
1767 		switch (attr->attr) {
1768 		case KVM_S390_VM_TOD_LOW:
1769 		case KVM_S390_VM_TOD_HIGH:
1770 			ret = 0;
1771 			break;
1772 		default:
1773 			ret = -ENXIO;
1774 			break;
1775 		}
1776 		break;
1777 	case KVM_S390_VM_CPU_MODEL:
1778 		switch (attr->attr) {
1779 		case KVM_S390_VM_CPU_PROCESSOR:
1780 		case KVM_S390_VM_CPU_MACHINE:
1781 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1782 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1783 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1784 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1785 			ret = 0;
1786 			break;
1787 		default:
1788 			ret = -ENXIO;
1789 			break;
1790 		}
1791 		break;
1792 	case KVM_S390_VM_CRYPTO:
1793 		switch (attr->attr) {
1794 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1795 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1796 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1797 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1798 			ret = 0;
1799 			break;
1800 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1801 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1802 			ret = ap_instructions_available() ? 0 : -ENXIO;
1803 			break;
1804 		default:
1805 			ret = -ENXIO;
1806 			break;
1807 		}
1808 		break;
1809 	case KVM_S390_VM_MIGRATION:
1810 		ret = 0;
1811 		break;
1812 	default:
1813 		ret = -ENXIO;
1814 		break;
1815 	}
1816 
1817 	return ret;
1818 }
1819 
1820 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1821 {
1822 	uint8_t *keys;
1823 	uint64_t hva;
1824 	int srcu_idx, i, r = 0;
1825 
1826 	if (args->flags != 0)
1827 		return -EINVAL;
1828 
1829 	/* Is this guest using storage keys? */
1830 	if (!mm_uses_skeys(current->mm))
1831 		return KVM_S390_GET_SKEYS_NONE;
1832 
1833 	/* Enforce sane limit on memory allocation */
1834 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1835 		return -EINVAL;
1836 
1837 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1838 	if (!keys)
1839 		return -ENOMEM;
1840 
1841 	mmap_read_lock(current->mm);
1842 	srcu_idx = srcu_read_lock(&kvm->srcu);
1843 	for (i = 0; i < args->count; i++) {
1844 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1845 		if (kvm_is_error_hva(hva)) {
1846 			r = -EFAULT;
1847 			break;
1848 		}
1849 
1850 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1851 		if (r)
1852 			break;
1853 	}
1854 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1855 	mmap_read_unlock(current->mm);
1856 
1857 	if (!r) {
1858 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1859 				 sizeof(uint8_t) * args->count);
1860 		if (r)
1861 			r = -EFAULT;
1862 	}
1863 
1864 	kvfree(keys);
1865 	return r;
1866 }
1867 
1868 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1869 {
1870 	uint8_t *keys;
1871 	uint64_t hva;
1872 	int srcu_idx, i, r = 0;
1873 	bool unlocked;
1874 
1875 	if (args->flags != 0)
1876 		return -EINVAL;
1877 
1878 	/* Enforce sane limit on memory allocation */
1879 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1880 		return -EINVAL;
1881 
1882 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1883 	if (!keys)
1884 		return -ENOMEM;
1885 
1886 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1887 			   sizeof(uint8_t) * args->count);
1888 	if (r) {
1889 		r = -EFAULT;
1890 		goto out;
1891 	}
1892 
1893 	/* Enable storage key handling for the guest */
1894 	r = s390_enable_skey();
1895 	if (r)
1896 		goto out;
1897 
1898 	i = 0;
1899 	mmap_read_lock(current->mm);
1900 	srcu_idx = srcu_read_lock(&kvm->srcu);
1901         while (i < args->count) {
1902 		unlocked = false;
1903 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1904 		if (kvm_is_error_hva(hva)) {
1905 			r = -EFAULT;
1906 			break;
1907 		}
1908 
1909 		/* Lowest order bit is reserved */
1910 		if (keys[i] & 0x01) {
1911 			r = -EINVAL;
1912 			break;
1913 		}
1914 
1915 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1916 		if (r) {
1917 			r = fixup_user_fault(current->mm, hva,
1918 					     FAULT_FLAG_WRITE, &unlocked);
1919 			if (r)
1920 				break;
1921 		}
1922 		if (!r)
1923 			i++;
1924 	}
1925 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1926 	mmap_read_unlock(current->mm);
1927 out:
1928 	kvfree(keys);
1929 	return r;
1930 }
1931 
1932 /*
1933  * Base address and length must be sent at the start of each block, therefore
1934  * it's cheaper to send some clean data, as long as it's less than the size of
1935  * two longs.
1936  */
1937 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1938 /* for consistency */
1939 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1940 
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942 			      u8 *res, unsigned long bufsize)
1943 {
1944 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945 
1946 	args->count = 0;
1947 	while (args->count < bufsize) {
1948 		hva = gfn_to_hva(kvm, cur_gfn);
1949 		/*
1950 		 * We return an error if the first value was invalid, but we
1951 		 * return successfully if at least one value was copied.
1952 		 */
1953 		if (kvm_is_error_hva(hva))
1954 			return args->count ? 0 : -EFAULT;
1955 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956 			pgstev = 0;
1957 		res[args->count++] = (pgstev >> 24) & 0x43;
1958 		cur_gfn++;
1959 	}
1960 
1961 	return 0;
1962 }
1963 
1964 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1965 						     gfn_t gfn)
1966 {
1967 	return ____gfn_to_memslot(slots, gfn, true);
1968 }
1969 
1970 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1971 					      unsigned long cur_gfn)
1972 {
1973 	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1974 	unsigned long ofs = cur_gfn - ms->base_gfn;
1975 	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1976 
1977 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1978 		mnode = rb_next(mnode);
1979 		/* If we are above the highest slot, wrap around */
1980 		if (!mnode)
1981 			mnode = rb_first(&slots->gfn_tree);
1982 
1983 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1984 		ofs = 0;
1985 	}
1986 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1987 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1988 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1989 		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1990 	}
1991 	return ms->base_gfn + ofs;
1992 }
1993 
1994 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1995 			     u8 *res, unsigned long bufsize)
1996 {
1997 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1998 	struct kvm_memslots *slots = kvm_memslots(kvm);
1999 	struct kvm_memory_slot *ms;
2000 
2001 	if (unlikely(kvm_memslots_empty(slots)))
2002 		return 0;
2003 
2004 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2005 	ms = gfn_to_memslot(kvm, cur_gfn);
2006 	args->count = 0;
2007 	args->start_gfn = cur_gfn;
2008 	if (!ms)
2009 		return 0;
2010 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2011 	mem_end = kvm_s390_get_gfn_end(slots);
2012 
2013 	while (args->count < bufsize) {
2014 		hva = gfn_to_hva(kvm, cur_gfn);
2015 		if (kvm_is_error_hva(hva))
2016 			return 0;
2017 		/* Decrement only if we actually flipped the bit to 0 */
2018 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2019 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2020 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2021 			pgstev = 0;
2022 		/* Save the value */
2023 		res[args->count++] = (pgstev >> 24) & 0x43;
2024 		/* If the next bit is too far away, stop. */
2025 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2026 			return 0;
2027 		/* If we reached the previous "next", find the next one */
2028 		if (cur_gfn == next_gfn)
2029 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2030 		/* Reached the end of memory or of the buffer, stop */
2031 		if ((next_gfn >= mem_end) ||
2032 		    (next_gfn - args->start_gfn >= bufsize))
2033 			return 0;
2034 		cur_gfn++;
2035 		/* Reached the end of the current memslot, take the next one. */
2036 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2037 			ms = gfn_to_memslot(kvm, cur_gfn);
2038 			if (!ms)
2039 				return 0;
2040 		}
2041 	}
2042 	return 0;
2043 }
2044 
2045 /*
2046  * This function searches for the next page with dirty CMMA attributes, and
2047  * saves the attributes in the buffer up to either the end of the buffer or
2048  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2049  * no trailing clean bytes are saved.
2050  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2051  * output buffer will indicate 0 as length.
2052  */
2053 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2054 				  struct kvm_s390_cmma_log *args)
2055 {
2056 	unsigned long bufsize;
2057 	int srcu_idx, peek, ret;
2058 	u8 *values;
2059 
2060 	if (!kvm->arch.use_cmma)
2061 		return -ENXIO;
2062 	/* Invalid/unsupported flags were specified */
2063 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2064 		return -EINVAL;
2065 	/* Migration mode query, and we are not doing a migration */
2066 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2067 	if (!peek && !kvm->arch.migration_mode)
2068 		return -EINVAL;
2069 	/* CMMA is disabled or was not used, or the buffer has length zero */
2070 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2071 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2072 		memset(args, 0, sizeof(*args));
2073 		return 0;
2074 	}
2075 	/* We are not peeking, and there are no dirty pages */
2076 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2077 		memset(args, 0, sizeof(*args));
2078 		return 0;
2079 	}
2080 
2081 	values = vmalloc(bufsize);
2082 	if (!values)
2083 		return -ENOMEM;
2084 
2085 	mmap_read_lock(kvm->mm);
2086 	srcu_idx = srcu_read_lock(&kvm->srcu);
2087 	if (peek)
2088 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2089 	else
2090 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2091 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2092 	mmap_read_unlock(kvm->mm);
2093 
2094 	if (kvm->arch.migration_mode)
2095 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2096 	else
2097 		args->remaining = 0;
2098 
2099 	if (copy_to_user((void __user *)args->values, values, args->count))
2100 		ret = -EFAULT;
2101 
2102 	vfree(values);
2103 	return ret;
2104 }
2105 
2106 /*
2107  * This function sets the CMMA attributes for the given pages. If the input
2108  * buffer has zero length, no action is taken, otherwise the attributes are
2109  * set and the mm->context.uses_cmm flag is set.
2110  */
2111 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2112 				  const struct kvm_s390_cmma_log *args)
2113 {
2114 	unsigned long hva, mask, pgstev, i;
2115 	uint8_t *bits;
2116 	int srcu_idx, r = 0;
2117 
2118 	mask = args->mask;
2119 
2120 	if (!kvm->arch.use_cmma)
2121 		return -ENXIO;
2122 	/* invalid/unsupported flags */
2123 	if (args->flags != 0)
2124 		return -EINVAL;
2125 	/* Enforce sane limit on memory allocation */
2126 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2127 		return -EINVAL;
2128 	/* Nothing to do */
2129 	if (args->count == 0)
2130 		return 0;
2131 
2132 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2133 	if (!bits)
2134 		return -ENOMEM;
2135 
2136 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2137 	if (r) {
2138 		r = -EFAULT;
2139 		goto out;
2140 	}
2141 
2142 	mmap_read_lock(kvm->mm);
2143 	srcu_idx = srcu_read_lock(&kvm->srcu);
2144 	for (i = 0; i < args->count; i++) {
2145 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2146 		if (kvm_is_error_hva(hva)) {
2147 			r = -EFAULT;
2148 			break;
2149 		}
2150 
2151 		pgstev = bits[i];
2152 		pgstev = pgstev << 24;
2153 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2154 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2155 	}
2156 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2157 	mmap_read_unlock(kvm->mm);
2158 
2159 	if (!kvm->mm->context.uses_cmm) {
2160 		mmap_write_lock(kvm->mm);
2161 		kvm->mm->context.uses_cmm = 1;
2162 		mmap_write_unlock(kvm->mm);
2163 	}
2164 out:
2165 	vfree(bits);
2166 	return r;
2167 }
2168 
2169 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2170 {
2171 	struct kvm_vcpu *vcpu;
2172 	u16 rc, rrc;
2173 	int ret = 0;
2174 	unsigned long i;
2175 
2176 	/*
2177 	 * We ignore failures and try to destroy as many CPUs as possible.
2178 	 * At the same time we must not free the assigned resources when
2179 	 * this fails, as the ultravisor has still access to that memory.
2180 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2181 	 * behind.
2182 	 * We want to return the first failure rc and rrc, though.
2183 	 */
2184 	kvm_for_each_vcpu(i, vcpu, kvm) {
2185 		mutex_lock(&vcpu->mutex);
2186 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2187 			*rcp = rc;
2188 			*rrcp = rrc;
2189 			ret = -EIO;
2190 		}
2191 		mutex_unlock(&vcpu->mutex);
2192 	}
2193 	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2194 	if (use_gisa)
2195 		kvm_s390_gisa_enable(kvm);
2196 	return ret;
2197 }
2198 
2199 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2200 {
2201 	unsigned long i;
2202 	int r = 0;
2203 	u16 dummy;
2204 
2205 	struct kvm_vcpu *vcpu;
2206 
2207 	/* Disable the GISA if the ultravisor does not support AIV. */
2208 	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2209 		kvm_s390_gisa_disable(kvm);
2210 
2211 	kvm_for_each_vcpu(i, vcpu, kvm) {
2212 		mutex_lock(&vcpu->mutex);
2213 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214 		mutex_unlock(&vcpu->mutex);
2215 		if (r)
2216 			break;
2217 	}
2218 	if (r)
2219 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220 	return r;
2221 }
2222 
2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 {
2225 	int r = 0;
2226 	u16 dummy;
2227 	void __user *argp = (void __user *)cmd->data;
2228 
2229 	switch (cmd->cmd) {
2230 	case KVM_PV_ENABLE: {
2231 		r = -EINVAL;
2232 		if (kvm_s390_pv_is_protected(kvm))
2233 			break;
2234 
2235 		/*
2236 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2237 		 *  esca, we need no cleanup in the error cases below
2238 		 */
2239 		r = sca_switch_to_extended(kvm);
2240 		if (r)
2241 			break;
2242 
2243 		mmap_write_lock(current->mm);
2244 		r = gmap_mark_unmergeable();
2245 		mmap_write_unlock(current->mm);
2246 		if (r)
2247 			break;
2248 
2249 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250 		if (r)
2251 			break;
2252 
2253 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254 		if (r)
2255 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256 
2257 		/* we need to block service interrupts from now on */
2258 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259 		break;
2260 	}
2261 	case KVM_PV_DISABLE: {
2262 		r = -EINVAL;
2263 		if (!kvm_s390_pv_is_protected(kvm))
2264 			break;
2265 
2266 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267 		/*
2268 		 * If a CPU could not be destroyed, destroy VM will also fail.
2269 		 * There is no point in trying to destroy it. Instead return
2270 		 * the rc and rrc from the first CPU that failed destroying.
2271 		 */
2272 		if (r)
2273 			break;
2274 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275 
2276 		/* no need to block service interrupts any more */
2277 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278 		break;
2279 	}
2280 	case KVM_PV_SET_SEC_PARMS: {
2281 		struct kvm_s390_pv_sec_parm parms = {};
2282 		void *hdr;
2283 
2284 		r = -EINVAL;
2285 		if (!kvm_s390_pv_is_protected(kvm))
2286 			break;
2287 
2288 		r = -EFAULT;
2289 		if (copy_from_user(&parms, argp, sizeof(parms)))
2290 			break;
2291 
2292 		/* Currently restricted to 8KB */
2293 		r = -EINVAL;
2294 		if (parms.length > PAGE_SIZE * 2)
2295 			break;
2296 
2297 		r = -ENOMEM;
2298 		hdr = vmalloc(parms.length);
2299 		if (!hdr)
2300 			break;
2301 
2302 		r = -EFAULT;
2303 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2304 				    parms.length))
2305 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306 						      &cmd->rc, &cmd->rrc);
2307 
2308 		vfree(hdr);
2309 		break;
2310 	}
2311 	case KVM_PV_UNPACK: {
2312 		struct kvm_s390_pv_unp unp = {};
2313 
2314 		r = -EINVAL;
2315 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2316 			break;
2317 
2318 		r = -EFAULT;
2319 		if (copy_from_user(&unp, argp, sizeof(unp)))
2320 			break;
2321 
2322 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323 				       &cmd->rc, &cmd->rrc);
2324 		break;
2325 	}
2326 	case KVM_PV_VERIFY: {
2327 		r = -EINVAL;
2328 		if (!kvm_s390_pv_is_protected(kvm))
2329 			break;
2330 
2331 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334 			     cmd->rrc);
2335 		break;
2336 	}
2337 	case KVM_PV_PREP_RESET: {
2338 		r = -EINVAL;
2339 		if (!kvm_s390_pv_is_protected(kvm))
2340 			break;
2341 
2342 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345 			     cmd->rc, cmd->rrc);
2346 		break;
2347 	}
2348 	case KVM_PV_UNSHARE_ALL: {
2349 		r = -EINVAL;
2350 		if (!kvm_s390_pv_is_protected(kvm))
2351 			break;
2352 
2353 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356 			     cmd->rc, cmd->rrc);
2357 		break;
2358 	}
2359 	default:
2360 		r = -ENOTTY;
2361 	}
2362 	return r;
2363 }
2364 
2365 static bool access_key_invalid(u8 access_key)
2366 {
2367 	return access_key > 0xf;
2368 }
2369 
2370 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2371 {
2372 	void __user *uaddr = (void __user *)mop->buf;
2373 	u64 supported_flags;
2374 	void *tmpbuf = NULL;
2375 	int r, srcu_idx;
2376 
2377 	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2378 			  | KVM_S390_MEMOP_F_CHECK_ONLY;
2379 	if (mop->flags & ~supported_flags || !mop->size)
2380 		return -EINVAL;
2381 	if (mop->size > MEM_OP_MAX_SIZE)
2382 		return -E2BIG;
2383 	/*
2384 	 * This is technically a heuristic only, if the kvm->lock is not
2385 	 * taken, it is not guaranteed that the vm is/remains non-protected.
2386 	 * This is ok from a kernel perspective, wrongdoing is detected
2387 	 * on the access, -EFAULT is returned and the vm may crash the
2388 	 * next time it accesses the memory in question.
2389 	 * There is no sane usecase to do switching and a memop on two
2390 	 * different CPUs at the same time.
2391 	 */
2392 	if (kvm_s390_pv_get_handle(kvm))
2393 		return -EINVAL;
2394 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2395 		if (access_key_invalid(mop->key))
2396 			return -EINVAL;
2397 	} else {
2398 		mop->key = 0;
2399 	}
2400 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2401 		tmpbuf = vmalloc(mop->size);
2402 		if (!tmpbuf)
2403 			return -ENOMEM;
2404 	}
2405 
2406 	srcu_idx = srcu_read_lock(&kvm->srcu);
2407 
2408 	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2409 		r = PGM_ADDRESSING;
2410 		goto out_unlock;
2411 	}
2412 
2413 	switch (mop->op) {
2414 	case KVM_S390_MEMOP_ABSOLUTE_READ: {
2415 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2416 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2417 		} else {
2418 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2419 						      mop->size, GACC_FETCH, mop->key);
2420 			if (r == 0) {
2421 				if (copy_to_user(uaddr, tmpbuf, mop->size))
2422 					r = -EFAULT;
2423 			}
2424 		}
2425 		break;
2426 	}
2427 	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2428 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2429 			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2430 		} else {
2431 			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2432 				r = -EFAULT;
2433 				break;
2434 			}
2435 			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2436 						      mop->size, GACC_STORE, mop->key);
2437 		}
2438 		break;
2439 	}
2440 	default:
2441 		r = -EINVAL;
2442 	}
2443 
2444 out_unlock:
2445 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2446 
2447 	vfree(tmpbuf);
2448 	return r;
2449 }
2450 
2451 long kvm_arch_vm_ioctl(struct file *filp,
2452 		       unsigned int ioctl, unsigned long arg)
2453 {
2454 	struct kvm *kvm = filp->private_data;
2455 	void __user *argp = (void __user *)arg;
2456 	struct kvm_device_attr attr;
2457 	int r;
2458 
2459 	switch (ioctl) {
2460 	case KVM_S390_INTERRUPT: {
2461 		struct kvm_s390_interrupt s390int;
2462 
2463 		r = -EFAULT;
2464 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2465 			break;
2466 		r = kvm_s390_inject_vm(kvm, &s390int);
2467 		break;
2468 	}
2469 	case KVM_CREATE_IRQCHIP: {
2470 		struct kvm_irq_routing_entry routing;
2471 
2472 		r = -EINVAL;
2473 		if (kvm->arch.use_irqchip) {
2474 			/* Set up dummy routing. */
2475 			memset(&routing, 0, sizeof(routing));
2476 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2477 		}
2478 		break;
2479 	}
2480 	case KVM_SET_DEVICE_ATTR: {
2481 		r = -EFAULT;
2482 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2483 			break;
2484 		r = kvm_s390_vm_set_attr(kvm, &attr);
2485 		break;
2486 	}
2487 	case KVM_GET_DEVICE_ATTR: {
2488 		r = -EFAULT;
2489 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2490 			break;
2491 		r = kvm_s390_vm_get_attr(kvm, &attr);
2492 		break;
2493 	}
2494 	case KVM_HAS_DEVICE_ATTR: {
2495 		r = -EFAULT;
2496 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2497 			break;
2498 		r = kvm_s390_vm_has_attr(kvm, &attr);
2499 		break;
2500 	}
2501 	case KVM_S390_GET_SKEYS: {
2502 		struct kvm_s390_skeys args;
2503 
2504 		r = -EFAULT;
2505 		if (copy_from_user(&args, argp,
2506 				   sizeof(struct kvm_s390_skeys)))
2507 			break;
2508 		r = kvm_s390_get_skeys(kvm, &args);
2509 		break;
2510 	}
2511 	case KVM_S390_SET_SKEYS: {
2512 		struct kvm_s390_skeys args;
2513 
2514 		r = -EFAULT;
2515 		if (copy_from_user(&args, argp,
2516 				   sizeof(struct kvm_s390_skeys)))
2517 			break;
2518 		r = kvm_s390_set_skeys(kvm, &args);
2519 		break;
2520 	}
2521 	case KVM_S390_GET_CMMA_BITS: {
2522 		struct kvm_s390_cmma_log args;
2523 
2524 		r = -EFAULT;
2525 		if (copy_from_user(&args, argp, sizeof(args)))
2526 			break;
2527 		mutex_lock(&kvm->slots_lock);
2528 		r = kvm_s390_get_cmma_bits(kvm, &args);
2529 		mutex_unlock(&kvm->slots_lock);
2530 		if (!r) {
2531 			r = copy_to_user(argp, &args, sizeof(args));
2532 			if (r)
2533 				r = -EFAULT;
2534 		}
2535 		break;
2536 	}
2537 	case KVM_S390_SET_CMMA_BITS: {
2538 		struct kvm_s390_cmma_log args;
2539 
2540 		r = -EFAULT;
2541 		if (copy_from_user(&args, argp, sizeof(args)))
2542 			break;
2543 		mutex_lock(&kvm->slots_lock);
2544 		r = kvm_s390_set_cmma_bits(kvm, &args);
2545 		mutex_unlock(&kvm->slots_lock);
2546 		break;
2547 	}
2548 	case KVM_S390_PV_COMMAND: {
2549 		struct kvm_pv_cmd args;
2550 
2551 		/* protvirt means user cpu state */
2552 		kvm_s390_set_user_cpu_state_ctrl(kvm);
2553 		r = 0;
2554 		if (!is_prot_virt_host()) {
2555 			r = -EINVAL;
2556 			break;
2557 		}
2558 		if (copy_from_user(&args, argp, sizeof(args))) {
2559 			r = -EFAULT;
2560 			break;
2561 		}
2562 		if (args.flags) {
2563 			r = -EINVAL;
2564 			break;
2565 		}
2566 		mutex_lock(&kvm->lock);
2567 		r = kvm_s390_handle_pv(kvm, &args);
2568 		mutex_unlock(&kvm->lock);
2569 		if (copy_to_user(argp, &args, sizeof(args))) {
2570 			r = -EFAULT;
2571 			break;
2572 		}
2573 		break;
2574 	}
2575 	case KVM_S390_MEM_OP: {
2576 		struct kvm_s390_mem_op mem_op;
2577 
2578 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2579 			r = kvm_s390_vm_mem_op(kvm, &mem_op);
2580 		else
2581 			r = -EFAULT;
2582 		break;
2583 	}
2584 	default:
2585 		r = -ENOTTY;
2586 	}
2587 
2588 	return r;
2589 }
2590 
2591 static int kvm_s390_apxa_installed(void)
2592 {
2593 	struct ap_config_info info;
2594 
2595 	if (ap_instructions_available()) {
2596 		if (ap_qci(&info) == 0)
2597 			return info.apxa;
2598 	}
2599 
2600 	return 0;
2601 }
2602 
2603 /*
2604  * The format of the crypto control block (CRYCB) is specified in the 3 low
2605  * order bits of the CRYCB designation (CRYCBD) field as follows:
2606  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2607  *	     AP extended addressing (APXA) facility are installed.
2608  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2609  * Format 2: Both the APXA and MSAX3 facilities are installed
2610  */
2611 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2612 {
2613 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2614 
2615 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2616 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2617 
2618 	/* Check whether MSAX3 is installed */
2619 	if (!test_kvm_facility(kvm, 76))
2620 		return;
2621 
2622 	if (kvm_s390_apxa_installed())
2623 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2624 	else
2625 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2626 }
2627 
2628 /*
2629  * kvm_arch_crypto_set_masks
2630  *
2631  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2632  *	 to be set.
2633  * @apm: the mask identifying the accessible AP adapters
2634  * @aqm: the mask identifying the accessible AP domains
2635  * @adm: the mask identifying the accessible AP control domains
2636  *
2637  * Set the masks that identify the adapters, domains and control domains to
2638  * which the KVM guest is granted access.
2639  *
2640  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2641  *	 function.
2642  */
2643 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2644 			       unsigned long *aqm, unsigned long *adm)
2645 {
2646 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2647 
2648 	kvm_s390_vcpu_block_all(kvm);
2649 
2650 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2651 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2652 		memcpy(crycb->apcb1.apm, apm, 32);
2653 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2654 			 apm[0], apm[1], apm[2], apm[3]);
2655 		memcpy(crycb->apcb1.aqm, aqm, 32);
2656 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2657 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2658 		memcpy(crycb->apcb1.adm, adm, 32);
2659 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2660 			 adm[0], adm[1], adm[2], adm[3]);
2661 		break;
2662 	case CRYCB_FORMAT1:
2663 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2664 		memcpy(crycb->apcb0.apm, apm, 8);
2665 		memcpy(crycb->apcb0.aqm, aqm, 2);
2666 		memcpy(crycb->apcb0.adm, adm, 2);
2667 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2668 			 apm[0], *((unsigned short *)aqm),
2669 			 *((unsigned short *)adm));
2670 		break;
2671 	default:	/* Can not happen */
2672 		break;
2673 	}
2674 
2675 	/* recreate the shadow crycb for each vcpu */
2676 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2677 	kvm_s390_vcpu_unblock_all(kvm);
2678 }
2679 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2680 
2681 /*
2682  * kvm_arch_crypto_clear_masks
2683  *
2684  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2685  *	 to be cleared.
2686  *
2687  * Clear the masks that identify the adapters, domains and control domains to
2688  * which the KVM guest is granted access.
2689  *
2690  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2691  *	 function.
2692  */
2693 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2694 {
2695 	kvm_s390_vcpu_block_all(kvm);
2696 
2697 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2698 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2699 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2700 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2701 
2702 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2703 	/* recreate the shadow crycb for each vcpu */
2704 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2705 	kvm_s390_vcpu_unblock_all(kvm);
2706 }
2707 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2708 
2709 static u64 kvm_s390_get_initial_cpuid(void)
2710 {
2711 	struct cpuid cpuid;
2712 
2713 	get_cpu_id(&cpuid);
2714 	cpuid.version = 0xff;
2715 	return *((u64 *) &cpuid);
2716 }
2717 
2718 static void kvm_s390_crypto_init(struct kvm *kvm)
2719 {
2720 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2721 	kvm_s390_set_crycb_format(kvm);
2722 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2723 
2724 	if (!test_kvm_facility(kvm, 76))
2725 		return;
2726 
2727 	/* Enable AES/DEA protected key functions by default */
2728 	kvm->arch.crypto.aes_kw = 1;
2729 	kvm->arch.crypto.dea_kw = 1;
2730 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2731 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2732 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2733 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2734 }
2735 
2736 static void sca_dispose(struct kvm *kvm)
2737 {
2738 	if (kvm->arch.use_esca)
2739 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2740 	else
2741 		free_page((unsigned long)(kvm->arch.sca));
2742 	kvm->arch.sca = NULL;
2743 }
2744 
2745 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2746 {
2747 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2748 	int i, rc;
2749 	char debug_name[16];
2750 	static unsigned long sca_offset;
2751 
2752 	rc = -EINVAL;
2753 #ifdef CONFIG_KVM_S390_UCONTROL
2754 	if (type & ~KVM_VM_S390_UCONTROL)
2755 		goto out_err;
2756 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2757 		goto out_err;
2758 #else
2759 	if (type)
2760 		goto out_err;
2761 #endif
2762 
2763 	rc = s390_enable_sie();
2764 	if (rc)
2765 		goto out_err;
2766 
2767 	rc = -ENOMEM;
2768 
2769 	if (!sclp.has_64bscao)
2770 		alloc_flags |= GFP_DMA;
2771 	rwlock_init(&kvm->arch.sca_lock);
2772 	/* start with basic SCA */
2773 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2774 	if (!kvm->arch.sca)
2775 		goto out_err;
2776 	mutex_lock(&kvm_lock);
2777 	sca_offset += 16;
2778 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2779 		sca_offset = 0;
2780 	kvm->arch.sca = (struct bsca_block *)
2781 			((char *) kvm->arch.sca + sca_offset);
2782 	mutex_unlock(&kvm_lock);
2783 
2784 	sprintf(debug_name, "kvm-%u", current->pid);
2785 
2786 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2787 	if (!kvm->arch.dbf)
2788 		goto out_err;
2789 
2790 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2791 	kvm->arch.sie_page2 =
2792 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2793 	if (!kvm->arch.sie_page2)
2794 		goto out_err;
2795 
2796 	kvm->arch.sie_page2->kvm = kvm;
2797 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2798 
2799 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2800 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2801 					      (kvm_s390_fac_base[i] |
2802 					       kvm_s390_fac_ext[i]);
2803 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2804 					      kvm_s390_fac_base[i];
2805 	}
2806 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2807 
2808 	/* we are always in czam mode - even on pre z14 machines */
2809 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2810 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2811 	/* we emulate STHYI in kvm */
2812 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2813 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2814 	if (MACHINE_HAS_TLB_GUEST) {
2815 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2816 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2817 	}
2818 
2819 	if (css_general_characteristics.aiv && test_facility(65))
2820 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2821 
2822 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2823 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2824 
2825 	kvm_s390_crypto_init(kvm);
2826 
2827 	mutex_init(&kvm->arch.float_int.ais_lock);
2828 	spin_lock_init(&kvm->arch.float_int.lock);
2829 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2830 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2831 	init_waitqueue_head(&kvm->arch.ipte_wq);
2832 	mutex_init(&kvm->arch.ipte_mutex);
2833 
2834 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2835 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2836 
2837 	if (type & KVM_VM_S390_UCONTROL) {
2838 		kvm->arch.gmap = NULL;
2839 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2840 	} else {
2841 		if (sclp.hamax == U64_MAX)
2842 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2843 		else
2844 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2845 						    sclp.hamax + 1);
2846 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2847 		if (!kvm->arch.gmap)
2848 			goto out_err;
2849 		kvm->arch.gmap->private = kvm;
2850 		kvm->arch.gmap->pfault_enabled = 0;
2851 	}
2852 
2853 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2854 	kvm->arch.use_skf = sclp.has_skey;
2855 	spin_lock_init(&kvm->arch.start_stop_lock);
2856 	kvm_s390_vsie_init(kvm);
2857 	if (use_gisa)
2858 		kvm_s390_gisa_init(kvm);
2859 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2860 
2861 	return 0;
2862 out_err:
2863 	free_page((unsigned long)kvm->arch.sie_page2);
2864 	debug_unregister(kvm->arch.dbf);
2865 	sca_dispose(kvm);
2866 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2867 	return rc;
2868 }
2869 
2870 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2871 {
2872 	u16 rc, rrc;
2873 
2874 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2875 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2876 	kvm_s390_clear_local_irqs(vcpu);
2877 	kvm_clear_async_pf_completion_queue(vcpu);
2878 	if (!kvm_is_ucontrol(vcpu->kvm))
2879 		sca_del_vcpu(vcpu);
2880 
2881 	if (kvm_is_ucontrol(vcpu->kvm))
2882 		gmap_remove(vcpu->arch.gmap);
2883 
2884 	if (vcpu->kvm->arch.use_cmma)
2885 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2886 	/* We can not hold the vcpu mutex here, we are already dying */
2887 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2888 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2889 	free_page((unsigned long)(vcpu->arch.sie_block));
2890 }
2891 
2892 void kvm_arch_destroy_vm(struct kvm *kvm)
2893 {
2894 	u16 rc, rrc;
2895 
2896 	kvm_destroy_vcpus(kvm);
2897 	sca_dispose(kvm);
2898 	kvm_s390_gisa_destroy(kvm);
2899 	/*
2900 	 * We are already at the end of life and kvm->lock is not taken.
2901 	 * This is ok as the file descriptor is closed by now and nobody
2902 	 * can mess with the pv state. To avoid lockdep_assert_held from
2903 	 * complaining we do not use kvm_s390_pv_is_protected.
2904 	 */
2905 	if (kvm_s390_pv_get_handle(kvm))
2906 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2907 	debug_unregister(kvm->arch.dbf);
2908 	free_page((unsigned long)kvm->arch.sie_page2);
2909 	if (!kvm_is_ucontrol(kvm))
2910 		gmap_remove(kvm->arch.gmap);
2911 	kvm_s390_destroy_adapters(kvm);
2912 	kvm_s390_clear_float_irqs(kvm);
2913 	kvm_s390_vsie_destroy(kvm);
2914 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2915 }
2916 
2917 /* Section: vcpu related */
2918 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2919 {
2920 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2921 	if (!vcpu->arch.gmap)
2922 		return -ENOMEM;
2923 	vcpu->arch.gmap->private = vcpu->kvm;
2924 
2925 	return 0;
2926 }
2927 
2928 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2929 {
2930 	if (!kvm_s390_use_sca_entries())
2931 		return;
2932 	read_lock(&vcpu->kvm->arch.sca_lock);
2933 	if (vcpu->kvm->arch.use_esca) {
2934 		struct esca_block *sca = vcpu->kvm->arch.sca;
2935 
2936 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2937 		sca->cpu[vcpu->vcpu_id].sda = 0;
2938 	} else {
2939 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2940 
2941 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2942 		sca->cpu[vcpu->vcpu_id].sda = 0;
2943 	}
2944 	read_unlock(&vcpu->kvm->arch.sca_lock);
2945 }
2946 
2947 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2948 {
2949 	if (!kvm_s390_use_sca_entries()) {
2950 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2951 
2952 		/* we still need the basic sca for the ipte control */
2953 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2954 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2955 		return;
2956 	}
2957 	read_lock(&vcpu->kvm->arch.sca_lock);
2958 	if (vcpu->kvm->arch.use_esca) {
2959 		struct esca_block *sca = vcpu->kvm->arch.sca;
2960 
2961 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2962 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2963 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2964 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2965 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2966 	} else {
2967 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2968 
2969 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2970 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2971 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2972 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2973 	}
2974 	read_unlock(&vcpu->kvm->arch.sca_lock);
2975 }
2976 
2977 /* Basic SCA to Extended SCA data copy routines */
2978 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2979 {
2980 	d->sda = s->sda;
2981 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2982 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2983 }
2984 
2985 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2986 {
2987 	int i;
2988 
2989 	d->ipte_control = s->ipte_control;
2990 	d->mcn[0] = s->mcn;
2991 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2992 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2993 }
2994 
2995 static int sca_switch_to_extended(struct kvm *kvm)
2996 {
2997 	struct bsca_block *old_sca = kvm->arch.sca;
2998 	struct esca_block *new_sca;
2999 	struct kvm_vcpu *vcpu;
3000 	unsigned long vcpu_idx;
3001 	u32 scaol, scaoh;
3002 
3003 	if (kvm->arch.use_esca)
3004 		return 0;
3005 
3006 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3007 	if (!new_sca)
3008 		return -ENOMEM;
3009 
3010 	scaoh = (u32)((u64)(new_sca) >> 32);
3011 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
3012 
3013 	kvm_s390_vcpu_block_all(kvm);
3014 	write_lock(&kvm->arch.sca_lock);
3015 
3016 	sca_copy_b_to_e(new_sca, old_sca);
3017 
3018 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3019 		vcpu->arch.sie_block->scaoh = scaoh;
3020 		vcpu->arch.sie_block->scaol = scaol;
3021 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3022 	}
3023 	kvm->arch.sca = new_sca;
3024 	kvm->arch.use_esca = 1;
3025 
3026 	write_unlock(&kvm->arch.sca_lock);
3027 	kvm_s390_vcpu_unblock_all(kvm);
3028 
3029 	free_page((unsigned long)old_sca);
3030 
3031 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3032 		 old_sca, kvm->arch.sca);
3033 	return 0;
3034 }
3035 
3036 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3037 {
3038 	int rc;
3039 
3040 	if (!kvm_s390_use_sca_entries()) {
3041 		if (id < KVM_MAX_VCPUS)
3042 			return true;
3043 		return false;
3044 	}
3045 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3046 		return true;
3047 	if (!sclp.has_esca || !sclp.has_64bscao)
3048 		return false;
3049 
3050 	mutex_lock(&kvm->lock);
3051 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3052 	mutex_unlock(&kvm->lock);
3053 
3054 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3055 }
3056 
3057 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3058 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3059 {
3060 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3061 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3062 	vcpu->arch.cputm_start = get_tod_clock_fast();
3063 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3064 }
3065 
3066 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3067 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3068 {
3069 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3070 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3071 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3072 	vcpu->arch.cputm_start = 0;
3073 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3074 }
3075 
3076 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3077 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3078 {
3079 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3080 	vcpu->arch.cputm_enabled = true;
3081 	__start_cpu_timer_accounting(vcpu);
3082 }
3083 
3084 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3085 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3086 {
3087 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3088 	__stop_cpu_timer_accounting(vcpu);
3089 	vcpu->arch.cputm_enabled = false;
3090 }
3091 
3092 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3093 {
3094 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3095 	__enable_cpu_timer_accounting(vcpu);
3096 	preempt_enable();
3097 }
3098 
3099 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3100 {
3101 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3102 	__disable_cpu_timer_accounting(vcpu);
3103 	preempt_enable();
3104 }
3105 
3106 /* set the cpu timer - may only be called from the VCPU thread itself */
3107 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3108 {
3109 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3110 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3111 	if (vcpu->arch.cputm_enabled)
3112 		vcpu->arch.cputm_start = get_tod_clock_fast();
3113 	vcpu->arch.sie_block->cputm = cputm;
3114 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3115 	preempt_enable();
3116 }
3117 
3118 /* update and get the cpu timer - can also be called from other VCPU threads */
3119 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3120 {
3121 	unsigned int seq;
3122 	__u64 value;
3123 
3124 	if (unlikely(!vcpu->arch.cputm_enabled))
3125 		return vcpu->arch.sie_block->cputm;
3126 
3127 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3128 	do {
3129 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3130 		/*
3131 		 * If the writer would ever execute a read in the critical
3132 		 * section, e.g. in irq context, we have a deadlock.
3133 		 */
3134 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3135 		value = vcpu->arch.sie_block->cputm;
3136 		/* if cputm_start is 0, accounting is being started/stopped */
3137 		if (likely(vcpu->arch.cputm_start))
3138 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3139 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3140 	preempt_enable();
3141 	return value;
3142 }
3143 
3144 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3145 {
3146 
3147 	gmap_enable(vcpu->arch.enabled_gmap);
3148 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3149 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3150 		__start_cpu_timer_accounting(vcpu);
3151 	vcpu->cpu = cpu;
3152 }
3153 
3154 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3155 {
3156 	vcpu->cpu = -1;
3157 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3158 		__stop_cpu_timer_accounting(vcpu);
3159 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3160 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3161 	gmap_disable(vcpu->arch.enabled_gmap);
3162 
3163 }
3164 
3165 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3166 {
3167 	mutex_lock(&vcpu->kvm->lock);
3168 	preempt_disable();
3169 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3170 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3171 	preempt_enable();
3172 	mutex_unlock(&vcpu->kvm->lock);
3173 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3174 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3175 		sca_add_vcpu(vcpu);
3176 	}
3177 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3178 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3179 	/* make vcpu_load load the right gmap on the first trigger */
3180 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3181 }
3182 
3183 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3184 {
3185 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3186 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3187 		return true;
3188 	return false;
3189 }
3190 
3191 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3192 {
3193 	/* At least one ECC subfunction must be present */
3194 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3195 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3196 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3197 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3198 	       kvm_has_pckmo_subfunc(kvm, 41);
3199 
3200 }
3201 
3202 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3203 {
3204 	/*
3205 	 * If the AP instructions are not being interpreted and the MSAX3
3206 	 * facility is not configured for the guest, there is nothing to set up.
3207 	 */
3208 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3209 		return;
3210 
3211 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3212 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3213 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3214 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3215 
3216 	if (vcpu->kvm->arch.crypto.apie)
3217 		vcpu->arch.sie_block->eca |= ECA_APIE;
3218 
3219 	/* Set up protected key support */
3220 	if (vcpu->kvm->arch.crypto.aes_kw) {
3221 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3222 		/* ecc is also wrapped with AES key */
3223 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3224 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3225 	}
3226 
3227 	if (vcpu->kvm->arch.crypto.dea_kw)
3228 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3229 }
3230 
3231 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3232 {
3233 	free_page(vcpu->arch.sie_block->cbrlo);
3234 	vcpu->arch.sie_block->cbrlo = 0;
3235 }
3236 
3237 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3238 {
3239 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3240 	if (!vcpu->arch.sie_block->cbrlo)
3241 		return -ENOMEM;
3242 	return 0;
3243 }
3244 
3245 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3246 {
3247 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3248 
3249 	vcpu->arch.sie_block->ibc = model->ibc;
3250 	if (test_kvm_facility(vcpu->kvm, 7))
3251 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3252 }
3253 
3254 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3255 {
3256 	int rc = 0;
3257 	u16 uvrc, uvrrc;
3258 
3259 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3260 						    CPUSTAT_SM |
3261 						    CPUSTAT_STOPPED);
3262 
3263 	if (test_kvm_facility(vcpu->kvm, 78))
3264 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3265 	else if (test_kvm_facility(vcpu->kvm, 8))
3266 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3267 
3268 	kvm_s390_vcpu_setup_model(vcpu);
3269 
3270 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3271 	if (MACHINE_HAS_ESOP)
3272 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3273 	if (test_kvm_facility(vcpu->kvm, 9))
3274 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3275 	if (test_kvm_facility(vcpu->kvm, 73))
3276 		vcpu->arch.sie_block->ecb |= ECB_TE;
3277 	if (!kvm_is_ucontrol(vcpu->kvm))
3278 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3279 
3280 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3281 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3282 	if (test_kvm_facility(vcpu->kvm, 130))
3283 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3284 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3285 	if (sclp.has_cei)
3286 		vcpu->arch.sie_block->eca |= ECA_CEI;
3287 	if (sclp.has_ib)
3288 		vcpu->arch.sie_block->eca |= ECA_IB;
3289 	if (sclp.has_siif)
3290 		vcpu->arch.sie_block->eca |= ECA_SII;
3291 	if (sclp.has_sigpif)
3292 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3293 	if (test_kvm_facility(vcpu->kvm, 129)) {
3294 		vcpu->arch.sie_block->eca |= ECA_VX;
3295 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3296 	}
3297 	if (test_kvm_facility(vcpu->kvm, 139))
3298 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3299 	if (test_kvm_facility(vcpu->kvm, 156))
3300 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3301 	if (vcpu->arch.sie_block->gd) {
3302 		vcpu->arch.sie_block->eca |= ECA_AIV;
3303 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3304 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3305 	}
3306 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3307 					| SDNXC;
3308 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3309 
3310 	if (sclp.has_kss)
3311 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3312 	else
3313 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3314 
3315 	if (vcpu->kvm->arch.use_cmma) {
3316 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3317 		if (rc)
3318 			return rc;
3319 	}
3320 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3321 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3322 
3323 	vcpu->arch.sie_block->hpid = HPID_KVM;
3324 
3325 	kvm_s390_vcpu_crypto_setup(vcpu);
3326 
3327 	mutex_lock(&vcpu->kvm->lock);
3328 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3329 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3330 		if (rc)
3331 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3332 	}
3333 	mutex_unlock(&vcpu->kvm->lock);
3334 
3335 	return rc;
3336 }
3337 
3338 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3339 {
3340 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3341 		return -EINVAL;
3342 	return 0;
3343 }
3344 
3345 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3346 {
3347 	struct sie_page *sie_page;
3348 	int rc;
3349 
3350 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3351 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3352 	if (!sie_page)
3353 		return -ENOMEM;
3354 
3355 	vcpu->arch.sie_block = &sie_page->sie_block;
3356 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3357 
3358 	/* the real guest size will always be smaller than msl */
3359 	vcpu->arch.sie_block->mso = 0;
3360 	vcpu->arch.sie_block->msl = sclp.hamax;
3361 
3362 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3363 	spin_lock_init(&vcpu->arch.local_int.lock);
3364 	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3365 	seqcount_init(&vcpu->arch.cputm_seqcount);
3366 
3367 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3368 	kvm_clear_async_pf_completion_queue(vcpu);
3369 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3370 				    KVM_SYNC_GPRS |
3371 				    KVM_SYNC_ACRS |
3372 				    KVM_SYNC_CRS |
3373 				    KVM_SYNC_ARCH0 |
3374 				    KVM_SYNC_PFAULT |
3375 				    KVM_SYNC_DIAG318;
3376 	kvm_s390_set_prefix(vcpu, 0);
3377 	if (test_kvm_facility(vcpu->kvm, 64))
3378 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3379 	if (test_kvm_facility(vcpu->kvm, 82))
3380 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3381 	if (test_kvm_facility(vcpu->kvm, 133))
3382 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3383 	if (test_kvm_facility(vcpu->kvm, 156))
3384 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3385 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3386 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3387 	 */
3388 	if (MACHINE_HAS_VX)
3389 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3390 	else
3391 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3392 
3393 	if (kvm_is_ucontrol(vcpu->kvm)) {
3394 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3395 		if (rc)
3396 			goto out_free_sie_block;
3397 	}
3398 
3399 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3400 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3401 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3402 
3403 	rc = kvm_s390_vcpu_setup(vcpu);
3404 	if (rc)
3405 		goto out_ucontrol_uninit;
3406 	return 0;
3407 
3408 out_ucontrol_uninit:
3409 	if (kvm_is_ucontrol(vcpu->kvm))
3410 		gmap_remove(vcpu->arch.gmap);
3411 out_free_sie_block:
3412 	free_page((unsigned long)(vcpu->arch.sie_block));
3413 	return rc;
3414 }
3415 
3416 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3417 {
3418 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3419 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3420 }
3421 
3422 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3423 {
3424 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3425 }
3426 
3427 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3428 {
3429 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3430 	exit_sie(vcpu);
3431 }
3432 
3433 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3434 {
3435 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3436 }
3437 
3438 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3439 {
3440 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3441 	exit_sie(vcpu);
3442 }
3443 
3444 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3445 {
3446 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3447 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3448 }
3449 
3450 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3451 {
3452 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3453 }
3454 
3455 /*
3456  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3457  * If the CPU is not running (e.g. waiting as idle) the function will
3458  * return immediately. */
3459 void exit_sie(struct kvm_vcpu *vcpu)
3460 {
3461 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3462 	kvm_s390_vsie_kick(vcpu);
3463 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3464 		cpu_relax();
3465 }
3466 
3467 /* Kick a guest cpu out of SIE to process a request synchronously */
3468 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3469 {
3470 	__kvm_make_request(req, vcpu);
3471 	kvm_s390_vcpu_request(vcpu);
3472 }
3473 
3474 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3475 			      unsigned long end)
3476 {
3477 	struct kvm *kvm = gmap->private;
3478 	struct kvm_vcpu *vcpu;
3479 	unsigned long prefix;
3480 	unsigned long i;
3481 
3482 	if (gmap_is_shadow(gmap))
3483 		return;
3484 	if (start >= 1UL << 31)
3485 		/* We are only interested in prefix pages */
3486 		return;
3487 	kvm_for_each_vcpu(i, vcpu, kvm) {
3488 		/* match against both prefix pages */
3489 		prefix = kvm_s390_get_prefix(vcpu);
3490 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3491 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3492 				   start, end);
3493 			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3494 		}
3495 	}
3496 }
3497 
3498 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3499 {
3500 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3501 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3502 	    READ_ONCE(halt_poll_max_steal)) {
3503 		vcpu->stat.halt_no_poll_steal++;
3504 		return true;
3505 	}
3506 	return false;
3507 }
3508 
3509 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3510 {
3511 	/* kvm common code refers to this, but never calls it */
3512 	BUG();
3513 	return 0;
3514 }
3515 
3516 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3517 					   struct kvm_one_reg *reg)
3518 {
3519 	int r = -EINVAL;
3520 
3521 	switch (reg->id) {
3522 	case KVM_REG_S390_TODPR:
3523 		r = put_user(vcpu->arch.sie_block->todpr,
3524 			     (u32 __user *)reg->addr);
3525 		break;
3526 	case KVM_REG_S390_EPOCHDIFF:
3527 		r = put_user(vcpu->arch.sie_block->epoch,
3528 			     (u64 __user *)reg->addr);
3529 		break;
3530 	case KVM_REG_S390_CPU_TIMER:
3531 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3532 			     (u64 __user *)reg->addr);
3533 		break;
3534 	case KVM_REG_S390_CLOCK_COMP:
3535 		r = put_user(vcpu->arch.sie_block->ckc,
3536 			     (u64 __user *)reg->addr);
3537 		break;
3538 	case KVM_REG_S390_PFTOKEN:
3539 		r = put_user(vcpu->arch.pfault_token,
3540 			     (u64 __user *)reg->addr);
3541 		break;
3542 	case KVM_REG_S390_PFCOMPARE:
3543 		r = put_user(vcpu->arch.pfault_compare,
3544 			     (u64 __user *)reg->addr);
3545 		break;
3546 	case KVM_REG_S390_PFSELECT:
3547 		r = put_user(vcpu->arch.pfault_select,
3548 			     (u64 __user *)reg->addr);
3549 		break;
3550 	case KVM_REG_S390_PP:
3551 		r = put_user(vcpu->arch.sie_block->pp,
3552 			     (u64 __user *)reg->addr);
3553 		break;
3554 	case KVM_REG_S390_GBEA:
3555 		r = put_user(vcpu->arch.sie_block->gbea,
3556 			     (u64 __user *)reg->addr);
3557 		break;
3558 	default:
3559 		break;
3560 	}
3561 
3562 	return r;
3563 }
3564 
3565 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3566 					   struct kvm_one_reg *reg)
3567 {
3568 	int r = -EINVAL;
3569 	__u64 val;
3570 
3571 	switch (reg->id) {
3572 	case KVM_REG_S390_TODPR:
3573 		r = get_user(vcpu->arch.sie_block->todpr,
3574 			     (u32 __user *)reg->addr);
3575 		break;
3576 	case KVM_REG_S390_EPOCHDIFF:
3577 		r = get_user(vcpu->arch.sie_block->epoch,
3578 			     (u64 __user *)reg->addr);
3579 		break;
3580 	case KVM_REG_S390_CPU_TIMER:
3581 		r = get_user(val, (u64 __user *)reg->addr);
3582 		if (!r)
3583 			kvm_s390_set_cpu_timer(vcpu, val);
3584 		break;
3585 	case KVM_REG_S390_CLOCK_COMP:
3586 		r = get_user(vcpu->arch.sie_block->ckc,
3587 			     (u64 __user *)reg->addr);
3588 		break;
3589 	case KVM_REG_S390_PFTOKEN:
3590 		r = get_user(vcpu->arch.pfault_token,
3591 			     (u64 __user *)reg->addr);
3592 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3593 			kvm_clear_async_pf_completion_queue(vcpu);
3594 		break;
3595 	case KVM_REG_S390_PFCOMPARE:
3596 		r = get_user(vcpu->arch.pfault_compare,
3597 			     (u64 __user *)reg->addr);
3598 		break;
3599 	case KVM_REG_S390_PFSELECT:
3600 		r = get_user(vcpu->arch.pfault_select,
3601 			     (u64 __user *)reg->addr);
3602 		break;
3603 	case KVM_REG_S390_PP:
3604 		r = get_user(vcpu->arch.sie_block->pp,
3605 			     (u64 __user *)reg->addr);
3606 		break;
3607 	case KVM_REG_S390_GBEA:
3608 		r = get_user(vcpu->arch.sie_block->gbea,
3609 			     (u64 __user *)reg->addr);
3610 		break;
3611 	default:
3612 		break;
3613 	}
3614 
3615 	return r;
3616 }
3617 
3618 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3619 {
3620 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3621 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3622 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3623 
3624 	kvm_clear_async_pf_completion_queue(vcpu);
3625 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3626 		kvm_s390_vcpu_stop(vcpu);
3627 	kvm_s390_clear_local_irqs(vcpu);
3628 }
3629 
3630 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3631 {
3632 	/* Initial reset is a superset of the normal reset */
3633 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3634 
3635 	/*
3636 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3637 	 * We do not only reset the internal data, but also ...
3638 	 */
3639 	vcpu->arch.sie_block->gpsw.mask = 0;
3640 	vcpu->arch.sie_block->gpsw.addr = 0;
3641 	kvm_s390_set_prefix(vcpu, 0);
3642 	kvm_s390_set_cpu_timer(vcpu, 0);
3643 	vcpu->arch.sie_block->ckc = 0;
3644 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3645 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3646 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3647 
3648 	/* ... the data in sync regs */
3649 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3650 	vcpu->run->s.regs.ckc = 0;
3651 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3652 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3653 	vcpu->run->psw_addr = 0;
3654 	vcpu->run->psw_mask = 0;
3655 	vcpu->run->s.regs.todpr = 0;
3656 	vcpu->run->s.regs.cputm = 0;
3657 	vcpu->run->s.regs.ckc = 0;
3658 	vcpu->run->s.regs.pp = 0;
3659 	vcpu->run->s.regs.gbea = 1;
3660 	vcpu->run->s.regs.fpc = 0;
3661 	/*
3662 	 * Do not reset these registers in the protected case, as some of
3663 	 * them are overlayed and they are not accessible in this case
3664 	 * anyway.
3665 	 */
3666 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3667 		vcpu->arch.sie_block->gbea = 1;
3668 		vcpu->arch.sie_block->pp = 0;
3669 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3670 		vcpu->arch.sie_block->todpr = 0;
3671 	}
3672 }
3673 
3674 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3675 {
3676 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3677 
3678 	/* Clear reset is a superset of the initial reset */
3679 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3680 
3681 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3682 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3683 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3684 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3685 
3686 	regs->etoken = 0;
3687 	regs->etoken_extension = 0;
3688 }
3689 
3690 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3691 {
3692 	vcpu_load(vcpu);
3693 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3694 	vcpu_put(vcpu);
3695 	return 0;
3696 }
3697 
3698 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3699 {
3700 	vcpu_load(vcpu);
3701 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3702 	vcpu_put(vcpu);
3703 	return 0;
3704 }
3705 
3706 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3707 				  struct kvm_sregs *sregs)
3708 {
3709 	vcpu_load(vcpu);
3710 
3711 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3712 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3713 
3714 	vcpu_put(vcpu);
3715 	return 0;
3716 }
3717 
3718 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3719 				  struct kvm_sregs *sregs)
3720 {
3721 	vcpu_load(vcpu);
3722 
3723 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3724 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3725 
3726 	vcpu_put(vcpu);
3727 	return 0;
3728 }
3729 
3730 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3731 {
3732 	int ret = 0;
3733 
3734 	vcpu_load(vcpu);
3735 
3736 	if (test_fp_ctl(fpu->fpc)) {
3737 		ret = -EINVAL;
3738 		goto out;
3739 	}
3740 	vcpu->run->s.regs.fpc = fpu->fpc;
3741 	if (MACHINE_HAS_VX)
3742 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3743 				 (freg_t *) fpu->fprs);
3744 	else
3745 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3746 
3747 out:
3748 	vcpu_put(vcpu);
3749 	return ret;
3750 }
3751 
3752 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3753 {
3754 	vcpu_load(vcpu);
3755 
3756 	/* make sure we have the latest values */
3757 	save_fpu_regs();
3758 	if (MACHINE_HAS_VX)
3759 		convert_vx_to_fp((freg_t *) fpu->fprs,
3760 				 (__vector128 *) vcpu->run->s.regs.vrs);
3761 	else
3762 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3763 	fpu->fpc = vcpu->run->s.regs.fpc;
3764 
3765 	vcpu_put(vcpu);
3766 	return 0;
3767 }
3768 
3769 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3770 {
3771 	int rc = 0;
3772 
3773 	if (!is_vcpu_stopped(vcpu))
3774 		rc = -EBUSY;
3775 	else {
3776 		vcpu->run->psw_mask = psw.mask;
3777 		vcpu->run->psw_addr = psw.addr;
3778 	}
3779 	return rc;
3780 }
3781 
3782 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3783 				  struct kvm_translation *tr)
3784 {
3785 	return -EINVAL; /* not implemented yet */
3786 }
3787 
3788 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3789 			      KVM_GUESTDBG_USE_HW_BP | \
3790 			      KVM_GUESTDBG_ENABLE)
3791 
3792 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3793 					struct kvm_guest_debug *dbg)
3794 {
3795 	int rc = 0;
3796 
3797 	vcpu_load(vcpu);
3798 
3799 	vcpu->guest_debug = 0;
3800 	kvm_s390_clear_bp_data(vcpu);
3801 
3802 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3803 		rc = -EINVAL;
3804 		goto out;
3805 	}
3806 	if (!sclp.has_gpere) {
3807 		rc = -EINVAL;
3808 		goto out;
3809 	}
3810 
3811 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3812 		vcpu->guest_debug = dbg->control;
3813 		/* enforce guest PER */
3814 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3815 
3816 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3817 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3818 	} else {
3819 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3820 		vcpu->arch.guestdbg.last_bp = 0;
3821 	}
3822 
3823 	if (rc) {
3824 		vcpu->guest_debug = 0;
3825 		kvm_s390_clear_bp_data(vcpu);
3826 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3827 	}
3828 
3829 out:
3830 	vcpu_put(vcpu);
3831 	return rc;
3832 }
3833 
3834 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3835 				    struct kvm_mp_state *mp_state)
3836 {
3837 	int ret;
3838 
3839 	vcpu_load(vcpu);
3840 
3841 	/* CHECK_STOP and LOAD are not supported yet */
3842 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3843 				      KVM_MP_STATE_OPERATING;
3844 
3845 	vcpu_put(vcpu);
3846 	return ret;
3847 }
3848 
3849 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3850 				    struct kvm_mp_state *mp_state)
3851 {
3852 	int rc = 0;
3853 
3854 	vcpu_load(vcpu);
3855 
3856 	/* user space knows about this interface - let it control the state */
3857 	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3858 
3859 	switch (mp_state->mp_state) {
3860 	case KVM_MP_STATE_STOPPED:
3861 		rc = kvm_s390_vcpu_stop(vcpu);
3862 		break;
3863 	case KVM_MP_STATE_OPERATING:
3864 		rc = kvm_s390_vcpu_start(vcpu);
3865 		break;
3866 	case KVM_MP_STATE_LOAD:
3867 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3868 			rc = -ENXIO;
3869 			break;
3870 		}
3871 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3872 		break;
3873 	case KVM_MP_STATE_CHECK_STOP:
3874 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3875 	default:
3876 		rc = -ENXIO;
3877 	}
3878 
3879 	vcpu_put(vcpu);
3880 	return rc;
3881 }
3882 
3883 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3884 {
3885 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3886 }
3887 
3888 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3889 {
3890 retry:
3891 	kvm_s390_vcpu_request_handled(vcpu);
3892 	if (!kvm_request_pending(vcpu))
3893 		return 0;
3894 	/*
3895 	 * If the guest prefix changed, re-arm the ipte notifier for the
3896 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3897 	 * This ensures that the ipte instruction for this request has
3898 	 * already finished. We might race against a second unmapper that
3899 	 * wants to set the blocking bit. Lets just retry the request loop.
3900 	 */
3901 	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
3902 		int rc;
3903 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3904 					  kvm_s390_get_prefix(vcpu),
3905 					  PAGE_SIZE * 2, PROT_WRITE);
3906 		if (rc) {
3907 			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3908 			return rc;
3909 		}
3910 		goto retry;
3911 	}
3912 
3913 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3914 		vcpu->arch.sie_block->ihcpu = 0xffff;
3915 		goto retry;
3916 	}
3917 
3918 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3919 		if (!ibs_enabled(vcpu)) {
3920 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3921 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3922 		}
3923 		goto retry;
3924 	}
3925 
3926 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3927 		if (ibs_enabled(vcpu)) {
3928 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3929 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3930 		}
3931 		goto retry;
3932 	}
3933 
3934 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3935 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3936 		goto retry;
3937 	}
3938 
3939 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3940 		/*
3941 		 * Disable CMM virtualization; we will emulate the ESSA
3942 		 * instruction manually, in order to provide additional
3943 		 * functionalities needed for live migration.
3944 		 */
3945 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3946 		goto retry;
3947 	}
3948 
3949 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3950 		/*
3951 		 * Re-enable CMM virtualization if CMMA is available and
3952 		 * CMM has been used.
3953 		 */
3954 		if ((vcpu->kvm->arch.use_cmma) &&
3955 		    (vcpu->kvm->mm->context.uses_cmm))
3956 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3957 		goto retry;
3958 	}
3959 
3960 	/* nothing to do, just clear the request */
3961 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3962 	/* we left the vsie handler, nothing to do, just clear the request */
3963 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3964 
3965 	return 0;
3966 }
3967 
3968 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3969 {
3970 	struct kvm_vcpu *vcpu;
3971 	union tod_clock clk;
3972 	unsigned long i;
3973 
3974 	preempt_disable();
3975 
3976 	store_tod_clock_ext(&clk);
3977 
3978 	kvm->arch.epoch = gtod->tod - clk.tod;
3979 	kvm->arch.epdx = 0;
3980 	if (test_kvm_facility(kvm, 139)) {
3981 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3982 		if (kvm->arch.epoch > gtod->tod)
3983 			kvm->arch.epdx -= 1;
3984 	}
3985 
3986 	kvm_s390_vcpu_block_all(kvm);
3987 	kvm_for_each_vcpu(i, vcpu, kvm) {
3988 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3989 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3990 	}
3991 
3992 	kvm_s390_vcpu_unblock_all(kvm);
3993 	preempt_enable();
3994 }
3995 
3996 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3997 {
3998 	mutex_lock(&kvm->lock);
3999 	__kvm_s390_set_tod_clock(kvm, gtod);
4000 	mutex_unlock(&kvm->lock);
4001 }
4002 
4003 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4004 {
4005 	if (!mutex_trylock(&kvm->lock))
4006 		return 0;
4007 	__kvm_s390_set_tod_clock(kvm, gtod);
4008 	mutex_unlock(&kvm->lock);
4009 	return 1;
4010 }
4011 
4012 /**
4013  * kvm_arch_fault_in_page - fault-in guest page if necessary
4014  * @vcpu: The corresponding virtual cpu
4015  * @gpa: Guest physical address
4016  * @writable: Whether the page should be writable or not
4017  *
4018  * Make sure that a guest page has been faulted-in on the host.
4019  *
4020  * Return: Zero on success, negative error code otherwise.
4021  */
4022 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4023 {
4024 	return gmap_fault(vcpu->arch.gmap, gpa,
4025 			  writable ? FAULT_FLAG_WRITE : 0);
4026 }
4027 
4028 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4029 				      unsigned long token)
4030 {
4031 	struct kvm_s390_interrupt inti;
4032 	struct kvm_s390_irq irq;
4033 
4034 	if (start_token) {
4035 		irq.u.ext.ext_params2 = token;
4036 		irq.type = KVM_S390_INT_PFAULT_INIT;
4037 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4038 	} else {
4039 		inti.type = KVM_S390_INT_PFAULT_DONE;
4040 		inti.parm64 = token;
4041 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4042 	}
4043 }
4044 
4045 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4046 				     struct kvm_async_pf *work)
4047 {
4048 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4049 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4050 
4051 	return true;
4052 }
4053 
4054 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4055 				 struct kvm_async_pf *work)
4056 {
4057 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4058 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4059 }
4060 
4061 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4062 			       struct kvm_async_pf *work)
4063 {
4064 	/* s390 will always inject the page directly */
4065 }
4066 
4067 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4068 {
4069 	/*
4070 	 * s390 will always inject the page directly,
4071 	 * but we still want check_async_completion to cleanup
4072 	 */
4073 	return true;
4074 }
4075 
4076 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4077 {
4078 	hva_t hva;
4079 	struct kvm_arch_async_pf arch;
4080 
4081 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4082 		return false;
4083 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4084 	    vcpu->arch.pfault_compare)
4085 		return false;
4086 	if (psw_extint_disabled(vcpu))
4087 		return false;
4088 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4089 		return false;
4090 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4091 		return false;
4092 	if (!vcpu->arch.gmap->pfault_enabled)
4093 		return false;
4094 
4095 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4096 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4097 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4098 		return false;
4099 
4100 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4101 }
4102 
4103 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4104 {
4105 	int rc, cpuflags;
4106 
4107 	/*
4108 	 * On s390 notifications for arriving pages will be delivered directly
4109 	 * to the guest but the house keeping for completed pfaults is
4110 	 * handled outside the worker.
4111 	 */
4112 	kvm_check_async_pf_completion(vcpu);
4113 
4114 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4115 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4116 
4117 	if (need_resched())
4118 		schedule();
4119 
4120 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4121 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4122 		if (rc)
4123 			return rc;
4124 	}
4125 
4126 	rc = kvm_s390_handle_requests(vcpu);
4127 	if (rc)
4128 		return rc;
4129 
4130 	if (guestdbg_enabled(vcpu)) {
4131 		kvm_s390_backup_guest_per_regs(vcpu);
4132 		kvm_s390_patch_guest_per_regs(vcpu);
4133 	}
4134 
4135 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4136 
4137 	vcpu->arch.sie_block->icptcode = 0;
4138 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4139 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4140 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4141 
4142 	return 0;
4143 }
4144 
4145 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4146 {
4147 	struct kvm_s390_pgm_info pgm_info = {
4148 		.code = PGM_ADDRESSING,
4149 	};
4150 	u8 opcode, ilen;
4151 	int rc;
4152 
4153 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4154 	trace_kvm_s390_sie_fault(vcpu);
4155 
4156 	/*
4157 	 * We want to inject an addressing exception, which is defined as a
4158 	 * suppressing or terminating exception. However, since we came here
4159 	 * by a DAT access exception, the PSW still points to the faulting
4160 	 * instruction since DAT exceptions are nullifying. So we've got
4161 	 * to look up the current opcode to get the length of the instruction
4162 	 * to be able to forward the PSW.
4163 	 */
4164 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4165 	ilen = insn_length(opcode);
4166 	if (rc < 0) {
4167 		return rc;
4168 	} else if (rc) {
4169 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4170 		 * Forward by arbitrary ilc, injection will take care of
4171 		 * nullification if necessary.
4172 		 */
4173 		pgm_info = vcpu->arch.pgm;
4174 		ilen = 4;
4175 	}
4176 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4177 	kvm_s390_forward_psw(vcpu, ilen);
4178 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4179 }
4180 
4181 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4182 {
4183 	struct mcck_volatile_info *mcck_info;
4184 	struct sie_page *sie_page;
4185 
4186 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4187 		   vcpu->arch.sie_block->icptcode);
4188 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4189 
4190 	if (guestdbg_enabled(vcpu))
4191 		kvm_s390_restore_guest_per_regs(vcpu);
4192 
4193 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4194 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4195 
4196 	if (exit_reason == -EINTR) {
4197 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4198 		sie_page = container_of(vcpu->arch.sie_block,
4199 					struct sie_page, sie_block);
4200 		mcck_info = &sie_page->mcck_info;
4201 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4202 		return 0;
4203 	}
4204 
4205 	if (vcpu->arch.sie_block->icptcode > 0) {
4206 		int rc = kvm_handle_sie_intercept(vcpu);
4207 
4208 		if (rc != -EOPNOTSUPP)
4209 			return rc;
4210 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4211 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4212 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4213 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4214 		return -EREMOTE;
4215 	} else if (exit_reason != -EFAULT) {
4216 		vcpu->stat.exit_null++;
4217 		return 0;
4218 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4219 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4220 		vcpu->run->s390_ucontrol.trans_exc_code =
4221 						current->thread.gmap_addr;
4222 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4223 		return -EREMOTE;
4224 	} else if (current->thread.gmap_pfault) {
4225 		trace_kvm_s390_major_guest_pfault(vcpu);
4226 		current->thread.gmap_pfault = 0;
4227 		if (kvm_arch_setup_async_pf(vcpu))
4228 			return 0;
4229 		vcpu->stat.pfault_sync++;
4230 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4231 	}
4232 	return vcpu_post_run_fault_in_sie(vcpu);
4233 }
4234 
4235 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4236 static int __vcpu_run(struct kvm_vcpu *vcpu)
4237 {
4238 	int rc, exit_reason;
4239 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4240 
4241 	/*
4242 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4243 	 * ning the guest), so that memslots (and other stuff) are protected
4244 	 */
4245 	kvm_vcpu_srcu_read_lock(vcpu);
4246 
4247 	do {
4248 		rc = vcpu_pre_run(vcpu);
4249 		if (rc)
4250 			break;
4251 
4252 		kvm_vcpu_srcu_read_unlock(vcpu);
4253 		/*
4254 		 * As PF_VCPU will be used in fault handler, between
4255 		 * guest_enter and guest_exit should be no uaccess.
4256 		 */
4257 		local_irq_disable();
4258 		guest_enter_irqoff();
4259 		__disable_cpu_timer_accounting(vcpu);
4260 		local_irq_enable();
4261 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4262 			memcpy(sie_page->pv_grregs,
4263 			       vcpu->run->s.regs.gprs,
4264 			       sizeof(sie_page->pv_grregs));
4265 		}
4266 		if (test_cpu_flag(CIF_FPU))
4267 			load_fpu_regs();
4268 		exit_reason = sie64a(vcpu->arch.sie_block,
4269 				     vcpu->run->s.regs.gprs);
4270 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4271 			memcpy(vcpu->run->s.regs.gprs,
4272 			       sie_page->pv_grregs,
4273 			       sizeof(sie_page->pv_grregs));
4274 			/*
4275 			 * We're not allowed to inject interrupts on intercepts
4276 			 * that leave the guest state in an "in-between" state
4277 			 * where the next SIE entry will do a continuation.
4278 			 * Fence interrupts in our "internal" PSW.
4279 			 */
4280 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4281 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4282 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4283 			}
4284 		}
4285 		local_irq_disable();
4286 		__enable_cpu_timer_accounting(vcpu);
4287 		guest_exit_irqoff();
4288 		local_irq_enable();
4289 		kvm_vcpu_srcu_read_lock(vcpu);
4290 
4291 		rc = vcpu_post_run(vcpu, exit_reason);
4292 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4293 
4294 	kvm_vcpu_srcu_read_unlock(vcpu);
4295 	return rc;
4296 }
4297 
4298 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4299 {
4300 	struct kvm_run *kvm_run = vcpu->run;
4301 	struct runtime_instr_cb *riccb;
4302 	struct gs_cb *gscb;
4303 
4304 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4305 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4306 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4307 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4308 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4309 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4310 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4311 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4312 	}
4313 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4314 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4315 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4316 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4317 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4318 			kvm_clear_async_pf_completion_queue(vcpu);
4319 	}
4320 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4321 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4322 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4323 		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4324 	}
4325 	/*
4326 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4327 	 * we should enable RI here instead of doing the lazy enablement.
4328 	 */
4329 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4330 	    test_kvm_facility(vcpu->kvm, 64) &&
4331 	    riccb->v &&
4332 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4333 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4334 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4335 	}
4336 	/*
4337 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4338 	 * we should enable GS here instead of doing the lazy enablement.
4339 	 */
4340 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4341 	    test_kvm_facility(vcpu->kvm, 133) &&
4342 	    gscb->gssm &&
4343 	    !vcpu->arch.gs_enabled) {
4344 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4345 		vcpu->arch.sie_block->ecb |= ECB_GS;
4346 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4347 		vcpu->arch.gs_enabled = 1;
4348 	}
4349 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4350 	    test_kvm_facility(vcpu->kvm, 82)) {
4351 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4352 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4353 	}
4354 	if (MACHINE_HAS_GS) {
4355 		preempt_disable();
4356 		__ctl_set_bit(2, 4);
4357 		if (current->thread.gs_cb) {
4358 			vcpu->arch.host_gscb = current->thread.gs_cb;
4359 			save_gs_cb(vcpu->arch.host_gscb);
4360 		}
4361 		if (vcpu->arch.gs_enabled) {
4362 			current->thread.gs_cb = (struct gs_cb *)
4363 						&vcpu->run->s.regs.gscb;
4364 			restore_gs_cb(current->thread.gs_cb);
4365 		}
4366 		preempt_enable();
4367 	}
4368 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4369 }
4370 
4371 static void sync_regs(struct kvm_vcpu *vcpu)
4372 {
4373 	struct kvm_run *kvm_run = vcpu->run;
4374 
4375 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4376 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4377 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4378 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4379 		/* some control register changes require a tlb flush */
4380 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4381 	}
4382 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4383 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4384 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4385 	}
4386 	save_access_regs(vcpu->arch.host_acrs);
4387 	restore_access_regs(vcpu->run->s.regs.acrs);
4388 	/* save host (userspace) fprs/vrs */
4389 	save_fpu_regs();
4390 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4391 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4392 	if (MACHINE_HAS_VX)
4393 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4394 	else
4395 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4396 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4397 	if (test_fp_ctl(current->thread.fpu.fpc))
4398 		/* User space provided an invalid FPC, let's clear it */
4399 		current->thread.fpu.fpc = 0;
4400 
4401 	/* Sync fmt2 only data */
4402 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4403 		sync_regs_fmt2(vcpu);
4404 	} else {
4405 		/*
4406 		 * In several places we have to modify our internal view to
4407 		 * not do things that are disallowed by the ultravisor. For
4408 		 * example we must not inject interrupts after specific exits
4409 		 * (e.g. 112 prefix page not secure). We do this by turning
4410 		 * off the machine check, external and I/O interrupt bits
4411 		 * of our PSW copy. To avoid getting validity intercepts, we
4412 		 * do only accept the condition code from userspace.
4413 		 */
4414 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4415 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4416 						   PSW_MASK_CC;
4417 	}
4418 
4419 	kvm_run->kvm_dirty_regs = 0;
4420 }
4421 
4422 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4423 {
4424 	struct kvm_run *kvm_run = vcpu->run;
4425 
4426 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4427 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4428 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4429 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4430 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4431 	if (MACHINE_HAS_GS) {
4432 		preempt_disable();
4433 		__ctl_set_bit(2, 4);
4434 		if (vcpu->arch.gs_enabled)
4435 			save_gs_cb(current->thread.gs_cb);
4436 		current->thread.gs_cb = vcpu->arch.host_gscb;
4437 		restore_gs_cb(vcpu->arch.host_gscb);
4438 		if (!vcpu->arch.host_gscb)
4439 			__ctl_clear_bit(2, 4);
4440 		vcpu->arch.host_gscb = NULL;
4441 		preempt_enable();
4442 	}
4443 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4444 }
4445 
4446 static void store_regs(struct kvm_vcpu *vcpu)
4447 {
4448 	struct kvm_run *kvm_run = vcpu->run;
4449 
4450 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4451 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4452 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4453 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4454 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4455 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4456 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4457 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4458 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4459 	save_access_regs(vcpu->run->s.regs.acrs);
4460 	restore_access_regs(vcpu->arch.host_acrs);
4461 	/* Save guest register state */
4462 	save_fpu_regs();
4463 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4464 	/* Restore will be done lazily at return */
4465 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4466 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4467 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4468 		store_regs_fmt2(vcpu);
4469 }
4470 
4471 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4472 {
4473 	struct kvm_run *kvm_run = vcpu->run;
4474 	int rc;
4475 
4476 	if (kvm_run->immediate_exit)
4477 		return -EINTR;
4478 
4479 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4480 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4481 		return -EINVAL;
4482 
4483 	vcpu_load(vcpu);
4484 
4485 	if (guestdbg_exit_pending(vcpu)) {
4486 		kvm_s390_prepare_debug_exit(vcpu);
4487 		rc = 0;
4488 		goto out;
4489 	}
4490 
4491 	kvm_sigset_activate(vcpu);
4492 
4493 	/*
4494 	 * no need to check the return value of vcpu_start as it can only have
4495 	 * an error for protvirt, but protvirt means user cpu state
4496 	 */
4497 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4498 		kvm_s390_vcpu_start(vcpu);
4499 	} else if (is_vcpu_stopped(vcpu)) {
4500 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4501 				   vcpu->vcpu_id);
4502 		rc = -EINVAL;
4503 		goto out;
4504 	}
4505 
4506 	sync_regs(vcpu);
4507 	enable_cpu_timer_accounting(vcpu);
4508 
4509 	might_fault();
4510 	rc = __vcpu_run(vcpu);
4511 
4512 	if (signal_pending(current) && !rc) {
4513 		kvm_run->exit_reason = KVM_EXIT_INTR;
4514 		rc = -EINTR;
4515 	}
4516 
4517 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4518 		kvm_s390_prepare_debug_exit(vcpu);
4519 		rc = 0;
4520 	}
4521 
4522 	if (rc == -EREMOTE) {
4523 		/* userspace support is needed, kvm_run has been prepared */
4524 		rc = 0;
4525 	}
4526 
4527 	disable_cpu_timer_accounting(vcpu);
4528 	store_regs(vcpu);
4529 
4530 	kvm_sigset_deactivate(vcpu);
4531 
4532 	vcpu->stat.exit_userspace++;
4533 out:
4534 	vcpu_put(vcpu);
4535 	return rc;
4536 }
4537 
4538 /*
4539  * store status at address
4540  * we use have two special cases:
4541  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4542  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4543  */
4544 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4545 {
4546 	unsigned char archmode = 1;
4547 	freg_t fprs[NUM_FPRS];
4548 	unsigned int px;
4549 	u64 clkcomp, cputm;
4550 	int rc;
4551 
4552 	px = kvm_s390_get_prefix(vcpu);
4553 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4554 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4555 			return -EFAULT;
4556 		gpa = 0;
4557 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4558 		if (write_guest_real(vcpu, 163, &archmode, 1))
4559 			return -EFAULT;
4560 		gpa = px;
4561 	} else
4562 		gpa -= __LC_FPREGS_SAVE_AREA;
4563 
4564 	/* manually convert vector registers if necessary */
4565 	if (MACHINE_HAS_VX) {
4566 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4567 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4568 				     fprs, 128);
4569 	} else {
4570 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4571 				     vcpu->run->s.regs.fprs, 128);
4572 	}
4573 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4574 			      vcpu->run->s.regs.gprs, 128);
4575 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4576 			      &vcpu->arch.sie_block->gpsw, 16);
4577 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4578 			      &px, 4);
4579 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4580 			      &vcpu->run->s.regs.fpc, 4);
4581 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4582 			      &vcpu->arch.sie_block->todpr, 4);
4583 	cputm = kvm_s390_get_cpu_timer(vcpu);
4584 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4585 			      &cputm, 8);
4586 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4587 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4588 			      &clkcomp, 8);
4589 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4590 			      &vcpu->run->s.regs.acrs, 64);
4591 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4592 			      &vcpu->arch.sie_block->gcr, 128);
4593 	return rc ? -EFAULT : 0;
4594 }
4595 
4596 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4597 {
4598 	/*
4599 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4600 	 * switch in the run ioctl. Let's update our copies before we save
4601 	 * it into the save area
4602 	 */
4603 	save_fpu_regs();
4604 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4605 	save_access_regs(vcpu->run->s.regs.acrs);
4606 
4607 	return kvm_s390_store_status_unloaded(vcpu, addr);
4608 }
4609 
4610 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4611 {
4612 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4613 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4614 }
4615 
4616 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4617 {
4618 	unsigned long i;
4619 	struct kvm_vcpu *vcpu;
4620 
4621 	kvm_for_each_vcpu(i, vcpu, kvm) {
4622 		__disable_ibs_on_vcpu(vcpu);
4623 	}
4624 }
4625 
4626 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4627 {
4628 	if (!sclp.has_ibs)
4629 		return;
4630 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4631 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4632 }
4633 
4634 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4635 {
4636 	int i, online_vcpus, r = 0, started_vcpus = 0;
4637 
4638 	if (!is_vcpu_stopped(vcpu))
4639 		return 0;
4640 
4641 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4642 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4643 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4644 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4645 
4646 	/* Let's tell the UV that we want to change into the operating state */
4647 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4648 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4649 		if (r) {
4650 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4651 			return r;
4652 		}
4653 	}
4654 
4655 	for (i = 0; i < online_vcpus; i++) {
4656 		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4657 			started_vcpus++;
4658 	}
4659 
4660 	if (started_vcpus == 0) {
4661 		/* we're the only active VCPU -> speed it up */
4662 		__enable_ibs_on_vcpu(vcpu);
4663 	} else if (started_vcpus == 1) {
4664 		/*
4665 		 * As we are starting a second VCPU, we have to disable
4666 		 * the IBS facility on all VCPUs to remove potentially
4667 		 * outstanding ENABLE requests.
4668 		 */
4669 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4670 	}
4671 
4672 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4673 	/*
4674 	 * The real PSW might have changed due to a RESTART interpreted by the
4675 	 * ultravisor. We block all interrupts and let the next sie exit
4676 	 * refresh our view.
4677 	 */
4678 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4679 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4680 	/*
4681 	 * Another VCPU might have used IBS while we were offline.
4682 	 * Let's play safe and flush the VCPU at startup.
4683 	 */
4684 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4685 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4686 	return 0;
4687 }
4688 
4689 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4690 {
4691 	int i, online_vcpus, r = 0, started_vcpus = 0;
4692 	struct kvm_vcpu *started_vcpu = NULL;
4693 
4694 	if (is_vcpu_stopped(vcpu))
4695 		return 0;
4696 
4697 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4698 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4699 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4700 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4701 
4702 	/* Let's tell the UV that we want to change into the stopped state */
4703 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4704 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4705 		if (r) {
4706 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4707 			return r;
4708 		}
4709 	}
4710 
4711 	/*
4712 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4713 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4714 	 * have been fully processed. This will ensure that the VCPU
4715 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4716 	 */
4717 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4718 	kvm_s390_clear_stop_irq(vcpu);
4719 
4720 	__disable_ibs_on_vcpu(vcpu);
4721 
4722 	for (i = 0; i < online_vcpus; i++) {
4723 		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4724 
4725 		if (!is_vcpu_stopped(tmp)) {
4726 			started_vcpus++;
4727 			started_vcpu = tmp;
4728 		}
4729 	}
4730 
4731 	if (started_vcpus == 1) {
4732 		/*
4733 		 * As we only have one VCPU left, we want to enable the
4734 		 * IBS facility for that VCPU to speed it up.
4735 		 */
4736 		__enable_ibs_on_vcpu(started_vcpu);
4737 	}
4738 
4739 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4740 	return 0;
4741 }
4742 
4743 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4744 				     struct kvm_enable_cap *cap)
4745 {
4746 	int r;
4747 
4748 	if (cap->flags)
4749 		return -EINVAL;
4750 
4751 	switch (cap->cap) {
4752 	case KVM_CAP_S390_CSS_SUPPORT:
4753 		if (!vcpu->kvm->arch.css_support) {
4754 			vcpu->kvm->arch.css_support = 1;
4755 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4756 			trace_kvm_s390_enable_css(vcpu->kvm);
4757 		}
4758 		r = 0;
4759 		break;
4760 	default:
4761 		r = -EINVAL;
4762 		break;
4763 	}
4764 	return r;
4765 }
4766 
4767 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
4768 				  struct kvm_s390_mem_op *mop)
4769 {
4770 	void __user *uaddr = (void __user *)mop->buf;
4771 	int r = 0;
4772 
4773 	if (mop->flags || !mop->size)
4774 		return -EINVAL;
4775 	if (mop->size + mop->sida_offset < mop->size)
4776 		return -EINVAL;
4777 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4778 		return -E2BIG;
4779 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4780 		return -EINVAL;
4781 
4782 	switch (mop->op) {
4783 	case KVM_S390_MEMOP_SIDA_READ:
4784 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4785 				 mop->sida_offset), mop->size))
4786 			r = -EFAULT;
4787 
4788 		break;
4789 	case KVM_S390_MEMOP_SIDA_WRITE:
4790 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4791 				   mop->sida_offset), uaddr, mop->size))
4792 			r = -EFAULT;
4793 		break;
4794 	}
4795 	return r;
4796 }
4797 
4798 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
4799 				 struct kvm_s390_mem_op *mop)
4800 {
4801 	void __user *uaddr = (void __user *)mop->buf;
4802 	void *tmpbuf = NULL;
4803 	int r = 0;
4804 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4805 				    | KVM_S390_MEMOP_F_CHECK_ONLY
4806 				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4807 
4808 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4809 		return -EINVAL;
4810 	if (mop->size > MEM_OP_MAX_SIZE)
4811 		return -E2BIG;
4812 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4813 		return -EINVAL;
4814 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4815 		if (access_key_invalid(mop->key))
4816 			return -EINVAL;
4817 	} else {
4818 		mop->key = 0;
4819 	}
4820 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4821 		tmpbuf = vmalloc(mop->size);
4822 		if (!tmpbuf)
4823 			return -ENOMEM;
4824 	}
4825 
4826 	switch (mop->op) {
4827 	case KVM_S390_MEMOP_LOGICAL_READ:
4828 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4829 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4830 					    GACC_FETCH, mop->key);
4831 			break;
4832 		}
4833 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4834 					mop->size, mop->key);
4835 		if (r == 0) {
4836 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4837 				r = -EFAULT;
4838 		}
4839 		break;
4840 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4841 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4842 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4843 					    GACC_STORE, mop->key);
4844 			break;
4845 		}
4846 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4847 			r = -EFAULT;
4848 			break;
4849 		}
4850 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4851 					 mop->size, mop->key);
4852 		break;
4853 	}
4854 
4855 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4856 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4857 
4858 	vfree(tmpbuf);
4859 	return r;
4860 }
4861 
4862 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
4863 				     struct kvm_s390_mem_op *mop)
4864 {
4865 	int r, srcu_idx;
4866 
4867 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4868 
4869 	switch (mop->op) {
4870 	case KVM_S390_MEMOP_LOGICAL_READ:
4871 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4872 		r = kvm_s390_vcpu_mem_op(vcpu, mop);
4873 		break;
4874 	case KVM_S390_MEMOP_SIDA_READ:
4875 	case KVM_S390_MEMOP_SIDA_WRITE:
4876 		/* we are locked against sida going away by the vcpu->mutex */
4877 		r = kvm_s390_vcpu_sida_op(vcpu, mop);
4878 		break;
4879 	default:
4880 		r = -EINVAL;
4881 	}
4882 
4883 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4884 	return r;
4885 }
4886 
4887 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4888 			       unsigned int ioctl, unsigned long arg)
4889 {
4890 	struct kvm_vcpu *vcpu = filp->private_data;
4891 	void __user *argp = (void __user *)arg;
4892 
4893 	switch (ioctl) {
4894 	case KVM_S390_IRQ: {
4895 		struct kvm_s390_irq s390irq;
4896 
4897 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4898 			return -EFAULT;
4899 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4900 	}
4901 	case KVM_S390_INTERRUPT: {
4902 		struct kvm_s390_interrupt s390int;
4903 		struct kvm_s390_irq s390irq = {};
4904 
4905 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4906 			return -EFAULT;
4907 		if (s390int_to_s390irq(&s390int, &s390irq))
4908 			return -EINVAL;
4909 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4910 	}
4911 	}
4912 	return -ENOIOCTLCMD;
4913 }
4914 
4915 long kvm_arch_vcpu_ioctl(struct file *filp,
4916 			 unsigned int ioctl, unsigned long arg)
4917 {
4918 	struct kvm_vcpu *vcpu = filp->private_data;
4919 	void __user *argp = (void __user *)arg;
4920 	int idx;
4921 	long r;
4922 	u16 rc, rrc;
4923 
4924 	vcpu_load(vcpu);
4925 
4926 	switch (ioctl) {
4927 	case KVM_S390_STORE_STATUS:
4928 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4929 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4930 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4931 		break;
4932 	case KVM_S390_SET_INITIAL_PSW: {
4933 		psw_t psw;
4934 
4935 		r = -EFAULT;
4936 		if (copy_from_user(&psw, argp, sizeof(psw)))
4937 			break;
4938 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4939 		break;
4940 	}
4941 	case KVM_S390_CLEAR_RESET:
4942 		r = 0;
4943 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4944 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4945 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4946 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4947 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4948 				   rc, rrc);
4949 		}
4950 		break;
4951 	case KVM_S390_INITIAL_RESET:
4952 		r = 0;
4953 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4954 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4955 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4956 					  UVC_CMD_CPU_RESET_INITIAL,
4957 					  &rc, &rrc);
4958 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4959 				   rc, rrc);
4960 		}
4961 		break;
4962 	case KVM_S390_NORMAL_RESET:
4963 		r = 0;
4964 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4965 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4966 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4967 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4968 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4969 				   rc, rrc);
4970 		}
4971 		break;
4972 	case KVM_SET_ONE_REG:
4973 	case KVM_GET_ONE_REG: {
4974 		struct kvm_one_reg reg;
4975 		r = -EINVAL;
4976 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4977 			break;
4978 		r = -EFAULT;
4979 		if (copy_from_user(&reg, argp, sizeof(reg)))
4980 			break;
4981 		if (ioctl == KVM_SET_ONE_REG)
4982 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4983 		else
4984 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4985 		break;
4986 	}
4987 #ifdef CONFIG_KVM_S390_UCONTROL
4988 	case KVM_S390_UCAS_MAP: {
4989 		struct kvm_s390_ucas_mapping ucasmap;
4990 
4991 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4992 			r = -EFAULT;
4993 			break;
4994 		}
4995 
4996 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4997 			r = -EINVAL;
4998 			break;
4999 		}
5000 
5001 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5002 				     ucasmap.vcpu_addr, ucasmap.length);
5003 		break;
5004 	}
5005 	case KVM_S390_UCAS_UNMAP: {
5006 		struct kvm_s390_ucas_mapping ucasmap;
5007 
5008 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5009 			r = -EFAULT;
5010 			break;
5011 		}
5012 
5013 		if (!kvm_is_ucontrol(vcpu->kvm)) {
5014 			r = -EINVAL;
5015 			break;
5016 		}
5017 
5018 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5019 			ucasmap.length);
5020 		break;
5021 	}
5022 #endif
5023 	case KVM_S390_VCPU_FAULT: {
5024 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
5025 		break;
5026 	}
5027 	case KVM_ENABLE_CAP:
5028 	{
5029 		struct kvm_enable_cap cap;
5030 		r = -EFAULT;
5031 		if (copy_from_user(&cap, argp, sizeof(cap)))
5032 			break;
5033 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5034 		break;
5035 	}
5036 	case KVM_S390_MEM_OP: {
5037 		struct kvm_s390_mem_op mem_op;
5038 
5039 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5040 			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5041 		else
5042 			r = -EFAULT;
5043 		break;
5044 	}
5045 	case KVM_S390_SET_IRQ_STATE: {
5046 		struct kvm_s390_irq_state irq_state;
5047 
5048 		r = -EFAULT;
5049 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5050 			break;
5051 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5052 		    irq_state.len == 0 ||
5053 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5054 			r = -EINVAL;
5055 			break;
5056 		}
5057 		/* do not use irq_state.flags, it will break old QEMUs */
5058 		r = kvm_s390_set_irq_state(vcpu,
5059 					   (void __user *) irq_state.buf,
5060 					   irq_state.len);
5061 		break;
5062 	}
5063 	case KVM_S390_GET_IRQ_STATE: {
5064 		struct kvm_s390_irq_state irq_state;
5065 
5066 		r = -EFAULT;
5067 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5068 			break;
5069 		if (irq_state.len == 0) {
5070 			r = -EINVAL;
5071 			break;
5072 		}
5073 		/* do not use irq_state.flags, it will break old QEMUs */
5074 		r = kvm_s390_get_irq_state(vcpu,
5075 					   (__u8 __user *)  irq_state.buf,
5076 					   irq_state.len);
5077 		break;
5078 	}
5079 	default:
5080 		r = -ENOTTY;
5081 	}
5082 
5083 	vcpu_put(vcpu);
5084 	return r;
5085 }
5086 
5087 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5088 {
5089 #ifdef CONFIG_KVM_S390_UCONTROL
5090 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5091 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5092 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5093 		get_page(vmf->page);
5094 		return 0;
5095 	}
5096 #endif
5097 	return VM_FAULT_SIGBUS;
5098 }
5099 
5100 /* Section: memory related */
5101 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5102 				   const struct kvm_memory_slot *old,
5103 				   struct kvm_memory_slot *new,
5104 				   enum kvm_mr_change change)
5105 {
5106 	gpa_t size;
5107 
5108 	/* When we are protected, we should not change the memory slots */
5109 	if (kvm_s390_pv_get_handle(kvm))
5110 		return -EINVAL;
5111 
5112 	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5113 		return 0;
5114 
5115 	/* A few sanity checks. We can have memory slots which have to be
5116 	   located/ended at a segment boundary (1MB). The memory in userland is
5117 	   ok to be fragmented into various different vmas. It is okay to mmap()
5118 	   and munmap() stuff in this slot after doing this call at any time */
5119 
5120 	if (new->userspace_addr & 0xffffful)
5121 		return -EINVAL;
5122 
5123 	size = new->npages * PAGE_SIZE;
5124 	if (size & 0xffffful)
5125 		return -EINVAL;
5126 
5127 	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5128 		return -EINVAL;
5129 
5130 	return 0;
5131 }
5132 
5133 void kvm_arch_commit_memory_region(struct kvm *kvm,
5134 				struct kvm_memory_slot *old,
5135 				const struct kvm_memory_slot *new,
5136 				enum kvm_mr_change change)
5137 {
5138 	int rc = 0;
5139 
5140 	switch (change) {
5141 	case KVM_MR_DELETE:
5142 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5143 					old->npages * PAGE_SIZE);
5144 		break;
5145 	case KVM_MR_MOVE:
5146 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5147 					old->npages * PAGE_SIZE);
5148 		if (rc)
5149 			break;
5150 		fallthrough;
5151 	case KVM_MR_CREATE:
5152 		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5153 				      new->base_gfn * PAGE_SIZE,
5154 				      new->npages * PAGE_SIZE);
5155 		break;
5156 	case KVM_MR_FLAGS_ONLY:
5157 		break;
5158 	default:
5159 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5160 	}
5161 	if (rc)
5162 		pr_warn("failed to commit memory region\n");
5163 	return;
5164 }
5165 
5166 static inline unsigned long nonhyp_mask(int i)
5167 {
5168 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5169 
5170 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5171 }
5172 
5173 static int __init kvm_s390_init(void)
5174 {
5175 	int i;
5176 
5177 	if (!sclp.has_sief2) {
5178 		pr_info("SIE is not available\n");
5179 		return -ENODEV;
5180 	}
5181 
5182 	if (nested && hpage) {
5183 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5184 		return -EINVAL;
5185 	}
5186 
5187 	for (i = 0; i < 16; i++)
5188 		kvm_s390_fac_base[i] |=
5189 			stfle_fac_list[i] & nonhyp_mask(i);
5190 
5191 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5192 }
5193 
5194 static void __exit kvm_s390_exit(void)
5195 {
5196 	kvm_exit();
5197 }
5198 
5199 module_init(kvm_s390_init);
5200 module_exit(kvm_s390_exit);
5201 
5202 /*
5203  * Enable autoloading of the kvm module.
5204  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5205  * since x86 takes a different approach.
5206  */
5207 #include <linux/miscdevice.h>
5208 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5209 MODULE_ALIAS("devname:kvm");
5210