xref: /linux/arch/s390/kvm/kvm-s390.c (revision 140eb5227767c6754742020a16d2691222b9c19b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2017
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 
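/*
 * Map debugfs file names to per-vcpu statistics counters; the common KVM
 * code turns each entry into a debugfs file, and VCPU_STAT() resolves the
 * counter's offset within struct kvm_vcpu.
 */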
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
63 	{ "exit_null", VCPU_STAT(exit_null) },
64 	{ "exit_validity", VCPU_STAT(exit_validity) },
65 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
67 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
69 	{ "exit_pei", VCPU_STAT(exit_pei) },
70 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
90 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
92 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
93 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
94 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
95 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
98 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
99 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
100 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
101 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
102 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
103 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
105 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
122 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
123 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
124 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
125 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
126 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
127 	{ NULL }
128 };
129 
130 struct kvm_s390_tod_clock_ext {
131 	__u8 epoch_idx;
132 	__u64 tod;
133 	__u8 reserved[7];
134 } __packed;
135 
136 /* allow nested virtualization in KVM (if enabled by user space) */
137 static int nested;
138 module_param(nested, int, S_IRUGO);
139 MODULE_PARM_DESC(nested, "Nested virtualization support");
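/*
 * Note: nested (vSIE) support must be enabled at module load time, e.g.
 * "modprobe kvm nested=1" (assuming the module is named kvm); only then
 * does kvm_s390_cpu_feat_init() announce the SIE interpretation
 * facilities to user space.
 */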
140 
141 /* upper facilities limit for kvm */
142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
143 
144 unsigned long kvm_s390_fac_list_mask_size(void)
145 {
146 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
147 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
148 }
149 
150 /* available cpu features supported by kvm */
151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
152 /* available subfunctions indicated via query / "test bit" */
153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
154 
155 static struct gmap_notifier gmap_notifier;
156 static struct gmap_notifier vsie_gmap_notifier;
157 debug_info_t *kvm_s390_dbf;
158 
159 /* Section: not file related */
160 int kvm_arch_hardware_enable(void)
161 {
162 	/* every s390 is virtualization enabled ;-) */
163 	return 0;
164 }
165 
166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
167 			      unsigned long end);
168 
169 /*
170  * This callback is executed during stop_machine(). All CPUs are therefore
171  * temporarily stopped. In order not to change guest behavior, we have to
172  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
173  * so a CPU won't be stopped while calculating with the epoch.
174  */
175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
176 			  void *v)
177 {
178 	struct kvm *kvm;
179 	struct kvm_vcpu *vcpu;
180 	int i;
181 	unsigned long long *delta = v;
182 
183 	list_for_each_entry(kvm, &vm_list, vm_list) {
184 		kvm->arch.epoch -= *delta;
185 		kvm_for_each_vcpu(i, vcpu, kvm) {
186 			vcpu->arch.sie_block->epoch -= *delta;
187 			if (vcpu->arch.cputm_enabled)
188 				vcpu->arch.cputm_start += *delta;
189 			if (vcpu->arch.vsie_block)
190 				vcpu->arch.vsie_block->epoch -= *delta;
191 		}
192 	}
193 	return NOTIFY_OK;
194 }
195 
196 static struct notifier_block kvm_clock_notifier = {
197 	.notifier_call = kvm_clock_sync,
198 };
199 
200 int kvm_arch_hardware_setup(void)
201 {
202 	gmap_notifier.notifier_call = kvm_gmap_notifier;
203 	gmap_register_pte_notifier(&gmap_notifier);
204 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
205 	gmap_register_pte_notifier(&vsie_gmap_notifier);
206 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
207 				       &kvm_clock_notifier);
208 	return 0;
209 }
210 
211 void kvm_arch_hardware_unsetup(void)
212 {
213 	gmap_unregister_pte_notifier(&gmap_notifier);
214 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
215 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
216 					 &kvm_clock_notifier);
217 }
218 
219 static void allow_cpu_feat(unsigned long nr)
220 {
221 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
222 }
223 
224 static inline int plo_test_bit(unsigned char nr)
225 {
226 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
227 	int cc;
228 
229 	asm volatile(
230 		/* Parameter registers are ignored for "test bit" */
231 		"	plo	0,0,0,0(0)\n"
232 		"	ipm	%0\n"
233 		"	srl	%0,28\n"
234 		: "=d" (cc)
235 		: "d" (r0)
236 		: "cc");
237 	return cc == 0;
238 }
239 
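/*
 * Probe the host for PLO subfunctions, CPACF crypto subfunctions and the
 * SIE interpretation facilities, and record what may be offered to guests
 * via the CPU model attributes.
 */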
240 static void kvm_s390_cpu_feat_init(void)
241 {
242 	int i;
243 
244 	for (i = 0; i < 256; ++i) {
245 		if (plo_test_bit(i))
246 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
247 	}
248 
249 	if (test_facility(28)) /* TOD-clock steering */
250 		ptff(kvm_s390_available_subfunc.ptff,
251 		     sizeof(kvm_s390_available_subfunc.ptff),
252 		     PTFF_QAF);
253 
254 	if (test_facility(17)) { /* MSA */
255 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
256 			      kvm_s390_available_subfunc.kmac);
257 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
258 			      kvm_s390_available_subfunc.kmc);
259 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
260 			      kvm_s390_available_subfunc.km);
261 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
262 			      kvm_s390_available_subfunc.kimd);
263 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
264 			      kvm_s390_available_subfunc.klmd);
265 	}
266 	if (test_facility(76)) /* MSA3 */
267 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
268 			      kvm_s390_available_subfunc.pckmo);
269 	if (test_facility(77)) { /* MSA4 */
270 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.kmctr);
272 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
273 			      kvm_s390_available_subfunc.kmf);
274 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
275 			      kvm_s390_available_subfunc.kmo);
276 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
277 			      kvm_s390_available_subfunc.pcc);
278 	}
279 	if (test_facility(57)) /* MSA5 */
280 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
281 			      kvm_s390_available_subfunc.ppno);
282 
283 	if (test_facility(146)) /* MSA8 */
284 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
285 			      kvm_s390_available_subfunc.kma);
286 
287 	if (MACHINE_HAS_ESOP)
288 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
289 	/*
290 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
291 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
292 	 */
293 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
294 	    !test_facility(3) || !nested)
295 		return;
296 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
297 	if (sclp.has_64bscao)
298 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
299 	if (sclp.has_siif)
300 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
301 	if (sclp.has_gpere)
302 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
303 	if (sclp.has_gsls)
304 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
305 	if (sclp.has_ib)
306 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
307 	if (sclp.has_cei)
308 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
309 	if (sclp.has_ibs)
310 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
311 	if (sclp.has_kss)
312 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
313 	/*
314 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
315 	 * all skey handling functions read/set the skey from the PGSTE
316 	 * instead of the real storage key.
317 	 *
318 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
319 	 * pages to be detected as preserved although they are resident.
320 	 *
321 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
322 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
323 	 *
324 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
325 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
326 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
327 	 *
328 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
329 	 * cannot easily shadow the SCA because of the ipte lock.
330 	 */
331 }
332 
333 int kvm_arch_init(void *opaque)
334 {
335 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
336 	if (!kvm_s390_dbf)
337 		return -ENOMEM;
338 
339 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
340 		debug_unregister(kvm_s390_dbf);
341 		return -ENOMEM;
342 	}
343 
344 	kvm_s390_cpu_feat_init();
345 
346 	/* Register floating interrupt controller interface. */
347 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
348 }
349 
350 void kvm_arch_exit(void)
351 {
352 	debug_unregister(kvm_s390_dbf);
353 }
354 
355 /* Section: device related */
356 long kvm_arch_dev_ioctl(struct file *filp,
357 			unsigned int ioctl, unsigned long arg)
358 {
359 	if (ioctl == KVM_S390_ENABLE_SIE)
360 		return s390_enable_sie();
361 	return -EINVAL;
362 }
363 
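/*
 * Report which capabilities are supported. User space probes them with
 * the KVM_CHECK_EXTENSION ioctl, e.g. (illustrative snippet only):
 *
 *	max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * where a value > 0 means the capability is available; for this
 * particular capability the value is the maximum transfer size.
 */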
364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
365 {
366 	int r;
367 
368 	switch (ext) {
369 	case KVM_CAP_S390_PSW:
370 	case KVM_CAP_S390_GMAP:
371 	case KVM_CAP_SYNC_MMU:
372 #ifdef CONFIG_KVM_S390_UCONTROL
373 	case KVM_CAP_S390_UCONTROL:
374 #endif
375 	case KVM_CAP_ASYNC_PF:
376 	case KVM_CAP_SYNC_REGS:
377 	case KVM_CAP_ONE_REG:
378 	case KVM_CAP_ENABLE_CAP:
379 	case KVM_CAP_S390_CSS_SUPPORT:
380 	case KVM_CAP_IOEVENTFD:
381 	case KVM_CAP_DEVICE_CTRL:
382 	case KVM_CAP_ENABLE_CAP_VM:
383 	case KVM_CAP_S390_IRQCHIP:
384 	case KVM_CAP_VM_ATTRIBUTES:
385 	case KVM_CAP_MP_STATE:
386 	case KVM_CAP_IMMEDIATE_EXIT:
387 	case KVM_CAP_S390_INJECT_IRQ:
388 	case KVM_CAP_S390_USER_SIGP:
389 	case KVM_CAP_S390_USER_STSI:
390 	case KVM_CAP_S390_SKEYS:
391 	case KVM_CAP_S390_IRQ_STATE:
392 	case KVM_CAP_S390_USER_INSTR0:
393 	case KVM_CAP_S390_CMMA_MIGRATION:
394 	case KVM_CAP_S390_AIS:
395 	case KVM_CAP_S390_AIS_MIGRATION:
396 		r = 1;
397 		break;
398 	case KVM_CAP_S390_MEM_OP:
399 		r = MEM_OP_MAX_SIZE;
400 		break;
401 	case KVM_CAP_NR_VCPUS:
402 	case KVM_CAP_MAX_VCPUS:
403 		r = KVM_S390_BSCA_CPU_SLOTS;
404 		if (!kvm_s390_use_sca_entries())
405 			r = KVM_MAX_VCPUS;
406 		else if (sclp.has_esca && sclp.has_64bscao)
407 			r = KVM_S390_ESCA_CPU_SLOTS;
408 		break;
409 	case KVM_CAP_NR_MEMSLOTS:
410 		r = KVM_USER_MEM_SLOTS;
411 		break;
412 	case KVM_CAP_S390_COW:
413 		r = MACHINE_HAS_ESOP;
414 		break;
415 	case KVM_CAP_S390_VECTOR_REGISTERS:
416 		r = MACHINE_HAS_VX;
417 		break;
418 	case KVM_CAP_S390_RI:
419 		r = test_facility(64);
420 		break;
421 	case KVM_CAP_S390_GS:
422 		r = test_facility(133);
423 		break;
424 	case KVM_CAP_S390_BPB:
425 		r = test_facility(82);
426 		break;
427 	default:
428 		r = 0;
429 	}
430 	return r;
431 }
432 
433 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
434 					struct kvm_memory_slot *memslot)
435 {
436 	gfn_t cur_gfn, last_gfn;
437 	unsigned long address;
438 	struct gmap *gmap = kvm->arch.gmap;
439 
440 	/* Loop over all guest pages */
441 	last_gfn = memslot->base_gfn + memslot->npages;
442 	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
443 		address = gfn_to_hva_memslot(memslot, cur_gfn);
444 
445 		if (test_and_clear_guest_dirty(gmap->mm, address))
446 			mark_page_dirty(kvm, cur_gfn);
447 		if (fatal_signal_pending(current))
448 			return;
449 		cond_resched();
450 	}
451 }
452 
453 /* Section: vm related */
454 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
455 
456 /*
457  * Get (and clear) the dirty memory log for a memory slot.
458  */
459 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
460 			       struct kvm_dirty_log *log)
461 {
462 	int r;
463 	unsigned long n;
464 	struct kvm_memslots *slots;
465 	struct kvm_memory_slot *memslot;
466 	int is_dirty = 0;
467 
468 	if (kvm_is_ucontrol(kvm))
469 		return -EINVAL;
470 
471 	mutex_lock(&kvm->slots_lock);
472 
473 	r = -EINVAL;
474 	if (log->slot >= KVM_USER_MEM_SLOTS)
475 		goto out;
476 
477 	slots = kvm_memslots(kvm);
478 	memslot = id_to_memslot(slots, log->slot);
479 	r = -ENOENT;
480 	if (!memslot->dirty_bitmap)
481 		goto out;
482 
483 	kvm_s390_sync_dirty_log(kvm, memslot);
484 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
485 	if (r)
486 		goto out;
487 
488 	/* Clear the dirty log */
489 	if (is_dirty) {
490 		n = kvm_dirty_bitmap_bytes(memslot);
491 		memset(memslot->dirty_bitmap, 0, n);
492 	}
493 	r = 0;
494 out:
495 	mutex_unlock(&kvm->slots_lock);
496 	return r;
497 }
498 
499 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
500 {
501 	unsigned int i;
502 	struct kvm_vcpu *vcpu;
503 
504 	kvm_for_each_vcpu(i, vcpu, kvm) {
505 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
506 	}
507 }
508 
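/*
 * Handle KVM_ENABLE_CAP on the VM fd. Capabilities that extend the CPU
 * model (vector, RI, AIS, GS) must be enabled while no VCPUs exist and
 * fail with -EBUSY afterwards.
 */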
509 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
510 {
511 	int r;
512 
513 	if (cap->flags)
514 		return -EINVAL;
515 
516 	switch (cap->cap) {
517 	case KVM_CAP_S390_IRQCHIP:
518 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
519 		kvm->arch.use_irqchip = 1;
520 		r = 0;
521 		break;
522 	case KVM_CAP_S390_USER_SIGP:
523 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
524 		kvm->arch.user_sigp = 1;
525 		r = 0;
526 		break;
527 	case KVM_CAP_S390_VECTOR_REGISTERS:
528 		mutex_lock(&kvm->lock);
529 		if (kvm->created_vcpus) {
530 			r = -EBUSY;
531 		} else if (MACHINE_HAS_VX) {
532 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
533 			set_kvm_facility(kvm->arch.model.fac_list, 129);
534 			if (test_facility(134)) {
535 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
536 				set_kvm_facility(kvm->arch.model.fac_list, 134);
537 			}
538 			if (test_facility(135)) {
539 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
540 				set_kvm_facility(kvm->arch.model.fac_list, 135);
541 			}
542 			r = 0;
543 		} else
544 			r = -EINVAL;
545 		mutex_unlock(&kvm->lock);
546 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
547 			 r ? "(not available)" : "(success)");
548 		break;
549 	case KVM_CAP_S390_RI:
550 		r = -EINVAL;
551 		mutex_lock(&kvm->lock);
552 		if (kvm->created_vcpus) {
553 			r = -EBUSY;
554 		} else if (test_facility(64)) {
555 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
556 			set_kvm_facility(kvm->arch.model.fac_list, 64);
557 			r = 0;
558 		}
559 		mutex_unlock(&kvm->lock);
560 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
561 			 r ? "(not available)" : "(success)");
562 		break;
563 	case KVM_CAP_S390_AIS:
564 		mutex_lock(&kvm->lock);
565 		if (kvm->created_vcpus) {
566 			r = -EBUSY;
567 		} else {
568 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
569 			set_kvm_facility(kvm->arch.model.fac_list, 72);
570 			r = 0;
571 		}
572 		mutex_unlock(&kvm->lock);
573 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
574 			 r ? "(not available)" : "(success)");
575 		break;
576 	case KVM_CAP_S390_GS:
577 		r = -EINVAL;
578 		mutex_lock(&kvm->lock);
579 		if (atomic_read(&kvm->online_vcpus)) {
580 			r = -EBUSY;
581 		} else if (test_facility(133)) {
582 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
583 			set_kvm_facility(kvm->arch.model.fac_list, 133);
584 			r = 0;
585 		}
586 		mutex_unlock(&kvm->lock);
587 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
588 			 r ? "(not available)" : "(success)");
589 		break;
590 	case KVM_CAP_S390_USER_STSI:
591 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
592 		kvm->arch.user_stsi = 1;
593 		r = 0;
594 		break;
595 	case KVM_CAP_S390_USER_INSTR0:
596 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
597 		kvm->arch.user_instr0 = 1;
598 		icpt_operexc_on_all_vcpus(kvm);
599 		r = 0;
600 		break;
601 	default:
602 		r = -EINVAL;
603 		break;
604 	}
605 	return r;
606 }
607 
608 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
609 {
610 	int ret;
611 
612 	switch (attr->attr) {
613 	case KVM_S390_VM_MEM_LIMIT_SIZE:
614 		ret = 0;
615 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
616 			 kvm->arch.mem_limit);
617 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
618 			ret = -EFAULT;
619 		break;
620 	default:
621 		ret = -ENXIO;
622 		break;
623 	}
624 	return ret;
625 }
626 
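/*
 * KVM_S390_VM_MEM_CTRL attribute group: enable or reset CMMA and set the
 * guest memory limit. Enabling CMMA and changing the limit (which
 * recreates the gmap) are only accepted before the first VCPU is created.
 */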
627 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
628 {
629 	int ret;
630 	unsigned int idx;
631 	switch (attr->attr) {
632 	case KVM_S390_VM_MEM_ENABLE_CMMA:
633 		ret = -ENXIO;
634 		if (!sclp.has_cmma)
635 			break;
636 
637 		ret = -EBUSY;
638 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
639 		mutex_lock(&kvm->lock);
640 		if (!kvm->created_vcpus) {
641 			kvm->arch.use_cmma = 1;
642 			ret = 0;
643 		}
644 		mutex_unlock(&kvm->lock);
645 		break;
646 	case KVM_S390_VM_MEM_CLR_CMMA:
647 		ret = -ENXIO;
648 		if (!sclp.has_cmma)
649 			break;
650 		ret = -EINVAL;
651 		if (!kvm->arch.use_cmma)
652 			break;
653 
654 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
655 		mutex_lock(&kvm->lock);
656 		idx = srcu_read_lock(&kvm->srcu);
657 		s390_reset_cmma(kvm->arch.gmap->mm);
658 		srcu_read_unlock(&kvm->srcu, idx);
659 		mutex_unlock(&kvm->lock);
660 		ret = 0;
661 		break;
662 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
663 		unsigned long new_limit;
664 
665 		if (kvm_is_ucontrol(kvm))
666 			return -EINVAL;
667 
668 		if (get_user(new_limit, (u64 __user *)attr->addr))
669 			return -EFAULT;
670 
671 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
672 		    new_limit > kvm->arch.mem_limit)
673 			return -E2BIG;
674 
675 		if (!new_limit)
676 			return -EINVAL;
677 
678 		/* gmap_create takes last usable address */
679 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
680 			new_limit -= 1;
681 
682 		ret = -EBUSY;
683 		mutex_lock(&kvm->lock);
684 		if (!kvm->created_vcpus) {
685 			/* gmap_create will round the limit up */
686 			struct gmap *new = gmap_create(current->mm, new_limit);
687 
688 			if (!new) {
689 				ret = -ENOMEM;
690 			} else {
691 				gmap_remove(kvm->arch.gmap);
692 				new->private = kvm;
693 				kvm->arch.gmap = new;
694 				ret = 0;
695 			}
696 		}
697 		mutex_unlock(&kvm->lock);
698 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
699 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
700 			 (void *) kvm->arch.gmap->asce);
701 		break;
702 	}
703 	default:
704 		ret = -ENXIO;
705 		break;
706 	}
707 	return ret;
708 }
709 
710 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
711 
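/*
 * KVM_S390_VM_CRYPTO attribute group: toggle AES/DEA protected key
 * (key wrapping) support. A fresh wrapping key mask is generated on
 * enable and cleared on disable; all VCPUs are kicked out of SIE so that
 * they pick up the changed CRYCB on the next entry.
 */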
712 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714 	struct kvm_vcpu *vcpu;
715 	int i;
716 
717 	if (!test_kvm_facility(kvm, 76))
718 		return -EINVAL;
719 
720 	mutex_lock(&kvm->lock);
721 	switch (attr->attr) {
722 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723 		get_random_bytes(
724 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
725 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
726 		kvm->arch.crypto.aes_kw = 1;
727 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
728 		break;
729 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
730 		get_random_bytes(
731 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
732 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
733 		kvm->arch.crypto.dea_kw = 1;
734 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
735 		break;
736 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
737 		kvm->arch.crypto.aes_kw = 0;
738 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
739 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
740 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
741 		break;
742 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
743 		kvm->arch.crypto.dea_kw = 0;
744 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
745 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
746 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
747 		break;
748 	default:
749 		mutex_unlock(&kvm->lock);
750 		return -ENXIO;
751 	}
752 
753 	kvm_for_each_vcpu(i, vcpu, kvm) {
754 		kvm_s390_vcpu_crypto_setup(vcpu);
755 		exit_sie(vcpu);
756 	}
757 	mutex_unlock(&kvm->lock);
758 	return 0;
759 }
760 
761 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
762 {
763 	int cx;
764 	struct kvm_vcpu *vcpu;
765 
766 	kvm_for_each_vcpu(cx, vcpu, kvm)
767 		kvm_s390_sync_request(req, vcpu);
768 }
769 
770 /*
771  * Must be called with kvm->srcu held to avoid races on memslots, and with
772  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
773  */
774 static int kvm_s390_vm_start_migration(struct kvm *kvm)
775 {
776 	struct kvm_s390_migration_state *mgs;
777 	struct kvm_memory_slot *ms;
778 	/* should be the only one */
779 	struct kvm_memslots *slots;
780 	unsigned long ram_pages;
781 	int slotnr;
782 
783 	/* migration mode already enabled */
784 	if (kvm->arch.migration_state)
785 		return 0;
786 
787 	slots = kvm_memslots(kvm);
788 	if (!slots || !slots->used_slots)
789 		return -EINVAL;
790 
791 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
792 	if (!mgs)
793 		return -ENOMEM;
794 	kvm->arch.migration_state = mgs;
795 
796 	if (kvm->arch.use_cmma) {
797 		/*
798 		 * Get the first slot. They are reverse sorted by base_gfn, so
799 		 * the first slot is also the one at the end of the address
800 		 * space. We have verified above that at least one slot is
801 		 * present.
802 		 */
803 		ms = slots->memslots;
804 		/* round up so we only use full longs */
805 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
806 		/* allocate enough bytes to store all the bits */
807 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
808 		if (!mgs->pgste_bitmap) {
809 			kfree(mgs);
810 			kvm->arch.migration_state = NULL;
811 			return -ENOMEM;
812 		}
813 
814 		mgs->bitmap_size = ram_pages;
815 		atomic64_set(&mgs->dirty_pages, ram_pages);
816 		/* mark all the pages in active slots as dirty */
817 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
818 			ms = slots->memslots + slotnr;
819 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
820 		}
821 
822 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
823 	}
824 	return 0;
825 }
826 
827 /*
828  * Must be called with kvm->slots_lock to avoid races with ourselves and
829  * kvm_s390_vm_start_migration.
830  */
831 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
832 {
833 	struct kvm_s390_migration_state *mgs;
834 
835 	/* migration mode already disabled */
836 	if (!kvm->arch.migration_state)
837 		return 0;
838 	mgs = kvm->arch.migration_state;
839 	kvm->arch.migration_state = NULL;
840 
841 	if (kvm->arch.use_cmma) {
842 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
843 		/* We have to wait for the essa emulation to finish */
844 		synchronize_srcu(&kvm->srcu);
845 		vfree(mgs->pgste_bitmap);
846 	}
847 	kfree(mgs);
848 	return 0;
849 }
850 
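/*
 * User space toggles migration mode with KVM_SET_DEVICE_ATTR on the VM
 * fd, e.g. (illustrative snippet only):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */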
851 static int kvm_s390_vm_set_migration(struct kvm *kvm,
852 				     struct kvm_device_attr *attr)
853 {
854 	int res = -ENXIO;
855 
856 	mutex_lock(&kvm->slots_lock);
857 	switch (attr->attr) {
858 	case KVM_S390_VM_MIGRATION_START:
859 		res = kvm_s390_vm_start_migration(kvm);
860 		break;
861 	case KVM_S390_VM_MIGRATION_STOP:
862 		res = kvm_s390_vm_stop_migration(kvm);
863 		break;
864 	default:
865 		break;
866 	}
867 	mutex_unlock(&kvm->slots_lock);
868 
869 	return res;
870 }
871 
872 static int kvm_s390_vm_get_migration(struct kvm *kvm,
873 				     struct kvm_device_attr *attr)
874 {
875 	u64 mig = (kvm->arch.migration_state != NULL);
876 
877 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
878 		return -ENXIO;
879 
880 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
881 		return -EFAULT;
882 	return 0;
883 }
884 
885 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
886 {
887 	struct kvm_s390_vm_tod_clock gtod;
888 
889 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
890 		return -EFAULT;
891 
892 	if (test_kvm_facility(kvm, 139))
893 		kvm_s390_set_tod_clock_ext(kvm, &gtod);
894 	else if (gtod.epoch_idx == 0)
895 		kvm_s390_set_tod_clock(kvm, gtod.tod);
896 	else
897 		return -EINVAL;
898 
899 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
900 		gtod.epoch_idx, gtod.tod);
901 
902 	return 0;
903 }
904 
905 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907 	u8 gtod_high;
908 
909 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
910 					   sizeof(gtod_high)))
911 		return -EFAULT;
912 
913 	if (gtod_high != 0)
914 		return -EINVAL;
915 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
916 
917 	return 0;
918 }
919 
920 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
921 {
922 	u64 gtod;
923 
924 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
925 		return -EFAULT;
926 
927 	kvm_s390_set_tod_clock(kvm, gtod);
928 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
929 	return 0;
930 }
931 
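/*
 * Dispatcher for the KVM_S390_VM_TOD attribute group. The guest TOD is
 * the host TOD plus the per-VM epoch (and epoch index, if the
 * multiple-epoch facility 139 is available to the guest).
 */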
932 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
933 {
934 	int ret;
935 
936 	if (attr->flags)
937 		return -EINVAL;
938 
939 	switch (attr->attr) {
940 	case KVM_S390_VM_TOD_EXT:
941 		ret = kvm_s390_set_tod_ext(kvm, attr);
942 		break;
943 	case KVM_S390_VM_TOD_HIGH:
944 		ret = kvm_s390_set_tod_high(kvm, attr);
945 		break;
946 	case KVM_S390_VM_TOD_LOW:
947 		ret = kvm_s390_set_tod_low(kvm, attr);
948 		break;
949 	default:
950 		ret = -ENXIO;
951 		break;
952 	}
953 	return ret;
954 }
955 
956 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
957 					struct kvm_s390_vm_tod_clock *gtod)
958 {
959 	struct kvm_s390_tod_clock_ext htod;
960 
961 	preempt_disable();
962 
963 	get_tod_clock_ext((char *)&htod);
964 
965 	gtod->tod = htod.tod + kvm->arch.epoch;
966 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
967 
968 	if (gtod->tod < htod.tod)
969 		gtod->epoch_idx += 1;
970 
971 	preempt_enable();
972 }
973 
974 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
975 {
976 	struct kvm_s390_vm_tod_clock gtod;
977 
978 	memset(&gtod, 0, sizeof(gtod));
979 
980 	if (test_kvm_facility(kvm, 139))
981 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
982 	else
983 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
984 
985 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
986 		return -EFAULT;
987 
988 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
989 		gtod.epoch_idx, gtod.tod);
990 	return 0;
991 }
992 
993 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
994 {
995 	u8 gtod_high = 0;
996 
997 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
998 					 sizeof(gtod_high)))
999 		return -EFAULT;
1000 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1001 
1002 	return 0;
1003 }
1004 
1005 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007 	u64 gtod;
1008 
1009 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1010 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1011 		return -EFAULT;
1012 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1013 
1014 	return 0;
1015 }
1016 
1017 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1018 {
1019 	int ret;
1020 
1021 	if (attr->flags)
1022 		return -EINVAL;
1023 
1024 	switch (attr->attr) {
1025 	case KVM_S390_VM_TOD_EXT:
1026 		ret = kvm_s390_get_tod_ext(kvm, attr);
1027 		break;
1028 	case KVM_S390_VM_TOD_HIGH:
1029 		ret = kvm_s390_get_tod_high(kvm, attr);
1030 		break;
1031 	case KVM_S390_VM_TOD_LOW:
1032 		ret = kvm_s390_get_tod_low(kvm, attr);
1033 		break;
1034 	default:
1035 		ret = -ENXIO;
1036 		break;
1037 	}
1038 	return ret;
1039 }
1040 
1041 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1042 {
1043 	struct kvm_s390_vm_cpu_processor *proc;
1044 	u16 lowest_ibc, unblocked_ibc;
1045 	int ret = 0;
1046 
1047 	mutex_lock(&kvm->lock);
1048 	if (kvm->created_vcpus) {
1049 		ret = -EBUSY;
1050 		goto out;
1051 	}
1052 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1053 	if (!proc) {
1054 		ret = -ENOMEM;
1055 		goto out;
1056 	}
1057 	if (!copy_from_user(proc, (void __user *)attr->addr,
1058 			    sizeof(*proc))) {
1059 		kvm->arch.model.cpuid = proc->cpuid;
1060 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1061 		unblocked_ibc = sclp.ibc & 0xfff;
1062 		if (lowest_ibc && proc->ibc) {
1063 			if (proc->ibc > unblocked_ibc)
1064 				kvm->arch.model.ibc = unblocked_ibc;
1065 			else if (proc->ibc < lowest_ibc)
1066 				kvm->arch.model.ibc = lowest_ibc;
1067 			else
1068 				kvm->arch.model.ibc = proc->ibc;
1069 		}
1070 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1071 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1072 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1073 			 kvm->arch.model.ibc,
1074 			 kvm->arch.model.cpuid);
1075 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1076 			 kvm->arch.model.fac_list[0],
1077 			 kvm->arch.model.fac_list[1],
1078 			 kvm->arch.model.fac_list[2]);
1079 	} else
1080 		ret = -EFAULT;
1081 	kfree(proc);
1082 out:
1083 	mutex_unlock(&kvm->lock);
1084 	return ret;
1085 }
1086 
1087 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1088 				       struct kvm_device_attr *attr)
1089 {
1090 	struct kvm_s390_vm_cpu_feat data;
1091 	int ret = -EBUSY;
1092 
1093 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1094 		return -EFAULT;
1095 	if (!bitmap_subset((unsigned long *) data.feat,
1096 			   kvm_s390_available_cpu_feat,
1097 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1098 		return -EINVAL;
1099 
1100 	mutex_lock(&kvm->lock);
1101 	if (!atomic_read(&kvm->online_vcpus)) {
1102 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1103 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
1104 		ret = 0;
1105 	}
1106 	mutex_unlock(&kvm->lock);
1107 	return ret;
1108 }
1109 
1110 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1111 					  struct kvm_device_attr *attr)
1112 {
1113 	/*
1114 	 * Once supported by kernel + hw, we have to store the subfunctions
1115 	 * in kvm->arch and remember that user space configured them.
1116 	 */
1117 	return -ENXIO;
1118 }
1119 
1120 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1121 {
1122 	int ret = -ENXIO;
1123 
1124 	switch (attr->attr) {
1125 	case KVM_S390_VM_CPU_PROCESSOR:
1126 		ret = kvm_s390_set_processor(kvm, attr);
1127 		break;
1128 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1129 		ret = kvm_s390_set_processor_feat(kvm, attr);
1130 		break;
1131 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1132 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1133 		break;
1134 	}
1135 	return ret;
1136 }
1137 
1138 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140 	struct kvm_s390_vm_cpu_processor *proc;
1141 	int ret = 0;
1142 
1143 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1144 	if (!proc) {
1145 		ret = -ENOMEM;
1146 		goto out;
1147 	}
1148 	proc->cpuid = kvm->arch.model.cpuid;
1149 	proc->ibc = kvm->arch.model.ibc;
1150 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1151 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1152 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153 		 kvm->arch.model.ibc,
1154 		 kvm->arch.model.cpuid);
1155 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156 		 kvm->arch.model.fac_list[0],
1157 		 kvm->arch.model.fac_list[1],
1158 		 kvm->arch.model.fac_list[2]);
1159 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1160 		ret = -EFAULT;
1161 	kfree(proc);
1162 out:
1163 	return ret;
1164 }
1165 
1166 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168 	struct kvm_s390_vm_cpu_machine *mach;
1169 	int ret = 0;
1170 
1171 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1172 	if (!mach) {
1173 		ret = -ENOMEM;
1174 		goto out;
1175 	}
1176 	get_cpu_id((struct cpuid *) &mach->cpuid);
1177 	mach->ibc = sclp.ibc;
1178 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1179 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1180 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1181 	       sizeof(S390_lowcore.stfle_fac_list));
1182 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1183 		 kvm->arch.model.ibc,
1184 		 kvm->arch.model.cpuid);
1185 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1186 		 mach->fac_mask[0],
1187 		 mach->fac_mask[1],
1188 		 mach->fac_mask[2]);
1189 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1190 		 mach->fac_list[0],
1191 		 mach->fac_list[1],
1192 		 mach->fac_list[2]);
1193 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1194 		ret = -EFAULT;
1195 	kfree(mach);
1196 out:
1197 	return ret;
1198 }
1199 
1200 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1201 				       struct kvm_device_attr *attr)
1202 {
1203 	struct kvm_s390_vm_cpu_feat data;
1204 
1205 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1206 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1207 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1208 		return -EFAULT;
1209 	return 0;
1210 }
1211 
1212 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1213 				     struct kvm_device_attr *attr)
1214 {
1215 	struct kvm_s390_vm_cpu_feat data;
1216 
1217 	bitmap_copy((unsigned long *) data.feat,
1218 		    kvm_s390_available_cpu_feat,
1219 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1220 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1221 		return -EFAULT;
1222 	return 0;
1223 }
1224 
1225 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1226 					  struct kvm_device_attr *attr)
1227 {
1228 	/*
1229 	 * Once we can actually configure subfunctions (kernel + hw support),
1230 	 * we have to check if they were already set by user space, if so copy
1231 	 * them from kvm->arch.
1232 	 */
1233 	return -ENXIO;
1234 }
1235 
1236 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1237 					struct kvm_device_attr *attr)
1238 {
1239 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1240 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1241 		return -EFAULT;
1242 	return 0;
1243 }
1244 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1245 {
1246 	int ret = -ENXIO;
1247 
1248 	switch (attr->attr) {
1249 	case KVM_S390_VM_CPU_PROCESSOR:
1250 		ret = kvm_s390_get_processor(kvm, attr);
1251 		break;
1252 	case KVM_S390_VM_CPU_MACHINE:
1253 		ret = kvm_s390_get_machine(kvm, attr);
1254 		break;
1255 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1256 		ret = kvm_s390_get_processor_feat(kvm, attr);
1257 		break;
1258 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1259 		ret = kvm_s390_get_machine_feat(kvm, attr);
1260 		break;
1261 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1262 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1263 		break;
1264 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1265 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1266 		break;
1267 	}
1268 	return ret;
1269 }
1270 
1271 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1272 {
1273 	int ret;
1274 
1275 	switch (attr->group) {
1276 	case KVM_S390_VM_MEM_CTRL:
1277 		ret = kvm_s390_set_mem_control(kvm, attr);
1278 		break;
1279 	case KVM_S390_VM_TOD:
1280 		ret = kvm_s390_set_tod(kvm, attr);
1281 		break;
1282 	case KVM_S390_VM_CPU_MODEL:
1283 		ret = kvm_s390_set_cpu_model(kvm, attr);
1284 		break;
1285 	case KVM_S390_VM_CRYPTO:
1286 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1287 		break;
1288 	case KVM_S390_VM_MIGRATION:
1289 		ret = kvm_s390_vm_set_migration(kvm, attr);
1290 		break;
1291 	default:
1292 		ret = -ENXIO;
1293 		break;
1294 	}
1295 
1296 	return ret;
1297 }
1298 
1299 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1300 {
1301 	int ret;
1302 
1303 	switch (attr->group) {
1304 	case KVM_S390_VM_MEM_CTRL:
1305 		ret = kvm_s390_get_mem_control(kvm, attr);
1306 		break;
1307 	case KVM_S390_VM_TOD:
1308 		ret = kvm_s390_get_tod(kvm, attr);
1309 		break;
1310 	case KVM_S390_VM_CPU_MODEL:
1311 		ret = kvm_s390_get_cpu_model(kvm, attr);
1312 		break;
1313 	case KVM_S390_VM_MIGRATION:
1314 		ret = kvm_s390_vm_get_migration(kvm, attr);
1315 		break;
1316 	default:
1317 		ret = -ENXIO;
1318 		break;
1319 	}
1320 
1321 	return ret;
1322 }
1323 
1324 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1325 {
1326 	int ret;
1327 
1328 	switch (attr->group) {
1329 	case KVM_S390_VM_MEM_CTRL:
1330 		switch (attr->attr) {
1331 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1332 		case KVM_S390_VM_MEM_CLR_CMMA:
1333 			ret = sclp.has_cmma ? 0 : -ENXIO;
1334 			break;
1335 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1336 			ret = 0;
1337 			break;
1338 		default:
1339 			ret = -ENXIO;
1340 			break;
1341 		}
1342 		break;
1343 	case KVM_S390_VM_TOD:
1344 		switch (attr->attr) {
1345 		case KVM_S390_VM_TOD_LOW:
1346 		case KVM_S390_VM_TOD_HIGH:
1347 			ret = 0;
1348 			break;
1349 		default:
1350 			ret = -ENXIO;
1351 			break;
1352 		}
1353 		break;
1354 	case KVM_S390_VM_CPU_MODEL:
1355 		switch (attr->attr) {
1356 		case KVM_S390_VM_CPU_PROCESSOR:
1357 		case KVM_S390_VM_CPU_MACHINE:
1358 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1359 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1360 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1361 			ret = 0;
1362 			break;
1363 		/* configuring subfunctions is not supported yet */
1364 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1365 		default:
1366 			ret = -ENXIO;
1367 			break;
1368 		}
1369 		break;
1370 	case KVM_S390_VM_CRYPTO:
1371 		switch (attr->attr) {
1372 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1373 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1374 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1375 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1376 			ret = 0;
1377 			break;
1378 		default:
1379 			ret = -ENXIO;
1380 			break;
1381 		}
1382 		break;
1383 	case KVM_S390_VM_MIGRATION:
1384 		ret = 0;
1385 		break;
1386 	default:
1387 		ret = -ENXIO;
1388 		break;
1389 	}
1390 
1391 	return ret;
1392 }
1393 
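/*
 * KVM_S390_GET_SKEYS: read up to KVM_S390_SKEYS_MAX guest storage keys
 * into a user buffer. Returns KVM_S390_GET_SKEYS_NONE if the guest is not
 * using storage keys at all.
 */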
1394 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1395 {
1396 	uint8_t *keys;
1397 	uint64_t hva;
1398 	int srcu_idx, i, r = 0;
1399 
1400 	if (args->flags != 0)
1401 		return -EINVAL;
1402 
1403 	/* Is this guest using storage keys? */
1404 	if (!mm_use_skey(current->mm))
1405 		return KVM_S390_GET_SKEYS_NONE;
1406 
1407 	/* Enforce sane limit on memory allocation */
1408 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1409 		return -EINVAL;
1410 
1411 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1412 	if (!keys)
1413 		return -ENOMEM;
1414 
1415 	down_read(&current->mm->mmap_sem);
1416 	srcu_idx = srcu_read_lock(&kvm->srcu);
1417 	for (i = 0; i < args->count; i++) {
1418 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1419 		if (kvm_is_error_hva(hva)) {
1420 			r = -EFAULT;
1421 			break;
1422 		}
1423 
1424 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1425 		if (r)
1426 			break;
1427 	}
1428 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1429 	up_read(&current->mm->mmap_sem);
1430 
1431 	if (!r) {
1432 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1433 				 sizeof(uint8_t) * args->count);
1434 		if (r)
1435 			r = -EFAULT;
1436 	}
1437 
1438 	kvfree(keys);
1439 	return r;
1440 }
1441 
1442 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1443 {
1444 	uint8_t *keys;
1445 	uint64_t hva;
1446 	int srcu_idx, i, r = 0;
1447 
1448 	if (args->flags != 0)
1449 		return -EINVAL;
1450 
1451 	/* Enforce sane limit on memory allocation */
1452 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1453 		return -EINVAL;
1454 
1455 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1456 	if (!keys)
1457 		return -ENOMEM;
1458 
1459 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1460 			   sizeof(uint8_t) * args->count);
1461 	if (r) {
1462 		r = -EFAULT;
1463 		goto out;
1464 	}
1465 
1466 	/* Enable storage key handling for the guest */
1467 	r = s390_enable_skey();
1468 	if (r)
1469 		goto out;
1470 
1471 	down_read(&current->mm->mmap_sem);
1472 	srcu_idx = srcu_read_lock(&kvm->srcu);
1473 	for (i = 0; i < args->count; i++) {
1474 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1475 		if (kvm_is_error_hva(hva)) {
1476 			r = -EFAULT;
1477 			break;
1478 		}
1479 
1480 		/* Lowest order bit is reserved */
1481 		if (keys[i] & 0x01) {
1482 			r = -EINVAL;
1483 			break;
1484 		}
1485 
1486 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1487 		if (r)
1488 			break;
1489 	}
1490 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1491 	up_read(&current->mm->mmap_sem);
1492 out:
1493 	kvfree(keys);
1494 	return r;
1495 }
1496 
1497 /*
1498  * Base address and length must be sent at the start of each block, therefore
1499  * it's cheaper to send some clean data, as long as it's less than the size of
1500  * two longs.
1501  */
1502 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1503 /* for consistency */
1504 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1505 
1506 /*
1507  * This function searches for the next page with dirty CMMA attributes, and
1508  * saves the attributes in the buffer up to either the end of the buffer or
1509  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510  * no trailing clean bytes are saved.
1511  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1512  * output buffer will indicate 0 as length.
1513  */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515 				  struct kvm_s390_cmma_log *args)
1516 {
1517 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518 	unsigned long bufsize, hva, pgstev, i, next, cur;
1519 	int srcu_idx, peek, r = 0, rr;
1520 	u8 *res;
1521 
1522 	cur = args->start_gfn;
1523 	i = next = pgstev = 0;
1524 
1525 	if (unlikely(!kvm->arch.use_cmma))
1526 		return -ENXIO;
1527 	/* Invalid/unsupported flags were specified */
1528 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1529 		return -EINVAL;
1530 	/* Migration mode query, and we are not doing a migration */
1531 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1532 	if (!peek && !s)
1533 		return -EINVAL;
1534 	/* CMMA is disabled or was not used, or the buffer has length zero */
1535 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536 	if (!bufsize || !kvm->mm->context.use_cmma) {
1537 		memset(args, 0, sizeof(*args));
1538 		return 0;
1539 	}
1540 
1541 	if (!peek) {
1542 		/* We are not peeking, and there are no dirty pages */
1543 		if (!atomic64_read(&s->dirty_pages)) {
1544 			memset(args, 0, sizeof(*args));
1545 			return 0;
1546 		}
1547 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1548 				    args->start_gfn);
1549 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1550 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1552 			memset(args, 0, sizeof(*args));
1553 			return 0;
1554 		}
1555 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1556 	}
1557 
1558 	res = vmalloc(bufsize);
1559 	if (!res)
1560 		return -ENOMEM;
1561 
1562 	args->start_gfn = cur;
1563 
1564 	down_read(&kvm->mm->mmap_sem);
1565 	srcu_idx = srcu_read_lock(&kvm->srcu);
1566 	while (i < bufsize) {
1567 		hva = gfn_to_hva(kvm, cur);
1568 		if (kvm_is_error_hva(hva)) {
1569 			r = -EFAULT;
1570 			break;
1571 		}
1572 		/* decrement only if we actually flipped the bit to 0 */
1573 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574 			atomic64_dec(&s->dirty_pages);
1575 		r = get_pgste(kvm->mm, hva, &pgstev);
1576 		if (r < 0)
1577 			pgstev = 0;
1578 		/* save the value */
1579 		res[i++] = (pgstev >> 24) & 0x43;
1580 		/*
1581 		 * if the next bit is too far away, stop.
1582 		 * if we reached the previous "next", find the next one
1583 		 */
1584 		if (!peek) {
1585 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1586 				break;
1587 			if (cur == next)
1588 				next = find_next_bit(s->pgste_bitmap,
1589 						     s->bitmap_size, cur + 1);
1590 			/* reached the end of the bitmap or of the buffer, stop */
1591 			if ((next >= s->bitmap_size) ||
1592 			    (next >= args->start_gfn + bufsize))
1593 				break;
1594 		}
1595 		cur++;
1596 	}
1597 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1598 	up_read(&kvm->mm->mmap_sem);
1599 	args->count = i;
1600 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1601 
1602 	rr = copy_to_user((void __user *)args->values, res, args->count);
1603 	if (rr)
1604 		r = -EFAULT;
1605 
1606 	vfree(res);
1607 	return r;
1608 }
1609 
1610 /*
1611  * This function sets the CMMA attributes for the given pages. If the input
1612  * buffer has zero length, no action is taken, otherwise the attributes are
1613  * set and the mm->context.use_cmma flag is set.
1614  */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616 				  const struct kvm_s390_cmma_log *args)
1617 {
1618 	unsigned long hva, mask, pgstev, i;
1619 	uint8_t *bits;
1620 	int srcu_idx, r = 0;
1621 
1622 	mask = args->mask;
1623 
1624 	if (!kvm->arch.use_cmma)
1625 		return -ENXIO;
1626 	/* invalid/unsupported flags */
1627 	if (args->flags != 0)
1628 		return -EINVAL;
1629 	/* Enforce sane limit on memory allocation */
1630 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1631 		return -EINVAL;
1632 	/* Nothing to do */
1633 	if (args->count == 0)
1634 		return 0;
1635 
1636 	bits = vmalloc(sizeof(*bits) * args->count);
1637 	if (!bits)
1638 		return -ENOMEM;
1639 
1640 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1641 	if (r) {
1642 		r = -EFAULT;
1643 		goto out;
1644 	}
1645 
1646 	down_read(&kvm->mm->mmap_sem);
1647 	srcu_idx = srcu_read_lock(&kvm->srcu);
1648 	for (i = 0; i < args->count; i++) {
1649 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1650 		if (kvm_is_error_hva(hva)) {
1651 			r = -EFAULT;
1652 			break;
1653 		}
1654 
1655 		pgstev = bits[i];
1656 		pgstev = pgstev << 24;
1657 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1659 	}
1660 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1661 	up_read(&kvm->mm->mmap_sem);
1662 
1663 	if (!kvm->mm->context.use_cmma) {
1664 		down_write(&kvm->mm->mmap_sem);
1665 		kvm->mm->context.use_cmma = 1;
1666 		up_write(&kvm->mm->mmap_sem);
1667 	}
1668 out:
1669 	vfree(bits);
1670 	return r;
1671 }
1672 
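/*
 * Handle the s390 specific ioctls on the VM file descriptor; ioctls not
 * handled by the common code in virt/kvm/kvm_main.c end up here.
 */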
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674 		       unsigned int ioctl, unsigned long arg)
1675 {
1676 	struct kvm *kvm = filp->private_data;
1677 	void __user *argp = (void __user *)arg;
1678 	struct kvm_device_attr attr;
1679 	int r;
1680 
1681 	switch (ioctl) {
1682 	case KVM_S390_INTERRUPT: {
1683 		struct kvm_s390_interrupt s390int;
1684 
1685 		r = -EFAULT;
1686 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1687 			break;
1688 		r = kvm_s390_inject_vm(kvm, &s390int);
1689 		break;
1690 	}
1691 	case KVM_ENABLE_CAP: {
1692 		struct kvm_enable_cap cap;
1693 		r = -EFAULT;
1694 		if (copy_from_user(&cap, argp, sizeof(cap)))
1695 			break;
1696 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1697 		break;
1698 	}
1699 	case KVM_CREATE_IRQCHIP: {
1700 		struct kvm_irq_routing_entry routing;
1701 
1702 		r = -EINVAL;
1703 		if (kvm->arch.use_irqchip) {
1704 			/* Set up dummy routing. */
1705 			memset(&routing, 0, sizeof(routing));
1706 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707 		}
1708 		break;
1709 	}
1710 	case KVM_SET_DEVICE_ATTR: {
1711 		r = -EFAULT;
1712 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1713 			break;
1714 		r = kvm_s390_vm_set_attr(kvm, &attr);
1715 		break;
1716 	}
1717 	case KVM_GET_DEVICE_ATTR: {
1718 		r = -EFAULT;
1719 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1720 			break;
1721 		r = kvm_s390_vm_get_attr(kvm, &attr);
1722 		break;
1723 	}
1724 	case KVM_HAS_DEVICE_ATTR: {
1725 		r = -EFAULT;
1726 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1727 			break;
1728 		r = kvm_s390_vm_has_attr(kvm, &attr);
1729 		break;
1730 	}
1731 	case KVM_S390_GET_SKEYS: {
1732 		struct kvm_s390_skeys args;
1733 
1734 		r = -EFAULT;
1735 		if (copy_from_user(&args, argp,
1736 				   sizeof(struct kvm_s390_skeys)))
1737 			break;
1738 		r = kvm_s390_get_skeys(kvm, &args);
1739 		break;
1740 	}
1741 	case KVM_S390_SET_SKEYS: {
1742 		struct kvm_s390_skeys args;
1743 
1744 		r = -EFAULT;
1745 		if (copy_from_user(&args, argp,
1746 				   sizeof(struct kvm_s390_skeys)))
1747 			break;
1748 		r = kvm_s390_set_skeys(kvm, &args);
1749 		break;
1750 	}
1751 	case KVM_S390_GET_CMMA_BITS: {
1752 		struct kvm_s390_cmma_log args;
1753 
1754 		r = -EFAULT;
1755 		if (copy_from_user(&args, argp, sizeof(args)))
1756 			break;
1757 		mutex_lock(&kvm->slots_lock);
1758 		r = kvm_s390_get_cmma_bits(kvm, &args);
1759 		mutex_unlock(&kvm->slots_lock);
1760 		if (!r) {
1761 			r = copy_to_user(argp, &args, sizeof(args));
1762 			if (r)
1763 				r = -EFAULT;
1764 		}
1765 		break;
1766 	}
1767 	case KVM_S390_SET_CMMA_BITS: {
1768 		struct kvm_s390_cmma_log args;
1769 
1770 		r = -EFAULT;
1771 		if (copy_from_user(&args, argp, sizeof(args)))
1772 			break;
1773 		mutex_lock(&kvm->slots_lock);
1774 		r = kvm_s390_set_cmma_bits(kvm, &args);
1775 		mutex_unlock(&kvm->slots_lock);
1776 		break;
1777 	}
1778 	default:
1779 		r = -ENOTTY;
1780 	}
1781 
1782 	return r;
1783 }
1784 
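/*
 * Issue PQAP(QCI) to fetch the AP configuration into the 128 byte buffer
 * at @config. Returns the condition code of the instruction; a non-zero
 * cc means the query failed and the buffer only contains zeroes.
 */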
1785 static int kvm_s390_query_ap_config(u8 *config)
1786 {
1787 	u32 fcn_code = 0x04000000UL;
1788 	u32 cc = 0;
1789 
1790 	memset(config, 0, 128);
1791 	asm volatile(
1792 		"lgr 0,%1\n"
1793 		"lgr 2,%2\n"
1794 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1795 		"0: ipm %0\n"
1796 		"srl %0,28\n"
1797 		"1:\n"
1798 		EX_TABLE(0b, 1b)
1799 		: "+r" (cc)
1800 		: "r" (fcn_code), "r" (config)
1801 		: "cc", "0", "2", "memory"
1802 	);
1803 
1804 	return cc;
1805 }
1806 
1807 static int kvm_s390_apxa_installed(void)
1808 {
1809 	u8 config[128];
1810 	int cc;
1811 
1812 	if (test_facility(12)) {
1813 		cc = kvm_s390_query_ap_config(config);
1814 
1815 		if (cc)
1816 			pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1817 		else
1818 			return config[0] & 0x40;
1819 	}
1820 
1821 	return 0;
1822 }
1823 
1824 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1825 {
1826 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1827 
1828 	if (kvm_s390_apxa_installed())
1829 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1830 	else
1831 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1832 }
1833 
1834 static u64 kvm_s390_get_initial_cpuid(void)
1835 {
1836 	struct cpuid cpuid;
1837 
1838 	get_cpu_id(&cpuid);
1839 	cpuid.version = 0xff;
1840 	return *((u64 *) &cpuid);
1841 }
1842 
1843 static void kvm_s390_crypto_init(struct kvm *kvm)
1844 {
1845 	if (!test_kvm_facility(kvm, 76))
1846 		return;
1847 
1848 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1849 	kvm_s390_set_crycb_format(kvm);
1850 
1851 	/* Enable AES/DEA protected key functions by default */
1852 	kvm->arch.crypto.aes_kw = 1;
1853 	kvm->arch.crypto.dea_kw = 1;
1854 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1855 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1856 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1857 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1858 }
1859 
1860 static void sca_dispose(struct kvm *kvm)
1861 {
1862 	if (kvm->arch.use_esca)
1863 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1864 	else
1865 		free_page((unsigned long)(kvm->arch.sca));
1866 	kvm->arch.sca = NULL;
1867 }
1868 
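/*
 * Create the architecture specific parts of a VM: SCA, debug feature,
 * facility mask and list, crypto control block, floating interrupt state
 * and (unless this is a ucontrol VM) the guest address space (gmap).
 */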
1869 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1870 {
1871 	gfp_t alloc_flags = GFP_KERNEL;
1872 	int i, rc;
1873 	char debug_name[16];
1874 	static unsigned long sca_offset;
1875 
1876 	rc = -EINVAL;
1877 #ifdef CONFIG_KVM_S390_UCONTROL
1878 	if (type & ~KVM_VM_S390_UCONTROL)
1879 		goto out_err;
1880 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1881 		goto out_err;
1882 #else
1883 	if (type)
1884 		goto out_err;
1885 #endif
1886 
1887 	rc = s390_enable_sie();
1888 	if (rc)
1889 		goto out_err;
1890 
1891 	rc = -ENOMEM;
1892 
1893 	kvm->arch.use_esca = 0; /* start with basic SCA */
1894 	if (!sclp.has_64bscao)
1895 		alloc_flags |= GFP_DMA;
1896 	rwlock_init(&kvm->arch.sca_lock);
1897 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1898 	if (!kvm->arch.sca)
1899 		goto out_err;
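	/*
	 * Stagger the SCA origin within its page for each new VM (in 16-byte
	 * steps, wrapping before the end of the page) - presumably so that
	 * the frequently updated SCAs of different VMs do not all land on
	 * the same cache lines.
	 */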
1900 	spin_lock(&kvm_lock);
1901 	sca_offset += 16;
1902 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1903 		sca_offset = 0;
1904 	kvm->arch.sca = (struct bsca_block *)
1905 			((char *) kvm->arch.sca + sca_offset);
1906 	spin_unlock(&kvm_lock);
1907 
1908 	sprintf(debug_name, "kvm-%u", current->pid);
1909 
1910 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1911 	if (!kvm->arch.dbf)
1912 		goto out_err;
1913 
1914 	kvm->arch.sie_page2 =
1915 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1916 	if (!kvm->arch.sie_page2)
1917 		goto out_err;
1918 
1919 	/* Populate the facility mask initially. */
1920 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1921 	       sizeof(S390_lowcore.stfle_fac_list));
1922 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1923 		if (i < kvm_s390_fac_list_mask_size())
1924 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1925 		else
1926 			kvm->arch.model.fac_mask[i] = 0UL;
1927 	}
1928 
1929 	/* Populate the facility list initially. */
1930 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1931 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1932 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1933 
1934 	/* we are always in czam mode - even on pre z14 machines */
1935 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1936 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1937 	/* we emulate STHYI in kvm */
1938 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1939 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1940 	if (MACHINE_HAS_TLB_GUEST) {
1941 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1942 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1943 	}
1944 
1945 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1946 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1947 
1948 	kvm_s390_crypto_init(kvm);
1949 
1950 	mutex_init(&kvm->arch.float_int.ais_lock);
1951 	kvm->arch.float_int.simm = 0;
1952 	kvm->arch.float_int.nimm = 0;
1953 	spin_lock_init(&kvm->arch.float_int.lock);
1954 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1955 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1956 	init_waitqueue_head(&kvm->arch.ipte_wq);
1957 	mutex_init(&kvm->arch.ipte_mutex);
1958 
1959 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1960 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1961 
1962 	if (type & KVM_VM_S390_UCONTROL) {
1963 		kvm->arch.gmap = NULL;
1964 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1965 	} else {
1966 		if (sclp.hamax == U64_MAX)
1967 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1968 		else
1969 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1970 						    sclp.hamax + 1);
1971 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1972 		if (!kvm->arch.gmap)
1973 			goto out_err;
1974 		kvm->arch.gmap->private = kvm;
1975 		kvm->arch.gmap->pfault_enabled = 0;
1976 	}
1977 
1978 	kvm->arch.css_support = 0;
1979 	kvm->arch.use_irqchip = 0;
1980 	kvm->arch.epoch = 0;
1981 
1982 	spin_lock_init(&kvm->arch.start_stop_lock);
1983 	kvm_s390_vsie_init(kvm);
1984 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1985 
1986 	return 0;
1987 out_err:
1988 	free_page((unsigned long)kvm->arch.sie_page2);
1989 	debug_unregister(kvm->arch.dbf);
1990 	sca_dispose(kvm);
1991 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1992 	return rc;
1993 }
1994 
1995 bool kvm_arch_has_vcpu_debugfs(void)
1996 {
1997 	return false;
1998 }
1999 
2000 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2001 {
2002 	return 0;
2003 }
2004 
2005 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2006 {
2007 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2008 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2009 	kvm_s390_clear_local_irqs(vcpu);
2010 	kvm_clear_async_pf_completion_queue(vcpu);
2011 	if (!kvm_is_ucontrol(vcpu->kvm))
2012 		sca_del_vcpu(vcpu);
2013 
2014 	if (kvm_is_ucontrol(vcpu->kvm))
2015 		gmap_remove(vcpu->arch.gmap);
2016 
2017 	if (vcpu->kvm->arch.use_cmma)
2018 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2019 	free_page((unsigned long)(vcpu->arch.sie_block));
2020 
2021 	kvm_vcpu_uninit(vcpu);
2022 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2023 }
2024 
2025 static void kvm_free_vcpus(struct kvm *kvm)
2026 {
2027 	unsigned int i;
2028 	struct kvm_vcpu *vcpu;
2029 
2030 	kvm_for_each_vcpu(i, vcpu, kvm)
2031 		kvm_arch_vcpu_destroy(vcpu);
2032 
2033 	mutex_lock(&kvm->lock);
2034 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2035 		kvm->vcpus[i] = NULL;
2036 
2037 	atomic_set(&kvm->online_vcpus, 0);
2038 	mutex_unlock(&kvm->lock);
2039 }
2040 
2041 void kvm_arch_destroy_vm(struct kvm *kvm)
2042 {
2043 	kvm_free_vcpus(kvm);
2044 	sca_dispose(kvm);
2045 	debug_unregister(kvm->arch.dbf);
2046 	free_page((unsigned long)kvm->arch.sie_page2);
2047 	if (!kvm_is_ucontrol(kvm))
2048 		gmap_remove(kvm->arch.gmap);
2049 	kvm_s390_destroy_adapters(kvm);
2050 	kvm_s390_clear_float_irqs(kvm);
2051 	kvm_s390_vsie_destroy(kvm);
2052 	if (kvm->arch.migration_state) {
2053 		vfree(kvm->arch.migration_state->pgste_bitmap);
2054 		kfree(kvm->arch.migration_state);
2055 	}
2056 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2057 }
2058 
2059 /* Section: vcpu related */
2060 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2061 {
2062 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2063 	if (!vcpu->arch.gmap)
2064 		return -ENOMEM;
2065 	vcpu->arch.gmap->private = vcpu->kvm;
2066 
2067 	return 0;
2068 }
2069 
2070 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2071 {
2072 	if (!kvm_s390_use_sca_entries())
2073 		return;
2074 	read_lock(&vcpu->kvm->arch.sca_lock);
2075 	if (vcpu->kvm->arch.use_esca) {
2076 		struct esca_block *sca = vcpu->kvm->arch.sca;
2077 
2078 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2079 		sca->cpu[vcpu->vcpu_id].sda = 0;
2080 	} else {
2081 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2082 
2083 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2084 		sca->cpu[vcpu->vcpu_id].sda = 0;
2085 	}
2086 	read_unlock(&vcpu->kvm->arch.sca_lock);
2087 }
2088 
2089 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2090 {
2091 	if (!kvm_s390_use_sca_entries()) {
2092 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2093 
2094 		/* we still need the basic sca for the ipte control */
2095 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2096 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2097 	}
2098 	read_lock(&vcpu->kvm->arch.sca_lock);
2099 	if (vcpu->kvm->arch.use_esca) {
2100 		struct esca_block *sca = vcpu->kvm->arch.sca;
2101 
2102 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2103 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2104 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2105 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2106 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2107 	} else {
2108 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2109 
2110 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2111 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2112 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2113 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2114 	}
2115 	read_unlock(&vcpu->kvm->arch.sca_lock);
2116 }
2117 
2118 /* Basic SCA to Extended SCA data copy routines */
2119 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2120 {
2121 	d->sda = s->sda;
2122 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2123 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2124 }
2125 
2126 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2127 {
2128 	int i;
2129 
2130 	d->ipte_control = s->ipte_control;
2131 	d->mcn[0] = s->mcn;
2132 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2133 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2134 }
2135 
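/*
 * Convert a VM from the basic SCA to the extended SCA at runtime: all VCPUs
 * are blocked and kicked out of SIE, the entries are copied under the SCA
 * write lock, and every SIE control block is updated to point at the new
 * (extended) SCA before the old one is freed.
 */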
2136 static int sca_switch_to_extended(struct kvm *kvm)
2137 {
2138 	struct bsca_block *old_sca = kvm->arch.sca;
2139 	struct esca_block *new_sca;
2140 	struct kvm_vcpu *vcpu;
2141 	unsigned int vcpu_idx;
2142 	u32 scaol, scaoh;
2143 
2144 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2145 	if (!new_sca)
2146 		return -ENOMEM;
2147 
2148 	scaoh = (u32)((u64)(new_sca) >> 32);
2149 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2150 
2151 	kvm_s390_vcpu_block_all(kvm);
2152 	write_lock(&kvm->arch.sca_lock);
2153 
2154 	sca_copy_b_to_e(new_sca, old_sca);
2155 
2156 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2157 		vcpu->arch.sie_block->scaoh = scaoh;
2158 		vcpu->arch.sie_block->scaol = scaol;
2159 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2160 	}
2161 	kvm->arch.sca = new_sca;
2162 	kvm->arch.use_esca = 1;
2163 
2164 	write_unlock(&kvm->arch.sca_lock);
2165 	kvm_s390_vcpu_unblock_all(kvm);
2166 
2167 	free_page((unsigned long)old_sca);
2168 
2169 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2170 		 old_sca, kvm->arch.sca);
2171 	return 0;
2172 }
2173 
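/*
 * Check whether a VCPU with the given id fits into the current SCA; if the
 * basic SCA is too small but the machine supports the extended SCA (and
 * 64-bit SCA origins), transparently upgrade via sca_switch_to_extended().
 */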
2174 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2175 {
2176 	int rc;
2177 
2178 	if (!kvm_s390_use_sca_entries()) {
2179 		if (id < KVM_MAX_VCPUS)
2180 			return true;
2181 		return false;
2182 	}
2183 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2184 		return true;
2185 	if (!sclp.has_esca || !sclp.has_64bscao)
2186 		return false;
2187 
2188 	mutex_lock(&kvm->lock);
2189 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2190 	mutex_unlock(&kvm->lock);
2191 
2192 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2193 }
2194 
2195 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2196 {
2197 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2198 	kvm_clear_async_pf_completion_queue(vcpu);
2199 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2200 				    KVM_SYNC_GPRS |
2201 				    KVM_SYNC_ACRS |
2202 				    KVM_SYNC_CRS |
2203 				    KVM_SYNC_ARCH0 |
2204 				    KVM_SYNC_PFAULT;
2205 	kvm_s390_set_prefix(vcpu, 0);
2206 	if (test_kvm_facility(vcpu->kvm, 64))
2207 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2208 	if (test_kvm_facility(vcpu->kvm, 82))
2209 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2210 	if (test_kvm_facility(vcpu->kvm, 133))
2211 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2212 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2213 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2214 	 */
2215 	if (MACHINE_HAS_VX)
2216 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2217 	else
2218 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2219 
2220 	if (kvm_is_ucontrol(vcpu->kvm))
2221 		return __kvm_ucontrol_vcpu_init(vcpu);
2222 
2223 	return 0;
2224 }
2225 
2226 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2227 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2228 {
2229 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2230 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2231 	vcpu->arch.cputm_start = get_tod_clock_fast();
2232 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2233 }
2234 
2235 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2236 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2237 {
2238 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2239 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2240 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2241 	vcpu->arch.cputm_start = 0;
2242 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2243 }
2244 
2245 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2246 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2247 {
2248 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2249 	vcpu->arch.cputm_enabled = true;
2250 	__start_cpu_timer_accounting(vcpu);
2251 }
2252 
2253 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2254 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2255 {
2256 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2257 	__stop_cpu_timer_accounting(vcpu);
2258 	vcpu->arch.cputm_enabled = false;
2259 }
2260 
2261 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2262 {
2263 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2264 	__enable_cpu_timer_accounting(vcpu);
2265 	preempt_enable();
2266 }
2267 
2268 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2269 {
2270 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2271 	__disable_cpu_timer_accounting(vcpu);
2272 	preempt_enable();
2273 }
2274 
2275 /* set the cpu timer - may only be called from the VCPU thread itself */
2276 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2277 {
2278 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2279 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2280 	if (vcpu->arch.cputm_enabled)
2281 		vcpu->arch.cputm_start = get_tod_clock_fast();
2282 	vcpu->arch.sie_block->cputm = cputm;
2283 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2284 	preempt_enable();
2285 }
2286 
2287 /* update and get the cpu timer - can also be called from other VCPU threads */
2288 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2289 {
2290 	unsigned int seq;
2291 	__u64 value;
2292 
2293 	if (unlikely(!vcpu->arch.cputm_enabled))
2294 		return vcpu->arch.sie_block->cputm;
2295 
2296 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2297 	do {
2298 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2299 		/*
2300 		 * If the writer would ever execute a read in the critical
2301 		 * section, e.g. in irq context, we have a deadlock.
2302 		 */
2303 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2304 		value = vcpu->arch.sie_block->cputm;
2305 		/* if cputm_start is 0, accounting is being started/stopped */
2306 		if (likely(vcpu->arch.cputm_start))
2307 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2308 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2309 	preempt_enable();
2310 	return value;
2311 }
2312 
2313 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2314 {
2315 
2316 	gmap_enable(vcpu->arch.enabled_gmap);
2317 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2318 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2319 		__start_cpu_timer_accounting(vcpu);
2320 	vcpu->cpu = cpu;
2321 }
2322 
2323 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2324 {
2325 	vcpu->cpu = -1;
2326 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2327 		__stop_cpu_timer_accounting(vcpu);
2328 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2329 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2330 	gmap_disable(vcpu->arch.enabled_gmap);
2331 
2332 }
2333 
2334 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2335 {
2336 	/* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2337 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2338 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2339 	kvm_s390_set_prefix(vcpu, 0);
2340 	kvm_s390_set_cpu_timer(vcpu, 0);
2341 	vcpu->arch.sie_block->ckc       = 0UL;
2342 	vcpu->arch.sie_block->todpr     = 0;
2343 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2344 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2345 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2346 	/* make sure the new fpc will be lazily loaded */
2347 	save_fpu_regs();
2348 	current->thread.fpu.fpc = 0;
2349 	vcpu->arch.sie_block->gbea = 1;
2350 	vcpu->arch.sie_block->pp = 0;
2351 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2352 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2353 	kvm_clear_async_pf_completion_queue(vcpu);
2354 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2355 		kvm_s390_vcpu_stop(vcpu);
2356 	kvm_s390_clear_local_irqs(vcpu);
2357 }
2358 
2359 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2360 {
2361 	mutex_lock(&vcpu->kvm->lock);
2362 	preempt_disable();
2363 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2364 	preempt_enable();
2365 	mutex_unlock(&vcpu->kvm->lock);
2366 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2367 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2368 		sca_add_vcpu(vcpu);
2369 	}
2370 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2371 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2372 	/* make vcpu_load load the right gmap on the first trigger */
2373 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2374 }
2375 
2376 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2377 {
2378 	if (!test_kvm_facility(vcpu->kvm, 76))
2379 		return;
2380 
2381 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2382 
2383 	if (vcpu->kvm->arch.crypto.aes_kw)
2384 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2385 	if (vcpu->kvm->arch.crypto.dea_kw)
2386 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2387 
2388 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2389 }
2390 
2391 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2392 {
2393 	free_page(vcpu->arch.sie_block->cbrlo);
2394 	vcpu->arch.sie_block->cbrlo = 0;
2395 }
2396 
2397 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2398 {
2399 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2400 	if (!vcpu->arch.sie_block->cbrlo)
2401 		return -ENOMEM;
2402 
2403 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2404 	return 0;
2405 }
2406 
2407 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2408 {
2409 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2410 
2411 	vcpu->arch.sie_block->ibc = model->ibc;
2412 	if (test_kvm_facility(vcpu->kvm, 7))
2413 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2414 }
2415 
2416 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2417 {
2418 	int rc = 0;
2419 
2420 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2421 						    CPUSTAT_SM |
2422 						    CPUSTAT_STOPPED);
2423 
2424 	if (test_kvm_facility(vcpu->kvm, 78))
2425 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2426 	else if (test_kvm_facility(vcpu->kvm, 8))
2427 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2428 
2429 	kvm_s390_vcpu_setup_model(vcpu);
2430 
2431 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2432 	if (MACHINE_HAS_ESOP)
2433 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2434 	if (test_kvm_facility(vcpu->kvm, 9))
2435 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2436 	if (test_kvm_facility(vcpu->kvm, 73))
2437 		vcpu->arch.sie_block->ecb |= ECB_TE;
2438 
2439 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2440 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2441 	if (test_kvm_facility(vcpu->kvm, 130))
2442 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2443 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2444 	if (sclp.has_cei)
2445 		vcpu->arch.sie_block->eca |= ECA_CEI;
2446 	if (sclp.has_ib)
2447 		vcpu->arch.sie_block->eca |= ECA_IB;
2448 	if (sclp.has_siif)
2449 		vcpu->arch.sie_block->eca |= ECA_SII;
2450 	if (sclp.has_sigpif)
2451 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2452 	if (test_kvm_facility(vcpu->kvm, 129)) {
2453 		vcpu->arch.sie_block->eca |= ECA_VX;
2454 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2455 	}
2456 	if (test_kvm_facility(vcpu->kvm, 139))
2457 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2458 
2459 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2460 					| SDNXC;
2461 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2462 
2463 	if (sclp.has_kss)
2464 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2465 	else
2466 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2467 
2468 	if (vcpu->kvm->arch.use_cmma) {
2469 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2470 		if (rc)
2471 			return rc;
2472 	}
2473 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2474 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2475 
2476 	kvm_s390_vcpu_crypto_setup(vcpu);
2477 
2478 	return rc;
2479 }
2480 
2481 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2482 				      unsigned int id)
2483 {
2484 	struct kvm_vcpu *vcpu;
2485 	struct sie_page *sie_page;
2486 	int rc = -EINVAL;
2487 
2488 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2489 		goto out;
2490 
2491 	rc = -ENOMEM;
2492 
2493 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2494 	if (!vcpu)
2495 		goto out;
2496 
2497 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2498 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2499 	if (!sie_page)
2500 		goto out_free_cpu;
2501 
2502 	vcpu->arch.sie_block = &sie_page->sie_block;
2503 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2504 
2505 	/* the real guest size will always be smaller than msl */
2506 	vcpu->arch.sie_block->mso = 0;
2507 	vcpu->arch.sie_block->msl = sclp.hamax;
2508 
2509 	vcpu->arch.sie_block->icpua = id;
2510 	spin_lock_init(&vcpu->arch.local_int.lock);
2511 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2512 	vcpu->arch.local_int.wq = &vcpu->wq;
2513 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2514 	seqcount_init(&vcpu->arch.cputm_seqcount);
2515 
2516 	rc = kvm_vcpu_init(vcpu, kvm, id);
2517 	if (rc)
2518 		goto out_free_sie_block;
2519 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2520 		 vcpu->arch.sie_block);
2521 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2522 
2523 	return vcpu;
2524 out_free_sie_block:
2525 	free_page((unsigned long)(vcpu->arch.sie_block));
2526 out_free_cpu:
2527 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2528 out:
2529 	return ERR_PTR(rc);
2530 }
2531 
2532 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2533 {
2534 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2535 }
2536 
2537 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2538 {
2539 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2540 }
2541 
2542 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2543 {
2544 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2545 	exit_sie(vcpu);
2546 }
2547 
2548 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2549 {
2550 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2551 }
2552 
2553 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2554 {
2555 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2556 	exit_sie(vcpu);
2557 }
2558 
2559 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2560 {
2561 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2562 }
2563 
2564 /*
2565  * Kick a guest cpu out of SIE and wait until SIE is not running.
2566  * If the CPU is not running (e.g. waiting as idle) the function will
2567  * return immediately.
 */
2568 void exit_sie(struct kvm_vcpu *vcpu)
2569 {
2570 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2571 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2572 		cpu_relax();
2573 }
2574 
2575 /* Kick a guest cpu out of SIE to process a request synchronously */
2576 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2577 {
2578 	kvm_make_request(req, vcpu);
2579 	kvm_s390_vcpu_request(vcpu);
2580 }
2581 
2582 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2583 			      unsigned long end)
2584 {
2585 	struct kvm *kvm = gmap->private;
2586 	struct kvm_vcpu *vcpu;
2587 	unsigned long prefix;
2588 	int i;
2589 
2590 	if (gmap_is_shadow(gmap))
2591 		return;
2592 	if (start >= 1UL << 31)
2593 		/* We are only interested in prefix pages */
2594 		return;
2595 	kvm_for_each_vcpu(i, vcpu, kvm) {
2596 		/* match against both prefix pages */
2597 		prefix = kvm_s390_get_prefix(vcpu);
2598 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2599 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2600 				   start, end);
2601 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2602 		}
2603 	}
2604 }
2605 
2606 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2607 {
2608 	/* kvm common code refers to this, but never calls it */
2609 	BUG();
2610 	return 0;
2611 }
2612 
2613 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2614 					   struct kvm_one_reg *reg)
2615 {
2616 	int r = -EINVAL;
2617 
2618 	switch (reg->id) {
2619 	case KVM_REG_S390_TODPR:
2620 		r = put_user(vcpu->arch.sie_block->todpr,
2621 			     (u32 __user *)reg->addr);
2622 		break;
2623 	case KVM_REG_S390_EPOCHDIFF:
2624 		r = put_user(vcpu->arch.sie_block->epoch,
2625 			     (u64 __user *)reg->addr);
2626 		break;
2627 	case KVM_REG_S390_CPU_TIMER:
2628 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2629 			     (u64 __user *)reg->addr);
2630 		break;
2631 	case KVM_REG_S390_CLOCK_COMP:
2632 		r = put_user(vcpu->arch.sie_block->ckc,
2633 			     (u64 __user *)reg->addr);
2634 		break;
2635 	case KVM_REG_S390_PFTOKEN:
2636 		r = put_user(vcpu->arch.pfault_token,
2637 			     (u64 __user *)reg->addr);
2638 		break;
2639 	case KVM_REG_S390_PFCOMPARE:
2640 		r = put_user(vcpu->arch.pfault_compare,
2641 			     (u64 __user *)reg->addr);
2642 		break;
2643 	case KVM_REG_S390_PFSELECT:
2644 		r = put_user(vcpu->arch.pfault_select,
2645 			     (u64 __user *)reg->addr);
2646 		break;
2647 	case KVM_REG_S390_PP:
2648 		r = put_user(vcpu->arch.sie_block->pp,
2649 			     (u64 __user *)reg->addr);
2650 		break;
2651 	case KVM_REG_S390_GBEA:
2652 		r = put_user(vcpu->arch.sie_block->gbea,
2653 			     (u64 __user *)reg->addr);
2654 		break;
2655 	default:
2656 		break;
2657 	}
2658 
2659 	return r;
2660 }
2661 
2662 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2663 					   struct kvm_one_reg *reg)
2664 {
2665 	int r = -EINVAL;
2666 	__u64 val;
2667 
2668 	switch (reg->id) {
2669 	case KVM_REG_S390_TODPR:
2670 		r = get_user(vcpu->arch.sie_block->todpr,
2671 			     (u32 __user *)reg->addr);
2672 		break;
2673 	case KVM_REG_S390_EPOCHDIFF:
2674 		r = get_user(vcpu->arch.sie_block->epoch,
2675 			     (u64 __user *)reg->addr);
2676 		break;
2677 	case KVM_REG_S390_CPU_TIMER:
2678 		r = get_user(val, (u64 __user *)reg->addr);
2679 		if (!r)
2680 			kvm_s390_set_cpu_timer(vcpu, val);
2681 		break;
2682 	case KVM_REG_S390_CLOCK_COMP:
2683 		r = get_user(vcpu->arch.sie_block->ckc,
2684 			     (u64 __user *)reg->addr);
2685 		break;
2686 	case KVM_REG_S390_PFTOKEN:
2687 		r = get_user(vcpu->arch.pfault_token,
2688 			     (u64 __user *)reg->addr);
2689 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2690 			kvm_clear_async_pf_completion_queue(vcpu);
2691 		break;
2692 	case KVM_REG_S390_PFCOMPARE:
2693 		r = get_user(vcpu->arch.pfault_compare,
2694 			     (u64 __user *)reg->addr);
2695 		break;
2696 	case KVM_REG_S390_PFSELECT:
2697 		r = get_user(vcpu->arch.pfault_select,
2698 			     (u64 __user *)reg->addr);
2699 		break;
2700 	case KVM_REG_S390_PP:
2701 		r = get_user(vcpu->arch.sie_block->pp,
2702 			     (u64 __user *)reg->addr);
2703 		break;
2704 	case KVM_REG_S390_GBEA:
2705 		r = get_user(vcpu->arch.sie_block->gbea,
2706 			     (u64 __user *)reg->addr);
2707 		break;
2708 	default:
2709 		break;
2710 	}
2711 
2712 	return r;
2713 }
2714 
2715 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2716 {
2717 	kvm_s390_vcpu_initial_reset(vcpu);
2718 	return 0;
2719 }
2720 
2721 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2722 {
2723 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2724 	return 0;
2725 }
2726 
2727 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2728 {
2729 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2730 	return 0;
2731 }
2732 
2733 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2734 				  struct kvm_sregs *sregs)
2735 {
2736 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2737 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2738 	return 0;
2739 }
2740 
2741 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2742 				  struct kvm_sregs *sregs)
2743 {
2744 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2745 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2746 	return 0;
2747 }
2748 
2749 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2750 {
2751 	if (test_fp_ctl(fpu->fpc))
2752 		return -EINVAL;
2753 	vcpu->run->s.regs.fpc = fpu->fpc;
2754 	if (MACHINE_HAS_VX)
2755 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2756 				 (freg_t *) fpu->fprs);
2757 	else
2758 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2759 	return 0;
2760 }
2761 
2762 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2763 {
2764 	/* make sure we have the latest values */
2765 	save_fpu_regs();
2766 	if (MACHINE_HAS_VX)
2767 		convert_vx_to_fp((freg_t *) fpu->fprs,
2768 				 (__vector128 *) vcpu->run->s.regs.vrs);
2769 	else
2770 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2771 	fpu->fpc = vcpu->run->s.regs.fpc;
2772 	return 0;
2773 }
2774 
2775 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2776 {
2777 	int rc = 0;
2778 
2779 	if (!is_vcpu_stopped(vcpu))
2780 		rc = -EBUSY;
2781 	else {
2782 		vcpu->run->psw_mask = psw.mask;
2783 		vcpu->run->psw_addr = psw.addr;
2784 	}
2785 	return rc;
2786 }
2787 
2788 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2789 				  struct kvm_translation *tr)
2790 {
2791 	return -EINVAL; /* not implemented yet */
2792 }
2793 
2794 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2795 			      KVM_GUESTDBG_USE_HW_BP | \
2796 			      KVM_GUESTDBG_ENABLE)
2797 
2798 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2799 					struct kvm_guest_debug *dbg)
2800 {
2801 	int rc = 0;
2802 
2803 	vcpu->guest_debug = 0;
2804 	kvm_s390_clear_bp_data(vcpu);
2805 
2806 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2807 		return -EINVAL;
2808 	if (!sclp.has_gpere)
2809 		return -EINVAL;
2810 
2811 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2812 		vcpu->guest_debug = dbg->control;
2813 		/* enforce guest PER */
2814 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2815 
2816 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2817 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2818 	} else {
2819 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820 		vcpu->arch.guestdbg.last_bp = 0;
2821 	}
2822 
2823 	if (rc) {
2824 		vcpu->guest_debug = 0;
2825 		kvm_s390_clear_bp_data(vcpu);
2826 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2827 	}
2828 
2829 	return rc;
2830 }
2831 
2832 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2833 				    struct kvm_mp_state *mp_state)
2834 {
2835 	/* CHECK_STOP and LOAD are not supported yet */
2836 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2837 				       KVM_MP_STATE_OPERATING;
2838 }
2839 
2840 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2841 				    struct kvm_mp_state *mp_state)
2842 {
2843 	int rc = 0;
2844 
2845 	/* user space knows about this interface - let it control the state */
2846 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2847 
2848 	switch (mp_state->mp_state) {
2849 	case KVM_MP_STATE_STOPPED:
2850 		kvm_s390_vcpu_stop(vcpu);
2851 		break;
2852 	case KVM_MP_STATE_OPERATING:
2853 		kvm_s390_vcpu_start(vcpu);
2854 		break;
2855 	case KVM_MP_STATE_LOAD:
2856 	case KVM_MP_STATE_CHECK_STOP:
2857 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2858 	default:
2859 		rc = -ENXIO;
2860 	}
2861 
2862 	return rc;
2863 }
2864 
2865 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2866 {
2867 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2868 }
2869 
2870 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2871 {
2872 retry:
2873 	kvm_s390_vcpu_request_handled(vcpu);
2874 	if (!kvm_request_pending(vcpu))
2875 		return 0;
2876 	/*
2877 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2878 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2879 	 * This ensures that the ipte instruction for this request has
2880 	 * already finished. We might race against a second unmapper that
2881 	 * wants to set the blocking bit. Lets just retry the request loop.
2882 	 * wants to set the blocking bit. Let's just retry the request loop.
2883 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2884 		int rc;
2885 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2886 					  kvm_s390_get_prefix(vcpu),
2887 					  PAGE_SIZE * 2, PROT_WRITE);
2888 		if (rc) {
2889 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2890 			return rc;
2891 		}
2892 		goto retry;
2893 	}
2894 
2895 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2896 		vcpu->arch.sie_block->ihcpu = 0xffff;
2897 		goto retry;
2898 	}
2899 
2900 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2901 		if (!ibs_enabled(vcpu)) {
2902 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2903 			atomic_or(CPUSTAT_IBS,
2904 					&vcpu->arch.sie_block->cpuflags);
2905 		}
2906 		goto retry;
2907 	}
2908 
2909 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2910 		if (ibs_enabled(vcpu)) {
2911 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2912 			atomic_andnot(CPUSTAT_IBS,
2913 					  &vcpu->arch.sie_block->cpuflags);
2914 		}
2915 		goto retry;
2916 	}
2917 
2918 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2919 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2920 		goto retry;
2921 	}
2922 
2923 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2924 		/*
2925 		 * Disable CMMA virtualization; we will emulate the ESSA
2926 		 * instruction manually, in order to provide the additional
2927 		 * functionality needed for live migration.
2928 		 */
2929 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2930 		goto retry;
2931 	}
2932 
2933 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2934 		/*
2935 		 * Re-enable CMMA virtualization if CMMA is available and
2936 		 * was used.
2937 		 */
2938 		if ((vcpu->kvm->arch.use_cmma) &&
2939 		    (vcpu->kvm->mm->context.use_cmma))
2940 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2941 		goto retry;
2942 	}
2943 
2944 	/* nothing to do, just clear the request */
2945 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2946 
2947 	return 0;
2948 }
2949 
2950 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2951 				 const struct kvm_s390_vm_tod_clock *gtod)
2952 {
2953 	struct kvm_vcpu *vcpu;
2954 	struct kvm_s390_tod_clock_ext htod;
2955 	int i;
2956 
2957 	mutex_lock(&kvm->lock);
2958 	preempt_disable();
2959 
2960 	get_tod_clock_ext((char *)&htod);
2961 
2962 	kvm->arch.epoch = gtod->tod - htod.tod;
2963 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2964 
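	/* borrow from the epoch index if the 64-bit TOD subtraction wrapped */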
2965 	if (kvm->arch.epoch > gtod->tod)
2966 		kvm->arch.epdx -= 1;
2967 
2968 	kvm_s390_vcpu_block_all(kvm);
2969 	kvm_for_each_vcpu(i, vcpu, kvm) {
2970 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2971 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2972 	}
2973 
2974 	kvm_s390_vcpu_unblock_all(kvm);
2975 	preempt_enable();
2976 	mutex_unlock(&kvm->lock);
2977 }
2978 
2979 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2980 {
2981 	struct kvm_vcpu *vcpu;
2982 	int i;
2983 
2984 	mutex_lock(&kvm->lock);
2985 	preempt_disable();
2986 	kvm->arch.epoch = tod - get_tod_clock();
2987 	kvm_s390_vcpu_block_all(kvm);
2988 	kvm_for_each_vcpu(i, vcpu, kvm)
2989 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2990 	kvm_s390_vcpu_unblock_all(kvm);
2991 	preempt_enable();
2992 	mutex_unlock(&kvm->lock);
2993 }
2994 
2995 /**
2996  * kvm_arch_fault_in_page - fault-in guest page if necessary
2997  * @vcpu: The corresponding virtual cpu
2998  * @gpa: Guest physical address
2999  * @writable: Whether the page should be writable or not
3000  *
3001  * Make sure that a guest page has been faulted-in on the host.
3002  *
3003  * Return: Zero on success, negative error code otherwise.
3004  */
3005 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3006 {
3007 	return gmap_fault(vcpu->arch.gmap, gpa,
3008 			  writable ? FAULT_FLAG_WRITE : 0);
3009 }
3010 
3011 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3012 				      unsigned long token)
3013 {
3014 	struct kvm_s390_interrupt inti;
3015 	struct kvm_s390_irq irq;
3016 
3017 	if (start_token) {
3018 		irq.u.ext.ext_params2 = token;
3019 		irq.type = KVM_S390_INT_PFAULT_INIT;
3020 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3021 	} else {
3022 		inti.type = KVM_S390_INT_PFAULT_DONE;
3023 		inti.parm64 = token;
3024 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3025 	}
3026 }
3027 
3028 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3029 				     struct kvm_async_pf *work)
3030 {
3031 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3032 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3033 }
3034 
3035 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3036 				 struct kvm_async_pf *work)
3037 {
3038 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3039 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3040 }
3041 
3042 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3043 			       struct kvm_async_pf *work)
3044 {
3045 	/* s390 will always inject the page directly */
3046 }
3047 
3048 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3049 {
3050 	/*
3051 	 * s390 will always inject the page directly,
3052 	 * but we still want check_async_completion to clean up
3053 	 */
3054 	return true;
3055 }
3056 
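/*
 * Arm an async pfault for the current host fault only if the guest has set
 * up the pfault mechanism (valid token), the guest-specified PSW mask
 * condition is met, external interrupts are enabled in the PSW and CR0,
 * no other interrupt is already pending, and pfault handling is enabled
 * for this gmap.
 */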
3057 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3058 {
3059 	hva_t hva;
3060 	struct kvm_arch_async_pf arch;
3061 	int rc;
3062 
3063 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3064 		return 0;
3065 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3066 	    vcpu->arch.pfault_compare)
3067 		return 0;
3068 	if (psw_extint_disabled(vcpu))
3069 		return 0;
3070 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3071 		return 0;
3072 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3073 		return 0;
3074 	if (!vcpu->arch.gmap->pfault_enabled)
3075 		return 0;
3076 
3077 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3078 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3079 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3080 		return 0;
3081 
3082 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3083 	return rc;
3084 }
3085 
3086 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3087 {
3088 	int rc, cpuflags;
3089 
3090 	/*
3091 	 * On s390, notifications for arriving pages are delivered directly
3092 	 * to the guest, but the housekeeping for completed pfaults is
3093 	 * handled outside the worker.
3094 	 */
3095 	kvm_check_async_pf_completion(vcpu);
3096 
3097 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3098 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3099 
3100 	if (need_resched())
3101 		schedule();
3102 
3103 	if (test_cpu_flag(CIF_MCCK_PENDING))
3104 		s390_handle_mcck();
3105 
3106 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3107 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3108 		if (rc)
3109 			return rc;
3110 	}
3111 
3112 	rc = kvm_s390_handle_requests(vcpu);
3113 	if (rc)
3114 		return rc;
3115 
3116 	if (guestdbg_enabled(vcpu)) {
3117 		kvm_s390_backup_guest_per_regs(vcpu);
3118 		kvm_s390_patch_guest_per_regs(vcpu);
3119 	}
3120 
3121 	vcpu->arch.sie_block->icptcode = 0;
3122 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3123 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3124 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3125 
3126 	return 0;
3127 }
3128 
3129 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3130 {
3131 	struct kvm_s390_pgm_info pgm_info = {
3132 		.code = PGM_ADDRESSING,
3133 	};
3134 	u8 opcode, ilen;
3135 	int rc;
3136 
3137 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3138 	trace_kvm_s390_sie_fault(vcpu);
3139 
3140 	/*
3141 	 * We want to inject an addressing exception, which is defined as a
3142 	 * suppressing or terminating exception. However, since we came here
3143 	 * by a DAT access exception, the PSW still points to the faulting
3144 	 * instruction since DAT exceptions are nullifying. So we've got
3145 	 * to look up the current opcode to get the length of the instruction
3146 	 * to be able to forward the PSW.
3147 	 */
3148 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3149 	ilen = insn_length(opcode);
3150 	if (rc < 0) {
3151 		return rc;
3152 	} else if (rc) {
3153 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3154 		 * Forward by an arbitrary ilc; injection will take care of
3155 		 * nullification if necessary.
3156 		 */
3157 		pgm_info = vcpu->arch.pgm;
3158 		ilen = 4;
3159 	}
3160 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3161 	kvm_s390_forward_psw(vcpu, ilen);
3162 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3163 }
3164 
3165 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3166 {
3167 	struct mcck_volatile_info *mcck_info;
3168 	struct sie_page *sie_page;
3169 
3170 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3171 		   vcpu->arch.sie_block->icptcode);
3172 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3173 
3174 	if (guestdbg_enabled(vcpu))
3175 		kvm_s390_restore_guest_per_regs(vcpu);
3176 
3177 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3178 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3179 
3180 	if (exit_reason == -EINTR) {
3181 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3182 		sie_page = container_of(vcpu->arch.sie_block,
3183 					struct sie_page, sie_block);
3184 		mcck_info = &sie_page->mcck_info;
3185 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3186 		return 0;
3187 	}
3188 
3189 	if (vcpu->arch.sie_block->icptcode > 0) {
3190 		int rc = kvm_handle_sie_intercept(vcpu);
3191 
3192 		if (rc != -EOPNOTSUPP)
3193 			return rc;
3194 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3195 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3196 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3197 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3198 		return -EREMOTE;
3199 	} else if (exit_reason != -EFAULT) {
3200 		vcpu->stat.exit_null++;
3201 		return 0;
3202 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3203 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3204 		vcpu->run->s390_ucontrol.trans_exc_code =
3205 						current->thread.gmap_addr;
3206 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3207 		return -EREMOTE;
3208 	} else if (current->thread.gmap_pfault) {
3209 		trace_kvm_s390_major_guest_pfault(vcpu);
3210 		current->thread.gmap_pfault = 0;
3211 		if (kvm_arch_setup_async_pf(vcpu))
3212 			return 0;
3213 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3214 	}
3215 	return vcpu_post_run_fault_in_sie(vcpu);
3216 }
3217 
3218 static int __vcpu_run(struct kvm_vcpu *vcpu)
3219 {
3220 	int rc, exit_reason;
3221 
3222 	/*
3223 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3224 	 * ning the guest), so that memslots (and other stuff) are protected
3225 	 */
3226 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3227 
3228 	do {
3229 		rc = vcpu_pre_run(vcpu);
3230 		if (rc)
3231 			break;
3232 
3233 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3234 		/*
3235 		 * As PF_VCPU will be used in the fault handler, there must be
3236 		 * no uaccess between guest_enter and guest_exit.
3237 		 */
3238 		local_irq_disable();
3239 		guest_enter_irqoff();
3240 		__disable_cpu_timer_accounting(vcpu);
3241 		local_irq_enable();
3242 		exit_reason = sie64a(vcpu->arch.sie_block,
3243 				     vcpu->run->s.regs.gprs);
3244 		local_irq_disable();
3245 		__enable_cpu_timer_accounting(vcpu);
3246 		guest_exit_irqoff();
3247 		local_irq_enable();
3248 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3249 
3250 		rc = vcpu_post_run(vcpu, exit_reason);
3251 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3252 
3253 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3254 	return rc;
3255 }
3256 
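/*
 * Transfer the register state that userspace marked dirty in kvm_run into
 * the SIE control block and the lazily loaded host context before entering
 * the guest; store_regs() performs the reverse direction after the run loop.
 */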
3257 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3258 {
3259 	struct runtime_instr_cb *riccb;
3260 	struct gs_cb *gscb;
3261 
3262 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3263 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3264 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3265 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3266 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3267 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3268 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3269 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3270 		/* some control register changes require a tlb flush */
3271 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3272 	}
3273 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3274 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3275 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3276 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3277 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3278 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3279 	}
3280 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3281 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3282 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3283 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3284 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3285 			kvm_clear_async_pf_completion_queue(vcpu);
3286 	}
3287 	/*
3288 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3289 	 * we should enable RI here instead of doing the lazy enablement.
3290 	 */
3291 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3292 	    test_kvm_facility(vcpu->kvm, 64) &&
3293 	    riccb->v &&
3294 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3295 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3296 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3297 	}
3298 	/*
3299 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3300 	 * we should enable GS here instead of doing the lazy enablement.
3301 	 */
3302 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3303 	    test_kvm_facility(vcpu->kvm, 133) &&
3304 	    gscb->gssm &&
3305 	    !vcpu->arch.gs_enabled) {
3306 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3307 		vcpu->arch.sie_block->ecb |= ECB_GS;
3308 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3309 		vcpu->arch.gs_enabled = 1;
3310 	}
3311 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3312 	    test_kvm_facility(vcpu->kvm, 82)) {
3313 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3314 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3315 	}
3316 	save_access_regs(vcpu->arch.host_acrs);
3317 	restore_access_regs(vcpu->run->s.regs.acrs);
3318 	/* save host (userspace) fprs/vrs */
3319 	save_fpu_regs();
3320 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3321 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3322 	if (MACHINE_HAS_VX)
3323 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3324 	else
3325 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3326 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3327 	if (test_fp_ctl(current->thread.fpu.fpc))
3328 		/* User space provided an invalid FPC, let's clear it */
3329 		current->thread.fpu.fpc = 0;
3330 	if (MACHINE_HAS_GS) {
3331 		preempt_disable();
3332 		__ctl_set_bit(2, 4);
3333 		if (current->thread.gs_cb) {
3334 			vcpu->arch.host_gscb = current->thread.gs_cb;
3335 			save_gs_cb(vcpu->arch.host_gscb);
3336 		}
3337 		if (vcpu->arch.gs_enabled) {
3338 			current->thread.gs_cb = (struct gs_cb *)
3339 						&vcpu->run->s.regs.gscb;
3340 			restore_gs_cb(current->thread.gs_cb);
3341 		}
3342 		preempt_enable();
3343 	}
3344 
3345 	kvm_run->kvm_dirty_regs = 0;
3346 }
3347 
3348 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3349 {
3350 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3351 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3352 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3353 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3354 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3355 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3356 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3357 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3358 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3359 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3360 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3361 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3362 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3363 	save_access_regs(vcpu->run->s.regs.acrs);
3364 	restore_access_regs(vcpu->arch.host_acrs);
3365 	/* Save guest register state */
3366 	save_fpu_regs();
3367 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3368 	/* Restore will be done lazily at return */
3369 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3370 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3371 	if (MACHINE_HAS_GS) {
3372 		__ctl_set_bit(2, 4);
3373 		if (vcpu->arch.gs_enabled)
3374 			save_gs_cb(current->thread.gs_cb);
3375 		preempt_disable();
3376 		current->thread.gs_cb = vcpu->arch.host_gscb;
3377 		restore_gs_cb(vcpu->arch.host_gscb);
3378 		preempt_enable();
3379 		if (!vcpu->arch.host_gscb)
3380 			__ctl_clear_bit(2, 4);
3381 		vcpu->arch.host_gscb = NULL;
3382 	}
3383 
3384 }
3385 
3386 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3387 {
3388 	int rc;
3389 
3390 	if (kvm_run->immediate_exit)
3391 		return -EINTR;
3392 
3393 	if (guestdbg_exit_pending(vcpu)) {
3394 		kvm_s390_prepare_debug_exit(vcpu);
3395 		return 0;
3396 	}
3397 
3398 	kvm_sigset_activate(vcpu);
3399 
3400 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3401 		kvm_s390_vcpu_start(vcpu);
3402 	} else if (is_vcpu_stopped(vcpu)) {
3403 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3404 				   vcpu->vcpu_id);
3405 		return -EINVAL;
3406 	}
3407 
3408 	sync_regs(vcpu, kvm_run);
3409 	enable_cpu_timer_accounting(vcpu);
3410 
3411 	might_fault();
3412 	rc = __vcpu_run(vcpu);
3413 
3414 	if (signal_pending(current) && !rc) {
3415 		kvm_run->exit_reason = KVM_EXIT_INTR;
3416 		rc = -EINTR;
3417 	}
3418 
3419 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3420 		kvm_s390_prepare_debug_exit(vcpu);
3421 		rc = 0;
3422 	}
3423 
3424 	if (rc == -EREMOTE) {
3425 		/* userspace support is needed, kvm_run has been prepared */
3426 		rc = 0;
3427 	}
3428 
3429 	disable_cpu_timer_accounting(vcpu);
3430 	store_regs(vcpu, kvm_run);
3431 
3432 	kvm_sigset_deactivate(vcpu);
3433 
3434 	vcpu->stat.exit_userspace++;
3435 	return rc;
3436 }
3437 
3438 /*
3439  * store status at address
3440  * we have two special cases:
3441  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3442  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3443  */
3444 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3445 {
3446 	unsigned char archmode = 1;
3447 	freg_t fprs[NUM_FPRS];
3448 	unsigned int px;
3449 	u64 clkcomp, cputm;
3450 	int rc;
3451 
3452 	px = kvm_s390_get_prefix(vcpu);
3453 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3454 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3455 			return -EFAULT;
3456 		gpa = 0;
3457 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3458 		if (write_guest_real(vcpu, 163, &archmode, 1))
3459 			return -EFAULT;
3460 		gpa = px;
3461 	} else
3462 		gpa -= __LC_FPREGS_SAVE_AREA;
3463 
3464 	/* manually convert vector registers if necessary */
3465 	if (MACHINE_HAS_VX) {
3466 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3467 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3468 				     fprs, 128);
3469 	} else {
3470 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3471 				     vcpu->run->s.regs.fprs, 128);
3472 	}
3473 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3474 			      vcpu->run->s.regs.gprs, 128);
3475 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3476 			      &vcpu->arch.sie_block->gpsw, 16);
3477 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3478 			      &px, 4);
3479 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3480 			      &vcpu->run->s.regs.fpc, 4);
3481 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3482 			      &vcpu->arch.sie_block->todpr, 4);
3483 	cputm = kvm_s390_get_cpu_timer(vcpu);
3484 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3485 			      &cputm, 8);
3486 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3487 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3488 			      &clkcomp, 8);
3489 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3490 			      &vcpu->run->s.regs.acrs, 64);
3491 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3492 			      &vcpu->arch.sie_block->gcr, 128);
3493 	return rc ? -EFAULT : 0;
3494 }
3495 
3496 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3497 {
3498 	/*
3499 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3500 	 * them into the save area.
3501 	 * it into the save area
3502 	 */
3503 	save_fpu_regs();
3504 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3505 	save_access_regs(vcpu->run->s.regs.acrs);
3506 
3507 	return kvm_s390_store_status_unloaded(vcpu, addr);
3508 }
3509 
3510 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3511 {
3512 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3513 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3514 }
3515 
3516 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3517 {
3518 	unsigned int i;
3519 	struct kvm_vcpu *vcpu;
3520 
3521 	kvm_for_each_vcpu(i, vcpu, kvm) {
3522 		__disable_ibs_on_vcpu(vcpu);
3523 	}
3524 }
3525 
3526 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3527 {
3528 	if (!sclp.has_ibs)
3529 		return;
3530 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3531 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3532 }
3533 
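/*
 * IBS is only kept enabled while exactly one VCPU is started: starting a
 * second VCPU disables it on all VCPUs, and stopping down to a single
 * running VCPU re-enables it on the survivor (see kvm_s390_vcpu_stop()).
 */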
3534 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3535 {
3536 	int i, online_vcpus, started_vcpus = 0;
3537 
3538 	if (!is_vcpu_stopped(vcpu))
3539 		return;
3540 
3541 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3542 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3543 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3544 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3545 
3546 	for (i = 0; i < online_vcpus; i++) {
3547 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3548 			started_vcpus++;
3549 	}
3550 
3551 	if (started_vcpus == 0) {
3552 		/* we're the only active VCPU -> speed it up */
3553 		__enable_ibs_on_vcpu(vcpu);
3554 	} else if (started_vcpus == 1) {
3555 		/*
3556 		 * As we are starting a second VCPU, we have to disable
3557 		 * the IBS facility on all VCPUs to remove potentially
3558 		 * outstanding ENABLE requests.
3559 		 */
3560 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3561 	}
3562 
3563 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3564 	/*
3565 	 * Another VCPU might have used IBS while we were offline.
3566 	 * Let's play safe and flush the VCPU at startup.
3567 	 */
3568 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3569 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3570 	return;
3571 }
3572 
3573 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3574 {
3575 	int i, online_vcpus, started_vcpus = 0;
3576 	struct kvm_vcpu *started_vcpu = NULL;
3577 
3578 	if (is_vcpu_stopped(vcpu))
3579 		return;
3580 
3581 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3582 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3583 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3584 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3585 
3586 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3587 	kvm_s390_clear_stop_irq(vcpu);
3588 
3589 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3590 	__disable_ibs_on_vcpu(vcpu);
3591 
3592 	for (i = 0; i < online_vcpus; i++) {
3593 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3594 			started_vcpus++;
3595 			started_vcpu = vcpu->kvm->vcpus[i];
3596 		}
3597 	}
3598 
3599 	if (started_vcpus == 1) {
3600 		/*
3601 		 * As we only have one VCPU left, we want to enable the
3602 		 * IBS facility for that VCPU to speed it up.
3603 		 */
3604 		__enable_ibs_on_vcpu(started_vcpu);
3605 	}
3606 
3607 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3608 	return;
3609 }
3610 
3611 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3612 				     struct kvm_enable_cap *cap)
3613 {
3614 	int r;
3615 
3616 	if (cap->flags)
3617 		return -EINVAL;
3618 
3619 	switch (cap->cap) {
3620 	case KVM_CAP_S390_CSS_SUPPORT:
3621 		if (!vcpu->kvm->arch.css_support) {
3622 			vcpu->kvm->arch.css_support = 1;
3623 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3624 			trace_kvm_s390_enable_css(vcpu->kvm);
3625 		}
3626 		r = 0;
3627 		break;
3628 	default:
3629 		r = -EINVAL;
3630 		break;
3631 	}
3632 	return r;
3633 }
3634 
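/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory through a bounce buffer, or, with KVM_S390_MEMOP_F_CHECK_ONLY,
 * only check that the range is accessible. If the access fails with a
 * program interruption code (r > 0) and KVM_S390_MEMOP_F_INJECT_EXCEPTION
 * is set, the exception is injected into the guest.
 *
 * Illustrative userspace sketch (not part of this file; field names as in
 * struct kvm_s390_mem_op from <linux/kvm.h>, vcpu_fd/guest_addr/local_buf
 * are placeholders, error handling omitted):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(local_buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)local_buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */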
3635 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3636 				  struct kvm_s390_mem_op *mop)
3637 {
3638 	void __user *uaddr = (void __user *)mop->buf;
3639 	void *tmpbuf = NULL;
3640 	int r, srcu_idx;
3641 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3642 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3643 
3644 	if (mop->flags & ~supported_flags)
3645 		return -EINVAL;
3646 
3647 	if (mop->size > MEM_OP_MAX_SIZE)
3648 		return -E2BIG;
3649 
3650 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3651 		tmpbuf = vmalloc(mop->size);
3652 		if (!tmpbuf)
3653 			return -ENOMEM;
3654 	}
3655 
3656 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3657 
3658 	switch (mop->op) {
3659 	case KVM_S390_MEMOP_LOGICAL_READ:
3660 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3661 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3662 					    mop->size, GACC_FETCH);
3663 			break;
3664 		}
3665 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3666 		if (r == 0) {
3667 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3668 				r = -EFAULT;
3669 		}
3670 		break;
3671 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3672 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3673 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3674 					    mop->size, GACC_STORE);
3675 			break;
3676 		}
3677 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3678 			r = -EFAULT;
3679 			break;
3680 		}
3681 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3682 		break;
3683 	default:
3684 		r = -EINVAL;
3685 	}
3686 
3687 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3688 
3689 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3690 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3691 
3692 	vfree(tmpbuf);
3693 	return r;
3694 }
3695 
3696 long kvm_arch_vcpu_ioctl(struct file *filp,
3697 			 unsigned int ioctl, unsigned long arg)
3698 {
3699 	struct kvm_vcpu *vcpu = filp->private_data;
3700 	void __user *argp = (void __user *)arg;
3701 	int idx;
3702 	long r;
3703 
3704 	switch (ioctl) {
3705 	case KVM_S390_IRQ: {
3706 		struct kvm_s390_irq s390irq;
3707 
3708 		r = -EFAULT;
3709 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3710 			break;
3711 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3712 		break;
3713 	}
3714 	case KVM_S390_INTERRUPT: {
3715 		struct kvm_s390_interrupt s390int;
3716 		struct kvm_s390_irq s390irq;
3717 
3718 		r = -EFAULT;
3719 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3720 			break;
3721 		if (s390int_to_s390irq(&s390int, &s390irq))
3722 			return -EINVAL;
3723 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3724 		break;
3725 	}
3726 	case KVM_S390_STORE_STATUS:
3727 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3728 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3729 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3730 		break;
3731 	case KVM_S390_SET_INITIAL_PSW: {
3732 		psw_t psw;
3733 
3734 		r = -EFAULT;
3735 		if (copy_from_user(&psw, argp, sizeof(psw)))
3736 			break;
3737 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3738 		break;
3739 	}
3740 	case KVM_S390_INITIAL_RESET:
3741 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3742 		break;
3743 	case KVM_SET_ONE_REG:
3744 	case KVM_GET_ONE_REG: {
3745 		struct kvm_one_reg reg;
3746 		r = -EFAULT;
3747 		if (copy_from_user(&reg, argp, sizeof(reg)))
3748 			break;
3749 		if (ioctl == KVM_SET_ONE_REG)
3750 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3751 		else
3752 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3753 		break;
3754 	}
3755 #ifdef CONFIG_KVM_S390_UCONTROL
3756 	case KVM_S390_UCAS_MAP: {
3757 		struct kvm_s390_ucas_mapping ucasmap;
3758 
3759 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3760 			r = -EFAULT;
3761 			break;
3762 		}
3763 
3764 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3765 			r = -EINVAL;
3766 			break;
3767 		}
3768 
3769 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3770 				     ucasmap.vcpu_addr, ucasmap.length);
3771 		break;
3772 	}
3773 	case KVM_S390_UCAS_UNMAP: {
3774 		struct kvm_s390_ucas_mapping ucasmap;
3775 
3776 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3777 			r = -EFAULT;
3778 			break;
3779 		}
3780 
3781 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3782 			r = -EINVAL;
3783 			break;
3784 		}
3785 
3786 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3787 			ucasmap.length);
3788 		break;
3789 	}
3790 #endif
3791 	case KVM_S390_VCPU_FAULT: {
3792 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3793 		break;
3794 	}
3795 	case KVM_ENABLE_CAP:
3796 	{
3797 		struct kvm_enable_cap cap;
3798 		r = -EFAULT;
3799 		if (copy_from_user(&cap, argp, sizeof(cap)))
3800 			break;
3801 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3802 		break;
3803 	}
3804 	case KVM_S390_MEM_OP: {
3805 		struct kvm_s390_mem_op mem_op;
3806 
3807 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3808 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3809 		else
3810 			r = -EFAULT;
3811 		break;
3812 	}
3813 	case KVM_S390_SET_IRQ_STATE: {
3814 		struct kvm_s390_irq_state irq_state;
3815 
3816 		r = -EFAULT;
3817 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3818 			break;
3819 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3820 		    irq_state.len == 0 ||
3821 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3822 			r = -EINVAL;
3823 			break;
3824 		}
3825 		/* do not use irq_state.flags; using it would break old QEMUs */
3826 		r = kvm_s390_set_irq_state(vcpu,
3827 					   (void __user *) irq_state.buf,
3828 					   irq_state.len);
3829 		break;
3830 	}
3831 	case KVM_S390_GET_IRQ_STATE: {
3832 		struct kvm_s390_irq_state irq_state;
3833 
3834 		r = -EFAULT;
3835 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3836 			break;
3837 		if (irq_state.len == 0) {
3838 			r = -EINVAL;
3839 			break;
3840 		}
3841 		/* do not use irq_state.flags; using it would break old QEMUs */
3842 		r = kvm_s390_get_irq_state(vcpu,
3843 					   (__u8 __user *)  irq_state.buf,
3844 					   irq_state.len);
3845 		break;
3846 	}
3847 	default:
3848 		r = -ENOTTY;
3849 	}
3850 	return r;
3851 }
3852 
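/*
 * For user-controlled VMs, the SIE control block is exposed to userspace by
 * mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET; every other fault on
 * the vcpu mapping results in SIGBUS.
 */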
3853 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3854 {
3855 #ifdef CONFIG_KVM_S390_UCONTROL
3856 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3857 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3858 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3859 		get_page(vmf->page);
3860 		return 0;
3861 	}
3862 #endif
3863 	return VM_FAULT_SIGBUS;
3864 }
3865 
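/*
 * Nothing to pre-allocate per memslot on s390; the gmap mapping is
 * established in kvm_arch_commit_memory_region() below.
 */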
3866 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3867 			    unsigned long npages)
3868 {
3869 	return 0;
3870 }
3871 
3872 /* Section: memory related */
3873 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3874 				   struct kvm_memory_slot *memslot,
3875 				   const struct kvm_userspace_memory_region *mem,
3876 				   enum kvm_mr_change change)
3877 {
3878 	/* A few sanity checks. Memory slots have to start and end on a segment
3879 	   boundary (1 MB). The memory backing the slot in userland may be
3880 	   fragmented across several different vmas, and it is fine to mmap()
3881 	   and munmap() parts of this slot at any time after this call. */
3882 
3883 	if (mem->userspace_addr & 0xffffful)
3884 		return -EINVAL;
3885 
3886 	if (mem->memory_size & 0xffffful)
3887 		return -EINVAL;
3888 
3889 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3890 		return -EINVAL;
3891 
3892 	return 0;
3893 }
3894 
3895 void kvm_arch_commit_memory_region(struct kvm *kvm,
3896 				const struct kvm_userspace_memory_region *mem,
3897 				const struct kvm_memory_slot *old,
3898 				const struct kvm_memory_slot *new,
3899 				enum kvm_mr_change change)
3900 {
3901 	int rc;
3902 
3903 	/* If the basics of the memslot do not change, we do not want
3904 	 * to update the gmap. Every update causes several unnecessary
3905 	 * segment translation exceptions. This is usually handled just
3906 	 * fine by the normal fault handler + gmap, but it will also
3907 	 * cause faults on the prefix page of running guest CPUs.
3908 	 */
3909 	if (old->userspace_addr == mem->userspace_addr &&
3910 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3911 	    old->npages * PAGE_SIZE == mem->memory_size)
3912 		return;
3913 
3914 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3915 		mem->guest_phys_addr, mem->memory_size);
3916 	if (rc)
3917 		pr_warn("failed to commit memory region\n");
3918 	return;
3919 }
3920 
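/*
 * Derive the mask of facility bits in stfle word i that may be mirrored
 * into kvm_s390_fac_list_mask below: sclp.hmfai holds one 2-bit value per
 * facility word, and each increment of that value strips another 16 bits
 * from the returned mask.
 */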
3921 static inline unsigned long nonhyp_mask(int i)
3922 {
3923 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3924 
3925 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3926 }
3927 
3928 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3929 {
3930 	vcpu->valid_wakeup = false;
3931 }
3932 
3933 static int __init kvm_s390_init(void)
3934 {
3935 	int i;
3936 
3937 	if (!sclp.has_sief2) {
3938 		pr_info("SIE not available\n");
3939 		return -ENODEV;
3940 	}
3941 
3942 	for (i = 0; i < 16; i++)
3943 		kvm_s390_fac_list_mask[i] |=
3944 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3945 
3946 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3947 }
3948 
3949 static void __exit kvm_s390_exit(void)
3950 {
3951 	kvm_exit();
3952 }
3953 
3954 module_init(kvm_s390_init);
3955 module_exit(kvm_s390_exit);
3956 
3957 /*
3958  * Enable autoloading of the kvm module.
3959  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3960  * since x86 takes a different approach.
3961  */
3962 #include <linux/miscdevice.h>
3963 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3964 MODULE_ALIAS("devname:kvm");
3965