/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
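/*
 * Illustrative sizing note (an assumption drawn from the constants
 * above, not a statement of the uapi): VCPU_IRQS_MAX_BUF bounds the
 * buffer for the vcpu interrupt-state ioctls, leaving room for one
 * pending interrupt per possible sender VCPU plus LOCAL_IRQS local
 * interrupts.
 */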

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
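/*
 * Sketch of the invariant maintained below (illustrative, not an API
 * statement): the guest reads its TOD as host_tod + epoch. When the
 * host TOD is stepped forward by *delta, subtracting *delta from every
 * epoch keeps the guest-visible TOD unchanged across the step; the CPU
 * timer start snapshot moves in the opposite direction for the same
 * reason.
 */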
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}
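	/*
	 * The loop above packs availability bit i MSB-first into the
	 * result byte array. Worked example: i = 10 lands in
	 * plo[10 >> 3] = plo[1] with mask 0x80 >> (10 & 7) = 0x20,
	 * the same MSB-first convention used for facility bits.
	 */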

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits can cause
	 * pages to be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
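/*
 * Illustrative userspace probe (assumption, not kernel code): the
 * extension values above are read with KVM_CHECK_EXTENSION, e.g.
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * which returns MEM_OP_MAX_SIZE here, or 0 when unsupported.
 */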

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages; last_gfn is an exclusive bound */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
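/*
 * Illustrative enablement from userspace (assumption, not kernel
 * code); this reaches the handler above through the KVM_ENABLE_CAP
 * case of kvm_arch_vm_ioctl():
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */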

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
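/*
 * Illustrative (assumption, not kernel code): the limit above is set
 * via KVM_SET_DEVICE_ATTR with group KVM_S390_VM_MEM_CTRL and attr
 * KVM_S390_VM_MEM_LIMIT_SIZE, where attr.addr points at a u64 byte
 * count; it can only be changed before the first VCPU is created.
 */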

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
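/*
 * Worked sizing example for the bitmap above (illustrative): a guest
 * whose last slot ends at 1 GiB covers 262144 4K pages; that is
 * already a multiple of BITS_PER_LONG, so the bitmap needs
 * 262144 / 8 = 32768 bytes (32 KiB), one bit per guest page.
 */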

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
					struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}
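/*
 * The carry above, spelled out (illustrative): the extended guest TOD
 * is the pair <epoch_idx, tod>, matching struct kvm_s390_tod_clock_ext.
 * Adding the 64-bit epoch can wrap the low word; gtod->tod < htod.tod
 * detects that unsigned overflow, and the carry is propagated into the
 * epoch index.
 */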

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
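/*
 * Worked IBC clamp example for the code above (values made up): with
 * sclp.ibc = 0x00300120, lowest_ibc = 0x030 and unblocked_ibc = 0x120,
 * so a requested proc->ibc of 0x200 is lowered to 0x120, a request of
 * 0x010 is raised to 0x030, and anything in between is taken as-is.
 */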

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
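/*
 * Illustrative end-to-end flow (assumption, not kernel code): starting
 * migration mode from userspace,
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * dispatches through kvm_s390_vm_set_attr() above to
 * kvm_s390_vm_set_migration().
 */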

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
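/*
 * Illustrative use (assumption, not kernel code): userspace fetches 16
 * storage keys starting at guest frame 0 with
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return of KVM_S390_GET_SKEYS_NONE means the guest never enabled
 * storage keys.
 */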

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
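/*
 * Worked example for the threshold above (illustrative): on 64-bit,
 * KVM_S390_MAX_BIT_DISTANCE = 2 * 8 = 16, so a run of up to 16 clean
 * pages (one attribute byte each) costs no more than the 16 bytes a
 * new block header (base address + length) would take.
 */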

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
1853 
1854 static void sca_dispose(struct kvm *kvm)
1855 {
1856 	if (kvm->arch.use_esca)
1857 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858 	else
1859 		free_page((unsigned long)(kvm->arch.sca));
1860 	kvm->arch.sca = NULL;
1861 }
1862 
1863 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864 {
1865 	gfp_t alloc_flags = GFP_KERNEL;
1866 	int i, rc;
1867 	char debug_name[16];
1868 	static unsigned long sca_offset;
1869 
1870 	rc = -EINVAL;
1871 #ifdef CONFIG_KVM_S390_UCONTROL
1872 	if (type & ~KVM_VM_S390_UCONTROL)
1873 		goto out_err;
1874 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875 		goto out_err;
1876 #else
1877 	if (type)
1878 		goto out_err;
1879 #endif
1880 
1881 	rc = s390_enable_sie();
1882 	if (rc)
1883 		goto out_err;
1884 
1885 	rc = -ENOMEM;
1886 
1887 	kvm->arch.use_esca = 0; /* start with basic SCA */
1888 	if (!sclp.has_64bscao)
1889 		alloc_flags |= GFP_DMA;
1890 	rwlock_init(&kvm->arch.sca_lock);
1891 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1892 	if (!kvm->arch.sca)
1893 		goto out_err;
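	/*
	 * Stagger the SCA within its page for every new VM, presumably
	 * so that the SCA blocks (and their ipte control words) of
	 * different guests do not end up in the same cache line.
	 */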
1894 	spin_lock(&kvm_lock);
1895 	sca_offset += 16;
1896 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1897 		sca_offset = 0;
1898 	kvm->arch.sca = (struct bsca_block *)
1899 			((char *) kvm->arch.sca + sca_offset);
1900 	spin_unlock(&kvm_lock);
1901 
1902 	sprintf(debug_name, "kvm-%u", current->pid);
1903 
1904 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1905 	if (!kvm->arch.dbf)
1906 		goto out_err;
1907 
1908 	kvm->arch.sie_page2 =
1909 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1910 	if (!kvm->arch.sie_page2)
1911 		goto out_err;
1912 
1913 	/* Populate the facility mask initially. */
1914 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1915 	       sizeof(S390_lowcore.stfle_fac_list));
1916 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1917 		if (i < kvm_s390_fac_list_mask_size())
1918 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1919 		else
1920 			kvm->arch.model.fac_mask[i] = 0UL;
1921 	}
1922 
1923 	/* Populate the facility list initially. */
1924 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1925 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1926 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1927 
1928 	/* we are always in czam mode - even on pre z14 machines */
1929 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1930 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1931 	/* we emulate STHYI in kvm */
1932 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1933 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1934 	if (MACHINE_HAS_TLB_GUEST) {
1935 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1936 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1937 	}
1938 
1939 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1940 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1941 
1942 	kvm_s390_crypto_init(kvm);
1943 
1944 	mutex_init(&kvm->arch.float_int.ais_lock);
1945 	kvm->arch.float_int.simm = 0;
1946 	kvm->arch.float_int.nimm = 0;
1947 	spin_lock_init(&kvm->arch.float_int.lock);
1948 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1949 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1950 	init_waitqueue_head(&kvm->arch.ipte_wq);
1951 	mutex_init(&kvm->arch.ipte_mutex);
1952 
1953 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1954 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1955 
1956 	if (type & KVM_VM_S390_UCONTROL) {
1957 		kvm->arch.gmap = NULL;
1958 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1959 	} else {
1960 		if (sclp.hamax == U64_MAX)
1961 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1962 		else
1963 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1964 						    sclp.hamax + 1);
1965 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1966 		if (!kvm->arch.gmap)
1967 			goto out_err;
1968 		kvm->arch.gmap->private = kvm;
1969 		kvm->arch.gmap->pfault_enabled = 0;
1970 	}
1971 
1972 	kvm->arch.css_support = 0;
1973 	kvm->arch.use_irqchip = 0;
1974 	kvm->arch.epoch = 0;
1975 
1976 	spin_lock_init(&kvm->arch.start_stop_lock);
1977 	kvm_s390_vsie_init(kvm);
1978 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1979 
1980 	return 0;
1981 out_err:
1982 	free_page((unsigned long)kvm->arch.sie_page2);
1983 	debug_unregister(kvm->arch.dbf);
1984 	sca_dispose(kvm);
1985 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1986 	return rc;
1987 }
1988 
1989 bool kvm_arch_has_vcpu_debugfs(void)
1990 {
1991 	return false;
1992 }
1993 
1994 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1995 {
1996 	return 0;
1997 }
1998 
1999 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2000 {
2001 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2002 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2003 	kvm_s390_clear_local_irqs(vcpu);
2004 	kvm_clear_async_pf_completion_queue(vcpu);
2005 	if (!kvm_is_ucontrol(vcpu->kvm))
2006 		sca_del_vcpu(vcpu);
2007 
2008 	if (kvm_is_ucontrol(vcpu->kvm))
2009 		gmap_remove(vcpu->arch.gmap);
2010 
2011 	if (vcpu->kvm->arch.use_cmma)
2012 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2013 	free_page((unsigned long)(vcpu->arch.sie_block));
2014 
2015 	kvm_vcpu_uninit(vcpu);
2016 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2017 }
2018 
2019 static void kvm_free_vcpus(struct kvm *kvm)
2020 {
2021 	unsigned int i;
2022 	struct kvm_vcpu *vcpu;
2023 
2024 	kvm_for_each_vcpu(i, vcpu, kvm)
2025 		kvm_arch_vcpu_destroy(vcpu);
2026 
2027 	mutex_lock(&kvm->lock);
2028 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2029 		kvm->vcpus[i] = NULL;
2030 
2031 	atomic_set(&kvm->online_vcpus, 0);
2032 	mutex_unlock(&kvm->lock);
2033 }
2034 
2035 void kvm_arch_destroy_vm(struct kvm *kvm)
2036 {
2037 	kvm_free_vcpus(kvm);
2038 	sca_dispose(kvm);
2039 	debug_unregister(kvm->arch.dbf);
2040 	free_page((unsigned long)kvm->arch.sie_page2);
2041 	if (!kvm_is_ucontrol(kvm))
2042 		gmap_remove(kvm->arch.gmap);
2043 	kvm_s390_destroy_adapters(kvm);
2044 	kvm_s390_clear_float_irqs(kvm);
2045 	kvm_s390_vsie_destroy(kvm);
2046 	if (kvm->arch.migration_state) {
2047 		vfree(kvm->arch.migration_state->pgste_bitmap);
2048 		kfree(kvm->arch.migration_state);
2049 	}
2050 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2051 }
2052 
2053 /* Section: vcpu related */
2054 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2055 {
2056 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2057 	if (!vcpu->arch.gmap)
2058 		return -ENOMEM;
2059 	vcpu->arch.gmap->private = vcpu->kvm;
2060 
2061 	return 0;
2062 }
2063 
2064 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2065 {
2066 	if (!kvm_s390_use_sca_entries())
2067 		return;
2068 	read_lock(&vcpu->kvm->arch.sca_lock);
2069 	if (vcpu->kvm->arch.use_esca) {
2070 		struct esca_block *sca = vcpu->kvm->arch.sca;
2071 
2072 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2073 		sca->cpu[vcpu->vcpu_id].sda = 0;
2074 	} else {
2075 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2076 
2077 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2078 		sca->cpu[vcpu->vcpu_id].sda = 0;
2079 	}
2080 	read_unlock(&vcpu->kvm->arch.sca_lock);
2081 }
2082 
2083 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2084 {
2085 	if (!kvm_s390_use_sca_entries()) {
2086 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2087 
2088 		/* we still need the basic sca for the ipte control */
2089 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2090 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2091 	}
2092 	read_lock(&vcpu->kvm->arch.sca_lock);
2093 	if (vcpu->kvm->arch.use_esca) {
2094 		struct esca_block *sca = vcpu->kvm->arch.sca;
2095 
2096 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2097 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2098 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2099 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2100 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2101 	} else {
2102 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2103 
2104 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2105 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2106 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2107 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2108 	}
2109 	read_unlock(&vcpu->kvm->arch.sca_lock);
2110 }
2111 
2112 /* Basic SCA to Extended SCA data copy routines */
2113 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2114 {
2115 	d->sda = s->sda;
2116 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2117 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2118 }
2119 
2120 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2121 {
2122 	int i;
2123 
2124 	d->ipte_control = s->ipte_control;
2125 	d->mcn[0] = s->mcn;
2126 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2127 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2128 }
2129 
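/*
 * Replace the basic SCA with an extended SCA: allocate the new block,
 * copy all entries over while every VCPU is blocked and the sca_lock
 * is held for writing, repoint each SIE block at the new SCA and only
 * then free the old one.
 */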
2130 static int sca_switch_to_extended(struct kvm *kvm)
2131 {
2132 	struct bsca_block *old_sca = kvm->arch.sca;
2133 	struct esca_block *new_sca;
2134 	struct kvm_vcpu *vcpu;
2135 	unsigned int vcpu_idx;
2136 	u32 scaol, scaoh;
2137 
2138 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2139 	if (!new_sca)
2140 		return -ENOMEM;
2141 
2142 	scaoh = (u32)((u64)(new_sca) >> 32);
2143 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2144 
2145 	kvm_s390_vcpu_block_all(kvm);
2146 	write_lock(&kvm->arch.sca_lock);
2147 
2148 	sca_copy_b_to_e(new_sca, old_sca);
2149 
2150 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2151 		vcpu->arch.sie_block->scaoh = scaoh;
2152 		vcpu->arch.sie_block->scaol = scaol;
2153 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2154 	}
2155 	kvm->arch.sca = new_sca;
2156 	kvm->arch.use_esca = 1;
2157 
2158 	write_unlock(&kvm->arch.sca_lock);
2159 	kvm_s390_vcpu_unblock_all(kvm);
2160 
2161 	free_page((unsigned long)old_sca);
2162 
2163 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2164 		 old_sca, kvm->arch.sca);
2165 	return 0;
2166 }
2167 
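/*
 * Check whether a VCPU id still fits into the SCA, switching from the
 * basic to the extended SCA first if that is necessary and possible.
 * If SCA entries are not used, only the KVM_MAX_VCPUS limit applies.
 */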
2168 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2169 {
2170 	int rc;
2171 
2172 	if (!kvm_s390_use_sca_entries()) {
2173 		if (id < KVM_MAX_VCPUS)
2174 			return true;
2175 		return false;
2176 	}
2177 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2178 		return true;
2179 	if (!sclp.has_esca || !sclp.has_64bscao)
2180 		return false;
2181 
2182 	mutex_lock(&kvm->lock);
2183 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2184 	mutex_unlock(&kvm->lock);
2185 
2186 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2187 }
2188 
2189 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2190 {
2191 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2192 	kvm_clear_async_pf_completion_queue(vcpu);
2193 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2194 				    KVM_SYNC_GPRS |
2195 				    KVM_SYNC_ACRS |
2196 				    KVM_SYNC_CRS |
2197 				    KVM_SYNC_ARCH0 |
2198 				    KVM_SYNC_PFAULT;
2199 	kvm_s390_set_prefix(vcpu, 0);
2200 	if (test_kvm_facility(vcpu->kvm, 64))
2201 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2202 	if (test_kvm_facility(vcpu->kvm, 133))
2203 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2204 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2205 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2206 	 */
2207 	if (MACHINE_HAS_VX)
2208 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2209 	else
2210 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2211 
2212 	if (kvm_is_ucontrol(vcpu->kvm))
2213 		return __kvm_ucontrol_vcpu_init(vcpu);
2214 
2215 	return 0;
2216 }
2217 
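/*
 * The guest cpu timer is accounted lazily: while accounting is
 * enabled, cputm_start holds the host TOD of the last (re)start and
 * the elapsed delta is folded into sie_block->cputm when accounting
 * stops.  The seqcount allows other threads to concurrently read a
 * consistent value via kvm_s390_get_cpu_timer.
 */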
2218 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2219 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2220 {
2221 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2222 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2223 	vcpu->arch.cputm_start = get_tod_clock_fast();
2224 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2225 }
2226 
2227 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2228 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2229 {
2230 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2231 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2232 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2233 	vcpu->arch.cputm_start = 0;
2234 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2235 }
2236 
2237 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2238 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2239 {
2240 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2241 	vcpu->arch.cputm_enabled = true;
2242 	__start_cpu_timer_accounting(vcpu);
2243 }
2244 
2245 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2246 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2247 {
2248 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2249 	__stop_cpu_timer_accounting(vcpu);
2250 	vcpu->arch.cputm_enabled = false;
2251 }
2252 
2253 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2254 {
2255 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2256 	__enable_cpu_timer_accounting(vcpu);
2257 	preempt_enable();
2258 }
2259 
2260 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2261 {
2262 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2263 	__disable_cpu_timer_accounting(vcpu);
2264 	preempt_enable();
2265 }
2266 
2267 /* set the cpu timer - may only be called from the VCPU thread itself */
2268 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2269 {
2270 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2271 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2272 	if (vcpu->arch.cputm_enabled)
2273 		vcpu->arch.cputm_start = get_tod_clock_fast();
2274 	vcpu->arch.sie_block->cputm = cputm;
2275 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2276 	preempt_enable();
2277 }
2278 
2279 /* update and get the cpu timer - can also be called from other VCPU threads */
2280 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2281 {
2282 	unsigned int seq;
2283 	__u64 value;
2284 
2285 	if (unlikely(!vcpu->arch.cputm_enabled))
2286 		return vcpu->arch.sie_block->cputm;
2287 
2288 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2289 	do {
2290 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2291 		/*
2292 		 * If the writer would ever execute a read in the critical
2293 		 * section, e.g. in irq context, we have a deadlock.
2294 		 */
2295 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2296 		value = vcpu->arch.sie_block->cputm;
2297 		/* if cputm_start is 0, accounting is being started/stopped */
2298 		if (likely(vcpu->arch.cputm_start))
2299 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2300 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2301 	preempt_enable();
2302 	return value;
2303 }
2304 
2305 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2306 {
2308 	gmap_enable(vcpu->arch.enabled_gmap);
2309 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2310 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2311 		__start_cpu_timer_accounting(vcpu);
2312 	vcpu->cpu = cpu;
2313 }
2314 
2315 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2316 {
2317 	vcpu->cpu = -1;
2318 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2319 		__stop_cpu_timer_accounting(vcpu);
2320 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2321 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2322 	gmap_disable(vcpu->arch.enabled_gmap);
2324 }
2325 
2326 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2327 {
2328 	/* this equals the initial cpu reset described in the POP, but we don't switch to ESA */
2329 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2330 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2331 	kvm_s390_set_prefix(vcpu, 0);
2332 	kvm_s390_set_cpu_timer(vcpu, 0);
2333 	vcpu->arch.sie_block->ckc       = 0UL;
2334 	vcpu->arch.sie_block->todpr     = 0;
2335 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2336 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2337 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2338 	/* make sure the new fpc will be lazily loaded */
2339 	save_fpu_regs();
2340 	current->thread.fpu.fpc = 0;
2341 	vcpu->arch.sie_block->gbea = 1;
2342 	vcpu->arch.sie_block->pp = 0;
2343 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2344 	kvm_clear_async_pf_completion_queue(vcpu);
2345 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2346 		kvm_s390_vcpu_stop(vcpu);
2347 	kvm_s390_clear_local_irqs(vcpu);
2348 }
2349 
2350 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2351 {
2352 	mutex_lock(&vcpu->kvm->lock);
2353 	preempt_disable();
2354 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2355 	preempt_enable();
2356 	mutex_unlock(&vcpu->kvm->lock);
2357 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2358 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2359 		sca_add_vcpu(vcpu);
2360 	}
2361 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2362 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2363 	/* make vcpu_load load the right gmap on the first trigger */
2364 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2365 }
2366 
2367 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2368 {
2369 	if (!test_kvm_facility(vcpu->kvm, 76))
2370 		return;
2371 
2372 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2373 
2374 	if (vcpu->kvm->arch.crypto.aes_kw)
2375 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2376 	if (vcpu->kvm->arch.crypto.dea_kw)
2377 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2378 
2379 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2380 }
2381 
2382 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2383 {
2384 	free_page(vcpu->arch.sie_block->cbrlo);
2385 	vcpu->arch.sie_block->cbrlo = 0;
2386 }
2387 
2388 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2389 {
2390 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2391 	if (!vcpu->arch.sie_block->cbrlo)
2392 		return -ENOMEM;
2393 
2394 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2395 	return 0;
2396 }
2397 
2398 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2399 {
2400 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2401 
2402 	vcpu->arch.sie_block->ibc = model->ibc;
2403 	if (test_kvm_facility(vcpu->kvm, 7))
2404 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2405 }
2406 
2407 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2408 {
2409 	int rc = 0;
2410 
2411 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2412 						    CPUSTAT_SM |
2413 						    CPUSTAT_STOPPED);
2414 
2415 	if (test_kvm_facility(vcpu->kvm, 78))
2416 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2417 	else if (test_kvm_facility(vcpu->kvm, 8))
2418 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2419 
2420 	kvm_s390_vcpu_setup_model(vcpu);
2421 
2422 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2423 	if (MACHINE_HAS_ESOP)
2424 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2425 	if (test_kvm_facility(vcpu->kvm, 9))
2426 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2427 	if (test_kvm_facility(vcpu->kvm, 73))
2428 		vcpu->arch.sie_block->ecb |= ECB_TE;
2429 
2430 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2431 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2432 	if (test_kvm_facility(vcpu->kvm, 130))
2433 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2434 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2435 	if (sclp.has_cei)
2436 		vcpu->arch.sie_block->eca |= ECA_CEI;
2437 	if (sclp.has_ib)
2438 		vcpu->arch.sie_block->eca |= ECA_IB;
2439 	if (sclp.has_siif)
2440 		vcpu->arch.sie_block->eca |= ECA_SII;
2441 	if (sclp.has_sigpif)
2442 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2443 	if (test_kvm_facility(vcpu->kvm, 129)) {
2444 		vcpu->arch.sie_block->eca |= ECA_VX;
2445 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2446 	}
2447 	if (test_kvm_facility(vcpu->kvm, 139))
2448 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2449 
2450 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2451 					| SDNXC;
2452 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2453 
2454 	if (sclp.has_kss)
2455 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2456 	else
2457 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2458 
2459 	if (vcpu->kvm->arch.use_cmma) {
2460 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2461 		if (rc)
2462 			return rc;
2463 	}
2464 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2465 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2466 
2467 	kvm_s390_vcpu_crypto_setup(vcpu);
2468 
2469 	return rc;
2470 }
2471 
2472 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2473 				      unsigned int id)
2474 {
2475 	struct kvm_vcpu *vcpu;
2476 	struct sie_page *sie_page;
2477 	int rc = -EINVAL;
2478 
2479 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2480 		goto out;
2481 
2482 	rc = -ENOMEM;
2483 
2484 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2485 	if (!vcpu)
2486 		goto out;
2487 
2488 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2489 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2490 	if (!sie_page)
2491 		goto out_free_cpu;
2492 
2493 	vcpu->arch.sie_block = &sie_page->sie_block;
2494 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2495 
2496 	/* the real guest size will always be smaller than msl */
2497 	vcpu->arch.sie_block->mso = 0;
2498 	vcpu->arch.sie_block->msl = sclp.hamax;
2499 
2500 	vcpu->arch.sie_block->icpua = id;
2501 	spin_lock_init(&vcpu->arch.local_int.lock);
2502 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2503 	vcpu->arch.local_int.wq = &vcpu->wq;
2504 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2505 	seqcount_init(&vcpu->arch.cputm_seqcount);
2506 
2507 	rc = kvm_vcpu_init(vcpu, kvm, id);
2508 	if (rc)
2509 		goto out_free_sie_block;
2510 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2511 		 vcpu->arch.sie_block);
2512 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2513 
2514 	return vcpu;
2515 out_free_sie_block:
2516 	free_page((unsigned long)(vcpu->arch.sie_block));
2517 out_free_cpu:
2518 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2519 out:
2520 	return ERR_PTR(rc);
2521 }
2522 
2523 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2524 {
2525 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2526 }
2527 
2528 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2529 {
2530 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2531 }
2532 
2533 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2534 {
2535 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2536 	exit_sie(vcpu);
2537 }
2538 
2539 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2540 {
2541 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2542 }
2543 
2544 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2545 {
2546 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2547 	exit_sie(vcpu);
2548 }
2549 
2550 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2551 {
2552 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2553 }
2554 
2555 /*
2556  * Kick a guest cpu out of SIE and wait until SIE is not running.
2557  * If the CPU is not running (e.g. waiting as idle) the function will
2558  * return immediately.
 */
2559 void exit_sie(struct kvm_vcpu *vcpu)
2560 {
2561 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2562 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2563 		cpu_relax();
2564 }
2565 
2566 /* Kick a guest cpu out of SIE to process a request synchronously */
2567 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2568 {
2569 	kvm_make_request(req, vcpu);
2570 	kvm_s390_vcpu_request(vcpu);
2571 }
2572 
2573 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2574 			      unsigned long end)
2575 {
2576 	struct kvm *kvm = gmap->private;
2577 	struct kvm_vcpu *vcpu;
2578 	unsigned long prefix;
2579 	int i;
2580 
2581 	if (gmap_is_shadow(gmap))
2582 		return;
2583 	if (start >= 1UL << 31)
2584 		/* We are only interested in prefix pages */
2585 		return;
2586 	kvm_for_each_vcpu(i, vcpu, kvm) {
2587 		/* match against both prefix pages */
2588 		prefix = kvm_s390_get_prefix(vcpu);
2589 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2590 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2591 				   start, end);
2592 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2593 		}
2594 	}
2595 }
2596 
2597 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2598 {
2599 	/* kvm common code refers to this, but never calls it */
2600 	BUG();
2601 	return 0;
2602 }
2603 
2604 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2605 					   struct kvm_one_reg *reg)
2606 {
2607 	int r = -EINVAL;
2608 
2609 	switch (reg->id) {
2610 	case KVM_REG_S390_TODPR:
2611 		r = put_user(vcpu->arch.sie_block->todpr,
2612 			     (u32 __user *)reg->addr);
2613 		break;
2614 	case KVM_REG_S390_EPOCHDIFF:
2615 		r = put_user(vcpu->arch.sie_block->epoch,
2616 			     (u64 __user *)reg->addr);
2617 		break;
2618 	case KVM_REG_S390_CPU_TIMER:
2619 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2620 			     (u64 __user *)reg->addr);
2621 		break;
2622 	case KVM_REG_S390_CLOCK_COMP:
2623 		r = put_user(vcpu->arch.sie_block->ckc,
2624 			     (u64 __user *)reg->addr);
2625 		break;
2626 	case KVM_REG_S390_PFTOKEN:
2627 		r = put_user(vcpu->arch.pfault_token,
2628 			     (u64 __user *)reg->addr);
2629 		break;
2630 	case KVM_REG_S390_PFCOMPARE:
2631 		r = put_user(vcpu->arch.pfault_compare,
2632 			     (u64 __user *)reg->addr);
2633 		break;
2634 	case KVM_REG_S390_PFSELECT:
2635 		r = put_user(vcpu->arch.pfault_select,
2636 			     (u64 __user *)reg->addr);
2637 		break;
2638 	case KVM_REG_S390_PP:
2639 		r = put_user(vcpu->arch.sie_block->pp,
2640 			     (u64 __user *)reg->addr);
2641 		break;
2642 	case KVM_REG_S390_GBEA:
2643 		r = put_user(vcpu->arch.sie_block->gbea,
2644 			     (u64 __user *)reg->addr);
2645 		break;
2646 	default:
2647 		break;
2648 	}
2649 
2650 	return r;
2651 }
2652 
2653 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2654 					   struct kvm_one_reg *reg)
2655 {
2656 	int r = -EINVAL;
2657 	__u64 val;
2658 
2659 	switch (reg->id) {
2660 	case KVM_REG_S390_TODPR:
2661 		r = get_user(vcpu->arch.sie_block->todpr,
2662 			     (u32 __user *)reg->addr);
2663 		break;
2664 	case KVM_REG_S390_EPOCHDIFF:
2665 		r = get_user(vcpu->arch.sie_block->epoch,
2666 			     (u64 __user *)reg->addr);
2667 		break;
2668 	case KVM_REG_S390_CPU_TIMER:
2669 		r = get_user(val, (u64 __user *)reg->addr);
2670 		if (!r)
2671 			kvm_s390_set_cpu_timer(vcpu, val);
2672 		break;
2673 	case KVM_REG_S390_CLOCK_COMP:
2674 		r = get_user(vcpu->arch.sie_block->ckc,
2675 			     (u64 __user *)reg->addr);
2676 		break;
2677 	case KVM_REG_S390_PFTOKEN:
2678 		r = get_user(vcpu->arch.pfault_token,
2679 			     (u64 __user *)reg->addr);
2680 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2681 			kvm_clear_async_pf_completion_queue(vcpu);
2682 		break;
2683 	case KVM_REG_S390_PFCOMPARE:
2684 		r = get_user(vcpu->arch.pfault_compare,
2685 			     (u64 __user *)reg->addr);
2686 		break;
2687 	case KVM_REG_S390_PFSELECT:
2688 		r = get_user(vcpu->arch.pfault_select,
2689 			     (u64 __user *)reg->addr);
2690 		break;
2691 	case KVM_REG_S390_PP:
2692 		r = get_user(vcpu->arch.sie_block->pp,
2693 			     (u64 __user *)reg->addr);
2694 		break;
2695 	case KVM_REG_S390_GBEA:
2696 		r = get_user(vcpu->arch.sie_block->gbea,
2697 			     (u64 __user *)reg->addr);
2698 		break;
2699 	default:
2700 		break;
2701 	}
2702 
2703 	return r;
2704 }
2705 
2706 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2707 {
2708 	kvm_s390_vcpu_initial_reset(vcpu);
2709 	return 0;
2710 }
2711 
2712 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2713 {
2714 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2715 	return 0;
2716 }
2717 
2718 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2719 {
2720 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2721 	return 0;
2722 }
2723 
2724 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2725 				  struct kvm_sregs *sregs)
2726 {
2727 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2728 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2729 	return 0;
2730 }
2731 
2732 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2733 				  struct kvm_sregs *sregs)
2734 {
2735 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2736 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2737 	return 0;
2738 }
2739 
2740 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2741 {
2742 	if (test_fp_ctl(fpu->fpc))
2743 		return -EINVAL;
2744 	vcpu->run->s.regs.fpc = fpu->fpc;
2745 	if (MACHINE_HAS_VX)
2746 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2747 				 (freg_t *) fpu->fprs);
2748 	else
2749 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2750 	return 0;
2751 }
2752 
2753 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2754 {
2755 	/* make sure we have the latest values */
2756 	save_fpu_regs();
2757 	if (MACHINE_HAS_VX)
2758 		convert_vx_to_fp((freg_t *) fpu->fprs,
2759 				 (__vector128 *) vcpu->run->s.regs.vrs);
2760 	else
2761 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2762 	fpu->fpc = vcpu->run->s.regs.fpc;
2763 	return 0;
2764 }
2765 
2766 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2767 {
2768 	int rc = 0;
2769 
2770 	if (!is_vcpu_stopped(vcpu))
2771 		rc = -EBUSY;
2772 	else {
2773 		vcpu->run->psw_mask = psw.mask;
2774 		vcpu->run->psw_addr = psw.addr;
2775 	}
2776 	return rc;
2777 }
2778 
2779 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2780 				  struct kvm_translation *tr)
2781 {
2782 	return -EINVAL; /* not implemented yet */
2783 }
2784 
2785 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2786 			      KVM_GUESTDBG_USE_HW_BP | \
2787 			      KVM_GUESTDBG_ENABLE)
2788 
2789 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2790 					struct kvm_guest_debug *dbg)
2791 {
2792 	int rc = 0;
2793 
2794 	vcpu->guest_debug = 0;
2795 	kvm_s390_clear_bp_data(vcpu);
2796 
2797 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2798 		return -EINVAL;
2799 	if (!sclp.has_gpere)
2800 		return -EINVAL;
2801 
2802 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2803 		vcpu->guest_debug = dbg->control;
2804 		/* enforce guest PER */
2805 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2806 
2807 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2808 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2809 	} else {
2810 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2811 		vcpu->arch.guestdbg.last_bp = 0;
2812 	}
2813 
2814 	if (rc) {
2815 		vcpu->guest_debug = 0;
2816 		kvm_s390_clear_bp_data(vcpu);
2817 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2818 	}
2819 
2820 	return rc;
2821 }
2822 
2823 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2824 				    struct kvm_mp_state *mp_state)
2825 {
2826 	/* CHECK_STOP and LOAD are not supported yet */
2827 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2828 				       KVM_MP_STATE_OPERATING;
2829 }
2830 
2831 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2832 				    struct kvm_mp_state *mp_state)
2833 {
2834 	int rc = 0;
2835 
2836 	/* user space knows about this interface - let it control the state */
2837 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2838 
2839 	switch (mp_state->mp_state) {
2840 	case KVM_MP_STATE_STOPPED:
2841 		kvm_s390_vcpu_stop(vcpu);
2842 		break;
2843 	case KVM_MP_STATE_OPERATING:
2844 		kvm_s390_vcpu_start(vcpu);
2845 		break;
2846 	case KVM_MP_STATE_LOAD:
2847 	case KVM_MP_STATE_CHECK_STOP:
2848 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2849 	default:
2850 		rc = -ENXIO;
2851 	}
2852 
2853 	return rc;
2854 }
2855 
2856 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2857 {
2858 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2859 }
2860 
2861 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2862 {
2863 retry:
2864 	kvm_s390_vcpu_request_handled(vcpu);
2865 	if (!kvm_request_pending(vcpu))
2866 		return 0;
2867 	/*
2868 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2869 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2870 	 * This ensures that the ipte instruction for this request has
2871 	 * already finished. We might race against a second unmapper that
2872 	 * wants to set the blocking bit. Let's just retry the request loop.
2873 	 */
2874 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2875 		int rc;
2876 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2877 					  kvm_s390_get_prefix(vcpu),
2878 					  PAGE_SIZE * 2, PROT_WRITE);
2879 		if (rc) {
2880 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2881 			return rc;
2882 		}
2883 		goto retry;
2884 	}
2885 
2886 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2887 		vcpu->arch.sie_block->ihcpu = 0xffff;
2888 		goto retry;
2889 	}
2890 
2891 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2892 		if (!ibs_enabled(vcpu)) {
2893 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2894 			atomic_or(CPUSTAT_IBS,
2895 					&vcpu->arch.sie_block->cpuflags);
2896 		}
2897 		goto retry;
2898 	}
2899 
2900 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2901 		if (ibs_enabled(vcpu)) {
2902 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2903 			atomic_andnot(CPUSTAT_IBS,
2904 					  &vcpu->arch.sie_block->cpuflags);
2905 		}
2906 		goto retry;
2907 	}
2908 
2909 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2910 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2911 		goto retry;
2912 	}
2913 
2914 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2915 		/*
2916 		 * Disable CMMA virtualization; we will emulate the ESSA
2917 		 * instruction manually, in order to provide additional
2918 		 * functionalities needed for live migration.
2919 		 */
2920 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2921 		goto retry;
2922 	}
2923 
2924 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2925 		/*
2926 		 * Re-enable CMMA virtualization if CMMA is available and
2927 		 * was used.
2928 		 */
2929 		if ((vcpu->kvm->arch.use_cmma) &&
2930 		    (vcpu->kvm->mm->context.use_cmma))
2931 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2932 		goto retry;
2933 	}
2934 
2935 	/* nothing to do, just clear the request */
2936 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2937 
2938 	return 0;
2939 }
2940 
2941 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2942 				 const struct kvm_s390_vm_tod_clock *gtod)
2943 {
2944 	struct kvm_vcpu *vcpu;
2945 	struct kvm_s390_tod_clock_ext htod;
2946 	int i;
2947 
2948 	mutex_lock(&kvm->lock);
2949 	preempt_disable();
2950 
2951 	get_tod_clock_ext((char *)&htod);
2952 
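	/*
	 * epdx and epoch together form a 128 bit difference between
	 * guest and host TOD.  The 64 bit subtraction below may wrap,
	 * in which case a borrow must be propagated into the epoch
	 * index by hand.
	 */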
2953 	kvm->arch.epoch = gtod->tod - htod.tod;
2954 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2955 
2956 	if (kvm->arch.epoch > gtod->tod)
2957 		kvm->arch.epdx -= 1;
2958 
2959 	kvm_s390_vcpu_block_all(kvm);
2960 	kvm_for_each_vcpu(i, vcpu, kvm) {
2961 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2962 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2963 	}
2964 
2965 	kvm_s390_vcpu_unblock_all(kvm);
2966 	preempt_enable();
2967 	mutex_unlock(&kvm->lock);
2968 }
2969 
2970 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2971 {
2972 	struct kvm_vcpu *vcpu;
2973 	int i;
2974 
2975 	mutex_lock(&kvm->lock);
2976 	preempt_disable();
2977 	kvm->arch.epoch = tod - get_tod_clock();
2978 	kvm_s390_vcpu_block_all(kvm);
2979 	kvm_for_each_vcpu(i, vcpu, kvm)
2980 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2981 	kvm_s390_vcpu_unblock_all(kvm);
2982 	preempt_enable();
2983 	mutex_unlock(&kvm->lock);
2984 }
2985 
2986 /**
2987  * kvm_arch_fault_in_page - fault-in guest page if necessary
2988  * @vcpu: The corresponding virtual cpu
2989  * @gpa: Guest physical address
2990  * @writable: Whether the page should be writable or not
2991  *
2992  * Make sure that a guest page has been faulted-in on the host.
2993  *
2994  * Return: Zero on success, negative error code otherwise.
2995  */
2996 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2997 {
2998 	return gmap_fault(vcpu->arch.gmap, gpa,
2999 			  writable ? FAULT_FLAG_WRITE : 0);
3000 }
3001 
3002 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3003 				      unsigned long token)
3004 {
3005 	struct kvm_s390_interrupt inti;
3006 	struct kvm_s390_irq irq;
3007 
3008 	if (start_token) {
3009 		irq.u.ext.ext_params2 = token;
3010 		irq.type = KVM_S390_INT_PFAULT_INIT;
3011 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3012 	} else {
3013 		inti.type = KVM_S390_INT_PFAULT_DONE;
3014 		inti.parm64 = token;
3015 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3016 	}
3017 }
3018 
3019 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3020 				     struct kvm_async_pf *work)
3021 {
3022 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3023 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3024 }
3025 
3026 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3027 				 struct kvm_async_pf *work)
3028 {
3029 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3030 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3031 }
3032 
3033 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3034 			       struct kvm_async_pf *work)
3035 {
3036 	/* s390 will always inject the page directly */
3037 }
3038 
3039 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3040 {
3041 	/*
3042 	 * s390 will always inject the page directly,
3043 	 * but we still want check_async_completion to clean up
3044 	 */
3045 	return true;
3046 }
3047 
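/*
 * Set up an async pfault for the current host fault, but only if the
 * guest has the pfault handshake armed (valid token, matching PSW
 * mask/compare, CR0 mask bit set) and can currently take the external
 * interrupt; returning 0 means the fault is resolved synchronously.
 */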
3048 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3049 {
3050 	hva_t hva;
3051 	struct kvm_arch_async_pf arch;
3052 	int rc;
3053 
3054 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3055 		return 0;
3056 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3057 	    vcpu->arch.pfault_compare)
3058 		return 0;
3059 	if (psw_extint_disabled(vcpu))
3060 		return 0;
3061 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3062 		return 0;
3063 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3064 		return 0;
3065 	if (!vcpu->arch.gmap->pfault_enabled)
3066 		return 0;
3067 
3068 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3069 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3070 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3071 		return 0;
3072 
3073 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3074 	return rc;
3075 }
3076 
3077 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3078 {
3079 	int rc, cpuflags;
3080 
3081 	/*
3082 	 * On s390, notifications for arriving pages are delivered directly
3083 	 * to the guest, but the housekeeping for completed pfaults is
3084 	 * handled outside the worker.
3085 	 */
3086 	kvm_check_async_pf_completion(vcpu);
3087 
3088 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3089 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3090 
3091 	if (need_resched())
3092 		schedule();
3093 
3094 	if (test_cpu_flag(CIF_MCCK_PENDING))
3095 		s390_handle_mcck();
3096 
3097 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3098 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3099 		if (rc)
3100 			return rc;
3101 	}
3102 
3103 	rc = kvm_s390_handle_requests(vcpu);
3104 	if (rc)
3105 		return rc;
3106 
3107 	if (guestdbg_enabled(vcpu)) {
3108 		kvm_s390_backup_guest_per_regs(vcpu);
3109 		kvm_s390_patch_guest_per_regs(vcpu);
3110 	}
3111 
3112 	vcpu->arch.sie_block->icptcode = 0;
3113 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3114 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3115 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3116 
3117 	return 0;
3118 }
3119 
3120 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3121 {
3122 	struct kvm_s390_pgm_info pgm_info = {
3123 		.code = PGM_ADDRESSING,
3124 	};
3125 	u8 opcode, ilen;
3126 	int rc;
3127 
3128 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3129 	trace_kvm_s390_sie_fault(vcpu);
3130 
3131 	/*
3132 	 * We want to inject an addressing exception, which is defined as a
3133 	 * suppressing or terminating exception. However, since we came here
3134 	 * by a DAT access exception, the PSW still points to the faulting
3135 	 * instruction since DAT exceptions are nullifying. So we've got
3136 	 * to look up the current opcode to get the length of the instruction
3137 	 * to be able to forward the PSW.
3138 	 */
3139 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3140 	ilen = insn_length(opcode);
3141 	if (rc < 0) {
3142 		return rc;
3143 	} else if (rc) {
3144 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3145 		 * Forward by arbitrary ilc, injection will take care of
3146 		 * nullification if necessary.
3147 		 */
3148 		pgm_info = vcpu->arch.pgm;
3149 		ilen = 4;
3150 	}
3151 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3152 	kvm_s390_forward_psw(vcpu, ilen);
3153 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3154 }
3155 
3156 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3157 {
3158 	struct mcck_volatile_info *mcck_info;
3159 	struct sie_page *sie_page;
3160 
3161 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3162 		   vcpu->arch.sie_block->icptcode);
3163 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3164 
3165 	if (guestdbg_enabled(vcpu))
3166 		kvm_s390_restore_guest_per_regs(vcpu);
3167 
3168 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3169 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3170 
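	/*
	 * An exit_reason of -EINTR means the SIE run was interrupted
	 * by a host machine check; the details were saved in the
	 * sie_page and are reinjected into the guest below.
	 */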
3171 	if (exit_reason == -EINTR) {
3172 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3173 		sie_page = container_of(vcpu->arch.sie_block,
3174 					struct sie_page, sie_block);
3175 		mcck_info = &sie_page->mcck_info;
3176 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3177 		return 0;
3178 	}
3179 
3180 	if (vcpu->arch.sie_block->icptcode > 0) {
3181 		int rc = kvm_handle_sie_intercept(vcpu);
3182 
3183 		if (rc != -EOPNOTSUPP)
3184 			return rc;
3185 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3186 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3187 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3188 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3189 		return -EREMOTE;
3190 	} else if (exit_reason != -EFAULT) {
3191 		vcpu->stat.exit_null++;
3192 		return 0;
3193 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3194 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3195 		vcpu->run->s390_ucontrol.trans_exc_code =
3196 						current->thread.gmap_addr;
3197 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3198 		return -EREMOTE;
3199 	} else if (current->thread.gmap_pfault) {
3200 		trace_kvm_s390_major_guest_pfault(vcpu);
3201 		current->thread.gmap_pfault = 0;
3202 		if (kvm_arch_setup_async_pf(vcpu))
3203 			return 0;
3204 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3205 	}
3206 	return vcpu_post_run_fault_in_sie(vcpu);
3207 }
3208 
3209 static int __vcpu_run(struct kvm_vcpu *vcpu)
3210 {
3211 	int rc, exit_reason;
3212 
3213 	/*
3214 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3215 	 * running the guest), so that memslots (and other stuff) are protected
3216 	 */
3217 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3218 
3219 	do {
3220 		rc = vcpu_pre_run(vcpu);
3221 		if (rc)
3222 			break;
3223 
3224 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3225 		/*
3226 		 * As PF_VCPU will be used in the fault handler, there must be
3227 		 * no uaccess between guest_enter and guest_exit.
3228 		 */
3229 		local_irq_disable();
3230 		guest_enter_irqoff();
3231 		__disable_cpu_timer_accounting(vcpu);
3232 		local_irq_enable();
3233 		exit_reason = sie64a(vcpu->arch.sie_block,
3234 				     vcpu->run->s.regs.gprs);
3235 		local_irq_disable();
3236 		__enable_cpu_timer_accounting(vcpu);
3237 		guest_exit_irqoff();
3238 		local_irq_enable();
3239 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3240 
3241 		rc = vcpu_post_run(vcpu, exit_reason);
3242 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3243 
3244 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3245 	return rc;
3246 }
3247 
3248 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3249 {
3250 	struct runtime_instr_cb *riccb;
3251 	struct gs_cb *gscb;
3252 
3253 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3254 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3255 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3256 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3257 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3258 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3259 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3260 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3261 		/* some control register changes require a tlb flush */
3262 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3263 	}
3264 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3265 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3266 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3267 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3268 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3269 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3270 	}
3271 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3272 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3273 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3274 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3275 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3276 			kvm_clear_async_pf_completion_queue(vcpu);
3277 	}
3278 	/*
3279 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3280 	 * we should enable RI here instead of doing the lazy enablement.
3281 	 */
3282 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3283 	    test_kvm_facility(vcpu->kvm, 64) &&
3284 	    riccb->v &&
3285 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3286 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3287 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3288 	}
3289 	/*
3290 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3291 	 * we should enable GS here instead of doing the lazy enablement.
3292 	 */
3293 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3294 	    test_kvm_facility(vcpu->kvm, 133) &&
3295 	    gscb->gssm &&
3296 	    !vcpu->arch.gs_enabled) {
3297 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3298 		vcpu->arch.sie_block->ecb |= ECB_GS;
3299 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3300 		vcpu->arch.gs_enabled = 1;
3301 	}
3302 	save_access_regs(vcpu->arch.host_acrs);
3303 	restore_access_regs(vcpu->run->s.regs.acrs);
3304 	/* save host (userspace) fprs/vrs */
3305 	save_fpu_regs();
3306 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3307 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3308 	if (MACHINE_HAS_VX)
3309 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3310 	else
3311 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3312 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3313 	if (test_fp_ctl(current->thread.fpu.fpc))
3314 		/* User space provided an invalid FPC, let's clear it */
3315 		current->thread.fpu.fpc = 0;
3316 	if (MACHINE_HAS_GS) {
3317 		preempt_disable();
3318 		__ctl_set_bit(2, 4);
3319 		if (current->thread.gs_cb) {
3320 			vcpu->arch.host_gscb = current->thread.gs_cb;
3321 			save_gs_cb(vcpu->arch.host_gscb);
3322 		}
3323 		if (vcpu->arch.gs_enabled) {
3324 			current->thread.gs_cb = (struct gs_cb *)
3325 						&vcpu->run->s.regs.gscb;
3326 			restore_gs_cb(current->thread.gs_cb);
3327 		}
3328 		preempt_enable();
3329 	}
3330 
3331 	kvm_run->kvm_dirty_regs = 0;
3332 }
3333 
3334 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3335 {
3336 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3337 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3338 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3339 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3340 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3341 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3342 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3343 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3344 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3345 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3346 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3347 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3348 	save_access_regs(vcpu->run->s.regs.acrs);
3349 	restore_access_regs(vcpu->arch.host_acrs);
3350 	/* Save guest register state */
3351 	save_fpu_regs();
3352 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3353 	/* Restore will be done lazily at return */
3354 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3355 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3356 	if (MACHINE_HAS_GS) {
3357 		__ctl_set_bit(2, 4);
3358 		if (vcpu->arch.gs_enabled)
3359 			save_gs_cb(current->thread.gs_cb);
3360 		preempt_disable();
3361 		current->thread.gs_cb = vcpu->arch.host_gscb;
3362 		restore_gs_cb(vcpu->arch.host_gscb);
3363 		preempt_enable();
3364 		if (!vcpu->arch.host_gscb)
3365 			__ctl_clear_bit(2, 4);
3366 		vcpu->arch.host_gscb = NULL;
3367 	}
3369 }
3370 
3371 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3372 {
3373 	int rc;
3374 	sigset_t sigsaved;
3375 
3376 	if (kvm_run->immediate_exit)
3377 		return -EINTR;
3378 
3379 	if (guestdbg_exit_pending(vcpu)) {
3380 		kvm_s390_prepare_debug_exit(vcpu);
3381 		return 0;
3382 	}
3383 
3384 	if (vcpu->sigset_active)
3385 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3386 
3387 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3388 		kvm_s390_vcpu_start(vcpu);
3389 	} else if (is_vcpu_stopped(vcpu)) {
3390 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3391 				   vcpu->vcpu_id);
3392 		return -EINVAL;
3393 	}
3394 
3395 	sync_regs(vcpu, kvm_run);
3396 	enable_cpu_timer_accounting(vcpu);
3397 
3398 	might_fault();
3399 	rc = __vcpu_run(vcpu);
3400 
3401 	if (signal_pending(current) && !rc) {
3402 		kvm_run->exit_reason = KVM_EXIT_INTR;
3403 		rc = -EINTR;
3404 	}
3405 
3406 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3407 		kvm_s390_prepare_debug_exit(vcpu);
3408 		rc = 0;
3409 	}
3410 
3411 	if (rc == -EREMOTE) {
3412 		/* userspace support is needed, kvm_run has been prepared */
3413 		rc = 0;
3414 	}
3415 
3416 	disable_cpu_timer_accounting(vcpu);
3417 	store_regs(vcpu, kvm_run);
3418 
3419 	if (vcpu->sigset_active)
3420 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3421 
3422 	vcpu->stat.exit_userspace++;
3423 	return rc;
3424 }
3425 
3426 /*
3427  * store status at address
3428  * we have two special cases:
3429  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3430  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3431  */
3432 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3433 {
3434 	unsigned char archmode = 1;
3435 	freg_t fprs[NUM_FPRS];
3436 	unsigned int px;
3437 	u64 clkcomp, cputm;
3438 	int rc;
3439 
3440 	px = kvm_s390_get_prefix(vcpu);
3441 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3442 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3443 			return -EFAULT;
3444 		gpa = 0;
3445 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3446 		if (write_guest_real(vcpu, 163, &archmode, 1))
3447 			return -EFAULT;
3448 		gpa = px;
3449 	} else
3450 		gpa -= __LC_FPREGS_SAVE_AREA;
3451 
3452 	/* manually convert vector registers if necessary */
3453 	if (MACHINE_HAS_VX) {
3454 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3455 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3456 				     fprs, 128);
3457 	} else {
3458 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3459 				     vcpu->run->s.regs.fprs, 128);
3460 	}
3461 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3462 			      vcpu->run->s.regs.gprs, 128);
3463 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3464 			      &vcpu->arch.sie_block->gpsw, 16);
3465 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3466 			      &px, 4);
3467 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3468 			      &vcpu->run->s.regs.fpc, 4);
3469 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3470 			      &vcpu->arch.sie_block->todpr, 4);
3471 	cputm = kvm_s390_get_cpu_timer(vcpu);
3472 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3473 			      &cputm, 8);
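	/* the save area format holds only bits 0-55 of the clock comparator */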
3474 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3475 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3476 			      &clkcomp, 8);
3477 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3478 			      &vcpu->run->s.regs.acrs, 64);
3479 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3480 			      &vcpu->arch.sie_block->gcr, 128);
3481 	return rc ? -EFAULT : 0;
3482 }
3483 
3484 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3485 {
3486 	/*
3487 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3488 	 * switch in the run ioctl. Let's update our copies before we store
3489 	 * them into the save area.
3490 	 */
3491 	save_fpu_regs();
3492 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3493 	save_access_regs(vcpu->run->s.regs.acrs);
3494 
3495 	return kvm_s390_store_status_unloaded(vcpu, addr);
3496 }
3497 
3498 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3499 {
3500 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3501 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3502 }
3503 
3504 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3505 {
3506 	unsigned int i;
3507 	struct kvm_vcpu *vcpu;
3508 
3509 	kvm_for_each_vcpu(i, vcpu, kvm) {
3510 		__disable_ibs_on_vcpu(vcpu);
3511 	}
3512 }
3513 
3514 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3515 {
3516 	if (!sclp.has_ibs)
3517 		return;
3518 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3519 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3520 }
3521 
3522 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3523 {
3524 	int i, online_vcpus, started_vcpus = 0;
3525 
3526 	if (!is_vcpu_stopped(vcpu))
3527 		return;
3528 
3529 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3530 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3531 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3532 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3533 
3534 	for (i = 0; i < online_vcpus; i++) {
3535 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3536 			started_vcpus++;
3537 	}
3538 
3539 	if (started_vcpus == 0) {
3540 		/* we're the only active VCPU -> speed it up */
3541 		__enable_ibs_on_vcpu(vcpu);
3542 	} else if (started_vcpus == 1) {
3543 		/*
3544 		 * As we are starting a second VCPU, we have to disable
3545 		 * the IBS facility on all VCPUs to remove potentially
3546 		 * outstanding ENABLE requests.
3547 		 */
3548 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3549 	}
3550 
3551 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3552 	/*
3553 	 * Another VCPU might have used IBS while we were offline.
3554 	 * Let's play safe and flush the VCPU at startup.
3555 	 */
3556 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3557 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3558 	return;
3559 }
3560 
3561 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3562 {
3563 	int i, online_vcpus, started_vcpus = 0;
3564 	struct kvm_vcpu *started_vcpu = NULL;
3565 
3566 	if (is_vcpu_stopped(vcpu))
3567 		return;
3568 
3569 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3570 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3571 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3572 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3573 
3574 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3575 	kvm_s390_clear_stop_irq(vcpu);
3576 
3577 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3578 	__disable_ibs_on_vcpu(vcpu);
3579 
	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

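/*
 * A hypothetical userspace sketch (illustration only; vcpu_fd, guest_addr
 * and local_buf are assumptions, not part of this file) of driving the
 * handler below through the KVM_S390_MEM_OP vcpu ioctl:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(local_buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)local_buf,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */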
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

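	/* keep the memslot layout stable while we access guest memory */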
	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

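	/* a positive return value is a program interruption code to forward */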
	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
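		/* the buffer must hold a whole number of irq structs, at least one */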
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

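/*
 * For user-controlled (ucontrol) VMs, userspace may mmap() the VCPU's
 * hardware SIE control block at KVM_S390_SIE_PAGE_OFFSET; any other fault
 * on the vcpu fd mapping is refused.
 */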
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

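/* s390 keeps no architecture-specific memslot state, so nothing to set up */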
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	 * segment boundary (1 MB). The memory in userland may be fragmented
	 * into various different vmas, and it is fine to mmap() and munmap()
	 * parts of this slot at any time after this call.
	 */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

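/*
 * sclp.hmfai packs, MSB first, one 2-bit field per 64-bit facility-list
 * word; the field selects how many additional 16-bit chunks of that word
 * to hide from guests, on top of the uppermost 16 bits which are always
 * masked off (interpretation inferred from the arithmetic below).
 */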
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

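	/* fold in the facility bits the host itself offers, filtered
	 * by nonhyp_mask() */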
	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");