xref: /linux/arch/s390/kvm/kvm-s390.c (revision 8fa1696ea78162ca3112a26879d9379483443c85)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48 
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
57 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63 
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
66 	{ "exit_null", VCPU_STAT(exit_null) },
67 	{ "exit_validity", VCPU_STAT(exit_validity) },
68 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
70 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
72 	{ "exit_pei", VCPU_STAT(exit_pei) },
73 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
96 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
98 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
103 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
108 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
125 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
126 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
128 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
129 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
130 	{ NULL }
131 };
132 
133 struct kvm_s390_tod_clock_ext {
134 	__u8 epoch_idx;
135 	__u64 tod;
136 	__u8 reserved[7];
137 } __packed;
138 
139 /* allow nested virtualization in KVM (if enabled by user space) */
140 static int nested;
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
143 
144 /* upper facilities limit for kvm */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146 
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152 
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
157 
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
160 debug_info_t *kvm_s390_dbf;
161 
162 /* Section: not file related */
163 int kvm_arch_hardware_enable(void)
164 {
165 	/* every s390 is virtualization enabled ;-) */
166 	return 0;
167 }
168 
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170 			      unsigned long end);
171 
172 /*
173  * This callback is executed during stop_machine(). All CPUs are therefore
174  * temporarily stopped. In order not to change guest behavior, we have to
175  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
176  * so a CPU won't be stopped while calculating with the epoch.
177  */
178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179 			  void *v)
180 {
181 	struct kvm *kvm;
182 	struct kvm_vcpu *vcpu;
183 	int i;
184 	unsigned long long *delta = v;
185 
186 	list_for_each_entry(kvm, &vm_list, vm_list) {
187 		kvm->arch.epoch -= *delta;
188 		kvm_for_each_vcpu(i, vcpu, kvm) {
189 			vcpu->arch.sie_block->epoch -= *delta;
190 			if (vcpu->arch.cputm_enabled)
191 				vcpu->arch.cputm_start += *delta;
192 			if (vcpu->arch.vsie_block)
193 				vcpu->arch.vsie_block->epoch -= *delta;
194 		}
195 	}
196 	return NOTIFY_OK;
197 }
198 
199 static struct notifier_block kvm_clock_notifier = {
200 	.notifier_call = kvm_clock_sync,
201 };
202 
203 int kvm_arch_hardware_setup(void)
204 {
205 	gmap_notifier.notifier_call = kvm_gmap_notifier;
206 	gmap_register_pte_notifier(&gmap_notifier);
207 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
208 	gmap_register_pte_notifier(&vsie_gmap_notifier);
209 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
210 				       &kvm_clock_notifier);
211 	return 0;
212 }
213 
214 void kvm_arch_hardware_unsetup(void)
215 {
216 	gmap_unregister_pte_notifier(&gmap_notifier);
217 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
218 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
219 					 &kvm_clock_notifier);
220 }
221 
222 static void allow_cpu_feat(unsigned long nr)
223 {
224 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
225 }
226 
227 static inline int plo_test_bit(unsigned char nr)
228 {
229 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
230 	int cc;
231 
232 	asm volatile(
233 		/* Parameter registers are ignored for "test bit" */
234 		"	plo	0,0,0,0(0)\n"
235 		"	ipm	%0\n"
236 		"	srl	%0,28\n"
237 		: "=d" (cc)
238 		: "d" (r0)
239 		: "cc");
240 	return cc == 0;
241 }
242 
243 static void kvm_s390_cpu_feat_init(void)
244 {
245 	int i;
246 
247 	for (i = 0; i < 256; ++i) {
248 		if (plo_test_bit(i))
249 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
250 	}
251 
252 	if (test_facility(28)) /* TOD-clock steering */
253 		ptff(kvm_s390_available_subfunc.ptff,
254 		     sizeof(kvm_s390_available_subfunc.ptff),
255 		     PTFF_QAF);
256 
257 	if (test_facility(17)) { /* MSA */
258 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
259 			      kvm_s390_available_subfunc.kmac);
260 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
261 			      kvm_s390_available_subfunc.kmc);
262 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
263 			      kvm_s390_available_subfunc.km);
264 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
265 			      kvm_s390_available_subfunc.kimd);
266 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
267 			      kvm_s390_available_subfunc.klmd);
268 	}
269 	if (test_facility(76)) /* MSA3 */
270 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.pckmo);
272 	if (test_facility(77)) { /* MSA4 */
273 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
274 			      kvm_s390_available_subfunc.kmctr);
275 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
276 			      kvm_s390_available_subfunc.kmf);
277 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
278 			      kvm_s390_available_subfunc.kmo);
279 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
280 			      kvm_s390_available_subfunc.pcc);
281 	}
282 	if (test_facility(57)) /* MSA5 */
283 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
284 			      kvm_s390_available_subfunc.ppno);
285 
286 	if (test_facility(146)) /* MSA8 */
287 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
288 			      kvm_s390_available_subfunc.kma);
289 
290 	if (MACHINE_HAS_ESOP)
291 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
292 	/*
293 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
294 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
295 	 */
296 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
297 	    !test_facility(3) || !nested)
298 		return;
299 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
300 	if (sclp.has_64bscao)
301 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
302 	if (sclp.has_siif)
303 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
304 	if (sclp.has_gpere)
305 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
306 	if (sclp.has_gsls)
307 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
308 	if (sclp.has_ib)
309 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
310 	if (sclp.has_cei)
311 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
312 	if (sclp.has_ibs)
313 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
314 	if (sclp.has_kss)
315 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
316 	/*
317 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
318 	 * all skey handling functions read/set the skey from the PGSTE
319 	 * instead of the real storage key.
320 	 *
321 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
322 	 * pages being detected as preserved although they are resident.
323 	 *
324 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
325 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
326 	 *
327 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
328 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
329 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
330 	 *
331 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
332 	 * cannot easily shadow the SCA because of the ipte lock.
333 	 */
334 }
335 
336 int kvm_arch_init(void *opaque)
337 {
338 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339 	if (!kvm_s390_dbf)
340 		return -ENOMEM;
341 
342 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
343 		debug_unregister(kvm_s390_dbf);
344 		return -ENOMEM;
345 	}
346 
347 	kvm_s390_cpu_feat_init();
348 
349 	/* Register floating interrupt controller interface. */
350 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
351 }
352 
353 void kvm_arch_exit(void)
354 {
355 	debug_unregister(kvm_s390_dbf);
356 }
357 
358 /* Section: device related */
359 long kvm_arch_dev_ioctl(struct file *filp,
360 			unsigned int ioctl, unsigned long arg)
361 {
362 	if (ioctl == KVM_S390_ENABLE_SIE)
363 		return s390_enable_sie();
364 	return -EINVAL;
365 }
366 
367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
368 {
369 	int r;
370 
371 	switch (ext) {
372 	case KVM_CAP_S390_PSW:
373 	case KVM_CAP_S390_GMAP:
374 	case KVM_CAP_SYNC_MMU:
375 #ifdef CONFIG_KVM_S390_UCONTROL
376 	case KVM_CAP_S390_UCONTROL:
377 #endif
378 	case KVM_CAP_ASYNC_PF:
379 	case KVM_CAP_SYNC_REGS:
380 	case KVM_CAP_ONE_REG:
381 	case KVM_CAP_ENABLE_CAP:
382 	case KVM_CAP_S390_CSS_SUPPORT:
383 	case KVM_CAP_IOEVENTFD:
384 	case KVM_CAP_DEVICE_CTRL:
385 	case KVM_CAP_ENABLE_CAP_VM:
386 	case KVM_CAP_S390_IRQCHIP:
387 	case KVM_CAP_VM_ATTRIBUTES:
388 	case KVM_CAP_MP_STATE:
389 	case KVM_CAP_IMMEDIATE_EXIT:
390 	case KVM_CAP_S390_INJECT_IRQ:
391 	case KVM_CAP_S390_USER_SIGP:
392 	case KVM_CAP_S390_USER_STSI:
393 	case KVM_CAP_S390_SKEYS:
394 	case KVM_CAP_S390_IRQ_STATE:
395 	case KVM_CAP_S390_USER_INSTR0:
396 	case KVM_CAP_S390_CMMA_MIGRATION:
397 	case KVM_CAP_S390_AIS:
398 		r = 1;
399 		break;
400 	case KVM_CAP_S390_MEM_OP:
401 		r = MEM_OP_MAX_SIZE;
402 		break;
403 	case KVM_CAP_NR_VCPUS:
404 	case KVM_CAP_MAX_VCPUS:
405 		r = KVM_S390_BSCA_CPU_SLOTS;
406 		if (!kvm_s390_use_sca_entries())
407 			r = KVM_MAX_VCPUS;
408 		else if (sclp.has_esca && sclp.has_64bscao)
409 			r = KVM_S390_ESCA_CPU_SLOTS;
410 		break;
411 	case KVM_CAP_NR_MEMSLOTS:
412 		r = KVM_USER_MEM_SLOTS;
413 		break;
414 	case KVM_CAP_S390_COW:
415 		r = MACHINE_HAS_ESOP;
416 		break;
417 	case KVM_CAP_S390_VECTOR_REGISTERS:
418 		r = MACHINE_HAS_VX;
419 		break;
420 	case KVM_CAP_S390_RI:
421 		r = test_facility(64);
422 		break;
423 	case KVM_CAP_S390_GS:
424 		r = test_facility(133);
425 		break;
426 	default:
427 		r = 0;
428 	}
429 	return r;
430 }
431 
432 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
433 					struct kvm_memory_slot *memslot)
434 {
435 	gfn_t cur_gfn, last_gfn;
436 	unsigned long address;
437 	struct gmap *gmap = kvm->arch.gmap;
438 
439 	/* Loop over all guest pages */
440 	last_gfn = memslot->base_gfn + memslot->npages;
441 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
442 		address = gfn_to_hva_memslot(memslot, cur_gfn);
443 
444 		if (test_and_clear_guest_dirty(gmap->mm, address))
445 			mark_page_dirty(kvm, cur_gfn);
446 		if (fatal_signal_pending(current))
447 			return;
448 		cond_resched();
449 	}
450 }
451 
452 /* Section: vm related */
453 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
454 
455 /*
456  * Get (and clear) the dirty memory log for a memory slot.
457  */
458 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
459 			       struct kvm_dirty_log *log)
460 {
461 	int r;
462 	unsigned long n;
463 	struct kvm_memslots *slots;
464 	struct kvm_memory_slot *memslot;
465 	int is_dirty = 0;
466 
467 	if (kvm_is_ucontrol(kvm))
468 		return -EINVAL;
469 
470 	mutex_lock(&kvm->slots_lock);
471 
472 	r = -EINVAL;
473 	if (log->slot >= KVM_USER_MEM_SLOTS)
474 		goto out;
475 
476 	slots = kvm_memslots(kvm);
477 	memslot = id_to_memslot(slots, log->slot);
478 	r = -ENOENT;
479 	if (!memslot->dirty_bitmap)
480 		goto out;
481 
482 	kvm_s390_sync_dirty_log(kvm, memslot);
483 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
484 	if (r)
485 		goto out;
486 
487 	/* Clear the dirty log */
488 	if (is_dirty) {
489 		n = kvm_dirty_bitmap_bytes(memslot);
490 		memset(memslot->dirty_bitmap, 0, n);
491 	}
492 	r = 0;
493 out:
494 	mutex_unlock(&kvm->slots_lock);
495 	return r;
496 }
497 
498 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
499 {
500 	unsigned int i;
501 	struct kvm_vcpu *vcpu;
502 
503 	kvm_for_each_vcpu(i, vcpu, kvm) {
504 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
505 	}
506 }
507 
508 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
509 {
510 	int r;
511 
512 	if (cap->flags)
513 		return -EINVAL;
514 
515 	switch (cap->cap) {
516 	case KVM_CAP_S390_IRQCHIP:
517 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
518 		kvm->arch.use_irqchip = 1;
519 		r = 0;
520 		break;
521 	case KVM_CAP_S390_USER_SIGP:
522 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
523 		kvm->arch.user_sigp = 1;
524 		r = 0;
525 		break;
526 	case KVM_CAP_S390_VECTOR_REGISTERS:
527 		mutex_lock(&kvm->lock);
528 		if (kvm->created_vcpus) {
529 			r = -EBUSY;
530 		} else if (MACHINE_HAS_VX) {
531 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
532 			set_kvm_facility(kvm->arch.model.fac_list, 129);
533 			if (test_facility(134)) {
534 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
535 				set_kvm_facility(kvm->arch.model.fac_list, 134);
536 			}
537 			if (test_facility(135)) {
538 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
539 				set_kvm_facility(kvm->arch.model.fac_list, 135);
540 			}
541 			r = 0;
542 		} else
543 			r = -EINVAL;
544 		mutex_unlock(&kvm->lock);
545 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
546 			 r ? "(not available)" : "(success)");
547 		break;
548 	case KVM_CAP_S390_RI:
549 		r = -EINVAL;
550 		mutex_lock(&kvm->lock);
551 		if (kvm->created_vcpus) {
552 			r = -EBUSY;
553 		} else if (test_facility(64)) {
554 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
555 			set_kvm_facility(kvm->arch.model.fac_list, 64);
556 			r = 0;
557 		}
558 		mutex_unlock(&kvm->lock);
559 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
560 			 r ? "(not available)" : "(success)");
561 		break;
562 	case KVM_CAP_S390_AIS:
563 		mutex_lock(&kvm->lock);
564 		if (kvm->created_vcpus) {
565 			r = -EBUSY;
566 		} else {
567 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
568 			set_kvm_facility(kvm->arch.model.fac_list, 72);
569 			r = 0;
570 		}
571 		mutex_unlock(&kvm->lock);
572 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
573 			 r ? "(not available)" : "(success)");
574 		break;
575 	case KVM_CAP_S390_GS:
576 		r = -EINVAL;
577 		mutex_lock(&kvm->lock);
578 		if (atomic_read(&kvm->online_vcpus)) {
579 			r = -EBUSY;
580 		} else if (test_facility(133)) {
581 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
582 			set_kvm_facility(kvm->arch.model.fac_list, 133);
583 			r = 0;
584 		}
585 		mutex_unlock(&kvm->lock);
586 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
587 			 r ? "(not available)" : "(success)");
588 		break;
589 	case KVM_CAP_S390_USER_STSI:
590 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
591 		kvm->arch.user_stsi = 1;
592 		r = 0;
593 		break;
594 	case KVM_CAP_S390_USER_INSTR0:
595 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
596 		kvm->arch.user_instr0 = 1;
597 		icpt_operexc_on_all_vcpus(kvm);
598 		r = 0;
599 		break;
600 	default:
601 		r = -EINVAL;
602 		break;
603 	}
604 	return r;
605 }
606 
607 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609 	int ret;
610 
611 	switch (attr->attr) {
612 	case KVM_S390_VM_MEM_LIMIT_SIZE:
613 		ret = 0;
614 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
615 			 kvm->arch.mem_limit);
616 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
617 			ret = -EFAULT;
618 		break;
619 	default:
620 		ret = -ENXIO;
621 		break;
622 	}
623 	return ret;
624 }
625 
626 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
627 {
628 	int ret;
629 	unsigned int idx;
630 	switch (attr->attr) {
631 	case KVM_S390_VM_MEM_ENABLE_CMMA:
632 		ret = -ENXIO;
633 		if (!sclp.has_cmma)
634 			break;
635 
636 		ret = -EBUSY;
637 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
638 		mutex_lock(&kvm->lock);
639 		if (!kvm->created_vcpus) {
640 			kvm->arch.use_cmma = 1;
641 			ret = 0;
642 		}
643 		mutex_unlock(&kvm->lock);
644 		break;
645 	case KVM_S390_VM_MEM_CLR_CMMA:
646 		ret = -ENXIO;
647 		if (!sclp.has_cmma)
648 			break;
649 		ret = -EINVAL;
650 		if (!kvm->arch.use_cmma)
651 			break;
652 
653 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
654 		mutex_lock(&kvm->lock);
655 		idx = srcu_read_lock(&kvm->srcu);
656 		s390_reset_cmma(kvm->arch.gmap->mm);
657 		srcu_read_unlock(&kvm->srcu, idx);
658 		mutex_unlock(&kvm->lock);
659 		ret = 0;
660 		break;
661 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
662 		unsigned long new_limit;
663 
664 		if (kvm_is_ucontrol(kvm))
665 			return -EINVAL;
666 
667 		if (get_user(new_limit, (u64 __user *)attr->addr))
668 			return -EFAULT;
669 
670 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
671 		    new_limit > kvm->arch.mem_limit)
672 			return -E2BIG;
673 
674 		if (!new_limit)
675 			return -EINVAL;
676 
677 		/* gmap_create takes last usable address */
678 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
679 			new_limit -= 1;
680 
681 		ret = -EBUSY;
682 		mutex_lock(&kvm->lock);
683 		if (!kvm->created_vcpus) {
684 			/* gmap_create will round the limit up */
685 			struct gmap *new = gmap_create(current->mm, new_limit);
686 
687 			if (!new) {
688 				ret = -ENOMEM;
689 			} else {
690 				gmap_remove(kvm->arch.gmap);
691 				new->private = kvm;
692 				kvm->arch.gmap = new;
693 				ret = 0;
694 			}
695 		}
696 		mutex_unlock(&kvm->lock);
697 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
698 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
699 			 (void *) kvm->arch.gmap->asce);
700 		break;
701 	}
702 	default:
703 		ret = -ENXIO;
704 		break;
705 	}
706 	return ret;
707 }
708 
709 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
710 
711 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
712 {
713 	struct kvm_vcpu *vcpu;
714 	int i;
715 
716 	if (!test_kvm_facility(kvm, 76))
717 		return -EINVAL;
718 
719 	mutex_lock(&kvm->lock);
720 	switch (attr->attr) {
721 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
722 		get_random_bytes(
723 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
724 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
725 		kvm->arch.crypto.aes_kw = 1;
726 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
727 		break;
728 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
729 		get_random_bytes(
730 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
731 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
732 		kvm->arch.crypto.dea_kw = 1;
733 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
734 		break;
735 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
736 		kvm->arch.crypto.aes_kw = 0;
737 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
738 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
739 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
740 		break;
741 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
742 		kvm->arch.crypto.dea_kw = 0;
743 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
744 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
745 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
746 		break;
747 	default:
748 		mutex_unlock(&kvm->lock);
749 		return -ENXIO;
750 	}
751 
752 	kvm_for_each_vcpu(i, vcpu, kvm) {
753 		kvm_s390_vcpu_crypto_setup(vcpu);
754 		exit_sie(vcpu);
755 	}
756 	mutex_unlock(&kvm->lock);
757 	return 0;
758 }
759 
760 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
761 {
762 	int cx;
763 	struct kvm_vcpu *vcpu;
764 
765 	kvm_for_each_vcpu(cx, vcpu, kvm)
766 		kvm_s390_sync_request(req, vcpu);
767 }
768 
769 /*
770  * Must be called with kvm->srcu held to avoid races on memslots, and with
771  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
772  */
773 static int kvm_s390_vm_start_migration(struct kvm *kvm)
774 {
775 	struct kvm_s390_migration_state *mgs;
776 	struct kvm_memory_slot *ms;
777 	/* should be the only one */
778 	struct kvm_memslots *slots;
779 	unsigned long ram_pages;
780 	int slotnr;
781 
782 	/* migration mode already enabled */
783 	if (kvm->arch.migration_state)
784 		return 0;
785 
786 	slots = kvm_memslots(kvm);
787 	if (!slots || !slots->used_slots)
788 		return -EINVAL;
789 
790 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
791 	if (!mgs)
792 		return -ENOMEM;
793 	kvm->arch.migration_state = mgs;
794 
795 	if (kvm->arch.use_cmma) {
796 		/*
797 		 * Get the last slot. They should be sorted by base_gfn, so the
798 		 * last slot is also the one at the end of the address space.
799 		 * We have verified above that at least one slot is present.
800 		 */
801 		ms = slots->memslots + slots->used_slots - 1;
802 		/* round up so we only use full longs */
803 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
804 		/* allocate enough bytes to store all the bits */
805 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
806 		if (!mgs->pgste_bitmap) {
807 			kfree(mgs);
808 			kvm->arch.migration_state = NULL;
809 			return -ENOMEM;
810 		}
811 
812 		mgs->bitmap_size = ram_pages;
813 		atomic64_set(&mgs->dirty_pages, ram_pages);
814 		/* mark all the pages in active slots as dirty */
815 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
816 			ms = slots->memslots + slotnr;
817 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
818 		}
819 
820 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
821 	}
822 	return 0;
823 }
824 
825 /*
826  * Must be called with kvm->lock to avoid races with ourselves and
827  * kvm_s390_vm_start_migration.
828  */
829 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
830 {
831 	struct kvm_s390_migration_state *mgs;
832 
833 	/* migration mode already disabled */
834 	if (!kvm->arch.migration_state)
835 		return 0;
836 	mgs = kvm->arch.migration_state;
837 	kvm->arch.migration_state = NULL;
838 
839 	if (kvm->arch.use_cmma) {
840 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
841 		vfree(mgs->pgste_bitmap);
842 	}
843 	kfree(mgs);
844 	return 0;
845 }
846 
847 static int kvm_s390_vm_set_migration(struct kvm *kvm,
848 				     struct kvm_device_attr *attr)
849 {
850 	int idx, res = -ENXIO;
851 
852 	mutex_lock(&kvm->lock);
853 	switch (attr->attr) {
854 	case KVM_S390_VM_MIGRATION_START:
855 		idx = srcu_read_lock(&kvm->srcu);
856 		res = kvm_s390_vm_start_migration(kvm);
857 		srcu_read_unlock(&kvm->srcu, idx);
858 		break;
859 	case KVM_S390_VM_MIGRATION_STOP:
860 		res = kvm_s390_vm_stop_migration(kvm);
861 		break;
862 	default:
863 		break;
864 	}
865 	mutex_unlock(&kvm->lock);
866 
867 	return res;
868 }
869 
870 static int kvm_s390_vm_get_migration(struct kvm *kvm,
871 				     struct kvm_device_attr *attr)
872 {
873 	u64 mig = (kvm->arch.migration_state != NULL);
874 
875 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
876 		return -ENXIO;
877 
878 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
879 		return -EFAULT;
880 	return 0;
881 }
882 
883 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
884 {
885 	struct kvm_s390_vm_tod_clock gtod;
886 
887 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
888 		return -EFAULT;
889 
890 	if (test_kvm_facility(kvm, 139))
891 		kvm_s390_set_tod_clock_ext(kvm, &gtod);
892 	else if (gtod.epoch_idx == 0)
893 		kvm_s390_set_tod_clock(kvm, gtod.tod);
894 	else
895 		return -EINVAL;
896 
897 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
898 		gtod.epoch_idx, gtod.tod);
899 
900 	return 0;
901 }
902 
903 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
904 {
905 	u8 gtod_high;
906 
907 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
908 					   sizeof(gtod_high)))
909 		return -EFAULT;
910 
911 	if (gtod_high != 0)
912 		return -EINVAL;
913 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
914 
915 	return 0;
916 }
917 
918 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
919 {
920 	u64 gtod;
921 
922 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
923 		return -EFAULT;
924 
925 	kvm_s390_set_tod_clock(kvm, gtod);
926 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
927 	return 0;
928 }
929 
930 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
931 {
932 	int ret;
933 
934 	if (attr->flags)
935 		return -EINVAL;
936 
937 	switch (attr->attr) {
938 	case KVM_S390_VM_TOD_EXT:
939 		ret = kvm_s390_set_tod_ext(kvm, attr);
940 		break;
941 	case KVM_S390_VM_TOD_HIGH:
942 		ret = kvm_s390_set_tod_high(kvm, attr);
943 		break;
944 	case KVM_S390_VM_TOD_LOW:
945 		ret = kvm_s390_set_tod_low(kvm, attr);
946 		break;
947 	default:
948 		ret = -ENXIO;
949 		break;
950 	}
951 	return ret;
952 }
953 
954 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
955 					struct kvm_s390_vm_tod_clock *gtod)
956 {
957 	struct kvm_s390_tod_clock_ext htod;
958 
959 	preempt_disable();
960 
961 	get_tod_clock_ext((char *)&htod);
962 
963 	gtod->tod = htod.tod + kvm->arch.epoch;
964 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
965 
966 	if (gtod->tod < htod.tod)
967 		gtod->epoch_idx += 1;
968 
969 	preempt_enable();
970 }
971 
972 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
973 {
974 	struct kvm_s390_vm_tod_clock gtod;
975 
976 	memset(&gtod, 0, sizeof(gtod));
977 
978 	if (test_kvm_facility(kvm, 139))
979 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
980 	else
981 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
982 
983 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
984 		return -EFAULT;
985 
986 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
987 		gtod.epoch_idx, gtod.tod);
988 	return 0;
989 }
990 
991 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
992 {
993 	u8 gtod_high = 0;
994 
995 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
996 					 sizeof(gtod_high)))
997 		return -EFAULT;
998 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
999 
1000 	return 0;
1001 }
1002 
1003 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1004 {
1005 	u64 gtod;
1006 
1007 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1008 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1009 		return -EFAULT;
1010 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1011 
1012 	return 0;
1013 }
1014 
1015 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017 	int ret;
1018 
1019 	if (attr->flags)
1020 		return -EINVAL;
1021 
1022 	switch (attr->attr) {
1023 	case KVM_S390_VM_TOD_EXT:
1024 		ret = kvm_s390_get_tod_ext(kvm, attr);
1025 		break;
1026 	case KVM_S390_VM_TOD_HIGH:
1027 		ret = kvm_s390_get_tod_high(kvm, attr);
1028 		break;
1029 	case KVM_S390_VM_TOD_LOW:
1030 		ret = kvm_s390_get_tod_low(kvm, attr);
1031 		break;
1032 	default:
1033 		ret = -ENXIO;
1034 		break;
1035 	}
1036 	return ret;
1037 }
1038 
1039 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041 	struct kvm_s390_vm_cpu_processor *proc;
1042 	u16 lowest_ibc, unblocked_ibc;
1043 	int ret = 0;
1044 
1045 	mutex_lock(&kvm->lock);
1046 	if (kvm->created_vcpus) {
1047 		ret = -EBUSY;
1048 		goto out;
1049 	}
1050 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1051 	if (!proc) {
1052 		ret = -ENOMEM;
1053 		goto out;
1054 	}
1055 	if (!copy_from_user(proc, (void __user *)attr->addr,
1056 			    sizeof(*proc))) {
1057 		kvm->arch.model.cpuid = proc->cpuid;
1058 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1059 		unblocked_ibc = sclp.ibc & 0xfff;
1060 		if (lowest_ibc && proc->ibc) {
1061 			if (proc->ibc > unblocked_ibc)
1062 				kvm->arch.model.ibc = unblocked_ibc;
1063 			else if (proc->ibc < lowest_ibc)
1064 				kvm->arch.model.ibc = lowest_ibc;
1065 			else
1066 				kvm->arch.model.ibc = proc->ibc;
1067 		}
1068 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1069 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1070 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1071 			 kvm->arch.model.ibc,
1072 			 kvm->arch.model.cpuid);
1073 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1074 			 kvm->arch.model.fac_list[0],
1075 			 kvm->arch.model.fac_list[1],
1076 			 kvm->arch.model.fac_list[2]);
1077 	} else
1078 		ret = -EFAULT;
1079 	kfree(proc);
1080 out:
1081 	mutex_unlock(&kvm->lock);
1082 	return ret;
1083 }
1084 
1085 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1086 				       struct kvm_device_attr *attr)
1087 {
1088 	struct kvm_s390_vm_cpu_feat data;
1089 	int ret = -EBUSY;
1090 
1091 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1092 		return -EFAULT;
1093 	if (!bitmap_subset((unsigned long *) data.feat,
1094 			   kvm_s390_available_cpu_feat,
1095 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1096 		return -EINVAL;
1097 
1098 	mutex_lock(&kvm->lock);
1099 	if (!atomic_read(&kvm->online_vcpus)) {
1100 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1101 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
1102 		ret = 0;
1103 	}
1104 	mutex_unlock(&kvm->lock);
1105 	return ret;
1106 }
1107 
1108 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1109 					  struct kvm_device_attr *attr)
1110 {
1111 	/*
1112 	 * Once supported by kernel + hw, we have to store the subfunctions
1113 	 * in kvm->arch and remember that user space configured them.
1114 	 */
1115 	return -ENXIO;
1116 }
1117 
1118 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1119 {
1120 	int ret = -ENXIO;
1121 
1122 	switch (attr->attr) {
1123 	case KVM_S390_VM_CPU_PROCESSOR:
1124 		ret = kvm_s390_set_processor(kvm, attr);
1125 		break;
1126 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1127 		ret = kvm_s390_set_processor_feat(kvm, attr);
1128 		break;
1129 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1130 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1131 		break;
1132 	}
1133 	return ret;
1134 }
1135 
1136 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1137 {
1138 	struct kvm_s390_vm_cpu_processor *proc;
1139 	int ret = 0;
1140 
1141 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1142 	if (!proc) {
1143 		ret = -ENOMEM;
1144 		goto out;
1145 	}
1146 	proc->cpuid = kvm->arch.model.cpuid;
1147 	proc->ibc = kvm->arch.model.ibc;
1148 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1149 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1150 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1151 		 kvm->arch.model.ibc,
1152 		 kvm->arch.model.cpuid);
1153 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1154 		 kvm->arch.model.fac_list[0],
1155 		 kvm->arch.model.fac_list[1],
1156 		 kvm->arch.model.fac_list[2]);
1157 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1158 		ret = -EFAULT;
1159 	kfree(proc);
1160 out:
1161 	return ret;
1162 }
1163 
1164 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1165 {
1166 	struct kvm_s390_vm_cpu_machine *mach;
1167 	int ret = 0;
1168 
1169 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1170 	if (!mach) {
1171 		ret = -ENOMEM;
1172 		goto out;
1173 	}
1174 	get_cpu_id((struct cpuid *) &mach->cpuid);
1175 	mach->ibc = sclp.ibc;
1176 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1177 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1178 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1179 	       sizeof(S390_lowcore.stfle_fac_list));
1180 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1181 		 kvm->arch.model.ibc,
1182 		 kvm->arch.model.cpuid);
1183 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1184 		 mach->fac_mask[0],
1185 		 mach->fac_mask[1],
1186 		 mach->fac_mask[2]);
1187 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1188 		 mach->fac_list[0],
1189 		 mach->fac_list[1],
1190 		 mach->fac_list[2]);
1191 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1192 		ret = -EFAULT;
1193 	kfree(mach);
1194 out:
1195 	return ret;
1196 }
1197 
1198 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1199 				       struct kvm_device_attr *attr)
1200 {
1201 	struct kvm_s390_vm_cpu_feat data;
1202 
1203 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1204 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1205 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1206 		return -EFAULT;
1207 	return 0;
1208 }
1209 
1210 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1211 				     struct kvm_device_attr *attr)
1212 {
1213 	struct kvm_s390_vm_cpu_feat data;
1214 
1215 	bitmap_copy((unsigned long *) data.feat,
1216 		    kvm_s390_available_cpu_feat,
1217 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1218 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1219 		return -EFAULT;
1220 	return 0;
1221 }
1222 
1223 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1224 					  struct kvm_device_attr *attr)
1225 {
1226 	/*
1227 	 * Once we can actually configure subfunctions (kernel + hw support),
1228 	 * we have to check if they were already set by user space, if so copy
1229 	 * them from kvm->arch.
1230 	 */
1231 	return -ENXIO;
1232 }
1233 
1234 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1235 					struct kvm_device_attr *attr)
1236 {
1237 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1238 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1239 		return -EFAULT;
1240 	return 0;
1241 }
1242 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1243 {
1244 	int ret = -ENXIO;
1245 
1246 	switch (attr->attr) {
1247 	case KVM_S390_VM_CPU_PROCESSOR:
1248 		ret = kvm_s390_get_processor(kvm, attr);
1249 		break;
1250 	case KVM_S390_VM_CPU_MACHINE:
1251 		ret = kvm_s390_get_machine(kvm, attr);
1252 		break;
1253 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1254 		ret = kvm_s390_get_processor_feat(kvm, attr);
1255 		break;
1256 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1257 		ret = kvm_s390_get_machine_feat(kvm, attr);
1258 		break;
1259 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1260 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1263 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1264 		break;
1265 	}
1266 	return ret;
1267 }
1268 
1269 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1270 {
1271 	int ret;
1272 
1273 	switch (attr->group) {
1274 	case KVM_S390_VM_MEM_CTRL:
1275 		ret = kvm_s390_set_mem_control(kvm, attr);
1276 		break;
1277 	case KVM_S390_VM_TOD:
1278 		ret = kvm_s390_set_tod(kvm, attr);
1279 		break;
1280 	case KVM_S390_VM_CPU_MODEL:
1281 		ret = kvm_s390_set_cpu_model(kvm, attr);
1282 		break;
1283 	case KVM_S390_VM_CRYPTO:
1284 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1285 		break;
1286 	case KVM_S390_VM_MIGRATION:
1287 		ret = kvm_s390_vm_set_migration(kvm, attr);
1288 		break;
1289 	default:
1290 		ret = -ENXIO;
1291 		break;
1292 	}
1293 
1294 	return ret;
1295 }
1296 
1297 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1298 {
1299 	int ret;
1300 
1301 	switch (attr->group) {
1302 	case KVM_S390_VM_MEM_CTRL:
1303 		ret = kvm_s390_get_mem_control(kvm, attr);
1304 		break;
1305 	case KVM_S390_VM_TOD:
1306 		ret = kvm_s390_get_tod(kvm, attr);
1307 		break;
1308 	case KVM_S390_VM_CPU_MODEL:
1309 		ret = kvm_s390_get_cpu_model(kvm, attr);
1310 		break;
1311 	case KVM_S390_VM_MIGRATION:
1312 		ret = kvm_s390_vm_get_migration(kvm, attr);
1313 		break;
1314 	default:
1315 		ret = -ENXIO;
1316 		break;
1317 	}
1318 
1319 	return ret;
1320 }
1321 
1322 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1323 {
1324 	int ret;
1325 
1326 	switch (attr->group) {
1327 	case KVM_S390_VM_MEM_CTRL:
1328 		switch (attr->attr) {
1329 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1330 		case KVM_S390_VM_MEM_CLR_CMMA:
1331 			ret = sclp.has_cmma ? 0 : -ENXIO;
1332 			break;
1333 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1334 			ret = 0;
1335 			break;
1336 		default:
1337 			ret = -ENXIO;
1338 			break;
1339 		}
1340 		break;
1341 	case KVM_S390_VM_TOD:
1342 		switch (attr->attr) {
1343 		case KVM_S390_VM_TOD_LOW:
1344 		case KVM_S390_VM_TOD_HIGH:
1345 			ret = 0;
1346 			break;
1347 		default:
1348 			ret = -ENXIO;
1349 			break;
1350 		}
1351 		break;
1352 	case KVM_S390_VM_CPU_MODEL:
1353 		switch (attr->attr) {
1354 		case KVM_S390_VM_CPU_PROCESSOR:
1355 		case KVM_S390_VM_CPU_MACHINE:
1356 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1357 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1358 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1359 			ret = 0;
1360 			break;
1361 		/* configuring subfunctions is not supported yet */
1362 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1363 		default:
1364 			ret = -ENXIO;
1365 			break;
1366 		}
1367 		break;
1368 	case KVM_S390_VM_CRYPTO:
1369 		switch (attr->attr) {
1370 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1371 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1372 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1373 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1374 			ret = 0;
1375 			break;
1376 		default:
1377 			ret = -ENXIO;
1378 			break;
1379 		}
1380 		break;
1381 	case KVM_S390_VM_MIGRATION:
1382 		ret = 0;
1383 		break;
1384 	default:
1385 		ret = -ENXIO;
1386 		break;
1387 	}
1388 
1389 	return ret;
1390 }
1391 
1392 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1393 {
1394 	uint8_t *keys;
1395 	uint64_t hva;
1396 	int srcu_idx, i, r = 0;
1397 
1398 	if (args->flags != 0)
1399 		return -EINVAL;
1400 
1401 	/* Is this guest using storage keys? */
1402 	if (!mm_use_skey(current->mm))
1403 		return KVM_S390_GET_SKEYS_NONE;
1404 
1405 	/* Enforce sane limit on memory allocation */
1406 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1407 		return -EINVAL;
1408 
1409 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1410 	if (!keys)
1411 		return -ENOMEM;
1412 
1413 	down_read(&current->mm->mmap_sem);
1414 	srcu_idx = srcu_read_lock(&kvm->srcu);
1415 	for (i = 0; i < args->count; i++) {
1416 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1417 		if (kvm_is_error_hva(hva)) {
1418 			r = -EFAULT;
1419 			break;
1420 		}
1421 
1422 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1423 		if (r)
1424 			break;
1425 	}
1426 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1427 	up_read(&current->mm->mmap_sem);
1428 
1429 	if (!r) {
1430 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1431 				 sizeof(uint8_t) * args->count);
1432 		if (r)
1433 			r = -EFAULT;
1434 	}
1435 
1436 	kvfree(keys);
1437 	return r;
1438 }
1439 
1440 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1441 {
1442 	uint8_t *keys;
1443 	uint64_t hva;
1444 	int srcu_idx, i, r = 0;
1445 
1446 	if (args->flags != 0)
1447 		return -EINVAL;
1448 
1449 	/* Enforce sane limit on memory allocation */
1450 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1451 		return -EINVAL;
1452 
1453 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1454 	if (!keys)
1455 		return -ENOMEM;
1456 
1457 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1458 			   sizeof(uint8_t) * args->count);
1459 	if (r) {
1460 		r = -EFAULT;
1461 		goto out;
1462 	}
1463 
1464 	/* Enable storage key handling for the guest */
1465 	r = s390_enable_skey();
1466 	if (r)
1467 		goto out;
1468 
1469 	down_read(&current->mm->mmap_sem);
1470 	srcu_idx = srcu_read_lock(&kvm->srcu);
1471 	for (i = 0; i < args->count; i++) {
1472 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1473 		if (kvm_is_error_hva(hva)) {
1474 			r = -EFAULT;
1475 			break;
1476 		}
1477 
1478 		/* Lowest order bit is reserved */
1479 		if (keys[i] & 0x01) {
1480 			r = -EINVAL;
1481 			break;
1482 		}
1483 
1484 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1485 		if (r)
1486 			break;
1487 	}
1488 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1489 	up_read(&current->mm->mmap_sem);
1490 out:
1491 	kvfree(keys);
1492 	return r;
1493 }
1494 
1495 /*
1496  * Base address and length must be sent at the start of each block, therefore
1497  * it's cheaper to send some clean data, as long as it's less than the size of
1498  * two longs.
1499  */
1500 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1501 /* for consistency */
1502 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1503 
1504 /*
1505  * This function searches for the next page with dirty CMMA attributes, and
1506  * saves the attributes in the buffer up to either the end of the buffer or
1507  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1508  * no trailing clean bytes are saved.
1509  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1510  * output buffer will indicate 0 as length.
1511  */
1512 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1513 				  struct kvm_s390_cmma_log *args)
1514 {
1515 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1516 	unsigned long bufsize, hva, pgstev, i, next, cur;
1517 	int srcu_idx, peek, r = 0, rr;
1518 	u8 *res;
1519 
1520 	cur = args->start_gfn;
1521 	i = next = pgstev = 0;
1522 
1523 	if (unlikely(!kvm->arch.use_cmma))
1524 		return -ENXIO;
1525 	/* Invalid/unsupported flags were specified */
1526 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1527 		return -EINVAL;
1528 	/* Migration mode query, and we are not doing a migration */
1529 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1530 	if (!peek && !s)
1531 		return -EINVAL;
1532 	/* CMMA is disabled or was not used, or the buffer has length zero */
1533 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1534 	if (!bufsize || !kvm->mm->context.use_cmma) {
1535 		memset(args, 0, sizeof(*args));
1536 		return 0;
1537 	}
1538 
1539 	if (!peek) {
1540 		/* We are not peeking, and there are no dirty pages */
1541 		if (!atomic64_read(&s->dirty_pages)) {
1542 			memset(args, 0, sizeof(*args));
1543 			return 0;
1544 		}
1545 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546 				    args->start_gfn);
1547 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1548 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1549 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1550 			memset(args, 0, sizeof(*args));
1551 			return 0;
1552 		}
1553 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1554 	}
1555 
1556 	res = vmalloc(bufsize);
1557 	if (!res)
1558 		return -ENOMEM;
1559 
1560 	args->start_gfn = cur;
1561 
1562 	down_read(&kvm->mm->mmap_sem);
1563 	srcu_idx = srcu_read_lock(&kvm->srcu);
1564 	while (i < bufsize) {
1565 		hva = gfn_to_hva(kvm, cur);
1566 		if (kvm_is_error_hva(hva)) {
1567 			r = -EFAULT;
1568 			break;
1569 		}
1570 		/* decrement only if we actually flipped the bit to 0 */
1571 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1572 			atomic64_dec(&s->dirty_pages);
1573 		r = get_pgste(kvm->mm, hva, &pgstev);
1574 		if (r < 0)
1575 			pgstev = 0;
1576 		/* save the value */
1577 		res[i++] = (pgstev >> 24) & 0x3;
1578 		/*
1579 		 * if the next bit is too far away, stop.
1580 		 * if we reached the previous "next", find the next one
1581 		 */
1582 		if (!peek) {
1583 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1584 				break;
1585 			if (cur == next)
1586 				next = find_next_bit(s->pgste_bitmap,
1587 						     s->bitmap_size, cur + 1);
1588 		/* reached the end of the bitmap or of the buffer, stop */
1589 			if ((next >= s->bitmap_size) ||
1590 			    (next >= args->start_gfn + bufsize))
1591 				break;
1592 		}
1593 		cur++;
1594 	}
1595 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1596 	up_read(&kvm->mm->mmap_sem);
1597 	args->count = i;
1598 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599 
1600 	rr = copy_to_user((void __user *)args->values, res, args->count);
1601 	if (rr)
1602 		r = -EFAULT;
1603 
1604 	vfree(res);
1605 	return r;
1606 }
1607 
1608 /*
1609  * This function sets the CMMA attributes for the given pages. If the input
1610  * buffer has zero length, no action is taken, otherwise the attributes are
1611  * set and the mm->context.use_cmma flag is set.
1612  */
1613 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1614 				  const struct kvm_s390_cmma_log *args)
1615 {
1616 	unsigned long hva, mask, pgstev, i;
1617 	uint8_t *bits;
1618 	int srcu_idx, r = 0;
1619 
1620 	mask = args->mask;
1621 
1622 	if (!kvm->arch.use_cmma)
1623 		return -ENXIO;
1624 	/* invalid/unsupported flags */
1625 	if (args->flags != 0)
1626 		return -EINVAL;
1627 	/* Enforce sane limit on memory allocation */
1628 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1629 		return -EINVAL;
1630 	/* Nothing to do */
1631 	if (args->count == 0)
1632 		return 0;
1633 
1634 	bits = vmalloc(sizeof(*bits) * args->count);
1635 	if (!bits)
1636 		return -ENOMEM;
1637 
1638 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1639 	if (r) {
1640 		r = -EFAULT;
1641 		goto out;
1642 	}
1643 
1644 	down_read(&kvm->mm->mmap_sem);
1645 	srcu_idx = srcu_read_lock(&kvm->srcu);
1646 	for (i = 0; i < args->count; i++) {
1647 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1648 		if (kvm_is_error_hva(hva)) {
1649 			r = -EFAULT;
1650 			break;
1651 		}
1652 
1653 		pgstev = bits[i];
1654 		pgstev = pgstev << 24;
1655 		mask &= _PGSTE_GPS_USAGE_MASK;
1656 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657 	}
1658 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1659 	up_read(&kvm->mm->mmap_sem);
1660 
1661 	if (!kvm->mm->context.use_cmma) {
1662 		down_write(&kvm->mm->mmap_sem);
1663 		kvm->mm->context.use_cmma = 1;
1664 		up_write(&kvm->mm->mmap_sem);
1665 	}
1666 out:
1667 	vfree(bits);
1668 	return r;
1669 }
1670 
1671 long kvm_arch_vm_ioctl(struct file *filp,
1672 		       unsigned int ioctl, unsigned long arg)
1673 {
1674 	struct kvm *kvm = filp->private_data;
1675 	void __user *argp = (void __user *)arg;
1676 	struct kvm_device_attr attr;
1677 	int r;
1678 
1679 	switch (ioctl) {
1680 	case KVM_S390_INTERRUPT: {
1681 		struct kvm_s390_interrupt s390int;
1682 
1683 		r = -EFAULT;
1684 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685 			break;
1686 		r = kvm_s390_inject_vm(kvm, &s390int);
1687 		break;
1688 	}
1689 	case KVM_ENABLE_CAP: {
1690 		struct kvm_enable_cap cap;
1691 		r = -EFAULT;
1692 		if (copy_from_user(&cap, argp, sizeof(cap)))
1693 			break;
1694 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1695 		break;
1696 	}
1697 	case KVM_CREATE_IRQCHIP: {
1698 		struct kvm_irq_routing_entry routing;
1699 
1700 		r = -EINVAL;
1701 		if (kvm->arch.use_irqchip) {
1702 			/* Set up dummy routing. */
1703 			memset(&routing, 0, sizeof(routing));
1704 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1705 		}
1706 		break;
1707 	}
1708 	case KVM_SET_DEVICE_ATTR: {
1709 		r = -EFAULT;
1710 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1711 			break;
1712 		r = kvm_s390_vm_set_attr(kvm, &attr);
1713 		break;
1714 	}
1715 	case KVM_GET_DEVICE_ATTR: {
1716 		r = -EFAULT;
1717 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1718 			break;
1719 		r = kvm_s390_vm_get_attr(kvm, &attr);
1720 		break;
1721 	}
1722 	case KVM_HAS_DEVICE_ATTR: {
1723 		r = -EFAULT;
1724 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1725 			break;
1726 		r = kvm_s390_vm_has_attr(kvm, &attr);
1727 		break;
1728 	}
1729 	case KVM_S390_GET_SKEYS: {
1730 		struct kvm_s390_skeys args;
1731 
1732 		r = -EFAULT;
1733 		if (copy_from_user(&args, argp,
1734 				   sizeof(struct kvm_s390_skeys)))
1735 			break;
1736 		r = kvm_s390_get_skeys(kvm, &args);
1737 		break;
1738 	}
1739 	case KVM_S390_SET_SKEYS: {
1740 		struct kvm_s390_skeys args;
1741 
1742 		r = -EFAULT;
1743 		if (copy_from_user(&args, argp,
1744 				   sizeof(struct kvm_s390_skeys)))
1745 			break;
1746 		r = kvm_s390_set_skeys(kvm, &args);
1747 		break;
1748 	}
1749 	case KVM_S390_GET_CMMA_BITS: {
1750 		struct kvm_s390_cmma_log args;
1751 
1752 		r = -EFAULT;
1753 		if (copy_from_user(&args, argp, sizeof(args)))
1754 			break;
1755 		r = kvm_s390_get_cmma_bits(kvm, &args);
1756 		if (!r) {
1757 			r = copy_to_user(argp, &args, sizeof(args));
1758 			if (r)
1759 				r = -EFAULT;
1760 		}
1761 		break;
1762 	}
1763 	case KVM_S390_SET_CMMA_BITS: {
1764 		struct kvm_s390_cmma_log args;
1765 
1766 		r = -EFAULT;
1767 		if (copy_from_user(&args, argp, sizeof(args)))
1768 			break;
1769 		r = kvm_s390_set_cmma_bits(kvm, &args);
1770 		break;
1771 	}
1772 	default:
1773 		r = -ENOTTY;
1774 	}
1775 
1776 	return r;
1777 }
1778 
1779 static int kvm_s390_query_ap_config(u8 *config)
1780 {
1781 	u32 fcn_code = 0x04000000UL;
1782 	u32 cc = 0;
1783 
1784 	memset(config, 0, 128);
1785 	asm volatile(
1786 		"lgr 0,%1\n"
1787 		"lgr 2,%2\n"
1788 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1789 		"0: ipm %0\n"
1790 		"srl %0,28\n"
1791 		"1:\n"
1792 		EX_TABLE(0b, 1b)
1793 		: "+r" (cc)
1794 		: "r" (fcn_code), "r" (config)
1795 		: "cc", "0", "2", "memory"
1796 	);
1797 
1798 	return cc;
1799 }
1800 
1801 static int kvm_s390_apxa_installed(void)
1802 {
1803 	u8 config[128];
1804 	int cc;
1805 
1806 	if (test_facility(12)) {
1807 		cc = kvm_s390_query_ap_config(config);
1808 
1809 		if (cc)
1810 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1811 		else
1812 			return config[0] & 0x40;
1813 	}
1814 
1815 	return 0;
1816 }
1817 
1818 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1819 {
1820 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1821 
1822 	if (kvm_s390_apxa_installed())
1823 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1824 	else
1825 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1826 }
1827 
1828 static u64 kvm_s390_get_initial_cpuid(void)
1829 {
1830 	struct cpuid cpuid;
1831 
1832 	get_cpu_id(&cpuid);
1833 	cpuid.version = 0xff;
1834 	return *((u64 *) &cpuid);
1835 }
1836 
1837 static void kvm_s390_crypto_init(struct kvm *kvm)
1838 {
1839 	if (!test_kvm_facility(kvm, 76))
1840 		return;
1841 
1842 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1843 	kvm_s390_set_crycb_format(kvm);
1844 
1845 	/* Enable AES/DEA protected key functions by default */
1846 	kvm->arch.crypto.aes_kw = 1;
1847 	kvm->arch.crypto.dea_kw = 1;
1848 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1849 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1850 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1851 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1852 }
1853 
1854 static void sca_dispose(struct kvm *kvm)
1855 {
1856 	if (kvm->arch.use_esca)
1857 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858 	else
1859 		free_page((unsigned long)(kvm->arch.sca));
1860 	kvm->arch.sca = NULL;
1861 }
1862 
1863 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864 {
1865 	gfp_t alloc_flags = GFP_KERNEL;
1866 	int i, rc;
1867 	char debug_name[16];
1868 	static unsigned long sca_offset;
1869 
1870 	rc = -EINVAL;
1871 #ifdef CONFIG_KVM_S390_UCONTROL
1872 	if (type & ~KVM_VM_S390_UCONTROL)
1873 		goto out_err;
1874 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875 		goto out_err;
1876 #else
1877 	if (type)
1878 		goto out_err;
1879 #endif
1880 
1881 	rc = s390_enable_sie();
1882 	if (rc)
1883 		goto out_err;
1884 
1885 	rc = -ENOMEM;
1886 
1887 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1888 
1889 	kvm->arch.use_esca = 0; /* start with basic SCA */
1890 	if (!sclp.has_64bscao)
1891 		alloc_flags |= GFP_DMA;
1892 	rwlock_init(&kvm->arch.sca_lock);
1893 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894 	if (!kvm->arch.sca)
1895 		goto out_err;
1896 	spin_lock(&kvm_lock);
1897 	sca_offset += 16;
1898 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899 		sca_offset = 0;
1900 	kvm->arch.sca = (struct bsca_block *)
1901 			((char *) kvm->arch.sca + sca_offset);
1902 	spin_unlock(&kvm_lock);
1903 
1904 	sprintf(debug_name, "kvm-%u", current->pid);
1905 
1906 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907 	if (!kvm->arch.dbf)
1908 		goto out_err;
1909 
1910 	kvm->arch.sie_page2 =
1911 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912 	if (!kvm->arch.sie_page2)
1913 		goto out_err;
1914 
1915 	/* Populate the facility mask initially. */
1916 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917 	       sizeof(S390_lowcore.stfle_fac_list));
1918 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919 		if (i < kvm_s390_fac_list_mask_size())
1920 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921 		else
1922 			kvm->arch.model.fac_mask[i] = 0UL;
1923 	}
1924 
1925 	/* Populate the facility list initially. */
1926 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1929 
1930 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1931 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1932 
1933 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1934 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1935 
1936 	kvm_s390_crypto_init(kvm);
1937 
1938 	mutex_init(&kvm->arch.float_int.ais_lock);
1939 	kvm->arch.float_int.simm = 0;
1940 	kvm->arch.float_int.nimm = 0;
1941 	spin_lock_init(&kvm->arch.float_int.lock);
1942 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1943 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1944 	init_waitqueue_head(&kvm->arch.ipte_wq);
1945 	mutex_init(&kvm->arch.ipte_mutex);
1946 
1947 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1948 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1949 
1950 	if (type & KVM_VM_S390_UCONTROL) {
1951 		kvm->arch.gmap = NULL;
1952 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1953 	} else {
1954 		if (sclp.hamax == U64_MAX)
1955 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1956 		else
1957 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1958 						    sclp.hamax + 1);
1959 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1960 		if (!kvm->arch.gmap)
1961 			goto out_err;
1962 		kvm->arch.gmap->private = kvm;
1963 		kvm->arch.gmap->pfault_enabled = 0;
1964 	}
1965 
1966 	kvm->arch.css_support = 0;
1967 	kvm->arch.use_irqchip = 0;
1968 	kvm->arch.epoch = 0;
1969 
1970 	spin_lock_init(&kvm->arch.start_stop_lock);
1971 	kvm_s390_vsie_init(kvm);
1972 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1973 
1974 	return 0;
1975 out_err:
1976 	free_page((unsigned long)kvm->arch.sie_page2);
1977 	debug_unregister(kvm->arch.dbf);
1978 	sca_dispose(kvm);
1979 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1980 	return rc;
1981 }
1982 
1983 bool kvm_arch_has_vcpu_debugfs(void)
1984 {
1985 	return false;
1986 }
1987 
1988 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1989 {
1990 	return 0;
1991 }
1992 
1993 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1994 {
1995 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1996 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1997 	kvm_s390_clear_local_irqs(vcpu);
1998 	kvm_clear_async_pf_completion_queue(vcpu);
1999 	if (!kvm_is_ucontrol(vcpu->kvm))
2000 		sca_del_vcpu(vcpu);
2001 
2002 	if (kvm_is_ucontrol(vcpu->kvm))
2003 		gmap_remove(vcpu->arch.gmap);
2004 
2005 	if (vcpu->kvm->arch.use_cmma)
2006 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2007 	free_page((unsigned long)(vcpu->arch.sie_block));
2008 
2009 	kvm_vcpu_uninit(vcpu);
2010 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2011 }
2012 
2013 static void kvm_free_vcpus(struct kvm *kvm)
2014 {
2015 	unsigned int i;
2016 	struct kvm_vcpu *vcpu;
2017 
2018 	kvm_for_each_vcpu(i, vcpu, kvm)
2019 		kvm_arch_vcpu_destroy(vcpu);
2020 
2021 	mutex_lock(&kvm->lock);
2022 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2023 		kvm->vcpus[i] = NULL;
2024 
2025 	atomic_set(&kvm->online_vcpus, 0);
2026 	mutex_unlock(&kvm->lock);
2027 }
2028 
2029 void kvm_arch_destroy_vm(struct kvm *kvm)
2030 {
2031 	kvm_free_vcpus(kvm);
2032 	sca_dispose(kvm);
2033 	debug_unregister(kvm->arch.dbf);
2034 	free_page((unsigned long)kvm->arch.sie_page2);
2035 	if (!kvm_is_ucontrol(kvm))
2036 		gmap_remove(kvm->arch.gmap);
2037 	kvm_s390_destroy_adapters(kvm);
2038 	kvm_s390_clear_float_irqs(kvm);
2039 	kvm_s390_vsie_destroy(kvm);
2040 	if (kvm->arch.migration_state) {
2041 		vfree(kvm->arch.migration_state->pgste_bitmap);
2042 		kfree(kvm->arch.migration_state);
2043 	}
2044 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2045 }
2046 
2047 /* Section: vcpu related */
2048 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2049 {
2050 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2051 	if (!vcpu->arch.gmap)
2052 		return -ENOMEM;
2053 	vcpu->arch.gmap->private = vcpu->kvm;
2054 
2055 	return 0;
2056 }
2057 
2058 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2059 {
2060 	if (!kvm_s390_use_sca_entries())
2061 		return;
2062 	read_lock(&vcpu->kvm->arch.sca_lock);
2063 	if (vcpu->kvm->arch.use_esca) {
2064 		struct esca_block *sca = vcpu->kvm->arch.sca;
2065 
2066 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2067 		sca->cpu[vcpu->vcpu_id].sda = 0;
2068 	} else {
2069 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2070 
2071 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2072 		sca->cpu[vcpu->vcpu_id].sda = 0;
2073 	}
2074 	read_unlock(&vcpu->kvm->arch.sca_lock);
2075 }
2076 
2077 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2078 {
2079 	if (!kvm_s390_use_sca_entries()) {
2080 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2081 
2082 		/* we still need the basic sca for the ipte control */
2083 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2084 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2085 	}
2086 	read_lock(&vcpu->kvm->arch.sca_lock);
2087 	if (vcpu->kvm->arch.use_esca) {
2088 		struct esca_block *sca = vcpu->kvm->arch.sca;
2089 
2090 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2091 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2093 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2094 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2095 	} else {
2096 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2097 
2098 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2101 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2102 	}
2103 	read_unlock(&vcpu->kvm->arch.sca_lock);
2104 }
2105 
2106 /* Basic SCA to Extended SCA data copy routines */
2107 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2108 {
2109 	d->sda = s->sda;
2110 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2111 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2112 }
2113 
2114 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2115 {
2116 	int i;
2117 
2118 	d->ipte_control = s->ipte_control;
2119 	d->mcn[0] = s->mcn;
2120 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2121 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2122 }
2123 
2124 static int sca_switch_to_extended(struct kvm *kvm)
2125 {
2126 	struct bsca_block *old_sca = kvm->arch.sca;
2127 	struct esca_block *new_sca;
2128 	struct kvm_vcpu *vcpu;
2129 	unsigned int vcpu_idx;
2130 	u32 scaol, scaoh;
2131 
2132 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2133 	if (!new_sca)
2134 		return -ENOMEM;
2135 
2136 	scaoh = (u32)((u64)(new_sca) >> 32);
2137 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2138 
2139 	kvm_s390_vcpu_block_all(kvm);
2140 	write_lock(&kvm->arch.sca_lock);
2141 
2142 	sca_copy_b_to_e(new_sca, old_sca);
2143 
2144 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2145 		vcpu->arch.sie_block->scaoh = scaoh;
2146 		vcpu->arch.sie_block->scaol = scaol;
2147 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2148 	}
2149 	kvm->arch.sca = new_sca;
2150 	kvm->arch.use_esca = 1;
2151 
2152 	write_unlock(&kvm->arch.sca_lock);
2153 	kvm_s390_vcpu_unblock_all(kvm);
2154 
2155 	free_page((unsigned long)old_sca);
2156 
2157 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2158 		 old_sca, kvm->arch.sca);
2159 	return 0;
2160 }
2161 
2162 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2163 {
2164 	int rc;
2165 
2166 	if (!kvm_s390_use_sca_entries()) {
2167 		if (id < KVM_MAX_VCPUS)
2168 			return true;
2169 		return false;
2170 	}
2171 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2172 		return true;
2173 	if (!sclp.has_esca || !sclp.has_64bscao)
2174 		return false;
2175 
2176 	mutex_lock(&kvm->lock);
2177 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2178 	mutex_unlock(&kvm->lock);
2179 
2180 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2181 }
2182 
2183 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2184 {
2185 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2186 	kvm_clear_async_pf_completion_queue(vcpu);
2187 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2188 				    KVM_SYNC_GPRS |
2189 				    KVM_SYNC_ACRS |
2190 				    KVM_SYNC_CRS |
2191 				    KVM_SYNC_ARCH0 |
2192 				    KVM_SYNC_PFAULT;
2193 	kvm_s390_set_prefix(vcpu, 0);
2194 	if (test_kvm_facility(vcpu->kvm, 64))
2195 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2196 	if (test_kvm_facility(vcpu->kvm, 133))
2197 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2198 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2199 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2200 	 */
2201 	if (MACHINE_HAS_VX)
2202 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2203 	else
2204 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2205 
2206 	if (kvm_is_ucontrol(vcpu->kvm))
2207 		return __kvm_ucontrol_vcpu_init(vcpu);
2208 
2209 	return 0;
2210 }
2211 
2212 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2213 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2214 {
2215 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2216 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2217 	vcpu->arch.cputm_start = get_tod_clock_fast();
2218 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2219 }
2220 
2221 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2222 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2223 {
2224 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2225 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2226 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2227 	vcpu->arch.cputm_start = 0;
2228 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2229 }
2230 
2231 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2232 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2233 {
2234 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2235 	vcpu->arch.cputm_enabled = true;
2236 	__start_cpu_timer_accounting(vcpu);
2237 }
2238 
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2241 {
2242 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2243 	__stop_cpu_timer_accounting(vcpu);
2244 	vcpu->arch.cputm_enabled = false;
2245 }
2246 
2247 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2248 {
2249 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2250 	__enable_cpu_timer_accounting(vcpu);
2251 	preempt_enable();
2252 }
2253 
2254 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2255 {
2256 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2257 	__disable_cpu_timer_accounting(vcpu);
2258 	preempt_enable();
2259 }
2260 
2261 /* set the cpu timer - may only be called from the VCPU thread itself */
2262 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2263 {
2264 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2265 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2266 	if (vcpu->arch.cputm_enabled)
2267 		vcpu->arch.cputm_start = get_tod_clock_fast();
2268 	vcpu->arch.sie_block->cputm = cputm;
2269 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2270 	preempt_enable();
2271 }
2272 
2273 /* update and get the cpu timer - can also be called from other VCPU threads */
2274 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2275 {
2276 	unsigned int seq;
2277 	__u64 value;
2278 
2279 	if (unlikely(!vcpu->arch.cputm_enabled))
2280 		return vcpu->arch.sie_block->cputm;
2281 
2282 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2283 	do {
2284 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2285 		/*
2286 		 * If the writer would ever execute a read in the critical
2287 		 * section, e.g. in irq context, we have a deadlock.
2288 		 */
2289 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2290 		value = vcpu->arch.sie_block->cputm;
2291 		/* if cputm_start is 0, accounting is being started/stopped */
2292 		if (likely(vcpu->arch.cputm_start))
2293 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2294 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2295 	preempt_enable();
2296 	return value;
2297 }
2298 
2299 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2300 {
2301 
2302 	gmap_enable(vcpu->arch.enabled_gmap);
2303 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2304 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2305 		__start_cpu_timer_accounting(vcpu);
2306 	vcpu->cpu = cpu;
2307 }
2308 
2309 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2310 {
2311 	vcpu->cpu = -1;
2312 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313 		__stop_cpu_timer_accounting(vcpu);
2314 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2315 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2316 	gmap_disable(vcpu->arch.enabled_gmap);
2317 
2318 }
2319 
2320 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2321 {
2322 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2323 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2324 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2325 	kvm_s390_set_prefix(vcpu, 0);
2326 	kvm_s390_set_cpu_timer(vcpu, 0);
2327 	vcpu->arch.sie_block->ckc       = 0UL;
2328 	vcpu->arch.sie_block->todpr     = 0;
2329 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2330 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2331 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2332 	/* make sure the new fpc will be lazily loaded */
2333 	save_fpu_regs();
2334 	current->thread.fpu.fpc = 0;
2335 	vcpu->arch.sie_block->gbea = 1;
2336 	vcpu->arch.sie_block->pp = 0;
2337 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2338 	kvm_clear_async_pf_completion_queue(vcpu);
2339 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2340 		kvm_s390_vcpu_stop(vcpu);
2341 	kvm_s390_clear_local_irqs(vcpu);
2342 }
2343 
2344 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2345 {
2346 	mutex_lock(&vcpu->kvm->lock);
2347 	preempt_disable();
2348 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2349 	preempt_enable();
2350 	mutex_unlock(&vcpu->kvm->lock);
2351 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2352 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2353 		sca_add_vcpu(vcpu);
2354 	}
2355 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2356 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2357 	/* make vcpu_load load the right gmap on the first trigger */
2358 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2359 }
2360 
2361 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2362 {
2363 	if (!test_kvm_facility(vcpu->kvm, 76))
2364 		return;
2365 
2366 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2367 
2368 	if (vcpu->kvm->arch.crypto.aes_kw)
2369 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2370 	if (vcpu->kvm->arch.crypto.dea_kw)
2371 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2372 
2373 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2374 }
2375 
2376 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2377 {
2378 	free_page(vcpu->arch.sie_block->cbrlo);
2379 	vcpu->arch.sie_block->cbrlo = 0;
2380 }
2381 
2382 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2383 {
2384 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2385 	if (!vcpu->arch.sie_block->cbrlo)
2386 		return -ENOMEM;
2387 
2388 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2389 	return 0;
2390 }
2391 
2392 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2393 {
2394 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2395 
2396 	vcpu->arch.sie_block->ibc = model->ibc;
2397 	if (test_kvm_facility(vcpu->kvm, 7))
2398 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2399 }
2400 
2401 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2402 {
2403 	int rc = 0;
2404 
2405 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2406 						    CPUSTAT_SM |
2407 						    CPUSTAT_STOPPED);
2408 
2409 	if (test_kvm_facility(vcpu->kvm, 78))
2410 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2411 	else if (test_kvm_facility(vcpu->kvm, 8))
2412 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2413 
2414 	kvm_s390_vcpu_setup_model(vcpu);
2415 
2416 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2417 	if (MACHINE_HAS_ESOP)
2418 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2419 	if (test_kvm_facility(vcpu->kvm, 9))
2420 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2421 	if (test_kvm_facility(vcpu->kvm, 73))
2422 		vcpu->arch.sie_block->ecb |= ECB_TE;
2423 
2424 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2425 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2426 	if (test_kvm_facility(vcpu->kvm, 130))
2427 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2428 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2429 	if (sclp.has_cei)
2430 		vcpu->arch.sie_block->eca |= ECA_CEI;
2431 	if (sclp.has_ib)
2432 		vcpu->arch.sie_block->eca |= ECA_IB;
2433 	if (sclp.has_siif)
2434 		vcpu->arch.sie_block->eca |= ECA_SII;
2435 	if (sclp.has_sigpif)
2436 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2437 	if (test_kvm_facility(vcpu->kvm, 129)) {
2438 		vcpu->arch.sie_block->eca |= ECA_VX;
2439 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2440 	}
2441 	if (test_kvm_facility(vcpu->kvm, 139))
2442 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2443 
2444 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2445 					| SDNXC;
2446 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2447 
2448 	if (sclp.has_kss)
2449 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2450 	else
2451 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2452 
2453 	if (vcpu->kvm->arch.use_cmma) {
2454 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2455 		if (rc)
2456 			return rc;
2457 	}
2458 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2459 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2460 
2461 	kvm_s390_vcpu_crypto_setup(vcpu);
2462 
2463 	return rc;
2464 }
2465 
2466 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2467 				      unsigned int id)
2468 {
2469 	struct kvm_vcpu *vcpu;
2470 	struct sie_page *sie_page;
2471 	int rc = -EINVAL;
2472 
2473 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2474 		goto out;
2475 
2476 	rc = -ENOMEM;
2477 
2478 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2479 	if (!vcpu)
2480 		goto out;
2481 
2482 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2483 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2484 	if (!sie_page)
2485 		goto out_free_cpu;
2486 
2487 	vcpu->arch.sie_block = &sie_page->sie_block;
2488 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2489 
2490 	/* the real guest size will always be smaller than msl */
2491 	vcpu->arch.sie_block->mso = 0;
2492 	vcpu->arch.sie_block->msl = sclp.hamax;
2493 
2494 	vcpu->arch.sie_block->icpua = id;
2495 	spin_lock_init(&vcpu->arch.local_int.lock);
2496 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2497 	vcpu->arch.local_int.wq = &vcpu->wq;
2498 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2499 	seqcount_init(&vcpu->arch.cputm_seqcount);
2500 
2501 	rc = kvm_vcpu_init(vcpu, kvm, id);
2502 	if (rc)
2503 		goto out_free_sie_block;
2504 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2505 		 vcpu->arch.sie_block);
2506 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2507 
2508 	return vcpu;
2509 out_free_sie_block:
2510 	free_page((unsigned long)(vcpu->arch.sie_block));
2511 out_free_cpu:
2512 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2513 out:
2514 	return ERR_PTR(rc);
2515 }
2516 
2517 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2518 {
2519 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2520 }
2521 
2522 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2523 {
2524 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2525 	exit_sie(vcpu);
2526 }
2527 
2528 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2529 {
2530 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2531 }
2532 
2533 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2534 {
2535 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2536 	exit_sie(vcpu);
2537 }
2538 
2539 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2540 {
2541 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2542 }
2543 
2544 /*
2545  * Kick a guest cpu out of SIE and wait until SIE is not running.
2546  * If the CPU is not running (e.g. waiting as idle) the function will
2547  * return immediately. */
2548 void exit_sie(struct kvm_vcpu *vcpu)
2549 {
2550 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2551 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2552 		cpu_relax();
2553 }
2554 
2555 /* Kick a guest cpu out of SIE to process a request synchronously */
2556 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2557 {
2558 	kvm_make_request(req, vcpu);
2559 	kvm_s390_vcpu_request(vcpu);
2560 }
2561 
2562 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2563 			      unsigned long end)
2564 {
2565 	struct kvm *kvm = gmap->private;
2566 	struct kvm_vcpu *vcpu;
2567 	unsigned long prefix;
2568 	int i;
2569 
2570 	if (gmap_is_shadow(gmap))
2571 		return;
2572 	if (start >= 1UL << 31)
2573 		/* We are only interested in prefix pages */
2574 		return;
2575 	kvm_for_each_vcpu(i, vcpu, kvm) {
2576 		/* match against both prefix pages */
2577 		prefix = kvm_s390_get_prefix(vcpu);
2578 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2579 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2580 				   start, end);
2581 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2582 		}
2583 	}
2584 }
2585 
2586 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2587 {
2588 	/* kvm common code refers to this, but never calls it */
2589 	BUG();
2590 	return 0;
2591 }
2592 
2593 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2594 					   struct kvm_one_reg *reg)
2595 {
2596 	int r = -EINVAL;
2597 
2598 	switch (reg->id) {
2599 	case KVM_REG_S390_TODPR:
2600 		r = put_user(vcpu->arch.sie_block->todpr,
2601 			     (u32 __user *)reg->addr);
2602 		break;
2603 	case KVM_REG_S390_EPOCHDIFF:
2604 		r = put_user(vcpu->arch.sie_block->epoch,
2605 			     (u64 __user *)reg->addr);
2606 		break;
2607 	case KVM_REG_S390_CPU_TIMER:
2608 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2609 			     (u64 __user *)reg->addr);
2610 		break;
2611 	case KVM_REG_S390_CLOCK_COMP:
2612 		r = put_user(vcpu->arch.sie_block->ckc,
2613 			     (u64 __user *)reg->addr);
2614 		break;
2615 	case KVM_REG_S390_PFTOKEN:
2616 		r = put_user(vcpu->arch.pfault_token,
2617 			     (u64 __user *)reg->addr);
2618 		break;
2619 	case KVM_REG_S390_PFCOMPARE:
2620 		r = put_user(vcpu->arch.pfault_compare,
2621 			     (u64 __user *)reg->addr);
2622 		break;
2623 	case KVM_REG_S390_PFSELECT:
2624 		r = put_user(vcpu->arch.pfault_select,
2625 			     (u64 __user *)reg->addr);
2626 		break;
2627 	case KVM_REG_S390_PP:
2628 		r = put_user(vcpu->arch.sie_block->pp,
2629 			     (u64 __user *)reg->addr);
2630 		break;
2631 	case KVM_REG_S390_GBEA:
2632 		r = put_user(vcpu->arch.sie_block->gbea,
2633 			     (u64 __user *)reg->addr);
2634 		break;
2635 	default:
2636 		break;
2637 	}
2638 
2639 	return r;
2640 }
2641 
2642 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2643 					   struct kvm_one_reg *reg)
2644 {
2645 	int r = -EINVAL;
2646 	__u64 val;
2647 
2648 	switch (reg->id) {
2649 	case KVM_REG_S390_TODPR:
2650 		r = get_user(vcpu->arch.sie_block->todpr,
2651 			     (u32 __user *)reg->addr);
2652 		break;
2653 	case KVM_REG_S390_EPOCHDIFF:
2654 		r = get_user(vcpu->arch.sie_block->epoch,
2655 			     (u64 __user *)reg->addr);
2656 		break;
2657 	case KVM_REG_S390_CPU_TIMER:
2658 		r = get_user(val, (u64 __user *)reg->addr);
2659 		if (!r)
2660 			kvm_s390_set_cpu_timer(vcpu, val);
2661 		break;
2662 	case KVM_REG_S390_CLOCK_COMP:
2663 		r = get_user(vcpu->arch.sie_block->ckc,
2664 			     (u64 __user *)reg->addr);
2665 		break;
2666 	case KVM_REG_S390_PFTOKEN:
2667 		r = get_user(vcpu->arch.pfault_token,
2668 			     (u64 __user *)reg->addr);
2669 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2670 			kvm_clear_async_pf_completion_queue(vcpu);
2671 		break;
2672 	case KVM_REG_S390_PFCOMPARE:
2673 		r = get_user(vcpu->arch.pfault_compare,
2674 			     (u64 __user *)reg->addr);
2675 		break;
2676 	case KVM_REG_S390_PFSELECT:
2677 		r = get_user(vcpu->arch.pfault_select,
2678 			     (u64 __user *)reg->addr);
2679 		break;
2680 	case KVM_REG_S390_PP:
2681 		r = get_user(vcpu->arch.sie_block->pp,
2682 			     (u64 __user *)reg->addr);
2683 		break;
2684 	case KVM_REG_S390_GBEA:
2685 		r = get_user(vcpu->arch.sie_block->gbea,
2686 			     (u64 __user *)reg->addr);
2687 		break;
2688 	default:
2689 		break;
2690 	}
2691 
2692 	return r;
2693 }
2694 
2695 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2696 {
2697 	kvm_s390_vcpu_initial_reset(vcpu);
2698 	return 0;
2699 }
2700 
2701 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2702 {
2703 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2704 	return 0;
2705 }
2706 
2707 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2708 {
2709 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2710 	return 0;
2711 }
2712 
2713 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2714 				  struct kvm_sregs *sregs)
2715 {
2716 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2717 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2718 	return 0;
2719 }
2720 
2721 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2722 				  struct kvm_sregs *sregs)
2723 {
2724 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2725 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2726 	return 0;
2727 }
2728 
2729 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2730 {
2731 	if (test_fp_ctl(fpu->fpc))
2732 		return -EINVAL;
2733 	vcpu->run->s.regs.fpc = fpu->fpc;
2734 	if (MACHINE_HAS_VX)
2735 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2736 				 (freg_t *) fpu->fprs);
2737 	else
2738 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2739 	return 0;
2740 }
2741 
2742 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2743 {
2744 	/* make sure we have the latest values */
2745 	save_fpu_regs();
2746 	if (MACHINE_HAS_VX)
2747 		convert_vx_to_fp((freg_t *) fpu->fprs,
2748 				 (__vector128 *) vcpu->run->s.regs.vrs);
2749 	else
2750 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2751 	fpu->fpc = vcpu->run->s.regs.fpc;
2752 	return 0;
2753 }
2754 
2755 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2756 {
2757 	int rc = 0;
2758 
2759 	if (!is_vcpu_stopped(vcpu))
2760 		rc = -EBUSY;
2761 	else {
2762 		vcpu->run->psw_mask = psw.mask;
2763 		vcpu->run->psw_addr = psw.addr;
2764 	}
2765 	return rc;
2766 }
2767 
2768 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2769 				  struct kvm_translation *tr)
2770 {
2771 	return -EINVAL; /* not implemented yet */
2772 }
2773 
2774 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2775 			      KVM_GUESTDBG_USE_HW_BP | \
2776 			      KVM_GUESTDBG_ENABLE)
2777 
2778 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2779 					struct kvm_guest_debug *dbg)
2780 {
2781 	int rc = 0;
2782 
2783 	vcpu->guest_debug = 0;
2784 	kvm_s390_clear_bp_data(vcpu);
2785 
2786 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2787 		return -EINVAL;
2788 	if (!sclp.has_gpere)
2789 		return -EINVAL;
2790 
2791 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2792 		vcpu->guest_debug = dbg->control;
2793 		/* enforce guest PER */
2794 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2795 
2796 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2797 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2798 	} else {
2799 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2800 		vcpu->arch.guestdbg.last_bp = 0;
2801 	}
2802 
2803 	if (rc) {
2804 		vcpu->guest_debug = 0;
2805 		kvm_s390_clear_bp_data(vcpu);
2806 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2807 	}
2808 
2809 	return rc;
2810 }
2811 
2812 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2813 				    struct kvm_mp_state *mp_state)
2814 {
2815 	/* CHECK_STOP and LOAD are not supported yet */
2816 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2817 				       KVM_MP_STATE_OPERATING;
2818 }
2819 
2820 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2821 				    struct kvm_mp_state *mp_state)
2822 {
2823 	int rc = 0;
2824 
2825 	/* user space knows about this interface - let it control the state */
2826 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2827 
2828 	switch (mp_state->mp_state) {
2829 	case KVM_MP_STATE_STOPPED:
2830 		kvm_s390_vcpu_stop(vcpu);
2831 		break;
2832 	case KVM_MP_STATE_OPERATING:
2833 		kvm_s390_vcpu_start(vcpu);
2834 		break;
2835 	case KVM_MP_STATE_LOAD:
2836 	case KVM_MP_STATE_CHECK_STOP:
2837 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2838 	default:
2839 		rc = -ENXIO;
2840 	}
2841 
2842 	return rc;
2843 }
2844 
2845 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2846 {
2847 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2848 }
2849 
2850 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2851 {
2852 retry:
2853 	kvm_s390_vcpu_request_handled(vcpu);
2854 	if (!kvm_request_pending(vcpu))
2855 		return 0;
2856 	/*
2857 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2858 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2859 	 * This ensures that the ipte instruction for this request has
2860 	 * already finished. We might race against a second unmapper that
2861 	 * wants to set the blocking bit. Lets just retry the request loop.
2862 	 */
2863 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2864 		int rc;
2865 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2866 					  kvm_s390_get_prefix(vcpu),
2867 					  PAGE_SIZE * 2, PROT_WRITE);
2868 		if (rc) {
2869 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2870 			return rc;
2871 		}
2872 		goto retry;
2873 	}
2874 
2875 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2876 		vcpu->arch.sie_block->ihcpu = 0xffff;
2877 		goto retry;
2878 	}
2879 
2880 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2881 		if (!ibs_enabled(vcpu)) {
2882 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2883 			atomic_or(CPUSTAT_IBS,
2884 					&vcpu->arch.sie_block->cpuflags);
2885 		}
2886 		goto retry;
2887 	}
2888 
2889 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2890 		if (ibs_enabled(vcpu)) {
2891 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2892 			atomic_andnot(CPUSTAT_IBS,
2893 					  &vcpu->arch.sie_block->cpuflags);
2894 		}
2895 		goto retry;
2896 	}
2897 
2898 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2899 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2900 		goto retry;
2901 	}
2902 
2903 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2904 		/*
2905 		 * Disable CMMA virtualization; we will emulate the ESSA
2906 		 * instruction manually, in order to provide additional
2907 		 * functionalities needed for live migration.
2908 		 */
2909 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2910 		goto retry;
2911 	}
2912 
2913 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2914 		/*
2915 		 * Re-enable CMMA virtualization if CMMA is available and
2916 		 * was used.
2917 		 */
2918 		if ((vcpu->kvm->arch.use_cmma) &&
2919 		    (vcpu->kvm->mm->context.use_cmma))
2920 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2921 		goto retry;
2922 	}
2923 
2924 	/* nothing to do, just clear the request */
2925 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2926 
2927 	return 0;
2928 }
2929 
2930 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2931 				 const struct kvm_s390_vm_tod_clock *gtod)
2932 {
2933 	struct kvm_vcpu *vcpu;
2934 	struct kvm_s390_tod_clock_ext htod;
2935 	int i;
2936 
2937 	mutex_lock(&kvm->lock);
2938 	preempt_disable();
2939 
2940 	get_tod_clock_ext((char *)&htod);
2941 
2942 	kvm->arch.epoch = gtod->tod - htod.tod;
2943 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2944 
2945 	if (kvm->arch.epoch > gtod->tod)
2946 		kvm->arch.epdx -= 1;
2947 
2948 	kvm_s390_vcpu_block_all(kvm);
2949 	kvm_for_each_vcpu(i, vcpu, kvm) {
2950 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2951 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2952 	}
2953 
2954 	kvm_s390_vcpu_unblock_all(kvm);
2955 	preempt_enable();
2956 	mutex_unlock(&kvm->lock);
2957 }
2958 
2959 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2960 {
2961 	struct kvm_vcpu *vcpu;
2962 	int i;
2963 
2964 	mutex_lock(&kvm->lock);
2965 	preempt_disable();
2966 	kvm->arch.epoch = tod - get_tod_clock();
2967 	kvm_s390_vcpu_block_all(kvm);
2968 	kvm_for_each_vcpu(i, vcpu, kvm)
2969 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2970 	kvm_s390_vcpu_unblock_all(kvm);
2971 	preempt_enable();
2972 	mutex_unlock(&kvm->lock);
2973 }
2974 
2975 /**
2976  * kvm_arch_fault_in_page - fault-in guest page if necessary
2977  * @vcpu: The corresponding virtual cpu
2978  * @gpa: Guest physical address
2979  * @writable: Whether the page should be writable or not
2980  *
2981  * Make sure that a guest page has been faulted-in on the host.
2982  *
2983  * Return: Zero on success, negative error code otherwise.
2984  */
2985 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2986 {
2987 	return gmap_fault(vcpu->arch.gmap, gpa,
2988 			  writable ? FAULT_FLAG_WRITE : 0);
2989 }
2990 
2991 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2992 				      unsigned long token)
2993 {
2994 	struct kvm_s390_interrupt inti;
2995 	struct kvm_s390_irq irq;
2996 
2997 	if (start_token) {
2998 		irq.u.ext.ext_params2 = token;
2999 		irq.type = KVM_S390_INT_PFAULT_INIT;
3000 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3001 	} else {
3002 		inti.type = KVM_S390_INT_PFAULT_DONE;
3003 		inti.parm64 = token;
3004 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3005 	}
3006 }
3007 
3008 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3009 				     struct kvm_async_pf *work)
3010 {
3011 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3012 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3013 }
3014 
3015 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3016 				 struct kvm_async_pf *work)
3017 {
3018 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3019 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3020 }
3021 
3022 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3023 			       struct kvm_async_pf *work)
3024 {
3025 	/* s390 will always inject the page directly */
3026 }
3027 
3028 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3029 {
3030 	/*
3031 	 * s390 will always inject the page directly,
3032 	 * but we still want check_async_completion to cleanup
3033 	 */
3034 	return true;
3035 }
3036 
3037 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3038 {
3039 	hva_t hva;
3040 	struct kvm_arch_async_pf arch;
3041 	int rc;
3042 
3043 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3044 		return 0;
3045 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3046 	    vcpu->arch.pfault_compare)
3047 		return 0;
3048 	if (psw_extint_disabled(vcpu))
3049 		return 0;
3050 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3051 		return 0;
3052 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3053 		return 0;
3054 	if (!vcpu->arch.gmap->pfault_enabled)
3055 		return 0;
3056 
3057 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3058 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3059 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3060 		return 0;
3061 
3062 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3063 	return rc;
3064 }
3065 
3066 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3067 {
3068 	int rc, cpuflags;
3069 
3070 	/*
3071 	 * On s390 notifications for arriving pages will be delivered directly
3072 	 * to the guest but the house keeping for completed pfaults is
3073 	 * handled outside the worker.
3074 	 */
3075 	kvm_check_async_pf_completion(vcpu);
3076 
3077 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3078 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3079 
3080 	if (need_resched())
3081 		schedule();
3082 
3083 	if (test_cpu_flag(CIF_MCCK_PENDING))
3084 		s390_handle_mcck();
3085 
3086 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3087 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3088 		if (rc)
3089 			return rc;
3090 	}
3091 
3092 	rc = kvm_s390_handle_requests(vcpu);
3093 	if (rc)
3094 		return rc;
3095 
3096 	if (guestdbg_enabled(vcpu)) {
3097 		kvm_s390_backup_guest_per_regs(vcpu);
3098 		kvm_s390_patch_guest_per_regs(vcpu);
3099 	}
3100 
3101 	vcpu->arch.sie_block->icptcode = 0;
3102 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3103 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3104 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3105 
3106 	return 0;
3107 }
3108 
3109 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3110 {
3111 	struct kvm_s390_pgm_info pgm_info = {
3112 		.code = PGM_ADDRESSING,
3113 	};
3114 	u8 opcode, ilen;
3115 	int rc;
3116 
3117 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3118 	trace_kvm_s390_sie_fault(vcpu);
3119 
3120 	/*
3121 	 * We want to inject an addressing exception, which is defined as a
3122 	 * suppressing or terminating exception. However, since we came here
3123 	 * by a DAT access exception, the PSW still points to the faulting
3124 	 * instruction since DAT exceptions are nullifying. So we've got
3125 	 * to look up the current opcode to get the length of the instruction
3126 	 * to be able to forward the PSW.
3127 	 */
3128 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3129 	ilen = insn_length(opcode);
3130 	if (rc < 0) {
3131 		return rc;
3132 	} else if (rc) {
3133 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3134 		 * Forward by arbitrary ilc, injection will take care of
3135 		 * nullification if necessary.
3136 		 */
3137 		pgm_info = vcpu->arch.pgm;
3138 		ilen = 4;
3139 	}
3140 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3141 	kvm_s390_forward_psw(vcpu, ilen);
3142 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3143 }
3144 
3145 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3146 {
3147 	struct mcck_volatile_info *mcck_info;
3148 	struct sie_page *sie_page;
3149 
3150 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3151 		   vcpu->arch.sie_block->icptcode);
3152 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3153 
3154 	if (guestdbg_enabled(vcpu))
3155 		kvm_s390_restore_guest_per_regs(vcpu);
3156 
3157 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3158 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3159 
3160 	if (exit_reason == -EINTR) {
3161 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3162 		sie_page = container_of(vcpu->arch.sie_block,
3163 					struct sie_page, sie_block);
3164 		mcck_info = &sie_page->mcck_info;
3165 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3166 		return 0;
3167 	}
3168 
3169 	if (vcpu->arch.sie_block->icptcode > 0) {
3170 		int rc = kvm_handle_sie_intercept(vcpu);
3171 
3172 		if (rc != -EOPNOTSUPP)
3173 			return rc;
3174 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3175 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3176 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3177 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3178 		return -EREMOTE;
3179 	} else if (exit_reason != -EFAULT) {
3180 		vcpu->stat.exit_null++;
3181 		return 0;
3182 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3183 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3184 		vcpu->run->s390_ucontrol.trans_exc_code =
3185 						current->thread.gmap_addr;
3186 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3187 		return -EREMOTE;
3188 	} else if (current->thread.gmap_pfault) {
3189 		trace_kvm_s390_major_guest_pfault(vcpu);
3190 		current->thread.gmap_pfault = 0;
3191 		if (kvm_arch_setup_async_pf(vcpu))
3192 			return 0;
3193 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3194 	}
3195 	return vcpu_post_run_fault_in_sie(vcpu);
3196 }
3197 
3198 static int __vcpu_run(struct kvm_vcpu *vcpu)
3199 {
3200 	int rc, exit_reason;
3201 
3202 	/*
3203 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3204 	 * ning the guest), so that memslots (and other stuff) are protected
3205 	 */
3206 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3207 
3208 	do {
3209 		rc = vcpu_pre_run(vcpu);
3210 		if (rc)
3211 			break;
3212 
3213 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3214 		/*
3215 		 * As PF_VCPU will be used in fault handler, between
3216 		 * guest_enter and guest_exit should be no uaccess.
3217 		 */
3218 		local_irq_disable();
3219 		guest_enter_irqoff();
3220 		__disable_cpu_timer_accounting(vcpu);
3221 		local_irq_enable();
3222 		exit_reason = sie64a(vcpu->arch.sie_block,
3223 				     vcpu->run->s.regs.gprs);
3224 		local_irq_disable();
3225 		__enable_cpu_timer_accounting(vcpu);
3226 		guest_exit_irqoff();
3227 		local_irq_enable();
3228 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3229 
3230 		rc = vcpu_post_run(vcpu, exit_reason);
3231 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3232 
3233 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3234 	return rc;
3235 }
3236 
3237 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3238 {
3239 	struct runtime_instr_cb *riccb;
3240 	struct gs_cb *gscb;
3241 
3242 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3243 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3244 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3245 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3246 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3247 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3248 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3249 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3250 		/* some control register changes require a tlb flush */
3251 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3252 	}
3253 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3254 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3255 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3256 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3257 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3258 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3259 	}
3260 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3261 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3262 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3263 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3264 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3265 			kvm_clear_async_pf_completion_queue(vcpu);
3266 	}
3267 	/*
3268 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3269 	 * we should enable RI here instead of doing the lazy enablement.
3270 	 */
3271 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3272 	    test_kvm_facility(vcpu->kvm, 64) &&
3273 	    riccb->valid &&
3274 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3275 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3276 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3277 	}
3278 	/*
3279 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3280 	 * we should enable GS here instead of doing the lazy enablement.
3281 	 */
3282 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3283 	    test_kvm_facility(vcpu->kvm, 133) &&
3284 	    gscb->gssm &&
3285 	    !vcpu->arch.gs_enabled) {
3286 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3287 		vcpu->arch.sie_block->ecb |= ECB_GS;
3288 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3289 		vcpu->arch.gs_enabled = 1;
3290 	}
3291 	save_access_regs(vcpu->arch.host_acrs);
3292 	restore_access_regs(vcpu->run->s.regs.acrs);
3293 	/* save host (userspace) fprs/vrs */
3294 	save_fpu_regs();
3295 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3296 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3297 	if (MACHINE_HAS_VX)
3298 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3299 	else
3300 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3301 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3302 	if (test_fp_ctl(current->thread.fpu.fpc))
3303 		/* User space provided an invalid FPC, let's clear it */
3304 		current->thread.fpu.fpc = 0;
3305 	if (MACHINE_HAS_GS) {
3306 		preempt_disable();
3307 		__ctl_set_bit(2, 4);
3308 		if (current->thread.gs_cb) {
3309 			vcpu->arch.host_gscb = current->thread.gs_cb;
3310 			save_gs_cb(vcpu->arch.host_gscb);
3311 		}
3312 		if (vcpu->arch.gs_enabled) {
3313 			current->thread.gs_cb = (struct gs_cb *)
3314 						&vcpu->run->s.regs.gscb;
3315 			restore_gs_cb(current->thread.gs_cb);
3316 		}
3317 		preempt_enable();
3318 	}
3319 
3320 	kvm_run->kvm_dirty_regs = 0;
3321 }
3322 
3323 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3324 {
3325 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3326 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3327 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3328 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3329 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3330 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3331 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3332 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3333 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3334 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3335 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3336 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3337 	save_access_regs(vcpu->run->s.regs.acrs);
3338 	restore_access_regs(vcpu->arch.host_acrs);
3339 	/* Save guest register state */
3340 	save_fpu_regs();
3341 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3342 	/* Restore will be done lazily at return */
3343 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3344 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3345 	if (MACHINE_HAS_GS) {
3346 		__ctl_set_bit(2, 4);
3347 		if (vcpu->arch.gs_enabled)
3348 			save_gs_cb(current->thread.gs_cb);
3349 		preempt_disable();
3350 		current->thread.gs_cb = vcpu->arch.host_gscb;
3351 		restore_gs_cb(vcpu->arch.host_gscb);
3352 		preempt_enable();
3353 		if (!vcpu->arch.host_gscb)
3354 			__ctl_clear_bit(2, 4);
3355 		vcpu->arch.host_gscb = NULL;
3356 	}
3357 
3358 }
3359 
3360 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3361 {
3362 	int rc;
3363 	sigset_t sigsaved;
3364 
3365 	if (kvm_run->immediate_exit)
3366 		return -EINTR;
3367 
3368 	if (guestdbg_exit_pending(vcpu)) {
3369 		kvm_s390_prepare_debug_exit(vcpu);
3370 		return 0;
3371 	}
3372 
3373 	if (vcpu->sigset_active)
3374 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3375 
3376 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3377 		kvm_s390_vcpu_start(vcpu);
3378 	} else if (is_vcpu_stopped(vcpu)) {
3379 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3380 				   vcpu->vcpu_id);
3381 		return -EINVAL;
3382 	}
3383 
3384 	sync_regs(vcpu, kvm_run);
3385 	enable_cpu_timer_accounting(vcpu);
3386 
3387 	might_fault();
3388 	rc = __vcpu_run(vcpu);
3389 
3390 	if (signal_pending(current) && !rc) {
3391 		kvm_run->exit_reason = KVM_EXIT_INTR;
3392 		rc = -EINTR;
3393 	}
3394 
3395 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3396 		kvm_s390_prepare_debug_exit(vcpu);
3397 		rc = 0;
3398 	}
3399 
3400 	if (rc == -EREMOTE) {
3401 		/* userspace support is needed, kvm_run has been prepared */
3402 		rc = 0;
3403 	}
3404 
3405 	disable_cpu_timer_accounting(vcpu);
3406 	store_regs(vcpu, kvm_run);
3407 
3408 	if (vcpu->sigset_active)
3409 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3410 
3411 	vcpu->stat.exit_userspace++;
3412 	return rc;
3413 }
3414 
3415 /*
3416  * store status at address
3417  * we use have two special cases:
3418  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3419  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3420  */
3421 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3422 {
3423 	unsigned char archmode = 1;
3424 	freg_t fprs[NUM_FPRS];
3425 	unsigned int px;
3426 	u64 clkcomp, cputm;
3427 	int rc;
3428 
3429 	px = kvm_s390_get_prefix(vcpu);
3430 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3431 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3432 			return -EFAULT;
3433 		gpa = 0;
3434 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3435 		if (write_guest_real(vcpu, 163, &archmode, 1))
3436 			return -EFAULT;
3437 		gpa = px;
3438 	} else
3439 		gpa -= __LC_FPREGS_SAVE_AREA;
3440 
3441 	/* manually convert vector registers if necessary */
3442 	if (MACHINE_HAS_VX) {
3443 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3444 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3445 				     fprs, 128);
3446 	} else {
3447 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3448 				     vcpu->run->s.regs.fprs, 128);
3449 	}
3450 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3451 			      vcpu->run->s.regs.gprs, 128);
3452 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3453 			      &vcpu->arch.sie_block->gpsw, 16);
3454 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3455 			      &px, 4);
3456 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3457 			      &vcpu->run->s.regs.fpc, 4);
3458 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3459 			      &vcpu->arch.sie_block->todpr, 4);
3460 	cputm = kvm_s390_get_cpu_timer(vcpu);
3461 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3462 			      &cputm, 8);
3463 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3464 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3465 			      &clkcomp, 8);
3466 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3467 			      &vcpu->run->s.regs.acrs, 64);
3468 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3469 			      &vcpu->arch.sie_block->gcr, 128);
3470 	return rc ? -EFAULT : 0;
3471 }
3472 
3473 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3474 {
3475 	/*
3476 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3477 	 * switch in the run ioctl. Let's update our copies before we save
3478 	 * it into the save area
3479 	 */
3480 	save_fpu_regs();
3481 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3482 	save_access_regs(vcpu->run->s.regs.acrs);
3483 
3484 	return kvm_s390_store_status_unloaded(vcpu, addr);
3485 }
3486 
3487 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3488 {
3489 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3490 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3491 }
3492 
3493 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3494 {
3495 	unsigned int i;
3496 	struct kvm_vcpu *vcpu;
3497 
3498 	kvm_for_each_vcpu(i, vcpu, kvm) {
3499 		__disable_ibs_on_vcpu(vcpu);
3500 	}
3501 }
3502 
3503 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3504 {
3505 	if (!sclp.has_ibs)
3506 		return;
3507 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3508 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3509 }
3510 
3511 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3512 {
3513 	int i, online_vcpus, started_vcpus = 0;
3514 
3515 	if (!is_vcpu_stopped(vcpu))
3516 		return;
3517 
3518 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3519 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3520 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3521 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3522 
3523 	for (i = 0; i < online_vcpus; i++) {
3524 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3525 			started_vcpus++;
3526 	}
3527 
3528 	if (started_vcpus == 0) {
3529 		/* we're the only active VCPU -> speed it up */
3530 		__enable_ibs_on_vcpu(vcpu);
3531 	} else if (started_vcpus == 1) {
3532 		/*
3533 		 * As we are starting a second VCPU, we have to disable
3534 		 * the IBS facility on all VCPUs to remove potentially
3535 		 * oustanding ENABLE requests.
3536 		 */
3537 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3538 	}
3539 
3540 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3541 	/*
3542 	 * Another VCPU might have used IBS while we were offline.
3543 	 * Let's play safe and flush the VCPU at startup.
3544 	 */
3545 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3546 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3547 	return;
3548 }
3549 
3550 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3551 {
3552 	int i, online_vcpus, started_vcpus = 0;
3553 	struct kvm_vcpu *started_vcpu = NULL;
3554 
3555 	if (is_vcpu_stopped(vcpu))
3556 		return;
3557 
3558 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3559 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3560 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3561 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3562 
3563 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3564 	kvm_s390_clear_stop_irq(vcpu);
3565 
3566 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3567 	__disable_ibs_on_vcpu(vcpu);
3568 
3569 	for (i = 0; i < online_vcpus; i++) {
3570 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3571 			started_vcpus++;
3572 			started_vcpu = vcpu->kvm->vcpus[i];
3573 		}
3574 	}
3575 
3576 	if (started_vcpus == 1) {
3577 		/*
3578 		 * As we only have one VCPU left, we want to enable the
3579 		 * IBS facility for that VCPU to speed it up.
3580 		 */
3581 		__enable_ibs_on_vcpu(started_vcpu);
3582 	}
3583 
3584 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3585 	return;
3586 }
3587 
3588 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3589 				     struct kvm_enable_cap *cap)
3590 {
3591 	int r;
3592 
3593 	if (cap->flags)
3594 		return -EINVAL;
3595 
3596 	switch (cap->cap) {
3597 	case KVM_CAP_S390_CSS_SUPPORT:
3598 		if (!vcpu->kvm->arch.css_support) {
3599 			vcpu->kvm->arch.css_support = 1;
3600 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3601 			trace_kvm_s390_enable_css(vcpu->kvm);
3602 		}
3603 		r = 0;
3604 		break;
3605 	default:
3606 		r = -EINVAL;
3607 		break;
3608 	}
3609 	return r;
3610 }
3611 
3612 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3613 				  struct kvm_s390_mem_op *mop)
3614 {
3615 	void __user *uaddr = (void __user *)mop->buf;
3616 	void *tmpbuf = NULL;
3617 	int r, srcu_idx;
3618 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3619 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3620 
3621 	if (mop->flags & ~supported_flags)
3622 		return -EINVAL;
3623 
3624 	if (mop->size > MEM_OP_MAX_SIZE)
3625 		return -E2BIG;
3626 
3627 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3628 		tmpbuf = vmalloc(mop->size);
3629 		if (!tmpbuf)
3630 			return -ENOMEM;
3631 	}
3632 
3633 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3634 
3635 	switch (mop->op) {
3636 	case KVM_S390_MEMOP_LOGICAL_READ:
3637 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3638 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3639 					    mop->size, GACC_FETCH);
3640 			break;
3641 		}
3642 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3643 		if (r == 0) {
3644 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3645 				r = -EFAULT;
3646 		}
3647 		break;
3648 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3649 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3650 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3651 					    mop->size, GACC_STORE);
3652 			break;
3653 		}
3654 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3655 			r = -EFAULT;
3656 			break;
3657 		}
3658 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3659 		break;
3660 	default:
3661 		r = -EINVAL;
3662 	}
3663 
3664 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3665 
3666 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3667 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3668 
3669 	vfree(tmpbuf);
3670 	return r;
3671 }
3672 
3673 long kvm_arch_vcpu_ioctl(struct file *filp,
3674 			 unsigned int ioctl, unsigned long arg)
3675 {
3676 	struct kvm_vcpu *vcpu = filp->private_data;
3677 	void __user *argp = (void __user *)arg;
3678 	int idx;
3679 	long r;
3680 
3681 	switch (ioctl) {
3682 	case KVM_S390_IRQ: {
3683 		struct kvm_s390_irq s390irq;
3684 
3685 		r = -EFAULT;
3686 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3687 			break;
3688 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3689 		break;
3690 	}
3691 	case KVM_S390_INTERRUPT: {
3692 		struct kvm_s390_interrupt s390int;
3693 		struct kvm_s390_irq s390irq;
3694 
3695 		r = -EFAULT;
3696 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3697 			break;
3698 		if (s390int_to_s390irq(&s390int, &s390irq))
3699 			return -EINVAL;
3700 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3701 		break;
3702 	}
3703 	case KVM_S390_STORE_STATUS:
3704 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3705 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3706 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3707 		break;
3708 	case KVM_S390_SET_INITIAL_PSW: {
3709 		psw_t psw;
3710 
3711 		r = -EFAULT;
3712 		if (copy_from_user(&psw, argp, sizeof(psw)))
3713 			break;
3714 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3715 		break;
3716 	}
3717 	case KVM_S390_INITIAL_RESET:
3718 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3719 		break;
3720 	case KVM_SET_ONE_REG:
3721 	case KVM_GET_ONE_REG: {
3722 		struct kvm_one_reg reg;
3723 		r = -EFAULT;
3724 		if (copy_from_user(&reg, argp, sizeof(reg)))
3725 			break;
3726 		if (ioctl == KVM_SET_ONE_REG)
3727 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3728 		else
3729 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3730 		break;
3731 	}
3732 #ifdef CONFIG_KVM_S390_UCONTROL
3733 	case KVM_S390_UCAS_MAP: {
3734 		struct kvm_s390_ucas_mapping ucasmap;
3735 
3736 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3737 			r = -EFAULT;
3738 			break;
3739 		}
3740 
3741 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3742 			r = -EINVAL;
3743 			break;
3744 		}
3745 
3746 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3747 				     ucasmap.vcpu_addr, ucasmap.length);
3748 		break;
3749 	}
3750 	case KVM_S390_UCAS_UNMAP: {
3751 		struct kvm_s390_ucas_mapping ucasmap;
3752 
3753 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3754 			r = -EFAULT;
3755 			break;
3756 		}
3757 
3758 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3759 			r = -EINVAL;
3760 			break;
3761 		}
3762 
3763 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3764 			ucasmap.length);
3765 		break;
3766 	}
3767 #endif
3768 	case KVM_S390_VCPU_FAULT: {
3769 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3770 		break;
3771 	}
3772 	case KVM_ENABLE_CAP:
3773 	{
3774 		struct kvm_enable_cap cap;
3775 		r = -EFAULT;
3776 		if (copy_from_user(&cap, argp, sizeof(cap)))
3777 			break;
3778 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3779 		break;
3780 	}
3781 	case KVM_S390_MEM_OP: {
3782 		struct kvm_s390_mem_op mem_op;
3783 
3784 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3785 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3786 		else
3787 			r = -EFAULT;
3788 		break;
3789 	}
3790 	case KVM_S390_SET_IRQ_STATE: {
3791 		struct kvm_s390_irq_state irq_state;
3792 
3793 		r = -EFAULT;
3794 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3795 			break;
3796 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3797 		    irq_state.len == 0 ||
3798 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3799 			r = -EINVAL;
3800 			break;
3801 		}
3802 		r = kvm_s390_set_irq_state(vcpu,
3803 					   (void __user *) irq_state.buf,
3804 					   irq_state.len);
3805 		break;
3806 	}
3807 	case KVM_S390_GET_IRQ_STATE: {
3808 		struct kvm_s390_irq_state irq_state;
3809 
3810 		r = -EFAULT;
3811 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3812 			break;
3813 		if (irq_state.len == 0) {
3814 			r = -EINVAL;
3815 			break;
3816 		}
3817 		r = kvm_s390_get_irq_state(vcpu,
3818 					   (__u8 __user *)  irq_state.buf,
3819 					   irq_state.len);
3820 		break;
3821 	}
3822 	default:
3823 		r = -ENOTTY;
3824 	}
3825 	return r;
3826 }
3827 
3828 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3829 {
3830 #ifdef CONFIG_KVM_S390_UCONTROL
3831 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3832 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3833 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3834 		get_page(vmf->page);
3835 		return 0;
3836 	}
3837 #endif
3838 	return VM_FAULT_SIGBUS;
3839 }
3840 
3841 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3842 			    unsigned long npages)
3843 {
3844 	return 0;
3845 }
3846 
3847 /* Section: memory related */
3848 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3849 				   struct kvm_memory_slot *memslot,
3850 				   const struct kvm_userspace_memory_region *mem,
3851 				   enum kvm_mr_change change)
3852 {
3853 	/* A few sanity checks. We can have memory slots which have to be
3854 	   located/ended at a segment boundary (1MB). The memory in userland is
3855 	   ok to be fragmented into various different vmas. It is okay to mmap()
3856 	   and munmap() stuff in this slot after doing this call at any time */
3857 
3858 	if (mem->userspace_addr & 0xffffful)
3859 		return -EINVAL;
3860 
3861 	if (mem->memory_size & 0xffffful)
3862 		return -EINVAL;
3863 
3864 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3865 		return -EINVAL;
3866 
3867 	return 0;
3868 }
3869 
3870 void kvm_arch_commit_memory_region(struct kvm *kvm,
3871 				const struct kvm_userspace_memory_region *mem,
3872 				const struct kvm_memory_slot *old,
3873 				const struct kvm_memory_slot *new,
3874 				enum kvm_mr_change change)
3875 {
3876 	int rc;
3877 
3878 	/* If the basics of the memslot do not change, we do not want
3879 	 * to update the gmap. Every update causes several unnecessary
3880 	 * segment translation exceptions. This is usually handled just
3881 	 * fine by the normal fault handler + gmap, but it will also
3882 	 * cause faults on the prefix page of running guest CPUs.
3883 	 */
3884 	if (old->userspace_addr == mem->userspace_addr &&
3885 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3886 	    old->npages * PAGE_SIZE == mem->memory_size)
3887 		return;
3888 
3889 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3890 		mem->guest_phys_addr, mem->memory_size);
3891 	if (rc)
3892 		pr_warn("failed to commit memory region\n");
3893 	return;
3894 }
3895 
3896 static inline unsigned long nonhyp_mask(int i)
3897 {
3898 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3899 
3900 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3901 }
3902 
3903 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3904 {
3905 	vcpu->valid_wakeup = false;
3906 }
3907 
3908 static int __init kvm_s390_init(void)
3909 {
3910 	int i;
3911 
3912 	if (!sclp.has_sief2) {
3913 		pr_info("SIE not available\n");
3914 		return -ENODEV;
3915 	}
3916 
3917 	for (i = 0; i < 16; i++)
3918 		kvm_s390_fac_list_mask[i] |=
3919 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3920 
3921 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3922 }
3923 
3924 static void __exit kvm_s390_exit(void)
3925 {
3926 	kvm_exit();
3927 }
3928 
3929 module_init(kvm_s390_init);
3930 module_exit(kvm_s390_exit);
3931 
3932 /*
3933  * Enable autoloading of the kvm module.
3934  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3935  * since x86 takes a different approach.
3936  */
3937 #include <linux/miscdevice.h>
3938 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3939 MODULE_ALIAS("devname:kvm");
3940