xref: /linux/arch/s390/kvm/kvm-s390.c (revision 905e46acd3272d04566fec49afbd7ad9e2ed9ae3)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 
34 #include <asm/asm-offsets.h>
35 #include <asm/lowcore.h>
36 #include <asm/stp.h>
37 #include <asm/pgtable.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include "kvm-s390.h"
46 #include "gaccess.h"
47 
48 #define KMSG_COMPONENT "kvm-s390"
49 #undef pr_fmt
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
56 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
61 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
62 
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
65 	{ "exit_null", VCPU_STAT(exit_null) },
66 	{ "exit_validity", VCPU_STAT(exit_validity) },
67 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
68 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
84 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
85 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
86 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
87 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
88 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
89 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
90 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
91 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
92 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
93 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
94 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
95 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
96 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
97 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
98 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
99 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
100 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
101 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
102 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
103 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
104 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
105 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
106 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
107 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
108 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
109 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
110 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
111 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
112 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
113 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
114 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
115 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
116 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
117 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
118 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
119 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
120 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
121 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
122 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
123 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
124 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
125 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
126 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
127 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
128 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
129 	{ NULL }
130 };
131 
132 /* allow nested virtualization in KVM (if enabled by user space) */
133 static int nested;
134 module_param(nested, int, S_IRUGO);
135 MODULE_PARM_DESC(nested, "Nested virtualization support");
136 
137 /* upper facilities limit for kvm */
138 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
139 
140 unsigned long kvm_s390_fac_list_mask_size(void)
141 {
142 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
143 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
144 }
145 
146 /* available cpu features supported by kvm */
147 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
148 /* available subfunctions indicated via query / "test bit" */
149 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
150 
151 static struct gmap_notifier gmap_notifier;
152 static struct gmap_notifier vsie_gmap_notifier;
153 debug_info_t *kvm_s390_dbf;
154 
155 /* Section: not file related */
156 int kvm_arch_hardware_enable(void)
157 {
158 	/* every s390 is virtualization enabled ;-) */
159 	return 0;
160 }
161 
162 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
163 			      unsigned long end);
164 
165 /*
166  * This callback is executed during stop_machine(). All CPUs are therefore
167  * temporarily stopped. In order not to change guest behavior, we have to
168  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
169  * so a CPU won't be stopped while calculating with the epoch.
170  */
171 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
172 			  void *v)
173 {
174 	struct kvm *kvm;
175 	struct kvm_vcpu *vcpu;
176 	int i;
177 	unsigned long long *delta = v;
178 
179 	list_for_each_entry(kvm, &vm_list, vm_list) {
180 		kvm->arch.epoch -= *delta;
181 		kvm_for_each_vcpu(i, vcpu, kvm) {
182 			vcpu->arch.sie_block->epoch -= *delta;
183 			if (vcpu->arch.cputm_enabled)
184 				vcpu->arch.cputm_start += *delta;
185 			if (vcpu->arch.vsie_block)
186 				vcpu->arch.vsie_block->epoch -= *delta;
187 		}
188 	}
189 	return NOTIFY_OK;
190 }
191 
192 static struct notifier_block kvm_clock_notifier = {
193 	.notifier_call = kvm_clock_sync,
194 };
195 
196 int kvm_arch_hardware_setup(void)
197 {
198 	gmap_notifier.notifier_call = kvm_gmap_notifier;
199 	gmap_register_pte_notifier(&gmap_notifier);
200 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
201 	gmap_register_pte_notifier(&vsie_gmap_notifier);
202 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
203 				       &kvm_clock_notifier);
204 	return 0;
205 }
206 
207 void kvm_arch_hardware_unsetup(void)
208 {
209 	gmap_unregister_pte_notifier(&gmap_notifier);
210 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
211 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
212 					 &kvm_clock_notifier);
213 }
214 
215 static void allow_cpu_feat(unsigned long nr)
216 {
217 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
218 }
219 
220 static inline int plo_test_bit(unsigned char nr)
221 {
222 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
223 	int cc;
224 
225 	asm volatile(
226 		/* Parameter registers are ignored for "test bit" */
227 		"	plo	0,0,0,0(0)\n"
228 		"	ipm	%0\n"
229 		"	srl	%0,28\n"
230 		: "=d" (cc)
231 		: "d" (r0)
232 		: "cc");
233 	return cc == 0;
234 }
235 
236 static void kvm_s390_cpu_feat_init(void)
237 {
238 	int i;
239 
240 	for (i = 0; i < 256; ++i) {
241 		if (plo_test_bit(i))
242 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
243 	}
244 
245 	if (test_facility(28)) /* TOD-clock steering */
246 		ptff(kvm_s390_available_subfunc.ptff,
247 		     sizeof(kvm_s390_available_subfunc.ptff),
248 		     PTFF_QAF);
249 
250 	if (test_facility(17)) { /* MSA */
251 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
252 			      kvm_s390_available_subfunc.kmac);
253 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
254 			      kvm_s390_available_subfunc.kmc);
255 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
256 			      kvm_s390_available_subfunc.km);
257 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
258 			      kvm_s390_available_subfunc.kimd);
259 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
260 			      kvm_s390_available_subfunc.klmd);
261 	}
262 	if (test_facility(76)) /* MSA3 */
263 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
264 			      kvm_s390_available_subfunc.pckmo);
265 	if (test_facility(77)) { /* MSA4 */
266 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
267 			      kvm_s390_available_subfunc.kmctr);
268 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
269 			      kvm_s390_available_subfunc.kmf);
270 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.kmo);
272 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
273 			      kvm_s390_available_subfunc.pcc);
274 	}
275 	if (test_facility(57)) /* MSA5 */
276 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
277 			      kvm_s390_available_subfunc.ppno);
278 
279 	if (test_facility(146)) /* MSA8 */
280 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
281 			      kvm_s390_available_subfunc.kma);
282 
283 	if (MACHINE_HAS_ESOP)
284 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
285 	/*
286 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
287 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
288 	 */
289 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
290 	    !test_facility(3) || !nested)
291 		return;
292 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
293 	if (sclp.has_64bscao)
294 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
295 	if (sclp.has_siif)
296 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
297 	if (sclp.has_gpere)
298 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
299 	if (sclp.has_gsls)
300 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
301 	if (sclp.has_ib)
302 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
303 	if (sclp.has_cei)
304 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
305 	if (sclp.has_ibs)
306 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
307 	if (sclp.has_kss)
308 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
309 	/*
310 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
311 	 * all skey handling functions read/set the skey from the PGSTE
312 	 * instead of the real storage key.
313 	 *
314 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
315 	 * pages being detected as preserved although they are resident.
316 	 *
317 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
318 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
319 	 *
320 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
321 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
322 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
323 	 *
324 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
325 	 * cannot easily shadow the SCA because of the ipte lock.
326 	 */
327 }
328 
329 int kvm_arch_init(void *opaque)
330 {
331 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
332 	if (!kvm_s390_dbf)
333 		return -ENOMEM;
334 
335 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
336 		debug_unregister(kvm_s390_dbf);
337 		return -ENOMEM;
338 	}
339 
340 	kvm_s390_cpu_feat_init();
341 
342 	/* Register floating interrupt controller interface. */
343 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
344 }
345 
346 void kvm_arch_exit(void)
347 {
348 	debug_unregister(kvm_s390_dbf);
349 }
350 
351 /* Section: device related */
352 long kvm_arch_dev_ioctl(struct file *filp,
353 			unsigned int ioctl, unsigned long arg)
354 {
355 	if (ioctl == KVM_S390_ENABLE_SIE)
356 		return s390_enable_sie();
357 	return -EINVAL;
358 }
359 
360 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
361 {
362 	int r;
363 
364 	switch (ext) {
365 	case KVM_CAP_S390_PSW:
366 	case KVM_CAP_S390_GMAP:
367 	case KVM_CAP_SYNC_MMU:
368 #ifdef CONFIG_KVM_S390_UCONTROL
369 	case KVM_CAP_S390_UCONTROL:
370 #endif
371 	case KVM_CAP_ASYNC_PF:
372 	case KVM_CAP_SYNC_REGS:
373 	case KVM_CAP_ONE_REG:
374 	case KVM_CAP_ENABLE_CAP:
375 	case KVM_CAP_S390_CSS_SUPPORT:
376 	case KVM_CAP_IOEVENTFD:
377 	case KVM_CAP_DEVICE_CTRL:
378 	case KVM_CAP_ENABLE_CAP_VM:
379 	case KVM_CAP_S390_IRQCHIP:
380 	case KVM_CAP_VM_ATTRIBUTES:
381 	case KVM_CAP_MP_STATE:
382 	case KVM_CAP_IMMEDIATE_EXIT:
383 	case KVM_CAP_S390_INJECT_IRQ:
384 	case KVM_CAP_S390_USER_SIGP:
385 	case KVM_CAP_S390_USER_STSI:
386 	case KVM_CAP_S390_SKEYS:
387 	case KVM_CAP_S390_IRQ_STATE:
388 	case KVM_CAP_S390_USER_INSTR0:
389 	case KVM_CAP_S390_AIS:
390 		r = 1;
391 		break;
392 	case KVM_CAP_S390_MEM_OP:
393 		r = MEM_OP_MAX_SIZE;
394 		break;
395 	case KVM_CAP_NR_VCPUS:
396 	case KVM_CAP_MAX_VCPUS:
397 		r = KVM_S390_BSCA_CPU_SLOTS;
398 		if (!kvm_s390_use_sca_entries())
399 			r = KVM_MAX_VCPUS;
400 		else if (sclp.has_esca && sclp.has_64bscao)
401 			r = KVM_S390_ESCA_CPU_SLOTS;
402 		break;
403 	case KVM_CAP_NR_MEMSLOTS:
404 		r = KVM_USER_MEM_SLOTS;
405 		break;
406 	case KVM_CAP_S390_COW:
407 		r = MACHINE_HAS_ESOP;
408 		break;
409 	case KVM_CAP_S390_VECTOR_REGISTERS:
410 		r = MACHINE_HAS_VX;
411 		break;
412 	case KVM_CAP_S390_RI:
413 		r = test_facility(64);
414 		break;
415 	case KVM_CAP_S390_GS:
416 		r = test_facility(133);
417 		break;
418 	default:
419 		r = 0;
420 	}
421 	return r;
422 }
423 
424 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
425 					struct kvm_memory_slot *memslot)
426 {
427 	gfn_t cur_gfn, last_gfn;
428 	unsigned long address;
429 	struct gmap *gmap = kvm->arch.gmap;
430 
431 	/* Loop over all guest pages */
432 	last_gfn = memslot->base_gfn + memslot->npages;
433 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
434 		address = gfn_to_hva_memslot(memslot, cur_gfn);
435 
436 		if (test_and_clear_guest_dirty(gmap->mm, address))
437 			mark_page_dirty(kvm, cur_gfn);
438 		if (fatal_signal_pending(current))
439 			return;
440 		cond_resched();
441 	}
442 }
443 
444 /* Section: vm related */
445 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
446 
447 /*
448  * Get (and clear) the dirty memory log for a memory slot.
449  */
450 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
451 			       struct kvm_dirty_log *log)
452 {
453 	int r;
454 	unsigned long n;
455 	struct kvm_memslots *slots;
456 	struct kvm_memory_slot *memslot;
457 	int is_dirty = 0;
458 
459 	if (kvm_is_ucontrol(kvm))
460 		return -EINVAL;
461 
462 	mutex_lock(&kvm->slots_lock);
463 
464 	r = -EINVAL;
465 	if (log->slot >= KVM_USER_MEM_SLOTS)
466 		goto out;
467 
468 	slots = kvm_memslots(kvm);
469 	memslot = id_to_memslot(slots, log->slot);
470 	r = -ENOENT;
471 	if (!memslot->dirty_bitmap)
472 		goto out;
473 
474 	kvm_s390_sync_dirty_log(kvm, memslot);
475 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
476 	if (r)
477 		goto out;
478 
479 	/* Clear the dirty log */
480 	if (is_dirty) {
481 		n = kvm_dirty_bitmap_bytes(memslot);
482 		memset(memslot->dirty_bitmap, 0, n);
483 	}
484 	r = 0;
485 out:
486 	mutex_unlock(&kvm->slots_lock);
487 	return r;
488 }
489 
490 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
491 {
492 	unsigned int i;
493 	struct kvm_vcpu *vcpu;
494 
495 	kvm_for_each_vcpu(i, vcpu, kvm) {
496 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
497 	}
498 }
499 
500 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
501 {
502 	int r;
503 
504 	if (cap->flags)
505 		return -EINVAL;
506 
507 	switch (cap->cap) {
508 	case KVM_CAP_S390_IRQCHIP:
509 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
510 		kvm->arch.use_irqchip = 1;
511 		r = 0;
512 		break;
513 	case KVM_CAP_S390_USER_SIGP:
514 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
515 		kvm->arch.user_sigp = 1;
516 		r = 0;
517 		break;
518 	case KVM_CAP_S390_VECTOR_REGISTERS:
519 		mutex_lock(&kvm->lock);
520 		if (kvm->created_vcpus) {
521 			r = -EBUSY;
522 		} else if (MACHINE_HAS_VX) {
523 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
524 			set_kvm_facility(kvm->arch.model.fac_list, 129);
525 			if (test_facility(134)) {
526 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
527 				set_kvm_facility(kvm->arch.model.fac_list, 134);
528 			}
529 			if (test_facility(135)) {
530 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
531 				set_kvm_facility(kvm->arch.model.fac_list, 135);
532 			}
533 			r = 0;
534 		} else
535 			r = -EINVAL;
536 		mutex_unlock(&kvm->lock);
537 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
538 			 r ? "(not available)" : "(success)");
539 		break;
540 	case KVM_CAP_S390_RI:
541 		r = -EINVAL;
542 		mutex_lock(&kvm->lock);
543 		if (kvm->created_vcpus) {
544 			r = -EBUSY;
545 		} else if (test_facility(64)) {
546 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
547 			set_kvm_facility(kvm->arch.model.fac_list, 64);
548 			r = 0;
549 		}
550 		mutex_unlock(&kvm->lock);
551 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
552 			 r ? "(not available)" : "(success)");
553 		break;
554 	case KVM_CAP_S390_AIS:
555 		mutex_lock(&kvm->lock);
556 		if (kvm->created_vcpus) {
557 			r = -EBUSY;
558 		} else {
559 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
560 			set_kvm_facility(kvm->arch.model.fac_list, 72);
561 			kvm->arch.float_int.ais_enabled = 1;
562 			r = 0;
563 		}
564 		mutex_unlock(&kvm->lock);
565 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
566 			 r ? "(not available)" : "(success)");
567 		break;
568 	case KVM_CAP_S390_GS:
569 		r = -EINVAL;
570 		mutex_lock(&kvm->lock);
571 		if (atomic_read(&kvm->online_vcpus)) {
572 			r = -EBUSY;
573 		} else if (test_facility(133)) {
574 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
575 			set_kvm_facility(kvm->arch.model.fac_list, 133);
576 			r = 0;
577 		}
578 		mutex_unlock(&kvm->lock);
579 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
580 			 r ? "(not available)" : "(success)");
581 		break;
582 	case KVM_CAP_S390_USER_STSI:
583 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
584 		kvm->arch.user_stsi = 1;
585 		r = 0;
586 		break;
587 	case KVM_CAP_S390_USER_INSTR0:
588 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
589 		kvm->arch.user_instr0 = 1;
590 		icpt_operexc_on_all_vcpus(kvm);
591 		r = 0;
592 		break;
593 	default:
594 		r = -EINVAL;
595 		break;
596 	}
597 	return r;
598 }
599 
600 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
601 {
602 	int ret;
603 
604 	switch (attr->attr) {
605 	case KVM_S390_VM_MEM_LIMIT_SIZE:
606 		ret = 0;
607 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
608 			 kvm->arch.mem_limit);
609 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
610 			ret = -EFAULT;
611 		break;
612 	default:
613 		ret = -ENXIO;
614 		break;
615 	}
616 	return ret;
617 }
618 
619 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
620 {
621 	int ret;
622 	unsigned int idx;
623 	switch (attr->attr) {
624 	case KVM_S390_VM_MEM_ENABLE_CMMA:
625 		ret = -ENXIO;
626 		if (!sclp.has_cmma)
627 			break;
628 
629 		ret = -EBUSY;
630 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
631 		mutex_lock(&kvm->lock);
632 		if (!kvm->created_vcpus) {
633 			kvm->arch.use_cmma = 1;
634 			ret = 0;
635 		}
636 		mutex_unlock(&kvm->lock);
637 		break;
638 	case KVM_S390_VM_MEM_CLR_CMMA:
639 		ret = -ENXIO;
640 		if (!sclp.has_cmma)
641 			break;
642 		ret = -EINVAL;
643 		if (!kvm->arch.use_cmma)
644 			break;
645 
646 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
647 		mutex_lock(&kvm->lock);
648 		idx = srcu_read_lock(&kvm->srcu);
649 		s390_reset_cmma(kvm->arch.gmap->mm);
650 		srcu_read_unlock(&kvm->srcu, idx);
651 		mutex_unlock(&kvm->lock);
652 		ret = 0;
653 		break;
654 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
655 		unsigned long new_limit;
656 
657 		if (kvm_is_ucontrol(kvm))
658 			return -EINVAL;
659 
660 		if (get_user(new_limit, (u64 __user *)attr->addr))
661 			return -EFAULT;
662 
663 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
664 		    new_limit > kvm->arch.mem_limit)
665 			return -E2BIG;
666 
667 		if (!new_limit)
668 			return -EINVAL;
669 
670 		/* gmap_create takes last usable address */
671 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
672 			new_limit -= 1;
673 
674 		ret = -EBUSY;
675 		mutex_lock(&kvm->lock);
676 		if (!kvm->created_vcpus) {
677 			/* gmap_create will round the limit up */
678 			struct gmap *new = gmap_create(current->mm, new_limit);
679 
680 			if (!new) {
681 				ret = -ENOMEM;
682 			} else {
683 				gmap_remove(kvm->arch.gmap);
684 				new->private = kvm;
685 				kvm->arch.gmap = new;
686 				ret = 0;
687 			}
688 		}
689 		mutex_unlock(&kvm->lock);
690 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
691 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
692 			 (void *) kvm->arch.gmap->asce);
693 		break;
694 	}
695 	default:
696 		ret = -ENXIO;
697 		break;
698 	}
699 	return ret;
700 }
701 
702 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
703 
704 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
705 {
706 	struct kvm_vcpu *vcpu;
707 	int i;
708 
709 	if (!test_kvm_facility(kvm, 76))
710 		return -EINVAL;
711 
712 	mutex_lock(&kvm->lock);
713 	switch (attr->attr) {
714 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
715 		get_random_bytes(
716 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
717 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
718 		kvm->arch.crypto.aes_kw = 1;
719 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
720 		break;
721 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
722 		get_random_bytes(
723 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
724 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
725 		kvm->arch.crypto.dea_kw = 1;
726 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
727 		break;
728 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
729 		kvm->arch.crypto.aes_kw = 0;
730 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
731 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
732 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
733 		break;
734 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
735 		kvm->arch.crypto.dea_kw = 0;
736 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
737 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
738 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
739 		break;
740 	default:
741 		mutex_unlock(&kvm->lock);
742 		return -ENXIO;
743 	}
744 
745 	kvm_for_each_vcpu(i, vcpu, kvm) {
746 		kvm_s390_vcpu_crypto_setup(vcpu);
747 		exit_sie(vcpu);
748 	}
749 	mutex_unlock(&kvm->lock);
750 	return 0;
751 }
752 
753 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
754 {
755 	u8 gtod_high;
756 
757 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
758 					   sizeof(gtod_high)))
759 		return -EFAULT;
760 
761 	if (gtod_high != 0)
762 		return -EINVAL;
763 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
764 
765 	return 0;
766 }
767 
768 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
769 {
770 	u64 gtod;
771 
772 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
773 		return -EFAULT;
774 
775 	kvm_s390_set_tod_clock(kvm, gtod);
776 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
777 	return 0;
778 }
779 
780 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
781 {
782 	int ret;
783 
784 	if (attr->flags)
785 		return -EINVAL;
786 
787 	switch (attr->attr) {
788 	case KVM_S390_VM_TOD_HIGH:
789 		ret = kvm_s390_set_tod_high(kvm, attr);
790 		break;
791 	case KVM_S390_VM_TOD_LOW:
792 		ret = kvm_s390_set_tod_low(kvm, attr);
793 		break;
794 	default:
795 		ret = -ENXIO;
796 		break;
797 	}
798 	return ret;
799 }
800 
801 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
802 {
803 	u8 gtod_high = 0;
804 
805 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
806 					 sizeof(gtod_high)))
807 		return -EFAULT;
808 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
809 
810 	return 0;
811 }
812 
813 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
814 {
815 	u64 gtod;
816 
817 	gtod = kvm_s390_get_tod_clock_fast(kvm);
818 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
819 		return -EFAULT;
820 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
821 
822 	return 0;
823 }
824 
825 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
826 {
827 	int ret;
828 
829 	if (attr->flags)
830 		return -EINVAL;
831 
832 	switch (attr->attr) {
833 	case KVM_S390_VM_TOD_HIGH:
834 		ret = kvm_s390_get_tod_high(kvm, attr);
835 		break;
836 	case KVM_S390_VM_TOD_LOW:
837 		ret = kvm_s390_get_tod_low(kvm, attr);
838 		break;
839 	default:
840 		ret = -ENXIO;
841 		break;
842 	}
843 	return ret;
844 }
845 
846 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848 	struct kvm_s390_vm_cpu_processor *proc;
849 	u16 lowest_ibc, unblocked_ibc;
850 	int ret = 0;
851 
852 	mutex_lock(&kvm->lock);
853 	if (kvm->created_vcpus) {
854 		ret = -EBUSY;
855 		goto out;
856 	}
857 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
858 	if (!proc) {
859 		ret = -ENOMEM;
860 		goto out;
861 	}
862 	if (!copy_from_user(proc, (void __user *)attr->addr,
863 			    sizeof(*proc))) {
864 		kvm->arch.model.cpuid = proc->cpuid;
865 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
866 		unblocked_ibc = sclp.ibc & 0xfff;
867 		if (lowest_ibc && proc->ibc) {
868 			if (proc->ibc > unblocked_ibc)
869 				kvm->arch.model.ibc = unblocked_ibc;
870 			else if (proc->ibc < lowest_ibc)
871 				kvm->arch.model.ibc = lowest_ibc;
872 			else
873 				kvm->arch.model.ibc = proc->ibc;
874 		}
875 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
876 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
877 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
878 			 kvm->arch.model.ibc,
879 			 kvm->arch.model.cpuid);
880 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
881 			 kvm->arch.model.fac_list[0],
882 			 kvm->arch.model.fac_list[1],
883 			 kvm->arch.model.fac_list[2]);
884 	} else
885 		ret = -EFAULT;
886 	kfree(proc);
887 out:
888 	mutex_unlock(&kvm->lock);
889 	return ret;
890 }
891 
892 static int kvm_s390_set_processor_feat(struct kvm *kvm,
893 				       struct kvm_device_attr *attr)
894 {
895 	struct kvm_s390_vm_cpu_feat data;
896 	int ret = -EBUSY;
897 
898 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
899 		return -EFAULT;
900 	if (!bitmap_subset((unsigned long *) data.feat,
901 			   kvm_s390_available_cpu_feat,
902 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
903 		return -EINVAL;
904 
905 	mutex_lock(&kvm->lock);
906 	if (!atomic_read(&kvm->online_vcpus)) {
907 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
908 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
909 		ret = 0;
910 	}
911 	mutex_unlock(&kvm->lock);
912 	return ret;
913 }
914 
915 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
916 					  struct kvm_device_attr *attr)
917 {
918 	/*
919 	 * Once supported by kernel + hw, we have to store the subfunctions
920 	 * in kvm->arch and remember that user space configured them.
921 	 */
922 	return -ENXIO;
923 }
924 
925 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	int ret = -ENXIO;
928 
929 	switch (attr->attr) {
930 	case KVM_S390_VM_CPU_PROCESSOR:
931 		ret = kvm_s390_set_processor(kvm, attr);
932 		break;
933 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
934 		ret = kvm_s390_set_processor_feat(kvm, attr);
935 		break;
936 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
937 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
938 		break;
939 	}
940 	return ret;
941 }
942 
943 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
944 {
945 	struct kvm_s390_vm_cpu_processor *proc;
946 	int ret = 0;
947 
948 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
949 	if (!proc) {
950 		ret = -ENOMEM;
951 		goto out;
952 	}
953 	proc->cpuid = kvm->arch.model.cpuid;
954 	proc->ibc = kvm->arch.model.ibc;
955 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
956 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
957 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
958 		 kvm->arch.model.ibc,
959 		 kvm->arch.model.cpuid);
960 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
961 		 kvm->arch.model.fac_list[0],
962 		 kvm->arch.model.fac_list[1],
963 		 kvm->arch.model.fac_list[2]);
964 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
965 		ret = -EFAULT;
966 	kfree(proc);
967 out:
968 	return ret;
969 }
970 
971 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
972 {
973 	struct kvm_s390_vm_cpu_machine *mach;
974 	int ret = 0;
975 
976 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
977 	if (!mach) {
978 		ret = -ENOMEM;
979 		goto out;
980 	}
981 	get_cpu_id((struct cpuid *) &mach->cpuid);
982 	mach->ibc = sclp.ibc;
983 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
984 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
985 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
986 	       sizeof(S390_lowcore.stfle_fac_list));
987 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
988 		 kvm->arch.model.ibc,
989 		 kvm->arch.model.cpuid);
990 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
991 		 mach->fac_mask[0],
992 		 mach->fac_mask[1],
993 		 mach->fac_mask[2]);
994 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
995 		 mach->fac_list[0],
996 		 mach->fac_list[1],
997 		 mach->fac_list[2]);
998 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
999 		ret = -EFAULT;
1000 	kfree(mach);
1001 out:
1002 	return ret;
1003 }
1004 
1005 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1006 				       struct kvm_device_attr *attr)
1007 {
1008 	struct kvm_s390_vm_cpu_feat data;
1009 
1010 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1011 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1012 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1013 		return -EFAULT;
1014 	return 0;
1015 }
1016 
1017 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1018 				     struct kvm_device_attr *attr)
1019 {
1020 	struct kvm_s390_vm_cpu_feat data;
1021 
1022 	bitmap_copy((unsigned long *) data.feat,
1023 		    kvm_s390_available_cpu_feat,
1024 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1025 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1026 		return -EFAULT;
1027 	return 0;
1028 }
1029 
1030 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1031 					  struct kvm_device_attr *attr)
1032 {
1033 	/*
1034 	 * Once we can actually configure subfunctions (kernel + hw support),
1035 	 * we have to check if they were already set by user space, if so copy
1036 	 * them from kvm->arch.
1037 	 */
1038 	return -ENXIO;
1039 }
1040 
1041 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1042 					struct kvm_device_attr *attr)
1043 {
1044 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1045 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1046 		return -EFAULT;
1047 	return 0;
1048 }
1049 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051 	int ret = -ENXIO;
1052 
1053 	switch (attr->attr) {
1054 	case KVM_S390_VM_CPU_PROCESSOR:
1055 		ret = kvm_s390_get_processor(kvm, attr);
1056 		break;
1057 	case KVM_S390_VM_CPU_MACHINE:
1058 		ret = kvm_s390_get_machine(kvm, attr);
1059 		break;
1060 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1061 		ret = kvm_s390_get_processor_feat(kvm, attr);
1062 		break;
1063 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1064 		ret = kvm_s390_get_machine_feat(kvm, attr);
1065 		break;
1066 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1067 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1068 		break;
1069 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1070 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1071 		break;
1072 	}
1073 	return ret;
1074 }
1075 
1076 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1077 {
1078 	int ret;
1079 
1080 	switch (attr->group) {
1081 	case KVM_S390_VM_MEM_CTRL:
1082 		ret = kvm_s390_set_mem_control(kvm, attr);
1083 		break;
1084 	case KVM_S390_VM_TOD:
1085 		ret = kvm_s390_set_tod(kvm, attr);
1086 		break;
1087 	case KVM_S390_VM_CPU_MODEL:
1088 		ret = kvm_s390_set_cpu_model(kvm, attr);
1089 		break;
1090 	case KVM_S390_VM_CRYPTO:
1091 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1092 		break;
1093 	default:
1094 		ret = -ENXIO;
1095 		break;
1096 	}
1097 
1098 	return ret;
1099 }
1100 
1101 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103 	int ret;
1104 
1105 	switch (attr->group) {
1106 	case KVM_S390_VM_MEM_CTRL:
1107 		ret = kvm_s390_get_mem_control(kvm, attr);
1108 		break;
1109 	case KVM_S390_VM_TOD:
1110 		ret = kvm_s390_get_tod(kvm, attr);
1111 		break;
1112 	case KVM_S390_VM_CPU_MODEL:
1113 		ret = kvm_s390_get_cpu_model(kvm, attr);
1114 		break;
1115 	default:
1116 		ret = -ENXIO;
1117 		break;
1118 	}
1119 
1120 	return ret;
1121 }
1122 
1123 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1124 {
1125 	int ret;
1126 
1127 	switch (attr->group) {
1128 	case KVM_S390_VM_MEM_CTRL:
1129 		switch (attr->attr) {
1130 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1131 		case KVM_S390_VM_MEM_CLR_CMMA:
1132 			ret = sclp.has_cmma ? 0 : -ENXIO;
1133 			break;
1134 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1135 			ret = 0;
1136 			break;
1137 		default:
1138 			ret = -ENXIO;
1139 			break;
1140 		}
1141 		break;
1142 	case KVM_S390_VM_TOD:
1143 		switch (attr->attr) {
1144 		case KVM_S390_VM_TOD_LOW:
1145 		case KVM_S390_VM_TOD_HIGH:
1146 			ret = 0;
1147 			break;
1148 		default:
1149 			ret = -ENXIO;
1150 			break;
1151 		}
1152 		break;
1153 	case KVM_S390_VM_CPU_MODEL:
1154 		switch (attr->attr) {
1155 		case KVM_S390_VM_CPU_PROCESSOR:
1156 		case KVM_S390_VM_CPU_MACHINE:
1157 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1158 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1159 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1160 			ret = 0;
1161 			break;
1162 		/* configuring subfunctions is not supported yet */
1163 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1164 		default:
1165 			ret = -ENXIO;
1166 			break;
1167 		}
1168 		break;
1169 	case KVM_S390_VM_CRYPTO:
1170 		switch (attr->attr) {
1171 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1172 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1173 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1174 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1175 			ret = 0;
1176 			break;
1177 		default:
1178 			ret = -ENXIO;
1179 			break;
1180 		}
1181 		break;
1182 	default:
1183 		ret = -ENXIO;
1184 		break;
1185 	}
1186 
1187 	return ret;
1188 }
1189 
1190 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1191 {
1192 	uint8_t *keys;
1193 	uint64_t hva;
1194 	int i, r = 0;
1195 
1196 	if (args->flags != 0)
1197 		return -EINVAL;
1198 
1199 	/* Is this guest using storage keys? */
1200 	if (!mm_use_skey(current->mm))
1201 		return KVM_S390_GET_SKEYS_NONE;
1202 
1203 	/* Enforce sane limit on memory allocation */
1204 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1205 		return -EINVAL;
1206 
1207 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1208 	if (!keys)
1209 		return -ENOMEM;
1210 
1211 	down_read(&current->mm->mmap_sem);
1212 	for (i = 0; i < args->count; i++) {
1213 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1214 		if (kvm_is_error_hva(hva)) {
1215 			r = -EFAULT;
1216 			break;
1217 		}
1218 
1219 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1220 		if (r)
1221 			break;
1222 	}
1223 	up_read(&current->mm->mmap_sem);
1224 
1225 	if (!r) {
1226 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1227 				 sizeof(uint8_t) * args->count);
1228 		if (r)
1229 			r = -EFAULT;
1230 	}
1231 
1232 	kvfree(keys);
1233 	return r;
1234 }
1235 
1236 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1237 {
1238 	uint8_t *keys;
1239 	uint64_t hva;
1240 	int i, r = 0;
1241 
1242 	if (args->flags != 0)
1243 		return -EINVAL;
1244 
1245 	/* Enforce sane limit on memory allocation */
1246 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1247 		return -EINVAL;
1248 
1249 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1250 	if (!keys)
1251 		return -ENOMEM;
1252 
1253 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1254 			   sizeof(uint8_t) * args->count);
1255 	if (r) {
1256 		r = -EFAULT;
1257 		goto out;
1258 	}
1259 
1260 	/* Enable storage key handling for the guest */
1261 	r = s390_enable_skey();
1262 	if (r)
1263 		goto out;
1264 
1265 	down_read(&current->mm->mmap_sem);
1266 	for (i = 0; i < args->count; i++) {
1267 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1268 		if (kvm_is_error_hva(hva)) {
1269 			r = -EFAULT;
1270 			break;
1271 		}
1272 
1273 		/* Lowest order bit is reserved */
1274 		if (keys[i] & 0x01) {
1275 			r = -EINVAL;
1276 			break;
1277 		}
1278 
1279 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1280 		if (r)
1281 			break;
1282 	}
1283 	up_read(&current->mm->mmap_sem);
1284 out:
1285 	kvfree(keys);
1286 	return r;
1287 }
1288 
1289 long kvm_arch_vm_ioctl(struct file *filp,
1290 		       unsigned int ioctl, unsigned long arg)
1291 {
1292 	struct kvm *kvm = filp->private_data;
1293 	void __user *argp = (void __user *)arg;
1294 	struct kvm_device_attr attr;
1295 	int r;
1296 
1297 	switch (ioctl) {
1298 	case KVM_S390_INTERRUPT: {
1299 		struct kvm_s390_interrupt s390int;
1300 
1301 		r = -EFAULT;
1302 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1303 			break;
1304 		r = kvm_s390_inject_vm(kvm, &s390int);
1305 		break;
1306 	}
1307 	case KVM_ENABLE_CAP: {
1308 		struct kvm_enable_cap cap;
1309 		r = -EFAULT;
1310 		if (copy_from_user(&cap, argp, sizeof(cap)))
1311 			break;
1312 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1313 		break;
1314 	}
1315 	case KVM_CREATE_IRQCHIP: {
1316 		struct kvm_irq_routing_entry routing;
1317 
1318 		r = -EINVAL;
1319 		if (kvm->arch.use_irqchip) {
1320 			/* Set up dummy routing. */
1321 			memset(&routing, 0, sizeof(routing));
1322 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1323 		}
1324 		break;
1325 	}
1326 	case KVM_SET_DEVICE_ATTR: {
1327 		r = -EFAULT;
1328 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1329 			break;
1330 		r = kvm_s390_vm_set_attr(kvm, &attr);
1331 		break;
1332 	}
1333 	case KVM_GET_DEVICE_ATTR: {
1334 		r = -EFAULT;
1335 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1336 			break;
1337 		r = kvm_s390_vm_get_attr(kvm, &attr);
1338 		break;
1339 	}
1340 	case KVM_HAS_DEVICE_ATTR: {
1341 		r = -EFAULT;
1342 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1343 			break;
1344 		r = kvm_s390_vm_has_attr(kvm, &attr);
1345 		break;
1346 	}
1347 	case KVM_S390_GET_SKEYS: {
1348 		struct kvm_s390_skeys args;
1349 
1350 		r = -EFAULT;
1351 		if (copy_from_user(&args, argp,
1352 				   sizeof(struct kvm_s390_skeys)))
1353 			break;
1354 		r = kvm_s390_get_skeys(kvm, &args);
1355 		break;
1356 	}
1357 	case KVM_S390_SET_SKEYS: {
1358 		struct kvm_s390_skeys args;
1359 
1360 		r = -EFAULT;
1361 		if (copy_from_user(&args, argp,
1362 				   sizeof(struct kvm_s390_skeys)))
1363 			break;
1364 		r = kvm_s390_set_skeys(kvm, &args);
1365 		break;
1366 	}
1367 	default:
1368 		r = -ENOTTY;
1369 	}
1370 
1371 	return r;
1372 }
1373 
1374 static int kvm_s390_query_ap_config(u8 *config)
1375 {
1376 	u32 fcn_code = 0x04000000UL;
1377 	u32 cc = 0;
1378 
1379 	memset(config, 0, 128);
1380 	asm volatile(
1381 		"lgr 0,%1\n"
1382 		"lgr 2,%2\n"
1383 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1384 		"0: ipm %0\n"
1385 		"srl %0,28\n"
1386 		"1:\n"
1387 		EX_TABLE(0b, 1b)
1388 		: "+r" (cc)
1389 		: "r" (fcn_code), "r" (config)
1390 		: "cc", "0", "2", "memory"
1391 	);
1392 
1393 	return cc;
1394 }
1395 
1396 static int kvm_s390_apxa_installed(void)
1397 {
1398 	u8 config[128];
1399 	int cc;
1400 
1401 	if (test_facility(12)) {
1402 		cc = kvm_s390_query_ap_config(config);
1403 
1404 		if (cc)
1405 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1406 		else
1407 			return config[0] & 0x40;
1408 	}
1409 
1410 	return 0;
1411 }
1412 
1413 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1414 {
1415 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1416 
1417 	if (kvm_s390_apxa_installed())
1418 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1419 	else
1420 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1421 }
1422 
1423 static u64 kvm_s390_get_initial_cpuid(void)
1424 {
1425 	struct cpuid cpuid;
1426 
1427 	get_cpu_id(&cpuid);
1428 	cpuid.version = 0xff;
1429 	return *((u64 *) &cpuid);
1430 }
1431 
1432 static void kvm_s390_crypto_init(struct kvm *kvm)
1433 {
1434 	if (!test_kvm_facility(kvm, 76))
1435 		return;
1436 
1437 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1438 	kvm_s390_set_crycb_format(kvm);
1439 
1440 	/* Enable AES/DEA protected key functions by default */
1441 	kvm->arch.crypto.aes_kw = 1;
1442 	kvm->arch.crypto.dea_kw = 1;
1443 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1444 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1445 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1446 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1447 }
1448 
1449 static void sca_dispose(struct kvm *kvm)
1450 {
1451 	if (kvm->arch.use_esca)
1452 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1453 	else
1454 		free_page((unsigned long)(kvm->arch.sca));
1455 	kvm->arch.sca = NULL;
1456 }
1457 
1458 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1459 {
1460 	gfp_t alloc_flags = GFP_KERNEL;
1461 	int i, rc;
1462 	char debug_name[16];
1463 	static unsigned long sca_offset;
1464 
1465 	rc = -EINVAL;
1466 #ifdef CONFIG_KVM_S390_UCONTROL
1467 	if (type & ~KVM_VM_S390_UCONTROL)
1468 		goto out_err;
1469 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1470 		goto out_err;
1471 #else
1472 	if (type)
1473 		goto out_err;
1474 #endif
1475 
1476 	rc = s390_enable_sie();
1477 	if (rc)
1478 		goto out_err;
1479 
1480 	rc = -ENOMEM;
1481 
1482 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1483 
1484 	kvm->arch.use_esca = 0; /* start with basic SCA */
1485 	if (!sclp.has_64bscao)
1486 		alloc_flags |= GFP_DMA;
1487 	rwlock_init(&kvm->arch.sca_lock);
1488 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1489 	if (!kvm->arch.sca)
1490 		goto out_err;
1491 	spin_lock(&kvm_lock);
1492 	sca_offset += 16;
1493 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1494 		sca_offset = 0;
1495 	kvm->arch.sca = (struct bsca_block *)
1496 			((char *) kvm->arch.sca + sca_offset);
1497 	spin_unlock(&kvm_lock);
1498 
1499 	sprintf(debug_name, "kvm-%u", current->pid);
1500 
1501 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1502 	if (!kvm->arch.dbf)
1503 		goto out_err;
1504 
1505 	kvm->arch.sie_page2 =
1506 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1507 	if (!kvm->arch.sie_page2)
1508 		goto out_err;
1509 
1510 	/* Populate the facility mask initially. */
1511 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1512 	       sizeof(S390_lowcore.stfle_fac_list));
1513 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1514 		if (i < kvm_s390_fac_list_mask_size())
1515 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1516 		else
1517 			kvm->arch.model.fac_mask[i] = 0UL;
1518 	}
1519 
1520 	/* Populate the facility list initially. */
1521 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1522 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1523 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1524 
1525 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1526 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1527 
1528 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1529 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1530 
1531 	kvm_s390_crypto_init(kvm);
1532 
1533 	mutex_init(&kvm->arch.float_int.ais_lock);
1534 	kvm->arch.float_int.simm = 0;
1535 	kvm->arch.float_int.nimm = 0;
1536 	kvm->arch.float_int.ais_enabled = 0;
1537 	spin_lock_init(&kvm->arch.float_int.lock);
1538 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1539 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1540 	init_waitqueue_head(&kvm->arch.ipte_wq);
1541 	mutex_init(&kvm->arch.ipte_mutex);
1542 
1543 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1544 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1545 
1546 	if (type & KVM_VM_S390_UCONTROL) {
1547 		kvm->arch.gmap = NULL;
1548 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1549 	} else {
1550 		if (sclp.hamax == U64_MAX)
1551 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1552 		else
1553 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1554 						    sclp.hamax + 1);
1555 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1556 		if (!kvm->arch.gmap)
1557 			goto out_err;
1558 		kvm->arch.gmap->private = kvm;
1559 		kvm->arch.gmap->pfault_enabled = 0;
1560 	}
1561 
1562 	kvm->arch.css_support = 0;
1563 	kvm->arch.use_irqchip = 0;
1564 	kvm->arch.epoch = 0;
1565 
1566 	spin_lock_init(&kvm->arch.start_stop_lock);
1567 	kvm_s390_vsie_init(kvm);
1568 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1569 
1570 	return 0;
1571 out_err:
1572 	free_page((unsigned long)kvm->arch.sie_page2);
1573 	debug_unregister(kvm->arch.dbf);
1574 	sca_dispose(kvm);
1575 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1576 	return rc;
1577 }
1578 
1579 bool kvm_arch_has_vcpu_debugfs(void)
1580 {
1581 	return false;
1582 }
1583 
1584 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1585 {
1586 	return 0;
1587 }
1588 
1589 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1590 {
1591 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1592 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1593 	kvm_s390_clear_local_irqs(vcpu);
1594 	kvm_clear_async_pf_completion_queue(vcpu);
1595 	if (!kvm_is_ucontrol(vcpu->kvm))
1596 		sca_del_vcpu(vcpu);
1597 
1598 	if (kvm_is_ucontrol(vcpu->kvm))
1599 		gmap_remove(vcpu->arch.gmap);
1600 
1601 	if (vcpu->kvm->arch.use_cmma)
1602 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1603 	free_page((unsigned long)(vcpu->arch.sie_block));
1604 
1605 	kvm_vcpu_uninit(vcpu);
1606 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1607 }
1608 
1609 static void kvm_free_vcpus(struct kvm *kvm)
1610 {
1611 	unsigned int i;
1612 	struct kvm_vcpu *vcpu;
1613 
1614 	kvm_for_each_vcpu(i, vcpu, kvm)
1615 		kvm_arch_vcpu_destroy(vcpu);
1616 
1617 	mutex_lock(&kvm->lock);
1618 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1619 		kvm->vcpus[i] = NULL;
1620 
1621 	atomic_set(&kvm->online_vcpus, 0);
1622 	mutex_unlock(&kvm->lock);
1623 }
1624 
1625 void kvm_arch_destroy_vm(struct kvm *kvm)
1626 {
1627 	kvm_free_vcpus(kvm);
1628 	sca_dispose(kvm);
1629 	debug_unregister(kvm->arch.dbf);
1630 	free_page((unsigned long)kvm->arch.sie_page2);
1631 	if (!kvm_is_ucontrol(kvm))
1632 		gmap_remove(kvm->arch.gmap);
1633 	kvm_s390_destroy_adapters(kvm);
1634 	kvm_s390_clear_float_irqs(kvm);
1635 	kvm_s390_vsie_destroy(kvm);
1636 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1637 }
1638 
1639 /* Section: vcpu related */
1640 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1641 {
1642 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1643 	if (!vcpu->arch.gmap)
1644 		return -ENOMEM;
1645 	vcpu->arch.gmap->private = vcpu->kvm;
1646 
1647 	return 0;
1648 }
1649 
1650 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1651 {
1652 	if (!kvm_s390_use_sca_entries())
1653 		return;
1654 	read_lock(&vcpu->kvm->arch.sca_lock);
1655 	if (vcpu->kvm->arch.use_esca) {
1656 		struct esca_block *sca = vcpu->kvm->arch.sca;
1657 
1658 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1659 		sca->cpu[vcpu->vcpu_id].sda = 0;
1660 	} else {
1661 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1662 
1663 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1664 		sca->cpu[vcpu->vcpu_id].sda = 0;
1665 	}
1666 	read_unlock(&vcpu->kvm->arch.sca_lock);
1667 }
1668 
1669 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1670 {
1671 	if (!kvm_s390_use_sca_entries()) {
1672 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1673 
1674 		/* we still need the basic sca for the ipte control */
1675 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1676 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1677 	}
1678 	read_lock(&vcpu->kvm->arch.sca_lock);
1679 	if (vcpu->kvm->arch.use_esca) {
1680 		struct esca_block *sca = vcpu->kvm->arch.sca;
1681 
1682 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1683 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1684 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1685 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1686 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1687 	} else {
1688 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1689 
1690 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1691 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1692 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1693 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1694 	}
1695 	read_unlock(&vcpu->kvm->arch.sca_lock);
1696 }
1697 
1698 /* Basic SCA to Extended SCA data copy routines */
1699 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1700 {
1701 	d->sda = s->sda;
1702 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1703 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1704 }
1705 
1706 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1707 {
1708 	int i;
1709 
1710 	d->ipte_control = s->ipte_control;
1711 	d->mcn[0] = s->mcn;
1712 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1713 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1714 }
1715 
1716 static int sca_switch_to_extended(struct kvm *kvm)
1717 {
1718 	struct bsca_block *old_sca = kvm->arch.sca;
1719 	struct esca_block *new_sca;
1720 	struct kvm_vcpu *vcpu;
1721 	unsigned int vcpu_idx;
1722 	u32 scaol, scaoh;
1723 
1724 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1725 	if (!new_sca)
1726 		return -ENOMEM;
1727 
1728 	scaoh = (u32)((u64)(new_sca) >> 32);
1729 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1730 
1731 	kvm_s390_vcpu_block_all(kvm);
1732 	write_lock(&kvm->arch.sca_lock);
1733 
1734 	sca_copy_b_to_e(new_sca, old_sca);
1735 
1736 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1737 		vcpu->arch.sie_block->scaoh = scaoh;
1738 		vcpu->arch.sie_block->scaol = scaol;
1739 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1740 	}
1741 	kvm->arch.sca = new_sca;
1742 	kvm->arch.use_esca = 1;
1743 
1744 	write_unlock(&kvm->arch.sca_lock);
1745 	kvm_s390_vcpu_unblock_all(kvm);
1746 
1747 	free_page((unsigned long)old_sca);
1748 
1749 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1750 		 old_sca, kvm->arch.sca);
1751 	return 0;
1752 }
1753 
1754 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1755 {
1756 	int rc;
1757 
1758 	if (!kvm_s390_use_sca_entries()) {
1759 		if (id < KVM_MAX_VCPUS)
1760 			return true;
1761 		return false;
1762 	}
1763 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1764 		return true;
1765 	if (!sclp.has_esca || !sclp.has_64bscao)
1766 		return false;
1767 
1768 	mutex_lock(&kvm->lock);
1769 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1770 	mutex_unlock(&kvm->lock);
1771 
1772 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1773 }
1774 
1775 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1776 {
1777 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1778 	kvm_clear_async_pf_completion_queue(vcpu);
1779 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1780 				    KVM_SYNC_GPRS |
1781 				    KVM_SYNC_ACRS |
1782 				    KVM_SYNC_CRS |
1783 				    KVM_SYNC_ARCH0 |
1784 				    KVM_SYNC_PFAULT;
1785 	kvm_s390_set_prefix(vcpu, 0);
1786 	if (test_kvm_facility(vcpu->kvm, 64))
1787 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1788 	if (test_kvm_facility(vcpu->kvm, 133))
1789 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
1790 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1791 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1792 	 */
1793 	if (MACHINE_HAS_VX)
1794 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1795 	else
1796 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1797 
1798 	if (kvm_is_ucontrol(vcpu->kvm))
1799 		return __kvm_ucontrol_vcpu_init(vcpu);
1800 
1801 	return 0;
1802 }
1803 
1804 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1805 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1806 {
1807 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1808 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1809 	vcpu->arch.cputm_start = get_tod_clock_fast();
1810 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1811 }
1812 
1813 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1814 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1815 {
1816 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1817 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1818 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1819 	vcpu->arch.cputm_start = 0;
1820 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1821 }
1822 
1823 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1824 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1825 {
1826 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1827 	vcpu->arch.cputm_enabled = true;
1828 	__start_cpu_timer_accounting(vcpu);
1829 }
1830 
1831 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1832 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1833 {
1834 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1835 	__stop_cpu_timer_accounting(vcpu);
1836 	vcpu->arch.cputm_enabled = false;
1837 }
1838 
1839 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1840 {
1841 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1842 	__enable_cpu_timer_accounting(vcpu);
1843 	preempt_enable();
1844 }
1845 
1846 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1847 {
1848 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1849 	__disable_cpu_timer_accounting(vcpu);
1850 	preempt_enable();
1851 }
1852 
1853 /* set the cpu timer - may only be called from the VCPU thread itself */
1854 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1855 {
1856 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1857 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1858 	if (vcpu->arch.cputm_enabled)
1859 		vcpu->arch.cputm_start = get_tod_clock_fast();
1860 	vcpu->arch.sie_block->cputm = cputm;
1861 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1862 	preempt_enable();
1863 }
1864 
1865 /* update and get the cpu timer - can also be called from other VCPU threads */
1866 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1867 {
1868 	unsigned int seq;
1869 	__u64 value;
1870 
1871 	if (unlikely(!vcpu->arch.cputm_enabled))
1872 		return vcpu->arch.sie_block->cputm;
1873 
1874 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1875 	do {
1876 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1877 		/*
1878 		 * If the writer would ever execute a read in the critical
1879 		 * section, e.g. in irq context, we have a deadlock.
1880 		 */
1881 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1882 		value = vcpu->arch.sie_block->cputm;
1883 		/* if cputm_start is 0, accounting is being started/stopped */
1884 		if (likely(vcpu->arch.cputm_start))
1885 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1886 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1887 	preempt_enable();
1888 	return value;
1889 }
1890 
1891 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1892 {
1893 
1894 	gmap_enable(vcpu->arch.enabled_gmap);
1895 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1896 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1897 		__start_cpu_timer_accounting(vcpu);
1898 	vcpu->cpu = cpu;
1899 }
1900 
1901 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1902 {
1903 	vcpu->cpu = -1;
1904 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1905 		__stop_cpu_timer_accounting(vcpu);
1906 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1907 	vcpu->arch.enabled_gmap = gmap_get_enabled();
1908 	gmap_disable(vcpu->arch.enabled_gmap);
1909 
1910 }
1911 
1912 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1913 {
1914 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
1915 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1916 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1917 	kvm_s390_set_prefix(vcpu, 0);
1918 	kvm_s390_set_cpu_timer(vcpu, 0);
1919 	vcpu->arch.sie_block->ckc       = 0UL;
1920 	vcpu->arch.sie_block->todpr     = 0;
1921 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1922 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1923 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1924 	/* make sure the new fpc will be lazily loaded */
1925 	save_fpu_regs();
1926 	current->thread.fpu.fpc = 0;
1927 	vcpu->arch.sie_block->gbea = 1;
1928 	vcpu->arch.sie_block->pp = 0;
1929 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1930 	kvm_clear_async_pf_completion_queue(vcpu);
1931 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1932 		kvm_s390_vcpu_stop(vcpu);
1933 	kvm_s390_clear_local_irqs(vcpu);
1934 }
1935 
1936 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1937 {
1938 	mutex_lock(&vcpu->kvm->lock);
1939 	preempt_disable();
1940 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1941 	preempt_enable();
1942 	mutex_unlock(&vcpu->kvm->lock);
1943 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1944 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1945 		sca_add_vcpu(vcpu);
1946 	}
1947 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1948 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1949 	/* make vcpu_load load the right gmap on the first trigger */
1950 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1951 }
1952 
1953 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1954 {
1955 	if (!test_kvm_facility(vcpu->kvm, 76))
1956 		return;
1957 
1958 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1959 
1960 	if (vcpu->kvm->arch.crypto.aes_kw)
1961 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1962 	if (vcpu->kvm->arch.crypto.dea_kw)
1963 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1964 
1965 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1966 }
1967 
1968 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1969 {
1970 	free_page(vcpu->arch.sie_block->cbrlo);
1971 	vcpu->arch.sie_block->cbrlo = 0;
1972 }
1973 
1974 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1975 {
1976 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1977 	if (!vcpu->arch.sie_block->cbrlo)
1978 		return -ENOMEM;
1979 
1980 	vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
1981 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
1982 	return 0;
1983 }
1984 
1985 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1986 {
1987 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1988 
1989 	vcpu->arch.sie_block->ibc = model->ibc;
1990 	if (test_kvm_facility(vcpu->kvm, 7))
1991 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1992 }
1993 
1994 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1995 {
1996 	int rc = 0;
1997 
1998 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1999 						    CPUSTAT_SM |
2000 						    CPUSTAT_STOPPED);
2001 
2002 	if (test_kvm_facility(vcpu->kvm, 78))
2003 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2004 	else if (test_kvm_facility(vcpu->kvm, 8))
2005 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2006 
2007 	kvm_s390_vcpu_setup_model(vcpu);
2008 
2009 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2010 	if (MACHINE_HAS_ESOP)
2011 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2012 	if (test_kvm_facility(vcpu->kvm, 9))
2013 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2014 	if (test_kvm_facility(vcpu->kvm, 73))
2015 		vcpu->arch.sie_block->ecb |= ECB_TE;
2016 
2017 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2018 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2019 	if (test_kvm_facility(vcpu->kvm, 130))
2020 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2021 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2022 	if (sclp.has_cei)
2023 		vcpu->arch.sie_block->eca |= ECA_CEI;
2024 	if (sclp.has_ib)
2025 		vcpu->arch.sie_block->eca |= ECA_IB;
2026 	if (sclp.has_siif)
2027 		vcpu->arch.sie_block->eca |= ECA_SII;
2028 	if (sclp.has_sigpif)
2029 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2030 	if (test_kvm_facility(vcpu->kvm, 129)) {
2031 		vcpu->arch.sie_block->eca |= ECA_VX;
2032 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2033 	}
2034 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2035 					| SDNXC;
2036 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2037 
2038 	if (sclp.has_kss)
2039 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2040 	else
2041 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2042 
2043 	if (vcpu->kvm->arch.use_cmma) {
2044 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2045 		if (rc)
2046 			return rc;
2047 	}
2048 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2049 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2050 
2051 	kvm_s390_vcpu_crypto_setup(vcpu);
2052 
2053 	return rc;
2054 }
2055 
2056 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2057 				      unsigned int id)
2058 {
2059 	struct kvm_vcpu *vcpu;
2060 	struct sie_page *sie_page;
2061 	int rc = -EINVAL;
2062 
2063 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2064 		goto out;
2065 
2066 	rc = -ENOMEM;
2067 
2068 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2069 	if (!vcpu)
2070 		goto out;
2071 
2072 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2073 	if (!sie_page)
2074 		goto out_free_cpu;
2075 
2076 	vcpu->arch.sie_block = &sie_page->sie_block;
2077 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2078 
2079 	/* the real guest size will always be smaller than msl */
2080 	vcpu->arch.sie_block->mso = 0;
2081 	vcpu->arch.sie_block->msl = sclp.hamax;
2082 
2083 	vcpu->arch.sie_block->icpua = id;
2084 	spin_lock_init(&vcpu->arch.local_int.lock);
2085 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2086 	vcpu->arch.local_int.wq = &vcpu->wq;
2087 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2088 	seqcount_init(&vcpu->arch.cputm_seqcount);
2089 
2090 	rc = kvm_vcpu_init(vcpu, kvm, id);
2091 	if (rc)
2092 		goto out_free_sie_block;
2093 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2094 		 vcpu->arch.sie_block);
2095 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2096 
2097 	return vcpu;
2098 out_free_sie_block:
2099 	free_page((unsigned long)(vcpu->arch.sie_block));
2100 out_free_cpu:
2101 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2102 out:
2103 	return ERR_PTR(rc);
2104 }
2105 
2106 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2107 {
2108 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2109 }
2110 
2111 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2112 {
2113 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2114 	exit_sie(vcpu);
2115 }
2116 
2117 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2118 {
2119 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2120 }
2121 
2122 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2123 {
2124 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2125 	exit_sie(vcpu);
2126 }
2127 
2128 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2129 {
2130 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2131 }
2132 
2133 /*
2134  * Kick a guest cpu out of SIE and wait until SIE is not running.
2135  * If the CPU is not running (e.g. waiting as idle) the function will
2136  * return immediately. */
2137 void exit_sie(struct kvm_vcpu *vcpu)
2138 {
2139 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2140 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2141 		cpu_relax();
2142 }
2143 
2144 /* Kick a guest cpu out of SIE to process a request synchronously */
2145 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2146 {
2147 	kvm_make_request(req, vcpu);
2148 	kvm_s390_vcpu_request(vcpu);
2149 }
2150 
2151 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2152 			      unsigned long end)
2153 {
2154 	struct kvm *kvm = gmap->private;
2155 	struct kvm_vcpu *vcpu;
2156 	unsigned long prefix;
2157 	int i;
2158 
2159 	if (gmap_is_shadow(gmap))
2160 		return;
2161 	if (start >= 1UL << 31)
2162 		/* We are only interested in prefix pages */
2163 		return;
2164 	kvm_for_each_vcpu(i, vcpu, kvm) {
2165 		/* match against both prefix pages */
2166 		prefix = kvm_s390_get_prefix(vcpu);
2167 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2168 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2169 				   start, end);
2170 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2171 		}
2172 	}
2173 }
2174 
2175 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2176 {
2177 	/* kvm common code refers to this, but never calls it */
2178 	BUG();
2179 	return 0;
2180 }
2181 
2182 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2183 					   struct kvm_one_reg *reg)
2184 {
2185 	int r = -EINVAL;
2186 
2187 	switch (reg->id) {
2188 	case KVM_REG_S390_TODPR:
2189 		r = put_user(vcpu->arch.sie_block->todpr,
2190 			     (u32 __user *)reg->addr);
2191 		break;
2192 	case KVM_REG_S390_EPOCHDIFF:
2193 		r = put_user(vcpu->arch.sie_block->epoch,
2194 			     (u64 __user *)reg->addr);
2195 		break;
2196 	case KVM_REG_S390_CPU_TIMER:
2197 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2198 			     (u64 __user *)reg->addr);
2199 		break;
2200 	case KVM_REG_S390_CLOCK_COMP:
2201 		r = put_user(vcpu->arch.sie_block->ckc,
2202 			     (u64 __user *)reg->addr);
2203 		break;
2204 	case KVM_REG_S390_PFTOKEN:
2205 		r = put_user(vcpu->arch.pfault_token,
2206 			     (u64 __user *)reg->addr);
2207 		break;
2208 	case KVM_REG_S390_PFCOMPARE:
2209 		r = put_user(vcpu->arch.pfault_compare,
2210 			     (u64 __user *)reg->addr);
2211 		break;
2212 	case KVM_REG_S390_PFSELECT:
2213 		r = put_user(vcpu->arch.pfault_select,
2214 			     (u64 __user *)reg->addr);
2215 		break;
2216 	case KVM_REG_S390_PP:
2217 		r = put_user(vcpu->arch.sie_block->pp,
2218 			     (u64 __user *)reg->addr);
2219 		break;
2220 	case KVM_REG_S390_GBEA:
2221 		r = put_user(vcpu->arch.sie_block->gbea,
2222 			     (u64 __user *)reg->addr);
2223 		break;
2224 	default:
2225 		break;
2226 	}
2227 
2228 	return r;
2229 }
2230 
2231 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2232 					   struct kvm_one_reg *reg)
2233 {
2234 	int r = -EINVAL;
2235 	__u64 val;
2236 
2237 	switch (reg->id) {
2238 	case KVM_REG_S390_TODPR:
2239 		r = get_user(vcpu->arch.sie_block->todpr,
2240 			     (u32 __user *)reg->addr);
2241 		break;
2242 	case KVM_REG_S390_EPOCHDIFF:
2243 		r = get_user(vcpu->arch.sie_block->epoch,
2244 			     (u64 __user *)reg->addr);
2245 		break;
2246 	case KVM_REG_S390_CPU_TIMER:
2247 		r = get_user(val, (u64 __user *)reg->addr);
2248 		if (!r)
2249 			kvm_s390_set_cpu_timer(vcpu, val);
2250 		break;
2251 	case KVM_REG_S390_CLOCK_COMP:
2252 		r = get_user(vcpu->arch.sie_block->ckc,
2253 			     (u64 __user *)reg->addr);
2254 		break;
2255 	case KVM_REG_S390_PFTOKEN:
2256 		r = get_user(vcpu->arch.pfault_token,
2257 			     (u64 __user *)reg->addr);
2258 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2259 			kvm_clear_async_pf_completion_queue(vcpu);
2260 		break;
2261 	case KVM_REG_S390_PFCOMPARE:
2262 		r = get_user(vcpu->arch.pfault_compare,
2263 			     (u64 __user *)reg->addr);
2264 		break;
2265 	case KVM_REG_S390_PFSELECT:
2266 		r = get_user(vcpu->arch.pfault_select,
2267 			     (u64 __user *)reg->addr);
2268 		break;
2269 	case KVM_REG_S390_PP:
2270 		r = get_user(vcpu->arch.sie_block->pp,
2271 			     (u64 __user *)reg->addr);
2272 		break;
2273 	case KVM_REG_S390_GBEA:
2274 		r = get_user(vcpu->arch.sie_block->gbea,
2275 			     (u64 __user *)reg->addr);
2276 		break;
2277 	default:
2278 		break;
2279 	}
2280 
2281 	return r;
2282 }
2283 
2284 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2285 {
2286 	kvm_s390_vcpu_initial_reset(vcpu);
2287 	return 0;
2288 }
2289 
2290 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2291 {
2292 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2293 	return 0;
2294 }
2295 
2296 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2297 {
2298 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2299 	return 0;
2300 }
2301 
2302 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2303 				  struct kvm_sregs *sregs)
2304 {
2305 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2306 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2307 	return 0;
2308 }
2309 
2310 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2311 				  struct kvm_sregs *sregs)
2312 {
2313 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2314 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2315 	return 0;
2316 }
2317 
2318 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2319 {
2320 	if (test_fp_ctl(fpu->fpc))
2321 		return -EINVAL;
2322 	vcpu->run->s.regs.fpc = fpu->fpc;
2323 	if (MACHINE_HAS_VX)
2324 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2325 				 (freg_t *) fpu->fprs);
2326 	else
2327 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2328 	return 0;
2329 }
2330 
2331 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2332 {
2333 	/* make sure we have the latest values */
2334 	save_fpu_regs();
2335 	if (MACHINE_HAS_VX)
2336 		convert_vx_to_fp((freg_t *) fpu->fprs,
2337 				 (__vector128 *) vcpu->run->s.regs.vrs);
2338 	else
2339 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2340 	fpu->fpc = vcpu->run->s.regs.fpc;
2341 	return 0;
2342 }
2343 
2344 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2345 {
2346 	int rc = 0;
2347 
2348 	if (!is_vcpu_stopped(vcpu))
2349 		rc = -EBUSY;
2350 	else {
2351 		vcpu->run->psw_mask = psw.mask;
2352 		vcpu->run->psw_addr = psw.addr;
2353 	}
2354 	return rc;
2355 }
2356 
2357 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2358 				  struct kvm_translation *tr)
2359 {
2360 	return -EINVAL; /* not implemented yet */
2361 }
2362 
2363 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2364 			      KVM_GUESTDBG_USE_HW_BP | \
2365 			      KVM_GUESTDBG_ENABLE)
2366 
2367 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2368 					struct kvm_guest_debug *dbg)
2369 {
2370 	int rc = 0;
2371 
2372 	vcpu->guest_debug = 0;
2373 	kvm_s390_clear_bp_data(vcpu);
2374 
2375 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2376 		return -EINVAL;
2377 	if (!sclp.has_gpere)
2378 		return -EINVAL;
2379 
2380 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2381 		vcpu->guest_debug = dbg->control;
2382 		/* enforce guest PER */
2383 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2384 
2385 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2386 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2387 	} else {
2388 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2389 		vcpu->arch.guestdbg.last_bp = 0;
2390 	}
2391 
2392 	if (rc) {
2393 		vcpu->guest_debug = 0;
2394 		kvm_s390_clear_bp_data(vcpu);
2395 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2396 	}
2397 
2398 	return rc;
2399 }
2400 
2401 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2402 				    struct kvm_mp_state *mp_state)
2403 {
2404 	/* CHECK_STOP and LOAD are not supported yet */
2405 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2406 				       KVM_MP_STATE_OPERATING;
2407 }
2408 
2409 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2410 				    struct kvm_mp_state *mp_state)
2411 {
2412 	int rc = 0;
2413 
2414 	/* user space knows about this interface - let it control the state */
2415 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2416 
2417 	switch (mp_state->mp_state) {
2418 	case KVM_MP_STATE_STOPPED:
2419 		kvm_s390_vcpu_stop(vcpu);
2420 		break;
2421 	case KVM_MP_STATE_OPERATING:
2422 		kvm_s390_vcpu_start(vcpu);
2423 		break;
2424 	case KVM_MP_STATE_LOAD:
2425 	case KVM_MP_STATE_CHECK_STOP:
2426 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2427 	default:
2428 		rc = -ENXIO;
2429 	}
2430 
2431 	return rc;
2432 }
2433 
2434 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2435 {
2436 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2437 }
2438 
2439 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2440 {
2441 retry:
2442 	kvm_s390_vcpu_request_handled(vcpu);
2443 	if (!vcpu->requests)
2444 		return 0;
2445 	/*
2446 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2447 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2448 	 * This ensures that the ipte instruction for this request has
2449 	 * already finished. We might race against a second unmapper that
2450 	 * wants to set the blocking bit. Lets just retry the request loop.
2451 	 */
2452 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2453 		int rc;
2454 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2455 					  kvm_s390_get_prefix(vcpu),
2456 					  PAGE_SIZE * 2, PROT_WRITE);
2457 		if (rc) {
2458 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2459 			return rc;
2460 		}
2461 		goto retry;
2462 	}
2463 
2464 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2465 		vcpu->arch.sie_block->ihcpu = 0xffff;
2466 		goto retry;
2467 	}
2468 
2469 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2470 		if (!ibs_enabled(vcpu)) {
2471 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2472 			atomic_or(CPUSTAT_IBS,
2473 					&vcpu->arch.sie_block->cpuflags);
2474 		}
2475 		goto retry;
2476 	}
2477 
2478 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2479 		if (ibs_enabled(vcpu)) {
2480 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2481 			atomic_andnot(CPUSTAT_IBS,
2482 					  &vcpu->arch.sie_block->cpuflags);
2483 		}
2484 		goto retry;
2485 	}
2486 
2487 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2488 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2489 		goto retry;
2490 	}
2491 
2492 	/* nothing to do, just clear the request */
2493 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2494 
2495 	return 0;
2496 }
2497 
2498 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2499 {
2500 	struct kvm_vcpu *vcpu;
2501 	int i;
2502 
2503 	mutex_lock(&kvm->lock);
2504 	preempt_disable();
2505 	kvm->arch.epoch = tod - get_tod_clock();
2506 	kvm_s390_vcpu_block_all(kvm);
2507 	kvm_for_each_vcpu(i, vcpu, kvm)
2508 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2509 	kvm_s390_vcpu_unblock_all(kvm);
2510 	preempt_enable();
2511 	mutex_unlock(&kvm->lock);
2512 }
2513 
2514 /**
2515  * kvm_arch_fault_in_page - fault-in guest page if necessary
2516  * @vcpu: The corresponding virtual cpu
2517  * @gpa: Guest physical address
2518  * @writable: Whether the page should be writable or not
2519  *
2520  * Make sure that a guest page has been faulted-in on the host.
2521  *
2522  * Return: Zero on success, negative error code otherwise.
2523  */
2524 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2525 {
2526 	return gmap_fault(vcpu->arch.gmap, gpa,
2527 			  writable ? FAULT_FLAG_WRITE : 0);
2528 }
2529 
2530 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2531 				      unsigned long token)
2532 {
2533 	struct kvm_s390_interrupt inti;
2534 	struct kvm_s390_irq irq;
2535 
2536 	if (start_token) {
2537 		irq.u.ext.ext_params2 = token;
2538 		irq.type = KVM_S390_INT_PFAULT_INIT;
2539 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2540 	} else {
2541 		inti.type = KVM_S390_INT_PFAULT_DONE;
2542 		inti.parm64 = token;
2543 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2544 	}
2545 }
2546 
2547 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2548 				     struct kvm_async_pf *work)
2549 {
2550 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2551 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2552 }
2553 
2554 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2555 				 struct kvm_async_pf *work)
2556 {
2557 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2558 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2559 }
2560 
2561 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2562 			       struct kvm_async_pf *work)
2563 {
2564 	/* s390 will always inject the page directly */
2565 }
2566 
2567 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2568 {
2569 	/*
2570 	 * s390 will always inject the page directly,
2571 	 * but we still want check_async_completion to cleanup
2572 	 */
2573 	return true;
2574 }
2575 
2576 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2577 {
2578 	hva_t hva;
2579 	struct kvm_arch_async_pf arch;
2580 	int rc;
2581 
2582 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2583 		return 0;
2584 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2585 	    vcpu->arch.pfault_compare)
2586 		return 0;
2587 	if (psw_extint_disabled(vcpu))
2588 		return 0;
2589 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2590 		return 0;
2591 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2592 		return 0;
2593 	if (!vcpu->arch.gmap->pfault_enabled)
2594 		return 0;
2595 
2596 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2597 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2598 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2599 		return 0;
2600 
2601 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2602 	return rc;
2603 }
2604 
2605 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2606 {
2607 	int rc, cpuflags;
2608 
2609 	/*
2610 	 * On s390 notifications for arriving pages will be delivered directly
2611 	 * to the guest but the house keeping for completed pfaults is
2612 	 * handled outside the worker.
2613 	 */
2614 	kvm_check_async_pf_completion(vcpu);
2615 
2616 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2617 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2618 
2619 	if (need_resched())
2620 		schedule();
2621 
2622 	if (test_cpu_flag(CIF_MCCK_PENDING))
2623 		s390_handle_mcck();
2624 
2625 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2626 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2627 		if (rc)
2628 			return rc;
2629 	}
2630 
2631 	rc = kvm_s390_handle_requests(vcpu);
2632 	if (rc)
2633 		return rc;
2634 
2635 	if (guestdbg_enabled(vcpu)) {
2636 		kvm_s390_backup_guest_per_regs(vcpu);
2637 		kvm_s390_patch_guest_per_regs(vcpu);
2638 	}
2639 
2640 	vcpu->arch.sie_block->icptcode = 0;
2641 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2642 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2643 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2644 
2645 	return 0;
2646 }
2647 
2648 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2649 {
2650 	struct kvm_s390_pgm_info pgm_info = {
2651 		.code = PGM_ADDRESSING,
2652 	};
2653 	u8 opcode, ilen;
2654 	int rc;
2655 
2656 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2657 	trace_kvm_s390_sie_fault(vcpu);
2658 
2659 	/*
2660 	 * We want to inject an addressing exception, which is defined as a
2661 	 * suppressing or terminating exception. However, since we came here
2662 	 * by a DAT access exception, the PSW still points to the faulting
2663 	 * instruction since DAT exceptions are nullifying. So we've got
2664 	 * to look up the current opcode to get the length of the instruction
2665 	 * to be able to forward the PSW.
2666 	 */
2667 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2668 	ilen = insn_length(opcode);
2669 	if (rc < 0) {
2670 		return rc;
2671 	} else if (rc) {
2672 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2673 		 * Forward by arbitrary ilc, injection will take care of
2674 		 * nullification if necessary.
2675 		 */
2676 		pgm_info = vcpu->arch.pgm;
2677 		ilen = 4;
2678 	}
2679 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2680 	kvm_s390_forward_psw(vcpu, ilen);
2681 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2682 }
2683 
2684 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2685 {
2686 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2687 		   vcpu->arch.sie_block->icptcode);
2688 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2689 
2690 	if (guestdbg_enabled(vcpu))
2691 		kvm_s390_restore_guest_per_regs(vcpu);
2692 
2693 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2694 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2695 
2696 	if (vcpu->arch.sie_block->icptcode > 0) {
2697 		int rc = kvm_handle_sie_intercept(vcpu);
2698 
2699 		if (rc != -EOPNOTSUPP)
2700 			return rc;
2701 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2702 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2703 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2704 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2705 		return -EREMOTE;
2706 	} else if (exit_reason != -EFAULT) {
2707 		vcpu->stat.exit_null++;
2708 		return 0;
2709 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2710 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2711 		vcpu->run->s390_ucontrol.trans_exc_code =
2712 						current->thread.gmap_addr;
2713 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2714 		return -EREMOTE;
2715 	} else if (current->thread.gmap_pfault) {
2716 		trace_kvm_s390_major_guest_pfault(vcpu);
2717 		current->thread.gmap_pfault = 0;
2718 		if (kvm_arch_setup_async_pf(vcpu))
2719 			return 0;
2720 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2721 	}
2722 	return vcpu_post_run_fault_in_sie(vcpu);
2723 }
2724 
2725 static int __vcpu_run(struct kvm_vcpu *vcpu)
2726 {
2727 	int rc, exit_reason;
2728 
2729 	/*
2730 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2731 	 * ning the guest), so that memslots (and other stuff) are protected
2732 	 */
2733 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2734 
2735 	do {
2736 		rc = vcpu_pre_run(vcpu);
2737 		if (rc)
2738 			break;
2739 
2740 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2741 		/*
2742 		 * As PF_VCPU will be used in fault handler, between
2743 		 * guest_enter and guest_exit should be no uaccess.
2744 		 */
2745 		local_irq_disable();
2746 		guest_enter_irqoff();
2747 		__disable_cpu_timer_accounting(vcpu);
2748 		local_irq_enable();
2749 		exit_reason = sie64a(vcpu->arch.sie_block,
2750 				     vcpu->run->s.regs.gprs);
2751 		local_irq_disable();
2752 		__enable_cpu_timer_accounting(vcpu);
2753 		guest_exit_irqoff();
2754 		local_irq_enable();
2755 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2756 
2757 		rc = vcpu_post_run(vcpu, exit_reason);
2758 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2759 
2760 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2761 	return rc;
2762 }
2763 
2764 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2765 {
2766 	struct runtime_instr_cb *riccb;
2767 	struct gs_cb *gscb;
2768 
2769 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2770 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
2771 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2772 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2773 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2774 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2775 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2776 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2777 		/* some control register changes require a tlb flush */
2778 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2779 	}
2780 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2781 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2782 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2783 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2784 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2785 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2786 	}
2787 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2788 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2789 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2790 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2791 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2792 			kvm_clear_async_pf_completion_queue(vcpu);
2793 	}
2794 	/*
2795 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
2796 	 * we should enable RI here instead of doing the lazy enablement.
2797 	 */
2798 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2799 	    test_kvm_facility(vcpu->kvm, 64) &&
2800 	    riccb->valid &&
2801 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
2802 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
2803 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
2804 	}
2805 	/*
2806 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
2807 	 * we should enable GS here instead of doing the lazy enablement.
2808 	 */
2809 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
2810 	    test_kvm_facility(vcpu->kvm, 133) &&
2811 	    gscb->gssm &&
2812 	    !vcpu->arch.gs_enabled) {
2813 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
2814 		vcpu->arch.sie_block->ecb |= ECB_GS;
2815 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2816 		vcpu->arch.gs_enabled = 1;
2817 	}
2818 	save_access_regs(vcpu->arch.host_acrs);
2819 	restore_access_regs(vcpu->run->s.regs.acrs);
2820 	/* save host (userspace) fprs/vrs */
2821 	save_fpu_regs();
2822 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2823 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2824 	if (MACHINE_HAS_VX)
2825 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2826 	else
2827 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2828 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2829 	if (test_fp_ctl(current->thread.fpu.fpc))
2830 		/* User space provided an invalid FPC, let's clear it */
2831 		current->thread.fpu.fpc = 0;
2832 	if (MACHINE_HAS_GS) {
2833 		preempt_disable();
2834 		__ctl_set_bit(2, 4);
2835 		if (current->thread.gs_cb) {
2836 			vcpu->arch.host_gscb = current->thread.gs_cb;
2837 			save_gs_cb(vcpu->arch.host_gscb);
2838 		}
2839 		if (vcpu->arch.gs_enabled) {
2840 			current->thread.gs_cb = (struct gs_cb *)
2841 						&vcpu->run->s.regs.gscb;
2842 			restore_gs_cb(current->thread.gs_cb);
2843 		}
2844 		preempt_enable();
2845 	}
2846 
2847 	kvm_run->kvm_dirty_regs = 0;
2848 }
2849 
2850 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2851 {
2852 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2853 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2854 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2855 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2856 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2857 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2858 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2859 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2860 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2861 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2862 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2863 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2864 	save_access_regs(vcpu->run->s.regs.acrs);
2865 	restore_access_regs(vcpu->arch.host_acrs);
2866 	/* Save guest register state */
2867 	save_fpu_regs();
2868 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2869 	/* Restore will be done lazily at return */
2870 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2871 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2872 	if (MACHINE_HAS_GS) {
2873 		__ctl_set_bit(2, 4);
2874 		if (vcpu->arch.gs_enabled)
2875 			save_gs_cb(current->thread.gs_cb);
2876 		preempt_disable();
2877 		current->thread.gs_cb = vcpu->arch.host_gscb;
2878 		restore_gs_cb(vcpu->arch.host_gscb);
2879 		preempt_enable();
2880 		if (!vcpu->arch.host_gscb)
2881 			__ctl_clear_bit(2, 4);
2882 		vcpu->arch.host_gscb = NULL;
2883 	}
2884 
2885 }
2886 
2887 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2888 {
2889 	int rc;
2890 	sigset_t sigsaved;
2891 
2892 	if (kvm_run->immediate_exit)
2893 		return -EINTR;
2894 
2895 	if (guestdbg_exit_pending(vcpu)) {
2896 		kvm_s390_prepare_debug_exit(vcpu);
2897 		return 0;
2898 	}
2899 
2900 	if (vcpu->sigset_active)
2901 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2902 
2903 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2904 		kvm_s390_vcpu_start(vcpu);
2905 	} else if (is_vcpu_stopped(vcpu)) {
2906 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2907 				   vcpu->vcpu_id);
2908 		return -EINVAL;
2909 	}
2910 
2911 	sync_regs(vcpu, kvm_run);
2912 	enable_cpu_timer_accounting(vcpu);
2913 
2914 	might_fault();
2915 	rc = __vcpu_run(vcpu);
2916 
2917 	if (signal_pending(current) && !rc) {
2918 		kvm_run->exit_reason = KVM_EXIT_INTR;
2919 		rc = -EINTR;
2920 	}
2921 
2922 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2923 		kvm_s390_prepare_debug_exit(vcpu);
2924 		rc = 0;
2925 	}
2926 
2927 	if (rc == -EREMOTE) {
2928 		/* userspace support is needed, kvm_run has been prepared */
2929 		rc = 0;
2930 	}
2931 
2932 	disable_cpu_timer_accounting(vcpu);
2933 	store_regs(vcpu, kvm_run);
2934 
2935 	if (vcpu->sigset_active)
2936 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2937 
2938 	vcpu->stat.exit_userspace++;
2939 	return rc;
2940 }
2941 
2942 /*
2943  * store status at address
2944  * we use have two special cases:
2945  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2946  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2947  */
2948 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2949 {
2950 	unsigned char archmode = 1;
2951 	freg_t fprs[NUM_FPRS];
2952 	unsigned int px;
2953 	u64 clkcomp, cputm;
2954 	int rc;
2955 
2956 	px = kvm_s390_get_prefix(vcpu);
2957 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2958 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2959 			return -EFAULT;
2960 		gpa = 0;
2961 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2962 		if (write_guest_real(vcpu, 163, &archmode, 1))
2963 			return -EFAULT;
2964 		gpa = px;
2965 	} else
2966 		gpa -= __LC_FPREGS_SAVE_AREA;
2967 
2968 	/* manually convert vector registers if necessary */
2969 	if (MACHINE_HAS_VX) {
2970 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2971 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2972 				     fprs, 128);
2973 	} else {
2974 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2975 				     vcpu->run->s.regs.fprs, 128);
2976 	}
2977 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2978 			      vcpu->run->s.regs.gprs, 128);
2979 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2980 			      &vcpu->arch.sie_block->gpsw, 16);
2981 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2982 			      &px, 4);
2983 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2984 			      &vcpu->run->s.regs.fpc, 4);
2985 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2986 			      &vcpu->arch.sie_block->todpr, 4);
2987 	cputm = kvm_s390_get_cpu_timer(vcpu);
2988 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2989 			      &cputm, 8);
2990 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2991 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2992 			      &clkcomp, 8);
2993 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2994 			      &vcpu->run->s.regs.acrs, 64);
2995 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2996 			      &vcpu->arch.sie_block->gcr, 128);
2997 	return rc ? -EFAULT : 0;
2998 }
2999 
3000 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3001 {
3002 	/*
3003 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3004 	 * switch in the run ioctl. Let's update our copies before we save
3005 	 * it into the save area
3006 	 */
3007 	save_fpu_regs();
3008 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3009 	save_access_regs(vcpu->run->s.regs.acrs);
3010 
3011 	return kvm_s390_store_status_unloaded(vcpu, addr);
3012 }
3013 
3014 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3015 {
3016 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3017 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3018 }
3019 
3020 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3021 {
3022 	unsigned int i;
3023 	struct kvm_vcpu *vcpu;
3024 
3025 	kvm_for_each_vcpu(i, vcpu, kvm) {
3026 		__disable_ibs_on_vcpu(vcpu);
3027 	}
3028 }
3029 
3030 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3031 {
3032 	if (!sclp.has_ibs)
3033 		return;
3034 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3035 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3036 }
3037 
3038 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3039 {
3040 	int i, online_vcpus, started_vcpus = 0;
3041 
3042 	if (!is_vcpu_stopped(vcpu))
3043 		return;
3044 
3045 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3046 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3047 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3048 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3049 
3050 	for (i = 0; i < online_vcpus; i++) {
3051 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3052 			started_vcpus++;
3053 	}
3054 
3055 	if (started_vcpus == 0) {
3056 		/* we're the only active VCPU -> speed it up */
3057 		__enable_ibs_on_vcpu(vcpu);
3058 	} else if (started_vcpus == 1) {
3059 		/*
3060 		 * As we are starting a second VCPU, we have to disable
3061 		 * the IBS facility on all VCPUs to remove potentially
3062 		 * oustanding ENABLE requests.
3063 		 */
3064 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3065 	}
3066 
3067 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3068 	/*
3069 	 * Another VCPU might have used IBS while we were offline.
3070 	 * Let's play safe and flush the VCPU at startup.
3071 	 */
3072 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3073 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3074 	return;
3075 }
3076 
3077 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3078 {
3079 	int i, online_vcpus, started_vcpus = 0;
3080 	struct kvm_vcpu *started_vcpu = NULL;
3081 
3082 	if (is_vcpu_stopped(vcpu))
3083 		return;
3084 
3085 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3086 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3087 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3088 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3089 
3090 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3091 	kvm_s390_clear_stop_irq(vcpu);
3092 
3093 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3094 	__disable_ibs_on_vcpu(vcpu);
3095 
3096 	for (i = 0; i < online_vcpus; i++) {
3097 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3098 			started_vcpus++;
3099 			started_vcpu = vcpu->kvm->vcpus[i];
3100 		}
3101 	}
3102 
3103 	if (started_vcpus == 1) {
3104 		/*
3105 		 * As we only have one VCPU left, we want to enable the
3106 		 * IBS facility for that VCPU to speed it up.
3107 		 */
3108 		__enable_ibs_on_vcpu(started_vcpu);
3109 	}
3110 
3111 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3112 	return;
3113 }
3114 
3115 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3116 				     struct kvm_enable_cap *cap)
3117 {
3118 	int r;
3119 
3120 	if (cap->flags)
3121 		return -EINVAL;
3122 
3123 	switch (cap->cap) {
3124 	case KVM_CAP_S390_CSS_SUPPORT:
3125 		if (!vcpu->kvm->arch.css_support) {
3126 			vcpu->kvm->arch.css_support = 1;
3127 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3128 			trace_kvm_s390_enable_css(vcpu->kvm);
3129 		}
3130 		r = 0;
3131 		break;
3132 	default:
3133 		r = -EINVAL;
3134 		break;
3135 	}
3136 	return r;
3137 }
3138 
3139 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3140 				  struct kvm_s390_mem_op *mop)
3141 {
3142 	void __user *uaddr = (void __user *)mop->buf;
3143 	void *tmpbuf = NULL;
3144 	int r, srcu_idx;
3145 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3146 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3147 
3148 	if (mop->flags & ~supported_flags)
3149 		return -EINVAL;
3150 
3151 	if (mop->size > MEM_OP_MAX_SIZE)
3152 		return -E2BIG;
3153 
3154 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3155 		tmpbuf = vmalloc(mop->size);
3156 		if (!tmpbuf)
3157 			return -ENOMEM;
3158 	}
3159 
3160 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3161 
3162 	switch (mop->op) {
3163 	case KVM_S390_MEMOP_LOGICAL_READ:
3164 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3165 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3166 					    mop->size, GACC_FETCH);
3167 			break;
3168 		}
3169 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3170 		if (r == 0) {
3171 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3172 				r = -EFAULT;
3173 		}
3174 		break;
3175 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3176 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3177 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3178 					    mop->size, GACC_STORE);
3179 			break;
3180 		}
3181 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3182 			r = -EFAULT;
3183 			break;
3184 		}
3185 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3186 		break;
3187 	default:
3188 		r = -EINVAL;
3189 	}
3190 
3191 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3192 
3193 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3194 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3195 
3196 	vfree(tmpbuf);
3197 	return r;
3198 }
3199 
3200 long kvm_arch_vcpu_ioctl(struct file *filp,
3201 			 unsigned int ioctl, unsigned long arg)
3202 {
3203 	struct kvm_vcpu *vcpu = filp->private_data;
3204 	void __user *argp = (void __user *)arg;
3205 	int idx;
3206 	long r;
3207 
3208 	switch (ioctl) {
3209 	case KVM_S390_IRQ: {
3210 		struct kvm_s390_irq s390irq;
3211 
3212 		r = -EFAULT;
3213 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3214 			break;
3215 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3216 		break;
3217 	}
3218 	case KVM_S390_INTERRUPT: {
3219 		struct kvm_s390_interrupt s390int;
3220 		struct kvm_s390_irq s390irq;
3221 
3222 		r = -EFAULT;
3223 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3224 			break;
3225 		if (s390int_to_s390irq(&s390int, &s390irq))
3226 			return -EINVAL;
3227 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3228 		break;
3229 	}
3230 	case KVM_S390_STORE_STATUS:
3231 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3232 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3233 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3234 		break;
3235 	case KVM_S390_SET_INITIAL_PSW: {
3236 		psw_t psw;
3237 
3238 		r = -EFAULT;
3239 		if (copy_from_user(&psw, argp, sizeof(psw)))
3240 			break;
3241 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3242 		break;
3243 	}
3244 	case KVM_S390_INITIAL_RESET:
3245 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3246 		break;
3247 	case KVM_SET_ONE_REG:
3248 	case KVM_GET_ONE_REG: {
3249 		struct kvm_one_reg reg;
3250 		r = -EFAULT;
3251 		if (copy_from_user(&reg, argp, sizeof(reg)))
3252 			break;
3253 		if (ioctl == KVM_SET_ONE_REG)
3254 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3255 		else
3256 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3257 		break;
3258 	}
3259 #ifdef CONFIG_KVM_S390_UCONTROL
3260 	case KVM_S390_UCAS_MAP: {
3261 		struct kvm_s390_ucas_mapping ucasmap;
3262 
3263 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3264 			r = -EFAULT;
3265 			break;
3266 		}
3267 
3268 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3269 			r = -EINVAL;
3270 			break;
3271 		}
3272 
3273 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3274 				     ucasmap.vcpu_addr, ucasmap.length);
3275 		break;
3276 	}
3277 	case KVM_S390_UCAS_UNMAP: {
3278 		struct kvm_s390_ucas_mapping ucasmap;
3279 
3280 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3281 			r = -EFAULT;
3282 			break;
3283 		}
3284 
3285 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3286 			r = -EINVAL;
3287 			break;
3288 		}
3289 
3290 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3291 			ucasmap.length);
3292 		break;
3293 	}
3294 #endif
3295 	case KVM_S390_VCPU_FAULT: {
3296 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3297 		break;
3298 	}
3299 	case KVM_ENABLE_CAP:
3300 	{
3301 		struct kvm_enable_cap cap;
3302 		r = -EFAULT;
3303 		if (copy_from_user(&cap, argp, sizeof(cap)))
3304 			break;
3305 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3306 		break;
3307 	}
3308 	case KVM_S390_MEM_OP: {
3309 		struct kvm_s390_mem_op mem_op;
3310 
3311 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3312 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3313 		else
3314 			r = -EFAULT;
3315 		break;
3316 	}
3317 	case KVM_S390_SET_IRQ_STATE: {
3318 		struct kvm_s390_irq_state irq_state;
3319 
3320 		r = -EFAULT;
3321 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3322 			break;
3323 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3324 		    irq_state.len == 0 ||
3325 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3326 			r = -EINVAL;
3327 			break;
3328 		}
3329 		r = kvm_s390_set_irq_state(vcpu,
3330 					   (void __user *) irq_state.buf,
3331 					   irq_state.len);
3332 		break;
3333 	}
3334 	case KVM_S390_GET_IRQ_STATE: {
3335 		struct kvm_s390_irq_state irq_state;
3336 
3337 		r = -EFAULT;
3338 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3339 			break;
3340 		if (irq_state.len == 0) {
3341 			r = -EINVAL;
3342 			break;
3343 		}
3344 		r = kvm_s390_get_irq_state(vcpu,
3345 					   (__u8 __user *)  irq_state.buf,
3346 					   irq_state.len);
3347 		break;
3348 	}
3349 	default:
3350 		r = -ENOTTY;
3351 	}
3352 	return r;
3353 }
3354 
3355 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3356 {
3357 #ifdef CONFIG_KVM_S390_UCONTROL
3358 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3359 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3360 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3361 		get_page(vmf->page);
3362 		return 0;
3363 	}
3364 #endif
3365 	return VM_FAULT_SIGBUS;
3366 }
3367 
3368 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3369 			    unsigned long npages)
3370 {
3371 	return 0;
3372 }
3373 
3374 /* Section: memory related */
3375 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3376 				   struct kvm_memory_slot *memslot,
3377 				   const struct kvm_userspace_memory_region *mem,
3378 				   enum kvm_mr_change change)
3379 {
3380 	/* A few sanity checks. We can have memory slots which have to be
3381 	   located/ended at a segment boundary (1MB). The memory in userland is
3382 	   ok to be fragmented into various different vmas. It is okay to mmap()
3383 	   and munmap() stuff in this slot after doing this call at any time */
3384 
3385 	if (mem->userspace_addr & 0xffffful)
3386 		return -EINVAL;
3387 
3388 	if (mem->memory_size & 0xffffful)
3389 		return -EINVAL;
3390 
3391 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3392 		return -EINVAL;
3393 
3394 	return 0;
3395 }
3396 
3397 void kvm_arch_commit_memory_region(struct kvm *kvm,
3398 				const struct kvm_userspace_memory_region *mem,
3399 				const struct kvm_memory_slot *old,
3400 				const struct kvm_memory_slot *new,
3401 				enum kvm_mr_change change)
3402 {
3403 	int rc;
3404 
3405 	/* If the basics of the memslot do not change, we do not want
3406 	 * to update the gmap. Every update causes several unnecessary
3407 	 * segment translation exceptions. This is usually handled just
3408 	 * fine by the normal fault handler + gmap, but it will also
3409 	 * cause faults on the prefix page of running guest CPUs.
3410 	 */
3411 	if (old->userspace_addr == mem->userspace_addr &&
3412 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3413 	    old->npages * PAGE_SIZE == mem->memory_size)
3414 		return;
3415 
3416 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3417 		mem->guest_phys_addr, mem->memory_size);
3418 	if (rc)
3419 		pr_warn("failed to commit memory region\n");
3420 	return;
3421 }
3422 
3423 static inline unsigned long nonhyp_mask(int i)
3424 {
3425 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3426 
3427 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3428 }
3429 
3430 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3431 {
3432 	vcpu->valid_wakeup = false;
3433 }
3434 
3435 static int __init kvm_s390_init(void)
3436 {
3437 	int i;
3438 
3439 	if (!sclp.has_sief2) {
3440 		pr_info("SIE not available\n");
3441 		return -ENODEV;
3442 	}
3443 
3444 	for (i = 0; i < 16; i++)
3445 		kvm_s390_fac_list_mask[i] |=
3446 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3447 
3448 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3449 }
3450 
3451 static void __exit kvm_s390_exit(void)
3452 {
3453 	kvm_exit();
3454 }
3455 
3456 module_init(kvm_s390_init);
3457 module_exit(kvm_s390_exit);
3458 
3459 /*
3460  * Enable autoloading of the kvm module.
3461  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3462  * since x86 takes a different approach.
3463  */
3464 #include <linux/miscdevice.h>
3465 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3466 MODULE_ALIAS("devname:kvm");
3467