xref: /linux/arch/s390/kvm/kvm-s390.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/gmap.h>
34 #include <asm/nmi.h>
35 #include <asm/switch_to.h>
36 #include <asm/isc.h>
37 #include <asm/sclp.h>
38 #include "kvm-s390.h"
39 #include "gaccess.h"
40 
41 #define KMSG_COMPONENT "kvm-s390"
42 #undef pr_fmt
43 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
44 
45 #define CREATE_TRACE_POINTS
46 #include "trace.h"
47 #include "trace-s390.h"
48 
49 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
50 #define LOCAL_IRQS 32
51 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
52 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
53 
54 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
55 
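/*
 * Per-VCPU statistics, exported through debugfs (typically under
 * /sys/kernel/debug/kvm/). Each entry maps a file name to a counter in
 * struct kvm_vcpu's stat field, addressed via the VCPU_STAT() offset above.
 */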
56 struct kvm_stats_debugfs_item debugfs_entries[] = {
57 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
58 	{ "exit_null", VCPU_STAT(exit_null) },
59 	{ "exit_validity", VCPU_STAT(exit_validity) },
60 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
61 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
62 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
63 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
64 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
65 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
66 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
67 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
68 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
69 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
70 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
71 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
72 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
73 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
74 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
75 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
76 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
77 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
78 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
79 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
80 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
81 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
82 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
83 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
84 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
85 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
86 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
87 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
88 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
89 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
90 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
91 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
92 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
93 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
94 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
95 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
96 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
97 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
98 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
99 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
100 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
101 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
102 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
103 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
104 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
105 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
106 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
107 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
108 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
109 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
110 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
111 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
112 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
113 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
114 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
115 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
116 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
117 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
118 	{ NULL }
119 };
120 
121 /* upper facilities limit for kvm */
122 unsigned long kvm_s390_fac_list_mask[16] = {
123 	0xffe6000000000000UL,
124 	0x005e000000000000UL,
125 };
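/*
 * This mask is ANDed with the host facility list (STFLE) in
 * kvm_arch_init_vm() to build the maximum set of facilities a guest may
 * be presented with.
 */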
126 
127 unsigned long kvm_s390_fac_list_mask_size(void)
128 {
129 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
130 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
131 }
132 
133 static struct gmap_notifier gmap_notifier;
134 debug_info_t *kvm_s390_dbf;
135 
136 /* Section: not file related */
137 int kvm_arch_hardware_enable(void)
138 {
139 	/* every s390 is virtualization enabled ;-) */
140 	return 0;
141 }
142 
143 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
144 
145 /*
146  * This callback is executed during stop_machine(). All CPUs are therefore
147  * temporarily stopped. In order not to change guest behavior, we have to
148  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
149  * so a CPU won't be stopped while calculating with the epoch.
150  */
151 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
152 			  void *v)
153 {
154 	struct kvm *kvm;
155 	struct kvm_vcpu *vcpu;
156 	int i;
157 	unsigned long long *delta = v;
158 
159 	list_for_each_entry(kvm, &vm_list, vm_list) {
160 		kvm->arch.epoch -= *delta;
161 		kvm_for_each_vcpu(i, vcpu, kvm) {
162 			vcpu->arch.sie_block->epoch -= *delta;
163 			if (vcpu->arch.cputm_enabled)
164 				vcpu->arch.cputm_start += *delta;
165 		}
166 	}
167 	return NOTIFY_OK;
168 }
169 
170 static struct notifier_block kvm_clock_notifier = {
171 	.notifier_call = kvm_clock_sync,
172 };
173 
174 int kvm_arch_hardware_setup(void)
175 {
176 	gmap_notifier.notifier_call = kvm_gmap_notifier;
177 	gmap_register_ipte_notifier(&gmap_notifier);
178 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
179 				       &kvm_clock_notifier);
180 	return 0;
181 }
182 
183 void kvm_arch_hardware_unsetup(void)
184 {
185 	gmap_unregister_ipte_notifier(&gmap_notifier);
186 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
187 					 &kvm_clock_notifier);
188 }
189 
190 int kvm_arch_init(void *opaque)
191 {
192 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
193 	if (!kvm_s390_dbf)
194 		return -ENOMEM;
195 
196 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
197 		debug_unregister(kvm_s390_dbf);
198 		return -ENOMEM;
199 	}
200 
201 	/* Register floating interrupt controller interface. */
202 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
203 }
204 
205 void kvm_arch_exit(void)
206 {
207 	debug_unregister(kvm_s390_dbf);
208 }
209 
210 /* Section: device related */
211 long kvm_arch_dev_ioctl(struct file *filp,
212 			unsigned int ioctl, unsigned long arg)
213 {
214 	if (ioctl == KVM_S390_ENABLE_SIE)
215 		return s390_enable_sie();
216 	return -EINVAL;
217 }
218 
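/*
 * Report which optional KVM capabilities are available for s390 guests.
 * Userspace queries these via the KVM_CHECK_EXTENSION ioctl; a return
 * value greater than zero means the capability is supported.
 */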
219 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
220 {
221 	int r;
222 
223 	switch (ext) {
224 	case KVM_CAP_S390_PSW:
225 	case KVM_CAP_S390_GMAP:
226 	case KVM_CAP_SYNC_MMU:
227 #ifdef CONFIG_KVM_S390_UCONTROL
228 	case KVM_CAP_S390_UCONTROL:
229 #endif
230 	case KVM_CAP_ASYNC_PF:
231 	case KVM_CAP_SYNC_REGS:
232 	case KVM_CAP_ONE_REG:
233 	case KVM_CAP_ENABLE_CAP:
234 	case KVM_CAP_S390_CSS_SUPPORT:
235 	case KVM_CAP_IOEVENTFD:
236 	case KVM_CAP_DEVICE_CTRL:
237 	case KVM_CAP_ENABLE_CAP_VM:
238 	case KVM_CAP_S390_IRQCHIP:
239 	case KVM_CAP_VM_ATTRIBUTES:
240 	case KVM_CAP_MP_STATE:
241 	case KVM_CAP_S390_INJECT_IRQ:
242 	case KVM_CAP_S390_USER_SIGP:
243 	case KVM_CAP_S390_USER_STSI:
244 	case KVM_CAP_S390_SKEYS:
245 	case KVM_CAP_S390_IRQ_STATE:
246 		r = 1;
247 		break;
248 	case KVM_CAP_S390_MEM_OP:
249 		r = MEM_OP_MAX_SIZE;
250 		break;
251 	case KVM_CAP_NR_VCPUS:
252 	case KVM_CAP_MAX_VCPUS:
253 		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
254 				  : KVM_S390_BSCA_CPU_SLOTS;
255 		break;
256 	case KVM_CAP_NR_MEMSLOTS:
257 		r = KVM_USER_MEM_SLOTS;
258 		break;
259 	case KVM_CAP_S390_COW:
260 		r = MACHINE_HAS_ESOP;
261 		break;
262 	case KVM_CAP_S390_VECTOR_REGISTERS:
263 		r = MACHINE_HAS_VX;
264 		break;
265 	case KVM_CAP_S390_RI:
266 		r = test_facility(64);
267 		break;
268 	default:
269 		r = 0;
270 	}
271 	return r;
272 }
273 
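/*
 * Propagate the dirty state of guest pages from the host page tables to
 * the KVM dirty bitmap of the given memory slot. Called with the slots
 * lock held from kvm_vm_ioctl_get_dirty_log() below.
 */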
274 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
275 					struct kvm_memory_slot *memslot)
276 {
277 	gfn_t cur_gfn, last_gfn;
278 	unsigned long address;
279 	struct gmap *gmap = kvm->arch.gmap;
280 
281 	/* Loop over all guest pages */
282 	last_gfn = memslot->base_gfn + memslot->npages;
283 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
284 		address = gfn_to_hva_memslot(memslot, cur_gfn);
285 
286 		if (test_and_clear_guest_dirty(gmap->mm, address))
287 			mark_page_dirty(kvm, cur_gfn);
288 		if (fatal_signal_pending(current))
289 			return;
290 		cond_resched();
291 	}
292 }
293 
294 /* Section: vm related */
295 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
296 
297 /*
298  * Get (and clear) the dirty memory log for a memory slot.
299  */
300 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
301 			       struct kvm_dirty_log *log)
302 {
303 	int r;
304 	unsigned long n;
305 	struct kvm_memslots *slots;
306 	struct kvm_memory_slot *memslot;
307 	int is_dirty = 0;
308 
309 	mutex_lock(&kvm->slots_lock);
310 
311 	r = -EINVAL;
312 	if (log->slot >= KVM_USER_MEM_SLOTS)
313 		goto out;
314 
315 	slots = kvm_memslots(kvm);
316 	memslot = id_to_memslot(slots, log->slot);
317 	r = -ENOENT;
318 	if (!memslot->dirty_bitmap)
319 		goto out;
320 
321 	kvm_s390_sync_dirty_log(kvm, memslot);
322 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
323 	if (r)
324 		goto out;
325 
326 	/* Clear the dirty log */
327 	if (is_dirty) {
328 		n = kvm_dirty_bitmap_bytes(memslot);
329 		memset(memslot->dirty_bitmap, 0, n);
330 	}
331 	r = 0;
332 out:
333 	mutex_unlock(&kvm->slots_lock);
334 	return r;
335 }
336 
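/*
 * Enable optional VM-wide capabilities. An illustrative userspace sketch
 * (assuming a VM fd obtained from KVM_CREATE_VM, error handling omitted):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that change the CPU model (vector registers, runtime
 * instrumentation) can only be enabled before any VCPU has been created.
 */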
337 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
338 {
339 	int r;
340 
341 	if (cap->flags)
342 		return -EINVAL;
343 
344 	switch (cap->cap) {
345 	case KVM_CAP_S390_IRQCHIP:
346 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
347 		kvm->arch.use_irqchip = 1;
348 		r = 0;
349 		break;
350 	case KVM_CAP_S390_USER_SIGP:
351 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
352 		kvm->arch.user_sigp = 1;
353 		r = 0;
354 		break;
355 	case KVM_CAP_S390_VECTOR_REGISTERS:
356 		mutex_lock(&kvm->lock);
357 		if (atomic_read(&kvm->online_vcpus)) {
358 			r = -EBUSY;
359 		} else if (MACHINE_HAS_VX) {
360 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
361 			set_kvm_facility(kvm->arch.model.fac_list, 129);
362 			r = 0;
363 		} else
364 			r = -EINVAL;
365 		mutex_unlock(&kvm->lock);
366 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
367 			 r ? "(not available)" : "(success)");
368 		break;
369 	case KVM_CAP_S390_RI:
370 		r = -EINVAL;
371 		mutex_lock(&kvm->lock);
372 		if (atomic_read(&kvm->online_vcpus)) {
373 			r = -EBUSY;
374 		} else if (test_facility(64)) {
375 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
376 			set_kvm_facility(kvm->arch.model.fac_list, 64);
377 			r = 0;
378 		}
379 		mutex_unlock(&kvm->lock);
380 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
381 			 r ? "(not available)" : "(success)");
382 		break;
383 	case KVM_CAP_S390_USER_STSI:
384 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
385 		kvm->arch.user_stsi = 1;
386 		r = 0;
387 		break;
388 	default:
389 		r = -EINVAL;
390 		break;
391 	}
392 	return r;
393 }
394 
395 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
396 {
397 	int ret;
398 
399 	switch (attr->attr) {
400 	case KVM_S390_VM_MEM_LIMIT_SIZE:
401 		ret = 0;
402 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
403 			 kvm->arch.mem_limit);
404 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
405 			ret = -EFAULT;
406 		break;
407 	default:
408 		ret = -ENXIO;
409 		break;
410 	}
411 	return ret;
412 }
413 
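/*
 * KVM_S390_VM_MEM_CTRL attribute handler: enable CMMA, reset all CMMA
 * states, or change the guest memory limit. A new limit replaces the
 * gmap and is only accepted while no VCPU exists.
 */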
414 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
415 {
416 	int ret;
417 	unsigned int idx;
418 	switch (attr->attr) {
419 	case KVM_S390_VM_MEM_ENABLE_CMMA:
420 		/* enable CMMA only when running in an LPAR with EDAT-1 (z10 or later) */
421 		ret = -EINVAL;
422 		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
423 			break;
424 
425 		ret = -EBUSY;
426 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
427 		mutex_lock(&kvm->lock);
428 		if (atomic_read(&kvm->online_vcpus) == 0) {
429 			kvm->arch.use_cmma = 1;
430 			ret = 0;
431 		}
432 		mutex_unlock(&kvm->lock);
433 		break;
434 	case KVM_S390_VM_MEM_CLR_CMMA:
435 		ret = -EINVAL;
436 		if (!kvm->arch.use_cmma)
437 			break;
438 
439 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
440 		mutex_lock(&kvm->lock);
441 		idx = srcu_read_lock(&kvm->srcu);
442 		s390_reset_cmma(kvm->arch.gmap->mm);
443 		srcu_read_unlock(&kvm->srcu, idx);
444 		mutex_unlock(&kvm->lock);
445 		ret = 0;
446 		break;
447 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
448 		unsigned long new_limit;
449 
450 		if (kvm_is_ucontrol(kvm))
451 			return -EINVAL;
452 
453 		if (get_user(new_limit, (u64 __user *)attr->addr))
454 			return -EFAULT;
455 
456 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
457 		    new_limit > kvm->arch.mem_limit)
458 			return -E2BIG;
459 
460 		if (!new_limit)
461 			return -EINVAL;
462 
463 		/* gmap_alloc takes last usable address */
464 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
465 			new_limit -= 1;
466 
467 		ret = -EBUSY;
468 		mutex_lock(&kvm->lock);
469 		if (atomic_read(&kvm->online_vcpus) == 0) {
470 			/* gmap_alloc will round the limit up */
471 			struct gmap *new = gmap_alloc(current->mm, new_limit);
472 
473 			if (!new) {
474 				ret = -ENOMEM;
475 			} else {
476 				gmap_free(kvm->arch.gmap);
477 				new->private = kvm;
478 				kvm->arch.gmap = new;
479 				ret = 0;
480 			}
481 		}
482 		mutex_unlock(&kvm->lock);
483 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
484 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
485 			 (void *) kvm->arch.gmap->asce);
486 		break;
487 	}
488 	default:
489 		ret = -ENXIO;
490 		break;
491 	}
492 	return ret;
493 }
494 
495 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
496 
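/*
 * Toggle AES/DEA key wrapping for the whole VM. Enabling generates fresh
 * wrapping key masks; in either case all existing VCPUs are kicked out of
 * SIE so that the updated crypto settings take effect immediately.
 */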
497 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
498 {
499 	struct kvm_vcpu *vcpu;
500 	int i;
501 
502 	if (!test_kvm_facility(kvm, 76))
503 		return -EINVAL;
504 
505 	mutex_lock(&kvm->lock);
506 	switch (attr->attr) {
507 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
508 		get_random_bytes(
509 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
510 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
511 		kvm->arch.crypto.aes_kw = 1;
512 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
513 		break;
514 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
515 		get_random_bytes(
516 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
517 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
518 		kvm->arch.crypto.dea_kw = 1;
519 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
520 		break;
521 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
522 		kvm->arch.crypto.aes_kw = 0;
523 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
524 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
525 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
526 		break;
527 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
528 		kvm->arch.crypto.dea_kw = 0;
529 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
530 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
531 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
532 		break;
533 	default:
534 		mutex_unlock(&kvm->lock);
535 		return -ENXIO;
536 	}
537 
538 	kvm_for_each_vcpu(i, vcpu, kvm) {
539 		kvm_s390_vcpu_crypto_setup(vcpu);
540 		exit_sie(vcpu);
541 	}
542 	mutex_unlock(&kvm->lock);
543 	return 0;
544 }
545 
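/*
 * KVM_S390_VM_TOD attribute handlers: the guest TOD clock is split into a
 * high part (the epoch extension, which currently must be zero) and a low
 * part (the 64-bit TOD base) applied via kvm_s390_set_tod_clock().
 */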
546 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
547 {
548 	u8 gtod_high;
549 
550 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
551 					   sizeof(gtod_high)))
552 		return -EFAULT;
553 
554 	if (gtod_high != 0)
555 		return -EINVAL;
556 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
557 
558 	return 0;
559 }
560 
561 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
562 {
563 	u64 gtod;
564 
565 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
566 		return -EFAULT;
567 
568 	kvm_s390_set_tod_clock(kvm, gtod);
569 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
570 	return 0;
571 }
572 
573 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
574 {
575 	int ret;
576 
577 	if (attr->flags)
578 		return -EINVAL;
579 
580 	switch (attr->attr) {
581 	case KVM_S390_VM_TOD_HIGH:
582 		ret = kvm_s390_set_tod_high(kvm, attr);
583 		break;
584 	case KVM_S390_VM_TOD_LOW:
585 		ret = kvm_s390_set_tod_low(kvm, attr);
586 		break;
587 	default:
588 		ret = -ENXIO;
589 		break;
590 	}
591 	return ret;
592 }
593 
594 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
595 {
596 	u8 gtod_high = 0;
597 
598 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
599 					 sizeof(gtod_high)))
600 		return -EFAULT;
601 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
602 
603 	return 0;
604 }
605 
606 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
607 {
608 	u64 gtod;
609 
610 	gtod = kvm_s390_get_tod_clock_fast(kvm);
611 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
612 		return -EFAULT;
613 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
614 
615 	return 0;
616 }
617 
618 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
619 {
620 	int ret;
621 
622 	if (attr->flags)
623 		return -EINVAL;
624 
625 	switch (attr->attr) {
626 	case KVM_S390_VM_TOD_HIGH:
627 		ret = kvm_s390_get_tod_high(kvm, attr);
628 		break;
629 	case KVM_S390_VM_TOD_LOW:
630 		ret = kvm_s390_get_tod_low(kvm, attr);
631 		break;
632 	default:
633 		ret = -ENXIO;
634 		break;
635 	}
636 	return ret;
637 }
638 
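/*
 * KVM_S390_VM_CPU_MODEL attribute handlers: userspace can set the guest
 * CPU id, IBC value and facility list (processor attribute) and query both
 * the effective processor and the host machine properties. Changes are
 * only accepted before the first VCPU is created.
 */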
639 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
640 {
641 	struct kvm_s390_vm_cpu_processor *proc;
642 	u16 lowest_ibc, unblocked_ibc;
643 	int ret = 0;
644 
645 	mutex_lock(&kvm->lock);
646 	if (atomic_read(&kvm->online_vcpus)) {
647 		ret = -EBUSY;
648 		goto out;
649 	}
650 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
651 	if (!proc) {
652 		ret = -ENOMEM;
653 		goto out;
654 	}
655 	if (!copy_from_user(proc, (void __user *)attr->addr,
656 			    sizeof(*proc))) {
657 		kvm->arch.model.cpuid = proc->cpuid;
658 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
659 		unblocked_ibc = sclp.ibc & 0xfff;
660 		if (lowest_ibc) {
661 			if (proc->ibc > unblocked_ibc)
662 				kvm->arch.model.ibc = unblocked_ibc;
663 			else if (proc->ibc < lowest_ibc)
664 				kvm->arch.model.ibc = lowest_ibc;
665 			else
666 				kvm->arch.model.ibc = proc->ibc;
667 		}
668 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
669 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
670 	} else
671 		ret = -EFAULT;
672 	kfree(proc);
673 out:
674 	mutex_unlock(&kvm->lock);
675 	return ret;
676 }
677 
678 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
679 {
680 	int ret = -ENXIO;
681 
682 	switch (attr->attr) {
683 	case KVM_S390_VM_CPU_PROCESSOR:
684 		ret = kvm_s390_set_processor(kvm, attr);
685 		break;
686 	}
687 	return ret;
688 }
689 
690 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
691 {
692 	struct kvm_s390_vm_cpu_processor *proc;
693 	int ret = 0;
694 
695 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
696 	if (!proc) {
697 		ret = -ENOMEM;
698 		goto out;
699 	}
700 	proc->cpuid = kvm->arch.model.cpuid;
701 	proc->ibc = kvm->arch.model.ibc;
702 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
703 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
704 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
705 		ret = -EFAULT;
706 	kfree(proc);
707 out:
708 	return ret;
709 }
710 
711 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
712 {
713 	struct kvm_s390_vm_cpu_machine *mach;
714 	int ret = 0;
715 
716 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
717 	if (!mach) {
718 		ret = -ENOMEM;
719 		goto out;
720 	}
721 	get_cpu_id((struct cpuid *) &mach->cpuid);
722 	mach->ibc = sclp.ibc;
723 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
724 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
725 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
726 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
727 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
728 		ret = -EFAULT;
729 	kfree(mach);
730 out:
731 	return ret;
732 }
733 
734 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
735 {
736 	int ret = -ENXIO;
737 
738 	switch (attr->attr) {
739 	case KVM_S390_VM_CPU_PROCESSOR:
740 		ret = kvm_s390_get_processor(kvm, attr);
741 		break;
742 	case KVM_S390_VM_CPU_MACHINE:
743 		ret = kvm_s390_get_machine(kvm, attr);
744 		break;
745 	}
746 	return ret;
747 }
748 
749 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
750 {
751 	int ret;
752 
753 	switch (attr->group) {
754 	case KVM_S390_VM_MEM_CTRL:
755 		ret = kvm_s390_set_mem_control(kvm, attr);
756 		break;
757 	case KVM_S390_VM_TOD:
758 		ret = kvm_s390_set_tod(kvm, attr);
759 		break;
760 	case KVM_S390_VM_CPU_MODEL:
761 		ret = kvm_s390_set_cpu_model(kvm, attr);
762 		break;
763 	case KVM_S390_VM_CRYPTO:
764 		ret = kvm_s390_vm_set_crypto(kvm, attr);
765 		break;
766 	default:
767 		ret = -ENXIO;
768 		break;
769 	}
770 
771 	return ret;
772 }
773 
774 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
775 {
776 	int ret;
777 
778 	switch (attr->group) {
779 	case KVM_S390_VM_MEM_CTRL:
780 		ret = kvm_s390_get_mem_control(kvm, attr);
781 		break;
782 	case KVM_S390_VM_TOD:
783 		ret = kvm_s390_get_tod(kvm, attr);
784 		break;
785 	case KVM_S390_VM_CPU_MODEL:
786 		ret = kvm_s390_get_cpu_model(kvm, attr);
787 		break;
788 	default:
789 		ret = -ENXIO;
790 		break;
791 	}
792 
793 	return ret;
794 }
795 
796 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
797 {
798 	int ret;
799 
800 	switch (attr->group) {
801 	case KVM_S390_VM_MEM_CTRL:
802 		switch (attr->attr) {
803 		case KVM_S390_VM_MEM_ENABLE_CMMA:
804 		case KVM_S390_VM_MEM_CLR_CMMA:
805 		case KVM_S390_VM_MEM_LIMIT_SIZE:
806 			ret = 0;
807 			break;
808 		default:
809 			ret = -ENXIO;
810 			break;
811 		}
812 		break;
813 	case KVM_S390_VM_TOD:
814 		switch (attr->attr) {
815 		case KVM_S390_VM_TOD_LOW:
816 		case KVM_S390_VM_TOD_HIGH:
817 			ret = 0;
818 			break;
819 		default:
820 			ret = -ENXIO;
821 			break;
822 		}
823 		break;
824 	case KVM_S390_VM_CPU_MODEL:
825 		switch (attr->attr) {
826 		case KVM_S390_VM_CPU_PROCESSOR:
827 		case KVM_S390_VM_CPU_MACHINE:
828 			ret = 0;
829 			break;
830 		default:
831 			ret = -ENXIO;
832 			break;
833 		}
834 		break;
835 	case KVM_S390_VM_CRYPTO:
836 		switch (attr->attr) {
837 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
838 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
839 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
840 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
841 			ret = 0;
842 			break;
843 		default:
844 			ret = -ENXIO;
845 			break;
846 		}
847 		break;
848 	default:
849 		ret = -ENXIO;
850 		break;
851 	}
852 
853 	return ret;
854 }
855 
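/*
 * KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS: bulk transfer of guest storage
 * keys. The temporary buffer is bounded by KVM_S390_SKEYS_MAX and is
 * allocated with kmalloc, falling back to vmalloc for larger counts.
 */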
856 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
857 {
858 	uint8_t *keys;
859 	uint64_t hva;
860 	unsigned long curkey;
861 	int i, r = 0;
862 
863 	if (args->flags != 0)
864 		return -EINVAL;
865 
866 	/* Is this guest using storage keys? */
867 	if (!mm_use_skey(current->mm))
868 		return KVM_S390_GET_SKEYS_NONE;
869 
870 	/* Enforce sane limit on memory allocation */
871 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
872 		return -EINVAL;
873 
874 	keys = kmalloc_array(args->count, sizeof(uint8_t),
875 			     GFP_KERNEL | __GFP_NOWARN);
876 	if (!keys)
877 		keys = vmalloc(sizeof(uint8_t) * args->count);
878 	if (!keys)
879 		return -ENOMEM;
880 
881 	for (i = 0; i < args->count; i++) {
882 		hva = gfn_to_hva(kvm, args->start_gfn + i);
883 		if (kvm_is_error_hva(hva)) {
884 			r = -EFAULT;
885 			goto out;
886 		}
887 
888 		curkey = get_guest_storage_key(current->mm, hva);
889 		if (IS_ERR_VALUE(curkey)) {
890 			r = curkey;
891 			goto out;
892 		}
893 		keys[i] = curkey;
894 	}
895 
896 	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
897 			 sizeof(uint8_t) * args->count);
898 	if (r)
899 		r = -EFAULT;
900 out:
901 	kvfree(keys);
902 	return r;
903 }
904 
905 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
906 {
907 	uint8_t *keys;
908 	uint64_t hva;
909 	int i, r = 0;
910 
911 	if (args->flags != 0)
912 		return -EINVAL;
913 
914 	/* Enforce sane limit on memory allocation */
915 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
916 		return -EINVAL;
917 
918 	keys = kmalloc_array(args->count, sizeof(uint8_t),
919 			     GFP_KERNEL | __GFP_NOWARN);
920 	if (!keys)
921 		keys = vmalloc(sizeof(uint8_t) * args->count);
922 	if (!keys)
923 		return -ENOMEM;
924 
925 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
926 			   sizeof(uint8_t) * args->count);
927 	if (r) {
928 		r = -EFAULT;
929 		goto out;
930 	}
931 
932 	/* Enable storage key handling for the guest */
933 	r = s390_enable_skey();
934 	if (r)
935 		goto out;
936 
937 	for (i = 0; i < args->count; i++) {
938 		hva = gfn_to_hva(kvm, args->start_gfn + i);
939 		if (kvm_is_error_hva(hva)) {
940 			r = -EFAULT;
941 			goto out;
942 		}
943 
944 		/* Lowest order bit is reserved */
945 		if (keys[i] & 0x01) {
946 			r = -EINVAL;
947 			goto out;
948 		}
949 
950 		r = set_guest_storage_key(current->mm, hva,
951 					  (unsigned long)keys[i], 0);
952 		if (r)
953 			goto out;
954 	}
955 out:
956 	kvfree(keys);
957 	return r;
958 }
959 
960 long kvm_arch_vm_ioctl(struct file *filp,
961 		       unsigned int ioctl, unsigned long arg)
962 {
963 	struct kvm *kvm = filp->private_data;
964 	void __user *argp = (void __user *)arg;
965 	struct kvm_device_attr attr;
966 	int r;
967 
968 	switch (ioctl) {
969 	case KVM_S390_INTERRUPT: {
970 		struct kvm_s390_interrupt s390int;
971 
972 		r = -EFAULT;
973 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
974 			break;
975 		r = kvm_s390_inject_vm(kvm, &s390int);
976 		break;
977 	}
978 	case KVM_ENABLE_CAP: {
979 		struct kvm_enable_cap cap;
980 		r = -EFAULT;
981 		if (copy_from_user(&cap, argp, sizeof(cap)))
982 			break;
983 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
984 		break;
985 	}
986 	case KVM_CREATE_IRQCHIP: {
987 		struct kvm_irq_routing_entry routing;
988 
989 		r = -EINVAL;
990 		if (kvm->arch.use_irqchip) {
991 			/* Set up dummy routing. */
992 			memset(&routing, 0, sizeof(routing));
993 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
994 		}
995 		break;
996 	}
997 	case KVM_SET_DEVICE_ATTR: {
998 		r = -EFAULT;
999 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1000 			break;
1001 		r = kvm_s390_vm_set_attr(kvm, &attr);
1002 		break;
1003 	}
1004 	case KVM_GET_DEVICE_ATTR: {
1005 		r = -EFAULT;
1006 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1007 			break;
1008 		r = kvm_s390_vm_get_attr(kvm, &attr);
1009 		break;
1010 	}
1011 	case KVM_HAS_DEVICE_ATTR: {
1012 		r = -EFAULT;
1013 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1014 			break;
1015 		r = kvm_s390_vm_has_attr(kvm, &attr);
1016 		break;
1017 	}
1018 	case KVM_S390_GET_SKEYS: {
1019 		struct kvm_s390_skeys args;
1020 
1021 		r = -EFAULT;
1022 		if (copy_from_user(&args, argp,
1023 				   sizeof(struct kvm_s390_skeys)))
1024 			break;
1025 		r = kvm_s390_get_skeys(kvm, &args);
1026 		break;
1027 	}
1028 	case KVM_S390_SET_SKEYS: {
1029 		struct kvm_s390_skeys args;
1030 
1031 		r = -EFAULT;
1032 		if (copy_from_user(&args, argp,
1033 				   sizeof(struct kvm_s390_skeys)))
1034 			break;
1035 		r = kvm_s390_set_skeys(kvm, &args);
1036 		break;
1037 	}
1038 	default:
1039 		r = -ENOTTY;
1040 	}
1041 
1042 	return r;
1043 }
1044 
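/*
 * Query the Adjunct Processor (crypto) configuration with the PQAP(QCI)
 * instruction into a 128-byte buffer; the result is used below to detect
 * whether the extended AP configuration (APXA) is installed.
 */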
1045 static int kvm_s390_query_ap_config(u8 *config)
1046 {
1047 	u32 fcn_code = 0x04000000UL;
1048 	u32 cc = 0;
1049 
1050 	memset(config, 0, 128);
1051 	asm volatile(
1052 		"lgr 0,%1\n"
1053 		"lgr 2,%2\n"
1054 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1055 		"0: ipm %0\n"
1056 		"srl %0,28\n"
1057 		"1:\n"
1058 		EX_TABLE(0b, 1b)
1059 		: "+r" (cc)
1060 		: "r" (fcn_code), "r" (config)
1061 		: "cc", "0", "2", "memory"
1062 	);
1063 
1064 	return cc;
1065 }
1066 
1067 static int kvm_s390_apxa_installed(void)
1068 {
1069 	u8 config[128];
1070 	int cc;
1071 
1072 	if (test_facility(12)) {
1073 		cc = kvm_s390_query_ap_config(config);
1074 
1075 		if (cc)
1076 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1077 		else
1078 			return config[0] & 0x40;
1079 	}
1080 
1081 	return 0;
1082 }
1083 
1084 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1085 {
1086 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1087 
1088 	if (kvm_s390_apxa_installed())
1089 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1090 	else
1091 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1092 }
1093 
1094 static u64 kvm_s390_get_initial_cpuid(void)
1095 {
1096 	struct cpuid cpuid;
1097 
1098 	get_cpu_id(&cpuid);
1099 	cpuid.version = 0xff;
1100 	return *((u64 *) &cpuid);
1101 }
1102 
1103 static void kvm_s390_crypto_init(struct kvm *kvm)
1104 {
1105 	if (!test_kvm_facility(kvm, 76))
1106 		return;
1107 
1108 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1109 	kvm_s390_set_crycb_format(kvm);
1110 
1111 	/* Enable AES/DEA protected key functions by default */
1112 	kvm->arch.crypto.aes_kw = 1;
1113 	kvm->arch.crypto.dea_kw = 1;
1114 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1115 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1116 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1117 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1118 }
1119 
1120 static void sca_dispose(struct kvm *kvm)
1121 {
1122 	if (kvm->arch.use_esca)
1123 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1124 	else
1125 		free_page((unsigned long)(kvm->arch.sca));
1126 	kvm->arch.sca = NULL;
1127 }
1128 
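/*
 * VM creation: allocate the basic SCA, the per-VM debug feature, the
 * facility mask/list and the crypto control block, then set up the gmap
 * backing the guest address space (unless this is a ucontrol VM).
 */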
1129 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1130 {
1131 	int i, rc;
1132 	char debug_name[16];
1133 	static unsigned long sca_offset;
1134 
1135 	rc = -EINVAL;
1136 #ifdef CONFIG_KVM_S390_UCONTROL
1137 	if (type & ~KVM_VM_S390_UCONTROL)
1138 		goto out_err;
1139 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1140 		goto out_err;
1141 #else
1142 	if (type)
1143 		goto out_err;
1144 #endif
1145 
1146 	rc = s390_enable_sie();
1147 	if (rc)
1148 		goto out_err;
1149 
1150 	rc = -ENOMEM;
1151 
1152 	kvm->arch.use_esca = 0; /* start with basic SCA */
1153 	rwlock_init(&kvm->arch.sca_lock);
1154 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1155 	if (!kvm->arch.sca)
1156 		goto out_err;
1157 	spin_lock(&kvm_lock);
1158 	sca_offset += 16;
1159 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1160 		sca_offset = 0;
1161 	kvm->arch.sca = (struct bsca_block *)
1162 			((char *) kvm->arch.sca + sca_offset);
1163 	spin_unlock(&kvm_lock);
1164 
1165 	sprintf(debug_name, "kvm-%u", current->pid);
1166 
1167 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1168 	if (!kvm->arch.dbf)
1169 		goto out_err;
1170 
1171 	kvm->arch.sie_page2 =
1172 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1173 	if (!kvm->arch.sie_page2)
1174 		goto out_err;
1175 
1176 	/* Populate the facility mask initially. */
1177 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1178 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1179 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1180 		if (i < kvm_s390_fac_list_mask_size())
1181 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1182 		else
1183 			kvm->arch.model.fac_mask[i] = 0UL;
1184 	}
1185 
1186 	/* Populate the facility list initially. */
1187 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1188 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1189 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1190 
1191 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1192 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1193 
1194 	kvm_s390_crypto_init(kvm);
1195 
1196 	spin_lock_init(&kvm->arch.float_int.lock);
1197 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1198 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1199 	init_waitqueue_head(&kvm->arch.ipte_wq);
1200 	mutex_init(&kvm->arch.ipte_mutex);
1201 
1202 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1203 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1204 
1205 	if (type & KVM_VM_S390_UCONTROL) {
1206 		kvm->arch.gmap = NULL;
1207 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1208 	} else {
1209 		if (sclp.hamax == U64_MAX)
1210 			kvm->arch.mem_limit = TASK_MAX_SIZE;
1211 		else
1212 			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1213 						    sclp.hamax + 1);
1214 		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1215 		if (!kvm->arch.gmap)
1216 			goto out_err;
1217 		kvm->arch.gmap->private = kvm;
1218 		kvm->arch.gmap->pfault_enabled = 0;
1219 	}
1220 
1221 	kvm->arch.css_support = 0;
1222 	kvm->arch.use_irqchip = 0;
1223 	kvm->arch.epoch = 0;
1224 
1225 	spin_lock_init(&kvm->arch.start_stop_lock);
1226 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1227 
1228 	return 0;
1229 out_err:
1230 	free_page((unsigned long)kvm->arch.sie_page2);
1231 	debug_unregister(kvm->arch.dbf);
1232 	sca_dispose(kvm);
1233 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1234 	return rc;
1235 }
1236 
1237 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1238 {
1239 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1240 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1241 	kvm_s390_clear_local_irqs(vcpu);
1242 	kvm_clear_async_pf_completion_queue(vcpu);
1243 	if (!kvm_is_ucontrol(vcpu->kvm))
1244 		sca_del_vcpu(vcpu);
1245 
1246 	if (kvm_is_ucontrol(vcpu->kvm))
1247 		gmap_free(vcpu->arch.gmap);
1248 
1249 	if (vcpu->kvm->arch.use_cmma)
1250 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1251 	free_page((unsigned long)(vcpu->arch.sie_block));
1252 
1253 	kvm_vcpu_uninit(vcpu);
1254 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1255 }
1256 
1257 static void kvm_free_vcpus(struct kvm *kvm)
1258 {
1259 	unsigned int i;
1260 	struct kvm_vcpu *vcpu;
1261 
1262 	kvm_for_each_vcpu(i, vcpu, kvm)
1263 		kvm_arch_vcpu_destroy(vcpu);
1264 
1265 	mutex_lock(&kvm->lock);
1266 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1267 		kvm->vcpus[i] = NULL;
1268 
1269 	atomic_set(&kvm->online_vcpus, 0);
1270 	mutex_unlock(&kvm->lock);
1271 }
1272 
1273 void kvm_arch_destroy_vm(struct kvm *kvm)
1274 {
1275 	kvm_free_vcpus(kvm);
1276 	sca_dispose(kvm);
1277 	debug_unregister(kvm->arch.dbf);
1278 	free_page((unsigned long)kvm->arch.sie_page2);
1279 	if (!kvm_is_ucontrol(kvm))
1280 		gmap_free(kvm->arch.gmap);
1281 	kvm_s390_destroy_adapters(kvm);
1282 	kvm_s390_clear_float_irqs(kvm);
1283 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1284 }
1285 
1286 /* Section: vcpu related */
1287 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1288 {
1289 	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1290 	if (!vcpu->arch.gmap)
1291 		return -ENOMEM;
1292 	vcpu->arch.gmap->private = vcpu->kvm;
1293 
1294 	return 0;
1295 }
1296 
1297 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1298 {
1299 	read_lock(&vcpu->kvm->arch.sca_lock);
1300 	if (vcpu->kvm->arch.use_esca) {
1301 		struct esca_block *sca = vcpu->kvm->arch.sca;
1302 
1303 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1304 		sca->cpu[vcpu->vcpu_id].sda = 0;
1305 	} else {
1306 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1307 
1308 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1309 		sca->cpu[vcpu->vcpu_id].sda = 0;
1310 	}
1311 	read_unlock(&vcpu->kvm->arch.sca_lock);
1312 }
1313 
1314 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1315 {
1316 	read_lock(&vcpu->kvm->arch.sca_lock);
1317 	if (vcpu->kvm->arch.use_esca) {
1318 		struct esca_block *sca = vcpu->kvm->arch.sca;
1319 
1320 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1321 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1322 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1323 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1324 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1325 	} else {
1326 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1327 
1328 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1329 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1330 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1331 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1332 	}
1333 	read_unlock(&vcpu->kvm->arch.sca_lock);
1334 }
1335 
1336 /* Basic SCA to Extended SCA data copy routines */
1337 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1338 {
1339 	d->sda = s->sda;
1340 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1341 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1342 }
1343 
1344 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1345 {
1346 	int i;
1347 
1348 	d->ipte_control = s->ipte_control;
1349 	d->mcn[0] = s->mcn;
1350 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1351 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1352 }
1353 
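/*
 * Migrate from the basic SCA to the extended SCA while all VCPUs are
 * blocked, repointing every SIE control block at the new origin. Invoked
 * lazily from sca_can_add_vcpu() once a VCPU id beyond the basic SCA's
 * capacity is requested.
 */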
1354 static int sca_switch_to_extended(struct kvm *kvm)
1355 {
1356 	struct bsca_block *old_sca = kvm->arch.sca;
1357 	struct esca_block *new_sca;
1358 	struct kvm_vcpu *vcpu;
1359 	unsigned int vcpu_idx;
1360 	u32 scaol, scaoh;
1361 
1362 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1363 	if (!new_sca)
1364 		return -ENOMEM;
1365 
1366 	scaoh = (u32)((u64)(new_sca) >> 32);
1367 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1368 
1369 	kvm_s390_vcpu_block_all(kvm);
1370 	write_lock(&kvm->arch.sca_lock);
1371 
1372 	sca_copy_b_to_e(new_sca, old_sca);
1373 
1374 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1375 		vcpu->arch.sie_block->scaoh = scaoh;
1376 		vcpu->arch.sie_block->scaol = scaol;
1377 		vcpu->arch.sie_block->ecb2 |= 0x04U;
1378 	}
1379 	kvm->arch.sca = new_sca;
1380 	kvm->arch.use_esca = 1;
1381 
1382 	write_unlock(&kvm->arch.sca_lock);
1383 	kvm_s390_vcpu_unblock_all(kvm);
1384 
1385 	free_page((unsigned long)old_sca);
1386 
1387 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1388 		 old_sca, kvm->arch.sca);
1389 	return 0;
1390 }
1391 
1392 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1393 {
1394 	int rc;
1395 
1396 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1397 		return true;
1398 	if (!sclp.has_esca)
1399 		return false;
1400 
1401 	mutex_lock(&kvm->lock);
1402 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1403 	mutex_unlock(&kvm->lock);
1404 
1405 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1406 }
1407 
1408 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1409 {
1410 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1411 	kvm_clear_async_pf_completion_queue(vcpu);
1412 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1413 				    KVM_SYNC_GPRS |
1414 				    KVM_SYNC_ACRS |
1415 				    KVM_SYNC_CRS |
1416 				    KVM_SYNC_ARCH0 |
1417 				    KVM_SYNC_PFAULT;
1418 	if (test_kvm_facility(vcpu->kvm, 64))
1419 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1420 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1421 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1422 	 */
1423 	if (MACHINE_HAS_VX)
1424 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1425 	else
1426 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1427 
1428 	if (kvm_is_ucontrol(vcpu->kvm))
1429 		return __kvm_ucontrol_vcpu_init(vcpu);
1430 
1431 	return 0;
1432 }
1433 
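/*
 * Guest CPU timer accounting: while a VCPU is loaded and not idle, the
 * elapsed time since cputm_start is subtracted from the SIE CPU timer on
 * demand. A raw seqcount allows other threads to read a consistent value
 * without taking a lock (see kvm_s390_get_cpu_timer()).
 */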
1434 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1435 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1436 {
1437 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1438 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1439 	vcpu->arch.cputm_start = get_tod_clock_fast();
1440 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1441 }
1442 
1443 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1444 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1445 {
1446 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1447 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1448 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1449 	vcpu->arch.cputm_start = 0;
1450 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1451 }
1452 
1453 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1454 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1455 {
1456 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1457 	vcpu->arch.cputm_enabled = true;
1458 	__start_cpu_timer_accounting(vcpu);
1459 }
1460 
1461 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1462 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1463 {
1464 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1465 	__stop_cpu_timer_accounting(vcpu);
1466 	vcpu->arch.cputm_enabled = false;
1467 }
1468 
1469 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1470 {
1471 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1472 	__enable_cpu_timer_accounting(vcpu);
1473 	preempt_enable();
1474 }
1475 
1476 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1477 {
1478 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1479 	__disable_cpu_timer_accounting(vcpu);
1480 	preempt_enable();
1481 }
1482 
1483 /* set the cpu timer - may only be called from the VCPU thread itself */
1484 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1485 {
1486 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1487 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1488 	if (vcpu->arch.cputm_enabled)
1489 		vcpu->arch.cputm_start = get_tod_clock_fast();
1490 	vcpu->arch.sie_block->cputm = cputm;
1491 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1492 	preempt_enable();
1493 }
1494 
1495 /* update and get the cpu timer - can also be called from other VCPU threads */
1496 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1497 {
1498 	unsigned int seq;
1499 	__u64 value;
1500 
1501 	if (unlikely(!vcpu->arch.cputm_enabled))
1502 		return vcpu->arch.sie_block->cputm;
1503 
1504 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1505 	do {
1506 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1507 		/*
1508 		 * If the writer would ever execute a read in the critical
1509 		 * section, e.g. in irq context, we have a deadlock.
1510 		 */
1511 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1512 		value = vcpu->arch.sie_block->cputm;
1513 		/* if cputm_start is 0, accounting is being started/stopped */
1514 		if (likely(vcpu->arch.cputm_start))
1515 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1516 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1517 	preempt_enable();
1518 	return value;
1519 }
1520 
1521 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1522 {
1523 	/* Save host register state */
1524 	save_fpu_regs();
1525 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1526 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1527 
1528 	if (MACHINE_HAS_VX)
1529 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1530 	else
1531 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1532 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1533 	if (test_fp_ctl(current->thread.fpu.fpc))
1534 		/* User space provided an invalid FPC, let's clear it */
1535 		current->thread.fpu.fpc = 0;
1536 
1537 	save_access_regs(vcpu->arch.host_acrs);
1538 	restore_access_regs(vcpu->run->s.regs.acrs);
1539 	gmap_enable(vcpu->arch.gmap);
1540 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1541 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1542 		__start_cpu_timer_accounting(vcpu);
1543 	vcpu->cpu = cpu;
1544 }
1545 
1546 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1547 {
1548 	vcpu->cpu = -1;
1549 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1550 		__stop_cpu_timer_accounting(vcpu);
1551 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1552 	gmap_disable(vcpu->arch.gmap);
1553 
1554 	/* Save guest register state */
1555 	save_fpu_regs();
1556 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1557 
1558 	/* Restore host register state */
1559 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1560 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1561 
1562 	save_access_regs(vcpu->run->s.regs.acrs);
1563 	restore_access_regs(vcpu->arch.host_acrs);
1564 }
1565 
1566 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1567 {
1568 	/* this equals the initial cpu reset in the Principles of Operation, but we don't switch to ESA */
1569 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1570 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1571 	kvm_s390_set_prefix(vcpu, 0);
1572 	kvm_s390_set_cpu_timer(vcpu, 0);
1573 	vcpu->arch.sie_block->ckc       = 0UL;
1574 	vcpu->arch.sie_block->todpr     = 0;
1575 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1576 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1577 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1578 	/* make sure the new fpc will be lazily loaded */
1579 	save_fpu_regs();
1580 	current->thread.fpu.fpc = 0;
1581 	vcpu->arch.sie_block->gbea = 1;
1582 	vcpu->arch.sie_block->pp = 0;
1583 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1584 	kvm_clear_async_pf_completion_queue(vcpu);
1585 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1586 		kvm_s390_vcpu_stop(vcpu);
1587 	kvm_s390_clear_local_irqs(vcpu);
1588 }
1589 
1590 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1591 {
1592 	mutex_lock(&vcpu->kvm->lock);
1593 	preempt_disable();
1594 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1595 	preempt_enable();
1596 	mutex_unlock(&vcpu->kvm->lock);
1597 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1598 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1599 		sca_add_vcpu(vcpu);
1600 	}
1601 
1602 }
1603 
1604 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1605 {
1606 	if (!test_kvm_facility(vcpu->kvm, 76))
1607 		return;
1608 
1609 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1610 
1611 	if (vcpu->kvm->arch.crypto.aes_kw)
1612 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1613 	if (vcpu->kvm->arch.crypto.dea_kw)
1614 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1615 
1616 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1617 }
1618 
1619 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1620 {
1621 	free_page(vcpu->arch.sie_block->cbrlo);
1622 	vcpu->arch.sie_block->cbrlo = 0;
1623 }
1624 
1625 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1626 {
1627 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1628 	if (!vcpu->arch.sie_block->cbrlo)
1629 		return -ENOMEM;
1630 
1631 	vcpu->arch.sie_block->ecb2 |= 0x80;
1632 	vcpu->arch.sie_block->ecb2 &= ~0x08;
1633 	return 0;
1634 }
1635 
1636 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1637 {
1638 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1639 
1640 	vcpu->arch.sie_block->ibc = model->ibc;
1641 	if (test_kvm_facility(vcpu->kvm, 7))
1642 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1643 }
1644 
1645 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1646 {
1647 	int rc = 0;
1648 
1649 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1650 						    CPUSTAT_SM |
1651 						    CPUSTAT_STOPPED);
1652 
1653 	if (test_kvm_facility(vcpu->kvm, 78))
1654 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1655 	else if (test_kvm_facility(vcpu->kvm, 8))
1656 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1657 
1658 	kvm_s390_vcpu_setup_model(vcpu);
1659 
1660 	vcpu->arch.sie_block->ecb = 0x02;
1661 	if (test_kvm_facility(vcpu->kvm, 9))
1662 		vcpu->arch.sie_block->ecb |= 0x04;
1663 	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1664 		vcpu->arch.sie_block->ecb |= 0x10;
1665 
1666 	if (test_kvm_facility(vcpu->kvm, 8))
1667 		vcpu->arch.sie_block->ecb2 |= 0x08;
1668 	vcpu->arch.sie_block->eca   = 0xC1002000U;
1669 	if (sclp.has_siif)
1670 		vcpu->arch.sie_block->eca |= 1;
1671 	if (sclp.has_sigpif)
1672 		vcpu->arch.sie_block->eca |= 0x10000000U;
1673 	if (test_kvm_facility(vcpu->kvm, 64))
1674 		vcpu->arch.sie_block->ecb3 |= 0x01;
1675 	if (test_kvm_facility(vcpu->kvm, 129)) {
1676 		vcpu->arch.sie_block->eca |= 0x00020000;
1677 		vcpu->arch.sie_block->ecd |= 0x20000000;
1678 	}
1679 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1680 	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1681 
1682 	if (vcpu->kvm->arch.use_cmma) {
1683 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1684 		if (rc)
1685 			return rc;
1686 	}
1687 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1688 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1689 
1690 	kvm_s390_vcpu_crypto_setup(vcpu);
1691 
1692 	return rc;
1693 }
1694 
1695 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1696 				      unsigned int id)
1697 {
1698 	struct kvm_vcpu *vcpu;
1699 	struct sie_page *sie_page;
1700 	int rc = -EINVAL;
1701 
1702 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1703 		goto out;
1704 
1705 	rc = -ENOMEM;
1706 
1707 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1708 	if (!vcpu)
1709 		goto out;
1710 
1711 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1712 	if (!sie_page)
1713 		goto out_free_cpu;
1714 
1715 	vcpu->arch.sie_block = &sie_page->sie_block;
1716 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1717 
1718 	vcpu->arch.sie_block->icpua = id;
1719 	spin_lock_init(&vcpu->arch.local_int.lock);
1720 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1721 	vcpu->arch.local_int.wq = &vcpu->wq;
1722 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1723 	seqcount_init(&vcpu->arch.cputm_seqcount);
1724 
1725 	rc = kvm_vcpu_init(vcpu, kvm, id);
1726 	if (rc)
1727 		goto out_free_sie_block;
1728 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1729 		 vcpu->arch.sie_block);
1730 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1731 
1732 	return vcpu;
1733 out_free_sie_block:
1734 	free_page((unsigned long)(vcpu->arch.sie_block));
1735 out_free_cpu:
1736 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1737 out:
1738 	return ERR_PTR(rc);
1739 }
1740 
1741 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1742 {
1743 	return kvm_s390_vcpu_has_irq(vcpu, 0);
1744 }
1745 
1746 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1747 {
1748 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1749 	exit_sie(vcpu);
1750 }
1751 
1752 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1753 {
1754 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1755 }
1756 
1757 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1758 {
1759 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1760 	exit_sie(vcpu);
1761 }
1762 
1763 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1764 {
1765 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1766 }
1767 
1768 /*
1769  * Kick a guest cpu out of SIE and wait until SIE is not running.
1770  * If the CPU is not running (e.g. waiting as idle) the function will
1771  * return immediately. */
1772 void exit_sie(struct kvm_vcpu *vcpu)
1773 {
1774 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1775 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1776 		cpu_relax();
1777 }
1778 
1779 /* Kick a guest cpu out of SIE to process a request synchronously */
1780 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1781 {
1782 	kvm_make_request(req, vcpu);
1783 	kvm_s390_vcpu_request(vcpu);
1784 }
1785 
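/*
 * gmap invalidation notifier: if the invalidated guest address covers a
 * VCPU's prefix pages, that VCPU is forced to reload its MMU state before
 * re-entering SIE.
 */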
1786 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1787 {
1788 	int i;
1789 	struct kvm *kvm = gmap->private;
1790 	struct kvm_vcpu *vcpu;
1791 
1792 	kvm_for_each_vcpu(i, vcpu, kvm) {
1793 		/* match against both prefix pages */
1794 		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1795 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1796 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1797 		}
1798 	}
1799 }
1800 
1801 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1802 {
1803 	/* kvm common code refers to this, but never calls it */
1804 	BUG();
1805 	return 0;
1806 }
1807 
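/*
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG handlers for s390 registers that are
 * not covered by the regular register ioctls: TOD programmable register,
 * epoch difference, CPU timer, clock comparator, pfault state, program
 * parameter and breaking-event address.
 */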
1808 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1809 					   struct kvm_one_reg *reg)
1810 {
1811 	int r = -EINVAL;
1812 
1813 	switch (reg->id) {
1814 	case KVM_REG_S390_TODPR:
1815 		r = put_user(vcpu->arch.sie_block->todpr,
1816 			     (u32 __user *)reg->addr);
1817 		break;
1818 	case KVM_REG_S390_EPOCHDIFF:
1819 		r = put_user(vcpu->arch.sie_block->epoch,
1820 			     (u64 __user *)reg->addr);
1821 		break;
1822 	case KVM_REG_S390_CPU_TIMER:
1823 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
1824 			     (u64 __user *)reg->addr);
1825 		break;
1826 	case KVM_REG_S390_CLOCK_COMP:
1827 		r = put_user(vcpu->arch.sie_block->ckc,
1828 			     (u64 __user *)reg->addr);
1829 		break;
1830 	case KVM_REG_S390_PFTOKEN:
1831 		r = put_user(vcpu->arch.pfault_token,
1832 			     (u64 __user *)reg->addr);
1833 		break;
1834 	case KVM_REG_S390_PFCOMPARE:
1835 		r = put_user(vcpu->arch.pfault_compare,
1836 			     (u64 __user *)reg->addr);
1837 		break;
1838 	case KVM_REG_S390_PFSELECT:
1839 		r = put_user(vcpu->arch.pfault_select,
1840 			     (u64 __user *)reg->addr);
1841 		break;
1842 	case KVM_REG_S390_PP:
1843 		r = put_user(vcpu->arch.sie_block->pp,
1844 			     (u64 __user *)reg->addr);
1845 		break;
1846 	case KVM_REG_S390_GBEA:
1847 		r = put_user(vcpu->arch.sie_block->gbea,
1848 			     (u64 __user *)reg->addr);
1849 		break;
1850 	default:
1851 		break;
1852 	}
1853 
1854 	return r;
1855 }
1856 
1857 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1858 					   struct kvm_one_reg *reg)
1859 {
1860 	int r = -EINVAL;
1861 	__u64 val;
1862 
1863 	switch (reg->id) {
1864 	case KVM_REG_S390_TODPR:
1865 		r = get_user(vcpu->arch.sie_block->todpr,
1866 			     (u32 __user *)reg->addr);
1867 		break;
1868 	case KVM_REG_S390_EPOCHDIFF:
1869 		r = get_user(vcpu->arch.sie_block->epoch,
1870 			     (u64 __user *)reg->addr);
1871 		break;
1872 	case KVM_REG_S390_CPU_TIMER:
1873 		r = get_user(val, (u64 __user *)reg->addr);
1874 		if (!r)
1875 			kvm_s390_set_cpu_timer(vcpu, val);
1876 		break;
1877 	case KVM_REG_S390_CLOCK_COMP:
1878 		r = get_user(vcpu->arch.sie_block->ckc,
1879 			     (u64 __user *)reg->addr);
1880 		break;
1881 	case KVM_REG_S390_PFTOKEN:
1882 		r = get_user(vcpu->arch.pfault_token,
1883 			     (u64 __user *)reg->addr);
1884 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1885 			kvm_clear_async_pf_completion_queue(vcpu);
1886 		break;
1887 	case KVM_REG_S390_PFCOMPARE:
1888 		r = get_user(vcpu->arch.pfault_compare,
1889 			     (u64 __user *)reg->addr);
1890 		break;
1891 	case KVM_REG_S390_PFSELECT:
1892 		r = get_user(vcpu->arch.pfault_select,
1893 			     (u64 __user *)reg->addr);
1894 		break;
1895 	case KVM_REG_S390_PP:
1896 		r = get_user(vcpu->arch.sie_block->pp,
1897 			     (u64 __user *)reg->addr);
1898 		break;
1899 	case KVM_REG_S390_GBEA:
1900 		r = get_user(vcpu->arch.sie_block->gbea,
1901 			     (u64 __user *)reg->addr);
1902 		break;
1903 	default:
1904 		break;
1905 	}
1906 
1907 	return r;
1908 }
1909 
1910 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1911 {
1912 	kvm_s390_vcpu_initial_reset(vcpu);
1913 	return 0;
1914 }
1915 
1916 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1917 {
1918 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1919 	return 0;
1920 }
1921 
1922 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1923 {
1924 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1925 	return 0;
1926 }
1927 
1928 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1929 				  struct kvm_sregs *sregs)
1930 {
1931 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1932 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1933 	restore_access_regs(vcpu->run->s.regs.acrs);
1934 	return 0;
1935 }
1936 
1937 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1938 				  struct kvm_sregs *sregs)
1939 {
1940 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1941 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1942 	return 0;
1943 }
1944 
1945 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1946 {
1947 	/* make sure the new values will be lazily loaded */
1948 	save_fpu_regs();
1949 	if (test_fp_ctl(fpu->fpc))
1950 		return -EINVAL;
1951 	current->thread.fpu.fpc = fpu->fpc;
1952 	if (MACHINE_HAS_VX)
1953 		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1954 	else
1955 		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1956 	return 0;
1957 }
1958 
1959 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1960 {
1961 	/* make sure we have the latest values */
1962 	save_fpu_regs();
1963 	if (MACHINE_HAS_VX)
1964 		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1965 	else
1966 		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1967 	fpu->fpc = current->thread.fpu.fpc;
1968 	return 0;
1969 }
1970 
1971 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1972 {
1973 	int rc = 0;
1974 
1975 	if (!is_vcpu_stopped(vcpu))
1976 		rc = -EBUSY;
1977 	else {
1978 		vcpu->run->psw_mask = psw.mask;
1979 		vcpu->run->psw_addr = psw.addr;
1980 	}
1981 	return rc;
1982 }
1983 
1984 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1985 				  struct kvm_translation *tr)
1986 {
1987 	return -EINVAL; /* not implemented yet */
1988 }
1989 
1990 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1991 			      KVM_GUESTDBG_USE_HW_BP | \
1992 			      KVM_GUESTDBG_ENABLE)
1993 
1994 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1995 					struct kvm_guest_debug *dbg)
1996 {
1997 	int rc = 0;
1998 
1999 	vcpu->guest_debug = 0;
2000 	kvm_s390_clear_bp_data(vcpu);
2001 
2002 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2003 		return -EINVAL;
2004 
2005 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2006 		vcpu->guest_debug = dbg->control;
2007 		/* enforce guest PER */
2008 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2009 
2010 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2011 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2012 	} else {
2013 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2014 		vcpu->arch.guestdbg.last_bp = 0;
2015 	}
2016 
2017 	if (rc) {
2018 		vcpu->guest_debug = 0;
2019 		kvm_s390_clear_bp_data(vcpu);
2020 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2021 	}
2022 
2023 	return rc;
2024 }
2025 
2026 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2027 				    struct kvm_mp_state *mp_state)
2028 {
2029 	/* CHECK_STOP and LOAD are not supported yet */
2030 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2031 				       KVM_MP_STATE_OPERATING;
2032 }
2033 
2034 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2035 				    struct kvm_mp_state *mp_state)
2036 {
2037 	int rc = 0;
2038 
2039 	/* user space knows about this interface - let it control the state */
2040 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2041 
2042 	switch (mp_state->mp_state) {
2043 	case KVM_MP_STATE_STOPPED:
2044 		kvm_s390_vcpu_stop(vcpu);
2045 		break;
2046 	case KVM_MP_STATE_OPERATING:
2047 		kvm_s390_vcpu_start(vcpu);
2048 		break;
2049 	case KVM_MP_STATE_LOAD:
2050 	case KVM_MP_STATE_CHECK_STOP:
2051 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2052 	default:
2053 		rc = -ENXIO;
2054 	}
2055 
2056 	return rc;
2057 }
2058 
2059 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2060 {
2061 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2062 }
2063 
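/*
 * Process all requests pending for this vcpu (prefix re-mapping, TLB flush,
 * IBS enable/disable) before SIE is re-entered; retries until no new request
 * raced in.
 */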
2064 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2065 {
2066 retry:
2067 	kvm_s390_vcpu_request_handled(vcpu);
2068 	if (!vcpu->requests)
2069 		return 0;
2070 	/*
2071 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2072 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2073 	 * This ensures that the ipte instruction for this request has
2074 	 * already finished. We might race against a second unmapper that
2075 	 * wants to set the blocking bit. Let's just retry the request loop.
2076 	 */
2077 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2078 		int rc;
2079 		rc = gmap_ipte_notify(vcpu->arch.gmap,
2080 				      kvm_s390_get_prefix(vcpu),
2081 				      PAGE_SIZE * 2);
2082 		if (rc)
2083 			return rc;
2084 		goto retry;
2085 	}
2086 
2087 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2088 		vcpu->arch.sie_block->ihcpu = 0xffff;
2089 		goto retry;
2090 	}
2091 
2092 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2093 		if (!ibs_enabled(vcpu)) {
2094 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2095 			atomic_or(CPUSTAT_IBS,
2096 					&vcpu->arch.sie_block->cpuflags);
2097 		}
2098 		goto retry;
2099 	}
2100 
2101 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2102 		if (ibs_enabled(vcpu)) {
2103 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2104 			atomic_andnot(CPUSTAT_IBS,
2105 					  &vcpu->arch.sie_block->cpuflags);
2106 		}
2107 		goto retry;
2108 	}
2109 
2110 	/* nothing to do, just clear the request */
2111 	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2112 
2113 	return 0;
2114 }
2115 
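/*
 * Set the guest TOD clock: record the epoch difference for the VM and
 * propagate it to all vcpus while they are blocked out of SIE.
 */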
2116 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2117 {
2118 	struct kvm_vcpu *vcpu;
2119 	int i;
2120 
2121 	mutex_lock(&kvm->lock);
2122 	preempt_disable();
2123 	kvm->arch.epoch = tod - get_tod_clock();
2124 	kvm_s390_vcpu_block_all(kvm);
2125 	kvm_for_each_vcpu(i, vcpu, kvm)
2126 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2127 	kvm_s390_vcpu_unblock_all(kvm);
2128 	preempt_enable();
2129 	mutex_unlock(&kvm->lock);
2130 }
2131 
2132 /**
2133  * kvm_arch_fault_in_page - fault-in guest page if necessary
2134  * @vcpu: The corresponding virtual cpu
2135  * @gpa: Guest physical address
2136  * @writable: Whether the page should be writable or not
2137  *
2138  * Make sure that a guest page has been faulted-in on the host.
2139  *
2140  * Return: Zero on success, negative error code otherwise.
2141  */
2142 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2143 {
2144 	return gmap_fault(vcpu->arch.gmap, gpa,
2145 			  writable ? FAULT_FLAG_WRITE : 0);
2146 }
2147 
2148 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2149 				      unsigned long token)
2150 {
2151 	struct kvm_s390_interrupt inti;
2152 	struct kvm_s390_irq irq;
2153 
2154 	if (start_token) {
2155 		irq.u.ext.ext_params2 = token;
2156 		irq.type = KVM_S390_INT_PFAULT_INIT;
2157 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2158 	} else {
2159 		inti.type = KVM_S390_INT_PFAULT_DONE;
2160 		inti.parm64 = token;
2161 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2162 	}
2163 }
2164 
2165 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2166 				     struct kvm_async_pf *work)
2167 {
2168 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2169 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2170 }
2171 
2172 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2173 				 struct kvm_async_pf *work)
2174 {
2175 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2176 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2177 }
2178 
2179 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2180 			       struct kvm_async_pf *work)
2181 {
2182 	/* s390 will always inject the page directly */
2183 }
2184 
2185 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2186 {
2187 	/*
2188 	 * s390 will always inject the page directly,
2189 	 * but we still want check_async_completion to clean up
2190 	 */
2191 	return true;
2192 }
2193 
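/*
 * Set up an asynchronous page fault (pfault) for the current host fault if
 * the guest has pfault handling enabled and can take the notification
 * interrupt right now; returns 0 if the fault must be resolved synchronously.
 */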
2194 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2195 {
2196 	hva_t hva;
2197 	struct kvm_arch_async_pf arch;
2198 	int rc;
2199 
2200 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2201 		return 0;
2202 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2203 	    vcpu->arch.pfault_compare)
2204 		return 0;
2205 	if (psw_extint_disabled(vcpu))
2206 		return 0;
2207 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2208 		return 0;
2209 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2210 		return 0;
2211 	if (!vcpu->arch.gmap->pfault_enabled)
2212 		return 0;
2213 
2214 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2215 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2216 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2217 		return 0;
2218 
2219 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2220 	return rc;
2221 }
2222 
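/*
 * Prepare for entering SIE: flush completed async page faults, handle pending
 * machine checks, deliver pending interrupts and process vcpu requests; with
 * guest debugging enabled, also back up and patch the guest PER registers.
 */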
2223 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2224 {
2225 	int rc, cpuflags;
2226 
2227 	/*
2228 	 * On s390, notifications for arriving pages will be delivered directly
2229 	 * to the guest, but the housekeeping for completed pfaults is
2230 	 * handled outside the worker.
2231 	 */
2232 	kvm_check_async_pf_completion(vcpu);
2233 
2234 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2235 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2236 
2237 	if (need_resched())
2238 		schedule();
2239 
2240 	if (test_cpu_flag(CIF_MCCK_PENDING))
2241 		s390_handle_mcck();
2242 
2243 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2244 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2245 		if (rc)
2246 			return rc;
2247 	}
2248 
2249 	rc = kvm_s390_handle_requests(vcpu);
2250 	if (rc)
2251 		return rc;
2252 
2253 	if (guestdbg_enabled(vcpu)) {
2254 		kvm_s390_backup_guest_per_regs(vcpu);
2255 		kvm_s390_patch_guest_per_regs(vcpu);
2256 	}
2257 
2258 	vcpu->arch.sie_block->icptcode = 0;
2259 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2260 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2261 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2262 
2263 	return 0;
2264 }
2265 
2266 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2267 {
2268 	struct kvm_s390_pgm_info pgm_info = {
2269 		.code = PGM_ADDRESSING,
2270 	};
2271 	u8 opcode, ilen;
2272 	int rc;
2273 
2274 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2275 	trace_kvm_s390_sie_fault(vcpu);
2276 
2277 	/*
2278 	 * We want to inject an addressing exception, which is defined as a
2279 	 * suppressing or terminating exception. However, since we came here
2280 	 * by a DAT access exception, the PSW still points to the faulting
2281 	 * instruction since DAT exceptions are nullifying. So we've got
2282 	 * to look up the current opcode to get the length of the instruction
2283 	 * to be able to forward the PSW.
2284 	 */
2285 	rc = read_guest_instr(vcpu, &opcode, 1);
2286 	ilen = insn_length(opcode);
2287 	if (rc < 0) {
2288 		return rc;
2289 	} else if (rc) {
2290 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2291 		 * Forward by arbitrary ilc, injection will take care of
2292 		 * nullification if necessary.
2293 		 */
2294 		pgm_info = vcpu->arch.pgm;
2295 		ilen = 4;
2296 	}
2297 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2298 	kvm_s390_forward_psw(vcpu, ilen);
2299 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2300 }
2301 
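/*
 * Post-process a SIE exit: handle intercepts in the kernel where possible,
 * otherwise fill kvm_run and return -EREMOTE so user space takes over; guest
 * page faults are resolved synchronously or turned into async page faults.
 */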
2302 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2303 {
2304 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2305 		   vcpu->arch.sie_block->icptcode);
2306 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2307 
2308 	if (guestdbg_enabled(vcpu))
2309 		kvm_s390_restore_guest_per_regs(vcpu);
2310 
2311 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2312 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2313 
2314 	if (vcpu->arch.sie_block->icptcode > 0) {
2315 		int rc = kvm_handle_sie_intercept(vcpu);
2316 
2317 		if (rc != -EOPNOTSUPP)
2318 			return rc;
2319 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2320 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2321 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2322 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2323 		return -EREMOTE;
2324 	} else if (exit_reason != -EFAULT) {
2325 		vcpu->stat.exit_null++;
2326 		return 0;
2327 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2328 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2329 		vcpu->run->s390_ucontrol.trans_exc_code =
2330 						current->thread.gmap_addr;
2331 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2332 		return -EREMOTE;
2333 	} else if (current->thread.gmap_pfault) {
2334 		trace_kvm_s390_major_guest_pfault(vcpu);
2335 		current->thread.gmap_pfault = 0;
2336 		if (kvm_arch_setup_async_pf(vcpu))
2337 			return 0;
2338 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2339 	}
2340 	return vcpu_post_run_fault_in_sie(vcpu);
2341 }
2342 
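/*
 * The inner run loop: alternate between vcpu_pre_run(), the SIE instruction
 * and vcpu_post_run() until an error occurs, a signal is pending or a guest
 * debug exit is requested. kvm->srcu is held except while the guest runs.
 */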
2343 static int __vcpu_run(struct kvm_vcpu *vcpu)
2344 {
2345 	int rc, exit_reason;
2346 
2347 	/*
2348 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2349 	 * running the guest), so that memslots (and other stuff) are protected.
2350 	 */
2351 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2352 
2353 	do {
2354 		rc = vcpu_pre_run(vcpu);
2355 		if (rc)
2356 			break;
2357 
2358 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2359 		/*
2360 		 * As PF_VCPU will be used in the fault handler, there must be
2361 		 * no uaccess between guest_enter and guest_exit.
2362 		 */
2363 		local_irq_disable();
2364 		__kvm_guest_enter();
2365 		__disable_cpu_timer_accounting(vcpu);
2366 		local_irq_enable();
2367 		exit_reason = sie64a(vcpu->arch.sie_block,
2368 				     vcpu->run->s.regs.gprs);
2369 		local_irq_disable();
2370 		__enable_cpu_timer_accounting(vcpu);
2371 		__kvm_guest_exit();
2372 		local_irq_enable();
2373 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2374 
2375 		rc = vcpu_post_run(vcpu, exit_reason);
2376 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2377 
2378 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2379 	return rc;
2380 }
2381 
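/* Copy the register state that user space marked dirty in kvm_run into the vcpu. */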
2382 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2383 {
2384 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2385 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2386 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2387 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2388 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2389 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2390 		/* some control register changes require a tlb flush */
2391 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2392 	}
2393 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2394 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2395 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2396 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2397 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2398 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2399 	}
2400 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2401 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2402 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2403 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2404 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2405 			kvm_clear_async_pf_completion_queue(vcpu);
2406 	}
2407 	kvm_run->kvm_dirty_regs = 0;
2408 }
2409 
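/* Mirror of sync_regs(): copy the current vcpu state back into kvm_run. */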
2410 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2411 {
2412 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2413 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2414 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2415 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2416 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2417 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2418 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2419 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2420 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2421 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2422 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2423 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2424 }
2425 
2426 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2427 {
2428 	int rc;
2429 	sigset_t sigsaved;
2430 
2431 	if (guestdbg_exit_pending(vcpu)) {
2432 		kvm_s390_prepare_debug_exit(vcpu);
2433 		return 0;
2434 	}
2435 
2436 	if (vcpu->sigset_active)
2437 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2438 
2439 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2440 		kvm_s390_vcpu_start(vcpu);
2441 	} else if (is_vcpu_stopped(vcpu)) {
2442 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2443 				   vcpu->vcpu_id);
2444 		return -EINVAL;
2445 	}
2446 
2447 	sync_regs(vcpu, kvm_run);
2448 	enable_cpu_timer_accounting(vcpu);
2449 
2450 	might_fault();
2451 	rc = __vcpu_run(vcpu);
2452 
2453 	if (signal_pending(current) && !rc) {
2454 		kvm_run->exit_reason = KVM_EXIT_INTR;
2455 		rc = -EINTR;
2456 	}
2457 
2458 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2459 		kvm_s390_prepare_debug_exit(vcpu);
2460 		rc = 0;
2461 	}
2462 
2463 	if (rc == -EREMOTE) {
2464 		/* userspace support is needed, kvm_run has been prepared */
2465 		rc = 0;
2466 	}
2467 
2468 	disable_cpu_timer_accounting(vcpu);
2469 	store_regs(vcpu, kvm_run);
2470 
2471 	if (vcpu->sigset_active)
2472 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2473 
2474 	vcpu->stat.exit_userspace++;
2475 	return rc;
2476 }
2477 
2478 /*
2479  * store status at address
2480  * we have two special cases:
2481  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2482  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2483  */
2484 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2485 {
2486 	unsigned char archmode = 1;
2487 	freg_t fprs[NUM_FPRS];
2488 	unsigned int px;
2489 	u64 clkcomp, cputm;
2490 	int rc;
2491 
2492 	px = kvm_s390_get_prefix(vcpu);
2493 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2494 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2495 			return -EFAULT;
2496 		gpa = 0;
2497 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2498 		if (write_guest_real(vcpu, 163, &archmode, 1))
2499 			return -EFAULT;
2500 		gpa = px;
2501 	} else
2502 		gpa -= __LC_FPREGS_SAVE_AREA;
2503 
2504 	/* manually convert vector registers if necessary */
2505 	if (MACHINE_HAS_VX) {
2506 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2507 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2508 				     fprs, 128);
2509 	} else {
2510 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2511 				     vcpu->run->s.regs.fprs, 128);
2512 	}
2513 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2514 			      vcpu->run->s.regs.gprs, 128);
2515 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2516 			      &vcpu->arch.sie_block->gpsw, 16);
2517 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2518 			      &px, 4);
2519 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2520 			      &vcpu->run->s.regs.fpc, 4);
2521 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2522 			      &vcpu->arch.sie_block->todpr, 4);
2523 	cputm = kvm_s390_get_cpu_timer(vcpu);
2524 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2525 			      &cputm, 8);
2526 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2527 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2528 			      &clkcomp, 8);
2529 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2530 			      &vcpu->run->s.regs.acrs, 64);
2531 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2532 			      &vcpu->arch.sie_block->gcr, 128);
2533 	return rc ? -EFAULT : 0;
2534 }
2535 
2536 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2537 {
2538 	/*
2539 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2540 	 * copying in vcpu load/put. Let's update our copies before we save
2541 	 * them into the save area.
2542 	 */
2543 	save_fpu_regs();
2544 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2545 	save_access_regs(vcpu->run->s.regs.acrs);
2546 
2547 	return kvm_s390_store_status_unloaded(vcpu, addr);
2548 }
2549 
2550 /*
2551  * store additional status at address
2552  */
2553 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2554 					unsigned long gpa)
2555 {
2556 	/* Only bits 0-53 are used for address formation */
2557 	if (!(gpa & ~0x3ff))
2558 		return 0;
2559 
2560 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2561 			       (void *)&vcpu->run->s.regs.vrs, 512);
2562 }
2563 
2564 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2565 {
2566 	if (!test_kvm_facility(vcpu->kvm, 129))
2567 		return 0;
2568 
2569 	/*
2570 	 * The guest VXRS are in the host VXRS due to the lazy
2571 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2572 	 * to save the current register state because we are in the
2573 	 * middle of a load/put cycle.
2574 	 *
2575 	 * Let's update our copies before we save them into the save area.
2576 	 */
2577 	save_fpu_regs();
2578 
2579 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2580 }
2581 
2582 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2583 {
2584 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2585 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2586 }
2587 
2588 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2589 {
2590 	unsigned int i;
2591 	struct kvm_vcpu *vcpu;
2592 
2593 	kvm_for_each_vcpu(i, vcpu, kvm) {
2594 		__disable_ibs_on_vcpu(vcpu);
2595 	}
2596 }
2597 
2598 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2599 {
2600 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2601 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2602 }
2603 
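/*
 * Move a vcpu from the STOPPED to the OPERATING state. IBS is only kept
 * enabled as long as a single vcpu is running, so starting a second vcpu
 * disables it on all vcpus.
 */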
2604 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2605 {
2606 	int i, online_vcpus, started_vcpus = 0;
2607 
2608 	if (!is_vcpu_stopped(vcpu))
2609 		return;
2610 
2611 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2612 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2613 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2614 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2615 
2616 	for (i = 0; i < online_vcpus; i++) {
2617 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2618 			started_vcpus++;
2619 	}
2620 
2621 	if (started_vcpus == 0) {
2622 		/* we're the only active VCPU -> speed it up */
2623 		__enable_ibs_on_vcpu(vcpu);
2624 	} else if (started_vcpus == 1) {
2625 		/*
2626 		 * As we are starting a second VCPU, we have to disable
2627 		 * the IBS facility on all VCPUs to remove potentially
2628 		 * outstanding ENABLE requests.
2629 		 */
2630 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2631 	}
2632 
2633 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2634 	/*
2635 	 * Another VCPU might have used IBS while we were offline.
2636 	 * Let's play safe and flush the VCPU at startup.
2637 	 */
2638 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2639 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2640 	return;
2641 }
2642 
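/*
 * Move a vcpu to the STOPPED state. If exactly one started vcpu remains,
 * re-enable IBS for it.
 */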
2643 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2644 {
2645 	int i, online_vcpus, started_vcpus = 0;
2646 	struct kvm_vcpu *started_vcpu = NULL;
2647 
2648 	if (is_vcpu_stopped(vcpu))
2649 		return;
2650 
2651 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2652 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2653 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2654 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2655 
2656 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2657 	kvm_s390_clear_stop_irq(vcpu);
2658 
2659 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2660 	__disable_ibs_on_vcpu(vcpu);
2661 
2662 	for (i = 0; i < online_vcpus; i++) {
2663 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2664 			started_vcpus++;
2665 			started_vcpu = vcpu->kvm->vcpus[i];
2666 		}
2667 	}
2668 
2669 	if (started_vcpus == 1) {
2670 		/*
2671 		 * As we only have one VCPU left, we want to enable the
2672 		 * IBS facility for that VCPU to speed it up.
2673 		 */
2674 		__enable_ibs_on_vcpu(started_vcpu);
2675 	}
2676 
2677 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2678 	return;
2679 }
2680 
2681 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2682 				     struct kvm_enable_cap *cap)
2683 {
2684 	int r;
2685 
2686 	if (cap->flags)
2687 		return -EINVAL;
2688 
2689 	switch (cap->cap) {
2690 	case KVM_CAP_S390_CSS_SUPPORT:
2691 		if (!vcpu->kvm->arch.css_support) {
2692 			vcpu->kvm->arch.css_support = 1;
2693 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2694 			trace_kvm_s390_enable_css(vcpu->kvm);
2695 		}
2696 		r = 0;
2697 		break;
2698 	default:
2699 		r = -EINVAL;
2700 		break;
2701 	}
2702 	return r;
2703 }
2704 
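/*
 * KVM_S390_MEM_OP: read from or write to guest logical memory on behalf of
 * user space, optionally only checking accessibility or injecting the
 * resulting program exception into the guest.
 */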
2705 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2706 				  struct kvm_s390_mem_op *mop)
2707 {
2708 	void __user *uaddr = (void __user *)mop->buf;
2709 	void *tmpbuf = NULL;
2710 	int r, srcu_idx;
2711 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2712 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2713 
2714 	if (mop->flags & ~supported_flags)
2715 		return -EINVAL;
2716 
2717 	if (mop->size > MEM_OP_MAX_SIZE)
2718 		return -E2BIG;
2719 
2720 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2721 		tmpbuf = vmalloc(mop->size);
2722 		if (!tmpbuf)
2723 			return -ENOMEM;
2724 	}
2725 
2726 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2727 
2728 	switch (mop->op) {
2729 	case KVM_S390_MEMOP_LOGICAL_READ:
2730 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2731 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2732 					    mop->size, GACC_FETCH);
2733 			break;
2734 		}
2735 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2736 		if (r == 0) {
2737 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2738 				r = -EFAULT;
2739 		}
2740 		break;
2741 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2742 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2743 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2744 					    mop->size, GACC_STORE);
2745 			break;
2746 		}
2747 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2748 			r = -EFAULT;
2749 			break;
2750 		}
2751 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2752 		break;
2753 	default:
2754 		r = -EINVAL;
2755 	}
2756 
2757 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2758 
2759 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2760 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2761 
2762 	vfree(tmpbuf);
2763 	return r;
2764 }
2765 
2766 long kvm_arch_vcpu_ioctl(struct file *filp,
2767 			 unsigned int ioctl, unsigned long arg)
2768 {
2769 	struct kvm_vcpu *vcpu = filp->private_data;
2770 	void __user *argp = (void __user *)arg;
2771 	int idx;
2772 	long r;
2773 
2774 	switch (ioctl) {
2775 	case KVM_S390_IRQ: {
2776 		struct kvm_s390_irq s390irq;
2777 
2778 		r = -EFAULT;
2779 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2780 			break;
2781 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2782 		break;
2783 	}
2784 	case KVM_S390_INTERRUPT: {
2785 		struct kvm_s390_interrupt s390int;
2786 		struct kvm_s390_irq s390irq;
2787 
2788 		r = -EFAULT;
2789 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2790 			break;
2791 		if (s390int_to_s390irq(&s390int, &s390irq))
2792 			return -EINVAL;
2793 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2794 		break;
2795 	}
2796 	case KVM_S390_STORE_STATUS:
2797 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2798 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2799 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2800 		break;
2801 	case KVM_S390_SET_INITIAL_PSW: {
2802 		psw_t psw;
2803 
2804 		r = -EFAULT;
2805 		if (copy_from_user(&psw, argp, sizeof(psw)))
2806 			break;
2807 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2808 		break;
2809 	}
2810 	case KVM_S390_INITIAL_RESET:
2811 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2812 		break;
2813 	case KVM_SET_ONE_REG:
2814 	case KVM_GET_ONE_REG: {
2815 		struct kvm_one_reg reg;
2816 		r = -EFAULT;
2817 		if (copy_from_user(&reg, argp, sizeof(reg)))
2818 			break;
2819 		if (ioctl == KVM_SET_ONE_REG)
2820 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2821 		else
2822 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2823 		break;
2824 	}
2825 #ifdef CONFIG_KVM_S390_UCONTROL
2826 	case KVM_S390_UCAS_MAP: {
2827 		struct kvm_s390_ucas_mapping ucasmap;
2828 
2829 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2830 			r = -EFAULT;
2831 			break;
2832 		}
2833 
2834 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2835 			r = -EINVAL;
2836 			break;
2837 		}
2838 
2839 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2840 				     ucasmap.vcpu_addr, ucasmap.length);
2841 		break;
2842 	}
2843 	case KVM_S390_UCAS_UNMAP: {
2844 		struct kvm_s390_ucas_mapping ucasmap;
2845 
2846 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2847 			r = -EFAULT;
2848 			break;
2849 		}
2850 
2851 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2852 			r = -EINVAL;
2853 			break;
2854 		}
2855 
2856 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2857 			ucasmap.length);
2858 		break;
2859 	}
2860 #endif
2861 	case KVM_S390_VCPU_FAULT: {
2862 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2863 		break;
2864 	}
2865 	case KVM_ENABLE_CAP:
2866 	{
2867 		struct kvm_enable_cap cap;
2868 		r = -EFAULT;
2869 		if (copy_from_user(&cap, argp, sizeof(cap)))
2870 			break;
2871 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2872 		break;
2873 	}
2874 	case KVM_S390_MEM_OP: {
2875 		struct kvm_s390_mem_op mem_op;
2876 
2877 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2878 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2879 		else
2880 			r = -EFAULT;
2881 		break;
2882 	}
2883 	case KVM_S390_SET_IRQ_STATE: {
2884 		struct kvm_s390_irq_state irq_state;
2885 
2886 		r = -EFAULT;
2887 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2888 			break;
2889 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2890 		    irq_state.len == 0 ||
2891 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2892 			r = -EINVAL;
2893 			break;
2894 		}
2895 		r = kvm_s390_set_irq_state(vcpu,
2896 					   (void __user *) irq_state.buf,
2897 					   irq_state.len);
2898 		break;
2899 	}
2900 	case KVM_S390_GET_IRQ_STATE: {
2901 		struct kvm_s390_irq_state irq_state;
2902 
2903 		r = -EFAULT;
2904 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2905 			break;
2906 		if (irq_state.len == 0) {
2907 			r = -EINVAL;
2908 			break;
2909 		}
2910 		r = kvm_s390_get_irq_state(vcpu,
2911 					   (__u8 __user *)  irq_state.buf,
2912 					   irq_state.len);
2913 		break;
2914 	}
2915 	default:
2916 		r = -ENOTTY;
2917 	}
2918 	return r;
2919 }
2920 
2921 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2922 {
2923 #ifdef CONFIG_KVM_S390_UCONTROL
2924 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2925 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2926 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2927 		get_page(vmf->page);
2928 		return 0;
2929 	}
2930 #endif
2931 	return VM_FAULT_SIGBUS;
2932 }
2933 
2934 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2935 			    unsigned long npages)
2936 {
2937 	return 0;
2938 }
2939 
2940 /* Section: memory related */
2941 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2942 				   struct kvm_memory_slot *memslot,
2943 				   const struct kvm_userspace_memory_region *mem,
2944 				   enum kvm_mr_change change)
2945 {
2946 	/* A few sanity checks. Memory slots have to start and end at a
2947 	   segment boundary (1 MB). The memory in userland may be fragmented
2948 	   into various different vmas. It is okay to mmap() and munmap()
2949 	   parts of this slot at any time after this call. */
2950 
2951 	if (mem->userspace_addr & 0xffffful)
2952 		return -EINVAL;
2953 
2954 	if (mem->memory_size & 0xffffful)
2955 		return -EINVAL;
2956 
2957 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2958 		return -EINVAL;
2959 
2960 	return 0;
2961 }
2962 
2963 void kvm_arch_commit_memory_region(struct kvm *kvm,
2964 				const struct kvm_userspace_memory_region *mem,
2965 				const struct kvm_memory_slot *old,
2966 				const struct kvm_memory_slot *new,
2967 				enum kvm_mr_change change)
2968 {
2969 	int rc;
2970 
2971 	/* If the basics of the memslot do not change, we do not want
2972 	 * to update the gmap. Every update causes several unnecessary
2973 	 * segment translation exceptions. This is usually handled just
2974 	 * fine by the normal fault handler + gmap, but it will also
2975 	 * cause faults on the prefix page of running guest CPUs.
2976 	 */
2977 	if (old->userspace_addr == mem->userspace_addr &&
2978 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2979 	    old->npages * PAGE_SIZE == mem->memory_size)
2980 		return;
2981 
2982 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2983 		mem->guest_phys_addr, mem->memory_size);
2984 	if (rc)
2985 		pr_warn("failed to commit memory region\n");
2986 	return;
2987 }
2988 
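/*
 * Build a mask limiting which facility bits of facility-list word i may be
 * reported to guests, derived from the SCLP hmfai field (used by
 * kvm_s390_init() below to trim kvm_s390_fac_list_mask).
 */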
2989 static inline unsigned long nonhyp_mask(int i)
2990 {
2991 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
2992 
2993 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
2994 }
2995 
2996 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
2997 {
2998 	vcpu->valid_wakeup = false;
2999 }
3000 
3001 static int __init kvm_s390_init(void)
3002 {
3003 	int i;
3004 
3005 	if (!sclp.has_sief2) {
3006 		pr_info("SIE not available\n");
3007 		return -ENODEV;
3008 	}
3009 
3010 	for (i = 0; i < 16; i++)
3011 		kvm_s390_fac_list_mask[i] |=
3012 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3013 
3014 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3015 }
3016 
3017 static void __exit kvm_s390_exit(void)
3018 {
3019 	kvm_exit();
3020 }
3021 
3022 module_init(kvm_s390_init);
3023 module_exit(kvm_s390_exit);
3024 
3025 /*
3026  * Enable autoloading of the kvm module.
3027  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3028  * since x86 takes a different approach.
3029  */
3030 #include <linux/miscdevice.h>
3031 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3032 MODULE_ALIAS("devname:kvm");
3033