xref: /linux/arch/s390/kvm/kvm-s390.c (revision cf2f33a4e54096f90652cca3511fd6a456ea5abe)
/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
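
/*
 * A worked example of the facility numbering used above (MSB-first, as
 * stored by the stfle instruction): each u64 covers 64 facility bits, so
 * facility n lives in element n / 64 at bit 1UL << (63 - (n % 64)).
 * Facility 129 (vector), for instance, would fall into a third element
 * that this two-element mask does not provide, so it stays off by default
 * and is only enabled on explicit request via
 * KVM_CAP_S390_VECTOR_REGISTERS (see kvm_vm_ioctl_enable_cap() below).
 */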

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
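
/*
 * The guest TOD clock is the host TOD plus the per-VM epoch (see
 * kvm_s390_get_tod_low() below, which computes gtod = host_tod + epoch).
 * A host clock step of +delta therefore has to be answered with
 * epoch -= delta to keep the guest view stable: with host_tod = 1000 and
 * epoch = 50 the guest reads 1050; after a host step to 1010 (delta = 10)
 * the epoch becomes 40 and the guest still reads 1050.
 */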

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}
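
/*
 * Userspace probes these capabilities with the KVM_CHECK_EXTENSION ioctl
 * on the VM file descriptor. A minimal sketch (hypothetical userspace
 * code, assuming a vm_fd obtained via KVM_CREATE_VM and <linux/kvm.h>):
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (max > 0)
 *		;	// KVM_S390_MEM_OP transfers of up to max bytes work
 *
 * A return value of 0 means the capability is not available.
 */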

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
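
/*
 * These VM capabilities are switched on with the KVM_ENABLE_CAP ioctl on
 * the VM fd; cap->flags must be zero or -EINVAL is returned. A sketch of
 * hypothetical userspace code:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		;	// not supported, SIGP orders stay kernel-handled
 */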

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.gmap->asce_end);
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (new_limit > kvm->arch.gmap->asce_end)
			return -E2BIG;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *cur_vcpu;
	unsigned int vcpu_idx;
	u64 host_tod, gtod;
	int r;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = gtod - host_tod;
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 host_tod, gtod;
	int r;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	preempt_disable();
	gtod = host_tod + kvm->arch.epoch;
	preempt_enable();
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
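
/*
 * All of the attribute groups above are driven through the generic
 * KVM_HAS_DEVICE_ATTR/KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR ioctls on
 * the VM fd. Setting the guest TOD base, for example (hypothetical
 * userspace code; gtod is a u64 the caller provides):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&gtod,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
 *		ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */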

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
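
/*
 * A sketch of how userspace might pull storage keys for migration
 * (hypothetical code; one key byte is transferred per guest frame):
 *
 *	uint8_t keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	long r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE says the guest never used
 * storage keys, so there is nothing to transfer.
 */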

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
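
/*
 * A note on the inline asm above: the QCI function code is loaded into
 * GR0 and the address of the 128-byte config block into GR2, the
 * .long 0xb2af0000 encodes the PQAP instruction, and ipm/srl extract the
 * resulting condition code into cc. The EX_TABLE entry makes a program
 * check on PQAP (e.g. when the AP instructions are not installed) resume
 * at label 1 instead of oopsing; kvm_s390_apxa_installed() below
 * additionally guards the call with test_facility() checks.
 */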

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum number of facility bits is 16 kbit,
	 * which takes 2 kbyte of memory to store. We therefore need a full
	 * page to hold the guest facility list (arch.model.fac->list) and
	 * the facility mask (arch.model.fac->mask). Its address must fit
	 * into 31 bits and be word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}

1287  * Backs up the current FP/VX register save area on a particular
1288  * destination.  Used to switch between different register save
1289  * areas.
1290  */
1291 static inline void save_fpu_to(struct fpu *dst)
1292 {
1293 	dst->fpc = current->thread.fpu.fpc;
1294 	dst->flags = current->thread.fpu.flags;
1295 	dst->regs = current->thread.fpu.regs;
1296 }
1297 
1298 /*
1299  * Switches the FP/VX register save area from which to lazy
1300  * restore register contents.
1301  */
1302 static inline void load_fpu_from(struct fpu *from)
1303 {
1304 	current->thread.fpu.fpc = from->fpc;
1305 	current->thread.fpu.flags = from->flags;
1306 	current->thread.fpu.regs = from->regs;
1307 }
1308 

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	save_fpu_to(&vcpu->arch.host_fpregs);

	if (test_kvm_facility(vcpu->kvm, 129)) {
		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
		current->thread.fpu.flags = FPU_USE_VX;
		/*
		 * Use the register save area in the SIE-control block
		 * for register restore and save in kvm_arch_vcpu_put()
		 */
		current->thread.fpu.vxrs =
			(__vector128 *)&vcpu->run->s.regs.vrs;
		/* Always enable the vector extension for KVM */
		__ctl_set_vx();
	} else
		load_fpu_from(&vcpu->arch.guest_fpregs);

	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	save_fpu_regs();

	if (test_kvm_facility(vcpu->kvm, 129))
		/*
		 * kvm_arch_vcpu_load() pointed the register save area at
		 * &vcpu->run->s.regs.vrs, so the vector registers are
		 * already saved.  Only the floating-point control must be
		 * copied.
		 */
		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	load_fpu_from(&vcpu->arch.host_fpregs);

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm     = 0UL;
	vcpu->arch.sie_block->ckc       = 0UL;
	vcpu->arch.sie_block->todpr     = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
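
/*
 * The raw bits above: 0x80 in ecb2 is what later kernel versions name
 * ECB2_CMMA (interpretation of the ESSA instruction, with the cbrlo page
 * as collection buffer), and 0x08 is ECB2_PFMFI (PFMF interpretation,
 * enabled by default in kvm_arch_vcpu_setup() below), which is switched
 * off while CMMA is in use.
 */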

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb   = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2  = 8;
	vcpu->arch.sie_block->eca   = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	/*
	 * Allocate a save area for floating-point registers.  If the vector
	 * extension is available, register contents are saved in the SIE
	 * control block.  The allocated save area is still required in
	 * particular places, for example, in kvm_s390_vcpu_store_status().
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
	if (!vcpu->arch.guest_fpregs.fprs) {
		rc = -ENOMEM;
		goto out_free_sie_block;
	}

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
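
/*
 * These registers pair with the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG
 * vcpu ioctls. Reading the guest CPU timer, for example (hypothetical
 * userspace code):
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */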

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	save_fpu_regs();
	load_fpu_from(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_set_mask(CPUSTAT_IBS,
					&vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_clear_mask(CPUSTAT_IBS,
					  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
1917 
1918 /**
1919  * kvm_arch_fault_in_page - fault-in guest page if necessary
1920  * @vcpu: The corresponding virtual cpu
1921  * @gpa: Guest physical address
1922  * @writable: Whether the page should be writable or not
1923  *
1924  * Make sure that a guest page has been faulted-in on the host.
1925  *
1926  * Return: Zero on success, negative error code otherwise.
1927  */
1928 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1929 {
1930 	return gmap_fault(vcpu->arch.gmap, gpa,
1931 			  writable ? FAULT_FLAG_WRITE : 0);
1932 }
1933 
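/*
 * Inject a pfault token into the guest: INIT tokens are delivered as a
 * local interrupt on this vcpu, DONE tokens as a floating interrupt on
 * the vm, matching the two halves of the pfault handshake.
 */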
1934 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1935 				      unsigned long token)
1936 {
1937 	struct kvm_s390_interrupt inti;
1938 	struct kvm_s390_irq irq;
1939 
1940 	if (start_token) {
1941 		irq.u.ext.ext_params2 = token;
1942 		irq.type = KVM_S390_INT_PFAULT_INIT;
1943 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1944 	} else {
1945 		inti.type = KVM_S390_INT_PFAULT_DONE;
1946 		inti.parm64 = token;
1947 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1948 	}
1949 }
1950 
1951 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1952 				     struct kvm_async_pf *work)
1953 {
1954 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1955 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1956 }
1957 
1958 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1959 				 struct kvm_async_pf *work)
1960 {
1961 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1962 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1963 }
1964 
1965 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1966 			       struct kvm_async_pf *work)
1967 {
1968 	/* s390 will always inject the page directly */
1969 }
1970 
1971 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1972 {
1973 	/*
1974 	 * s390 will always inject the page directly,
1975 	 * but we still want check_async_completion to clean up
1976 	 */
1977 	return true;
1978 }
1979 
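/*
 * Try to arm an async pfault for the current host fault. This only
 * happens if the guest has pfault handling set up: a valid token, a
 * matching PSW mask, external interrupts and the 0x200 subclass bit
 * enabled in CR0, and no interrupt already pending. A return value of
 * 0 tells the caller to fall back to synchronous fault-in.
 */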
1980 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1981 {
1982 	hva_t hva;
1983 	struct kvm_arch_async_pf arch;
1984 	int rc;
1985 
1986 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1987 		return 0;
1988 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1989 	    vcpu->arch.pfault_compare)
1990 		return 0;
1991 	if (psw_extint_disabled(vcpu))
1992 		return 0;
1993 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1994 		return 0;
1995 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1996 		return 0;
1997 	if (!vcpu->arch.gmap->pfault_enabled)
1998 		return 0;
1999 
2000 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2001 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2002 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2003 		return 0;
2004 
2005 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2006 	return rc;
2007 }
2008 
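/*
 * Per-iteration work before entering SIE: complete finished pfaults,
 * propagate r14/r15 into the SIE block, give the scheduler and the
 * machine check handler a chance to run, deliver pending interrupts,
 * process vcpu requests and patch in PER state for guest debugging.
 */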
2009 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2010 {
2011 	int rc, cpuflags;
2012 
2013 	/*
2014 	 * On s390, notifications for arriving pages will be delivered directly
2015 	 * to the guest, but the housekeeping for completed pfaults is
2016 	 * handled outside the worker.
2017 	 */
2018 	kvm_check_async_pf_completion(vcpu);
2019 
2020 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2021 
2022 	if (need_resched())
2023 		schedule();
2024 
2025 	if (test_cpu_flag(CIF_MCCK_PENDING))
2026 		s390_handle_mcck();
2027 
2028 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2029 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2030 		if (rc)
2031 			return rc;
2032 	}
2033 
2034 	rc = kvm_s390_handle_requests(vcpu);
2035 	if (rc)
2036 		return rc;
2037 
2038 	if (guestdbg_enabled(vcpu)) {
2039 		kvm_s390_backup_guest_per_regs(vcpu);
2040 		kvm_s390_patch_guest_per_regs(vcpu);
2041 	}
2042 
2043 	vcpu->arch.sie_block->icptcode = 0;
2044 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2045 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2046 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2047 
2048 	return 0;
2049 }
2050 
2051 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2052 {
2053 	psw_t *psw = &vcpu->arch.sie_block->gpsw;
2054 	u8 opcode;
2055 	int rc;
2056 
2057 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2058 	trace_kvm_s390_sie_fault(vcpu);
2059 
2060 	/*
2061 	 * We want to inject an addressing exception, which is defined as a
2062 	 * suppressing or terminating exception. However, since we came here
2063 	 * via a DAT access exception, which is nullifying, the PSW still
2064 	 * points to the faulting instruction. So we have to look up the
2065 	 * current opcode to get the length of the instruction and be able
2066 	 * to forward the PSW.
2067 	 */
2068 	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2069 	if (rc)
2070 		return kvm_s390_inject_prog_cond(vcpu, rc);
2071 	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2072 
2073 	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2074 }
2075 
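/*
 * Post-SIE processing. exit_reason >= 0 is a regular exit; a negative
 * value means the host faulted in SIE and is resolved as a ucontrol
 * exit to userspace, an async pfault, a synchronous fault-in, or,
 * failing all that, an addressing exception injected into the guest.
 */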
2076 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2077 {
2078 	int rc = -1;
2079 
2080 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2081 		   vcpu->arch.sie_block->icptcode);
2082 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2083 
2084 	if (guestdbg_enabled(vcpu))
2085 		kvm_s390_restore_guest_per_regs(vcpu);
2086 
2087 	if (exit_reason >= 0) {
2088 		rc = 0;
2089 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2090 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2091 		vcpu->run->s390_ucontrol.trans_exc_code =
2092 						current->thread.gmap_addr;
2093 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2094 		rc = -EREMOTE;
2095 
2096 	} else if (current->thread.gmap_pfault) {
2097 		trace_kvm_s390_major_guest_pfault(vcpu);
2098 		current->thread.gmap_pfault = 0;
2099 		if (kvm_arch_setup_async_pf(vcpu)) {
2100 			rc = 0;
2101 		} else {
2102 			gpa_t gpa = current->thread.gmap_addr;
2103 			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2104 		}
2105 	}
2106 
2107 	if (rc == -1)
2108 		rc = vcpu_post_run_fault_in_sie(vcpu);
2109 
2110 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2111 
2112 	if (rc == 0) {
2113 		if (kvm_is_ucontrol(vcpu->kvm))
2114 			/* Don't exit for host interrupts. */
2115 			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2116 		else
2117 			rc = kvm_handle_sie_intercept(vcpu);
2118 	}
2119 
2120 	return rc;
2121 }
2122 
2123 static int __vcpu_run(struct kvm_vcpu *vcpu)
2124 {
2125 	int rc, exit_reason;
2126 
2127 	/*
2128 	 * We try to hold kvm->srcu during most of vcpu_run (except when
2129 	 * running the guest), so that memslots (and other stuff) are protected.
2130 	 */
2131 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2132 
2133 	do {
2134 		rc = vcpu_pre_run(vcpu);
2135 		if (rc)
2136 			break;
2137 
2138 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2139 		/*
2140 		 * As PF_VCPU will be used in the fault handler, there should
2141 		 * be no uaccess between guest_enter and guest_exit.
2142 		 */
2143 		local_irq_disable();
2144 		__kvm_guest_enter();
2145 		local_irq_enable();
2146 		exit_reason = sie64a(vcpu->arch.sie_block,
2147 				     vcpu->run->s.regs.gprs);
2148 		local_irq_disable();
2149 		__kvm_guest_exit();
2150 		local_irq_enable();
2151 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2152 
2153 		rc = vcpu_post_run(vcpu, exit_reason);
2154 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2155 
2156 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2157 	return rc;
2158 }
2159 
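/*
 * Copy the psw and any register state userspace marked dirty (prefix,
 * control registers, timers, pfault parameters) from kvm_run into the
 * vcpu before running; store_regs() below is the inverse on the way
 * back to userspace.
 */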
2160 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2161 {
2162 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2163 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2164 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2165 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2166 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2167 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2168 		/* some control register changes require a tlb flush */
2169 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2170 	}
2171 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2172 		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2173 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2174 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2175 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2176 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2177 	}
2178 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2179 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2180 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2181 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2182 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2183 			kvm_clear_async_pf_completion_queue(vcpu);
2184 	}
2185 	kvm_run->kvm_dirty_regs = 0;
2186 }
2187 
2188 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2189 {
2190 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2191 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2192 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2193 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2194 	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2195 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2196 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2197 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2198 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2199 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2200 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2201 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2202 }
2203 
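/*
 * The KVM_RUN ioctl: run the guest until an exit that must be handled
 * in userspace. -EOPNOTSUPP from the intercept handlers becomes a
 * KVM_EXIT_S390_SIEIC exit; -EREMOTE means kvm_run was already
 * prepared. Illustrative userspace loop (a sketch, not part of this
 * file; vcpu_fd, run and handle_sieic are placeholders):
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, NULL) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);
 *	}
 */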
2204 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2205 {
2206 	int rc;
2207 	sigset_t sigsaved;
2208 
2209 	if (guestdbg_exit_pending(vcpu)) {
2210 		kvm_s390_prepare_debug_exit(vcpu);
2211 		return 0;
2212 	}
2213 
2214 	if (vcpu->sigset_active)
2215 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2216 
2217 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2218 		kvm_s390_vcpu_start(vcpu);
2219 	} else if (is_vcpu_stopped(vcpu)) {
2220 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2221 				   vcpu->vcpu_id);
2222 		return -EINVAL;
2223 	}
2224 
2225 	sync_regs(vcpu, kvm_run);
2226 
2227 	might_fault();
2228 	rc = __vcpu_run(vcpu);
2229 
2230 	if (signal_pending(current) && !rc) {
2231 		kvm_run->exit_reason = KVM_EXIT_INTR;
2232 		rc = -EINTR;
2233 	}
2234 
2235 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2236 		kvm_s390_prepare_debug_exit(vcpu);
2237 		rc = 0;
2238 	}
2239 
2240 	if (rc == -EOPNOTSUPP) {
2241 		/* intercept cannot be handled in-kernel, prepare kvm-run */
2242 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2243 		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2244 		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2245 		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2246 		rc = 0;
2247 	}
2248 
2249 	if (rc == -EREMOTE) {
2250 		/* the intercept was handled, but userspace support is needed;
2251 		 * kvm_run has been prepared by the handler */
2252 		rc = 0;
2253 	}
2254 
2255 	store_regs(vcpu, kvm_run);
2256 
2257 	if (vcpu->sigset_active)
2258 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2259 
2260 	vcpu->stat.exit_userspace++;
2261 	return rc;
2262 }
2263 
2264 /*
2265  * store status at address
2266  * we have two special cases:
2267  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2268  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2269  */
2270 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2271 {
2272 	unsigned char archmode = 1;
2273 	unsigned int px;
2274 	u64 clkcomp;
2275 	int rc;
2276 
2277 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2278 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2279 			return -EFAULT;
2280 		gpa = SAVE_AREA_BASE;
2281 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2282 		if (write_guest_real(vcpu, 163, &archmode, 1))
2283 			return -EFAULT;
2284 		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2285 	}
2286 	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2287 			     vcpu->arch.guest_fpregs.fprs, 128);
2288 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2289 			      vcpu->run->s.regs.gprs, 128);
2290 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2291 			      &vcpu->arch.sie_block->gpsw, 16);
2292 	px = kvm_s390_get_prefix(vcpu);
2293 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2294 			      &px, 4);
2295 	rc |= write_guest_abs(vcpu,
2296 			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2297 			      &vcpu->arch.guest_fpregs.fpc, 4);
2298 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2299 			      &vcpu->arch.sie_block->todpr, 4);
2300 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2301 			      &vcpu->arch.sie_block->cputm, 8);
2302 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2303 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2304 			      &clkcomp, 8);
2305 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2306 			      &vcpu->run->s.regs.acrs, 64);
2307 	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2308 			      &vcpu->arch.sie_block->gcr, 128);
2309 	return rc ? -EFAULT : 0;
2310 }
2311 
2312 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2313 {
2314 	/*
2315 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2316 	 * copying in vcpu load/put. Let's update our copies before we save
2317 	 * them into the save area.
2318 	 */
2319 	save_fpu_regs();
2320 	if (test_kvm_facility(vcpu->kvm, 129)) {
2321 		/*
2322 		 * If the vector extension is available, the vector registers
2323 		 * which overlap the floating-point registers are saved in
2324 		 * the SIE-control block.  Hence, extract the floating-point
2325 		 * registers and the FPC value and store them in the
2326 		 * guest_fpregs structure.
2327 		 */
2328 		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
2329 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2330 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2331 				 current->thread.fpu.vxrs);
2332 	} else
2333 		save_fpu_to(&vcpu->arch.guest_fpregs);
2334 	save_access_regs(vcpu->run->s.regs.acrs);
2335 
2336 	return kvm_s390_store_status_unloaded(vcpu, addr);
2337 }
2338 
2339 /*
2340  * store additional status at address
2341  */
2342 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2343 					unsigned long gpa)
2344 {
2345 	/* Only bits 0-53 are used for address formation */
2346 	if (!(gpa & ~0x3ff))
2347 		return 0;
2348 
2349 	return write_guest_abs(vcpu, gpa & ~0x3ff,
2350 			       (void *)&vcpu->run->s.regs.vrs, 512);
2351 }
2352 
2353 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2354 {
2355 	if (!test_kvm_facility(vcpu->kvm, 129))
2356 		return 0;
2357 
2358 	/*
2359 	 * The guest VXRS are in the host VXRS due to the lazy
2360 	 * copying in vcpu load/put. We can simply call save_fpu_regs()
2361 	 * to save the current register state because we are in the
2362 	 * middle of a load/put cycle.
2363 	 *
2364 	 * Let's update our copies before we save them into the save area.
2365 	 */
2366 	save_fpu_regs();
2367 
2368 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2369 }
2370 
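/*
 * IBS may only be active while a single vcpu is running. The helpers
 * below first cancel a still-pending opposite request and then queue
 * the new one, so enable and disable cannot end up both outstanding.
 */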
2371 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2372 {
2373 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2374 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2375 }
2376 
2377 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2378 {
2379 	unsigned int i;
2380 	struct kvm_vcpu *vcpu;
2381 
2382 	kvm_for_each_vcpu(i, vcpu, kvm) {
2383 		__disable_ibs_on_vcpu(vcpu);
2384 	}
2385 }
2386 
2387 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2388 {
2389 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2390 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2391 }
2392 
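/*
 * Move a vcpu out of the STOPPED state. If it becomes the only running
 * vcpu, IBS is enabled to speed it up; when a second vcpu starts, IBS
 * is disabled on all vcpus again.
 */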
2393 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2394 {
2395 	int i, online_vcpus, started_vcpus = 0;
2396 
2397 	if (!is_vcpu_stopped(vcpu))
2398 		return;
2399 
2400 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2401 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2402 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2403 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2404 
2405 	for (i = 0; i < online_vcpus; i++) {
2406 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2407 			started_vcpus++;
2408 	}
2409 
2410 	if (started_vcpus == 0) {
2411 		/* we're the only active VCPU -> speed it up */
2412 		__enable_ibs_on_vcpu(vcpu);
2413 	} else if (started_vcpus == 1) {
2414 		/*
2415 		 * As we are starting a second VCPU, we have to disable
2416 		 * the IBS facility on all VCPUs to remove potentially
2417 		 * outstanding ENABLE requests.
2418 		 */
2419 		__disable_ibs_on_all_vcpus(vcpu->kvm);
2420 	}
2421 
2422 	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2423 	/*
2424 	 * Another VCPU might have used IBS while we were offline.
2425 	 * Let's play safe and flush the VCPU at startup.
2426 	 */
2427 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2428 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2430 }
2431 
2432 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2433 {
2434 	int i, online_vcpus, started_vcpus = 0;
2435 	struct kvm_vcpu *started_vcpu = NULL;
2436 
2437 	if (is_vcpu_stopped(vcpu))
2438 		return;
2439 
2440 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2441 	/* Only one cpu at a time may enter/leave the STOPPED state. */
2442 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2443 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2444 
2445 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2446 	kvm_s390_clear_stop_irq(vcpu);
2447 
2448 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2449 	__disable_ibs_on_vcpu(vcpu);
2450 
2451 	for (i = 0; i < online_vcpus; i++) {
2452 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2453 			started_vcpus++;
2454 			started_vcpu = vcpu->kvm->vcpus[i];
2455 		}
2456 	}
2457 
2458 	if (started_vcpus == 1) {
2459 		/*
2460 		 * As we only have one VCPU left, we want to enable the
2461 		 * IBS facility for that VCPU to speed it up.
2462 		 */
2463 		__enable_ibs_on_vcpu(started_vcpu);
2464 	}
2465 
2466 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2468 }
2469 
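/*
 * Enable an optional capability for a single vcpu. Currently only
 * KVM_CAP_S390_CSS_SUPPORT is handled here; it routes channel I/O
 * instructions to userspace.
 */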
2470 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2471 				     struct kvm_enable_cap *cap)
2472 {
2473 	int r;
2474 
2475 	if (cap->flags)
2476 		return -EINVAL;
2477 
2478 	switch (cap->cap) {
2479 	case KVM_CAP_S390_CSS_SUPPORT:
2480 		if (!vcpu->kvm->arch.css_support) {
2481 			vcpu->kvm->arch.css_support = 1;
2482 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2483 			trace_kvm_s390_enable_css(vcpu->kvm);
2484 		}
2485 		r = 0;
2486 		break;
2487 	default:
2488 		r = -EINVAL;
2489 		break;
2490 	}
2491 	return r;
2492 }
2493 
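/*
 * The KVM_S390_MEM_OP ioctl: read or write guest logical memory on
 * behalf of userspace, bounced through a temporary kernel buffer; with
 * KVM_S390_MEMOP_F_CHECK_ONLY only the access itself is validated.
 * Illustrative userspace usage (a sketch, not part of this file;
 * vcpu_fd, guest_addr and buf are placeholders):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */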
2494 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2495 				  struct kvm_s390_mem_op *mop)
2496 {
2497 	void __user *uaddr = (void __user *)mop->buf;
2498 	void *tmpbuf = NULL;
2499 	int r, srcu_idx;
2500 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2501 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2502 
2503 	if (mop->flags & ~supported_flags)
2504 		return -EINVAL;
2505 
2506 	if (mop->size > MEM_OP_MAX_SIZE)
2507 		return -E2BIG;
2508 
2509 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2510 		tmpbuf = vmalloc(mop->size);
2511 		if (!tmpbuf)
2512 			return -ENOMEM;
2513 	}
2514 
2515 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2516 
2517 	switch (mop->op) {
2518 	case KVM_S390_MEMOP_LOGICAL_READ:
2519 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2520 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2521 			break;
2522 		}
2523 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2524 		if (r == 0) {
2525 			if (copy_to_user(uaddr, tmpbuf, mop->size))
2526 				r = -EFAULT;
2527 		}
2528 		break;
2529 	case KVM_S390_MEMOP_LOGICAL_WRITE:
2530 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2531 			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2532 			break;
2533 		}
2534 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2535 			r = -EFAULT;
2536 			break;
2537 		}
2538 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2539 		break;
2540 	default:
2541 		r = -EINVAL;
2542 	}
2543 
2544 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2545 
2546 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2547 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2548 
2549 	vfree(tmpbuf);
2550 	return r;
2551 }
2552 
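/* Dispatch the s390-specific vcpu ioctls. */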
2553 long kvm_arch_vcpu_ioctl(struct file *filp,
2554 			 unsigned int ioctl, unsigned long arg)
2555 {
2556 	struct kvm_vcpu *vcpu = filp->private_data;
2557 	void __user *argp = (void __user *)arg;
2558 	int idx;
2559 	long r;
2560 
2561 	switch (ioctl) {
2562 	case KVM_S390_IRQ: {
2563 		struct kvm_s390_irq s390irq;
2564 
2565 		r = -EFAULT;
2566 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2567 			break;
2568 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2569 		break;
2570 	}
2571 	case KVM_S390_INTERRUPT: {
2572 		struct kvm_s390_interrupt s390int;
2573 		struct kvm_s390_irq s390irq;
2574 
2575 		r = -EFAULT;
2576 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2577 			break;
2578 		if (s390int_to_s390irq(&s390int, &s390irq))
2579 			return -EINVAL;
2580 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2581 		break;
2582 	}
2583 	case KVM_S390_STORE_STATUS:
2584 		idx = srcu_read_lock(&vcpu->kvm->srcu);
2585 		r = kvm_s390_vcpu_store_status(vcpu, arg);
2586 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2587 		break;
2588 	case KVM_S390_SET_INITIAL_PSW: {
2589 		psw_t psw;
2590 
2591 		r = -EFAULT;
2592 		if (copy_from_user(&psw, argp, sizeof(psw)))
2593 			break;
2594 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2595 		break;
2596 	}
2597 	case KVM_S390_INITIAL_RESET:
2598 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2599 		break;
2600 	case KVM_SET_ONE_REG:
2601 	case KVM_GET_ONE_REG: {
2602 		struct kvm_one_reg reg;
2603 		r = -EFAULT;
2604 		if (copy_from_user(&reg, argp, sizeof(reg)))
2605 			break;
2606 		if (ioctl == KVM_SET_ONE_REG)
2607 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2608 		else
2609 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2610 		break;
2611 	}
2612 #ifdef CONFIG_KVM_S390_UCONTROL
2613 	case KVM_S390_UCAS_MAP: {
2614 		struct kvm_s390_ucas_mapping ucasmap;
2615 
2616 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2617 			r = -EFAULT;
2618 			break;
2619 		}
2620 
2621 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2622 			r = -EINVAL;
2623 			break;
2624 		}
2625 
2626 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2627 				     ucasmap.vcpu_addr, ucasmap.length);
2628 		break;
2629 	}
2630 	case KVM_S390_UCAS_UNMAP: {
2631 		struct kvm_s390_ucas_mapping ucasmap;
2632 
2633 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2634 			r = -EFAULT;
2635 			break;
2636 		}
2637 
2638 		if (!kvm_is_ucontrol(vcpu->kvm)) {
2639 			r = -EINVAL;
2640 			break;
2641 		}
2642 
2643 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2644 			ucasmap.length);
2645 		break;
2646 	}
2647 #endif
2648 	case KVM_S390_VCPU_FAULT: {
2649 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2650 		break;
2651 	}
2652 	case KVM_ENABLE_CAP:
2653 	{
2654 		struct kvm_enable_cap cap;
2655 		r = -EFAULT;
2656 		if (copy_from_user(&cap, argp, sizeof(cap)))
2657 			break;
2658 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2659 		break;
2660 	}
2661 	case KVM_S390_MEM_OP: {
2662 		struct kvm_s390_mem_op mem_op;
2663 
2664 		r = -EFAULT;
2665 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)))
2666 			break;
2667 		r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2668 		break;
2669 	}
2670 	case KVM_S390_SET_IRQ_STATE: {
2671 		struct kvm_s390_irq_state irq_state;
2672 
2673 		r = -EFAULT;
2674 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2675 			break;
2676 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2677 		    irq_state.len == 0 ||
2678 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2679 			r = -EINVAL;
2680 			break;
2681 		}
2682 		r = kvm_s390_set_irq_state(vcpu,
2683 					   (void __user *) irq_state.buf,
2684 					   irq_state.len);
2685 		break;
2686 	}
2687 	case KVM_S390_GET_IRQ_STATE: {
2688 		struct kvm_s390_irq_state irq_state;
2689 
2690 		r = -EFAULT;
2691 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2692 			break;
2693 		if (irq_state.len == 0) {
2694 			r = -EINVAL;
2695 			break;
2696 		}
2697 		r = kvm_s390_get_irq_state(vcpu,
2698 					   (__u8 __user *)  irq_state.buf,
2699 					   irq_state.len);
2700 		break;
2701 	}
2702 	default:
2703 		r = -ENOTTY;
2704 	}
2705 	return r;
2706 }
2707 
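/*
 * Fault handler for the vcpu fd mapping: ucontrol guests may mmap the
 * SIE control block at KVM_S390_SIE_PAGE_OFFSET; everything else gets
 * SIGBUS.
 */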
2708 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2709 {
2710 #ifdef CONFIG_KVM_S390_UCONTROL
2711 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2712 		 && (kvm_is_ucontrol(vcpu->kvm))) {
2713 		vmf->page = virt_to_page(vcpu->arch.sie_block);
2714 		get_page(vmf->page);
2715 		return 0;
2716 	}
2717 #endif
2718 	return VM_FAULT_SIGBUS;
2719 }
2720 
2721 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2722 			    unsigned long npages)
2723 {
2724 	return 0;
2725 }
2726 
2727 /* Section: memory related */
2728 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2729 				   struct kvm_memory_slot *memslot,
2730 				   const struct kvm_userspace_memory_region *mem,
2731 				   enum kvm_mr_change change)
2732 {
2733 	/* A few sanity checks. Memory slots have to start and end at a
2734 	   segment boundary (1MB). The memory in userland may be fragmented
2735 	   across various different vmas. It is okay to mmap() and munmap()
2736 	   memory in this slot at any time after this call. */
2737 
2738 	if (mem->userspace_addr & 0xffffful)
2739 		return -EINVAL;
2740 
2741 	if (mem->memory_size & 0xffffful)
2742 		return -EINVAL;
2743 
2744 	return 0;
2745 }
2746 
2747 void kvm_arch_commit_memory_region(struct kvm *kvm,
2748 				const struct kvm_userspace_memory_region *mem,
2749 				const struct kvm_memory_slot *old,
2750 				const struct kvm_memory_slot *new,
2751 				enum kvm_mr_change change)
2752 {
2753 	int rc;
2754 
2755 	/* If the basics of the memslot do not change, we do not want
2756 	 * to update the gmap. Every update causes several unnecessary
2757 	 * segment translation exceptions. This is usually handled just
2758 	 * fine by the normal fault handler + gmap, but it will also
2759 	 * cause faults on the prefix page of running guest CPUs.
2760 	 */
2761 	if (old->userspace_addr == mem->userspace_addr &&
2762 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2763 	    old->npages * PAGE_SIZE == mem->memory_size)
2764 		return;
2765 
2766 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2767 		mem->guest_phys_addr, mem->memory_size);
2768 	if (rc)
2769 		pr_warn("failed to commit memory region\n");
2771 }
2772 
2773 static int __init kvm_s390_init(void)
2774 {
2775 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2776 }
2777 
2778 static void __exit kvm_s390_exit(void)
2779 {
2780 	kvm_exit();
2781 }
2782 
2783 module_init(kvm_s390_init);
2784 module_exit(kvm_s390_exit);
2785 
2786 /*
2787  * Enable autoloading of the kvm module.
2788  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2789  * since x86 takes a different approach.
2790  */
2791 #include <linux/miscdevice.h>
2792 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2793 MODULE_ALIAS("devname:kvm");
2794