/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
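/*
 * Module-wide s390 debug feature used for the "kvm-s390" trace events;
 * registered in kvm_arch_init() and released again in kvm_arch_exit().
 */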
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

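/*
 * Module initialization: set up the s390 debug feature used for tracing,
 * probe the CPU features and crypto/PLO subfunctions offered by the host,
 * and register the floating interrupt controller (FLIC) device ops.
 */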
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

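/*
 * Illustrative only, not part of the kernel API definition: user space
 * typically retrieves the log below with the KVM_GET_DIRTY_LOG vm ioctl,
 * roughly
 *
 *	struct kvm_dirty_log log = { .slot = slot, .dirty_bitmap = buf };
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */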
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
"(not available)" : "(success)"); 588 break; 589 case KVM_CAP_S390_USER_STSI: 590 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 591 kvm->arch.user_stsi = 1; 592 r = 0; 593 break; 594 case KVM_CAP_S390_USER_INSTR0: 595 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 596 kvm->arch.user_instr0 = 1; 597 icpt_operexc_on_all_vcpus(kvm); 598 r = 0; 599 break; 600 default: 601 r = -EINVAL; 602 break; 603 } 604 return r; 605 } 606 607 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 608 { 609 int ret; 610 611 switch (attr->attr) { 612 case KVM_S390_VM_MEM_LIMIT_SIZE: 613 ret = 0; 614 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 615 kvm->arch.mem_limit); 616 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 617 ret = -EFAULT; 618 break; 619 default: 620 ret = -ENXIO; 621 break; 622 } 623 return ret; 624 } 625 626 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 627 { 628 int ret; 629 unsigned int idx; 630 switch (attr->attr) { 631 case KVM_S390_VM_MEM_ENABLE_CMMA: 632 ret = -ENXIO; 633 if (!sclp.has_cmma) 634 break; 635 636 ret = -EBUSY; 637 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 638 mutex_lock(&kvm->lock); 639 if (!kvm->created_vcpus) { 640 kvm->arch.use_cmma = 1; 641 ret = 0; 642 } 643 mutex_unlock(&kvm->lock); 644 break; 645 case KVM_S390_VM_MEM_CLR_CMMA: 646 ret = -ENXIO; 647 if (!sclp.has_cmma) 648 break; 649 ret = -EINVAL; 650 if (!kvm->arch.use_cmma) 651 break; 652 653 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 654 mutex_lock(&kvm->lock); 655 idx = srcu_read_lock(&kvm->srcu); 656 s390_reset_cmma(kvm->arch.gmap->mm); 657 srcu_read_unlock(&kvm->srcu, idx); 658 mutex_unlock(&kvm->lock); 659 ret = 0; 660 break; 661 case KVM_S390_VM_MEM_LIMIT_SIZE: { 662 unsigned long new_limit; 663 664 if (kvm_is_ucontrol(kvm)) 665 return -EINVAL; 666 667 if (get_user(new_limit, (u64 __user *)attr->addr)) 668 return -EFAULT; 669 670 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 671 new_limit > kvm->arch.mem_limit) 672 return -E2BIG; 673 674 if (!new_limit) 675 return -EINVAL; 676 677 /* gmap_create takes last usable address */ 678 if (new_limit != KVM_S390_NO_MEM_LIMIT) 679 new_limit -= 1; 680 681 ret = -EBUSY; 682 mutex_lock(&kvm->lock); 683 if (!kvm->created_vcpus) { 684 /* gmap_create will round the limit up */ 685 struct gmap *new = gmap_create(current->mm, new_limit); 686 687 if (!new) { 688 ret = -ENOMEM; 689 } else { 690 gmap_remove(kvm->arch.gmap); 691 new->private = kvm; 692 kvm->arch.gmap = new; 693 ret = 0; 694 } 695 } 696 mutex_unlock(&kvm->lock); 697 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 698 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 699 (void *) kvm->arch.gmap->asce); 700 break; 701 } 702 default: 703 ret = -ENXIO; 704 break; 705 } 706 return ret; 707 } 708 709 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 710 711 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 712 { 713 struct kvm_vcpu *vcpu; 714 int i; 715 716 if (!test_kvm_facility(kvm, 76)) 717 return -EINVAL; 718 719 mutex_lock(&kvm->lock); 720 switch (attr->attr) { 721 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 722 get_random_bytes( 723 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 724 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 725 kvm->arch.crypto.aes_kw = 1; 726 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 727 break; 728 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 729 get_random_bytes( 730 
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}

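/*
 * The carry into epoch_idx above covers the case where adding the guest
 * epoch wrapped the 64-bit TOD value (multiple-epoch facility, facility 139).
 */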
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x3;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

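/*
 * Facility 76 is MSA3, which brings the PCKMO protected-key functions;
 * without it there is no CRYCB and no key wrapping to set up for the guest.
 */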
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

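	/*
	 * fac_mask caps which facilities may ever be enabled for this VM;
	 * fac_list (set up below) is what is currently offered to the guest.
	 */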
	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
2044 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2045 } 2046 2047 /* Section: vcpu related */ 2048 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2049 { 2050 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2051 if (!vcpu->arch.gmap) 2052 return -ENOMEM; 2053 vcpu->arch.gmap->private = vcpu->kvm; 2054 2055 return 0; 2056 } 2057 2058 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2059 { 2060 if (!kvm_s390_use_sca_entries()) 2061 return; 2062 read_lock(&vcpu->kvm->arch.sca_lock); 2063 if (vcpu->kvm->arch.use_esca) { 2064 struct esca_block *sca = vcpu->kvm->arch.sca; 2065 2066 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2067 sca->cpu[vcpu->vcpu_id].sda = 0; 2068 } else { 2069 struct bsca_block *sca = vcpu->kvm->arch.sca; 2070 2071 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2072 sca->cpu[vcpu->vcpu_id].sda = 0; 2073 } 2074 read_unlock(&vcpu->kvm->arch.sca_lock); 2075 } 2076 2077 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2078 { 2079 if (!kvm_s390_use_sca_entries()) { 2080 struct bsca_block *sca = vcpu->kvm->arch.sca; 2081 2082 /* we still need the basic sca for the ipte control */ 2083 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2084 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2085 } 2086 read_lock(&vcpu->kvm->arch.sca_lock); 2087 if (vcpu->kvm->arch.use_esca) { 2088 struct esca_block *sca = vcpu->kvm->arch.sca; 2089 2090 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2091 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2092 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2093 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2094 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2095 } else { 2096 struct bsca_block *sca = vcpu->kvm->arch.sca; 2097 2098 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2099 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2100 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2101 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2102 } 2103 read_unlock(&vcpu->kvm->arch.sca_lock); 2104 } 2105 2106 /* Basic SCA to Extended SCA data copy routines */ 2107 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2108 { 2109 d->sda = s->sda; 2110 d->sigp_ctrl.c = s->sigp_ctrl.c; 2111 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2112 } 2113 2114 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2115 { 2116 int i; 2117 2118 d->ipte_control = s->ipte_control; 2119 d->mcn[0] = s->mcn; 2120 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2121 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2122 } 2123 2124 static int sca_switch_to_extended(struct kvm *kvm) 2125 { 2126 struct bsca_block *old_sca = kvm->arch.sca; 2127 struct esca_block *new_sca; 2128 struct kvm_vcpu *vcpu; 2129 unsigned int vcpu_idx; 2130 u32 scaol, scaoh; 2131 2132 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 2133 if (!new_sca) 2134 return -ENOMEM; 2135 2136 scaoh = (u32)((u64)(new_sca) >> 32); 2137 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2138 2139 kvm_s390_vcpu_block_all(kvm); 2140 write_lock(&kvm->arch.sca_lock); 2141 2142 sca_copy_b_to_e(new_sca, old_sca); 2143 2144 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2145 vcpu->arch.sie_block->scaoh = scaoh; 2146 vcpu->arch.sie_block->scaol = scaol; 2147 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2148 } 2149 kvm->arch.sca = new_sca; 2150 kvm->arch.use_esca = 1; 2151 2152 write_unlock(&kvm->arch.sca_lock); 2153 kvm_s390_vcpu_unblock_all(kvm); 2154 2155 free_page((unsigned long)old_sca); 2156 2157 
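/*
 * The scaoh/scaol split used above in a nutshell: the 64 bit SCA origin
 * is stored as two 32 bit halves in the SIE block, and the ESCA variants
 * additionally clear the low six bits of the low half.  A minimal sketch
 * with a made-up, suitably aligned origin:
 *
 *	unsigned long sca_origin = 0x000000012345a6c0UL;	// hypothetical
 *	u32 scaoh = (u32)((u64)sca_origin >> 32);		// 0x00000001
 *	u32 scaol = (u32)(u64)sca_origin & ~0x3fU;		// 0x2345a6c0
 */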
VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2158 old_sca, kvm->arch.sca); 2159 return 0; 2160 } 2161 2162 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2163 { 2164 int rc; 2165 2166 if (!kvm_s390_use_sca_entries()) { 2167 if (id < KVM_MAX_VCPUS) 2168 return true; 2169 return false; 2170 } 2171 if (id < KVM_S390_BSCA_CPU_SLOTS) 2172 return true; 2173 if (!sclp.has_esca || !sclp.has_64bscao) 2174 return false; 2175 2176 mutex_lock(&kvm->lock); 2177 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2178 mutex_unlock(&kvm->lock); 2179 2180 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2181 } 2182 2183 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 2184 { 2185 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2186 kvm_clear_async_pf_completion_queue(vcpu); 2187 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 2188 KVM_SYNC_GPRS | 2189 KVM_SYNC_ACRS | 2190 KVM_SYNC_CRS | 2191 KVM_SYNC_ARCH0 | 2192 KVM_SYNC_PFAULT; 2193 kvm_s390_set_prefix(vcpu, 0); 2194 if (test_kvm_facility(vcpu->kvm, 64)) 2195 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 2196 if (test_kvm_facility(vcpu->kvm, 133)) 2197 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 2198 /* fprs can be synchronized via vrs, even if the guest has no vx. With 2199 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 2200 */ 2201 if (MACHINE_HAS_VX) 2202 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 2203 else 2204 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 2205 2206 if (kvm_is_ucontrol(vcpu->kvm)) 2207 return __kvm_ucontrol_vcpu_init(vcpu); 2208 2209 return 0; 2210 } 2211 2212 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2213 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2214 { 2215 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2216 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2217 vcpu->arch.cputm_start = get_tod_clock_fast(); 2218 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2219 } 2220 2221 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2222 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2223 { 2224 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2225 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2226 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2227 vcpu->arch.cputm_start = 0; 2228 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2229 } 2230 2231 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2232 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2233 { 2234 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2235 vcpu->arch.cputm_enabled = true; 2236 __start_cpu_timer_accounting(vcpu); 2237 } 2238 2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2240 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2241 { 2242 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2243 __stop_cpu_timer_accounting(vcpu); 2244 vcpu->arch.cputm_enabled = false; 2245 } 2246 2247 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2248 { 2249 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2250 __enable_cpu_timer_accounting(vcpu); 2251 preempt_enable(); 2252 } 2253 2254 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2255 { 2256 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2257 __disable_cpu_timer_accounting(vcpu); 2258 preempt_enable(); 2259 } 2260 2261 /* set the cpu timer - may only be called from the VCPU thread itself */ 2262 void 
kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 2263 { 2264 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2265 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2266 if (vcpu->arch.cputm_enabled) 2267 vcpu->arch.cputm_start = get_tod_clock_fast(); 2268 vcpu->arch.sie_block->cputm = cputm; 2269 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2270 preempt_enable(); 2271 } 2272 2273 /* update and get the cpu timer - can also be called from other VCPU threads */ 2274 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 2275 { 2276 unsigned int seq; 2277 __u64 value; 2278 2279 if (unlikely(!vcpu->arch.cputm_enabled)) 2280 return vcpu->arch.sie_block->cputm; 2281 2282 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2283 do { 2284 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 2285 /* 2286 * If the writer would ever execute a read in the critical 2287 * section, e.g. in irq context, we have a deadlock. 2288 */ 2289 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 2290 value = vcpu->arch.sie_block->cputm; 2291 /* if cputm_start is 0, accounting is being started/stopped */ 2292 if (likely(vcpu->arch.cputm_start)) 2293 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2294 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 2295 preempt_enable(); 2296 return value; 2297 } 2298 2299 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2300 { 2301 2302 gmap_enable(vcpu->arch.enabled_gmap); 2303 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 2304 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2305 __start_cpu_timer_accounting(vcpu); 2306 vcpu->cpu = cpu; 2307 } 2308 2309 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 2310 { 2311 vcpu->cpu = -1; 2312 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 2313 __stop_cpu_timer_accounting(vcpu); 2314 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); 2315 vcpu->arch.enabled_gmap = gmap_get_enabled(); 2316 gmap_disable(vcpu->arch.enabled_gmap); 2317 2318 } 2319 2320 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) 2321 { 2322 /* this equals initial cpu reset in pop, but we don't switch to ESA */ 2323 vcpu->arch.sie_block->gpsw.mask = 0UL; 2324 vcpu->arch.sie_block->gpsw.addr = 0UL; 2325 kvm_s390_set_prefix(vcpu, 0); 2326 kvm_s390_set_cpu_timer(vcpu, 0); 2327 vcpu->arch.sie_block->ckc = 0UL; 2328 vcpu->arch.sie_block->todpr = 0; 2329 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); 2330 vcpu->arch.sie_block->gcr[0] = 0xE0UL; 2331 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; 2332 /* make sure the new fpc will be lazily loaded */ 2333 save_fpu_regs(); 2334 current->thread.fpu.fpc = 0; 2335 vcpu->arch.sie_block->gbea = 1; 2336 vcpu->arch.sie_block->pp = 0; 2337 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 2338 kvm_clear_async_pf_completion_queue(vcpu); 2339 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 2340 kvm_s390_vcpu_stop(vcpu); 2341 kvm_s390_clear_local_irqs(vcpu); 2342 } 2343 2344 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 2345 { 2346 mutex_lock(&vcpu->kvm->lock); 2347 preempt_disable(); 2348 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 2349 preempt_enable(); 2350 mutex_unlock(&vcpu->kvm->lock); 2351 if (!kvm_is_ucontrol(vcpu->kvm)) { 2352 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 2353 sca_add_vcpu(vcpu); 2354 } 2355 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 2356 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2357 /* make vcpu_load load the right gmap on 
the first trigger */ 2358 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 2359 } 2360 2361 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 2362 { 2363 if (!test_kvm_facility(vcpu->kvm, 76)) 2364 return; 2365 2366 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 2367 2368 if (vcpu->kvm->arch.crypto.aes_kw) 2369 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 2370 if (vcpu->kvm->arch.crypto.dea_kw) 2371 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 2372 2373 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 2374 } 2375 2376 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 2377 { 2378 free_page(vcpu->arch.sie_block->cbrlo); 2379 vcpu->arch.sie_block->cbrlo = 0; 2380 } 2381 2382 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 2383 { 2384 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); 2385 if (!vcpu->arch.sie_block->cbrlo) 2386 return -ENOMEM; 2387 2388 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; 2389 return 0; 2390 } 2391 2392 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 2393 { 2394 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 2395 2396 vcpu->arch.sie_block->ibc = model->ibc; 2397 if (test_kvm_facility(vcpu->kvm, 7)) 2398 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 2399 } 2400 2401 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 2402 { 2403 int rc = 0; 2404 2405 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 2406 CPUSTAT_SM | 2407 CPUSTAT_STOPPED); 2408 2409 if (test_kvm_facility(vcpu->kvm, 78)) 2410 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags); 2411 else if (test_kvm_facility(vcpu->kvm, 8)) 2412 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags); 2413 2414 kvm_s390_vcpu_setup_model(vcpu); 2415 2416 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 2417 if (MACHINE_HAS_ESOP) 2418 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 2419 if (test_kvm_facility(vcpu->kvm, 9)) 2420 vcpu->arch.sie_block->ecb |= ECB_SRSI; 2421 if (test_kvm_facility(vcpu->kvm, 73)) 2422 vcpu->arch.sie_block->ecb |= ECB_TE; 2423 2424 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) 2425 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 2426 if (test_kvm_facility(vcpu->kvm, 130)) 2427 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 2428 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 2429 if (sclp.has_cei) 2430 vcpu->arch.sie_block->eca |= ECA_CEI; 2431 if (sclp.has_ib) 2432 vcpu->arch.sie_block->eca |= ECA_IB; 2433 if (sclp.has_siif) 2434 vcpu->arch.sie_block->eca |= ECA_SII; 2435 if (sclp.has_sigpif) 2436 vcpu->arch.sie_block->eca |= ECA_SIGPI; 2437 if (test_kvm_facility(vcpu->kvm, 129)) { 2438 vcpu->arch.sie_block->eca |= ECA_VX; 2439 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 2440 } 2441 if (test_kvm_facility(vcpu->kvm, 139)) 2442 vcpu->arch.sie_block->ecd |= ECD_MEF; 2443 2444 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 2445 | SDNXC; 2446 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 2447 2448 if (sclp.has_kss) 2449 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); 2450 else 2451 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 2452 2453 if (vcpu->kvm->arch.use_cmma) { 2454 rc = kvm_s390_vcpu_setup_cmma(vcpu); 2455 if (rc) 2456 return rc; 2457 } 2458 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2459 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 2460 2461 kvm_s390_vcpu_crypto_setup(vcpu); 2462 2463 return rc; 2464 } 2465 2466 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 2467 unsigned int 
id) 2468 { 2469 struct kvm_vcpu *vcpu; 2470 struct sie_page *sie_page; 2471 int rc = -EINVAL; 2472 2473 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 2474 goto out; 2475 2476 rc = -ENOMEM; 2477 2478 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 2479 if (!vcpu) 2480 goto out; 2481 2482 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 2483 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); 2484 if (!sie_page) 2485 goto out_free_cpu; 2486 2487 vcpu->arch.sie_block = &sie_page->sie_block; 2488 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 2489 2490 /* the real guest size will always be smaller than msl */ 2491 vcpu->arch.sie_block->mso = 0; 2492 vcpu->arch.sie_block->msl = sclp.hamax; 2493 2494 vcpu->arch.sie_block->icpua = id; 2495 spin_lock_init(&vcpu->arch.local_int.lock); 2496 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 2497 vcpu->arch.local_int.wq = &vcpu->wq; 2498 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 2499 seqcount_init(&vcpu->arch.cputm_seqcount); 2500 2501 rc = kvm_vcpu_init(vcpu, kvm, id); 2502 if (rc) 2503 goto out_free_sie_block; 2504 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 2505 vcpu->arch.sie_block); 2506 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 2507 2508 return vcpu; 2509 out_free_sie_block: 2510 free_page((unsigned long)(vcpu->arch.sie_block)); 2511 out_free_cpu: 2512 kmem_cache_free(kvm_vcpu_cache, vcpu); 2513 out: 2514 return ERR_PTR(rc); 2515 } 2516 2517 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 2518 { 2519 return kvm_s390_vcpu_has_irq(vcpu, 0); 2520 } 2521 2522 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 2523 { 2524 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2525 exit_sie(vcpu); 2526 } 2527 2528 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 2529 { 2530 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 2531 } 2532 2533 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 2534 { 2535 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2536 exit_sie(vcpu); 2537 } 2538 2539 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 2540 { 2541 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 2542 } 2543 2544 /* 2545 * Kick a guest cpu out of SIE and wait until SIE is not running. 2546 * If the CPU is not running (e.g. waiting as idle) the function will 2547 * return immediately. 
*/ 2548 void exit_sie(struct kvm_vcpu *vcpu) 2549 { 2550 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); 2551 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 2552 cpu_relax(); 2553 } 2554 2555 /* Kick a guest cpu out of SIE to process a request synchronously */ 2556 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 2557 { 2558 kvm_make_request(req, vcpu); 2559 kvm_s390_vcpu_request(vcpu); 2560 } 2561 2562 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 2563 unsigned long end) 2564 { 2565 struct kvm *kvm = gmap->private; 2566 struct kvm_vcpu *vcpu; 2567 unsigned long prefix; 2568 int i; 2569 2570 if (gmap_is_shadow(gmap)) 2571 return; 2572 if (start >= 1UL << 31) 2573 /* We are only interested in prefix pages */ 2574 return; 2575 kvm_for_each_vcpu(i, vcpu, kvm) { 2576 /* match against both prefix pages */ 2577 prefix = kvm_s390_get_prefix(vcpu); 2578 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 2579 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 2580 start, end); 2581 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 2582 } 2583 } 2584 } 2585 2586 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 2587 { 2588 /* kvm common code refers to this, but never calls it */ 2589 BUG(); 2590 return 0; 2591 } 2592 2593 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 2594 struct kvm_one_reg *reg) 2595 { 2596 int r = -EINVAL; 2597 2598 switch (reg->id) { 2599 case KVM_REG_S390_TODPR: 2600 r = put_user(vcpu->arch.sie_block->todpr, 2601 (u32 __user *)reg->addr); 2602 break; 2603 case KVM_REG_S390_EPOCHDIFF: 2604 r = put_user(vcpu->arch.sie_block->epoch, 2605 (u64 __user *)reg->addr); 2606 break; 2607 case KVM_REG_S390_CPU_TIMER: 2608 r = put_user(kvm_s390_get_cpu_timer(vcpu), 2609 (u64 __user *)reg->addr); 2610 break; 2611 case KVM_REG_S390_CLOCK_COMP: 2612 r = put_user(vcpu->arch.sie_block->ckc, 2613 (u64 __user *)reg->addr); 2614 break; 2615 case KVM_REG_S390_PFTOKEN: 2616 r = put_user(vcpu->arch.pfault_token, 2617 (u64 __user *)reg->addr); 2618 break; 2619 case KVM_REG_S390_PFCOMPARE: 2620 r = put_user(vcpu->arch.pfault_compare, 2621 (u64 __user *)reg->addr); 2622 break; 2623 case KVM_REG_S390_PFSELECT: 2624 r = put_user(vcpu->arch.pfault_select, 2625 (u64 __user *)reg->addr); 2626 break; 2627 case KVM_REG_S390_PP: 2628 r = put_user(vcpu->arch.sie_block->pp, 2629 (u64 __user *)reg->addr); 2630 break; 2631 case KVM_REG_S390_GBEA: 2632 r = put_user(vcpu->arch.sie_block->gbea, 2633 (u64 __user *)reg->addr); 2634 break; 2635 default: 2636 break; 2637 } 2638 2639 return r; 2640 } 2641 2642 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 2643 struct kvm_one_reg *reg) 2644 { 2645 int r = -EINVAL; 2646 __u64 val; 2647 2648 switch (reg->id) { 2649 case KVM_REG_S390_TODPR: 2650 r = get_user(vcpu->arch.sie_block->todpr, 2651 (u32 __user *)reg->addr); 2652 break; 2653 case KVM_REG_S390_EPOCHDIFF: 2654 r = get_user(vcpu->arch.sie_block->epoch, 2655 (u64 __user *)reg->addr); 2656 break; 2657 case KVM_REG_S390_CPU_TIMER: 2658 r = get_user(val, (u64 __user *)reg->addr); 2659 if (!r) 2660 kvm_s390_set_cpu_timer(vcpu, val); 2661 break; 2662 case KVM_REG_S390_CLOCK_COMP: 2663 r = get_user(vcpu->arch.sie_block->ckc, 2664 (u64 __user *)reg->addr); 2665 break; 2666 case KVM_REG_S390_PFTOKEN: 2667 r = get_user(vcpu->arch.pfault_token, 2668 (u64 __user *)reg->addr); 2669 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 2670 kvm_clear_async_pf_completion_queue(vcpu); 2671 break; 2672 case KVM_REG_S390_PFCOMPARE: 
2673 r = get_user(vcpu->arch.pfault_compare,
2674 (u64 __user *)reg->addr);
2675 break;
2676 case KVM_REG_S390_PFSELECT:
2677 r = get_user(vcpu->arch.pfault_select,
2678 (u64 __user *)reg->addr);
2679 break;
2680 case KVM_REG_S390_PP:
2681 r = get_user(vcpu->arch.sie_block->pp,
2682 (u64 __user *)reg->addr);
2683 break;
2684 case KVM_REG_S390_GBEA:
2685 r = get_user(vcpu->arch.sie_block->gbea,
2686 (u64 __user *)reg->addr);
2687 break;
2688 default:
2689 break;
2690 }
2691
2692 return r;
2693 }
2694
2695 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2696 {
2697 kvm_s390_vcpu_initial_reset(vcpu);
2698 return 0;
2699 }
2700
2701 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2702 {
2703 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2704 return 0;
2705 }
2706
2707 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2708 {
2709 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2710 return 0;
2711 }
2712
2713 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2714 struct kvm_sregs *sregs)
2715 {
2716 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2717 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2718 return 0;
2719 }
2720
2721 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2722 struct kvm_sregs *sregs)
2723 {
2724 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2725 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2726 return 0;
2727 }
2728
2729 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2730 {
2731 if (test_fp_ctl(fpu->fpc))
2732 return -EINVAL;
2733 vcpu->run->s.regs.fpc = fpu->fpc;
2734 if (MACHINE_HAS_VX)
2735 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2736 (freg_t *) fpu->fprs);
2737 else
2738 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2739 return 0;
2740 }
2741
2742 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2743 {
2744 /* make sure we have the latest values */
2745 save_fpu_regs();
2746 if (MACHINE_HAS_VX)
2747 convert_vx_to_fp((freg_t *) fpu->fprs,
2748 (__vector128 *) vcpu->run->s.regs.vrs);
2749 else
2750 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2751 fpu->fpc = vcpu->run->s.regs.fpc;
2752 return 0;
2753 }
2754
2755 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2756 {
2757 int rc = 0;
2758
2759 if (!is_vcpu_stopped(vcpu))
2760 rc = -EBUSY;
2761 else {
2762 vcpu->run->psw_mask = psw.mask;
2763 vcpu->run->psw_addr = psw.addr;
2764 }
2765 return rc;
2766 }
2767
2768 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2769 struct kvm_translation *tr)
2770 {
2771 return -EINVAL; /* not implemented yet */
2772 }
2773
2774 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2775 KVM_GUESTDBG_USE_HW_BP | \
2776 KVM_GUESTDBG_ENABLE)
2777
2778 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2779 struct kvm_guest_debug *dbg)
2780 {
2781 int rc = 0;
2782
2783 vcpu->guest_debug = 0;
2784 kvm_s390_clear_bp_data(vcpu);
2785
2786 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2787 return -EINVAL;
2788 if (!sclp.has_gpere)
2789 return -EINVAL;
2790
2791 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2792 vcpu->guest_debug = dbg->control;
2793 /* enforce guest PER */
2794 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2795
2796 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2797 rc = kvm_s390_import_bp_data(vcpu, dbg);
2798
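/*
 * Illustrative only: the KVM_SET_GUEST_DEBUG path handled here is
 * typically reached from user space like this (vcpu_fd is assumed to
 * come from KVM_CREATE_VCPU, error handling omitted):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */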
} else { 2799 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2800 vcpu->arch.guestdbg.last_bp = 0; 2801 } 2802 2803 if (rc) { 2804 vcpu->guest_debug = 0; 2805 kvm_s390_clear_bp_data(vcpu); 2806 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); 2807 } 2808 2809 return rc; 2810 } 2811 2812 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 2813 struct kvm_mp_state *mp_state) 2814 { 2815 /* CHECK_STOP and LOAD are not supported yet */ 2816 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 2817 KVM_MP_STATE_OPERATING; 2818 } 2819 2820 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 2821 struct kvm_mp_state *mp_state) 2822 { 2823 int rc = 0; 2824 2825 /* user space knows about this interface - let it control the state */ 2826 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 2827 2828 switch (mp_state->mp_state) { 2829 case KVM_MP_STATE_STOPPED: 2830 kvm_s390_vcpu_stop(vcpu); 2831 break; 2832 case KVM_MP_STATE_OPERATING: 2833 kvm_s390_vcpu_start(vcpu); 2834 break; 2835 case KVM_MP_STATE_LOAD: 2836 case KVM_MP_STATE_CHECK_STOP: 2837 /* fall through - CHECK_STOP and LOAD are not supported yet */ 2838 default: 2839 rc = -ENXIO; 2840 } 2841 2842 return rc; 2843 } 2844 2845 static bool ibs_enabled(struct kvm_vcpu *vcpu) 2846 { 2847 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; 2848 } 2849 2850 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 2851 { 2852 retry: 2853 kvm_s390_vcpu_request_handled(vcpu); 2854 if (!kvm_request_pending(vcpu)) 2855 return 0; 2856 /* 2857 * We use MMU_RELOAD just to re-arm the ipte notifier for the 2858 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 2859 * This ensures that the ipte instruction for this request has 2860 * already finished. We might race against a second unmapper that 2861 * wants to set the blocking bit. Lets just retry the request loop. 2862 */ 2863 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 2864 int rc; 2865 rc = gmap_mprotect_notify(vcpu->arch.gmap, 2866 kvm_s390_get_prefix(vcpu), 2867 PAGE_SIZE * 2, PROT_WRITE); 2868 if (rc) { 2869 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 2870 return rc; 2871 } 2872 goto retry; 2873 } 2874 2875 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 2876 vcpu->arch.sie_block->ihcpu = 0xffff; 2877 goto retry; 2878 } 2879 2880 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 2881 if (!ibs_enabled(vcpu)) { 2882 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 2883 atomic_or(CPUSTAT_IBS, 2884 &vcpu->arch.sie_block->cpuflags); 2885 } 2886 goto retry; 2887 } 2888 2889 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 2890 if (ibs_enabled(vcpu)) { 2891 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 2892 atomic_andnot(CPUSTAT_IBS, 2893 &vcpu->arch.sie_block->cpuflags); 2894 } 2895 goto retry; 2896 } 2897 2898 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 2899 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 2900 goto retry; 2901 } 2902 2903 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 2904 /* 2905 * Disable CMMA virtualization; we will emulate the ESSA 2906 * instruction manually, in order to provide additional 2907 * functionalities needed for live migration. 2908 */ 2909 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 2910 goto retry; 2911 } 2912 2913 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 2914 /* 2915 * Re-enable CMMA virtualization if CMMA is available and 2916 * was used. 
2917 */ 2918 if ((vcpu->kvm->arch.use_cmma) && 2919 (vcpu->kvm->mm->context.use_cmma)) 2920 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 2921 goto retry; 2922 } 2923 2924 /* nothing to do, just clear the request */ 2925 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 2926 2927 return 0; 2928 } 2929 2930 void kvm_s390_set_tod_clock_ext(struct kvm *kvm, 2931 const struct kvm_s390_vm_tod_clock *gtod) 2932 { 2933 struct kvm_vcpu *vcpu; 2934 struct kvm_s390_tod_clock_ext htod; 2935 int i; 2936 2937 mutex_lock(&kvm->lock); 2938 preempt_disable(); 2939 2940 get_tod_clock_ext((char *)&htod); 2941 2942 kvm->arch.epoch = gtod->tod - htod.tod; 2943 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; 2944 2945 if (kvm->arch.epoch > gtod->tod) 2946 kvm->arch.epdx -= 1; 2947 2948 kvm_s390_vcpu_block_all(kvm); 2949 kvm_for_each_vcpu(i, vcpu, kvm) { 2950 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 2951 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 2952 } 2953 2954 kvm_s390_vcpu_unblock_all(kvm); 2955 preempt_enable(); 2956 mutex_unlock(&kvm->lock); 2957 } 2958 2959 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) 2960 { 2961 struct kvm_vcpu *vcpu; 2962 int i; 2963 2964 mutex_lock(&kvm->lock); 2965 preempt_disable(); 2966 kvm->arch.epoch = tod - get_tod_clock(); 2967 kvm_s390_vcpu_block_all(kvm); 2968 kvm_for_each_vcpu(i, vcpu, kvm) 2969 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 2970 kvm_s390_vcpu_unblock_all(kvm); 2971 preempt_enable(); 2972 mutex_unlock(&kvm->lock); 2973 } 2974 2975 /** 2976 * kvm_arch_fault_in_page - fault-in guest page if necessary 2977 * @vcpu: The corresponding virtual cpu 2978 * @gpa: Guest physical address 2979 * @writable: Whether the page should be writable or not 2980 * 2981 * Make sure that a guest page has been faulted-in on the host. 2982 * 2983 * Return: Zero on success, negative error code otherwise. 2984 */ 2985 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 2986 { 2987 return gmap_fault(vcpu->arch.gmap, gpa, 2988 writable ? 
FAULT_FLAG_WRITE : 0); 2989 } 2990 2991 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 2992 unsigned long token) 2993 { 2994 struct kvm_s390_interrupt inti; 2995 struct kvm_s390_irq irq; 2996 2997 if (start_token) { 2998 irq.u.ext.ext_params2 = token; 2999 irq.type = KVM_S390_INT_PFAULT_INIT; 3000 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3001 } else { 3002 inti.type = KVM_S390_INT_PFAULT_DONE; 3003 inti.parm64 = token; 3004 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3005 } 3006 } 3007 3008 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3009 struct kvm_async_pf *work) 3010 { 3011 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3012 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3013 } 3014 3015 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3016 struct kvm_async_pf *work) 3017 { 3018 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3019 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3020 } 3021 3022 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3023 struct kvm_async_pf *work) 3024 { 3025 /* s390 will always inject the page directly */ 3026 } 3027 3028 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) 3029 { 3030 /* 3031 * s390 will always inject the page directly, 3032 * but we still want check_async_completion to cleanup 3033 */ 3034 return true; 3035 } 3036 3037 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3038 { 3039 hva_t hva; 3040 struct kvm_arch_async_pf arch; 3041 int rc; 3042 3043 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3044 return 0; 3045 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3046 vcpu->arch.pfault_compare) 3047 return 0; 3048 if (psw_extint_disabled(vcpu)) 3049 return 0; 3050 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3051 return 0; 3052 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) 3053 return 0; 3054 if (!vcpu->arch.gmap->pfault_enabled) 3055 return 0; 3056 3057 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3058 hva += current->thread.gmap_addr & ~PAGE_MASK; 3059 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3060 return 0; 3061 3062 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3063 return rc; 3064 } 3065 3066 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3067 { 3068 int rc, cpuflags; 3069 3070 /* 3071 * On s390 notifications for arriving pages will be delivered directly 3072 * to the guest but the house keeping for completed pfaults is 3073 * handled outside the worker. 
3074 */ 3075 kvm_check_async_pf_completion(vcpu); 3076 3077 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 3078 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 3079 3080 if (need_resched()) 3081 schedule(); 3082 3083 if (test_cpu_flag(CIF_MCCK_PENDING)) 3084 s390_handle_mcck(); 3085 3086 if (!kvm_is_ucontrol(vcpu->kvm)) { 3087 rc = kvm_s390_deliver_pending_interrupts(vcpu); 3088 if (rc) 3089 return rc; 3090 } 3091 3092 rc = kvm_s390_handle_requests(vcpu); 3093 if (rc) 3094 return rc; 3095 3096 if (guestdbg_enabled(vcpu)) { 3097 kvm_s390_backup_guest_per_regs(vcpu); 3098 kvm_s390_patch_guest_per_regs(vcpu); 3099 } 3100 3101 vcpu->arch.sie_block->icptcode = 0; 3102 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 3103 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 3104 trace_kvm_s390_sie_enter(vcpu, cpuflags); 3105 3106 return 0; 3107 } 3108 3109 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 3110 { 3111 struct kvm_s390_pgm_info pgm_info = { 3112 .code = PGM_ADDRESSING, 3113 }; 3114 u8 opcode, ilen; 3115 int rc; 3116 3117 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 3118 trace_kvm_s390_sie_fault(vcpu); 3119 3120 /* 3121 * We want to inject an addressing exception, which is defined as a 3122 * suppressing or terminating exception. However, since we came here 3123 * by a DAT access exception, the PSW still points to the faulting 3124 * instruction since DAT exceptions are nullifying. So we've got 3125 * to look up the current opcode to get the length of the instruction 3126 * to be able to forward the PSW. 3127 */ 3128 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 3129 ilen = insn_length(opcode); 3130 if (rc < 0) { 3131 return rc; 3132 } else if (rc) { 3133 /* Instruction-Fetching Exceptions - we can't detect the ilen. 3134 * Forward by arbitrary ilc, injection will take care of 3135 * nullification if necessary. 
3136 */ 3137 pgm_info = vcpu->arch.pgm; 3138 ilen = 4; 3139 } 3140 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 3141 kvm_s390_forward_psw(vcpu, ilen); 3142 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 3143 } 3144 3145 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 3146 { 3147 struct mcck_volatile_info *mcck_info; 3148 struct sie_page *sie_page; 3149 3150 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 3151 vcpu->arch.sie_block->icptcode); 3152 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 3153 3154 if (guestdbg_enabled(vcpu)) 3155 kvm_s390_restore_guest_per_regs(vcpu); 3156 3157 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 3158 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 3159 3160 if (exit_reason == -EINTR) { 3161 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 3162 sie_page = container_of(vcpu->arch.sie_block, 3163 struct sie_page, sie_block); 3164 mcck_info = &sie_page->mcck_info; 3165 kvm_s390_reinject_machine_check(vcpu, mcck_info); 3166 return 0; 3167 } 3168 3169 if (vcpu->arch.sie_block->icptcode > 0) { 3170 int rc = kvm_handle_sie_intercept(vcpu); 3171 3172 if (rc != -EOPNOTSUPP) 3173 return rc; 3174 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 3175 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 3176 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 3177 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 3178 return -EREMOTE; 3179 } else if (exit_reason != -EFAULT) { 3180 vcpu->stat.exit_null++; 3181 return 0; 3182 } else if (kvm_is_ucontrol(vcpu->kvm)) { 3183 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 3184 vcpu->run->s390_ucontrol.trans_exc_code = 3185 current->thread.gmap_addr; 3186 vcpu->run->s390_ucontrol.pgm_code = 0x10; 3187 return -EREMOTE; 3188 } else if (current->thread.gmap_pfault) { 3189 trace_kvm_s390_major_guest_pfault(vcpu); 3190 current->thread.gmap_pfault = 0; 3191 if (kvm_arch_setup_async_pf(vcpu)) 3192 return 0; 3193 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 3194 } 3195 return vcpu_post_run_fault_in_sie(vcpu); 3196 } 3197 3198 static int __vcpu_run(struct kvm_vcpu *vcpu) 3199 { 3200 int rc, exit_reason; 3201 3202 /* 3203 * We try to hold kvm->srcu during most of vcpu_run (except when run- 3204 * ning the guest), so that memslots (and other stuff) are protected 3205 */ 3206 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3207 3208 do { 3209 rc = vcpu_pre_run(vcpu); 3210 if (rc) 3211 break; 3212 3213 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3214 /* 3215 * As PF_VCPU will be used in fault handler, between 3216 * guest_enter and guest_exit should be no uaccess. 
3217 */ 3218 local_irq_disable(); 3219 guest_enter_irqoff(); 3220 __disable_cpu_timer_accounting(vcpu); 3221 local_irq_enable(); 3222 exit_reason = sie64a(vcpu->arch.sie_block, 3223 vcpu->run->s.regs.gprs); 3224 local_irq_disable(); 3225 __enable_cpu_timer_accounting(vcpu); 3226 guest_exit_irqoff(); 3227 local_irq_enable(); 3228 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3229 3230 rc = vcpu_post_run(vcpu, exit_reason); 3231 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 3232 3233 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 3234 return rc; 3235 } 3236 3237 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3238 { 3239 struct runtime_instr_cb *riccb; 3240 struct gs_cb *gscb; 3241 3242 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 3243 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 3244 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 3245 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 3246 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 3247 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 3248 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 3249 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 3250 /* some control register changes require a tlb flush */ 3251 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3252 } 3253 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 3254 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 3255 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 3256 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 3257 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 3258 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 3259 } 3260 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 3261 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 3262 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 3263 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 3264 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3265 kvm_clear_async_pf_completion_queue(vcpu); 3266 } 3267 /* 3268 * If userspace sets the riccb (e.g. after migration) to a valid state, 3269 * we should enable RI here instead of doing the lazy enablement. 3270 */ 3271 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 3272 test_kvm_facility(vcpu->kvm, 64) && 3273 riccb->valid && 3274 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 3275 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 3276 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 3277 } 3278 /* 3279 * If userspace sets the gscb (e.g. after migration) to non-zero, 3280 * we should enable GS here instead of doing the lazy enablement. 
3281 */ 3282 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 3283 test_kvm_facility(vcpu->kvm, 133) && 3284 gscb->gssm && 3285 !vcpu->arch.gs_enabled) { 3286 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 3287 vcpu->arch.sie_block->ecb |= ECB_GS; 3288 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3289 vcpu->arch.gs_enabled = 1; 3290 } 3291 save_access_regs(vcpu->arch.host_acrs); 3292 restore_access_regs(vcpu->run->s.regs.acrs); 3293 /* save host (userspace) fprs/vrs */ 3294 save_fpu_regs(); 3295 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 3296 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 3297 if (MACHINE_HAS_VX) 3298 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 3299 else 3300 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 3301 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 3302 if (test_fp_ctl(current->thread.fpu.fpc)) 3303 /* User space provided an invalid FPC, let's clear it */ 3304 current->thread.fpu.fpc = 0; 3305 if (MACHINE_HAS_GS) { 3306 preempt_disable(); 3307 __ctl_set_bit(2, 4); 3308 if (current->thread.gs_cb) { 3309 vcpu->arch.host_gscb = current->thread.gs_cb; 3310 save_gs_cb(vcpu->arch.host_gscb); 3311 } 3312 if (vcpu->arch.gs_enabled) { 3313 current->thread.gs_cb = (struct gs_cb *) 3314 &vcpu->run->s.regs.gscb; 3315 restore_gs_cb(current->thread.gs_cb); 3316 } 3317 preempt_enable(); 3318 } 3319 3320 kvm_run->kvm_dirty_regs = 0; 3321 } 3322 3323 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3324 { 3325 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 3326 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 3327 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 3328 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 3329 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 3330 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 3331 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 3332 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 3333 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 3334 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 3335 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 3336 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 3337 save_access_regs(vcpu->run->s.regs.acrs); 3338 restore_access_regs(vcpu->arch.host_acrs); 3339 /* Save guest register state */ 3340 save_fpu_regs(); 3341 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3342 /* Restore will be done lazily at return */ 3343 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 3344 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 3345 if (MACHINE_HAS_GS) { 3346 __ctl_set_bit(2, 4); 3347 if (vcpu->arch.gs_enabled) 3348 save_gs_cb(current->thread.gs_cb); 3349 preempt_disable(); 3350 current->thread.gs_cb = vcpu->arch.host_gscb; 3351 restore_gs_cb(vcpu->arch.host_gscb); 3352 preempt_enable(); 3353 if (!vcpu->arch.host_gscb) 3354 __ctl_clear_bit(2, 4); 3355 vcpu->arch.host_gscb = NULL; 3356 } 3357 3358 } 3359 3360 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3361 { 3362 int rc; 3363 sigset_t sigsaved; 3364 3365 if (kvm_run->immediate_exit) 3366 return -EINTR; 3367 3368 if (guestdbg_exit_pending(vcpu)) { 3369 kvm_s390_prepare_debug_exit(vcpu); 3370 return 0; 3371 } 3372 3373 if (vcpu->sigset_active) 3374 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 3375 3376 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3377 kvm_s390_vcpu_start(vcpu); 3378 } else if (is_vcpu_stopped(vcpu)) { 3379 pr_err_ratelimited("can't run stopped vcpu %d\n", 3380 vcpu->vcpu_id); 3381 return -EINVAL; 3382 } 
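/*
 * Illustrative only: the bare-bones user space loop that ends up in this
 * ioctl.  run_size would come from KVM_GET_VCPU_MMAP_SIZE on /dev/kvm and
 * vcpu_fd from KVM_CREATE_VCPU; exit handling is reduced to a minimum.
 *
 *	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;		// intercept needs user space handling
 *	}
 */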
3383 3384 sync_regs(vcpu, kvm_run); 3385 enable_cpu_timer_accounting(vcpu); 3386 3387 might_fault(); 3388 rc = __vcpu_run(vcpu); 3389 3390 if (signal_pending(current) && !rc) { 3391 kvm_run->exit_reason = KVM_EXIT_INTR; 3392 rc = -EINTR; 3393 } 3394 3395 if (guestdbg_exit_pending(vcpu) && !rc) { 3396 kvm_s390_prepare_debug_exit(vcpu); 3397 rc = 0; 3398 } 3399 3400 if (rc == -EREMOTE) { 3401 /* userspace support is needed, kvm_run has been prepared */ 3402 rc = 0; 3403 } 3404 3405 disable_cpu_timer_accounting(vcpu); 3406 store_regs(vcpu, kvm_run); 3407 3408 if (vcpu->sigset_active) 3409 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 3410 3411 vcpu->stat.exit_userspace++; 3412 return rc; 3413 } 3414 3415 /* 3416 * store status at address 3417 * we use have two special cases: 3418 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 3419 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 3420 */ 3421 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 3422 { 3423 unsigned char archmode = 1; 3424 freg_t fprs[NUM_FPRS]; 3425 unsigned int px; 3426 u64 clkcomp, cputm; 3427 int rc; 3428 3429 px = kvm_s390_get_prefix(vcpu); 3430 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 3431 if (write_guest_abs(vcpu, 163, &archmode, 1)) 3432 return -EFAULT; 3433 gpa = 0; 3434 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 3435 if (write_guest_real(vcpu, 163, &archmode, 1)) 3436 return -EFAULT; 3437 gpa = px; 3438 } else 3439 gpa -= __LC_FPREGS_SAVE_AREA; 3440 3441 /* manually convert vector registers if necessary */ 3442 if (MACHINE_HAS_VX) { 3443 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 3444 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 3445 fprs, 128); 3446 } else { 3447 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 3448 vcpu->run->s.regs.fprs, 128); 3449 } 3450 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 3451 vcpu->run->s.regs.gprs, 128); 3452 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 3453 &vcpu->arch.sie_block->gpsw, 16); 3454 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 3455 &px, 4); 3456 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 3457 &vcpu->run->s.regs.fpc, 4); 3458 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 3459 &vcpu->arch.sie_block->todpr, 4); 3460 cputm = kvm_s390_get_cpu_timer(vcpu); 3461 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 3462 &cputm, 8); 3463 clkcomp = vcpu->arch.sie_block->ckc >> 8; 3464 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 3465 &clkcomp, 8); 3466 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 3467 &vcpu->run->s.regs.acrs, 64); 3468 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 3469 &vcpu->arch.sie_block->gcr, 128); 3470 return rc ? -EFAULT : 0; 3471 } 3472 3473 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 3474 { 3475 /* 3476 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 3477 * switch in the run ioctl. 
Let's update our copies before we save 3478 * it into the save area 3479 */ 3480 save_fpu_regs(); 3481 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 3482 save_access_regs(vcpu->run->s.regs.acrs); 3483 3484 return kvm_s390_store_status_unloaded(vcpu, addr); 3485 } 3486 3487 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 3488 { 3489 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 3490 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 3491 } 3492 3493 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 3494 { 3495 unsigned int i; 3496 struct kvm_vcpu *vcpu; 3497 3498 kvm_for_each_vcpu(i, vcpu, kvm) { 3499 __disable_ibs_on_vcpu(vcpu); 3500 } 3501 } 3502 3503 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 3504 { 3505 if (!sclp.has_ibs) 3506 return; 3507 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 3508 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 3509 } 3510 3511 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 3512 { 3513 int i, online_vcpus, started_vcpus = 0; 3514 3515 if (!is_vcpu_stopped(vcpu)) 3516 return; 3517 3518 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 3519 /* Only one cpu at a time may enter/leave the STOPPED state. */ 3520 spin_lock(&vcpu->kvm->arch.start_stop_lock); 3521 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 3522 3523 for (i = 0; i < online_vcpus; i++) { 3524 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 3525 started_vcpus++; 3526 } 3527 3528 if (started_vcpus == 0) { 3529 /* we're the only active VCPU -> speed it up */ 3530 __enable_ibs_on_vcpu(vcpu); 3531 } else if (started_vcpus == 1) { 3532 /* 3533 * As we are starting a second VCPU, we have to disable 3534 * the IBS facility on all VCPUs to remove potentially 3535 * oustanding ENABLE requests. 3536 */ 3537 __disable_ibs_on_all_vcpus(vcpu->kvm); 3538 } 3539 3540 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 3541 /* 3542 * Another VCPU might have used IBS while we were offline. 3543 * Let's play safe and flush the VCPU at startup. 3544 */ 3545 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 3546 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 3547 return; 3548 } 3549 3550 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 3551 { 3552 int i, online_vcpus, started_vcpus = 0; 3553 struct kvm_vcpu *started_vcpu = NULL; 3554 3555 if (is_vcpu_stopped(vcpu)) 3556 return; 3557 3558 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 3559 /* Only one cpu at a time may enter/leave the STOPPED state. */ 3560 spin_lock(&vcpu->kvm->arch.start_stop_lock); 3561 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 3562 3563 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ 3564 kvm_s390_clear_stop_irq(vcpu); 3565 3566 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 3567 __disable_ibs_on_vcpu(vcpu); 3568 3569 for (i = 0; i < online_vcpus; i++) { 3570 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 3571 started_vcpus++; 3572 started_vcpu = vcpu->kvm->vcpus[i]; 3573 } 3574 } 3575 3576 if (started_vcpus == 1) { 3577 /* 3578 * As we only have one VCPU left, we want to enable the 3579 * IBS facility for that VCPU to speed it up. 
3580 */ 3581 __enable_ibs_on_vcpu(started_vcpu); 3582 } 3583 3584 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 3585 return; 3586 } 3587 3588 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 3589 struct kvm_enable_cap *cap) 3590 { 3591 int r; 3592 3593 if (cap->flags) 3594 return -EINVAL; 3595 3596 switch (cap->cap) { 3597 case KVM_CAP_S390_CSS_SUPPORT: 3598 if (!vcpu->kvm->arch.css_support) { 3599 vcpu->kvm->arch.css_support = 1; 3600 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 3601 trace_kvm_s390_enable_css(vcpu->kvm); 3602 } 3603 r = 0; 3604 break; 3605 default: 3606 r = -EINVAL; 3607 break; 3608 } 3609 return r; 3610 } 3611 3612 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 3613 struct kvm_s390_mem_op *mop) 3614 { 3615 void __user *uaddr = (void __user *)mop->buf; 3616 void *tmpbuf = NULL; 3617 int r, srcu_idx; 3618 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 3619 | KVM_S390_MEMOP_F_CHECK_ONLY; 3620 3621 if (mop->flags & ~supported_flags) 3622 return -EINVAL; 3623 3624 if (mop->size > MEM_OP_MAX_SIZE) 3625 return -E2BIG; 3626 3627 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 3628 tmpbuf = vmalloc(mop->size); 3629 if (!tmpbuf) 3630 return -ENOMEM; 3631 } 3632 3633 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 3634 3635 switch (mop->op) { 3636 case KVM_S390_MEMOP_LOGICAL_READ: 3637 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 3638 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 3639 mop->size, GACC_FETCH); 3640 break; 3641 } 3642 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 3643 if (r == 0) { 3644 if (copy_to_user(uaddr, tmpbuf, mop->size)) 3645 r = -EFAULT; 3646 } 3647 break; 3648 case KVM_S390_MEMOP_LOGICAL_WRITE: 3649 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 3650 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 3651 mop->size, GACC_STORE); 3652 break; 3653 } 3654 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 3655 r = -EFAULT; 3656 break; 3657 } 3658 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 3659 break; 3660 default: 3661 r = -EINVAL; 3662 } 3663 3664 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 3665 3666 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 3667 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 3668 3669 vfree(tmpbuf); 3670 return r; 3671 } 3672 3673 long kvm_arch_vcpu_ioctl(struct file *filp, 3674 unsigned int ioctl, unsigned long arg) 3675 { 3676 struct kvm_vcpu *vcpu = filp->private_data; 3677 void __user *argp = (void __user *)arg; 3678 int idx; 3679 long r; 3680 3681 switch (ioctl) { 3682 case KVM_S390_IRQ: { 3683 struct kvm_s390_irq s390irq; 3684 3685 r = -EFAULT; 3686 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 3687 break; 3688 r = kvm_s390_inject_vcpu(vcpu, &s390irq); 3689 break; 3690 } 3691 case KVM_S390_INTERRUPT: { 3692 struct kvm_s390_interrupt s390int; 3693 struct kvm_s390_irq s390irq; 3694 3695 r = -EFAULT; 3696 if (copy_from_user(&s390int, argp, sizeof(s390int))) 3697 break; 3698 if (s390int_to_s390irq(&s390int, &s390irq)) 3699 return -EINVAL; 3700 r = kvm_s390_inject_vcpu(vcpu, &s390irq); 3701 break; 3702 } 3703 case KVM_S390_STORE_STATUS: 3704 idx = srcu_read_lock(&vcpu->kvm->srcu); 3705 r = kvm_s390_vcpu_store_status(vcpu, arg); 3706 srcu_read_unlock(&vcpu->kvm->srcu, idx); 3707 break; 3708 case KVM_S390_SET_INITIAL_PSW: { 3709 psw_t psw; 3710 3711 r = -EFAULT; 3712 if (copy_from_user(&psw, argp, sizeof(psw))) 3713 break; 3714 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 3715 break; 3716 } 3717 case 
KVM_S390_INITIAL_RESET:
3718 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3719 break;
3720 case KVM_SET_ONE_REG:
3721 case KVM_GET_ONE_REG: {
3722 struct kvm_one_reg reg;
3723 r = -EFAULT;
3724 if (copy_from_user(&reg, argp, sizeof(reg)))
3725 break;
3726 if (ioctl == KVM_SET_ONE_REG)
3727 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3728 else
3729 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3730 break;
3731 }
3732 #ifdef CONFIG_KVM_S390_UCONTROL
3733 case KVM_S390_UCAS_MAP: {
3734 struct kvm_s390_ucas_mapping ucasmap;
3735
3736 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3737 r = -EFAULT;
3738 break;
3739 }
3740
3741 if (!kvm_is_ucontrol(vcpu->kvm)) {
3742 r = -EINVAL;
3743 break;
3744 }
3745
3746 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3747 ucasmap.vcpu_addr, ucasmap.length);
3748 break;
3749 }
3750 case KVM_S390_UCAS_UNMAP: {
3751 struct kvm_s390_ucas_mapping ucasmap;
3752
3753 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3754 r = -EFAULT;
3755 break;
3756 }
3757
3758 if (!kvm_is_ucontrol(vcpu->kvm)) {
3759 r = -EINVAL;
3760 break;
3761 }
3762
3763 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3764 ucasmap.length);
3765 break;
3766 }
3767 #endif
3768 case KVM_S390_VCPU_FAULT: {
3769 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3770 break;
3771 }
3772 case KVM_ENABLE_CAP:
3773 {
3774 struct kvm_enable_cap cap;
3775 r = -EFAULT;
3776 if (copy_from_user(&cap, argp, sizeof(cap)))
3777 break;
3778 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3779 break;
3780 }
3781 case KVM_S390_MEM_OP: {
3782 struct kvm_s390_mem_op mem_op;
3783
3784 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3785 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3786 else
3787 r = -EFAULT;
3788 break;
3789 }
3790 case KVM_S390_SET_IRQ_STATE: {
3791 struct kvm_s390_irq_state irq_state;
3792
3793 r = -EFAULT;
3794 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3795 break;
3796 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3797 irq_state.len == 0 ||
3798 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3799 r = -EINVAL;
3800 break;
3801 }
3802 r = kvm_s390_set_irq_state(vcpu,
3803 (void __user *) irq_state.buf,
3804 irq_state.len);
3805 break;
3806 }
3807 case KVM_S390_GET_IRQ_STATE: {
3808 struct kvm_s390_irq_state irq_state;
3809
3810 r = -EFAULT;
3811 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3812 break;
3813 if (irq_state.len == 0) {
3814 r = -EINVAL;
3815 break;
3816 }
3817 r = kvm_s390_get_irq_state(vcpu,
3818 (__u8 __user *) irq_state.buf,
3819 irq_state.len);
3820 break;
3821 }
3822 default:
3823 r = -ENOTTY;
3824 }
3825 return r;
3826 }
3827
3828 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3829 {
3830 #ifdef CONFIG_KVM_S390_UCONTROL
3831 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3832 && (kvm_is_ucontrol(vcpu->kvm))) {
3833 vmf->page = virt_to_page(vcpu->arch.sie_block);
3834 get_page(vmf->page);
3835 return 0;
3836 }
3837 #endif
3838 return VM_FAULT_SIGBUS;
3839 }
3840
3841 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3842 unsigned long npages)
3843 {
3844 return 0;
3845 }
3846
3847 /* Section: memory related */
3848 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3849 struct kvm_memory_slot *memslot,
3850 const struct kvm_userspace_memory_region *mem,
3851 enum kvm_mr_change change)
3852 {
3853 /* A few sanity checks. We can have memory slots which have to be
3854 located/ended at a segment boundary (1MB). The memory in userland is
3855 ok to be fragmented into various different vmas.
It is okay to mmap() 3856 and munmap() stuff in this slot after doing this call at any time */ 3857 3858 if (mem->userspace_addr & 0xffffful) 3859 return -EINVAL; 3860 3861 if (mem->memory_size & 0xffffful) 3862 return -EINVAL; 3863 3864 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 3865 return -EINVAL; 3866 3867 return 0; 3868 } 3869 3870 void kvm_arch_commit_memory_region(struct kvm *kvm, 3871 const struct kvm_userspace_memory_region *mem, 3872 const struct kvm_memory_slot *old, 3873 const struct kvm_memory_slot *new, 3874 enum kvm_mr_change change) 3875 { 3876 int rc; 3877 3878 /* If the basics of the memslot do not change, we do not want 3879 * to update the gmap. Every update causes several unnecessary 3880 * segment translation exceptions. This is usually handled just 3881 * fine by the normal fault handler + gmap, but it will also 3882 * cause faults on the prefix page of running guest CPUs. 3883 */ 3884 if (old->userspace_addr == mem->userspace_addr && 3885 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && 3886 old->npages * PAGE_SIZE == mem->memory_size) 3887 return; 3888 3889 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 3890 mem->guest_phys_addr, mem->memory_size); 3891 if (rc) 3892 pr_warn("failed to commit memory region\n"); 3893 return; 3894 } 3895 3896 static inline unsigned long nonhyp_mask(int i) 3897 { 3898 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 3899 3900 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 3901 } 3902 3903 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 3904 { 3905 vcpu->valid_wakeup = false; 3906 } 3907 3908 static int __init kvm_s390_init(void) 3909 { 3910 int i; 3911 3912 if (!sclp.has_sief2) { 3913 pr_info("SIE not available\n"); 3914 return -ENODEV; 3915 } 3916 3917 for (i = 0; i < 16; i++) 3918 kvm_s390_fac_list_mask[i] |= 3919 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); 3920 3921 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 3922 } 3923 3924 static void __exit kvm_s390_exit(void) 3925 { 3926 kvm_exit(); 3927 } 3928 3929 module_init(kvm_s390_init); 3930 module_exit(kvm_s390_exit); 3931 3932 /* 3933 * Enable autoloading of the kvm module. 3934 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 3935 * since x86 takes a different approach. 3936 */ 3937 #include <linux/miscdevice.h> 3938 MODULE_ALIAS_MISCDEV(KVM_MINOR); 3939 MODULE_ALIAS("devname:kvm"); 3940
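/*
 * Illustrative only: registering guest memory from user space in a way
 * that passes the sanity checks in kvm_arch_prepare_memory_region()
 * above, i.e. with the user address and the size both 1 MB aligned.
 * guest_ram and vm_fd are assumed to exist; error handling is omitted.
 *
 *	struct kvm_userspace_memory_region mem = {
 *		.slot            = 0,
 *		.flags           = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256UL << 20,			// 256 MB
 *		.userspace_addr  = (unsigned long)guest_ram,	// 1 MB aligned
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
 */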