// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("pfault_sync", pfault_sync),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
VCPU_STAT("deliver_program", deliver_program), 96 VCPU_STAT("deliver_io", deliver_io), 97 VCPU_STAT("deliver_machine_check", deliver_machine_check), 98 VCPU_STAT("exit_wait_state", exit_wait_state), 99 VCPU_STAT("inject_ckc", inject_ckc), 100 VCPU_STAT("inject_cputm", inject_cputm), 101 VCPU_STAT("inject_external_call", inject_external_call), 102 VM_STAT("inject_float_mchk", inject_float_mchk), 103 VCPU_STAT("inject_emergency_signal", inject_emergency_signal), 104 VM_STAT("inject_io", inject_io), 105 VCPU_STAT("inject_mchk", inject_mchk), 106 VM_STAT("inject_pfault_done", inject_pfault_done), 107 VCPU_STAT("inject_program", inject_program), 108 VCPU_STAT("inject_restart", inject_restart), 109 VM_STAT("inject_service_signal", inject_service_signal), 110 VCPU_STAT("inject_set_prefix", inject_set_prefix), 111 VCPU_STAT("inject_stop_signal", inject_stop_signal), 112 VCPU_STAT("inject_pfault_init", inject_pfault_init), 113 VM_STAT("inject_virtio", inject_virtio), 114 VCPU_STAT("instruction_epsw", instruction_epsw), 115 VCPU_STAT("instruction_gs", instruction_gs), 116 VCPU_STAT("instruction_io_other", instruction_io_other), 117 VCPU_STAT("instruction_lpsw", instruction_lpsw), 118 VCPU_STAT("instruction_lpswe", instruction_lpswe), 119 VCPU_STAT("instruction_pfmf", instruction_pfmf), 120 VCPU_STAT("instruction_ptff", instruction_ptff), 121 VCPU_STAT("instruction_stidp", instruction_stidp), 122 VCPU_STAT("instruction_sck", instruction_sck), 123 VCPU_STAT("instruction_sckpf", instruction_sckpf), 124 VCPU_STAT("instruction_spx", instruction_spx), 125 VCPU_STAT("instruction_stpx", instruction_stpx), 126 VCPU_STAT("instruction_stap", instruction_stap), 127 VCPU_STAT("instruction_iske", instruction_iske), 128 VCPU_STAT("instruction_ri", instruction_ri), 129 VCPU_STAT("instruction_rrbe", instruction_rrbe), 130 VCPU_STAT("instruction_sske", instruction_sske), 131 VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock), 132 VCPU_STAT("instruction_essa", instruction_essa), 133 VCPU_STAT("instruction_stsi", instruction_stsi), 134 VCPU_STAT("instruction_stfl", instruction_stfl), 135 VCPU_STAT("instruction_tb", instruction_tb), 136 VCPU_STAT("instruction_tpi", instruction_tpi), 137 VCPU_STAT("instruction_tprot", instruction_tprot), 138 VCPU_STAT("instruction_tsch", instruction_tsch), 139 VCPU_STAT("instruction_sthyi", instruction_sthyi), 140 VCPU_STAT("instruction_sie", instruction_sie), 141 VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense), 142 VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running), 143 VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call), 144 VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency), 145 VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency), 146 VCPU_STAT("instruction_sigp_start", instruction_sigp_start), 147 VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop), 148 VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status), 149 VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status), 150 VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status), 151 VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch), 152 VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix), 153 VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart), 154 VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset), 155 VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset), 
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("diag_9c_forward", diagnose_9c_forward),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
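
/*
 * Illustrative usage (not part of the original source): the parameters above
 * are set at module load time, e.g.
 *
 *	modprobe kvm nested=1 halt_poll_max_steal=25
 *
 * the 0644 ones (halt_poll_max_steal, use_gisa, diag9c_forwarding_hz) can
 * also be changed later via /sys/module/kvm/parameters/.  Note that hpage
 * is only usable when nested virtualization is left disabled, as the
 * comment above it says.
 */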

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
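
/*
 * Illustrative note (not part of the original source): kvm_clock_sync_scb()
 * above treats epdx:epoch as one 128-bit signed value.  For example, if the
 * host TOD is stepped forward by 16, -16 is added to the 64-bit epoch;
 * delta_idx sign-extends that -16 into epdx, and the "epoch < delta" test
 * adds the carry back whenever the 64-bit addition wrapped, so the combined
 * 128-bit epoch stays consistent.
 */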

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
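
/*
 * Illustrative note (not part of the original source): the loop above works
 * through the memslot one segment at a time, i.e. _PAGE_ENTRIES 4 KB pages
 * per iteration, transferring the dirty bits collected in the gmap into the
 * memslot's dirty bitmap so that kvm_vm_ioctl_get_dirty_log() below can hand
 * them to userspace.
 */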

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
"(not available)" : "(success)"); 749 break; 750 case KVM_CAP_S390_GS: 751 r = -EINVAL; 752 mutex_lock(&kvm->lock); 753 if (kvm->created_vcpus) { 754 r = -EBUSY; 755 } else if (test_facility(133)) { 756 set_kvm_facility(kvm->arch.model.fac_mask, 133); 757 set_kvm_facility(kvm->arch.model.fac_list, 133); 758 r = 0; 759 } 760 mutex_unlock(&kvm->lock); 761 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 762 r ? "(not available)" : "(success)"); 763 break; 764 case KVM_CAP_S390_HPAGE_1M: 765 mutex_lock(&kvm->lock); 766 if (kvm->created_vcpus) 767 r = -EBUSY; 768 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 769 r = -EINVAL; 770 else { 771 r = 0; 772 mmap_write_lock(kvm->mm); 773 kvm->mm->context.allow_gmap_hpage_1m = 1; 774 mmap_write_unlock(kvm->mm); 775 /* 776 * We might have to create fake 4k page 777 * tables. To avoid that the hardware works on 778 * stale PGSTEs, we emulate these instructions. 779 */ 780 kvm->arch.use_skf = 0; 781 kvm->arch.use_pfmfi = 0; 782 } 783 mutex_unlock(&kvm->lock); 784 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 785 r ? "(not available)" : "(success)"); 786 break; 787 case KVM_CAP_S390_USER_STSI: 788 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 789 kvm->arch.user_stsi = 1; 790 r = 0; 791 break; 792 case KVM_CAP_S390_USER_INSTR0: 793 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 794 kvm->arch.user_instr0 = 1; 795 icpt_operexc_on_all_vcpus(kvm); 796 r = 0; 797 break; 798 default: 799 r = -EINVAL; 800 break; 801 } 802 return r; 803 } 804 805 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 806 { 807 int ret; 808 809 switch (attr->attr) { 810 case KVM_S390_VM_MEM_LIMIT_SIZE: 811 ret = 0; 812 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 813 kvm->arch.mem_limit); 814 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 815 ret = -EFAULT; 816 break; 817 default: 818 ret = -ENXIO; 819 break; 820 } 821 return ret; 822 } 823 824 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 825 { 826 int ret; 827 unsigned int idx; 828 switch (attr->attr) { 829 case KVM_S390_VM_MEM_ENABLE_CMMA: 830 ret = -ENXIO; 831 if (!sclp.has_cmma) 832 break; 833 834 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 835 mutex_lock(&kvm->lock); 836 if (kvm->created_vcpus) 837 ret = -EBUSY; 838 else if (kvm->mm->context.allow_gmap_hpage_1m) 839 ret = -EINVAL; 840 else { 841 kvm->arch.use_cmma = 1; 842 /* Not compatible with cmma. 
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
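
/*
 * Illustrative userspace sketch (not part of the original source): migration
 * mode is toggled through the KVM_SET_DEVICE_ATTR ioctl on the VM file
 * descriptor (vm_fd is a placeholder name), e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * which ends up in kvm_s390_vm_set_migration() below.
 */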

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
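
/*
 * Illustrative note (not part of the original source): as computed above, the
 * guest-visible TOD is the host TOD plus kvm->arch.epoch, with kvm->arch.epdx
 * providing the high-order extension (and carry) when the multiple-epoch
 * facility (139) is available.  The KVM_S390_VM_TOD get handlers below merely
 * transport this value to userspace.
 */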

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
"GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1491 data.feat[0], 1492 data.feat[1], 1493 data.feat[2]); 1494 return 0; 1495 } 1496 1497 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1498 struct kvm_device_attr *attr) 1499 { 1500 struct kvm_s390_vm_cpu_feat data; 1501 1502 bitmap_copy((unsigned long *) data.feat, 1503 kvm_s390_available_cpu_feat, 1504 KVM_S390_VM_CPU_FEAT_NR_BITS); 1505 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1506 return -EFAULT; 1507 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1508 data.feat[0], 1509 data.feat[1], 1510 data.feat[2]); 1511 return 0; 1512 } 1513 1514 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1515 struct kvm_device_attr *attr) 1516 { 1517 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1518 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1519 return -EFAULT; 1520 1521 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1522 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1523 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1524 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1525 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1526 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1527 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1528 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1529 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1530 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1531 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1532 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1533 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1534 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1535 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1536 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1537 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1538 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1539 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1540 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1541 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1542 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1543 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1544 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1545 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1546 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1547 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1548 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1549 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1550 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1552 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1553 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1556 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1557 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1558 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1559 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1560 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1561 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1562 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", 1625 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1626 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1627 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1628 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1629 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1630 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1631 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1632 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1633 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1634 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1635 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1636 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1637 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1638 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1639 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1640 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1641 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1642 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1643 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1644 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1645 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1646 1647 return 0; 1648 } 1649 1650 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1651 { 1652 int ret = -ENXIO; 1653 1654 switch (attr->attr) { 1655 case KVM_S390_VM_CPU_PROCESSOR: 1656 ret = kvm_s390_get_processor(kvm, attr); 1657 break; 1658 case KVM_S390_VM_CPU_MACHINE: 1659 ret = kvm_s390_get_machine(kvm, attr); 1660 break; 1661 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1662 ret = kvm_s390_get_processor_feat(kvm, attr); 1663 break; 1664 case KVM_S390_VM_CPU_MACHINE_FEAT: 1665 ret = kvm_s390_get_machine_feat(kvm, attr); 1666 break; 1667 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1668 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1669 break; 1670 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1671 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1672 break; 1673 } 1674 return ret; 1675 } 1676 1677 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1678 { 1679 int ret; 1680 1681 switch (attr->group) { 1682 case KVM_S390_VM_MEM_CTRL: 1683 ret = kvm_s390_set_mem_control(kvm, attr); 1684 break; 1685 case KVM_S390_VM_TOD: 1686 ret = kvm_s390_set_tod(kvm, attr); 1687 break; 1688 case KVM_S390_VM_CPU_MODEL: 1689 ret = kvm_s390_set_cpu_model(kvm, attr); 1690 break; 1691 case KVM_S390_VM_CRYPTO: 1692 ret = kvm_s390_vm_set_crypto(kvm, attr); 1693 break; 1694 case KVM_S390_VM_MIGRATION: 1695 ret = kvm_s390_vm_set_migration(kvm, attr); 1696 break; 1697 default: 1698 ret = -ENXIO; 1699 break; 1700 } 1701 1702 return ret; 1703 } 1704 1705 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1706 { 1707 int ret; 1708 1709 switch (attr->group) { 1710 case KVM_S390_VM_MEM_CTRL: 1711 ret = kvm_s390_get_mem_control(kvm, attr); 1712 break; 1713 case KVM_S390_VM_TOD: 1714 ret = kvm_s390_get_tod(kvm, attr); 1715 break; 1716 case KVM_S390_VM_CPU_MODEL: 1717 ret = kvm_s390_get_cpu_model(kvm, attr); 1718 break; 1719 case KVM_S390_VM_MIGRATION: 1720 ret = kvm_s390_vm_get_migration(kvm, attr); 1721 break; 1722 default: 1723 ret = -ENXIO; 1724 break; 1725 } 1726 1727 return ret; 1728 } 1729 1730 static int 
kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1731 { 1732 int ret; 1733 1734 switch (attr->group) { 1735 case KVM_S390_VM_MEM_CTRL: 1736 switch (attr->attr) { 1737 case KVM_S390_VM_MEM_ENABLE_CMMA: 1738 case KVM_S390_VM_MEM_CLR_CMMA: 1739 ret = sclp.has_cmma ? 0 : -ENXIO; 1740 break; 1741 case KVM_S390_VM_MEM_LIMIT_SIZE: 1742 ret = 0; 1743 break; 1744 default: 1745 ret = -ENXIO; 1746 break; 1747 } 1748 break; 1749 case KVM_S390_VM_TOD: 1750 switch (attr->attr) { 1751 case KVM_S390_VM_TOD_LOW: 1752 case KVM_S390_VM_TOD_HIGH: 1753 ret = 0; 1754 break; 1755 default: 1756 ret = -ENXIO; 1757 break; 1758 } 1759 break; 1760 case KVM_S390_VM_CPU_MODEL: 1761 switch (attr->attr) { 1762 case KVM_S390_VM_CPU_PROCESSOR: 1763 case KVM_S390_VM_CPU_MACHINE: 1764 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1765 case KVM_S390_VM_CPU_MACHINE_FEAT: 1766 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1767 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1768 ret = 0; 1769 break; 1770 default: 1771 ret = -ENXIO; 1772 break; 1773 } 1774 break; 1775 case KVM_S390_VM_CRYPTO: 1776 switch (attr->attr) { 1777 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1778 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1779 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1780 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1781 ret = 0; 1782 break; 1783 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1784 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1785 ret = ap_instructions_available() ? 0 : -ENXIO; 1786 break; 1787 default: 1788 ret = -ENXIO; 1789 break; 1790 } 1791 break; 1792 case KVM_S390_VM_MIGRATION: 1793 ret = 0; 1794 break; 1795 default: 1796 ret = -ENXIO; 1797 break; 1798 } 1799 1800 return ret; 1801 } 1802 1803 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1804 { 1805 uint8_t *keys; 1806 uint64_t hva; 1807 int srcu_idx, i, r = 0; 1808 1809 if (args->flags != 0) 1810 return -EINVAL; 1811 1812 /* Is this guest using storage keys? 
*/ 1813 if (!mm_uses_skeys(current->mm)) 1814 return KVM_S390_GET_SKEYS_NONE; 1815 1816 /* Enforce sane limit on memory allocation */ 1817 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1818 return -EINVAL; 1819 1820 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1821 if (!keys) 1822 return -ENOMEM; 1823 1824 mmap_read_lock(current->mm); 1825 srcu_idx = srcu_read_lock(&kvm->srcu); 1826 for (i = 0; i < args->count; i++) { 1827 hva = gfn_to_hva(kvm, args->start_gfn + i); 1828 if (kvm_is_error_hva(hva)) { 1829 r = -EFAULT; 1830 break; 1831 } 1832 1833 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1834 if (r) 1835 break; 1836 } 1837 srcu_read_unlock(&kvm->srcu, srcu_idx); 1838 mmap_read_unlock(current->mm); 1839 1840 if (!r) { 1841 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1842 sizeof(uint8_t) * args->count); 1843 if (r) 1844 r = -EFAULT; 1845 } 1846 1847 kvfree(keys); 1848 return r; 1849 } 1850 1851 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1852 { 1853 uint8_t *keys; 1854 uint64_t hva; 1855 int srcu_idx, i, r = 0; 1856 bool unlocked; 1857 1858 if (args->flags != 0) 1859 return -EINVAL; 1860 1861 /* Enforce sane limit on memory allocation */ 1862 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1863 return -EINVAL; 1864 1865 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1866 if (!keys) 1867 return -ENOMEM; 1868 1869 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1870 sizeof(uint8_t) * args->count); 1871 if (r) { 1872 r = -EFAULT; 1873 goto out; 1874 } 1875 1876 /* Enable storage key handling for the guest */ 1877 r = s390_enable_skey(); 1878 if (r) 1879 goto out; 1880 1881 i = 0; 1882 mmap_read_lock(current->mm); 1883 srcu_idx = srcu_read_lock(&kvm->srcu); 1884 while (i < args->count) { 1885 unlocked = false; 1886 hva = gfn_to_hva(kvm, args->start_gfn + i); 1887 if (kvm_is_error_hva(hva)) { 1888 r = -EFAULT; 1889 break; 1890 } 1891 1892 /* Lowest order bit is reserved */ 1893 if (keys[i] & 0x01) { 1894 r = -EINVAL; 1895 break; 1896 } 1897 1898 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1899 if (r) { 1900 r = fixup_user_fault(current->mm, hva, 1901 FAULT_FLAG_WRITE, &unlocked); 1902 if (r) 1903 break; 1904 } 1905 if (!r) 1906 i++; 1907 } 1908 srcu_read_unlock(&kvm->srcu, srcu_idx); 1909 mmap_read_unlock(current->mm); 1910 out: 1911 kvfree(keys); 1912 return r; 1913 } 1914 1915 /* 1916 * Base address and length must be sent at the start of each block, therefore 1917 * it's cheaper to send some clean data, as long as it's less than the size of 1918 * two longs. 1919 */ 1920 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1921 /* for consistency */ 1922 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1923 1924 /* 1925 * Similar to gfn_to_memslot, but returns the index of a memslot also when the 1926 * address falls in a hole. In that case the index of one of the memslots 1927 * bordering the hole is returned. 
1928 */ 1929 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) 1930 { 1931 int start = 0, end = slots->used_slots; 1932 int slot = atomic_read(&slots->lru_slot); 1933 struct kvm_memory_slot *memslots = slots->memslots; 1934 1935 if (gfn >= memslots[slot].base_gfn && 1936 gfn < memslots[slot].base_gfn + memslots[slot].npages) 1937 return slot; 1938 1939 while (start < end) { 1940 slot = start + (end - start) / 2; 1941 1942 if (gfn >= memslots[slot].base_gfn) 1943 end = slot; 1944 else 1945 start = slot + 1; 1946 } 1947 1948 if (start >= slots->used_slots) 1949 return slots->used_slots - 1; 1950 1951 if (gfn >= memslots[start].base_gfn && 1952 gfn < memslots[start].base_gfn + memslots[start].npages) { 1953 atomic_set(&slots->lru_slot, start); 1954 } 1955 1956 return start; 1957 } 1958 1959 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1960 u8 *res, unsigned long bufsize) 1961 { 1962 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1963 1964 args->count = 0; 1965 while (args->count < bufsize) { 1966 hva = gfn_to_hva(kvm, cur_gfn); 1967 /* 1968 * We return an error if the first value was invalid, but we 1969 * return successfully if at least one value was copied. 1970 */ 1971 if (kvm_is_error_hva(hva)) 1972 return args->count ? 0 : -EFAULT; 1973 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1974 pgstev = 0; 1975 res[args->count++] = (pgstev >> 24) & 0x43; 1976 cur_gfn++; 1977 } 1978 1979 return 0; 1980 } 1981 1982 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1983 unsigned long cur_gfn) 1984 { 1985 int slotidx = gfn_to_memslot_approx(slots, cur_gfn); 1986 struct kvm_memory_slot *ms = slots->memslots + slotidx; 1987 unsigned long ofs = cur_gfn - ms->base_gfn; 1988 1989 if (ms->base_gfn + ms->npages <= cur_gfn) { 1990 slotidx--; 1991 /* If we are above the highest slot, wrap around */ 1992 if (slotidx < 0) 1993 slotidx = slots->used_slots - 1; 1994 1995 ms = slots->memslots + slotidx; 1996 ofs = 0; 1997 } 1998 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1999 while ((slotidx > 0) && (ofs >= ms->npages)) { 2000 slotidx--; 2001 ms = slots->memslots + slotidx; 2002 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); 2003 } 2004 return ms->base_gfn + ofs; 2005 } 2006 2007 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2008 u8 *res, unsigned long bufsize) 2009 { 2010 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2011 struct kvm_memslots *slots = kvm_memslots(kvm); 2012 struct kvm_memory_slot *ms; 2013 2014 if (unlikely(!slots->used_slots)) 2015 return 0; 2016 2017 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2018 ms = gfn_to_memslot(kvm, cur_gfn); 2019 args->count = 0; 2020 args->start_gfn = cur_gfn; 2021 if (!ms) 2022 return 0; 2023 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2024 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; 2025 2026 while (args->count < bufsize) { 2027 hva = gfn_to_hva(kvm, cur_gfn); 2028 if (kvm_is_error_hva(hva)) 2029 return 0; 2030 /* Decrement only if we actually flipped the bit to 0 */ 2031 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2032 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2033 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2034 pgstev = 0; 2035 /* Save the value */ 2036 res[args->count++] = (pgstev >> 24) & 0x43; 2037 /* If the next bit is too far away, stop. 
*/ 2038 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2039 return 0; 2040 /* If we reached the previous "next", find the next one */ 2041 if (cur_gfn == next_gfn) 2042 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2043 /* Reached the end of memory or of the buffer, stop */ 2044 if ((next_gfn >= mem_end) || 2045 (next_gfn - args->start_gfn >= bufsize)) 2046 return 0; 2047 cur_gfn++; 2048 /* Reached the end of the current memslot, take the next one. */ 2049 if (cur_gfn - ms->base_gfn >= ms->npages) { 2050 ms = gfn_to_memslot(kvm, cur_gfn); 2051 if (!ms) 2052 return 0; 2053 } 2054 } 2055 return 0; 2056 } 2057 2058 /* 2059 * This function searches for the next page with dirty CMMA attributes, and 2060 * saves the attributes in the buffer up to either the end of the buffer or 2061 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2062 * no trailing clean bytes are saved. 2063 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2064 * output buffer will indicate 0 as length. 2065 */ 2066 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2067 struct kvm_s390_cmma_log *args) 2068 { 2069 unsigned long bufsize; 2070 int srcu_idx, peek, ret; 2071 u8 *values; 2072 2073 if (!kvm->arch.use_cmma) 2074 return -ENXIO; 2075 /* Invalid/unsupported flags were specified */ 2076 if (args->flags & ~KVM_S390_CMMA_PEEK) 2077 return -EINVAL; 2078 /* Migration mode query, and we are not doing a migration */ 2079 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2080 if (!peek && !kvm->arch.migration_mode) 2081 return -EINVAL; 2082 /* CMMA is disabled or was not used, or the buffer has length zero */ 2083 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2084 if (!bufsize || !kvm->mm->context.uses_cmm) { 2085 memset(args, 0, sizeof(*args)); 2086 return 0; 2087 } 2088 /* We are not peeking, and there are no dirty pages */ 2089 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2090 memset(args, 0, sizeof(*args)); 2091 return 0; 2092 } 2093 2094 values = vmalloc(bufsize); 2095 if (!values) 2096 return -ENOMEM; 2097 2098 mmap_read_lock(kvm->mm); 2099 srcu_idx = srcu_read_lock(&kvm->srcu); 2100 if (peek) 2101 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2102 else 2103 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2104 srcu_read_unlock(&kvm->srcu, srcu_idx); 2105 mmap_read_unlock(kvm->mm); 2106 2107 if (kvm->arch.migration_mode) 2108 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2109 else 2110 args->remaining = 0; 2111 2112 if (copy_to_user((void __user *)args->values, values, args->count)) 2113 ret = -EFAULT; 2114 2115 vfree(values); 2116 return ret; 2117 } 2118 2119 /* 2120 * This function sets the CMMA attributes for the given pages. If the input 2121 * buffer has zero length, no action is taken, otherwise the attributes are 2122 * set and the mm->context.uses_cmm flag is set. 
2123 */ 2124 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2125 const struct kvm_s390_cmma_log *args) 2126 { 2127 unsigned long hva, mask, pgstev, i; 2128 uint8_t *bits; 2129 int srcu_idx, r = 0; 2130 2131 mask = args->mask; 2132 2133 if (!kvm->arch.use_cmma) 2134 return -ENXIO; 2135 /* invalid/unsupported flags */ 2136 if (args->flags != 0) 2137 return -EINVAL; 2138 /* Enforce sane limit on memory allocation */ 2139 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2140 return -EINVAL; 2141 /* Nothing to do */ 2142 if (args->count == 0) 2143 return 0; 2144 2145 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2146 if (!bits) 2147 return -ENOMEM; 2148 2149 r = copy_from_user(bits, (void __user *)args->values, args->count); 2150 if (r) { 2151 r = -EFAULT; 2152 goto out; 2153 } 2154 2155 mmap_read_lock(kvm->mm); 2156 srcu_idx = srcu_read_lock(&kvm->srcu); 2157 for (i = 0; i < args->count; i++) { 2158 hva = gfn_to_hva(kvm, args->start_gfn + i); 2159 if (kvm_is_error_hva(hva)) { 2160 r = -EFAULT; 2161 break; 2162 } 2163 2164 pgstev = bits[i]; 2165 pgstev = pgstev << 24; 2166 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2167 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2168 } 2169 srcu_read_unlock(&kvm->srcu, srcu_idx); 2170 mmap_read_unlock(kvm->mm); 2171 2172 if (!kvm->mm->context.uses_cmm) { 2173 mmap_write_lock(kvm->mm); 2174 kvm->mm->context.uses_cmm = 1; 2175 mmap_write_unlock(kvm->mm); 2176 } 2177 out: 2178 vfree(bits); 2179 return r; 2180 } 2181 2182 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2183 { 2184 struct kvm_vcpu *vcpu; 2185 u16 rc, rrc; 2186 int ret = 0; 2187 int i; 2188 2189 /* 2190 * We ignore failures and try to destroy as many CPUs as possible. 2191 * At the same time we must not free the assigned resources when 2192 * this fails, as the ultravisor has still access to that memory. 2193 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2194 * behind. 2195 * We want to return the first failure rc and rrc, though. 2196 */ 2197 kvm_for_each_vcpu(i, vcpu, kvm) { 2198 mutex_lock(&vcpu->mutex); 2199 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2200 *rcp = rc; 2201 *rrcp = rrc; 2202 ret = -EIO; 2203 } 2204 mutex_unlock(&vcpu->mutex); 2205 } 2206 return ret; 2207 } 2208 2209 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2210 { 2211 int i, r = 0; 2212 u16 dummy; 2213 2214 struct kvm_vcpu *vcpu; 2215 2216 kvm_for_each_vcpu(i, vcpu, kvm) { 2217 mutex_lock(&vcpu->mutex); 2218 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2219 mutex_unlock(&vcpu->mutex); 2220 if (r) 2221 break; 2222 } 2223 if (r) 2224 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2225 return r; 2226 } 2227 2228 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2229 { 2230 int r = 0; 2231 u16 dummy; 2232 void __user *argp = (void __user *)cmd->data; 2233 2234 switch (cmd->cmd) { 2235 case KVM_PV_ENABLE: { 2236 r = -EINVAL; 2237 if (kvm_s390_pv_is_protected(kvm)) 2238 break; 2239 2240 /* 2241 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2242 * esca, we need no cleanup in the error cases below 2243 */ 2244 r = sca_switch_to_extended(kvm); 2245 if (r) 2246 break; 2247 2248 mmap_write_lock(current->mm); 2249 r = gmap_mark_unmergeable(); 2250 mmap_write_unlock(current->mm); 2251 if (r) 2252 break; 2253 2254 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2255 if (r) 2256 break; 2257 2258 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2259 if (r) 2260 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2261 2262 /* we need to block service interrupts from now on */ 2263 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2264 break; 2265 } 2266 case KVM_PV_DISABLE: { 2267 r = -EINVAL; 2268 if (!kvm_s390_pv_is_protected(kvm)) 2269 break; 2270 2271 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2272 /* 2273 * If a CPU could not be destroyed, destroy VM will also fail. 2274 * There is no point in trying to destroy it. Instead return 2275 * the rc and rrc from the first CPU that failed destroying. 2276 */ 2277 if (r) 2278 break; 2279 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2280 2281 /* no need to block service interrupts any more */ 2282 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2283 break; 2284 } 2285 case KVM_PV_SET_SEC_PARMS: { 2286 struct kvm_s390_pv_sec_parm parms = {}; 2287 void *hdr; 2288 2289 r = -EINVAL; 2290 if (!kvm_s390_pv_is_protected(kvm)) 2291 break; 2292 2293 r = -EFAULT; 2294 if (copy_from_user(&parms, argp, sizeof(parms))) 2295 break; 2296 2297 /* Currently restricted to 8KB */ 2298 r = -EINVAL; 2299 if (parms.length > PAGE_SIZE * 2) 2300 break; 2301 2302 r = -ENOMEM; 2303 hdr = vmalloc(parms.length); 2304 if (!hdr) 2305 break; 2306 2307 r = -EFAULT; 2308 if (!copy_from_user(hdr, (void __user *)parms.origin, 2309 parms.length)) 2310 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2311 &cmd->rc, &cmd->rrc); 2312 2313 vfree(hdr); 2314 break; 2315 } 2316 case KVM_PV_UNPACK: { 2317 struct kvm_s390_pv_unp unp = {}; 2318 2319 r = -EINVAL; 2320 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2321 break; 2322 2323 r = -EFAULT; 2324 if (copy_from_user(&unp, argp, sizeof(unp))) 2325 break; 2326 2327 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2328 &cmd->rc, &cmd->rrc); 2329 break; 2330 } 2331 case KVM_PV_VERIFY: { 2332 r = -EINVAL; 2333 if (!kvm_s390_pv_is_protected(kvm)) 2334 break; 2335 2336 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2337 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2338 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2339 cmd->rrc); 2340 break; 2341 } 2342 case KVM_PV_PREP_RESET: { 2343 r = -EINVAL; 2344 if (!kvm_s390_pv_is_protected(kvm)) 2345 break; 2346 2347 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2348 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2349 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2350 cmd->rc, cmd->rrc); 2351 break; 2352 } 2353 case KVM_PV_UNSHARE_ALL: { 2354 r = -EINVAL; 2355 if (!kvm_s390_pv_is_protected(kvm)) 2356 break; 2357 2358 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2359 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2360 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2361 cmd->rc, cmd->rrc); 2362 break; 2363 } 2364 default: 2365 r = -ENOTTY; 2366 } 2367 return r; 2368 } 2369 2370 long kvm_arch_vm_ioctl(struct file *filp, 2371 unsigned int ioctl, unsigned long arg) 2372 { 2373 struct kvm *kvm = filp->private_data; 2374 void __user *argp = (void __user *)arg; 2375 struct kvm_device_attr attr; 2376 
int r; 2377 2378 switch (ioctl) { 2379 case KVM_S390_INTERRUPT: { 2380 struct kvm_s390_interrupt s390int; 2381 2382 r = -EFAULT; 2383 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2384 break; 2385 r = kvm_s390_inject_vm(kvm, &s390int); 2386 break; 2387 } 2388 case KVM_CREATE_IRQCHIP: { 2389 struct kvm_irq_routing_entry routing; 2390 2391 r = -EINVAL; 2392 if (kvm->arch.use_irqchip) { 2393 /* Set up dummy routing. */ 2394 memset(&routing, 0, sizeof(routing)); 2395 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2396 } 2397 break; 2398 } 2399 case KVM_SET_DEVICE_ATTR: { 2400 r = -EFAULT; 2401 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2402 break; 2403 r = kvm_s390_vm_set_attr(kvm, &attr); 2404 break; 2405 } 2406 case KVM_GET_DEVICE_ATTR: { 2407 r = -EFAULT; 2408 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2409 break; 2410 r = kvm_s390_vm_get_attr(kvm, &attr); 2411 break; 2412 } 2413 case KVM_HAS_DEVICE_ATTR: { 2414 r = -EFAULT; 2415 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2416 break; 2417 r = kvm_s390_vm_has_attr(kvm, &attr); 2418 break; 2419 } 2420 case KVM_S390_GET_SKEYS: { 2421 struct kvm_s390_skeys args; 2422 2423 r = -EFAULT; 2424 if (copy_from_user(&args, argp, 2425 sizeof(struct kvm_s390_skeys))) 2426 break; 2427 r = kvm_s390_get_skeys(kvm, &args); 2428 break; 2429 } 2430 case KVM_S390_SET_SKEYS: { 2431 struct kvm_s390_skeys args; 2432 2433 r = -EFAULT; 2434 if (copy_from_user(&args, argp, 2435 sizeof(struct kvm_s390_skeys))) 2436 break; 2437 r = kvm_s390_set_skeys(kvm, &args); 2438 break; 2439 } 2440 case KVM_S390_GET_CMMA_BITS: { 2441 struct kvm_s390_cmma_log args; 2442 2443 r = -EFAULT; 2444 if (copy_from_user(&args, argp, sizeof(args))) 2445 break; 2446 mutex_lock(&kvm->slots_lock); 2447 r = kvm_s390_get_cmma_bits(kvm, &args); 2448 mutex_unlock(&kvm->slots_lock); 2449 if (!r) { 2450 r = copy_to_user(argp, &args, sizeof(args)); 2451 if (r) 2452 r = -EFAULT; 2453 } 2454 break; 2455 } 2456 case KVM_S390_SET_CMMA_BITS: { 2457 struct kvm_s390_cmma_log args; 2458 2459 r = -EFAULT; 2460 if (copy_from_user(&args, argp, sizeof(args))) 2461 break; 2462 mutex_lock(&kvm->slots_lock); 2463 r = kvm_s390_set_cmma_bits(kvm, &args); 2464 mutex_unlock(&kvm->slots_lock); 2465 break; 2466 } 2467 case KVM_S390_PV_COMMAND: { 2468 struct kvm_pv_cmd args; 2469 2470 /* protvirt means user sigp */ 2471 kvm->arch.user_cpu_state_ctrl = 1; 2472 r = 0; 2473 if (!is_prot_virt_host()) { 2474 r = -EINVAL; 2475 break; 2476 } 2477 if (copy_from_user(&args, argp, sizeof(args))) { 2478 r = -EFAULT; 2479 break; 2480 } 2481 if (args.flags) { 2482 r = -EINVAL; 2483 break; 2484 } 2485 mutex_lock(&kvm->lock); 2486 r = kvm_s390_handle_pv(kvm, &args); 2487 mutex_unlock(&kvm->lock); 2488 if (copy_to_user(argp, &args, sizeof(args))) { 2489 r = -EFAULT; 2490 break; 2491 } 2492 break; 2493 } 2494 default: 2495 r = -ENOTTY; 2496 } 2497 2498 return r; 2499 } 2500 2501 static int kvm_s390_apxa_installed(void) 2502 { 2503 struct ap_config_info info; 2504 2505 if (ap_instructions_available()) { 2506 if (ap_qci(&info) == 0) 2507 return info.apxa; 2508 } 2509 2510 return 0; 2511 } 2512 2513 /* 2514 * The format of the crypto control block (CRYCB) is specified in the 3 low 2515 * order bits of the CRYCB designation (CRYCBD) field as follows: 2516 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2517 * AP extended addressing (APXA) facility are installed. 2518 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2519 * Format 2: Both the APXA and MSAX3 facilities are installed 2520 */ 2521 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2522 { 2523 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2524 2525 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2526 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2527 2528 /* Check whether MSAX3 is installed */ 2529 if (!test_kvm_facility(kvm, 76)) 2530 return; 2531 2532 if (kvm_s390_apxa_installed()) 2533 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2534 else 2535 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2536 } 2537 2538 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2539 unsigned long *aqm, unsigned long *adm) 2540 { 2541 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2542 2543 mutex_lock(&kvm->lock); 2544 kvm_s390_vcpu_block_all(kvm); 2545 2546 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2547 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2548 memcpy(crycb->apcb1.apm, apm, 32); 2549 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2550 apm[0], apm[1], apm[2], apm[3]); 2551 memcpy(crycb->apcb1.aqm, aqm, 32); 2552 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2553 aqm[0], aqm[1], aqm[2], aqm[3]); 2554 memcpy(crycb->apcb1.adm, adm, 32); 2555 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2556 adm[0], adm[1], adm[2], adm[3]); 2557 break; 2558 case CRYCB_FORMAT1: 2559 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2560 memcpy(crycb->apcb0.apm, apm, 8); 2561 memcpy(crycb->apcb0.aqm, aqm, 2); 2562 memcpy(crycb->apcb0.adm, adm, 2); 2563 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2564 apm[0], *((unsigned short *)aqm), 2565 *((unsigned short *)adm)); 2566 break; 2567 default: /* Can not happen */ 2568 break; 2569 } 2570 2571 /* recreate the shadow crycb for each vcpu */ 2572 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2573 kvm_s390_vcpu_unblock_all(kvm); 2574 mutex_unlock(&kvm->lock); 2575 } 2576 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2577 2578 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2579 { 2580 mutex_lock(&kvm->lock); 2581 kvm_s390_vcpu_block_all(kvm); 2582 2583 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2584 sizeof(kvm->arch.crypto.crycb->apcb0)); 2585 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2586 sizeof(kvm->arch.crypto.crycb->apcb1)); 2587 2588 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2589 /* recreate the shadow crycb for each vcpu */ 2590 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2591 kvm_s390_vcpu_unblock_all(kvm); 2592 mutex_unlock(&kvm->lock); 2593 } 2594 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2595 2596 static u64 kvm_s390_get_initial_cpuid(void) 2597 { 2598 struct cpuid cpuid; 2599 2600 get_cpu_id(&cpuid); 2601 cpuid.version = 0xff; 2602 return *((u64 *) &cpuid); 2603 } 2604 2605 static void kvm_s390_crypto_init(struct kvm *kvm) 2606 { 2607 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2608 kvm_s390_set_crycb_format(kvm); 2609 2610 if (!test_kvm_facility(kvm, 76)) 2611 return; 2612 2613 /* Enable AES/DEA protected key functions by default */ 2614 kvm->arch.crypto.aes_kw = 1; 2615 kvm->arch.crypto.dea_kw = 1; 2616 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2617 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2618 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2619 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2620 } 2621 2622 static void sca_dispose(struct kvm *kvm) 2623 { 2624 
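	/* the extended SCA spans several pages, the basic SCA fits into a single page */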
if (kvm->arch.use_esca) 2625 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2626 else 2627 free_page((unsigned long)(kvm->arch.sca)); 2628 kvm->arch.sca = NULL; 2629 } 2630 2631 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2632 { 2633 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2634 int i, rc; 2635 char debug_name[16]; 2636 static unsigned long sca_offset; 2637 2638 rc = -EINVAL; 2639 #ifdef CONFIG_KVM_S390_UCONTROL 2640 if (type & ~KVM_VM_S390_UCONTROL) 2641 goto out_err; 2642 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2643 goto out_err; 2644 #else 2645 if (type) 2646 goto out_err; 2647 #endif 2648 2649 rc = s390_enable_sie(); 2650 if (rc) 2651 goto out_err; 2652 2653 rc = -ENOMEM; 2654 2655 if (!sclp.has_64bscao) 2656 alloc_flags |= GFP_DMA; 2657 rwlock_init(&kvm->arch.sca_lock); 2658 /* start with basic SCA */ 2659 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2660 if (!kvm->arch.sca) 2661 goto out_err; 2662 mutex_lock(&kvm_lock); 2663 sca_offset += 16; 2664 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2665 sca_offset = 0; 2666 kvm->arch.sca = (struct bsca_block *) 2667 ((char *) kvm->arch.sca + sca_offset); 2668 mutex_unlock(&kvm_lock); 2669 2670 sprintf(debug_name, "kvm-%u", current->pid); 2671 2672 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2673 if (!kvm->arch.dbf) 2674 goto out_err; 2675 2676 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2677 kvm->arch.sie_page2 = 2678 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2679 if (!kvm->arch.sie_page2) 2680 goto out_err; 2681 2682 kvm->arch.sie_page2->kvm = kvm; 2683 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2684 2685 for (i = 0; i < kvm_s390_fac_size(); i++) { 2686 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & 2687 (kvm_s390_fac_base[i] | 2688 kvm_s390_fac_ext[i]); 2689 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & 2690 kvm_s390_fac_base[i]; 2691 } 2692 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2693 2694 /* we are always in czam mode - even on pre z14 machines */ 2695 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2696 set_kvm_facility(kvm->arch.model.fac_list, 138); 2697 /* we emulate STHYI in kvm */ 2698 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2699 set_kvm_facility(kvm->arch.model.fac_list, 74); 2700 if (MACHINE_HAS_TLB_GUEST) { 2701 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2702 set_kvm_facility(kvm->arch.model.fac_list, 147); 2703 } 2704 2705 if (css_general_characteristics.aiv && test_facility(65)) 2706 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2707 2708 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2709 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2710 2711 kvm_s390_crypto_init(kvm); 2712 2713 mutex_init(&kvm->arch.float_int.ais_lock); 2714 spin_lock_init(&kvm->arch.float_int.lock); 2715 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2716 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2717 init_waitqueue_head(&kvm->arch.ipte_wq); 2718 mutex_init(&kvm->arch.ipte_mutex); 2719 2720 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2721 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2722 2723 if (type & KVM_VM_S390_UCONTROL) { 2724 kvm->arch.gmap = NULL; 2725 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2726 } else { 2727 if (sclp.hamax == U64_MAX) 2728 kvm->arch.mem_limit = TASK_SIZE_MAX; 2729 else 2730 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2731 sclp.hamax + 1); 2732 kvm->arch.gmap = 
gmap_create(current->mm, kvm->arch.mem_limit - 1); 2733 if (!kvm->arch.gmap) 2734 goto out_err; 2735 kvm->arch.gmap->private = kvm; 2736 kvm->arch.gmap->pfault_enabled = 0; 2737 } 2738 2739 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2740 kvm->arch.use_skf = sclp.has_skey; 2741 spin_lock_init(&kvm->arch.start_stop_lock); 2742 kvm_s390_vsie_init(kvm); 2743 if (use_gisa) 2744 kvm_s390_gisa_init(kvm); 2745 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2746 2747 return 0; 2748 out_err: 2749 free_page((unsigned long)kvm->arch.sie_page2); 2750 debug_unregister(kvm->arch.dbf); 2751 sca_dispose(kvm); 2752 KVM_EVENT(3, "creation of vm failed: %d", rc); 2753 return rc; 2754 } 2755 2756 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2757 { 2758 u16 rc, rrc; 2759 2760 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2761 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2762 kvm_s390_clear_local_irqs(vcpu); 2763 kvm_clear_async_pf_completion_queue(vcpu); 2764 if (!kvm_is_ucontrol(vcpu->kvm)) 2765 sca_del_vcpu(vcpu); 2766 2767 if (kvm_is_ucontrol(vcpu->kvm)) 2768 gmap_remove(vcpu->arch.gmap); 2769 2770 if (vcpu->kvm->arch.use_cmma) 2771 kvm_s390_vcpu_unsetup_cmma(vcpu); 2772 /* We can not hold the vcpu mutex here, we are already dying */ 2773 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2774 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2775 free_page((unsigned long)(vcpu->arch.sie_block)); 2776 } 2777 2778 static void kvm_free_vcpus(struct kvm *kvm) 2779 { 2780 unsigned int i; 2781 struct kvm_vcpu *vcpu; 2782 2783 kvm_for_each_vcpu(i, vcpu, kvm) 2784 kvm_vcpu_destroy(vcpu); 2785 2786 mutex_lock(&kvm->lock); 2787 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2788 kvm->vcpus[i] = NULL; 2789 2790 atomic_set(&kvm->online_vcpus, 0); 2791 mutex_unlock(&kvm->lock); 2792 } 2793 2794 void kvm_arch_destroy_vm(struct kvm *kvm) 2795 { 2796 u16 rc, rrc; 2797 2798 kvm_free_vcpus(kvm); 2799 sca_dispose(kvm); 2800 kvm_s390_gisa_destroy(kvm); 2801 /* 2802 * We are already at the end of life and kvm->lock is not taken. 2803 * This is ok as the file descriptor is closed by now and nobody 2804 * can mess with the pv state. To avoid lockdep_assert_held from 2805 * complaining we do not use kvm_s390_pv_is_protected. 
2806 */ 2807 if (kvm_s390_pv_get_handle(kvm)) 2808 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2809 debug_unregister(kvm->arch.dbf); 2810 free_page((unsigned long)kvm->arch.sie_page2); 2811 if (!kvm_is_ucontrol(kvm)) 2812 gmap_remove(kvm->arch.gmap); 2813 kvm_s390_destroy_adapters(kvm); 2814 kvm_s390_clear_float_irqs(kvm); 2815 kvm_s390_vsie_destroy(kvm); 2816 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2817 } 2818 2819 /* Section: vcpu related */ 2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2821 { 2822 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2823 if (!vcpu->arch.gmap) 2824 return -ENOMEM; 2825 vcpu->arch.gmap->private = vcpu->kvm; 2826 2827 return 0; 2828 } 2829 2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2831 { 2832 if (!kvm_s390_use_sca_entries()) 2833 return; 2834 read_lock(&vcpu->kvm->arch.sca_lock); 2835 if (vcpu->kvm->arch.use_esca) { 2836 struct esca_block *sca = vcpu->kvm->arch.sca; 2837 2838 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2839 sca->cpu[vcpu->vcpu_id].sda = 0; 2840 } else { 2841 struct bsca_block *sca = vcpu->kvm->arch.sca; 2842 2843 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2844 sca->cpu[vcpu->vcpu_id].sda = 0; 2845 } 2846 read_unlock(&vcpu->kvm->arch.sca_lock); 2847 } 2848 2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2850 { 2851 if (!kvm_s390_use_sca_entries()) { 2852 struct bsca_block *sca = vcpu->kvm->arch.sca; 2853 2854 /* we still need the basic sca for the ipte control */ 2855 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2856 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2857 return; 2858 } 2859 read_lock(&vcpu->kvm->arch.sca_lock); 2860 if (vcpu->kvm->arch.use_esca) { 2861 struct esca_block *sca = vcpu->kvm->arch.sca; 2862 2863 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2864 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2865 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2866 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2867 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2868 } else { 2869 struct bsca_block *sca = vcpu->kvm->arch.sca; 2870 2871 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2872 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2873 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2874 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2875 } 2876 read_unlock(&vcpu->kvm->arch.sca_lock); 2877 } 2878 2879 /* Basic SCA to Extended SCA data copy routines */ 2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2881 { 2882 d->sda = s->sda; 2883 d->sigp_ctrl.c = s->sigp_ctrl.c; 2884 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2885 } 2886 2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2888 { 2889 int i; 2890 2891 d->ipte_control = s->ipte_control; 2892 d->mcn[0] = s->mcn; 2893 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2894 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2895 } 2896 2897 static int sca_switch_to_extended(struct kvm *kvm) 2898 { 2899 struct bsca_block *old_sca = kvm->arch.sca; 2900 struct esca_block *new_sca; 2901 struct kvm_vcpu *vcpu; 2902 unsigned int vcpu_idx; 2903 u32 scaol, scaoh; 2904 2905 if (kvm->arch.use_esca) 2906 return 0; 2907 2908 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 2909 if (!new_sca) 2910 return -ENOMEM; 2911 2912 scaoh = (u32)((u64)(new_sca) >> 32); 2913 scaol = (u32)(u64)(new_sca) & ~0x3fU; 2914 2915 kvm_s390_vcpu_block_all(kvm); 2916 write_lock(&kvm->arch.sca_lock); 2917 
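	/* all vcpus are blocked and the SCA lock is held for writing, so the old basic SCA entries can be copied over and every vcpu switched to the new extended SCA */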
2918 sca_copy_b_to_e(new_sca, old_sca); 2919 2920 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2921 vcpu->arch.sie_block->scaoh = scaoh; 2922 vcpu->arch.sie_block->scaol = scaol; 2923 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2924 } 2925 kvm->arch.sca = new_sca; 2926 kvm->arch.use_esca = 1; 2927 2928 write_unlock(&kvm->arch.sca_lock); 2929 kvm_s390_vcpu_unblock_all(kvm); 2930 2931 free_page((unsigned long)old_sca); 2932 2933 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2934 old_sca, kvm->arch.sca); 2935 return 0; 2936 } 2937 2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2939 { 2940 int rc; 2941 2942 if (!kvm_s390_use_sca_entries()) { 2943 if (id < KVM_MAX_VCPUS) 2944 return true; 2945 return false; 2946 } 2947 if (id < KVM_S390_BSCA_CPU_SLOTS) 2948 return true; 2949 if (!sclp.has_esca || !sclp.has_64bscao) 2950 return false; 2951 2952 mutex_lock(&kvm->lock); 2953 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 2954 mutex_unlock(&kvm->lock); 2955 2956 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 2957 } 2958 2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2961 { 2962 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 2963 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2964 vcpu->arch.cputm_start = get_tod_clock_fast(); 2965 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2966 } 2967 2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2970 { 2971 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 2972 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 2973 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 2974 vcpu->arch.cputm_start = 0; 2975 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 2976 } 2977 2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2980 { 2981 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 2982 vcpu->arch.cputm_enabled = true; 2983 __start_cpu_timer_accounting(vcpu); 2984 } 2985 2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2988 { 2989 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 2990 __stop_cpu_timer_accounting(vcpu); 2991 vcpu->arch.cputm_enabled = false; 2992 } 2993 2994 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 2995 { 2996 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 2997 __enable_cpu_timer_accounting(vcpu); 2998 preempt_enable(); 2999 } 3000 3001 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3002 { 3003 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3004 __disable_cpu_timer_accounting(vcpu); 3005 preempt_enable(); 3006 } 3007 3008 /* set the cpu timer - may only be called from the VCPU thread itself */ 3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3010 { 3011 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3012 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3013 if (vcpu->arch.cputm_enabled) 3014 vcpu->arch.cputm_start = get_tod_clock_fast(); 3015 vcpu->arch.sie_block->cputm = cputm; 3016 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3017 preempt_enable(); 3018 } 3019 3020 /* update and get the cpu timer - can also be called from other VCPU threads */ 3021 __u64 
kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3022 { 3023 unsigned int seq; 3024 __u64 value; 3025 3026 if (unlikely(!vcpu->arch.cputm_enabled)) 3027 return vcpu->arch.sie_block->cputm; 3028 3029 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3030 do { 3031 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3032 /* 3033 * If the writer would ever execute a read in the critical 3034 * section, e.g. in irq context, we have a deadlock. 3035 */ 3036 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3037 value = vcpu->arch.sie_block->cputm; 3038 /* if cputm_start is 0, accounting is being started/stopped */ 3039 if (likely(vcpu->arch.cputm_start)) 3040 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3041 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3042 preempt_enable(); 3043 return value; 3044 } 3045 3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3047 { 3048 3049 gmap_enable(vcpu->arch.enabled_gmap); 3050 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3051 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3052 __start_cpu_timer_accounting(vcpu); 3053 vcpu->cpu = cpu; 3054 } 3055 3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3057 { 3058 vcpu->cpu = -1; 3059 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3060 __stop_cpu_timer_accounting(vcpu); 3061 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3062 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3063 gmap_disable(vcpu->arch.enabled_gmap); 3064 3065 } 3066 3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3068 { 3069 mutex_lock(&vcpu->kvm->lock); 3070 preempt_disable(); 3071 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3072 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3073 preempt_enable(); 3074 mutex_unlock(&vcpu->kvm->lock); 3075 if (!kvm_is_ucontrol(vcpu->kvm)) { 3076 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3077 sca_add_vcpu(vcpu); 3078 } 3079 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3080 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3081 /* make vcpu_load load the right gmap on the first trigger */ 3082 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3083 } 3084 3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3086 { 3087 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3088 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3089 return true; 3090 return false; 3091 } 3092 3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3094 { 3095 /* At least one ECC subfunction must be present */ 3096 return kvm_has_pckmo_subfunc(kvm, 32) || 3097 kvm_has_pckmo_subfunc(kvm, 33) || 3098 kvm_has_pckmo_subfunc(kvm, 34) || 3099 kvm_has_pckmo_subfunc(kvm, 40) || 3100 kvm_has_pckmo_subfunc(kvm, 41); 3101 3102 } 3103 3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3105 { 3106 /* 3107 * If the AP instructions are not being interpreted and the MSAX3 3108 * facility is not configured for the guest, there is nothing to set up. 
3109 */ 3110 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3111 return; 3112 3113 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3114 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3115 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3116 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3117 3118 if (vcpu->kvm->arch.crypto.apie) 3119 vcpu->arch.sie_block->eca |= ECA_APIE; 3120 3121 /* Set up protected key support */ 3122 if (vcpu->kvm->arch.crypto.aes_kw) { 3123 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3124 /* ecc is also wrapped with AES key */ 3125 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3126 vcpu->arch.sie_block->ecd |= ECD_ECC; 3127 } 3128 3129 if (vcpu->kvm->arch.crypto.dea_kw) 3130 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3131 } 3132 3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3134 { 3135 free_page(vcpu->arch.sie_block->cbrlo); 3136 vcpu->arch.sie_block->cbrlo = 0; 3137 } 3138 3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3140 { 3141 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3142 if (!vcpu->arch.sie_block->cbrlo) 3143 return -ENOMEM; 3144 return 0; 3145 } 3146 3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3148 { 3149 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3150 3151 vcpu->arch.sie_block->ibc = model->ibc; 3152 if (test_kvm_facility(vcpu->kvm, 7)) 3153 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3154 } 3155 3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3157 { 3158 int rc = 0; 3159 u16 uvrc, uvrrc; 3160 3161 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3162 CPUSTAT_SM | 3163 CPUSTAT_STOPPED); 3164 3165 if (test_kvm_facility(vcpu->kvm, 78)) 3166 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3167 else if (test_kvm_facility(vcpu->kvm, 8)) 3168 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3169 3170 kvm_s390_vcpu_setup_model(vcpu); 3171 3172 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3173 if (MACHINE_HAS_ESOP) 3174 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3175 if (test_kvm_facility(vcpu->kvm, 9)) 3176 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3177 if (test_kvm_facility(vcpu->kvm, 73)) 3178 vcpu->arch.sie_block->ecb |= ECB_TE; 3179 3180 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3181 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3182 if (test_kvm_facility(vcpu->kvm, 130)) 3183 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3184 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3185 if (sclp.has_cei) 3186 vcpu->arch.sie_block->eca |= ECA_CEI; 3187 if (sclp.has_ib) 3188 vcpu->arch.sie_block->eca |= ECA_IB; 3189 if (sclp.has_siif) 3190 vcpu->arch.sie_block->eca |= ECA_SII; 3191 if (sclp.has_sigpif) 3192 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3193 if (test_kvm_facility(vcpu->kvm, 129)) { 3194 vcpu->arch.sie_block->eca |= ECA_VX; 3195 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3196 } 3197 if (test_kvm_facility(vcpu->kvm, 139)) 3198 vcpu->arch.sie_block->ecd |= ECD_MEF; 3199 if (test_kvm_facility(vcpu->kvm, 156)) 3200 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3201 if (vcpu->arch.sie_block->gd) { 3202 vcpu->arch.sie_block->eca |= ECA_AIV; 3203 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3204 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3205 } 3206 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3207 | SDNXC; 3208 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 3209 3210 if (sclp.has_kss) 3211 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3212 else 
3213 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3214 3215 if (vcpu->kvm->arch.use_cmma) { 3216 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3217 if (rc) 3218 return rc; 3219 } 3220 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3221 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3222 3223 vcpu->arch.sie_block->hpid = HPID_KVM; 3224 3225 kvm_s390_vcpu_crypto_setup(vcpu); 3226 3227 mutex_lock(&vcpu->kvm->lock); 3228 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3229 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3230 if (rc) 3231 kvm_s390_vcpu_unsetup_cmma(vcpu); 3232 } 3233 mutex_unlock(&vcpu->kvm->lock); 3234 3235 return rc; 3236 } 3237 3238 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3239 { 3240 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3241 return -EINVAL; 3242 return 0; 3243 } 3244 3245 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3246 { 3247 struct sie_page *sie_page; 3248 int rc; 3249 3250 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3251 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3252 if (!sie_page) 3253 return -ENOMEM; 3254 3255 vcpu->arch.sie_block = &sie_page->sie_block; 3256 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3257 3258 /* the real guest size will always be smaller than msl */ 3259 vcpu->arch.sie_block->mso = 0; 3260 vcpu->arch.sie_block->msl = sclp.hamax; 3261 3262 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3263 spin_lock_init(&vcpu->arch.local_int.lock); 3264 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; 3265 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 3266 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 3267 seqcount_init(&vcpu->arch.cputm_seqcount); 3268 3269 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3270 kvm_clear_async_pf_completion_queue(vcpu); 3271 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3272 KVM_SYNC_GPRS | 3273 KVM_SYNC_ACRS | 3274 KVM_SYNC_CRS | 3275 KVM_SYNC_ARCH0 | 3276 KVM_SYNC_PFAULT | 3277 KVM_SYNC_DIAG318; 3278 kvm_s390_set_prefix(vcpu, 0); 3279 if (test_kvm_facility(vcpu->kvm, 64)) 3280 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3281 if (test_kvm_facility(vcpu->kvm, 82)) 3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3283 if (test_kvm_facility(vcpu->kvm, 133)) 3284 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3285 if (test_kvm_facility(vcpu->kvm, 156)) 3286 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3287 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3288 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3289 */ 3290 if (MACHINE_HAS_VX) 3291 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3292 else 3293 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3294 3295 if (kvm_is_ucontrol(vcpu->kvm)) { 3296 rc = __kvm_ucontrol_vcpu_init(vcpu); 3297 if (rc) 3298 goto out_free_sie_block; 3299 } 3300 3301 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3302 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3303 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3304 3305 rc = kvm_s390_vcpu_setup(vcpu); 3306 if (rc) 3307 goto out_ucontrol_uninit; 3308 return 0; 3309 3310 out_ucontrol_uninit: 3311 if (kvm_is_ucontrol(vcpu->kvm)) 3312 gmap_remove(vcpu->arch.gmap); 3313 out_free_sie_block: 3314 free_page((unsigned long)(vcpu->arch.sie_block)); 3315 return rc; 3316 } 3317 3318 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3319 { 3320 return kvm_s390_vcpu_has_irq(vcpu, 0); 3321 } 3322 3323 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3324 { 3325 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3326 } 3327 3328 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3329 { 3330 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3331 exit_sie(vcpu); 3332 } 3333 3334 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3335 { 3336 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3337 } 3338 3339 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3340 { 3341 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3342 exit_sie(vcpu); 3343 } 3344 3345 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3346 { 3347 return atomic_read(&vcpu->arch.sie_block->prog20) & 3348 (PROG_BLOCK_SIE | PROG_REQUEST); 3349 } 3350 3351 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3352 { 3353 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3354 } 3355 3356 /* 3357 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3358 * If the CPU is not running (e.g. waiting as idle) the function will 3359 * return immediately. 
*/ 3360 void exit_sie(struct kvm_vcpu *vcpu) 3361 { 3362 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3363 kvm_s390_vsie_kick(vcpu); 3364 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3365 cpu_relax(); 3366 } 3367 3368 /* Kick a guest cpu out of SIE to process a request synchronously */ 3369 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3370 { 3371 kvm_make_request(req, vcpu); 3372 kvm_s390_vcpu_request(vcpu); 3373 } 3374 3375 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3376 unsigned long end) 3377 { 3378 struct kvm *kvm = gmap->private; 3379 struct kvm_vcpu *vcpu; 3380 unsigned long prefix; 3381 int i; 3382 3383 if (gmap_is_shadow(gmap)) 3384 return; 3385 if (start >= 1UL << 31) 3386 /* We are only interested in prefix pages */ 3387 return; 3388 kvm_for_each_vcpu(i, vcpu, kvm) { 3389 /* match against both prefix pages */ 3390 prefix = kvm_s390_get_prefix(vcpu); 3391 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3392 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3393 start, end); 3394 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 3395 } 3396 } 3397 } 3398 3399 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3400 { 3401 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3402 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3403 halt_poll_max_steal) { 3404 vcpu->stat.halt_no_poll_steal++; 3405 return true; 3406 } 3407 return false; 3408 } 3409 3410 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3411 { 3412 /* kvm common code refers to this, but never calls it */ 3413 BUG(); 3414 return 0; 3415 } 3416 3417 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3418 struct kvm_one_reg *reg) 3419 { 3420 int r = -EINVAL; 3421 3422 switch (reg->id) { 3423 case KVM_REG_S390_TODPR: 3424 r = put_user(vcpu->arch.sie_block->todpr, 3425 (u32 __user *)reg->addr); 3426 break; 3427 case KVM_REG_S390_EPOCHDIFF: 3428 r = put_user(vcpu->arch.sie_block->epoch, 3429 (u64 __user *)reg->addr); 3430 break; 3431 case KVM_REG_S390_CPU_TIMER: 3432 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3433 (u64 __user *)reg->addr); 3434 break; 3435 case KVM_REG_S390_CLOCK_COMP: 3436 r = put_user(vcpu->arch.sie_block->ckc, 3437 (u64 __user *)reg->addr); 3438 break; 3439 case KVM_REG_S390_PFTOKEN: 3440 r = put_user(vcpu->arch.pfault_token, 3441 (u64 __user *)reg->addr); 3442 break; 3443 case KVM_REG_S390_PFCOMPARE: 3444 r = put_user(vcpu->arch.pfault_compare, 3445 (u64 __user *)reg->addr); 3446 break; 3447 case KVM_REG_S390_PFSELECT: 3448 r = put_user(vcpu->arch.pfault_select, 3449 (u64 __user *)reg->addr); 3450 break; 3451 case KVM_REG_S390_PP: 3452 r = put_user(vcpu->arch.sie_block->pp, 3453 (u64 __user *)reg->addr); 3454 break; 3455 case KVM_REG_S390_GBEA: 3456 r = put_user(vcpu->arch.sie_block->gbea, 3457 (u64 __user *)reg->addr); 3458 break; 3459 default: 3460 break; 3461 } 3462 3463 return r; 3464 } 3465 3466 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3467 struct kvm_one_reg *reg) 3468 { 3469 int r = -EINVAL; 3470 __u64 val; 3471 3472 switch (reg->id) { 3473 case KVM_REG_S390_TODPR: 3474 r = get_user(vcpu->arch.sie_block->todpr, 3475 (u32 __user *)reg->addr); 3476 break; 3477 case KVM_REG_S390_EPOCHDIFF: 3478 r = get_user(vcpu->arch.sie_block->epoch, 3479 (u64 __user *)reg->addr); 3480 break; 3481 case KVM_REG_S390_CPU_TIMER: 3482 r = get_user(val, (u64 __user *)reg->addr); 3483 if (!r) 3484 kvm_s390_set_cpu_timer(vcpu, val); 3485 break; 3486 case KVM_REG_S390_CLOCK_COMP: 3487 r = 
get_user(vcpu->arch.sie_block->ckc, 3488 (u64 __user *)reg->addr); 3489 break; 3490 case KVM_REG_S390_PFTOKEN: 3491 r = get_user(vcpu->arch.pfault_token, 3492 (u64 __user *)reg->addr); 3493 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3494 kvm_clear_async_pf_completion_queue(vcpu); 3495 break; 3496 case KVM_REG_S390_PFCOMPARE: 3497 r = get_user(vcpu->arch.pfault_compare, 3498 (u64 __user *)reg->addr); 3499 break; 3500 case KVM_REG_S390_PFSELECT: 3501 r = get_user(vcpu->arch.pfault_select, 3502 (u64 __user *)reg->addr); 3503 break; 3504 case KVM_REG_S390_PP: 3505 r = get_user(vcpu->arch.sie_block->pp, 3506 (u64 __user *)reg->addr); 3507 break; 3508 case KVM_REG_S390_GBEA: 3509 r = get_user(vcpu->arch.sie_block->gbea, 3510 (u64 __user *)reg->addr); 3511 break; 3512 default: 3513 break; 3514 } 3515 3516 return r; 3517 } 3518 3519 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3520 { 3521 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3522 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3523 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3524 3525 kvm_clear_async_pf_completion_queue(vcpu); 3526 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3527 kvm_s390_vcpu_stop(vcpu); 3528 kvm_s390_clear_local_irqs(vcpu); 3529 } 3530 3531 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3532 { 3533 /* Initial reset is a superset of the normal reset */ 3534 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3535 3536 /* 3537 * This equals initial cpu reset in pop, but we don't switch to ESA. 3538 * We do not only reset the internal data, but also ... 3539 */ 3540 vcpu->arch.sie_block->gpsw.mask = 0; 3541 vcpu->arch.sie_block->gpsw.addr = 0; 3542 kvm_s390_set_prefix(vcpu, 0); 3543 kvm_s390_set_cpu_timer(vcpu, 0); 3544 vcpu->arch.sie_block->ckc = 0; 3545 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3546 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3547 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3548 3549 /* ... the data in sync regs */ 3550 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3551 vcpu->run->s.regs.ckc = 0; 3552 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3553 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3554 vcpu->run->psw_addr = 0; 3555 vcpu->run->psw_mask = 0; 3556 vcpu->run->s.regs.todpr = 0; 3557 vcpu->run->s.regs.cputm = 0; 3558 vcpu->run->s.regs.ckc = 0; 3559 vcpu->run->s.regs.pp = 0; 3560 vcpu->run->s.regs.gbea = 1; 3561 vcpu->run->s.regs.fpc = 0; 3562 /* 3563 * Do not reset these registers in the protected case, as some of 3564 * them are overlayed and they are not accessible in this case 3565 * anyway. 
 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}

static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);
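	/* start from a clean debug state before validating and applying the new control flags */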
3700 vcpu->guest_debug = 0; 3701 kvm_s390_clear_bp_data(vcpu); 3702 3703 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3704 rc = -EINVAL; 3705 goto out; 3706 } 3707 if (!sclp.has_gpere) { 3708 rc = -EINVAL; 3709 goto out; 3710 } 3711 3712 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3713 vcpu->guest_debug = dbg->control; 3714 /* enforce guest PER */ 3715 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3716 3717 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3718 rc = kvm_s390_import_bp_data(vcpu, dbg); 3719 } else { 3720 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3721 vcpu->arch.guestdbg.last_bp = 0; 3722 } 3723 3724 if (rc) { 3725 vcpu->guest_debug = 0; 3726 kvm_s390_clear_bp_data(vcpu); 3727 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3728 } 3729 3730 out: 3731 vcpu_put(vcpu); 3732 return rc; 3733 } 3734 3735 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3736 struct kvm_mp_state *mp_state) 3737 { 3738 int ret; 3739 3740 vcpu_load(vcpu); 3741 3742 /* CHECK_STOP and LOAD are not supported yet */ 3743 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3744 KVM_MP_STATE_OPERATING; 3745 3746 vcpu_put(vcpu); 3747 return ret; 3748 } 3749 3750 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3751 struct kvm_mp_state *mp_state) 3752 { 3753 int rc = 0; 3754 3755 vcpu_load(vcpu); 3756 3757 /* user space knows about this interface - let it control the state */ 3758 vcpu->kvm->arch.user_cpu_state_ctrl = 1; 3759 3760 switch (mp_state->mp_state) { 3761 case KVM_MP_STATE_STOPPED: 3762 rc = kvm_s390_vcpu_stop(vcpu); 3763 break; 3764 case KVM_MP_STATE_OPERATING: 3765 rc = kvm_s390_vcpu_start(vcpu); 3766 break; 3767 case KVM_MP_STATE_LOAD: 3768 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3769 rc = -ENXIO; 3770 break; 3771 } 3772 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3773 break; 3774 case KVM_MP_STATE_CHECK_STOP: 3775 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3776 default: 3777 rc = -ENXIO; 3778 } 3779 3780 vcpu_put(vcpu); 3781 return rc; 3782 } 3783 3784 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3785 { 3786 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3787 } 3788 3789 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3790 { 3791 retry: 3792 kvm_s390_vcpu_request_handled(vcpu); 3793 if (!kvm_request_pending(vcpu)) 3794 return 0; 3795 /* 3796 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3797 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3798 * This ensures that the ipte instruction for this request has 3799 * already finished. We might race against a second unmapper that 3800 * wants to set the blocking bit. Lets just retry the request loop. 
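* If gmap_mprotect_notify() fails, the request is re-armed below and the error is returned so that vcpu_pre_run can bail out.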
3801 */ 3802 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3803 int rc; 3804 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3805 kvm_s390_get_prefix(vcpu), 3806 PAGE_SIZE * 2, PROT_WRITE); 3807 if (rc) { 3808 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3809 return rc; 3810 } 3811 goto retry; 3812 } 3813 3814 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3815 vcpu->arch.sie_block->ihcpu = 0xffff; 3816 goto retry; 3817 } 3818 3819 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3820 if (!ibs_enabled(vcpu)) { 3821 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3822 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3823 } 3824 goto retry; 3825 } 3826 3827 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3828 if (ibs_enabled(vcpu)) { 3829 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3830 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3831 } 3832 goto retry; 3833 } 3834 3835 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3836 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3837 goto retry; 3838 } 3839 3840 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3841 /* 3842 * Disable CMM virtualization; we will emulate the ESSA 3843 * instruction manually, in order to provide additional 3844 * functionalities needed for live migration. 3845 */ 3846 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3847 goto retry; 3848 } 3849 3850 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3851 /* 3852 * Re-enable CMM virtualization if CMMA is available and 3853 * CMM has been used. 3854 */ 3855 if ((vcpu->kvm->arch.use_cmma) && 3856 (vcpu->kvm->mm->context.uses_cmm)) 3857 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3858 goto retry; 3859 } 3860 3861 /* nothing to do, just clear the request */ 3862 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3863 /* we left the vsie handler, nothing to do, just clear the request */ 3864 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3865 3866 return 0; 3867 } 3868 3869 void kvm_s390_set_tod_clock(struct kvm *kvm, 3870 const struct kvm_s390_vm_tod_clock *gtod) 3871 { 3872 struct kvm_vcpu *vcpu; 3873 union tod_clock clk; 3874 int i; 3875 3876 mutex_lock(&kvm->lock); 3877 preempt_disable(); 3878 3879 store_tod_clock_ext(&clk); 3880 3881 kvm->arch.epoch = gtod->tod - clk.tod; 3882 kvm->arch.epdx = 0; 3883 if (test_kvm_facility(kvm, 139)) { 3884 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3885 if (kvm->arch.epoch > gtod->tod) 3886 kvm->arch.epdx -= 1; 3887 } 3888 3889 kvm_s390_vcpu_block_all(kvm); 3890 kvm_for_each_vcpu(i, vcpu, kvm) { 3891 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3892 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3893 } 3894 3895 kvm_s390_vcpu_unblock_all(kvm); 3896 preempt_enable(); 3897 mutex_unlock(&kvm->lock); 3898 } 3899 3900 /** 3901 * kvm_arch_fault_in_page - fault-in guest page if necessary 3902 * @vcpu: The corresponding virtual cpu 3903 * @gpa: Guest physical address 3904 * @writable: Whether the page should be writable or not 3905 * 3906 * Make sure that a guest page has been faulted-in on the host. 3907 * 3908 * Return: Zero on success, negative error code otherwise. 3909 */ 3910 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3911 { 3912 return gmap_fault(vcpu->arch.gmap, gpa, 3913 writable ? 
FAULT_FLAG_WRITE : 0); 3914 } 3915 3916 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3917 unsigned long token) 3918 { 3919 struct kvm_s390_interrupt inti; 3920 struct kvm_s390_irq irq; 3921 3922 if (start_token) { 3923 irq.u.ext.ext_params2 = token; 3924 irq.type = KVM_S390_INT_PFAULT_INIT; 3925 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3926 } else { 3927 inti.type = KVM_S390_INT_PFAULT_DONE; 3928 inti.parm64 = token; 3929 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3930 } 3931 } 3932 3933 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3934 struct kvm_async_pf *work) 3935 { 3936 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3937 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3938 3939 return true; 3940 } 3941 3942 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3943 struct kvm_async_pf *work) 3944 { 3945 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3946 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3947 } 3948 3949 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3950 struct kvm_async_pf *work) 3951 { 3952 /* s390 will always inject the page directly */ 3953 } 3954 3955 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 3956 { 3957 /* 3958 * s390 will always inject the page directly, 3959 * but we still want check_async_completion to cleanup 3960 */ 3961 return true; 3962 } 3963 3964 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 3965 { 3966 hva_t hva; 3967 struct kvm_arch_async_pf arch; 3968 3969 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3970 return false; 3971 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 3972 vcpu->arch.pfault_compare) 3973 return false; 3974 if (psw_extint_disabled(vcpu)) 3975 return false; 3976 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 3977 return false; 3978 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 3979 return false; 3980 if (!vcpu->arch.gmap->pfault_enabled) 3981 return false; 3982 3983 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 3984 hva += current->thread.gmap_addr & ~PAGE_MASK; 3985 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 3986 return false; 3987 3988 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 3989 } 3990 3991 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 3992 { 3993 int rc, cpuflags; 3994 3995 /* 3996 * On s390 notifications for arriving pages will be delivered directly 3997 * to the guest but the house keeping for completed pfaults is 3998 * handled outside the worker. 
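* kvm_check_async_pf_completion() below performs exactly that housekeeping before each SIE entry.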
3999 */ 4000 kvm_check_async_pf_completion(vcpu); 4001 4002 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4003 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4004 4005 if (need_resched()) 4006 schedule(); 4007 4008 if (!kvm_is_ucontrol(vcpu->kvm)) { 4009 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4010 if (rc) 4011 return rc; 4012 } 4013 4014 rc = kvm_s390_handle_requests(vcpu); 4015 if (rc) 4016 return rc; 4017 4018 if (guestdbg_enabled(vcpu)) { 4019 kvm_s390_backup_guest_per_regs(vcpu); 4020 kvm_s390_patch_guest_per_regs(vcpu); 4021 } 4022 4023 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); 4024 4025 vcpu->arch.sie_block->icptcode = 0; 4026 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4027 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4028 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4029 4030 return 0; 4031 } 4032 4033 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4034 { 4035 struct kvm_s390_pgm_info pgm_info = { 4036 .code = PGM_ADDRESSING, 4037 }; 4038 u8 opcode, ilen; 4039 int rc; 4040 4041 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4042 trace_kvm_s390_sie_fault(vcpu); 4043 4044 /* 4045 * We want to inject an addressing exception, which is defined as a 4046 * suppressing or terminating exception. However, since we came here 4047 * by a DAT access exception, the PSW still points to the faulting 4048 * instruction since DAT exceptions are nullifying. So we've got 4049 * to look up the current opcode to get the length of the instruction 4050 * to be able to forward the PSW. 4051 */ 4052 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4053 ilen = insn_length(opcode); 4054 if (rc < 0) { 4055 return rc; 4056 } else if (rc) { 4057 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4058 * Forward by arbitrary ilc, injection will take care of 4059 * nullification if necessary. 
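* In that case an ilen of 4 is used as the arbitrary value and the program interruption info from vcpu->arch.pgm is injected instead of PGM_ADDRESSING.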
4060 */ 4061 pgm_info = vcpu->arch.pgm; 4062 ilen = 4; 4063 } 4064 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4065 kvm_s390_forward_psw(vcpu, ilen); 4066 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4067 } 4068 4069 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4070 { 4071 struct mcck_volatile_info *mcck_info; 4072 struct sie_page *sie_page; 4073 4074 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4075 vcpu->arch.sie_block->icptcode); 4076 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4077 4078 if (guestdbg_enabled(vcpu)) 4079 kvm_s390_restore_guest_per_regs(vcpu); 4080 4081 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4082 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4083 4084 if (exit_reason == -EINTR) { 4085 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4086 sie_page = container_of(vcpu->arch.sie_block, 4087 struct sie_page, sie_block); 4088 mcck_info = &sie_page->mcck_info; 4089 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4090 return 0; 4091 } 4092 4093 if (vcpu->arch.sie_block->icptcode > 0) { 4094 int rc = kvm_handle_sie_intercept(vcpu); 4095 4096 if (rc != -EOPNOTSUPP) 4097 return rc; 4098 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4099 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4100 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4101 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4102 return -EREMOTE; 4103 } else if (exit_reason != -EFAULT) { 4104 vcpu->stat.exit_null++; 4105 return 0; 4106 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4107 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4108 vcpu->run->s390_ucontrol.trans_exc_code = 4109 current->thread.gmap_addr; 4110 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4111 return -EREMOTE; 4112 } else if (current->thread.gmap_pfault) { 4113 trace_kvm_s390_major_guest_pfault(vcpu); 4114 current->thread.gmap_pfault = 0; 4115 if (kvm_arch_setup_async_pf(vcpu)) 4116 return 0; 4117 vcpu->stat.pfault_sync++; 4118 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4119 } 4120 return vcpu_post_run_fault_in_sie(vcpu); 4121 } 4122 4123 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4124 static int __vcpu_run(struct kvm_vcpu *vcpu) 4125 { 4126 int rc, exit_reason; 4127 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4128 4129 /* 4130 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4131 * ning the guest), so that memslots (and other stuff) are protected 4132 */ 4133 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4134 4135 do { 4136 rc = vcpu_pre_run(vcpu); 4137 if (rc) 4138 break; 4139 4140 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4141 /* 4142 * As PF_VCPU will be used in fault handler, between 4143 * guest_enter and guest_exit should be no uaccess. 
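* In other words, no user-space memory accesses may happen between guest_enter_irqoff() and guest_exit_irqoff() below.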
4144 */ 4145 local_irq_disable(); 4146 guest_enter_irqoff(); 4147 __disable_cpu_timer_accounting(vcpu); 4148 local_irq_enable(); 4149 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4150 memcpy(sie_page->pv_grregs, 4151 vcpu->run->s.regs.gprs, 4152 sizeof(sie_page->pv_grregs)); 4153 } 4154 if (test_cpu_flag(CIF_FPU)) 4155 load_fpu_regs(); 4156 exit_reason = sie64a(vcpu->arch.sie_block, 4157 vcpu->run->s.regs.gprs); 4158 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4159 memcpy(vcpu->run->s.regs.gprs, 4160 sie_page->pv_grregs, 4161 sizeof(sie_page->pv_grregs)); 4162 /* 4163 * We're not allowed to inject interrupts on intercepts 4164 * that leave the guest state in an "in-between" state 4165 * where the next SIE entry will do a continuation. 4166 * Fence interrupts in our "internal" PSW. 4167 */ 4168 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4169 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4170 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4171 } 4172 } 4173 local_irq_disable(); 4174 __enable_cpu_timer_accounting(vcpu); 4175 guest_exit_irqoff(); 4176 local_irq_enable(); 4177 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4178 4179 rc = vcpu_post_run(vcpu, exit_reason); 4180 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4181 4182 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4183 return rc; 4184 } 4185 4186 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4187 { 4188 struct kvm_run *kvm_run = vcpu->run; 4189 struct runtime_instr_cb *riccb; 4190 struct gs_cb *gscb; 4191 4192 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4193 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4194 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4195 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4196 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4197 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4198 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4199 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4200 } 4201 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4202 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4203 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4204 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4205 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4206 kvm_clear_async_pf_completion_queue(vcpu); 4207 } 4208 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4209 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4210 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4211 } 4212 /* 4213 * If userspace sets the riccb (e.g. after migration) to a valid state, 4214 * we should enable RI here instead of doing the lazy enablement. 4215 */ 4216 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4217 test_kvm_facility(vcpu->kvm, 64) && 4218 riccb->v && 4219 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4220 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4221 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4222 } 4223 /* 4224 * If userspace sets the gscb (e.g. after migration) to non-zero, 4225 * we should enable GS here instead of doing the lazy enablement. 
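* This mirrors the riccb handling above: facility 133 must be available, gscb->gssm must be non-zero and guarded storage must not already be enabled.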
4226 */ 4227 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4228 test_kvm_facility(vcpu->kvm, 133) && 4229 gscb->gssm && 4230 !vcpu->arch.gs_enabled) { 4231 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4232 vcpu->arch.sie_block->ecb |= ECB_GS; 4233 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4234 vcpu->arch.gs_enabled = 1; 4235 } 4236 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4237 test_kvm_facility(vcpu->kvm, 82)) { 4238 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4239 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 4240 } 4241 if (MACHINE_HAS_GS) { 4242 preempt_disable(); 4243 __ctl_set_bit(2, 4); 4244 if (current->thread.gs_cb) { 4245 vcpu->arch.host_gscb = current->thread.gs_cb; 4246 save_gs_cb(vcpu->arch.host_gscb); 4247 } 4248 if (vcpu->arch.gs_enabled) { 4249 current->thread.gs_cb = (struct gs_cb *) 4250 &vcpu->run->s.regs.gscb; 4251 restore_gs_cb(current->thread.gs_cb); 4252 } 4253 preempt_enable(); 4254 } 4255 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4256 } 4257 4258 static void sync_regs(struct kvm_vcpu *vcpu) 4259 { 4260 struct kvm_run *kvm_run = vcpu->run; 4261 4262 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4263 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4264 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4265 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4266 /* some control register changes require a tlb flush */ 4267 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4268 } 4269 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4270 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4271 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4272 } 4273 save_access_regs(vcpu->arch.host_acrs); 4274 restore_access_regs(vcpu->run->s.regs.acrs); 4275 /* save host (userspace) fprs/vrs */ 4276 save_fpu_regs(); 4277 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4278 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4279 if (MACHINE_HAS_VX) 4280 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4281 else 4282 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4283 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4284 if (test_fp_ctl(current->thread.fpu.fpc)) 4285 /* User space provided an invalid FPC, let's clear it */ 4286 current->thread.fpu.fpc = 0; 4287 4288 /* Sync fmt2 only data */ 4289 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4290 sync_regs_fmt2(vcpu); 4291 } else { 4292 /* 4293 * In several places we have to modify our internal view to 4294 * not do things that are disallowed by the ultravisor. For 4295 * example we must not inject interrupts after specific exits 4296 * (e.g. 112 prefix page not secure). We do this by turning 4297 * off the machine check, external and I/O interrupt bits 4298 * of our PSW copy. To avoid getting validity intercepts, we 4299 * do only accept the condition code from userspace. 
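* Hence only PSW_MASK_CC is merged from kvm_run->psw_mask below; all other mask bits stay under KVM's control for protected guests.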
4300 */ 4301 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4302 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4303 PSW_MASK_CC; 4304 } 4305 4306 kvm_run->kvm_dirty_regs = 0; 4307 } 4308 4309 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4310 { 4311 struct kvm_run *kvm_run = vcpu->run; 4312 4313 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4314 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4315 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4316 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4317 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4318 if (MACHINE_HAS_GS) { 4319 preempt_disable(); 4320 __ctl_set_bit(2, 4); 4321 if (vcpu->arch.gs_enabled) 4322 save_gs_cb(current->thread.gs_cb); 4323 current->thread.gs_cb = vcpu->arch.host_gscb; 4324 restore_gs_cb(vcpu->arch.host_gscb); 4325 if (!vcpu->arch.host_gscb) 4326 __ctl_clear_bit(2, 4); 4327 vcpu->arch.host_gscb = NULL; 4328 preempt_enable(); 4329 } 4330 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4331 } 4332 4333 static void store_regs(struct kvm_vcpu *vcpu) 4334 { 4335 struct kvm_run *kvm_run = vcpu->run; 4336 4337 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4338 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4339 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4340 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4341 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4342 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4343 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4344 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4345 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4346 save_access_regs(vcpu->run->s.regs.acrs); 4347 restore_access_regs(vcpu->arch.host_acrs); 4348 /* Save guest register state */ 4349 save_fpu_regs(); 4350 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4351 /* Restore will be done lazily at return */ 4352 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4353 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4354 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4355 store_regs_fmt2(vcpu); 4356 } 4357 4358 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4359 { 4360 struct kvm_run *kvm_run = vcpu->run; 4361 int rc; 4362 4363 if (kvm_run->immediate_exit) 4364 return -EINTR; 4365 4366 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4367 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4368 return -EINVAL; 4369 4370 vcpu_load(vcpu); 4371 4372 if (guestdbg_exit_pending(vcpu)) { 4373 kvm_s390_prepare_debug_exit(vcpu); 4374 rc = 0; 4375 goto out; 4376 } 4377 4378 kvm_sigset_activate(vcpu); 4379 4380 /* 4381 * no need to check the return value of vcpu_start as it can only have 4382 * an error for protvirt, but protvirt means user cpu state 4383 */ 4384 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4385 kvm_s390_vcpu_start(vcpu); 4386 } else if (is_vcpu_stopped(vcpu)) { 4387 pr_err_ratelimited("can't run stopped vcpu %d\n", 4388 vcpu->vcpu_id); 4389 rc = -EINVAL; 4390 goto out; 4391 } 4392 4393 sync_regs(vcpu); 4394 enable_cpu_timer_accounting(vcpu); 4395 4396 might_fault(); 4397 rc = __vcpu_run(vcpu); 4398 4399 if (signal_pending(current) && !rc) { 4400 kvm_run->exit_reason = KVM_EXIT_INTR; 4401 rc = -EINTR; 4402 } 4403 4404 if (guestdbg_exit_pending(vcpu) && !rc) { 4405 kvm_s390_prepare_debug_exit(vcpu); 4406 rc = 0; 4407 } 4408 4409 if (rc == -EREMOTE) { 4410 /* userspace support is needed, kvm_run has been prepared */ 4411 rc = 0; 4412 } 4413 4414 
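/* Stop guest cpu timer accounting and write the register state back to kvm_run before returning to userspace. */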
disable_cpu_timer_accounting(vcpu); 4415 store_regs(vcpu); 4416 4417 kvm_sigset_deactivate(vcpu); 4418 4419 vcpu->stat.exit_userspace++; 4420 out: 4421 vcpu_put(vcpu); 4422 return rc; 4423 } 4424 4425 /* 4426 * store status at address 4427 * we have two special cases: 4428 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4429 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4430 */ 4431 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4432 { 4433 unsigned char archmode = 1; 4434 freg_t fprs[NUM_FPRS]; 4435 unsigned int px; 4436 u64 clkcomp, cputm; 4437 int rc; 4438 4439 px = kvm_s390_get_prefix(vcpu); 4440 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4441 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4442 return -EFAULT; 4443 gpa = 0; 4444 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4445 if (write_guest_real(vcpu, 163, &archmode, 1)) 4446 return -EFAULT; 4447 gpa = px; 4448 } else 4449 gpa -= __LC_FPREGS_SAVE_AREA; 4450 4451 /* manually convert vector registers if necessary */ 4452 if (MACHINE_HAS_VX) { 4453 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4454 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4455 fprs, 128); 4456 } else { 4457 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4458 vcpu->run->s.regs.fprs, 128); 4459 } 4460 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4461 vcpu->run->s.regs.gprs, 128); 4462 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4463 &vcpu->arch.sie_block->gpsw, 16); 4464 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4465 &px, 4); 4466 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4467 &vcpu->run->s.regs.fpc, 4); 4468 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4469 &vcpu->arch.sie_block->todpr, 4); 4470 cputm = kvm_s390_get_cpu_timer(vcpu); 4471 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4472 &cputm, 8); 4473 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4474 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4475 &clkcomp, 8); 4476 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4477 &vcpu->run->s.regs.acrs, 64); 4478 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4479 &vcpu->arch.sie_block->gcr, 128); 4480 return rc ? -EFAULT : 0; 4481 } 4482 4483 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4484 { 4485 /* 4486 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4487 * switch in the run ioctl.
Let's update our copies before we save 4488 * it into the save area 4489 */ 4490 save_fpu_regs(); 4491 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4492 save_access_regs(vcpu->run->s.regs.acrs); 4493 4494 return kvm_s390_store_status_unloaded(vcpu, addr); 4495 } 4496 4497 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4498 { 4499 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4500 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4501 } 4502 4503 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4504 { 4505 unsigned int i; 4506 struct kvm_vcpu *vcpu; 4507 4508 kvm_for_each_vcpu(i, vcpu, kvm) { 4509 __disable_ibs_on_vcpu(vcpu); 4510 } 4511 } 4512 4513 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4514 { 4515 if (!sclp.has_ibs) 4516 return; 4517 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4518 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4519 } 4520 4521 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4522 { 4523 int i, online_vcpus, r = 0, started_vcpus = 0; 4524 4525 if (!is_vcpu_stopped(vcpu)) 4526 return 0; 4527 4528 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4529 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4530 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4531 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4532 4533 /* Let's tell the UV that we want to change into the operating state */ 4534 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4535 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4536 if (r) { 4537 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4538 return r; 4539 } 4540 } 4541 4542 for (i = 0; i < online_vcpus; i++) { 4543 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 4544 started_vcpus++; 4545 } 4546 4547 if (started_vcpus == 0) { 4548 /* we're the only active VCPU -> speed it up */ 4549 __enable_ibs_on_vcpu(vcpu); 4550 } else if (started_vcpus == 1) { 4551 /* 4552 * As we are starting a second VCPU, we have to disable 4553 * the IBS facility on all VCPUs to remove potentially 4554 * outstanding ENABLE requests. 4555 */ 4556 __disable_ibs_on_all_vcpus(vcpu->kvm); 4557 } 4558 4559 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4560 /* 4561 * The real PSW might have changed due to a RESTART interpreted by the 4562 * ultravisor. We block all interrupts and let the next sie exit 4563 * refresh our view. 4564 */ 4565 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4566 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4567 /* 4568 * Another VCPU might have used IBS while we were offline. 4569 * Let's play safe and flush the VCPU at startup. 4570 */ 4571 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4572 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4573 return 0; 4574 } 4575 4576 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4577 { 4578 int i, online_vcpus, r = 0, started_vcpus = 0; 4579 struct kvm_vcpu *started_vcpu = NULL; 4580 4581 if (is_vcpu_stopped(vcpu)) 4582 return 0; 4583 4584 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4585 /* Only one cpu at a time may enter/leave the STOPPED state. 
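The start_stop_lock taken below serializes this against concurrent starts and stops of other VCPUs.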
*/ 4586 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4587 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4588 4589 /* Let's tell the UV that we want to change into the stopped state */ 4590 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4591 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4592 if (r) { 4593 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4594 return r; 4595 } 4596 } 4597 4598 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ 4599 kvm_s390_clear_stop_irq(vcpu); 4600 4601 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4602 __disable_ibs_on_vcpu(vcpu); 4603 4604 for (i = 0; i < online_vcpus; i++) { 4605 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 4606 started_vcpus++; 4607 started_vcpu = vcpu->kvm->vcpus[i]; 4608 } 4609 } 4610 4611 if (started_vcpus == 1) { 4612 /* 4613 * As we only have one VCPU left, we want to enable the 4614 * IBS facility for that VCPU to speed it up. 4615 */ 4616 __enable_ibs_on_vcpu(started_vcpu); 4617 } 4618 4619 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4620 return 0; 4621 } 4622 4623 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4624 struct kvm_enable_cap *cap) 4625 { 4626 int r; 4627 4628 if (cap->flags) 4629 return -EINVAL; 4630 4631 switch (cap->cap) { 4632 case KVM_CAP_S390_CSS_SUPPORT: 4633 if (!vcpu->kvm->arch.css_support) { 4634 vcpu->kvm->arch.css_support = 1; 4635 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4636 trace_kvm_s390_enable_css(vcpu->kvm); 4637 } 4638 r = 0; 4639 break; 4640 default: 4641 r = -EINVAL; 4642 break; 4643 } 4644 return r; 4645 } 4646 4647 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, 4648 struct kvm_s390_mem_op *mop) 4649 { 4650 void __user *uaddr = (void __user *)mop->buf; 4651 int r = 0; 4652 4653 if (mop->flags || !mop->size) 4654 return -EINVAL; 4655 if (mop->size + mop->sida_offset < mop->size) 4656 return -EINVAL; 4657 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4658 return -E2BIG; 4659 4660 switch (mop->op) { 4661 case KVM_S390_MEMOP_SIDA_READ: 4662 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4663 mop->sida_offset), mop->size)) 4664 r = -EFAULT; 4665 4666 break; 4667 case KVM_S390_MEMOP_SIDA_WRITE: 4668 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4669 mop->sida_offset), uaddr, mop->size)) 4670 r = -EFAULT; 4671 break; 4672 } 4673 return r; 4674 } 4675 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 4676 struct kvm_s390_mem_op *mop) 4677 { 4678 void __user *uaddr = (void __user *)mop->buf; 4679 void *tmpbuf = NULL; 4680 int r = 0; 4681 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4682 | KVM_S390_MEMOP_F_CHECK_ONLY; 4683 4684 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4685 return -EINVAL; 4686 4687 if (mop->size > MEM_OP_MAX_SIZE) 4688 return -E2BIG; 4689 4690 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4691 return -EINVAL; 4692 4693 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4694 tmpbuf = vmalloc(mop->size); 4695 if (!tmpbuf) 4696 return -ENOMEM; 4697 } 4698 4699 switch (mop->op) { 4700 case KVM_S390_MEMOP_LOGICAL_READ: 4701 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4702 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4703 mop->size, GACC_FETCH); 4704 break; 4705 } 4706 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4707 if (r == 0) { 4708 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4709 r = -EFAULT; 4710 } 4711 break; 4712 case KVM_S390_MEMOP_LOGICAL_WRITE: 4713 if (mop->flags & 
KVM_S390_MEMOP_F_CHECK_ONLY) { 4714 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4715 mop->size, GACC_STORE); 4716 break; 4717 } 4718 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4719 r = -EFAULT; 4720 break; 4721 } 4722 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4723 break; 4724 } 4725 4726 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4727 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4728 4729 vfree(tmpbuf); 4730 return r; 4731 } 4732 4733 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, 4734 struct kvm_s390_mem_op *mop) 4735 { 4736 int r, srcu_idx; 4737 4738 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4739 4740 switch (mop->op) { 4741 case KVM_S390_MEMOP_LOGICAL_READ: 4742 case KVM_S390_MEMOP_LOGICAL_WRITE: 4743 r = kvm_s390_guest_mem_op(vcpu, mop); 4744 break; 4745 case KVM_S390_MEMOP_SIDA_READ: 4746 case KVM_S390_MEMOP_SIDA_WRITE: 4747 /* we are locked against sida going away by the vcpu->mutex */ 4748 r = kvm_s390_guest_sida_op(vcpu, mop); 4749 break; 4750 default: 4751 r = -EINVAL; 4752 } 4753 4754 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4755 return r; 4756 } 4757 4758 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4759 unsigned int ioctl, unsigned long arg) 4760 { 4761 struct kvm_vcpu *vcpu = filp->private_data; 4762 void __user *argp = (void __user *)arg; 4763 4764 switch (ioctl) { 4765 case KVM_S390_IRQ: { 4766 struct kvm_s390_irq s390irq; 4767 4768 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4769 return -EFAULT; 4770 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4771 } 4772 case KVM_S390_INTERRUPT: { 4773 struct kvm_s390_interrupt s390int; 4774 struct kvm_s390_irq s390irq = {}; 4775 4776 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4777 return -EFAULT; 4778 if (s390int_to_s390irq(&s390int, &s390irq)) 4779 return -EINVAL; 4780 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4781 } 4782 } 4783 return -ENOIOCTLCMD; 4784 } 4785 4786 long kvm_arch_vcpu_ioctl(struct file *filp, 4787 unsigned int ioctl, unsigned long arg) 4788 { 4789 struct kvm_vcpu *vcpu = filp->private_data; 4790 void __user *argp = (void __user *)arg; 4791 int idx; 4792 long r; 4793 u16 rc, rrc; 4794 4795 vcpu_load(vcpu); 4796 4797 switch (ioctl) { 4798 case KVM_S390_STORE_STATUS: 4799 idx = srcu_read_lock(&vcpu->kvm->srcu); 4800 r = kvm_s390_store_status_unloaded(vcpu, arg); 4801 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4802 break; 4803 case KVM_S390_SET_INITIAL_PSW: { 4804 psw_t psw; 4805 4806 r = -EFAULT; 4807 if (copy_from_user(&psw, argp, sizeof(psw))) 4808 break; 4809 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4810 break; 4811 } 4812 case KVM_S390_CLEAR_RESET: 4813 r = 0; 4814 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4815 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4816 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4817 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4818 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4819 rc, rrc); 4820 } 4821 break; 4822 case KVM_S390_INITIAL_RESET: 4823 r = 0; 4824 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4825 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4826 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4827 UVC_CMD_CPU_RESET_INITIAL, 4828 &rc, &rrc); 4829 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 4830 rc, rrc); 4831 } 4832 break; 4833 case KVM_S390_NORMAL_RESET: 4834 r = 0; 4835 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4836 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4837 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4838 UVC_CMD_CPU_RESET, 
&rc, &rrc); 4839 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x", 4840 rc, rrc); 4841 } 4842 break; 4843 case KVM_SET_ONE_REG: 4844 case KVM_GET_ONE_REG: { 4845 struct kvm_one_reg reg; 4846 r = -EINVAL; 4847 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4848 break; 4849 r = -EFAULT; 4850 if (copy_from_user(&reg, argp, sizeof(reg))) 4851 break; 4852 if (ioctl == KVM_SET_ONE_REG) 4853 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 4854 else 4855 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 4856 break; 4857 } 4858 #ifdef CONFIG_KVM_S390_UCONTROL 4859 case KVM_S390_UCAS_MAP: { 4860 struct kvm_s390_ucas_mapping ucasmap; 4861 4862 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4863 r = -EFAULT; 4864 break; 4865 } 4866 4867 if (!kvm_is_ucontrol(vcpu->kvm)) { 4868 r = -EINVAL; 4869 break; 4870 } 4871 4872 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 4873 ucasmap.vcpu_addr, ucasmap.length); 4874 break; 4875 } 4876 case KVM_S390_UCAS_UNMAP: { 4877 struct kvm_s390_ucas_mapping ucasmap; 4878 4879 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 4880 r = -EFAULT; 4881 break; 4882 } 4883 4884 if (!kvm_is_ucontrol(vcpu->kvm)) { 4885 r = -EINVAL; 4886 break; 4887 } 4888 4889 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 4890 ucasmap.length); 4891 break; 4892 } 4893 #endif 4894 case KVM_S390_VCPU_FAULT: { 4895 r = gmap_fault(vcpu->arch.gmap, arg, 0); 4896 break; 4897 } 4898 case KVM_ENABLE_CAP: 4899 { 4900 struct kvm_enable_cap cap; 4901 r = -EFAULT; 4902 if (copy_from_user(&cap, argp, sizeof(cap))) 4903 break; 4904 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 4905 break; 4906 } 4907 case KVM_S390_MEM_OP: { 4908 struct kvm_s390_mem_op mem_op; 4909 4910 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 4911 r = kvm_s390_guest_memsida_op(vcpu, &mem_op); 4912 else 4913 r = -EFAULT; 4914 break; 4915 } 4916 case KVM_S390_SET_IRQ_STATE: { 4917 struct kvm_s390_irq_state irq_state; 4918 4919 r = -EFAULT; 4920 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 4921 break; 4922 if (irq_state.len > VCPU_IRQS_MAX_BUF || 4923 irq_state.len == 0 || 4924 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 4925 r = -EINVAL; 4926 break; 4927 } 4928 /* do not use irq_state.flags, it will break old QEMUs */ 4929 r = kvm_s390_set_irq_state(vcpu, 4930 (void __user *) irq_state.buf, 4931 irq_state.len); 4932 break; 4933 } 4934 case KVM_S390_GET_IRQ_STATE: { 4935 struct kvm_s390_irq_state irq_state; 4936 4937 r = -EFAULT; 4938 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 4939 break; 4940 if (irq_state.len == 0) { 4941 r = -EINVAL; 4942 break; 4943 } 4944 /* do not use irq_state.flags, it will break old QEMUs */ 4945 r = kvm_s390_get_irq_state(vcpu, 4946 (__u8 __user *) irq_state.buf, 4947 irq_state.len); 4948 break; 4949 } 4950 default: 4951 r = -ENOTTY; 4952 } 4953 4954 vcpu_put(vcpu); 4955 return r; 4956 } 4957 4958 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 4959 { 4960 #ifdef CONFIG_KVM_S390_UCONTROL 4961 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 4962 && (kvm_is_ucontrol(vcpu->kvm))) { 4963 vmf->page = virt_to_page(vcpu->arch.sie_block); 4964 get_page(vmf->page); 4965 return 0; 4966 } 4967 #endif 4968 return VM_FAULT_SIGBUS; 4969 } 4970 4971 /* Section: memory related */ 4972 int kvm_arch_prepare_memory_region(struct kvm *kvm, 4973 struct kvm_memory_slot *memslot, 4974 const struct kvm_userspace_memory_region *mem, 4975 enum kvm_mr_change change) 4976 { 4977 /* A few sanity checks.
We can have memory slots which have to be 4978 located/ended at a segment boundary (1MB). The memory in userland is 4979 ok to be fragmented into various different vmas. It is okay to mmap() 4980 and munmap() stuff in this slot after doing this call at any time */ 4981 4982 if (mem->userspace_addr & 0xffffful) 4983 return -EINVAL; 4984 4985 if (mem->memory_size & 0xffffful) 4986 return -EINVAL; 4987 4988 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 4989 return -EINVAL; 4990 4991 /* When we are protected, we should not change the memory slots */ 4992 if (kvm_s390_pv_get_handle(kvm)) 4993 return -EINVAL; 4994 return 0; 4995 } 4996 4997 void kvm_arch_commit_memory_region(struct kvm *kvm, 4998 const struct kvm_userspace_memory_region *mem, 4999 struct kvm_memory_slot *old, 5000 const struct kvm_memory_slot *new, 5001 enum kvm_mr_change change) 5002 { 5003 int rc = 0; 5004 5005 switch (change) { 5006 case KVM_MR_DELETE: 5007 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5008 old->npages * PAGE_SIZE); 5009 break; 5010 case KVM_MR_MOVE: 5011 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5012 old->npages * PAGE_SIZE); 5013 if (rc) 5014 break; 5015 fallthrough; 5016 case KVM_MR_CREATE: 5017 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 5018 mem->guest_phys_addr, mem->memory_size); 5019 break; 5020 case KVM_MR_FLAGS_ONLY: 5021 break; 5022 default: 5023 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5024 } 5025 if (rc) 5026 pr_warn("failed to commit memory region\n"); 5027 return; 5028 } 5029 5030 static inline unsigned long nonhyp_mask(int i) 5031 { 5032 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5033 5034 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5035 } 5036 5037 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 5038 { 5039 vcpu->valid_wakeup = false; 5040 } 5041 5042 static int __init kvm_s390_init(void) 5043 { 5044 int i; 5045 5046 if (!sclp.has_sief2) { 5047 pr_info("SIE is not available\n"); 5048 return -ENODEV; 5049 } 5050 5051 if (nested && hpage) { 5052 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5053 return -EINVAL; 5054 } 5055 5056 for (i = 0; i < 16; i++) 5057 kvm_s390_fac_base[i] |= 5058 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); 5059 5060 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5061 } 5062 5063 static void __exit kvm_s390_exit(void) 5064 { 5065 kvm_exit(); 5066 } 5067 5068 module_init(kvm_s390_init); 5069 module_exit(kvm_s390_exit); 5070 5071 /* 5072 * Enable autoloading of the kvm module. 5073 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5074 * since x86 takes a different approach. 5075 */ 5076 #include <linux/miscdevice.h> 5077 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5078 MODULE_ALIAS("devname:kvm"); 5079
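/* Note: as checked in kvm_s390_init() above, the nested and hpage module parameters are mutually exclusive; e.g. (hypothetically) 'modprobe kvm nested=1 hpage=1' would fail with -EINVAL. */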