// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

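/*
 * scb->epoch and scb->epdx together hold the guest epoch as a multi-word
 * signed value; the epoch index (epdx) is only consulted when the
 * multiple-epoch facility is active (ECD_MEF). kvm_clock_sync_scb() sign
 * extends the 64-bit delta into the index word via delta_idx and uses the
 * "sum is smaller than the addend" idiom to detect the carry out of the
 * low 64 bits.
 */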
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

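/*
 * plo_test_bit() uses the "test bit" form of PERFORM LOCKED OPERATION:
 * with bit 0x100 set in the function code in GR0, PLO only reports via
 * condition code 0 whether the requested function is installed, and the
 * parameter registers are ignored. __insn32_query() below issues the
 * query function (GR0 == 0) of the given instruction, with GR1 pointing
 * to the buffer that receives the availability bits.
 */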
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

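/*
 * Module initialization: debug areas and their sprintf views first, then
 * CPU feature probing, then the FLIC device ops and the GIB. The later
 * failure paths funnel through kvm_arch_exit(), so the teardown helpers
 * are expected to tolerate a setup that only partially completed.
 */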
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

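/*
 * Request an operation-exception intercept on every vcpu. This is used
 * when KVM_CAP_S390_USER_INSTR0 is enabled, so that the guest's use of
 * the instruction with opcode 0x0000 is reported to user space rather
 * than being handled in the kernel.
 */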
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

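/*
 * While migration mode is on and CMMA is in use, the count of pages with
 * dirty storage attributes is kept in kvm->arch.cmma_dirty_pages; user
 * space is expected to drain those attributes (and that counter) via the
 * KVM_S390_GET_CMMA_BITS ioctl.
 */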
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

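/*
 * Preemption is disabled while the host TOD and the VM epoch are combined,
 * so that the epoch cannot be changed underneath us by the clock-sync
 * notifier, which runs in stop_machine() and therefore waits for all
 * preemption-disabled sections (see the comment above kvm_clock_sync()).
 */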
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

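/*
 * sclp.ibc carries the lowest supported IBC value in its upper halfword
 * and the highest unblocked IBC value in its lower halfword (12 bits
 * each). A processor IBC requested by user space is clamped into that
 * range below.
 */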
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

*/ 1835 if (!mm_uses_skeys(current->mm)) 1836 return KVM_S390_GET_SKEYS_NONE; 1837 1838 /* Enforce sane limit on memory allocation */ 1839 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1840 return -EINVAL; 1841 1842 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1843 if (!keys) 1844 return -ENOMEM; 1845 1846 mmap_read_lock(current->mm); 1847 srcu_idx = srcu_read_lock(&kvm->srcu); 1848 for (i = 0; i < args->count; i++) { 1849 hva = gfn_to_hva(kvm, args->start_gfn + i); 1850 if (kvm_is_error_hva(hva)) { 1851 r = -EFAULT; 1852 break; 1853 } 1854 1855 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1856 if (r) 1857 break; 1858 } 1859 srcu_read_unlock(&kvm->srcu, srcu_idx); 1860 mmap_read_unlock(current->mm); 1861 1862 if (!r) { 1863 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1864 sizeof(uint8_t) * args->count); 1865 if (r) 1866 r = -EFAULT; 1867 } 1868 1869 kvfree(keys); 1870 return r; 1871 } 1872 1873 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1874 { 1875 uint8_t *keys; 1876 uint64_t hva; 1877 int srcu_idx, i, r = 0; 1878 bool unlocked; 1879 1880 if (args->flags != 0) 1881 return -EINVAL; 1882 1883 /* Enforce sane limit on memory allocation */ 1884 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1885 return -EINVAL; 1886 1887 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1888 if (!keys) 1889 return -ENOMEM; 1890 1891 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1892 sizeof(uint8_t) * args->count); 1893 if (r) { 1894 r = -EFAULT; 1895 goto out; 1896 } 1897 1898 /* Enable storage key handling for the guest */ 1899 r = s390_enable_skey(); 1900 if (r) 1901 goto out; 1902 1903 i = 0; 1904 mmap_read_lock(current->mm); 1905 srcu_idx = srcu_read_lock(&kvm->srcu); 1906 while (i < args->count) { 1907 unlocked = false; 1908 hva = gfn_to_hva(kvm, args->start_gfn + i); 1909 if (kvm_is_error_hva(hva)) { 1910 r = -EFAULT; 1911 break; 1912 } 1913 1914 /* Lowest order bit is reserved */ 1915 if (keys[i] & 0x01) { 1916 r = -EINVAL; 1917 break; 1918 } 1919 1920 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1921 if (r) { 1922 r = fixup_user_fault(current->mm, hva, 1923 FAULT_FLAG_WRITE, &unlocked); 1924 if (r) 1925 break; 1926 } 1927 if (!r) 1928 i++; 1929 } 1930 srcu_read_unlock(&kvm->srcu, srcu_idx); 1931 mmap_read_unlock(current->mm); 1932 out: 1933 kvfree(keys); 1934 return r; 1935 } 1936 1937 /* 1938 * Base address and length must be sent at the start of each block, therefore 1939 * it's cheaper to send some clean data, as long as it's less than the size of 1940 * two longs. 1941 */ 1942 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1943 /* for consistency */ 1944 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1945 1946 /* 1947 * Similar to gfn_to_memslot, but returns the index of a memslot also when the 1948 * address falls in a hole. In that case the index of one of the memslots 1949 * bordering the hole is returned. 
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->last_used_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->last_used_slot, start);
	}

	return start;
}

static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(!slots->used_slots))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop.
*/ 2060 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2061 return 0; 2062 /* If we reached the previous "next", find the next one */ 2063 if (cur_gfn == next_gfn) 2064 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2065 /* Reached the end of memory or of the buffer, stop */ 2066 if ((next_gfn >= mem_end) || 2067 (next_gfn - args->start_gfn >= bufsize)) 2068 return 0; 2069 cur_gfn++; 2070 /* Reached the end of the current memslot, take the next one. */ 2071 if (cur_gfn - ms->base_gfn >= ms->npages) { 2072 ms = gfn_to_memslot(kvm, cur_gfn); 2073 if (!ms) 2074 return 0; 2075 } 2076 } 2077 return 0; 2078 } 2079 2080 /* 2081 * This function searches for the next page with dirty CMMA attributes, and 2082 * saves the attributes in the buffer up to either the end of the buffer or 2083 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2084 * no trailing clean bytes are saved. 2085 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2086 * output buffer will indicate 0 as length. 2087 */ 2088 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2089 struct kvm_s390_cmma_log *args) 2090 { 2091 unsigned long bufsize; 2092 int srcu_idx, peek, ret; 2093 u8 *values; 2094 2095 if (!kvm->arch.use_cmma) 2096 return -ENXIO; 2097 /* Invalid/unsupported flags were specified */ 2098 if (args->flags & ~KVM_S390_CMMA_PEEK) 2099 return -EINVAL; 2100 /* Migration mode query, and we are not doing a migration */ 2101 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2102 if (!peek && !kvm->arch.migration_mode) 2103 return -EINVAL; 2104 /* CMMA is disabled or was not used, or the buffer has length zero */ 2105 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2106 if (!bufsize || !kvm->mm->context.uses_cmm) { 2107 memset(args, 0, sizeof(*args)); 2108 return 0; 2109 } 2110 /* We are not peeking, and there are no dirty pages */ 2111 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2112 memset(args, 0, sizeof(*args)); 2113 return 0; 2114 } 2115 2116 values = vmalloc(bufsize); 2117 if (!values) 2118 return -ENOMEM; 2119 2120 mmap_read_lock(kvm->mm); 2121 srcu_idx = srcu_read_lock(&kvm->srcu); 2122 if (peek) 2123 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2124 else 2125 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2126 srcu_read_unlock(&kvm->srcu, srcu_idx); 2127 mmap_read_unlock(kvm->mm); 2128 2129 if (kvm->arch.migration_mode) 2130 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2131 else 2132 args->remaining = 0; 2133 2134 if (copy_to_user((void __user *)args->values, values, args->count)) 2135 ret = -EFAULT; 2136 2137 vfree(values); 2138 return ret; 2139 } 2140 2141 /* 2142 * This function sets the CMMA attributes for the given pages. If the input 2143 * buffer has zero length, no action is taken, otherwise the attributes are 2144 * set and the mm->context.uses_cmm flag is set. 
2145 */ 2146 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2147 const struct kvm_s390_cmma_log *args) 2148 { 2149 unsigned long hva, mask, pgstev, i; 2150 uint8_t *bits; 2151 int srcu_idx, r = 0; 2152 2153 mask = args->mask; 2154 2155 if (!kvm->arch.use_cmma) 2156 return -ENXIO; 2157 /* invalid/unsupported flags */ 2158 if (args->flags != 0) 2159 return -EINVAL; 2160 /* Enforce sane limit on memory allocation */ 2161 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2162 return -EINVAL; 2163 /* Nothing to do */ 2164 if (args->count == 0) 2165 return 0; 2166 2167 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2168 if (!bits) 2169 return -ENOMEM; 2170 2171 r = copy_from_user(bits, (void __user *)args->values, args->count); 2172 if (r) { 2173 r = -EFAULT; 2174 goto out; 2175 } 2176 2177 mmap_read_lock(kvm->mm); 2178 srcu_idx = srcu_read_lock(&kvm->srcu); 2179 for (i = 0; i < args->count; i++) { 2180 hva = gfn_to_hva(kvm, args->start_gfn + i); 2181 if (kvm_is_error_hva(hva)) { 2182 r = -EFAULT; 2183 break; 2184 } 2185 2186 pgstev = bits[i]; 2187 pgstev = pgstev << 24; 2188 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2189 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2190 } 2191 srcu_read_unlock(&kvm->srcu, srcu_idx); 2192 mmap_read_unlock(kvm->mm); 2193 2194 if (!kvm->mm->context.uses_cmm) { 2195 mmap_write_lock(kvm->mm); 2196 kvm->mm->context.uses_cmm = 1; 2197 mmap_write_unlock(kvm->mm); 2198 } 2199 out: 2200 vfree(bits); 2201 return r; 2202 } 2203 2204 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2205 { 2206 struct kvm_vcpu *vcpu; 2207 u16 rc, rrc; 2208 int ret = 0; 2209 int i; 2210 2211 /* 2212 * We ignore failures and try to destroy as many CPUs as possible. 2213 * At the same time we must not free the assigned resources when 2214 * this fails, as the ultravisor has still access to that memory. 2215 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2216 * behind. 2217 * We want to return the first failure rc and rrc, though. 2218 */ 2219 kvm_for_each_vcpu(i, vcpu, kvm) { 2220 mutex_lock(&vcpu->mutex); 2221 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2222 *rcp = rc; 2223 *rrcp = rrc; 2224 ret = -EIO; 2225 } 2226 mutex_unlock(&vcpu->mutex); 2227 } 2228 return ret; 2229 } 2230 2231 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2232 { 2233 int i, r = 0; 2234 u16 dummy; 2235 2236 struct kvm_vcpu *vcpu; 2237 2238 kvm_for_each_vcpu(i, vcpu, kvm) { 2239 mutex_lock(&vcpu->mutex); 2240 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2241 mutex_unlock(&vcpu->mutex); 2242 if (r) 2243 break; 2244 } 2245 if (r) 2246 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2247 return r; 2248 } 2249 2250 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2251 { 2252 int r = 0; 2253 u16 dummy; 2254 void __user *argp = (void __user *)cmd->data; 2255 2256 switch (cmd->cmd) { 2257 case KVM_PV_ENABLE: { 2258 r = -EINVAL; 2259 if (kvm_s390_pv_is_protected(kvm)) 2260 break; 2261 2262 /* 2263 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2264 * esca, we need no cleanup in the error cases below 2265 */ 2266 r = sca_switch_to_extended(kvm); 2267 if (r) 2268 break; 2269 2270 mmap_write_lock(current->mm); 2271 r = gmap_mark_unmergeable(); 2272 mmap_write_unlock(current->mm); 2273 if (r) 2274 break; 2275 2276 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2277 if (r) 2278 break; 2279 2280 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2281 if (r) 2282 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2283 2284 /* we need to block service interrupts from now on */ 2285 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2286 break; 2287 } 2288 case KVM_PV_DISABLE: { 2289 r = -EINVAL; 2290 if (!kvm_s390_pv_is_protected(kvm)) 2291 break; 2292 2293 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2294 /* 2295 * If a CPU could not be destroyed, destroy VM will also fail. 2296 * There is no point in trying to destroy it. Instead return 2297 * the rc and rrc from the first CPU that failed destroying. 2298 */ 2299 if (r) 2300 break; 2301 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2302 2303 /* no need to block service interrupts any more */ 2304 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2305 break; 2306 } 2307 case KVM_PV_SET_SEC_PARMS: { 2308 struct kvm_s390_pv_sec_parm parms = {}; 2309 void *hdr; 2310 2311 r = -EINVAL; 2312 if (!kvm_s390_pv_is_protected(kvm)) 2313 break; 2314 2315 r = -EFAULT; 2316 if (copy_from_user(&parms, argp, sizeof(parms))) 2317 break; 2318 2319 /* Currently restricted to 8KB */ 2320 r = -EINVAL; 2321 if (parms.length > PAGE_SIZE * 2) 2322 break; 2323 2324 r = -ENOMEM; 2325 hdr = vmalloc(parms.length); 2326 if (!hdr) 2327 break; 2328 2329 r = -EFAULT; 2330 if (!copy_from_user(hdr, (void __user *)parms.origin, 2331 parms.length)) 2332 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2333 &cmd->rc, &cmd->rrc); 2334 2335 vfree(hdr); 2336 break; 2337 } 2338 case KVM_PV_UNPACK: { 2339 struct kvm_s390_pv_unp unp = {}; 2340 2341 r = -EINVAL; 2342 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2343 break; 2344 2345 r = -EFAULT; 2346 if (copy_from_user(&unp, argp, sizeof(unp))) 2347 break; 2348 2349 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2350 &cmd->rc, &cmd->rrc); 2351 break; 2352 } 2353 case KVM_PV_VERIFY: { 2354 r = -EINVAL; 2355 if (!kvm_s390_pv_is_protected(kvm)) 2356 break; 2357 2358 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2359 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2360 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2361 cmd->rrc); 2362 break; 2363 } 2364 case KVM_PV_PREP_RESET: { 2365 r = -EINVAL; 2366 if (!kvm_s390_pv_is_protected(kvm)) 2367 break; 2368 2369 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2370 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2371 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2372 cmd->rc, cmd->rrc); 2373 break; 2374 } 2375 case KVM_PV_UNSHARE_ALL: { 2376 r = -EINVAL; 2377 if (!kvm_s390_pv_is_protected(kvm)) 2378 break; 2379 2380 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2381 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2382 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2383 cmd->rc, cmd->rrc); 2384 break; 2385 } 2386 default: 2387 r = -ENOTTY; 2388 } 2389 return r; 2390 } 2391 2392 long kvm_arch_vm_ioctl(struct file *filp, 2393 unsigned int ioctl, unsigned long arg) 2394 { 2395 struct kvm *kvm = filp->private_data; 2396 void __user *argp = (void __user *)arg; 2397 struct kvm_device_attr attr; 2398 
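	/*
	 * The KVM_{HAS,SET,GET}_DEVICE_ATTR cases below all follow the same
	 * pattern: userspace fills in a struct kvm_device_attr and issues
	 * the ioctl on the VM file descriptor.  A minimal userspace sketch
	 * (illustrative only; assumes vm_fd was obtained via KVM_CREATE_VM
	 * and leaves out error handling):
	 *
	 *	struct kvm_device_attr attr = {
	 *		.group = KVM_S390_VM_MIGRATION,
	 *		.attr  = KVM_S390_VM_MIGRATION_START,
	 *	};
	 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
	 *		ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
	 */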
int r; 2399 2400 switch (ioctl) { 2401 case KVM_S390_INTERRUPT: { 2402 struct kvm_s390_interrupt s390int; 2403 2404 r = -EFAULT; 2405 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2406 break; 2407 r = kvm_s390_inject_vm(kvm, &s390int); 2408 break; 2409 } 2410 case KVM_CREATE_IRQCHIP: { 2411 struct kvm_irq_routing_entry routing; 2412 2413 r = -EINVAL; 2414 if (kvm->arch.use_irqchip) { 2415 /* Set up dummy routing. */ 2416 memset(&routing, 0, sizeof(routing)); 2417 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2418 } 2419 break; 2420 } 2421 case KVM_SET_DEVICE_ATTR: { 2422 r = -EFAULT; 2423 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2424 break; 2425 r = kvm_s390_vm_set_attr(kvm, &attr); 2426 break; 2427 } 2428 case KVM_GET_DEVICE_ATTR: { 2429 r = -EFAULT; 2430 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2431 break; 2432 r = kvm_s390_vm_get_attr(kvm, &attr); 2433 break; 2434 } 2435 case KVM_HAS_DEVICE_ATTR: { 2436 r = -EFAULT; 2437 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2438 break; 2439 r = kvm_s390_vm_has_attr(kvm, &attr); 2440 break; 2441 } 2442 case KVM_S390_GET_SKEYS: { 2443 struct kvm_s390_skeys args; 2444 2445 r = -EFAULT; 2446 if (copy_from_user(&args, argp, 2447 sizeof(struct kvm_s390_skeys))) 2448 break; 2449 r = kvm_s390_get_skeys(kvm, &args); 2450 break; 2451 } 2452 case KVM_S390_SET_SKEYS: { 2453 struct kvm_s390_skeys args; 2454 2455 r = -EFAULT; 2456 if (copy_from_user(&args, argp, 2457 sizeof(struct kvm_s390_skeys))) 2458 break; 2459 r = kvm_s390_set_skeys(kvm, &args); 2460 break; 2461 } 2462 case KVM_S390_GET_CMMA_BITS: { 2463 struct kvm_s390_cmma_log args; 2464 2465 r = -EFAULT; 2466 if (copy_from_user(&args, argp, sizeof(args))) 2467 break; 2468 mutex_lock(&kvm->slots_lock); 2469 r = kvm_s390_get_cmma_bits(kvm, &args); 2470 mutex_unlock(&kvm->slots_lock); 2471 if (!r) { 2472 r = copy_to_user(argp, &args, sizeof(args)); 2473 if (r) 2474 r = -EFAULT; 2475 } 2476 break; 2477 } 2478 case KVM_S390_SET_CMMA_BITS: { 2479 struct kvm_s390_cmma_log args; 2480 2481 r = -EFAULT; 2482 if (copy_from_user(&args, argp, sizeof(args))) 2483 break; 2484 mutex_lock(&kvm->slots_lock); 2485 r = kvm_s390_set_cmma_bits(kvm, &args); 2486 mutex_unlock(&kvm->slots_lock); 2487 break; 2488 } 2489 case KVM_S390_PV_COMMAND: { 2490 struct kvm_pv_cmd args; 2491 2492 /* protvirt means user cpu state */ 2493 kvm_s390_set_user_cpu_state_ctrl(kvm); 2494 r = 0; 2495 if (!is_prot_virt_host()) { 2496 r = -EINVAL; 2497 break; 2498 } 2499 if (copy_from_user(&args, argp, sizeof(args))) { 2500 r = -EFAULT; 2501 break; 2502 } 2503 if (args.flags) { 2504 r = -EINVAL; 2505 break; 2506 } 2507 mutex_lock(&kvm->lock); 2508 r = kvm_s390_handle_pv(kvm, &args); 2509 mutex_unlock(&kvm->lock); 2510 if (copy_to_user(argp, &args, sizeof(args))) { 2511 r = -EFAULT; 2512 break; 2513 } 2514 break; 2515 } 2516 default: 2517 r = -ENOTTY; 2518 } 2519 2520 return r; 2521 } 2522 2523 static int kvm_s390_apxa_installed(void) 2524 { 2525 struct ap_config_info info; 2526 2527 if (ap_instructions_available()) { 2528 if (ap_qci(&info) == 0) 2529 return info.apxa; 2530 } 2531 2532 return 0; 2533 } 2534 2535 /* 2536 * The format of the crypto control block (CRYCB) is specified in the 3 low 2537 * order bits of the CRYCB designation (CRYCBD) field as follows: 2538 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2539 * AP extended addressing (APXA) facility are installed. 2540 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

/*
 * kvm_arch_crypto_set_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 * to be set.
 * @apm: the mask identifying the accessible AP adapters
 * @aqm: the mask identifying the accessible AP domains
 * @adm: the mask identifying the accessible AP control domains
 *
 * Set the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 * function.
 */
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default: /* Can not happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);

/*
 * kvm_arch_crypto_clear_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 * to be cleared.
 *
 * Clear the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 * function.
2624 */ 2625 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2626 { 2627 kvm_s390_vcpu_block_all(kvm); 2628 2629 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2630 sizeof(kvm->arch.crypto.crycb->apcb0)); 2631 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2632 sizeof(kvm->arch.crypto.crycb->apcb1)); 2633 2634 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2635 /* recreate the shadow crycb for each vcpu */ 2636 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2637 kvm_s390_vcpu_unblock_all(kvm); 2638 } 2639 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2640 2641 static u64 kvm_s390_get_initial_cpuid(void) 2642 { 2643 struct cpuid cpuid; 2644 2645 get_cpu_id(&cpuid); 2646 cpuid.version = 0xff; 2647 return *((u64 *) &cpuid); 2648 } 2649 2650 static void kvm_s390_crypto_init(struct kvm *kvm) 2651 { 2652 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2653 kvm_s390_set_crycb_format(kvm); 2654 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2655 2656 if (!test_kvm_facility(kvm, 76)) 2657 return; 2658 2659 /* Enable AES/DEA protected key functions by default */ 2660 kvm->arch.crypto.aes_kw = 1; 2661 kvm->arch.crypto.dea_kw = 1; 2662 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2663 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2664 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2665 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2666 } 2667 2668 static void sca_dispose(struct kvm *kvm) 2669 { 2670 if (kvm->arch.use_esca) 2671 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2672 else 2673 free_page((unsigned long)(kvm->arch.sca)); 2674 kvm->arch.sca = NULL; 2675 } 2676 2677 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2678 { 2679 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2680 int i, rc; 2681 char debug_name[16]; 2682 static unsigned long sca_offset; 2683 2684 rc = -EINVAL; 2685 #ifdef CONFIG_KVM_S390_UCONTROL 2686 if (type & ~KVM_VM_S390_UCONTROL) 2687 goto out_err; 2688 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2689 goto out_err; 2690 #else 2691 if (type) 2692 goto out_err; 2693 #endif 2694 2695 rc = s390_enable_sie(); 2696 if (rc) 2697 goto out_err; 2698 2699 rc = -ENOMEM; 2700 2701 if (!sclp.has_64bscao) 2702 alloc_flags |= GFP_DMA; 2703 rwlock_init(&kvm->arch.sca_lock); 2704 /* start with basic SCA */ 2705 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2706 if (!kvm->arch.sca) 2707 goto out_err; 2708 mutex_lock(&kvm_lock); 2709 sca_offset += 16; 2710 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2711 sca_offset = 0; 2712 kvm->arch.sca = (struct bsca_block *) 2713 ((char *) kvm->arch.sca + sca_offset); 2714 mutex_unlock(&kvm_lock); 2715 2716 sprintf(debug_name, "kvm-%u", current->pid); 2717 2718 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2719 if (!kvm->arch.dbf) 2720 goto out_err; 2721 2722 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2723 kvm->arch.sie_page2 = 2724 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2725 if (!kvm->arch.sie_page2) 2726 goto out_err; 2727 2728 kvm->arch.sie_page2->kvm = kvm; 2729 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2730 2731 for (i = 0; i < kvm_s390_fac_size(); i++) { 2732 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2733 (kvm_s390_fac_base[i] | 2734 kvm_s390_fac_ext[i]); 2735 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2736 kvm_s390_fac_base[i]; 2737 } 2738 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2739 2740 /* we are always in czam mode - 
even on pre z14 machines */ 2741 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2742 set_kvm_facility(kvm->arch.model.fac_list, 138); 2743 /* we emulate STHYI in kvm */ 2744 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2745 set_kvm_facility(kvm->arch.model.fac_list, 74); 2746 if (MACHINE_HAS_TLB_GUEST) { 2747 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2748 set_kvm_facility(kvm->arch.model.fac_list, 147); 2749 } 2750 2751 if (css_general_characteristics.aiv && test_facility(65)) 2752 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2753 2754 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2755 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2756 2757 kvm_s390_crypto_init(kvm); 2758 2759 mutex_init(&kvm->arch.float_int.ais_lock); 2760 spin_lock_init(&kvm->arch.float_int.lock); 2761 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2762 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2763 init_waitqueue_head(&kvm->arch.ipte_wq); 2764 mutex_init(&kvm->arch.ipte_mutex); 2765 2766 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2767 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2768 2769 if (type & KVM_VM_S390_UCONTROL) { 2770 kvm->arch.gmap = NULL; 2771 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2772 } else { 2773 if (sclp.hamax == U64_MAX) 2774 kvm->arch.mem_limit = TASK_SIZE_MAX; 2775 else 2776 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2777 sclp.hamax + 1); 2778 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2779 if (!kvm->arch.gmap) 2780 goto out_err; 2781 kvm->arch.gmap->private = kvm; 2782 kvm->arch.gmap->pfault_enabled = 0; 2783 } 2784 2785 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2786 kvm->arch.use_skf = sclp.has_skey; 2787 spin_lock_init(&kvm->arch.start_stop_lock); 2788 kvm_s390_vsie_init(kvm); 2789 if (use_gisa) 2790 kvm_s390_gisa_init(kvm); 2791 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2792 2793 return 0; 2794 out_err: 2795 free_page((unsigned long)kvm->arch.sie_page2); 2796 debug_unregister(kvm->arch.dbf); 2797 sca_dispose(kvm); 2798 KVM_EVENT(3, "creation of vm failed: %d", rc); 2799 return rc; 2800 } 2801 2802 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2803 { 2804 u16 rc, rrc; 2805 2806 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2807 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2808 kvm_s390_clear_local_irqs(vcpu); 2809 kvm_clear_async_pf_completion_queue(vcpu); 2810 if (!kvm_is_ucontrol(vcpu->kvm)) 2811 sca_del_vcpu(vcpu); 2812 2813 if (kvm_is_ucontrol(vcpu->kvm)) 2814 gmap_remove(vcpu->arch.gmap); 2815 2816 if (vcpu->kvm->arch.use_cmma) 2817 kvm_s390_vcpu_unsetup_cmma(vcpu); 2818 /* We can not hold the vcpu mutex here, we are already dying */ 2819 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2820 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2821 free_page((unsigned long)(vcpu->arch.sie_block)); 2822 } 2823 2824 static void kvm_free_vcpus(struct kvm *kvm) 2825 { 2826 unsigned int i; 2827 struct kvm_vcpu *vcpu; 2828 2829 kvm_for_each_vcpu(i, vcpu, kvm) 2830 kvm_vcpu_destroy(vcpu); 2831 2832 mutex_lock(&kvm->lock); 2833 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 2834 kvm->vcpus[i] = NULL; 2835 2836 atomic_set(&kvm->online_vcpus, 0); 2837 mutex_unlock(&kvm->lock); 2838 } 2839 2840 void kvm_arch_destroy_vm(struct kvm *kvm) 2841 { 2842 u16 rc, rrc; 2843 2844 kvm_free_vcpus(kvm); 2845 sca_dispose(kvm); 2846 kvm_s390_gisa_destroy(kvm); 2847 /* 2848 * We are already at the end of life and kvm->lock is not taken. 2849 * This is ok as the file descriptor is closed by now and nobody 2850 * can mess with the pv state. 
To avoid lockdep_assert_held from 2851 * complaining we do not use kvm_s390_pv_is_protected. 2852 */ 2853 if (kvm_s390_pv_get_handle(kvm)) 2854 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2855 debug_unregister(kvm->arch.dbf); 2856 free_page((unsigned long)kvm->arch.sie_page2); 2857 if (!kvm_is_ucontrol(kvm)) 2858 gmap_remove(kvm->arch.gmap); 2859 kvm_s390_destroy_adapters(kvm); 2860 kvm_s390_clear_float_irqs(kvm); 2861 kvm_s390_vsie_destroy(kvm); 2862 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2863 } 2864 2865 /* Section: vcpu related */ 2866 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2867 { 2868 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2869 if (!vcpu->arch.gmap) 2870 return -ENOMEM; 2871 vcpu->arch.gmap->private = vcpu->kvm; 2872 2873 return 0; 2874 } 2875 2876 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2877 { 2878 if (!kvm_s390_use_sca_entries()) 2879 return; 2880 read_lock(&vcpu->kvm->arch.sca_lock); 2881 if (vcpu->kvm->arch.use_esca) { 2882 struct esca_block *sca = vcpu->kvm->arch.sca; 2883 2884 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2885 sca->cpu[vcpu->vcpu_id].sda = 0; 2886 } else { 2887 struct bsca_block *sca = vcpu->kvm->arch.sca; 2888 2889 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2890 sca->cpu[vcpu->vcpu_id].sda = 0; 2891 } 2892 read_unlock(&vcpu->kvm->arch.sca_lock); 2893 } 2894 2895 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2896 { 2897 if (!kvm_s390_use_sca_entries()) { 2898 struct bsca_block *sca = vcpu->kvm->arch.sca; 2899 2900 /* we still need the basic sca for the ipte control */ 2901 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2902 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2903 return; 2904 } 2905 read_lock(&vcpu->kvm->arch.sca_lock); 2906 if (vcpu->kvm->arch.use_esca) { 2907 struct esca_block *sca = vcpu->kvm->arch.sca; 2908 2909 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2910 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2911 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2912 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2913 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2914 } else { 2915 struct bsca_block *sca = vcpu->kvm->arch.sca; 2916 2917 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2918 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2919 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2920 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2921 } 2922 read_unlock(&vcpu->kvm->arch.sca_lock); 2923 } 2924 2925 /* Basic SCA to Extended SCA data copy routines */ 2926 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2927 { 2928 d->sda = s->sda; 2929 d->sigp_ctrl.c = s->sigp_ctrl.c; 2930 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2931 } 2932 2933 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2934 { 2935 int i; 2936 2937 d->ipte_control = s->ipte_control; 2938 d->mcn[0] = s->mcn; 2939 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2940 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2941 } 2942 2943 static int sca_switch_to_extended(struct kvm *kvm) 2944 { 2945 struct bsca_block *old_sca = kvm->arch.sca; 2946 struct esca_block *new_sca; 2947 struct kvm_vcpu *vcpu; 2948 unsigned int vcpu_idx; 2949 u32 scaol, scaoh; 2950 2951 if (kvm->arch.use_esca) 2952 return 0; 2953 2954 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 2955 if (!new_sca) 2956 return -ENOMEM; 2957 2958 scaoh = (u32)((u64)(new_sca) >> 32); 2959 scaol = (u32)(u64)(new_sca) 
& ~0x3fU; 2960 2961 kvm_s390_vcpu_block_all(kvm); 2962 write_lock(&kvm->arch.sca_lock); 2963 2964 sca_copy_b_to_e(new_sca, old_sca); 2965 2966 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 2967 vcpu->arch.sie_block->scaoh = scaoh; 2968 vcpu->arch.sie_block->scaol = scaol; 2969 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2970 } 2971 kvm->arch.sca = new_sca; 2972 kvm->arch.use_esca = 1; 2973 2974 write_unlock(&kvm->arch.sca_lock); 2975 kvm_s390_vcpu_unblock_all(kvm); 2976 2977 free_page((unsigned long)old_sca); 2978 2979 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 2980 old_sca, kvm->arch.sca); 2981 return 0; 2982 } 2983 2984 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 2985 { 2986 int rc; 2987 2988 if (!kvm_s390_use_sca_entries()) { 2989 if (id < KVM_MAX_VCPUS) 2990 return true; 2991 return false; 2992 } 2993 if (id < KVM_S390_BSCA_CPU_SLOTS) 2994 return true; 2995 if (!sclp.has_esca || !sclp.has_64bscao) 2996 return false; 2997 2998 mutex_lock(&kvm->lock); 2999 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3000 mutex_unlock(&kvm->lock); 3001 3002 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3003 } 3004 3005 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3006 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3007 { 3008 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3009 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3010 vcpu->arch.cputm_start = get_tod_clock_fast(); 3011 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3012 } 3013 3014 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3015 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3016 { 3017 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3018 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3019 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3020 vcpu->arch.cputm_start = 0; 3021 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3022 } 3023 3024 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3025 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3026 { 3027 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3028 vcpu->arch.cputm_enabled = true; 3029 __start_cpu_timer_accounting(vcpu); 3030 } 3031 3032 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3033 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3034 { 3035 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3036 __stop_cpu_timer_accounting(vcpu); 3037 vcpu->arch.cputm_enabled = false; 3038 } 3039 3040 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3041 { 3042 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3043 __enable_cpu_timer_accounting(vcpu); 3044 preempt_enable(); 3045 } 3046 3047 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3048 { 3049 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3050 __disable_cpu_timer_accounting(vcpu); 3051 preempt_enable(); 3052 } 3053 3054 /* set the cpu timer - may only be called from the VCPU thread itself */ 3055 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3056 { 3057 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3058 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3059 if (vcpu->arch.cputm_enabled) 3060 vcpu->arch.cputm_start = get_tod_clock_fast(); 3061 vcpu->arch.sie_block->cputm = cputm; 3062 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3063 preempt_enable(); 3064 } 3065 3066 /* update and get the cpu 
timer - can also be called from other VCPU threads */ 3067 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3068 { 3069 unsigned int seq; 3070 __u64 value; 3071 3072 if (unlikely(!vcpu->arch.cputm_enabled)) 3073 return vcpu->arch.sie_block->cputm; 3074 3075 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3076 do { 3077 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3078 /* 3079 * If the writer would ever execute a read in the critical 3080 * section, e.g. in irq context, we have a deadlock. 3081 */ 3082 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3083 value = vcpu->arch.sie_block->cputm; 3084 /* if cputm_start is 0, accounting is being started/stopped */ 3085 if (likely(vcpu->arch.cputm_start)) 3086 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3087 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3088 preempt_enable(); 3089 return value; 3090 } 3091 3092 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3093 { 3094 3095 gmap_enable(vcpu->arch.enabled_gmap); 3096 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3097 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3098 __start_cpu_timer_accounting(vcpu); 3099 vcpu->cpu = cpu; 3100 } 3101 3102 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3103 { 3104 vcpu->cpu = -1; 3105 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3106 __stop_cpu_timer_accounting(vcpu); 3107 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3108 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3109 gmap_disable(vcpu->arch.enabled_gmap); 3110 3111 } 3112 3113 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3114 { 3115 mutex_lock(&vcpu->kvm->lock); 3116 preempt_disable(); 3117 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3118 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3119 preempt_enable(); 3120 mutex_unlock(&vcpu->kvm->lock); 3121 if (!kvm_is_ucontrol(vcpu->kvm)) { 3122 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3123 sca_add_vcpu(vcpu); 3124 } 3125 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3126 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3127 /* make vcpu_load load the right gmap on the first trigger */ 3128 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3129 } 3130 3131 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3132 { 3133 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3134 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3135 return true; 3136 return false; 3137 } 3138 3139 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3140 { 3141 /* At least one ECC subfunction must be present */ 3142 return kvm_has_pckmo_subfunc(kvm, 32) || 3143 kvm_has_pckmo_subfunc(kvm, 33) || 3144 kvm_has_pckmo_subfunc(kvm, 34) || 3145 kvm_has_pckmo_subfunc(kvm, 40) || 3146 kvm_has_pckmo_subfunc(kvm, 41); 3147 3148 } 3149 3150 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3151 { 3152 /* 3153 * If the AP instructions are not being interpreted and the MSAX3 3154 * facility is not configured for the guest, there is nothing to set up. 
3155 */ 3156 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3157 return; 3158 3159 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3160 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3161 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3162 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3163 3164 if (vcpu->kvm->arch.crypto.apie) 3165 vcpu->arch.sie_block->eca |= ECA_APIE; 3166 3167 /* Set up protected key support */ 3168 if (vcpu->kvm->arch.crypto.aes_kw) { 3169 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3170 /* ecc is also wrapped with AES key */ 3171 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3172 vcpu->arch.sie_block->ecd |= ECD_ECC; 3173 } 3174 3175 if (vcpu->kvm->arch.crypto.dea_kw) 3176 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3177 } 3178 3179 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3180 { 3181 free_page(vcpu->arch.sie_block->cbrlo); 3182 vcpu->arch.sie_block->cbrlo = 0; 3183 } 3184 3185 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3186 { 3187 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3188 if (!vcpu->arch.sie_block->cbrlo) 3189 return -ENOMEM; 3190 return 0; 3191 } 3192 3193 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3194 { 3195 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3196 3197 vcpu->arch.sie_block->ibc = model->ibc; 3198 if (test_kvm_facility(vcpu->kvm, 7)) 3199 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3200 } 3201 3202 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3203 { 3204 int rc = 0; 3205 u16 uvrc, uvrrc; 3206 3207 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3208 CPUSTAT_SM | 3209 CPUSTAT_STOPPED); 3210 3211 if (test_kvm_facility(vcpu->kvm, 78)) 3212 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3213 else if (test_kvm_facility(vcpu->kvm, 8)) 3214 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3215 3216 kvm_s390_vcpu_setup_model(vcpu); 3217 3218 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3219 if (MACHINE_HAS_ESOP) 3220 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3221 if (test_kvm_facility(vcpu->kvm, 9)) 3222 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3223 if (test_kvm_facility(vcpu->kvm, 73)) 3224 vcpu->arch.sie_block->ecb |= ECB_TE; 3225 if (!kvm_is_ucontrol(vcpu->kvm)) 3226 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3227 3228 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3229 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3230 if (test_kvm_facility(vcpu->kvm, 130)) 3231 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3232 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3233 if (sclp.has_cei) 3234 vcpu->arch.sie_block->eca |= ECA_CEI; 3235 if (sclp.has_ib) 3236 vcpu->arch.sie_block->eca |= ECA_IB; 3237 if (sclp.has_siif) 3238 vcpu->arch.sie_block->eca |= ECA_SII; 3239 if (sclp.has_sigpif) 3240 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3241 if (test_kvm_facility(vcpu->kvm, 129)) { 3242 vcpu->arch.sie_block->eca |= ECA_VX; 3243 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3244 } 3245 if (test_kvm_facility(vcpu->kvm, 139)) 3246 vcpu->arch.sie_block->ecd |= ECD_MEF; 3247 if (test_kvm_facility(vcpu->kvm, 156)) 3248 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3249 if (vcpu->arch.sie_block->gd) { 3250 vcpu->arch.sie_block->eca |= ECA_AIV; 3251 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3252 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3253 } 3254 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3255 | SDNXC; 3256 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 
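	/*
	 * Storage-key handling: with the keyless-subset facility (KSS) the
	 * vcpu can start in keyless mode, so key handling only needs to be
	 * enabled once the guest actually uses storage keys; without KSS,
	 * ISKE/SSKE/RRBE are intercepted from the start via the ICTL bits
	 * set below.
	 */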
3257 3258 if (sclp.has_kss) 3259 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3260 else 3261 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3262 3263 if (vcpu->kvm->arch.use_cmma) { 3264 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3265 if (rc) 3266 return rc; 3267 } 3268 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3269 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3270 3271 vcpu->arch.sie_block->hpid = HPID_KVM; 3272 3273 kvm_s390_vcpu_crypto_setup(vcpu); 3274 3275 mutex_lock(&vcpu->kvm->lock); 3276 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3277 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3278 if (rc) 3279 kvm_s390_vcpu_unsetup_cmma(vcpu); 3280 } 3281 mutex_unlock(&vcpu->kvm->lock); 3282 3283 return rc; 3284 } 3285 3286 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3287 { 3288 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3289 return -EINVAL; 3290 return 0; 3291 } 3292 3293 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3294 { 3295 struct sie_page *sie_page; 3296 int rc; 3297 3298 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3299 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3300 if (!sie_page) 3301 return -ENOMEM; 3302 3303 vcpu->arch.sie_block = &sie_page->sie_block; 3304 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3305 3306 /* the real guest size will always be smaller than msl */ 3307 vcpu->arch.sie_block->mso = 0; 3308 vcpu->arch.sie_block->msl = sclp.hamax; 3309 3310 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3311 spin_lock_init(&vcpu->arch.local_int.lock); 3312 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; 3313 if (vcpu->arch.sie_block->gd && sclp.has_gisaf) 3314 vcpu->arch.sie_block->gd |= GISA_FORMAT1; 3315 seqcount_init(&vcpu->arch.cputm_seqcount); 3316 3317 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3318 kvm_clear_async_pf_completion_queue(vcpu); 3319 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3320 KVM_SYNC_GPRS | 3321 KVM_SYNC_ACRS | 3322 KVM_SYNC_CRS | 3323 KVM_SYNC_ARCH0 | 3324 KVM_SYNC_PFAULT | 3325 KVM_SYNC_DIAG318; 3326 kvm_s390_set_prefix(vcpu, 0); 3327 if (test_kvm_facility(vcpu->kvm, 64)) 3328 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3329 if (test_kvm_facility(vcpu->kvm, 82)) 3330 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3331 if (test_kvm_facility(vcpu->kvm, 133)) 3332 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3333 if (test_kvm_facility(vcpu->kvm, 156)) 3334 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3335 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3336 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3337 */ 3338 if (MACHINE_HAS_VX) 3339 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3340 else 3341 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3342 3343 if (kvm_is_ucontrol(vcpu->kvm)) { 3344 rc = __kvm_ucontrol_vcpu_init(vcpu); 3345 if (rc) 3346 goto out_free_sie_block; 3347 } 3348 3349 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3350 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3351 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3352 3353 rc = kvm_s390_vcpu_setup(vcpu); 3354 if (rc) 3355 goto out_ucontrol_uninit; 3356 return 0; 3357 3358 out_ucontrol_uninit: 3359 if (kvm_is_ucontrol(vcpu->kvm)) 3360 gmap_remove(vcpu->arch.gmap); 3361 out_free_sie_block: 3362 free_page((unsigned long)(vcpu->arch.sie_block)); 3363 return rc; 3364 } 3365 3366 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3367 { 3368 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3369 return kvm_s390_vcpu_has_irq(vcpu, 0); 3370 } 3371 3372 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3373 { 3374 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3375 } 3376 3377 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3378 { 3379 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3380 exit_sie(vcpu); 3381 } 3382 3383 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3384 { 3385 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3386 } 3387 3388 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3389 { 3390 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3391 exit_sie(vcpu); 3392 } 3393 3394 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3395 { 3396 return atomic_read(&vcpu->arch.sie_block->prog20) & 3397 (PROG_BLOCK_SIE | PROG_REQUEST); 3398 } 3399 3400 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3401 { 3402 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3403 } 3404 3405 /* 3406 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3407 * If the CPU is not running (e.g. waiting as idle) the function will 3408 * return immediately. 
*/ 3409 void exit_sie(struct kvm_vcpu *vcpu) 3410 { 3411 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3412 kvm_s390_vsie_kick(vcpu); 3413 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3414 cpu_relax(); 3415 } 3416 3417 /* Kick a guest cpu out of SIE to process a request synchronously */ 3418 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3419 { 3420 kvm_make_request(req, vcpu); 3421 kvm_s390_vcpu_request(vcpu); 3422 } 3423 3424 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3425 unsigned long end) 3426 { 3427 struct kvm *kvm = gmap->private; 3428 struct kvm_vcpu *vcpu; 3429 unsigned long prefix; 3430 int i; 3431 3432 if (gmap_is_shadow(gmap)) 3433 return; 3434 if (start >= 1UL << 31) 3435 /* We are only interested in prefix pages */ 3436 return; 3437 kvm_for_each_vcpu(i, vcpu, kvm) { 3438 /* match against both prefix pages */ 3439 prefix = kvm_s390_get_prefix(vcpu); 3440 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3441 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3442 start, end); 3443 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu); 3444 } 3445 } 3446 } 3447 3448 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3449 { 3450 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3451 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3452 halt_poll_max_steal) { 3453 vcpu->stat.halt_no_poll_steal++; 3454 return true; 3455 } 3456 return false; 3457 } 3458 3459 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3460 { 3461 /* kvm common code refers to this, but never calls it */ 3462 BUG(); 3463 return 0; 3464 } 3465 3466 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3467 struct kvm_one_reg *reg) 3468 { 3469 int r = -EINVAL; 3470 3471 switch (reg->id) { 3472 case KVM_REG_S390_TODPR: 3473 r = put_user(vcpu->arch.sie_block->todpr, 3474 (u32 __user *)reg->addr); 3475 break; 3476 case KVM_REG_S390_EPOCHDIFF: 3477 r = put_user(vcpu->arch.sie_block->epoch, 3478 (u64 __user *)reg->addr); 3479 break; 3480 case KVM_REG_S390_CPU_TIMER: 3481 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3482 (u64 __user *)reg->addr); 3483 break; 3484 case KVM_REG_S390_CLOCK_COMP: 3485 r = put_user(vcpu->arch.sie_block->ckc, 3486 (u64 __user *)reg->addr); 3487 break; 3488 case KVM_REG_S390_PFTOKEN: 3489 r = put_user(vcpu->arch.pfault_token, 3490 (u64 __user *)reg->addr); 3491 break; 3492 case KVM_REG_S390_PFCOMPARE: 3493 r = put_user(vcpu->arch.pfault_compare, 3494 (u64 __user *)reg->addr); 3495 break; 3496 case KVM_REG_S390_PFSELECT: 3497 r = put_user(vcpu->arch.pfault_select, 3498 (u64 __user *)reg->addr); 3499 break; 3500 case KVM_REG_S390_PP: 3501 r = put_user(vcpu->arch.sie_block->pp, 3502 (u64 __user *)reg->addr); 3503 break; 3504 case KVM_REG_S390_GBEA: 3505 r = put_user(vcpu->arch.sie_block->gbea, 3506 (u64 __user *)reg->addr); 3507 break; 3508 default: 3509 break; 3510 } 3511 3512 return r; 3513 } 3514 3515 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3516 struct kvm_one_reg *reg) 3517 { 3518 int r = -EINVAL; 3519 __u64 val; 3520 3521 switch (reg->id) { 3522 case KVM_REG_S390_TODPR: 3523 r = get_user(vcpu->arch.sie_block->todpr, 3524 (u32 __user *)reg->addr); 3525 break; 3526 case KVM_REG_S390_EPOCHDIFF: 3527 r = get_user(vcpu->arch.sie_block->epoch, 3528 (u64 __user *)reg->addr); 3529 break; 3530 case KVM_REG_S390_CPU_TIMER: 3531 r = get_user(val, (u64 __user *)reg->addr); 3532 if (!r) 3533 kvm_s390_set_cpu_timer(vcpu, val); 3534 break; 3535 case KVM_REG_S390_CLOCK_COMP: 3536 r = 
get_user(vcpu->arch.sie_block->ckc, 3537 (u64 __user *)reg->addr); 3538 break; 3539 case KVM_REG_S390_PFTOKEN: 3540 r = get_user(vcpu->arch.pfault_token, 3541 (u64 __user *)reg->addr); 3542 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3543 kvm_clear_async_pf_completion_queue(vcpu); 3544 break; 3545 case KVM_REG_S390_PFCOMPARE: 3546 r = get_user(vcpu->arch.pfault_compare, 3547 (u64 __user *)reg->addr); 3548 break; 3549 case KVM_REG_S390_PFSELECT: 3550 r = get_user(vcpu->arch.pfault_select, 3551 (u64 __user *)reg->addr); 3552 break; 3553 case KVM_REG_S390_PP: 3554 r = get_user(vcpu->arch.sie_block->pp, 3555 (u64 __user *)reg->addr); 3556 break; 3557 case KVM_REG_S390_GBEA: 3558 r = get_user(vcpu->arch.sie_block->gbea, 3559 (u64 __user *)reg->addr); 3560 break; 3561 default: 3562 break; 3563 } 3564 3565 return r; 3566 } 3567 3568 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3569 { 3570 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3571 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3572 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3573 3574 kvm_clear_async_pf_completion_queue(vcpu); 3575 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3576 kvm_s390_vcpu_stop(vcpu); 3577 kvm_s390_clear_local_irqs(vcpu); 3578 } 3579 3580 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3581 { 3582 /* Initial reset is a superset of the normal reset */ 3583 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3584 3585 /* 3586 * This equals initial cpu reset in pop, but we don't switch to ESA. 3587 * We do not only reset the internal data, but also ... 3588 */ 3589 vcpu->arch.sie_block->gpsw.mask = 0; 3590 vcpu->arch.sie_block->gpsw.addr = 0; 3591 kvm_s390_set_prefix(vcpu, 0); 3592 kvm_s390_set_cpu_timer(vcpu, 0); 3593 vcpu->arch.sie_block->ckc = 0; 3594 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3595 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3596 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3597 3598 /* ... the data in sync regs */ 3599 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3600 vcpu->run->s.regs.ckc = 0; 3601 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3602 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3603 vcpu->run->psw_addr = 0; 3604 vcpu->run->psw_mask = 0; 3605 vcpu->run->s.regs.todpr = 0; 3606 vcpu->run->s.regs.cputm = 0; 3607 vcpu->run->s.regs.ckc = 0; 3608 vcpu->run->s.regs.pp = 0; 3609 vcpu->run->s.regs.gbea = 1; 3610 vcpu->run->s.regs.fpc = 0; 3611 /* 3612 * Do not reset these registers in the protected case, as some of 3613 * them are overlayed and they are not accessible in this case 3614 * anyway. 
 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}

static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

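	/*
	 * Any previously installed debug state is dropped first and then
	 * rebuilt from dbg->control below.  A minimal userspace sketch
	 * (illustrative only; assumes vcpu_fd is a vcpu file descriptor and
	 * omits error handling):
	 *
	 *	struct kvm_guest_debug dbg = {
	 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	 *	};
	 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	 */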
3749 vcpu->guest_debug = 0; 3750 kvm_s390_clear_bp_data(vcpu); 3751 3752 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3753 rc = -EINVAL; 3754 goto out; 3755 } 3756 if (!sclp.has_gpere) { 3757 rc = -EINVAL; 3758 goto out; 3759 } 3760 3761 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3762 vcpu->guest_debug = dbg->control; 3763 /* enforce guest PER */ 3764 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3765 3766 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3767 rc = kvm_s390_import_bp_data(vcpu, dbg); 3768 } else { 3769 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3770 vcpu->arch.guestdbg.last_bp = 0; 3771 } 3772 3773 if (rc) { 3774 vcpu->guest_debug = 0; 3775 kvm_s390_clear_bp_data(vcpu); 3776 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3777 } 3778 3779 out: 3780 vcpu_put(vcpu); 3781 return rc; 3782 } 3783 3784 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3785 struct kvm_mp_state *mp_state) 3786 { 3787 int ret; 3788 3789 vcpu_load(vcpu); 3790 3791 /* CHECK_STOP and LOAD are not supported yet */ 3792 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3793 KVM_MP_STATE_OPERATING; 3794 3795 vcpu_put(vcpu); 3796 return ret; 3797 } 3798 3799 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3800 struct kvm_mp_state *mp_state) 3801 { 3802 int rc = 0; 3803 3804 vcpu_load(vcpu); 3805 3806 /* user space knows about this interface - let it control the state */ 3807 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 3808 3809 switch (mp_state->mp_state) { 3810 case KVM_MP_STATE_STOPPED: 3811 rc = kvm_s390_vcpu_stop(vcpu); 3812 break; 3813 case KVM_MP_STATE_OPERATING: 3814 rc = kvm_s390_vcpu_start(vcpu); 3815 break; 3816 case KVM_MP_STATE_LOAD: 3817 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3818 rc = -ENXIO; 3819 break; 3820 } 3821 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3822 break; 3823 case KVM_MP_STATE_CHECK_STOP: 3824 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3825 default: 3826 rc = -ENXIO; 3827 } 3828 3829 vcpu_put(vcpu); 3830 return rc; 3831 } 3832 3833 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3834 { 3835 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3836 } 3837 3838 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3839 { 3840 retry: 3841 kvm_s390_vcpu_request_handled(vcpu); 3842 if (!kvm_request_pending(vcpu)) 3843 return 0; 3844 /* 3845 * We use MMU_RELOAD just to re-arm the ipte notifier for the 3846 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3847 * This ensures that the ipte instruction for this request has 3848 * already finished. We might race against a second unmapper that 3849 * wants to set the blocking bit. Lets just retry the request loop. 
3850 */ 3851 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 3852 int rc; 3853 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3854 kvm_s390_get_prefix(vcpu), 3855 PAGE_SIZE * 2, PROT_WRITE); 3856 if (rc) { 3857 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); 3858 return rc; 3859 } 3860 goto retry; 3861 } 3862 3863 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3864 vcpu->arch.sie_block->ihcpu = 0xffff; 3865 goto retry; 3866 } 3867 3868 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3869 if (!ibs_enabled(vcpu)) { 3870 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3871 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3872 } 3873 goto retry; 3874 } 3875 3876 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3877 if (ibs_enabled(vcpu)) { 3878 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3879 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3880 } 3881 goto retry; 3882 } 3883 3884 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3885 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3886 goto retry; 3887 } 3888 3889 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3890 /* 3891 * Disable CMM virtualization; we will emulate the ESSA 3892 * instruction manually, in order to provide additional 3893 * functionalities needed for live migration. 3894 */ 3895 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3896 goto retry; 3897 } 3898 3899 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3900 /* 3901 * Re-enable CMM virtualization if CMMA is available and 3902 * CMM has been used. 3903 */ 3904 if ((vcpu->kvm->arch.use_cmma) && 3905 (vcpu->kvm->mm->context.uses_cmm)) 3906 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3907 goto retry; 3908 } 3909 3910 /* nothing to do, just clear the request */ 3911 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3912 /* we left the vsie handler, nothing to do, just clear the request */ 3913 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3914 3915 return 0; 3916 } 3917 3918 void kvm_s390_set_tod_clock(struct kvm *kvm, 3919 const struct kvm_s390_vm_tod_clock *gtod) 3920 { 3921 struct kvm_vcpu *vcpu; 3922 union tod_clock clk; 3923 int i; 3924 3925 mutex_lock(&kvm->lock); 3926 preempt_disable(); 3927 3928 store_tod_clock_ext(&clk); 3929 3930 kvm->arch.epoch = gtod->tod - clk.tod; 3931 kvm->arch.epdx = 0; 3932 if (test_kvm_facility(kvm, 139)) { 3933 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3934 if (kvm->arch.epoch > gtod->tod) 3935 kvm->arch.epdx -= 1; 3936 } 3937 3938 kvm_s390_vcpu_block_all(kvm); 3939 kvm_for_each_vcpu(i, vcpu, kvm) { 3940 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3941 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3942 } 3943 3944 kvm_s390_vcpu_unblock_all(kvm); 3945 preempt_enable(); 3946 mutex_unlock(&kvm->lock); 3947 } 3948 3949 /** 3950 * kvm_arch_fault_in_page - fault-in guest page if necessary 3951 * @vcpu: The corresponding virtual cpu 3952 * @gpa: Guest physical address 3953 * @writable: Whether the page should be writable or not 3954 * 3955 * Make sure that a guest page has been faulted-in on the host. 3956 * 3957 * Return: Zero on success, negative error code otherwise. 3958 */ 3959 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 3960 { 3961 return gmap_fault(vcpu->arch.gmap, gpa, 3962 writable ? 
FAULT_FLAG_WRITE : 0); 3963 } 3964 3965 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 3966 unsigned long token) 3967 { 3968 struct kvm_s390_interrupt inti; 3969 struct kvm_s390_irq irq; 3970 3971 if (start_token) { 3972 irq.u.ext.ext_params2 = token; 3973 irq.type = KVM_S390_INT_PFAULT_INIT; 3974 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 3975 } else { 3976 inti.type = KVM_S390_INT_PFAULT_DONE; 3977 inti.parm64 = token; 3978 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 3979 } 3980 } 3981 3982 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 3983 struct kvm_async_pf *work) 3984 { 3985 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 3986 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 3987 3988 return true; 3989 } 3990 3991 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 3992 struct kvm_async_pf *work) 3993 { 3994 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 3995 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 3996 } 3997 3998 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 3999 struct kvm_async_pf *work) 4000 { 4001 /* s390 will always inject the page directly */ 4002 } 4003 4004 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4005 { 4006 /* 4007 * s390 will always inject the page directly, 4008 * but we still want check_async_completion to cleanup 4009 */ 4010 return true; 4011 } 4012 4013 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4014 { 4015 hva_t hva; 4016 struct kvm_arch_async_pf arch; 4017 4018 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4019 return false; 4020 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4021 vcpu->arch.pfault_compare) 4022 return false; 4023 if (psw_extint_disabled(vcpu)) 4024 return false; 4025 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4026 return false; 4027 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4028 return false; 4029 if (!vcpu->arch.gmap->pfault_enabled) 4030 return false; 4031 4032 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4033 hva += current->thread.gmap_addr & ~PAGE_MASK; 4034 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4035 return false; 4036 4037 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4038 } 4039 4040 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4041 { 4042 int rc, cpuflags; 4043 4044 /* 4045 * On s390 notifications for arriving pages will be delivered directly 4046 * to the guest but the house keeping for completed pfaults is 4047 * handled outside the worker. 
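	 * kvm_check_async_pf_completion() below does that housekeeping before
	 * each SIE entry.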
4048 */ 4049 kvm_check_async_pf_completion(vcpu); 4050 4051 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4052 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4053 4054 if (need_resched()) 4055 schedule(); 4056 4057 if (!kvm_is_ucontrol(vcpu->kvm)) { 4058 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4059 if (rc) 4060 return rc; 4061 } 4062 4063 rc = kvm_s390_handle_requests(vcpu); 4064 if (rc) 4065 return rc; 4066 4067 if (guestdbg_enabled(vcpu)) { 4068 kvm_s390_backup_guest_per_regs(vcpu); 4069 kvm_s390_patch_guest_per_regs(vcpu); 4070 } 4071 4072 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4073 4074 vcpu->arch.sie_block->icptcode = 0; 4075 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4076 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4077 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4078 4079 return 0; 4080 } 4081 4082 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4083 { 4084 struct kvm_s390_pgm_info pgm_info = { 4085 .code = PGM_ADDRESSING, 4086 }; 4087 u8 opcode, ilen; 4088 int rc; 4089 4090 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4091 trace_kvm_s390_sie_fault(vcpu); 4092 4093 /* 4094 * We want to inject an addressing exception, which is defined as a 4095 * suppressing or terminating exception. However, since we came here 4096 * by a DAT access exception, the PSW still points to the faulting 4097 * instruction since DAT exceptions are nullifying. So we've got 4098 * to look up the current opcode to get the length of the instruction 4099 * to be able to forward the PSW. 4100 */ 4101 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4102 ilen = insn_length(opcode); 4103 if (rc < 0) { 4104 return rc; 4105 } else if (rc) { 4106 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4107 * Forward by arbitrary ilc, injection will take care of 4108 * nullification if necessary. 
4109 */ 4110 pgm_info = vcpu->arch.pgm; 4111 ilen = 4; 4112 } 4113 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4114 kvm_s390_forward_psw(vcpu, ilen); 4115 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4116 } 4117 4118 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4119 { 4120 struct mcck_volatile_info *mcck_info; 4121 struct sie_page *sie_page; 4122 4123 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4124 vcpu->arch.sie_block->icptcode); 4125 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4126 4127 if (guestdbg_enabled(vcpu)) 4128 kvm_s390_restore_guest_per_regs(vcpu); 4129 4130 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4131 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4132 4133 if (exit_reason == -EINTR) { 4134 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4135 sie_page = container_of(vcpu->arch.sie_block, 4136 struct sie_page, sie_block); 4137 mcck_info = &sie_page->mcck_info; 4138 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4139 return 0; 4140 } 4141 4142 if (vcpu->arch.sie_block->icptcode > 0) { 4143 int rc = kvm_handle_sie_intercept(vcpu); 4144 4145 if (rc != -EOPNOTSUPP) 4146 return rc; 4147 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4148 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4149 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4150 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4151 return -EREMOTE; 4152 } else if (exit_reason != -EFAULT) { 4153 vcpu->stat.exit_null++; 4154 return 0; 4155 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4156 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4157 vcpu->run->s390_ucontrol.trans_exc_code = 4158 current->thread.gmap_addr; 4159 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4160 return -EREMOTE; 4161 } else if (current->thread.gmap_pfault) { 4162 trace_kvm_s390_major_guest_pfault(vcpu); 4163 current->thread.gmap_pfault = 0; 4164 if (kvm_arch_setup_async_pf(vcpu)) 4165 return 0; 4166 vcpu->stat.pfault_sync++; 4167 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4168 } 4169 return vcpu_post_run_fault_in_sie(vcpu); 4170 } 4171 4172 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4173 static int __vcpu_run(struct kvm_vcpu *vcpu) 4174 { 4175 int rc, exit_reason; 4176 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4177 4178 /* 4179 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4180 * ning the guest), so that memslots (and other stuff) are protected 4181 */ 4182 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4183 4184 do { 4185 rc = vcpu_pre_run(vcpu); 4186 if (rc) 4187 break; 4188 4189 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4190 /* 4191 * As PF_VCPU will be used in fault handler, between 4192 * guest_enter and guest_exit should be no uaccess. 
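		 * (That is, no get_user()/put_user()/copy_to_user()/
		 * copy_from_user() may be issued in this window.)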
4193 */ 4194 local_irq_disable(); 4195 guest_enter_irqoff(); 4196 __disable_cpu_timer_accounting(vcpu); 4197 local_irq_enable(); 4198 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4199 memcpy(sie_page->pv_grregs, 4200 vcpu->run->s.regs.gprs, 4201 sizeof(sie_page->pv_grregs)); 4202 } 4203 if (test_cpu_flag(CIF_FPU)) 4204 load_fpu_regs(); 4205 exit_reason = sie64a(vcpu->arch.sie_block, 4206 vcpu->run->s.regs.gprs); 4207 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4208 memcpy(vcpu->run->s.regs.gprs, 4209 sie_page->pv_grregs, 4210 sizeof(sie_page->pv_grregs)); 4211 /* 4212 * We're not allowed to inject interrupts on intercepts 4213 * that leave the guest state in an "in-between" state 4214 * where the next SIE entry will do a continuation. 4215 * Fence interrupts in our "internal" PSW. 4216 */ 4217 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4218 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4219 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4220 } 4221 } 4222 local_irq_disable(); 4223 __enable_cpu_timer_accounting(vcpu); 4224 guest_exit_irqoff(); 4225 local_irq_enable(); 4226 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4227 4228 rc = vcpu_post_run(vcpu, exit_reason); 4229 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4230 4231 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4232 return rc; 4233 } 4234 4235 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4236 { 4237 struct kvm_run *kvm_run = vcpu->run; 4238 struct runtime_instr_cb *riccb; 4239 struct gs_cb *gscb; 4240 4241 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4242 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4243 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4244 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4245 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4246 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4247 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4248 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4249 } 4250 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4251 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4252 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4253 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4254 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4255 kvm_clear_async_pf_completion_queue(vcpu); 4256 } 4257 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4258 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4259 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4260 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4261 } 4262 /* 4263 * If userspace sets the riccb (e.g. after migration) to a valid state, 4264 * we should enable RI here instead of doing the lazy enablement. 4265 */ 4266 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4267 test_kvm_facility(vcpu->kvm, 64) && 4268 riccb->v && 4269 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4270 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4271 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4272 } 4273 /* 4274 * If userspace sets the gscb (e.g. after migration) to non-zero, 4275 * we should enable GS here instead of doing the lazy enablement. 
4276 */ 4277 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4278 test_kvm_facility(vcpu->kvm, 133) && 4279 gscb->gssm && 4280 !vcpu->arch.gs_enabled) { 4281 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4282 vcpu->arch.sie_block->ecb |= ECB_GS; 4283 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4284 vcpu->arch.gs_enabled = 1; 4285 } 4286 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4287 test_kvm_facility(vcpu->kvm, 82)) { 4288 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4289 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 4290 } 4291 if (MACHINE_HAS_GS) { 4292 preempt_disable(); 4293 __ctl_set_bit(2, 4); 4294 if (current->thread.gs_cb) { 4295 vcpu->arch.host_gscb = current->thread.gs_cb; 4296 save_gs_cb(vcpu->arch.host_gscb); 4297 } 4298 if (vcpu->arch.gs_enabled) { 4299 current->thread.gs_cb = (struct gs_cb *) 4300 &vcpu->run->s.regs.gscb; 4301 restore_gs_cb(current->thread.gs_cb); 4302 } 4303 preempt_enable(); 4304 } 4305 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4306 } 4307 4308 static void sync_regs(struct kvm_vcpu *vcpu) 4309 { 4310 struct kvm_run *kvm_run = vcpu->run; 4311 4312 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4313 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4314 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4315 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4316 /* some control register changes require a tlb flush */ 4317 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4318 } 4319 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4320 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4321 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4322 } 4323 save_access_regs(vcpu->arch.host_acrs); 4324 restore_access_regs(vcpu->run->s.regs.acrs); 4325 /* save host (userspace) fprs/vrs */ 4326 save_fpu_regs(); 4327 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4328 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4329 if (MACHINE_HAS_VX) 4330 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4331 else 4332 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4333 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4334 if (test_fp_ctl(current->thread.fpu.fpc)) 4335 /* User space provided an invalid FPC, let's clear it */ 4336 current->thread.fpu.fpc = 0; 4337 4338 /* Sync fmt2 only data */ 4339 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4340 sync_regs_fmt2(vcpu); 4341 } else { 4342 /* 4343 * In several places we have to modify our internal view to 4344 * not do things that are disallowed by the ultravisor. For 4345 * example we must not inject interrupts after specific exits 4346 * (e.g. 112 prefix page not secure). We do this by turning 4347 * off the machine check, external and I/O interrupt bits 4348 * of our PSW copy. To avoid getting validity intercepts, we 4349 * do only accept the condition code from userspace. 
4350 */ 4351 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4352 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4353 PSW_MASK_CC; 4354 } 4355 4356 kvm_run->kvm_dirty_regs = 0; 4357 } 4358 4359 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4360 { 4361 struct kvm_run *kvm_run = vcpu->run; 4362 4363 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4364 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4365 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4366 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4367 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4368 if (MACHINE_HAS_GS) { 4369 preempt_disable(); 4370 __ctl_set_bit(2, 4); 4371 if (vcpu->arch.gs_enabled) 4372 save_gs_cb(current->thread.gs_cb); 4373 current->thread.gs_cb = vcpu->arch.host_gscb; 4374 restore_gs_cb(vcpu->arch.host_gscb); 4375 if (!vcpu->arch.host_gscb) 4376 __ctl_clear_bit(2, 4); 4377 vcpu->arch.host_gscb = NULL; 4378 preempt_enable(); 4379 } 4380 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4381 } 4382 4383 static void store_regs(struct kvm_vcpu *vcpu) 4384 { 4385 struct kvm_run *kvm_run = vcpu->run; 4386 4387 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4388 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4389 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4390 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4391 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4392 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4393 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4394 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4395 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4396 save_access_regs(vcpu->run->s.regs.acrs); 4397 restore_access_regs(vcpu->arch.host_acrs); 4398 /* Save guest register state */ 4399 save_fpu_regs(); 4400 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4401 /* Restore will be done lazily at return */ 4402 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4403 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4404 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4405 store_regs_fmt2(vcpu); 4406 } 4407 4408 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4409 { 4410 struct kvm_run *kvm_run = vcpu->run; 4411 int rc; 4412 4413 if (kvm_run->immediate_exit) 4414 return -EINTR; 4415 4416 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4417 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4418 return -EINVAL; 4419 4420 vcpu_load(vcpu); 4421 4422 if (guestdbg_exit_pending(vcpu)) { 4423 kvm_s390_prepare_debug_exit(vcpu); 4424 rc = 0; 4425 goto out; 4426 } 4427 4428 kvm_sigset_activate(vcpu); 4429 4430 /* 4431 * no need to check the return value of vcpu_start as it can only have 4432 * an error for protvirt, but protvirt means user cpu state 4433 */ 4434 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4435 kvm_s390_vcpu_start(vcpu); 4436 } else if (is_vcpu_stopped(vcpu)) { 4437 pr_err_ratelimited("can't run stopped vcpu %d\n", 4438 vcpu->vcpu_id); 4439 rc = -EINVAL; 4440 goto out; 4441 } 4442 4443 sync_regs(vcpu); 4444 enable_cpu_timer_accounting(vcpu); 4445 4446 might_fault(); 4447 rc = __vcpu_run(vcpu); 4448 4449 if (signal_pending(current) && !rc) { 4450 kvm_run->exit_reason = KVM_EXIT_INTR; 4451 rc = -EINTR; 4452 } 4453 4454 if (guestdbg_exit_pending(vcpu) && !rc) { 4455 kvm_s390_prepare_debug_exit(vcpu); 4456 rc = 0; 4457 } 4458 4459 if (rc == -EREMOTE) { 4460 /* userspace support is needed, kvm_run has been prepared */ 4461 rc = 0; 4462 } 4463 4464 
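	/*
	 * Hedged sketch of the userspace side, assuming "run" points to the
	 * mmap()ed kvm_run area of this vcpu and handle_sieic() is a
	 * hypothetical VMM helper:
	 *
	 *	if (!ioctl(vcpu_fd, KVM_RUN, 0) &&
	 *	    run->exit_reason == KVM_EXIT_S390_SIEIC)
	 *		handle_sieic(run->s390_sieic.icptcode,
	 *			     run->s390_sieic.ipa,
	 *			     run->s390_sieic.ipb);
	 */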
	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl.
Let's update our copies before we save 4538 * it into the save area 4539 */ 4540 save_fpu_regs(); 4541 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4542 save_access_regs(vcpu->run->s.regs.acrs); 4543 4544 return kvm_s390_store_status_unloaded(vcpu, addr); 4545 } 4546 4547 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4548 { 4549 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4550 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4551 } 4552 4553 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4554 { 4555 unsigned int i; 4556 struct kvm_vcpu *vcpu; 4557 4558 kvm_for_each_vcpu(i, vcpu, kvm) { 4559 __disable_ibs_on_vcpu(vcpu); 4560 } 4561 } 4562 4563 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4564 { 4565 if (!sclp.has_ibs) 4566 return; 4567 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4568 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4569 } 4570 4571 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4572 { 4573 int i, online_vcpus, r = 0, started_vcpus = 0; 4574 4575 if (!is_vcpu_stopped(vcpu)) 4576 return 0; 4577 4578 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4579 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4580 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4581 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4582 4583 /* Let's tell the UV that we want to change into the operating state */ 4584 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4585 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4586 if (r) { 4587 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4588 return r; 4589 } 4590 } 4591 4592 for (i = 0; i < online_vcpus; i++) { 4593 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) 4594 started_vcpus++; 4595 } 4596 4597 if (started_vcpus == 0) { 4598 /* we're the only active VCPU -> speed it up */ 4599 __enable_ibs_on_vcpu(vcpu); 4600 } else if (started_vcpus == 1) { 4601 /* 4602 * As we are starting a second VCPU, we have to disable 4603 * the IBS facility on all VCPUs to remove potentially 4604 * outstanding ENABLE requests. 4605 */ 4606 __disable_ibs_on_all_vcpus(vcpu->kvm); 4607 } 4608 4609 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4610 /* 4611 * The real PSW might have changed due to a RESTART interpreted by the 4612 * ultravisor. We block all interrupts and let the next sie exit 4613 * refresh our view. 4614 */ 4615 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4616 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4617 /* 4618 * Another VCPU might have used IBS while we were offline. 4619 * Let's play safe and flush the VCPU at startup. 4620 */ 4621 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4622 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4623 return 0; 4624 } 4625 4626 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4627 { 4628 int i, online_vcpus, r = 0, started_vcpus = 0; 4629 struct kvm_vcpu *started_vcpu = NULL; 4630 4631 if (is_vcpu_stopped(vcpu)) 4632 return 0; 4633 4634 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4635 /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ 4636 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4637 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4638 4639 /* Let's tell the UV that we want to change into the stopped state */ 4640 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4641 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4642 if (r) { 4643 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4644 return r; 4645 } 4646 } 4647 4648 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ 4649 kvm_s390_clear_stop_irq(vcpu); 4650 4651 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4652 __disable_ibs_on_vcpu(vcpu); 4653 4654 for (i = 0; i < online_vcpus; i++) { 4655 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { 4656 started_vcpus++; 4657 started_vcpu = vcpu->kvm->vcpus[i]; 4658 } 4659 } 4660 4661 if (started_vcpus == 1) { 4662 /* 4663 * As we only have one VCPU left, we want to enable the 4664 * IBS facility for that VCPU to speed it up. 4665 */ 4666 __enable_ibs_on_vcpu(started_vcpu); 4667 } 4668 4669 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4670 return 0; 4671 } 4672 4673 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4674 struct kvm_enable_cap *cap) 4675 { 4676 int r; 4677 4678 if (cap->flags) 4679 return -EINVAL; 4680 4681 switch (cap->cap) { 4682 case KVM_CAP_S390_CSS_SUPPORT: 4683 if (!vcpu->kvm->arch.css_support) { 4684 vcpu->kvm->arch.css_support = 1; 4685 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4686 trace_kvm_s390_enable_css(vcpu->kvm); 4687 } 4688 r = 0; 4689 break; 4690 default: 4691 r = -EINVAL; 4692 break; 4693 } 4694 return r; 4695 } 4696 4697 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, 4698 struct kvm_s390_mem_op *mop) 4699 { 4700 void __user *uaddr = (void __user *)mop->buf; 4701 int r = 0; 4702 4703 if (mop->flags || !mop->size) 4704 return -EINVAL; 4705 if (mop->size + mop->sida_offset < mop->size) 4706 return -EINVAL; 4707 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4708 return -E2BIG; 4709 4710 switch (mop->op) { 4711 case KVM_S390_MEMOP_SIDA_READ: 4712 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4713 mop->sida_offset), mop->size)) 4714 r = -EFAULT; 4715 4716 break; 4717 case KVM_S390_MEMOP_SIDA_WRITE: 4718 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4719 mop->sida_offset), uaddr, mop->size)) 4720 r = -EFAULT; 4721 break; 4722 } 4723 return r; 4724 } 4725 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, 4726 struct kvm_s390_mem_op *mop) 4727 { 4728 void __user *uaddr = (void __user *)mop->buf; 4729 void *tmpbuf = NULL; 4730 int r = 0; 4731 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4732 | KVM_S390_MEMOP_F_CHECK_ONLY; 4733 4734 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4735 return -EINVAL; 4736 4737 if (mop->size > MEM_OP_MAX_SIZE) 4738 return -E2BIG; 4739 4740 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4741 return -EINVAL; 4742 4743 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4744 tmpbuf = vmalloc(mop->size); 4745 if (!tmpbuf) 4746 return -ENOMEM; 4747 } 4748 4749 switch (mop->op) { 4750 case KVM_S390_MEMOP_LOGICAL_READ: 4751 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4752 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4753 mop->size, GACC_FETCH); 4754 break; 4755 } 4756 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4757 if (r == 0) { 4758 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4759 r = -EFAULT; 4760 } 4761 break; 4762 case KVM_S390_MEMOP_LOGICAL_WRITE: 4763 if (mop->flags & 
KVM_S390_MEMOP_F_CHECK_ONLY) { 4764 r = check_gva_range(vcpu, mop->gaddr, mop->ar, 4765 mop->size, GACC_STORE); 4766 break; 4767 } 4768 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4769 r = -EFAULT; 4770 break; 4771 } 4772 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); 4773 break; 4774 } 4775 4776 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4777 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4778 4779 vfree(tmpbuf); 4780 return r; 4781 } 4782 4783 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, 4784 struct kvm_s390_mem_op *mop) 4785 { 4786 int r, srcu_idx; 4787 4788 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4789 4790 switch (mop->op) { 4791 case KVM_S390_MEMOP_LOGICAL_READ: 4792 case KVM_S390_MEMOP_LOGICAL_WRITE: 4793 r = kvm_s390_guest_mem_op(vcpu, mop); 4794 break; 4795 case KVM_S390_MEMOP_SIDA_READ: 4796 case KVM_S390_MEMOP_SIDA_WRITE: 4797 /* we are locked against sida going away by the vcpu->mutex */ 4798 r = kvm_s390_guest_sida_op(vcpu, mop); 4799 break; 4800 default: 4801 r = -EINVAL; 4802 } 4803 4804 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4805 return r; 4806 } 4807 4808 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4809 unsigned int ioctl, unsigned long arg) 4810 { 4811 struct kvm_vcpu *vcpu = filp->private_data; 4812 void __user *argp = (void __user *)arg; 4813 4814 switch (ioctl) { 4815 case KVM_S390_IRQ: { 4816 struct kvm_s390_irq s390irq; 4817 4818 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4819 return -EFAULT; 4820 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4821 } 4822 case KVM_S390_INTERRUPT: { 4823 struct kvm_s390_interrupt s390int; 4824 struct kvm_s390_irq s390irq = {}; 4825 4826 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4827 return -EFAULT; 4828 if (s390int_to_s390irq(&s390int, &s390irq)) 4829 return -EINVAL; 4830 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4831 } 4832 } 4833 return -ENOIOCTLCMD; 4834 } 4835 4836 long kvm_arch_vcpu_ioctl(struct file *filp, 4837 unsigned int ioctl, unsigned long arg) 4838 { 4839 struct kvm_vcpu *vcpu = filp->private_data; 4840 void __user *argp = (void __user *)arg; 4841 int idx; 4842 long r; 4843 u16 rc, rrc; 4844 4845 vcpu_load(vcpu); 4846 4847 switch (ioctl) { 4848 case KVM_S390_STORE_STATUS: 4849 idx = srcu_read_lock(&vcpu->kvm->srcu); 4850 r = kvm_s390_store_status_unloaded(vcpu, arg); 4851 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4852 break; 4853 case KVM_S390_SET_INITIAL_PSW: { 4854 psw_t psw; 4855 4856 r = -EFAULT; 4857 if (copy_from_user(&psw, argp, sizeof(psw))) 4858 break; 4859 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4860 break; 4861 } 4862 case KVM_S390_CLEAR_RESET: 4863 r = 0; 4864 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4865 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4866 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4867 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4868 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4869 rc, rrc); 4870 } 4871 break; 4872 case KVM_S390_INITIAL_RESET: 4873 r = 0; 4874 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4875 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4876 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4877 UVC_CMD_CPU_RESET_INITIAL, 4878 &rc, &rrc); 4879 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 4880 rc, rrc); 4881 } 4882 break; 4883 case KVM_S390_NORMAL_RESET: 4884 r = 0; 4885 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4886 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4887 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4888 UVC_CMD_CPU_RESET, 
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks.
We can have memory slots which have to be 5028 located/ended at a segment boundary (1MB). The memory in userland is 5029 ok to be fragmented into various different vmas. It is okay to mmap() 5030 and munmap() stuff in this slot after doing this call at any time */ 5031 5032 if (mem->userspace_addr & 0xffffful) 5033 return -EINVAL; 5034 5035 if (mem->memory_size & 0xffffful) 5036 return -EINVAL; 5037 5038 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 5039 return -EINVAL; 5040 5041 /* When we are protected, we should not change the memory slots */ 5042 if (kvm_s390_pv_get_handle(kvm)) 5043 return -EINVAL; 5044 return 0; 5045 } 5046 5047 void kvm_arch_commit_memory_region(struct kvm *kvm, 5048 const struct kvm_userspace_memory_region *mem, 5049 struct kvm_memory_slot *old, 5050 const struct kvm_memory_slot *new, 5051 enum kvm_mr_change change) 5052 { 5053 int rc = 0; 5054 5055 switch (change) { 5056 case KVM_MR_DELETE: 5057 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5058 old->npages * PAGE_SIZE); 5059 break; 5060 case KVM_MR_MOVE: 5061 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5062 old->npages * PAGE_SIZE); 5063 if (rc) 5064 break; 5065 fallthrough; 5066 case KVM_MR_CREATE: 5067 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 5068 mem->guest_phys_addr, mem->memory_size); 5069 break; 5070 case KVM_MR_FLAGS_ONLY: 5071 break; 5072 default: 5073 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5074 } 5075 if (rc) 5076 pr_warn("failed to commit memory region\n"); 5077 return; 5078 } 5079 5080 static inline unsigned long nonhyp_mask(int i) 5081 { 5082 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5083 5084 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5085 } 5086 5087 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) 5088 { 5089 vcpu->valid_wakeup = false; 5090 } 5091 5092 static int __init kvm_s390_init(void) 5093 { 5094 int i; 5095 5096 if (!sclp.has_sief2) { 5097 pr_info("SIE is not available\n"); 5098 return -ENODEV; 5099 } 5100 5101 if (nested && hpage) { 5102 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5103 return -EINVAL; 5104 } 5105 5106 for (i = 0; i < 16; i++) 5107 kvm_s390_fac_base[i] |= 5108 stfle_fac_list[i] & nonhyp_mask(i); 5109 5110 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5111 } 5112 5113 static void __exit kvm_s390_exit(void) 5114 { 5115 kvm_exit(); 5116 } 5117 5118 module_init(kvm_s390_init); 5119 module_exit(kvm_s390_exit); 5120 5121 /* 5122 * Enable autoloading of the kvm module. 5123 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5124 * since x86 takes a different approach. 5125 */ 5126 #include <linux/miscdevice.h> 5127 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5128 MODULE_ALIAS("devname:kvm"); 5129
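/*
 * Illustrative sketch (not kernel code) of the userspace contract enforced by
 * kvm_arch_prepare_memory_region() above: the slot size and the userspace
 * address must both be aligned to 1 MB segments, e.g.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 256 << 20,
 *		.userspace_addr = (__u64)backing,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 *
 * vm_fd and backing are assumptions here: a VM file descriptor from
 * KVM_CREATE_VM and a 1 MB aligned mmap()ed buffer, respectively.
 */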