1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * hosting IBM Z kernel virtual machines (s390x) 4 * 5 * Copyright IBM Corp. 2008, 2020 6 * 7 * Author(s): Carsten Otte <cotte@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com> 9 * Christian Ehrhardt <ehrhardt@de.ibm.com> 10 * Jason J. Herne <jjherne@us.ibm.com> 11 */ 12 13 #define KMSG_COMPONENT "kvm-s390" 14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 15 16 #include <linux/compiler.h> 17 #include <linux/err.h> 18 #include <linux/fs.h> 19 #include <linux/hrtimer.h> 20 #include <linux/init.h> 21 #include <linux/kvm.h> 22 #include <linux/kvm_host.h> 23 #include <linux/mman.h> 24 #include <linux/module.h> 25 #include <linux/moduleparam.h> 26 #include <linux/random.h> 27 #include <linux/slab.h> 28 #include <linux/timer.h> 29 #include <linux/vmalloc.h> 30 #include <linux/bitmap.h> 31 #include <linux/sched/signal.h> 32 #include <linux/string.h> 33 #include <linux/pgtable.h> 34 #include <linux/mmu_notifier.h> 35 36 #include <asm/asm-offsets.h> 37 #include <asm/lowcore.h> 38 #include <asm/stp.h> 39 #include <asm/gmap.h> 40 #include <asm/nmi.h> 41 #include <asm/switch_to.h> 42 #include <asm/isc.h> 43 #include <asm/sclp.h> 44 #include <asm/cpacf.h> 45 #include <asm/timex.h> 46 #include <asm/ap.h> 47 #include <asm/uv.h> 48 #include <asm/fpu/api.h> 49 #include "kvm-s390.h" 50 #include "gaccess.h" 51 #include "pci.h" 52 53 #define CREATE_TRACE_POINTS 54 #include "trace.h" 55 #include "trace-s390.h" 56 57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 58 #define LOCAL_IRQS 32 59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 60 (KVM_MAX_VCPUS + LOCAL_IRQS)) 61 62 const struct _kvm_stats_desc kvm_vm_stats_desc[] = { 63 KVM_GENERIC_VM_STATS(), 64 STATS_DESC_COUNTER(VM, inject_io), 65 STATS_DESC_COUNTER(VM, inject_float_mchk), 66 STATS_DESC_COUNTER(VM, inject_pfault_done), 67 STATS_DESC_COUNTER(VM, inject_service_signal), 68 STATS_DESC_COUNTER(VM, inject_virtio), 69 STATS_DESC_COUNTER(VM, aen_forward) 70 }; 71 72 const struct kvm_stats_header kvm_vm_stats_header = { 73 .name_size = KVM_STATS_NAME_SIZE, 74 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc), 75 .id_offset = sizeof(struct kvm_stats_header), 76 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, 77 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + 78 sizeof(kvm_vm_stats_desc), 79 }; 80 81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { 82 KVM_GENERIC_VCPU_STATS(), 83 STATS_DESC_COUNTER(VCPU, exit_userspace), 84 STATS_DESC_COUNTER(VCPU, exit_null), 85 STATS_DESC_COUNTER(VCPU, exit_external_request), 86 STATS_DESC_COUNTER(VCPU, exit_io_request), 87 STATS_DESC_COUNTER(VCPU, exit_external_interrupt), 88 STATS_DESC_COUNTER(VCPU, exit_stop_request), 89 STATS_DESC_COUNTER(VCPU, exit_validity), 90 STATS_DESC_COUNTER(VCPU, exit_instruction), 91 STATS_DESC_COUNTER(VCPU, exit_pei), 92 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal), 93 STATS_DESC_COUNTER(VCPU, instruction_lctl), 94 STATS_DESC_COUNTER(VCPU, instruction_lctlg), 95 STATS_DESC_COUNTER(VCPU, instruction_stctl), 96 STATS_DESC_COUNTER(VCPU, instruction_stctg), 97 STATS_DESC_COUNTER(VCPU, exit_program_interruption), 98 STATS_DESC_COUNTER(VCPU, exit_instr_and_program), 99 STATS_DESC_COUNTER(VCPU, exit_operation_exception), 100 STATS_DESC_COUNTER(VCPU, deliver_ckc), 101 STATS_DESC_COUNTER(VCPU, deliver_cputm), 102 STATS_DESC_COUNTER(VCPU, deliver_external_call), 103 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal), 104 STATS_DESC_COUNTER(VCPU, 
deliver_service_signal), 105 STATS_DESC_COUNTER(VCPU, deliver_virtio), 106 STATS_DESC_COUNTER(VCPU, deliver_stop_signal), 107 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal), 108 STATS_DESC_COUNTER(VCPU, deliver_restart_signal), 109 STATS_DESC_COUNTER(VCPU, deliver_program), 110 STATS_DESC_COUNTER(VCPU, deliver_io), 111 STATS_DESC_COUNTER(VCPU, deliver_machine_check), 112 STATS_DESC_COUNTER(VCPU, exit_wait_state), 113 STATS_DESC_COUNTER(VCPU, inject_ckc), 114 STATS_DESC_COUNTER(VCPU, inject_cputm), 115 STATS_DESC_COUNTER(VCPU, inject_external_call), 116 STATS_DESC_COUNTER(VCPU, inject_emergency_signal), 117 STATS_DESC_COUNTER(VCPU, inject_mchk), 118 STATS_DESC_COUNTER(VCPU, inject_pfault_init), 119 STATS_DESC_COUNTER(VCPU, inject_program), 120 STATS_DESC_COUNTER(VCPU, inject_restart), 121 STATS_DESC_COUNTER(VCPU, inject_set_prefix), 122 STATS_DESC_COUNTER(VCPU, inject_stop_signal), 123 STATS_DESC_COUNTER(VCPU, instruction_epsw), 124 STATS_DESC_COUNTER(VCPU, instruction_gs), 125 STATS_DESC_COUNTER(VCPU, instruction_io_other), 126 STATS_DESC_COUNTER(VCPU, instruction_lpsw), 127 STATS_DESC_COUNTER(VCPU, instruction_lpswe), 128 STATS_DESC_COUNTER(VCPU, instruction_pfmf), 129 STATS_DESC_COUNTER(VCPU, instruction_ptff), 130 STATS_DESC_COUNTER(VCPU, instruction_sck), 131 STATS_DESC_COUNTER(VCPU, instruction_sckpf), 132 STATS_DESC_COUNTER(VCPU, instruction_stidp), 133 STATS_DESC_COUNTER(VCPU, instruction_spx), 134 STATS_DESC_COUNTER(VCPU, instruction_stpx), 135 STATS_DESC_COUNTER(VCPU, instruction_stap), 136 STATS_DESC_COUNTER(VCPU, instruction_iske), 137 STATS_DESC_COUNTER(VCPU, instruction_ri), 138 STATS_DESC_COUNTER(VCPU, instruction_rrbe), 139 STATS_DESC_COUNTER(VCPU, instruction_sske), 140 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock), 141 STATS_DESC_COUNTER(VCPU, instruction_stsi), 142 STATS_DESC_COUNTER(VCPU, instruction_stfl), 143 STATS_DESC_COUNTER(VCPU, instruction_tb), 144 STATS_DESC_COUNTER(VCPU, instruction_tpi), 145 STATS_DESC_COUNTER(VCPU, instruction_tprot), 146 STATS_DESC_COUNTER(VCPU, instruction_tsch), 147 STATS_DESC_COUNTER(VCPU, instruction_sie), 148 STATS_DESC_COUNTER(VCPU, instruction_essa), 149 STATS_DESC_COUNTER(VCPU, instruction_sthyi), 150 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense), 151 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running), 152 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call), 153 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency), 154 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency), 155 STATS_DESC_COUNTER(VCPU, instruction_sigp_start), 156 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop), 157 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status), 158 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status), 159 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status), 160 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch), 161 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix), 162 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart), 163 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), 164 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), 165 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), 166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), 167 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), 168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), 169 STATS_DESC_COUNTER(VCPU, diag_9c_ignored), 170 STATS_DESC_COUNTER(VCPU, diag_9c_forward), 171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), 172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), 173 STATS_DESC_COUNTER(VCPU, 
instruction_diagnose_500), 174 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), 175 STATS_DESC_COUNTER(VCPU, pfault_sync) 176 }; 177 178 const struct kvm_stats_header kvm_vcpu_stats_header = { 179 .name_size = KVM_STATS_NAME_SIZE, 180 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), 181 .id_offset = sizeof(struct kvm_stats_header), 182 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, 183 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + 184 sizeof(kvm_vcpu_stats_desc), 185 }; 186 187 /* allow nested virtualization in KVM (if enabled by user space) */ 188 static int nested; 189 module_param(nested, int, S_IRUGO); 190 MODULE_PARM_DESC(nested, "Nested virtualization support"); 191 192 /* allow 1m huge page guest backing, if !nested */ 193 static int hpage; 194 module_param(hpage, int, 0444); 195 MODULE_PARM_DESC(hpage, "1m huge page backing support"); 196 197 /* maximum percentage of steal time for polling. >100 is treated like 100 */ 198 static u8 halt_poll_max_steal = 10; 199 module_param(halt_poll_max_steal, byte, 0644); 200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); 201 202 /* if set to true, the GISA will be initialized and used if available */ 203 static bool use_gisa = true; 204 module_param(use_gisa, bool, 0644); 205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it."); 206 207 /* maximum diag9c forwarding per second */ 208 unsigned int diag9c_forwarding_hz; 209 module_param(diag9c_forwarding_hz, uint, 0644); 210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); 211 212 /* 213 * For now we handle at most 16 double words as this is what the s390 base 214 * kernel handles and stores in the prefix page. If we ever need to go beyond 215 * this, this requires changes to code, but the external uapi can stay. 216 */ 217 #define SIZE_INTERNAL 16 218 219 /* 220 * Base feature mask that defines default mask for facilities. Consists of the 221 * defines in FACILITIES_KVM and the non-hypervisor managed bits. 222 */ 223 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; 224 /* 225 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL 226 * and defines the facilities that can be enabled via a cpu model. 
227 */ 228 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL }; 229 230 static unsigned long kvm_s390_fac_size(void) 231 { 232 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); 233 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); 234 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > 235 sizeof(stfle_fac_list)); 236 237 return SIZE_INTERNAL; 238 } 239 240 /* available cpu features supported by kvm */ 241 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 242 /* available subfunctions indicated via query / "test bit" */ 243 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 244 245 static struct gmap_notifier gmap_notifier; 246 static struct gmap_notifier vsie_gmap_notifier; 247 debug_info_t *kvm_s390_dbf; 248 debug_info_t *kvm_s390_dbf_uv; 249 250 /* Section: not file related */ 251 int kvm_arch_hardware_enable(void) 252 { 253 /* every s390 is virtualization enabled ;-) */ 254 return 0; 255 } 256 257 int kvm_arch_check_processor_compat(void *opaque) 258 { 259 return 0; 260 } 261 262 /* forward declarations */ 263 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 264 unsigned long end); 265 static int sca_switch_to_extended(struct kvm *kvm); 266 267 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 268 { 269 u8 delta_idx = 0; 270 271 /* 272 * The TOD jumps by delta, we have to compensate this by adding 273 * -delta to the epoch. 274 */ 275 delta = -delta; 276 277 /* sign-extension - we're adding to signed values below */ 278 if ((s64)delta < 0) 279 delta_idx = -1; 280 281 scb->epoch += delta; 282 if (scb->ecd & ECD_MEF) { 283 scb->epdx += delta_idx; 284 if (scb->epoch < delta) 285 scb->epdx += 1; 286 } 287 } 288 289 /* 290 * This callback is executed during stop_machine(). All CPUs are therefore 291 * temporarily stopped. In order not to change guest behavior, we have to 292 * disable preemption whenever we touch the epoch of kvm and the VCPUs, 293 * so a CPU won't be stopped while calculating with the epoch. 
294 */ 295 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 296 void *v) 297 { 298 struct kvm *kvm; 299 struct kvm_vcpu *vcpu; 300 unsigned long i; 301 unsigned long long *delta = v; 302 303 list_for_each_entry(kvm, &vm_list, vm_list) { 304 kvm_for_each_vcpu(i, vcpu, kvm) { 305 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); 306 if (i == 0) { 307 kvm->arch.epoch = vcpu->arch.sie_block->epoch; 308 kvm->arch.epdx = vcpu->arch.sie_block->epdx; 309 } 310 if (vcpu->arch.cputm_enabled) 311 vcpu->arch.cputm_start += *delta; 312 if (vcpu->arch.vsie_block) 313 kvm_clock_sync_scb(vcpu->arch.vsie_block, 314 *delta); 315 } 316 } 317 return NOTIFY_OK; 318 } 319 320 static struct notifier_block kvm_clock_notifier = { 321 .notifier_call = kvm_clock_sync, 322 }; 323 324 int kvm_arch_hardware_setup(void *opaque) 325 { 326 gmap_notifier.notifier_call = kvm_gmap_notifier; 327 gmap_register_pte_notifier(&gmap_notifier); 328 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 329 gmap_register_pte_notifier(&vsie_gmap_notifier); 330 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 331 &kvm_clock_notifier); 332 return 0; 333 } 334 335 void kvm_arch_hardware_unsetup(void) 336 { 337 gmap_unregister_pte_notifier(&gmap_notifier); 338 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 339 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 340 &kvm_clock_notifier); 341 } 342 343 static void allow_cpu_feat(unsigned long nr) 344 { 345 set_bit_inv(nr, kvm_s390_available_cpu_feat); 346 } 347 348 static inline int plo_test_bit(unsigned char nr) 349 { 350 unsigned long function = (unsigned long)nr | 0x100; 351 int cc; 352 353 asm volatile( 354 " lgr 0,%[function]\n" 355 /* Parameter registers are ignored for "test bit" */ 356 " plo 0,0,0,0(0)\n" 357 " ipm %0\n" 358 " srl %0,28\n" 359 : "=d" (cc) 360 : [function] "d" (function) 361 : "cc", "0"); 362 return cc == 0; 363 } 364 365 static __always_inline void __insn32_query(unsigned int opcode, u8 *query) 366 { 367 asm volatile( 368 " lghi 0,0\n" 369 " lgr 1,%[query]\n" 370 /* Parameter registers are ignored */ 371 " .insn rrf,%[opc] << 16,2,4,6,0\n" 372 : 373 : [query] "d" ((unsigned long)query), [opc] "i" (opcode) 374 : "cc", "memory", "0", "1"); 375 } 376 377 #define INSN_SORTL 0xb938 378 #define INSN_DFLTCC 0xb939 379 380 static void kvm_s390_cpu_feat_init(void) 381 { 382 int i; 383 384 for (i = 0; i < 256; ++i) { 385 if (plo_test_bit(i)) 386 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 387 } 388 389 if (test_facility(28)) /* TOD-clock steering */ 390 ptff(kvm_s390_available_subfunc.ptff, 391 sizeof(kvm_s390_available_subfunc.ptff), 392 PTFF_QAF); 393 394 if (test_facility(17)) { /* MSA */ 395 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 396 kvm_s390_available_subfunc.kmac); 397 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 398 kvm_s390_available_subfunc.kmc); 399 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 400 kvm_s390_available_subfunc.km); 401 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 402 kvm_s390_available_subfunc.kimd); 403 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 404 kvm_s390_available_subfunc.klmd); 405 } 406 if (test_facility(76)) /* MSA3 */ 407 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 408 kvm_s390_available_subfunc.pckmo); 409 if (test_facility(77)) { /* MSA4 */ 410 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 411 kvm_s390_available_subfunc.kmctr); 412 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 413 kvm_s390_available_subfunc.kmf); 414 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 415 
kvm_s390_available_subfunc.kmo); 416 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 417 kvm_s390_available_subfunc.pcc); 418 } 419 if (test_facility(57)) /* MSA5 */ 420 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 421 kvm_s390_available_subfunc.ppno); 422 423 if (test_facility(146)) /* MSA8 */ 424 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 425 kvm_s390_available_subfunc.kma); 426 427 if (test_facility(155)) /* MSA9 */ 428 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *) 429 kvm_s390_available_subfunc.kdsa); 430 431 if (test_facility(150)) /* SORTL */ 432 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); 433 434 if (test_facility(151)) /* DFLTCC */ 435 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); 436 437 if (MACHINE_HAS_ESOP) 438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 439 /* 440 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 441 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 442 */ 443 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 444 !test_facility(3) || !nested) 445 return; 446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 447 if (sclp.has_64bscao) 448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 449 if (sclp.has_siif) 450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 451 if (sclp.has_gpere) 452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 453 if (sclp.has_gsls) 454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 455 if (sclp.has_ib) 456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 457 if (sclp.has_cei) 458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 459 if (sclp.has_ibs) 460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 461 if (sclp.has_kss) 462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 463 /* 464 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 465 * all skey handling functions read/set the skey from the PGSTE 466 * instead of the real storage key. 467 * 468 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 469 * pages being detected as preserved although they are resident. 470 * 471 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 472 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 473 * 474 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 475 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 476 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 477 * 478 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 479 * cannot easily shadow the SCA because of the ipte lock. 480 */ 481 } 482 483 int kvm_arch_init(void *opaque) 484 { 485 int rc = -ENOMEM; 486 487 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 488 if (!kvm_s390_dbf) 489 return -ENOMEM; 490 491 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long)); 492 if (!kvm_s390_dbf_uv) 493 goto out; 494 495 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) || 496 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view)) 497 goto out; 498 499 kvm_s390_cpu_feat_init(); 500 501 /* Register floating interrupt controller interface. 
*/ 502 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 503 if (rc) { 504 pr_err("A FLIC registration call failed with rc=%d\n", rc); 505 goto out; 506 } 507 508 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { 509 rc = kvm_s390_pci_init(); 510 if (rc) { 511 pr_err("Unable to allocate AIFT for PCI\n"); 512 goto out; 513 } 514 } 515 516 rc = kvm_s390_gib_init(GAL_ISC); 517 if (rc) 518 goto out; 519 520 return 0; 521 522 out: 523 kvm_arch_exit(); 524 return rc; 525 } 526 527 void kvm_arch_exit(void) 528 { 529 kvm_s390_gib_destroy(); 530 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 531 kvm_s390_pci_exit(); 532 debug_unregister(kvm_s390_dbf); 533 debug_unregister(kvm_s390_dbf_uv); 534 } 535 536 /* Section: device related */ 537 long kvm_arch_dev_ioctl(struct file *filp, 538 unsigned int ioctl, unsigned long arg) 539 { 540 if (ioctl == KVM_S390_ENABLE_SIE) 541 return s390_enable_sie(); 542 return -EINVAL; 543 } 544 545 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 546 { 547 int r; 548 549 switch (ext) { 550 case KVM_CAP_S390_PSW: 551 case KVM_CAP_S390_GMAP: 552 case KVM_CAP_SYNC_MMU: 553 #ifdef CONFIG_KVM_S390_UCONTROL 554 case KVM_CAP_S390_UCONTROL: 555 #endif 556 case KVM_CAP_ASYNC_PF: 557 case KVM_CAP_SYNC_REGS: 558 case KVM_CAP_ONE_REG: 559 case KVM_CAP_ENABLE_CAP: 560 case KVM_CAP_S390_CSS_SUPPORT: 561 case KVM_CAP_IOEVENTFD: 562 case KVM_CAP_DEVICE_CTRL: 563 case KVM_CAP_S390_IRQCHIP: 564 case KVM_CAP_VM_ATTRIBUTES: 565 case KVM_CAP_MP_STATE: 566 case KVM_CAP_IMMEDIATE_EXIT: 567 case KVM_CAP_S390_INJECT_IRQ: 568 case KVM_CAP_S390_USER_SIGP: 569 case KVM_CAP_S390_USER_STSI: 570 case KVM_CAP_S390_SKEYS: 571 case KVM_CAP_S390_IRQ_STATE: 572 case KVM_CAP_S390_USER_INSTR0: 573 case KVM_CAP_S390_CMMA_MIGRATION: 574 case KVM_CAP_S390_AIS: 575 case KVM_CAP_S390_AIS_MIGRATION: 576 case KVM_CAP_S390_VCPU_RESETS: 577 case KVM_CAP_SET_GUEST_DEBUG: 578 case KVM_CAP_S390_DIAG318: 579 case KVM_CAP_S390_MEM_OP_EXTENSION: 580 r = 1; 581 break; 582 case KVM_CAP_SET_GUEST_DEBUG2: 583 r = KVM_GUESTDBG_VALID_MASK; 584 break; 585 case KVM_CAP_S390_HPAGE_1M: 586 r = 0; 587 if (hpage && !kvm_is_ucontrol(kvm)) 588 r = 1; 589 break; 590 case KVM_CAP_S390_MEM_OP: 591 r = MEM_OP_MAX_SIZE; 592 break; 593 case KVM_CAP_NR_VCPUS: 594 case KVM_CAP_MAX_VCPUS: 595 case KVM_CAP_MAX_VCPU_ID: 596 r = KVM_S390_BSCA_CPU_SLOTS; 597 if (!kvm_s390_use_sca_entries()) 598 r = KVM_MAX_VCPUS; 599 else if (sclp.has_esca && sclp.has_64bscao) 600 r = KVM_S390_ESCA_CPU_SLOTS; 601 if (ext == KVM_CAP_NR_VCPUS) 602 r = min_t(unsigned int, num_online_cpus(), r); 603 break; 604 case KVM_CAP_S390_COW: 605 r = MACHINE_HAS_ESOP; 606 break; 607 case KVM_CAP_S390_VECTOR_REGISTERS: 608 r = MACHINE_HAS_VX; 609 break; 610 case KVM_CAP_S390_RI: 611 r = test_facility(64); 612 break; 613 case KVM_CAP_S390_GS: 614 r = test_facility(133); 615 break; 616 case KVM_CAP_S390_BPB: 617 r = test_facility(82); 618 break; 619 case KVM_CAP_S390_PROTECTED: 620 r = is_prot_virt_host(); 621 break; 622 case KVM_CAP_S390_PROTECTED_DUMP: { 623 u64 pv_cmds_dump[] = { 624 BIT_UVC_CMD_DUMP_INIT, 625 BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE, 626 BIT_UVC_CMD_DUMP_CPU, 627 BIT_UVC_CMD_DUMP_COMPLETE, 628 }; 629 int i; 630 631 r = is_prot_virt_host(); 632 633 for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) { 634 if (!test_bit_inv(pv_cmds_dump[i], 635 (unsigned long *)&uv_info.inst_calls_list)) { 636 r = 0; 637 break; 638 } 639 } 640 break; 641 } 642 case KVM_CAP_S390_ZPCI_OP: 643 r = kvm_s390_pci_interp_allowed(); 644 break; 645 case KVM_CAP_S390_CPU_TOPOLOGY: 646 r 
= test_facility(11); 647 break; 648 default: 649 r = 0; 650 } 651 return r; 652 } 653 654 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 655 { 656 int i; 657 gfn_t cur_gfn, last_gfn; 658 unsigned long gaddr, vmaddr; 659 struct gmap *gmap = kvm->arch.gmap; 660 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); 661 662 /* Loop over all guest segments */ 663 cur_gfn = memslot->base_gfn; 664 last_gfn = memslot->base_gfn + memslot->npages; 665 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { 666 gaddr = gfn_to_gpa(cur_gfn); 667 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); 668 if (kvm_is_error_hva(vmaddr)) 669 continue; 670 671 bitmap_zero(bitmap, _PAGE_ENTRIES); 672 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); 673 for (i = 0; i < _PAGE_ENTRIES; i++) { 674 if (test_bit(i, bitmap)) 675 mark_page_dirty(kvm, cur_gfn + i); 676 } 677 678 if (fatal_signal_pending(current)) 679 return; 680 cond_resched(); 681 } 682 } 683 684 /* Section: vm related */ 685 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 686 687 /* 688 * Get (and clear) the dirty memory log for a memory slot. 689 */ 690 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 691 struct kvm_dirty_log *log) 692 { 693 int r; 694 unsigned long n; 695 struct kvm_memory_slot *memslot; 696 int is_dirty; 697 698 if (kvm_is_ucontrol(kvm)) 699 return -EINVAL; 700 701 mutex_lock(&kvm->slots_lock); 702 703 r = -EINVAL; 704 if (log->slot >= KVM_USER_MEM_SLOTS) 705 goto out; 706 707 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot); 708 if (r) 709 goto out; 710 711 /* Clear the dirty log */ 712 if (is_dirty) { 713 n = kvm_dirty_bitmap_bytes(memslot); 714 memset(memslot->dirty_bitmap, 0, n); 715 } 716 r = 0; 717 out: 718 mutex_unlock(&kvm->slots_lock); 719 return r; 720 } 721 722 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 723 { 724 unsigned long i; 725 struct kvm_vcpu *vcpu; 726 727 kvm_for_each_vcpu(i, vcpu, kvm) { 728 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 729 } 730 } 731 732 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 733 { 734 int r; 735 736 if (cap->flags) 737 return -EINVAL; 738 739 switch (cap->cap) { 740 case KVM_CAP_S390_IRQCHIP: 741 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 742 kvm->arch.use_irqchip = 1; 743 r = 0; 744 break; 745 case KVM_CAP_S390_USER_SIGP: 746 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 747 kvm->arch.user_sigp = 1; 748 r = 0; 749 break; 750 case KVM_CAP_S390_VECTOR_REGISTERS: 751 mutex_lock(&kvm->lock); 752 if (kvm->created_vcpus) { 753 r = -EBUSY; 754 } else if (MACHINE_HAS_VX) { 755 set_kvm_facility(kvm->arch.model.fac_mask, 129); 756 set_kvm_facility(kvm->arch.model.fac_list, 129); 757 if (test_facility(134)) { 758 set_kvm_facility(kvm->arch.model.fac_mask, 134); 759 set_kvm_facility(kvm->arch.model.fac_list, 134); 760 } 761 if (test_facility(135)) { 762 set_kvm_facility(kvm->arch.model.fac_mask, 135); 763 set_kvm_facility(kvm->arch.model.fac_list, 135); 764 } 765 if (test_facility(148)) { 766 set_kvm_facility(kvm->arch.model.fac_mask, 148); 767 set_kvm_facility(kvm->arch.model.fac_list, 148); 768 } 769 if (test_facility(152)) { 770 set_kvm_facility(kvm->arch.model.fac_mask, 152); 771 set_kvm_facility(kvm->arch.model.fac_list, 152); 772 } 773 if (test_facility(192)) { 774 set_kvm_facility(kvm->arch.model.fac_mask, 192); 775 set_kvm_facility(kvm->arch.model.fac_list, 192); 776 } 777 r = 0; 778 } else 779 r = -EINVAL; 780 mutex_unlock(&kvm->lock); 781 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 782 r ? 
"(not available)" : "(success)"); 783 break; 784 case KVM_CAP_S390_RI: 785 r = -EINVAL; 786 mutex_lock(&kvm->lock); 787 if (kvm->created_vcpus) { 788 r = -EBUSY; 789 } else if (test_facility(64)) { 790 set_kvm_facility(kvm->arch.model.fac_mask, 64); 791 set_kvm_facility(kvm->arch.model.fac_list, 64); 792 r = 0; 793 } 794 mutex_unlock(&kvm->lock); 795 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 796 r ? "(not available)" : "(success)"); 797 break; 798 case KVM_CAP_S390_AIS: 799 mutex_lock(&kvm->lock); 800 if (kvm->created_vcpus) { 801 r = -EBUSY; 802 } else { 803 set_kvm_facility(kvm->arch.model.fac_mask, 72); 804 set_kvm_facility(kvm->arch.model.fac_list, 72); 805 r = 0; 806 } 807 mutex_unlock(&kvm->lock); 808 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 809 r ? "(not available)" : "(success)"); 810 break; 811 case KVM_CAP_S390_GS: 812 r = -EINVAL; 813 mutex_lock(&kvm->lock); 814 if (kvm->created_vcpus) { 815 r = -EBUSY; 816 } else if (test_facility(133)) { 817 set_kvm_facility(kvm->arch.model.fac_mask, 133); 818 set_kvm_facility(kvm->arch.model.fac_list, 133); 819 r = 0; 820 } 821 mutex_unlock(&kvm->lock); 822 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 823 r ? "(not available)" : "(success)"); 824 break; 825 case KVM_CAP_S390_HPAGE_1M: 826 mutex_lock(&kvm->lock); 827 if (kvm->created_vcpus) 828 r = -EBUSY; 829 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 830 r = -EINVAL; 831 else { 832 r = 0; 833 mmap_write_lock(kvm->mm); 834 kvm->mm->context.allow_gmap_hpage_1m = 1; 835 mmap_write_unlock(kvm->mm); 836 /* 837 * We might have to create fake 4k page 838 * tables. To avoid that the hardware works on 839 * stale PGSTEs, we emulate these instructions. 840 */ 841 kvm->arch.use_skf = 0; 842 kvm->arch.use_pfmfi = 0; 843 } 844 mutex_unlock(&kvm->lock); 845 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 846 r ? "(not available)" : "(success)"); 847 break; 848 case KVM_CAP_S390_USER_STSI: 849 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 850 kvm->arch.user_stsi = 1; 851 r = 0; 852 break; 853 case KVM_CAP_S390_USER_INSTR0: 854 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 855 kvm->arch.user_instr0 = 1; 856 icpt_operexc_on_all_vcpus(kvm); 857 r = 0; 858 break; 859 case KVM_CAP_S390_CPU_TOPOLOGY: 860 r = -EINVAL; 861 mutex_lock(&kvm->lock); 862 if (kvm->created_vcpus) { 863 r = -EBUSY; 864 } else if (test_facility(11)) { 865 set_kvm_facility(kvm->arch.model.fac_mask, 11); 866 set_kvm_facility(kvm->arch.model.fac_list, 11); 867 r = 0; 868 } 869 mutex_unlock(&kvm->lock); 870 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s", 871 r ? 
"(not available)" : "(success)"); 872 break; 873 default: 874 r = -EINVAL; 875 break; 876 } 877 return r; 878 } 879 880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 881 { 882 int ret; 883 884 switch (attr->attr) { 885 case KVM_S390_VM_MEM_LIMIT_SIZE: 886 ret = 0; 887 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 888 kvm->arch.mem_limit); 889 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 890 ret = -EFAULT; 891 break; 892 default: 893 ret = -ENXIO; 894 break; 895 } 896 return ret; 897 } 898 899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 900 { 901 int ret; 902 unsigned int idx; 903 switch (attr->attr) { 904 case KVM_S390_VM_MEM_ENABLE_CMMA: 905 ret = -ENXIO; 906 if (!sclp.has_cmma) 907 break; 908 909 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 910 mutex_lock(&kvm->lock); 911 if (kvm->created_vcpus) 912 ret = -EBUSY; 913 else if (kvm->mm->context.allow_gmap_hpage_1m) 914 ret = -EINVAL; 915 else { 916 kvm->arch.use_cmma = 1; 917 /* Not compatible with cmma. */ 918 kvm->arch.use_pfmfi = 0; 919 ret = 0; 920 } 921 mutex_unlock(&kvm->lock); 922 break; 923 case KVM_S390_VM_MEM_CLR_CMMA: 924 ret = -ENXIO; 925 if (!sclp.has_cmma) 926 break; 927 ret = -EINVAL; 928 if (!kvm->arch.use_cmma) 929 break; 930 931 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 932 mutex_lock(&kvm->lock); 933 idx = srcu_read_lock(&kvm->srcu); 934 s390_reset_cmma(kvm->arch.gmap->mm); 935 srcu_read_unlock(&kvm->srcu, idx); 936 mutex_unlock(&kvm->lock); 937 ret = 0; 938 break; 939 case KVM_S390_VM_MEM_LIMIT_SIZE: { 940 unsigned long new_limit; 941 942 if (kvm_is_ucontrol(kvm)) 943 return -EINVAL; 944 945 if (get_user(new_limit, (u64 __user *)attr->addr)) 946 return -EFAULT; 947 948 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 949 new_limit > kvm->arch.mem_limit) 950 return -E2BIG; 951 952 if (!new_limit) 953 return -EINVAL; 954 955 /* gmap_create takes last usable address */ 956 if (new_limit != KVM_S390_NO_MEM_LIMIT) 957 new_limit -= 1; 958 959 ret = -EBUSY; 960 mutex_lock(&kvm->lock); 961 if (!kvm->created_vcpus) { 962 /* gmap_create will round the limit up */ 963 struct gmap *new = gmap_create(current->mm, new_limit); 964 965 if (!new) { 966 ret = -ENOMEM; 967 } else { 968 gmap_remove(kvm->arch.gmap); 969 new->private = kvm; 970 kvm->arch.gmap = new; 971 ret = 0; 972 } 973 } 974 mutex_unlock(&kvm->lock); 975 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 976 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 977 (void *) kvm->arch.gmap->asce); 978 break; 979 } 980 default: 981 ret = -ENXIO; 982 break; 983 } 984 return ret; 985 } 986 987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 988 989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 990 { 991 struct kvm_vcpu *vcpu; 992 unsigned long i; 993 994 kvm_s390_vcpu_block_all(kvm); 995 996 kvm_for_each_vcpu(i, vcpu, kvm) { 997 kvm_s390_vcpu_crypto_setup(vcpu); 998 /* recreate the shadow crycb by leaving the VSIE handler */ 999 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 1000 } 1001 1002 kvm_s390_vcpu_unblock_all(kvm); 1003 } 1004 1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 1006 { 1007 mutex_lock(&kvm->lock); 1008 switch (attr->attr) { 1009 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1010 if (!test_kvm_facility(kvm, 76)) { 1011 mutex_unlock(&kvm->lock); 1012 return -EINVAL; 1013 } 1014 get_random_bytes( 1015 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 1016 
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1017 kvm->arch.crypto.aes_kw = 1; 1018 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 1019 break; 1020 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1021 if (!test_kvm_facility(kvm, 76)) { 1022 mutex_unlock(&kvm->lock); 1023 return -EINVAL; 1024 } 1025 get_random_bytes( 1026 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 1027 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1028 kvm->arch.crypto.dea_kw = 1; 1029 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 1030 break; 1031 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1032 if (!test_kvm_facility(kvm, 76)) { 1033 mutex_unlock(&kvm->lock); 1034 return -EINVAL; 1035 } 1036 kvm->arch.crypto.aes_kw = 0; 1037 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 1038 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 1039 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 1040 break; 1041 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1042 if (!test_kvm_facility(kvm, 76)) { 1043 mutex_unlock(&kvm->lock); 1044 return -EINVAL; 1045 } 1046 kvm->arch.crypto.dea_kw = 0; 1047 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 1048 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 1049 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 1050 break; 1051 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1052 if (!ap_instructions_available()) { 1053 mutex_unlock(&kvm->lock); 1054 return -EOPNOTSUPP; 1055 } 1056 kvm->arch.crypto.apie = 1; 1057 break; 1058 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1059 if (!ap_instructions_available()) { 1060 mutex_unlock(&kvm->lock); 1061 return -EOPNOTSUPP; 1062 } 1063 kvm->arch.crypto.apie = 0; 1064 break; 1065 default: 1066 mutex_unlock(&kvm->lock); 1067 return -ENXIO; 1068 } 1069 1070 kvm_s390_vcpu_crypto_reset_all(kvm); 1071 mutex_unlock(&kvm->lock); 1072 return 0; 1073 } 1074 1075 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu) 1076 { 1077 /* Only set the ECB bits after guest requests zPCI interpretation */ 1078 if (!vcpu->kvm->arch.use_zpci_interp) 1079 return; 1080 1081 vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI; 1082 vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI; 1083 } 1084 1085 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm) 1086 { 1087 struct kvm_vcpu *vcpu; 1088 unsigned long i; 1089 1090 lockdep_assert_held(&kvm->lock); 1091 1092 if (!kvm_s390_pci_interp_allowed()) 1093 return; 1094 1095 /* 1096 * If host is configured for PCI and the necessary facilities are 1097 * available, turn on interpretation for the life of this guest 1098 */ 1099 kvm->arch.use_zpci_interp = 1; 1100 1101 kvm_s390_vcpu_block_all(kvm); 1102 1103 kvm_for_each_vcpu(i, vcpu, kvm) { 1104 kvm_s390_vcpu_pci_setup(vcpu); 1105 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 1106 } 1107 1108 kvm_s390_vcpu_unblock_all(kvm); 1109 } 1110 1111 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 1112 { 1113 unsigned long cx; 1114 struct kvm_vcpu *vcpu; 1115 1116 kvm_for_each_vcpu(cx, vcpu, kvm) 1117 kvm_s390_sync_request(req, vcpu); 1118 } 1119 1120 /* 1121 * Must be called with kvm->srcu held to avoid races on memslots, and with 1122 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 
1123 */ 1124 static int kvm_s390_vm_start_migration(struct kvm *kvm) 1125 { 1126 struct kvm_memory_slot *ms; 1127 struct kvm_memslots *slots; 1128 unsigned long ram_pages = 0; 1129 int bkt; 1130 1131 /* migration mode already enabled */ 1132 if (kvm->arch.migration_mode) 1133 return 0; 1134 slots = kvm_memslots(kvm); 1135 if (!slots || kvm_memslots_empty(slots)) 1136 return -EINVAL; 1137 1138 if (!kvm->arch.use_cmma) { 1139 kvm->arch.migration_mode = 1; 1140 return 0; 1141 } 1142 /* mark all the pages in active slots as dirty */ 1143 kvm_for_each_memslot(ms, bkt, slots) { 1144 if (!ms->dirty_bitmap) 1145 return -EINVAL; 1146 /* 1147 * The second half of the bitmap is only used on x86, 1148 * and would be wasted otherwise, so we put it to good 1149 * use here to keep track of the state of the storage 1150 * attributes. 1151 */ 1152 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms)); 1153 ram_pages += ms->npages; 1154 } 1155 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); 1156 kvm->arch.migration_mode = 1; 1157 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 1158 return 0; 1159 } 1160 1161 /* 1162 * Must be called with kvm->slots_lock to avoid races with ourselves and 1163 * kvm_s390_vm_start_migration. 1164 */ 1165 static int kvm_s390_vm_stop_migration(struct kvm *kvm) 1166 { 1167 /* migration mode already disabled */ 1168 if (!kvm->arch.migration_mode) 1169 return 0; 1170 kvm->arch.migration_mode = 0; 1171 if (kvm->arch.use_cmma) 1172 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); 1173 return 0; 1174 } 1175 1176 static int kvm_s390_vm_set_migration(struct kvm *kvm, 1177 struct kvm_device_attr *attr) 1178 { 1179 int res = -ENXIO; 1180 1181 mutex_lock(&kvm->slots_lock); 1182 switch (attr->attr) { 1183 case KVM_S390_VM_MIGRATION_START: 1184 res = kvm_s390_vm_start_migration(kvm); 1185 break; 1186 case KVM_S390_VM_MIGRATION_STOP: 1187 res = kvm_s390_vm_stop_migration(kvm); 1188 break; 1189 default: 1190 break; 1191 } 1192 mutex_unlock(&kvm->slots_lock); 1193 1194 return res; 1195 } 1196 1197 static int kvm_s390_vm_get_migration(struct kvm *kvm, 1198 struct kvm_device_attr *attr) 1199 { 1200 u64 mig = kvm->arch.migration_mode; 1201 1202 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) 1203 return -ENXIO; 1204 1205 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) 1206 return -EFAULT; 1207 return 0; 1208 } 1209 1210 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 1211 { 1212 struct kvm_s390_vm_tod_clock gtod; 1213 1214 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) 1215 return -EFAULT; 1216 1217 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) 1218 return -EINVAL; 1219 kvm_s390_set_tod_clock(kvm, >od); 1220 1221 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", 1222 gtod.epoch_idx, gtod.tod); 1223 1224 return 0; 1225 } 1226 1227 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 1228 { 1229 u8 gtod_high; 1230 1231 if (copy_from_user(>od_high, (void __user *)attr->addr, 1232 sizeof(gtod_high))) 1233 return -EFAULT; 1234 1235 if (gtod_high != 0) 1236 return -EINVAL; 1237 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high); 1238 1239 return 0; 1240 } 1241 1242 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1243 { 1244 struct kvm_s390_vm_tod_clock gtod = { 0 }; 1245 1246 if (copy_from_user(>od.tod, (void __user *)attr->addr, 1247 sizeof(gtod.tod))) 1248 return -EFAULT; 1249 1250 
kvm_s390_set_tod_clock(kvm, >od); 1251 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); 1252 return 0; 1253 } 1254 1255 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1256 { 1257 int ret; 1258 1259 if (attr->flags) 1260 return -EINVAL; 1261 1262 switch (attr->attr) { 1263 case KVM_S390_VM_TOD_EXT: 1264 ret = kvm_s390_set_tod_ext(kvm, attr); 1265 break; 1266 case KVM_S390_VM_TOD_HIGH: 1267 ret = kvm_s390_set_tod_high(kvm, attr); 1268 break; 1269 case KVM_S390_VM_TOD_LOW: 1270 ret = kvm_s390_set_tod_low(kvm, attr); 1271 break; 1272 default: 1273 ret = -ENXIO; 1274 break; 1275 } 1276 return ret; 1277 } 1278 1279 static void kvm_s390_get_tod_clock(struct kvm *kvm, 1280 struct kvm_s390_vm_tod_clock *gtod) 1281 { 1282 union tod_clock clk; 1283 1284 preempt_disable(); 1285 1286 store_tod_clock_ext(&clk); 1287 1288 gtod->tod = clk.tod + kvm->arch.epoch; 1289 gtod->epoch_idx = 0; 1290 if (test_kvm_facility(kvm, 139)) { 1291 gtod->epoch_idx = clk.ei + kvm->arch.epdx; 1292 if (gtod->tod < clk.tod) 1293 gtod->epoch_idx += 1; 1294 } 1295 1296 preempt_enable(); 1297 } 1298 1299 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) 1300 { 1301 struct kvm_s390_vm_tod_clock gtod; 1302 1303 memset(>od, 0, sizeof(gtod)); 1304 kvm_s390_get_tod_clock(kvm, >od); 1305 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1306 return -EFAULT; 1307 1308 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", 1309 gtod.epoch_idx, gtod.tod); 1310 return 0; 1311 } 1312 1313 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) 1314 { 1315 u8 gtod_high = 0; 1316 1317 if (copy_to_user((void __user *)attr->addr, >od_high, 1318 sizeof(gtod_high))) 1319 return -EFAULT; 1320 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high); 1321 1322 return 0; 1323 } 1324 1325 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) 1326 { 1327 u64 gtod; 1328 1329 gtod = kvm_s390_get_tod_clock_fast(kvm); 1330 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) 1331 return -EFAULT; 1332 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod); 1333 1334 return 0; 1335 } 1336 1337 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) 1338 { 1339 int ret; 1340 1341 if (attr->flags) 1342 return -EINVAL; 1343 1344 switch (attr->attr) { 1345 case KVM_S390_VM_TOD_EXT: 1346 ret = kvm_s390_get_tod_ext(kvm, attr); 1347 break; 1348 case KVM_S390_VM_TOD_HIGH: 1349 ret = kvm_s390_get_tod_high(kvm, attr); 1350 break; 1351 case KVM_S390_VM_TOD_LOW: 1352 ret = kvm_s390_get_tod_low(kvm, attr); 1353 break; 1354 default: 1355 ret = -ENXIO; 1356 break; 1357 } 1358 return ret; 1359 } 1360 1361 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1362 { 1363 struct kvm_s390_vm_cpu_processor *proc; 1364 u16 lowest_ibc, unblocked_ibc; 1365 int ret = 0; 1366 1367 mutex_lock(&kvm->lock); 1368 if (kvm->created_vcpus) { 1369 ret = -EBUSY; 1370 goto out; 1371 } 1372 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1373 if (!proc) { 1374 ret = -ENOMEM; 1375 goto out; 1376 } 1377 if (!copy_from_user(proc, (void __user *)attr->addr, 1378 sizeof(*proc))) { 1379 kvm->arch.model.cpuid = proc->cpuid; 1380 lowest_ibc = sclp.ibc >> 16 & 0xfff; 1381 unblocked_ibc = sclp.ibc & 0xfff; 1382 if (lowest_ibc && proc->ibc) { 1383 if (proc->ibc > unblocked_ibc) 1384 kvm->arch.model.ibc = unblocked_ibc; 1385 else if (proc->ibc < lowest_ibc) 1386 kvm->arch.model.ibc = lowest_ibc; 1387 else 1388 
kvm->arch.model.ibc = proc->ibc; 1389 } 1390 memcpy(kvm->arch.model.fac_list, proc->fac_list, 1391 S390_ARCH_FAC_LIST_SIZE_BYTE); 1392 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1393 kvm->arch.model.ibc, 1394 kvm->arch.model.cpuid); 1395 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1396 kvm->arch.model.fac_list[0], 1397 kvm->arch.model.fac_list[1], 1398 kvm->arch.model.fac_list[2]); 1399 } else 1400 ret = -EFAULT; 1401 kfree(proc); 1402 out: 1403 mutex_unlock(&kvm->lock); 1404 return ret; 1405 } 1406 1407 static int kvm_s390_set_processor_feat(struct kvm *kvm, 1408 struct kvm_device_attr *attr) 1409 { 1410 struct kvm_s390_vm_cpu_feat data; 1411 1412 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) 1413 return -EFAULT; 1414 if (!bitmap_subset((unsigned long *) data.feat, 1415 kvm_s390_available_cpu_feat, 1416 KVM_S390_VM_CPU_FEAT_NR_BITS)) 1417 return -EINVAL; 1418 1419 mutex_lock(&kvm->lock); 1420 if (kvm->created_vcpus) { 1421 mutex_unlock(&kvm->lock); 1422 return -EBUSY; 1423 } 1424 bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1425 mutex_unlock(&kvm->lock); 1426 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1427 data.feat[0], 1428 data.feat[1], 1429 data.feat[2]); 1430 return 0; 1431 } 1432 1433 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1434 struct kvm_device_attr *attr) 1435 { 1436 mutex_lock(&kvm->lock); 1437 if (kvm->created_vcpus) { 1438 mutex_unlock(&kvm->lock); 1439 return -EBUSY; 1440 } 1441 1442 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, 1443 sizeof(struct kvm_s390_vm_cpu_subfunc))) { 1444 mutex_unlock(&kvm->lock); 1445 return -EFAULT; 1446 } 1447 mutex_unlock(&kvm->lock); 1448 1449 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1450 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1451 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1452 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1453 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1454 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1455 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1456 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1457 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1458 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1459 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1460 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1461 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1462 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1463 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", 1464 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1465 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1466 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1467 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1468 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1469 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1470 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1471 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1472 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1473 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1474 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1475 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1476 ((unsigned long *) 
&kvm->arch.model.subfuncs.kmctr)[0], 1477 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1478 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1479 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1480 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1481 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1482 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1483 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1484 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1485 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1486 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1487 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1488 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1489 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1490 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1491 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1492 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1493 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1494 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1495 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1496 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1497 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1498 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1499 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1500 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1501 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1502 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1503 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1504 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1505 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1506 1507 return 0; 1508 } 1509 1510 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1511 { 1512 int ret = -ENXIO; 1513 1514 switch (attr->attr) { 1515 case KVM_S390_VM_CPU_PROCESSOR: 1516 ret = kvm_s390_set_processor(kvm, attr); 1517 break; 1518 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1519 ret = kvm_s390_set_processor_feat(kvm, attr); 1520 break; 1521 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1522 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1523 break; 1524 } 1525 return ret; 1526 } 1527 1528 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1529 { 1530 struct kvm_s390_vm_cpu_processor *proc; 1531 int ret = 0; 1532 1533 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1534 if (!proc) { 1535 ret = -ENOMEM; 1536 goto out; 1537 } 1538 proc->cpuid = kvm->arch.model.cpuid; 1539 proc->ibc = kvm->arch.model.ibc; 1540 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1541 S390_ARCH_FAC_LIST_SIZE_BYTE); 1542 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1543 kvm->arch.model.ibc, 1544 kvm->arch.model.cpuid); 1545 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1546 kvm->arch.model.fac_list[0], 1547 kvm->arch.model.fac_list[1], 1548 kvm->arch.model.fac_list[2]); 1549 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1550 ret = -EFAULT; 1551 kfree(proc); 1552 out: 1553 return ret; 1554 } 1555 1556 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1557 { 1558 struct kvm_s390_vm_cpu_machine *mach; 1559 int ret = 0; 1560 1561 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); 1562 if 
(!mach) { 1563 ret = -ENOMEM; 1564 goto out; 1565 } 1566 get_cpu_id((struct cpuid *) &mach->cpuid); 1567 mach->ibc = sclp.ibc; 1568 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1569 S390_ARCH_FAC_LIST_SIZE_BYTE); 1570 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, 1571 sizeof(stfle_fac_list)); 1572 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1573 kvm->arch.model.ibc, 1574 kvm->arch.model.cpuid); 1575 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1576 mach->fac_mask[0], 1577 mach->fac_mask[1], 1578 mach->fac_mask[2]); 1579 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1580 mach->fac_list[0], 1581 mach->fac_list[1], 1582 mach->fac_list[2]); 1583 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1584 ret = -EFAULT; 1585 kfree(mach); 1586 out: 1587 return ret; 1588 } 1589 1590 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1591 struct kvm_device_attr *attr) 1592 { 1593 struct kvm_s390_vm_cpu_feat data; 1594 1595 bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1596 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1597 return -EFAULT; 1598 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1599 data.feat[0], 1600 data.feat[1], 1601 data.feat[2]); 1602 return 0; 1603 } 1604 1605 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1606 struct kvm_device_attr *attr) 1607 { 1608 struct kvm_s390_vm_cpu_feat data; 1609 1610 bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 1611 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1612 return -EFAULT; 1613 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1614 data.feat[0], 1615 data.feat[1], 1616 data.feat[2]); 1617 return 0; 1618 } 1619 1620 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1621 struct kvm_device_attr *attr) 1622 { 1623 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1624 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1625 return -EFAULT; 1626 1627 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1628 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1629 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1630 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1631 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1632 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1633 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1634 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1635 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1636 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1637 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1638 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1639 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1640 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1641 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1642 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1643 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1644 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1645 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1646 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1647 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1648 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1649 ((unsigned long *) 
&kvm->arch.model.subfuncs.klmd)[1]); 1650 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1651 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1652 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1653 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1654 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1655 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1656 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1657 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1658 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1659 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1660 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1661 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1662 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1663 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1664 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1665 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1666 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1667 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1668 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1669 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1670 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1671 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1672 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1673 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1674 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1675 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1676 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1677 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1678 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1679 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1680 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1681 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1682 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1683 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1684 1685 return 0; 1686 } 1687 1688 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1689 struct kvm_device_attr *attr) 1690 { 1691 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1692 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1693 return -EFAULT; 1694 1695 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1696 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], 1697 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], 1698 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], 1699 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); 1700 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", 1701 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], 1702 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); 1703 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", 1704 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], 1705 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); 1706 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", 1707 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], 1708 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); 1709 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", 1710 ((unsigned long *) &kvm_s390_available_subfunc.km)[0], 1711 
((unsigned long *) &kvm_s390_available_subfunc.km)[1]); 1712 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", 1713 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], 1714 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); 1715 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", 1716 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], 1717 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); 1718 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", 1719 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], 1720 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); 1721 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", 1722 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], 1723 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); 1724 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", 1725 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], 1726 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); 1727 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", 1728 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], 1729 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); 1730 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", 1731 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1732 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1733 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1734 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1735 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1736 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1737 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1738 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1739 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1740 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1741 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1742 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1743 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1744 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1745 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1746 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1747 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1748 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1749 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1750 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1751 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1752 1753 return 0; 1754 } 1755 1756 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1757 { 1758 int ret = -ENXIO; 1759 1760 switch (attr->attr) { 1761 case KVM_S390_VM_CPU_PROCESSOR: 1762 ret = kvm_s390_get_processor(kvm, attr); 1763 break; 1764 case KVM_S390_VM_CPU_MACHINE: 1765 ret = kvm_s390_get_machine(kvm, attr); 1766 break; 1767 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1768 ret = kvm_s390_get_processor_feat(kvm, attr); 1769 break; 1770 case KVM_S390_VM_CPU_MACHINE_FEAT: 1771 ret = kvm_s390_get_machine_feat(kvm, attr); 1772 break; 1773 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1774 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1775 break; 1776 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1777 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1778 break; 1779 } 1780 return ret; 1781 } 1782 1783 /** 1784 * kvm_s390_update_topology_change_report - update 
CPU topology change report 1785 * @kvm: guest KVM description 1786 * @val: set or clear the MTCR bit 1787 * 1788 * Updates the Multiprocessor Topology-Change-Report bit to signal 1789 * the guest with a topology change. 1790 * This is only relevant if the topology facility is present. 1791 * 1792 * The SCA version, bsca or esca, doesn't matter as offset is the same. 1793 */ 1794 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) 1795 { 1796 union sca_utility new, old; 1797 struct bsca_block *sca; 1798 1799 read_lock(&kvm->arch.sca_lock); 1800 sca = kvm->arch.sca; 1801 do { 1802 old = READ_ONCE(sca->utility); 1803 new = old; 1804 new.mtcr = val; 1805 } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); 1806 read_unlock(&kvm->arch.sca_lock); 1807 } 1808 1809 static int kvm_s390_set_topo_change_indication(struct kvm *kvm, 1810 struct kvm_device_attr *attr) 1811 { 1812 if (!test_kvm_facility(kvm, 11)) 1813 return -ENXIO; 1814 1815 kvm_s390_update_topology_change_report(kvm, !!attr->attr); 1816 return 0; 1817 } 1818 1819 static int kvm_s390_get_topo_change_indication(struct kvm *kvm, 1820 struct kvm_device_attr *attr) 1821 { 1822 u8 topo; 1823 1824 if (!test_kvm_facility(kvm, 11)) 1825 return -ENXIO; 1826 1827 read_lock(&kvm->arch.sca_lock); 1828 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr; 1829 read_unlock(&kvm->arch.sca_lock); 1830 1831 return put_user(topo, (u8 __user *)attr->addr); 1832 } 1833 1834 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1835 { 1836 int ret; 1837 1838 switch (attr->group) { 1839 case KVM_S390_VM_MEM_CTRL: 1840 ret = kvm_s390_set_mem_control(kvm, attr); 1841 break; 1842 case KVM_S390_VM_TOD: 1843 ret = kvm_s390_set_tod(kvm, attr); 1844 break; 1845 case KVM_S390_VM_CPU_MODEL: 1846 ret = kvm_s390_set_cpu_model(kvm, attr); 1847 break; 1848 case KVM_S390_VM_CRYPTO: 1849 ret = kvm_s390_vm_set_crypto(kvm, attr); 1850 break; 1851 case KVM_S390_VM_MIGRATION: 1852 ret = kvm_s390_vm_set_migration(kvm, attr); 1853 break; 1854 case KVM_S390_VM_CPU_TOPOLOGY: 1855 ret = kvm_s390_set_topo_change_indication(kvm, attr); 1856 break; 1857 default: 1858 ret = -ENXIO; 1859 break; 1860 } 1861 1862 return ret; 1863 } 1864 1865 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1866 { 1867 int ret; 1868 1869 switch (attr->group) { 1870 case KVM_S390_VM_MEM_CTRL: 1871 ret = kvm_s390_get_mem_control(kvm, attr); 1872 break; 1873 case KVM_S390_VM_TOD: 1874 ret = kvm_s390_get_tod(kvm, attr); 1875 break; 1876 case KVM_S390_VM_CPU_MODEL: 1877 ret = kvm_s390_get_cpu_model(kvm, attr); 1878 break; 1879 case KVM_S390_VM_MIGRATION: 1880 ret = kvm_s390_vm_get_migration(kvm, attr); 1881 break; 1882 case KVM_S390_VM_CPU_TOPOLOGY: 1883 ret = kvm_s390_get_topo_change_indication(kvm, attr); 1884 break; 1885 default: 1886 ret = -ENXIO; 1887 break; 1888 } 1889 1890 return ret; 1891 } 1892 1893 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1894 { 1895 int ret; 1896 1897 switch (attr->group) { 1898 case KVM_S390_VM_MEM_CTRL: 1899 switch (attr->attr) { 1900 case KVM_S390_VM_MEM_ENABLE_CMMA: 1901 case KVM_S390_VM_MEM_CLR_CMMA: 1902 ret = sclp.has_cmma ? 
0 : -ENXIO; 1903 break; 1904 case KVM_S390_VM_MEM_LIMIT_SIZE: 1905 ret = 0; 1906 break; 1907 default: 1908 ret = -ENXIO; 1909 break; 1910 } 1911 break; 1912 case KVM_S390_VM_TOD: 1913 switch (attr->attr) { 1914 case KVM_S390_VM_TOD_LOW: 1915 case KVM_S390_VM_TOD_HIGH: 1916 ret = 0; 1917 break; 1918 default: 1919 ret = -ENXIO; 1920 break; 1921 } 1922 break; 1923 case KVM_S390_VM_CPU_MODEL: 1924 switch (attr->attr) { 1925 case KVM_S390_VM_CPU_PROCESSOR: 1926 case KVM_S390_VM_CPU_MACHINE: 1927 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1928 case KVM_S390_VM_CPU_MACHINE_FEAT: 1929 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1930 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1931 ret = 0; 1932 break; 1933 default: 1934 ret = -ENXIO; 1935 break; 1936 } 1937 break; 1938 case KVM_S390_VM_CRYPTO: 1939 switch (attr->attr) { 1940 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1941 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1942 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1943 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1944 ret = 0; 1945 break; 1946 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1947 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1948 ret = ap_instructions_available() ? 0 : -ENXIO; 1949 break; 1950 default: 1951 ret = -ENXIO; 1952 break; 1953 } 1954 break; 1955 case KVM_S390_VM_MIGRATION: 1956 ret = 0; 1957 break; 1958 case KVM_S390_VM_CPU_TOPOLOGY: 1959 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO; 1960 break; 1961 default: 1962 ret = -ENXIO; 1963 break; 1964 } 1965 1966 return ret; 1967 } 1968 1969 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1970 { 1971 uint8_t *keys; 1972 uint64_t hva; 1973 int srcu_idx, i, r = 0; 1974 1975 if (args->flags != 0) 1976 return -EINVAL; 1977 1978 /* Is this guest using storage keys? */ 1979 if (!mm_uses_skeys(current->mm)) 1980 return KVM_S390_GET_SKEYS_NONE; 1981 1982 /* Enforce sane limit on memory allocation */ 1983 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1984 return -EINVAL; 1985 1986 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1987 if (!keys) 1988 return -ENOMEM; 1989 1990 mmap_read_lock(current->mm); 1991 srcu_idx = srcu_read_lock(&kvm->srcu); 1992 for (i = 0; i < args->count; i++) { 1993 hva = gfn_to_hva(kvm, args->start_gfn + i); 1994 if (kvm_is_error_hva(hva)) { 1995 r = -EFAULT; 1996 break; 1997 } 1998 1999 r = get_guest_storage_key(current->mm, hva, &keys[i]); 2000 if (r) 2001 break; 2002 } 2003 srcu_read_unlock(&kvm->srcu, srcu_idx); 2004 mmap_read_unlock(current->mm); 2005 2006 if (!r) { 2007 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 2008 sizeof(uint8_t) * args->count); 2009 if (r) 2010 r = -EFAULT; 2011 } 2012 2013 kvfree(keys); 2014 return r; 2015 } 2016 2017 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 2018 { 2019 uint8_t *keys; 2020 uint64_t hva; 2021 int srcu_idx, i, r = 0; 2022 bool unlocked; 2023 2024 if (args->flags != 0) 2025 return -EINVAL; 2026 2027 /* Enforce sane limit on memory allocation */ 2028 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 2029 return -EINVAL; 2030 2031 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 2032 if (!keys) 2033 return -ENOMEM; 2034 2035 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 2036 sizeof(uint8_t) * args->count); 2037 if (r) { 2038 r = -EFAULT; 2039 goto out; 2040 } 2041 2042 /* Enable storage key handling for the guest */ 2043 r = s390_enable_skey(); 2044 if (r) 2045 goto out; 2046 2047 i = 0; 2048 mmap_read_lock(current->mm); 2049 srcu_idx = 
srcu_read_lock(&kvm->srcu); 2050 while (i < args->count) { 2051 unlocked = false; 2052 hva = gfn_to_hva(kvm, args->start_gfn + i); 2053 if (kvm_is_error_hva(hva)) { 2054 r = -EFAULT; 2055 break; 2056 } 2057 2058 /* Lowest order bit is reserved */ 2059 if (keys[i] & 0x01) { 2060 r = -EINVAL; 2061 break; 2062 } 2063 2064 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 2065 if (r) { 2066 r = fixup_user_fault(current->mm, hva, 2067 FAULT_FLAG_WRITE, &unlocked); 2068 if (r) 2069 break; 2070 } 2071 if (!r) 2072 i++; 2073 } 2074 srcu_read_unlock(&kvm->srcu, srcu_idx); 2075 mmap_read_unlock(current->mm); 2076 out: 2077 kvfree(keys); 2078 return r; 2079 } 2080 2081 /* 2082 * Base address and length must be sent at the start of each block, therefore 2083 * it's cheaper to send some clean data, as long as it's less than the size of 2084 * two longs. 2085 */ 2086 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 2087 /* for consistency */ 2088 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 2089 2090 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2091 u8 *res, unsigned long bufsize) 2092 { 2093 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 2094 2095 args->count = 0; 2096 while (args->count < bufsize) { 2097 hva = gfn_to_hva(kvm, cur_gfn); 2098 /* 2099 * We return an error if the first value was invalid, but we 2100 * return successfully if at least one value was copied. 2101 */ 2102 if (kvm_is_error_hva(hva)) 2103 return args->count ? 0 : -EFAULT; 2104 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2105 pgstev = 0; 2106 res[args->count++] = (pgstev >> 24) & 0x43; 2107 cur_gfn++; 2108 } 2109 2110 return 0; 2111 } 2112 2113 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 2114 gfn_t gfn) 2115 { 2116 return ____gfn_to_memslot(slots, gfn, true); 2117 } 2118 2119 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 2120 unsigned long cur_gfn) 2121 { 2122 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 2123 unsigned long ofs = cur_gfn - ms->base_gfn; 2124 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 2125 2126 if (ms->base_gfn + ms->npages <= cur_gfn) { 2127 mnode = rb_next(mnode); 2128 /* If we are above the highest slot, wrap around */ 2129 if (!mnode) 2130 mnode = rb_first(&slots->gfn_tree); 2131 2132 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2133 ofs = 0; 2134 } 2135 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 2136 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 2137 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2138 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 2139 } 2140 return ms->base_gfn + ofs; 2141 } 2142 2143 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2144 u8 *res, unsigned long bufsize) 2145 { 2146 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2147 struct kvm_memslots *slots = kvm_memslots(kvm); 2148 struct kvm_memory_slot *ms; 2149 2150 if (unlikely(kvm_memslots_empty(slots))) 2151 return 0; 2152 2153 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2154 ms = gfn_to_memslot(kvm, cur_gfn); 2155 args->count = 0; 2156 args->start_gfn = cur_gfn; 2157 if (!ms) 2158 return 0; 2159 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2160 mem_end = kvm_s390_get_gfn_end(slots); 2161 2162 while (args->count < bufsize) { 2163 hva = gfn_to_hva(kvm, cur_gfn); 2164 if (kvm_is_error_hva(hva)) 2165 
return 0; 2166 /* Decrement only if we actually flipped the bit to 0 */ 2167 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2168 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2169 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2170 pgstev = 0; 2171 /* Save the value */ 2172 res[args->count++] = (pgstev >> 24) & 0x43; 2173 /* If the next bit is too far away, stop. */ 2174 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2175 return 0; 2176 /* If we reached the previous "next", find the next one */ 2177 if (cur_gfn == next_gfn) 2178 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2179 /* Reached the end of memory or of the buffer, stop */ 2180 if ((next_gfn >= mem_end) || 2181 (next_gfn - args->start_gfn >= bufsize)) 2182 return 0; 2183 cur_gfn++; 2184 /* Reached the end of the current memslot, take the next one. */ 2185 if (cur_gfn - ms->base_gfn >= ms->npages) { 2186 ms = gfn_to_memslot(kvm, cur_gfn); 2187 if (!ms) 2188 return 0; 2189 } 2190 } 2191 return 0; 2192 } 2193 2194 /* 2195 * This function searches for the next page with dirty CMMA attributes, and 2196 * saves the attributes in the buffer up to either the end of the buffer or 2197 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2198 * no trailing clean bytes are saved. 2199 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2200 * output buffer will indicate 0 as length. 2201 */ 2202 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2203 struct kvm_s390_cmma_log *args) 2204 { 2205 unsigned long bufsize; 2206 int srcu_idx, peek, ret; 2207 u8 *values; 2208 2209 if (!kvm->arch.use_cmma) 2210 return -ENXIO; 2211 /* Invalid/unsupported flags were specified */ 2212 if (args->flags & ~KVM_S390_CMMA_PEEK) 2213 return -EINVAL; 2214 /* Migration mode query, and we are not doing a migration */ 2215 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2216 if (!peek && !kvm->arch.migration_mode) 2217 return -EINVAL; 2218 /* CMMA is disabled or was not used, or the buffer has length zero */ 2219 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2220 if (!bufsize || !kvm->mm->context.uses_cmm) { 2221 memset(args, 0, sizeof(*args)); 2222 return 0; 2223 } 2224 /* We are not peeking, and there are no dirty pages */ 2225 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2226 memset(args, 0, sizeof(*args)); 2227 return 0; 2228 } 2229 2230 values = vmalloc(bufsize); 2231 if (!values) 2232 return -ENOMEM; 2233 2234 mmap_read_lock(kvm->mm); 2235 srcu_idx = srcu_read_lock(&kvm->srcu); 2236 if (peek) 2237 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2238 else 2239 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2240 srcu_read_unlock(&kvm->srcu, srcu_idx); 2241 mmap_read_unlock(kvm->mm); 2242 2243 if (kvm->arch.migration_mode) 2244 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2245 else 2246 args->remaining = 0; 2247 2248 if (copy_to_user((void __user *)args->values, values, args->count)) 2249 ret = -EFAULT; 2250 2251 vfree(values); 2252 return ret; 2253 } 2254 2255 /* 2256 * This function sets the CMMA attributes for the given pages. If the input 2257 * buffer has zero length, no action is taken, otherwise the attributes are 2258 * set and the mm->context.uses_cmm flag is set. 
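 *
 * Purely as an illustration (not taken from this file): a userspace sketch
 * of feeding previously saved values back in through the
 * KVM_S390_SET_CMMA_BITS vm ioctl, e.g. on the destination side of a
 * migration. vm_fd, first_gfn, n_values and values_buf are made-up names
 * and error handling is omitted; the fields are those of
 * struct kvm_s390_cmma_log:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = first_gfn,
 *		.count = n_values,
 *		.flags = 0,
 *		.mask = ~0ULL,
 *		.values = (__u64)(unsigned long)values_buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);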
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (!kvm->mm->context.uses_cmm) {
		mmap_write_lock(kvm->mm);
		kvm->mm->context.uses_cmm = 1;
		mmap_write_unlock(kvm->mm);
	}
out:
	vfree(bits);
	return r;
}

/**
 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
 * non-protected.
 * @kvm: the VM whose protected vCPUs are to be converted
 * @rc: return value for the RC field of the UVC (in case of error)
 * @rrc: return value for the RRC field of the UVC (in case of error)
 *
 * Does not stop in case of error; it tries to convert as many
 * CPUs as possible. In case of error, the RC and RRC of the first error are
 * returned.
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	u16 _rc, _rrc;
	int ret = 0;

	/*
	 * We ignore failures and try to destroy as many CPUs as possible.
	 * At the same time we must not free the assigned resources when
	 * this fails, as the ultravisor still has access to that memory.
	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
	 * behind.
	 * We want to return the first failure rc and rrc, though.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
			*rc = _rc;
			*rrc = _rrc;
			ret = -EIO;
		}
		mutex_unlock(&vcpu->mutex);
	}
	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
	if (use_gisa)
		kvm_s390_gisa_enable(kvm);
	return ret;
}

/**
 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
 * to protected.
 * @kvm: the VM whose protected vCPUs are to be converted
 * @rc: return value for the RC field of the UVC (in case of error)
 * @rrc: return value for the RRC field of the UVC (in case of error)
 *
 * Tries to undo the conversion in case of error.
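 * If the ultravisor does not announce the AIV facility, the GISA is
 * disabled before the vCPUs are created in the ultravisor (see the
 * BIT_UV_FEAT_AIV check below).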
2369 * 2370 * Return: 0 in case of success, otherwise -EIO 2371 */ 2372 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2373 { 2374 unsigned long i; 2375 int r = 0; 2376 u16 dummy; 2377 2378 struct kvm_vcpu *vcpu; 2379 2380 /* Disable the GISA if the ultravisor does not support AIV. */ 2381 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) 2382 kvm_s390_gisa_disable(kvm); 2383 2384 kvm_for_each_vcpu(i, vcpu, kvm) { 2385 mutex_lock(&vcpu->mutex); 2386 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2387 mutex_unlock(&vcpu->mutex); 2388 if (r) 2389 break; 2390 } 2391 if (r) 2392 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2393 return r; 2394 } 2395 2396 /* 2397 * Here we provide user space with a direct interface to query UV 2398 * related data like UV maxima and available features as well as 2399 * feature specific data. 2400 * 2401 * To facilitate future extension of the data structures we'll try to 2402 * write data up to the maximum requested length. 2403 */ 2404 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info) 2405 { 2406 ssize_t len_min; 2407 2408 switch (info->header.id) { 2409 case KVM_PV_INFO_VM: { 2410 len_min = sizeof(info->header) + sizeof(info->vm); 2411 2412 if (info->header.len_max < len_min) 2413 return -EINVAL; 2414 2415 memcpy(info->vm.inst_calls_list, 2416 uv_info.inst_calls_list, 2417 sizeof(uv_info.inst_calls_list)); 2418 2419 /* It's max cpuid not max cpus, so it's off by one */ 2420 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1; 2421 info->vm.max_guests = uv_info.max_num_sec_conf; 2422 info->vm.max_guest_addr = uv_info.max_sec_stor_addr; 2423 info->vm.feature_indication = uv_info.uv_feature_indications; 2424 2425 return len_min; 2426 } 2427 case KVM_PV_INFO_DUMP: { 2428 len_min = sizeof(info->header) + sizeof(info->dump); 2429 2430 if (info->header.len_max < len_min) 2431 return -EINVAL; 2432 2433 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len; 2434 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len; 2435 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len; 2436 return len_min; 2437 } 2438 default: 2439 return -EINVAL; 2440 } 2441 } 2442 2443 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, 2444 struct kvm_s390_pv_dmp dmp) 2445 { 2446 int r = -EINVAL; 2447 void __user *result_buff = (void __user *)dmp.buff_addr; 2448 2449 switch (dmp.subcmd) { 2450 case KVM_PV_DUMP_INIT: { 2451 if (kvm->arch.pv.dumping) 2452 break; 2453 2454 /* 2455 * Block SIE entry as concurrent dump UVCs could lead 2456 * to validities. 2457 */ 2458 kvm_s390_vcpu_block_all(kvm); 2459 2460 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2461 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc); 2462 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x", 2463 cmd->rc, cmd->rrc); 2464 if (!r) { 2465 kvm->arch.pv.dumping = true; 2466 } else { 2467 kvm_s390_vcpu_unblock_all(kvm); 2468 r = -EINVAL; 2469 } 2470 break; 2471 } 2472 case KVM_PV_DUMP_CONFIG_STOR_STATE: { 2473 if (!kvm->arch.pv.dumping) 2474 break; 2475 2476 /* 2477 * gaddr is an output parameter since we might stop 2478 * early. As dmp will be copied back in our caller, we 2479 * don't need to do it ourselves. 
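		 * dmp.gaddr is updated by kvm_s390_pv_dump_stor_state() and
		 * copied back to userspace by the KVM_PV_DUMP handler, so a
		 * subsequent CONFIG_STOR_STATE call can continue from the
		 * returned address.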
2480 */ 2481 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len, 2482 &cmd->rc, &cmd->rrc); 2483 break; 2484 } 2485 case KVM_PV_DUMP_COMPLETE: { 2486 if (!kvm->arch.pv.dumping) 2487 break; 2488 2489 r = -EINVAL; 2490 if (dmp.buff_len < uv_info.conf_dump_finalize_len) 2491 break; 2492 2493 r = kvm_s390_pv_dump_complete(kvm, result_buff, 2494 &cmd->rc, &cmd->rrc); 2495 break; 2496 } 2497 default: 2498 r = -ENOTTY; 2499 break; 2500 } 2501 2502 return r; 2503 } 2504 2505 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2506 { 2507 int r = 0; 2508 u16 dummy; 2509 void __user *argp = (void __user *)cmd->data; 2510 2511 switch (cmd->cmd) { 2512 case KVM_PV_ENABLE: { 2513 r = -EINVAL; 2514 if (kvm_s390_pv_is_protected(kvm)) 2515 break; 2516 2517 /* 2518 * FMT 4 SIE needs esca. As we never switch back to bsca from 2519 * esca, we need no cleanup in the error cases below 2520 */ 2521 r = sca_switch_to_extended(kvm); 2522 if (r) 2523 break; 2524 2525 mmap_write_lock(current->mm); 2526 r = gmap_mark_unmergeable(); 2527 mmap_write_unlock(current->mm); 2528 if (r) 2529 break; 2530 2531 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2532 if (r) 2533 break; 2534 2535 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2536 if (r) 2537 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2538 2539 /* we need to block service interrupts from now on */ 2540 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2541 break; 2542 } 2543 case KVM_PV_DISABLE: { 2544 r = -EINVAL; 2545 if (!kvm_s390_pv_is_protected(kvm)) 2546 break; 2547 2548 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2549 /* 2550 * If a CPU could not be destroyed, destroy VM will also fail. 2551 * There is no point in trying to destroy it. Instead return 2552 * the rc and rrc from the first CPU that failed destroying. 
2553 */ 2554 if (r) 2555 break; 2556 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2557 2558 /* no need to block service interrupts any more */ 2559 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2560 break; 2561 } 2562 case KVM_PV_SET_SEC_PARMS: { 2563 struct kvm_s390_pv_sec_parm parms = {}; 2564 void *hdr; 2565 2566 r = -EINVAL; 2567 if (!kvm_s390_pv_is_protected(kvm)) 2568 break; 2569 2570 r = -EFAULT; 2571 if (copy_from_user(&parms, argp, sizeof(parms))) 2572 break; 2573 2574 /* Currently restricted to 8KB */ 2575 r = -EINVAL; 2576 if (parms.length > PAGE_SIZE * 2) 2577 break; 2578 2579 r = -ENOMEM; 2580 hdr = vmalloc(parms.length); 2581 if (!hdr) 2582 break; 2583 2584 r = -EFAULT; 2585 if (!copy_from_user(hdr, (void __user *)parms.origin, 2586 parms.length)) 2587 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2588 &cmd->rc, &cmd->rrc); 2589 2590 vfree(hdr); 2591 break; 2592 } 2593 case KVM_PV_UNPACK: { 2594 struct kvm_s390_pv_unp unp = {}; 2595 2596 r = -EINVAL; 2597 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2598 break; 2599 2600 r = -EFAULT; 2601 if (copy_from_user(&unp, argp, sizeof(unp))) 2602 break; 2603 2604 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2605 &cmd->rc, &cmd->rrc); 2606 break; 2607 } 2608 case KVM_PV_VERIFY: { 2609 r = -EINVAL; 2610 if (!kvm_s390_pv_is_protected(kvm)) 2611 break; 2612 2613 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2614 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2615 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2616 cmd->rrc); 2617 break; 2618 } 2619 case KVM_PV_PREP_RESET: { 2620 r = -EINVAL; 2621 if (!kvm_s390_pv_is_protected(kvm)) 2622 break; 2623 2624 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2625 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2626 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2627 cmd->rc, cmd->rrc); 2628 break; 2629 } 2630 case KVM_PV_UNSHARE_ALL: { 2631 r = -EINVAL; 2632 if (!kvm_s390_pv_is_protected(kvm)) 2633 break; 2634 2635 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2636 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2637 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2638 cmd->rc, cmd->rrc); 2639 break; 2640 } 2641 case KVM_PV_INFO: { 2642 struct kvm_s390_pv_info info = {}; 2643 ssize_t data_len; 2644 2645 /* 2646 * No need to check the VM protection here. 2647 * 2648 * Maybe user space wants to query some of the data 2649 * when the VM is still unprotected. If we see the 2650 * need to fence a new data command we can still 2651 * return an error in the info handler. 2652 */ 2653 2654 r = -EFAULT; 2655 if (copy_from_user(&info, argp, sizeof(info.header))) 2656 break; 2657 2658 r = -EINVAL; 2659 if (info.header.len_max < sizeof(info.header)) 2660 break; 2661 2662 data_len = kvm_s390_handle_pv_info(&info); 2663 if (data_len < 0) { 2664 r = data_len; 2665 break; 2666 } 2667 /* 2668 * If a data command struct is extended (multiple 2669 * times) this can be used to determine how much of it 2670 * is valid. 
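		 *
		 * Purely as an illustration (not taken from this file), a
		 * userspace sketch of the length handshake; vm_fd is a
		 * made-up name and error handling is omitted:
		 *
		 *	struct kvm_s390_pv_info info = {
		 *		.header.id = KVM_PV_INFO_VM,
		 *		.header.len_max = sizeof(info),
		 *	};
		 *	struct kvm_pv_cmd cmd = {
		 *		.cmd = KVM_PV_INFO,
		 *		.data = (__u64)(unsigned long)&info,
		 *	};
		 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
		 *
		 * info.header.len_written then tells how many of the returned
		 * bytes are valid.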
2671 */ 2672 info.header.len_written = data_len; 2673 2674 r = -EFAULT; 2675 if (copy_to_user(argp, &info, data_len)) 2676 break; 2677 2678 r = 0; 2679 break; 2680 } 2681 case KVM_PV_DUMP: { 2682 struct kvm_s390_pv_dmp dmp; 2683 2684 r = -EINVAL; 2685 if (!kvm_s390_pv_is_protected(kvm)) 2686 break; 2687 2688 r = -EFAULT; 2689 if (copy_from_user(&dmp, argp, sizeof(dmp))) 2690 break; 2691 2692 r = kvm_s390_pv_dmp(kvm, cmd, dmp); 2693 if (r) 2694 break; 2695 2696 if (copy_to_user(argp, &dmp, sizeof(dmp))) { 2697 r = -EFAULT; 2698 break; 2699 } 2700 2701 break; 2702 } 2703 default: 2704 r = -ENOTTY; 2705 } 2706 return r; 2707 } 2708 2709 static bool access_key_invalid(u8 access_key) 2710 { 2711 return access_key > 0xf; 2712 } 2713 2714 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2715 { 2716 void __user *uaddr = (void __user *)mop->buf; 2717 u64 supported_flags; 2718 void *tmpbuf = NULL; 2719 int r, srcu_idx; 2720 2721 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2722 | KVM_S390_MEMOP_F_CHECK_ONLY; 2723 if (mop->flags & ~supported_flags || !mop->size) 2724 return -EINVAL; 2725 if (mop->size > MEM_OP_MAX_SIZE) 2726 return -E2BIG; 2727 /* 2728 * This is technically a heuristic only, if the kvm->lock is not 2729 * taken, it is not guaranteed that the vm is/remains non-protected. 2730 * This is ok from a kernel perspective, wrongdoing is detected 2731 * on the access, -EFAULT is returned and the vm may crash the 2732 * next time it accesses the memory in question. 2733 * There is no sane usecase to do switching and a memop on two 2734 * different CPUs at the same time. 2735 */ 2736 if (kvm_s390_pv_get_handle(kvm)) 2737 return -EINVAL; 2738 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2739 if (access_key_invalid(mop->key)) 2740 return -EINVAL; 2741 } else { 2742 mop->key = 0; 2743 } 2744 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2745 tmpbuf = vmalloc(mop->size); 2746 if (!tmpbuf) 2747 return -ENOMEM; 2748 } 2749 2750 srcu_idx = srcu_read_lock(&kvm->srcu); 2751 2752 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2753 r = PGM_ADDRESSING; 2754 goto out_unlock; 2755 } 2756 2757 switch (mop->op) { 2758 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2759 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2760 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2761 } else { 2762 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2763 mop->size, GACC_FETCH, mop->key); 2764 if (r == 0) { 2765 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2766 r = -EFAULT; 2767 } 2768 } 2769 break; 2770 } 2771 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2772 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2773 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2774 } else { 2775 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2776 r = -EFAULT; 2777 break; 2778 } 2779 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2780 mop->size, GACC_STORE, mop->key); 2781 } 2782 break; 2783 } 2784 default: 2785 r = -EINVAL; 2786 } 2787 2788 out_unlock: 2789 srcu_read_unlock(&kvm->srcu, srcu_idx); 2790 2791 vfree(tmpbuf); 2792 return r; 2793 } 2794 2795 long kvm_arch_vm_ioctl(struct file *filp, 2796 unsigned int ioctl, unsigned long arg) 2797 { 2798 struct kvm *kvm = filp->private_data; 2799 void __user *argp = (void __user *)arg; 2800 struct kvm_device_attr attr; 2801 int r; 2802 2803 switch (ioctl) { 2804 case KVM_S390_INTERRUPT: { 2805 struct kvm_s390_interrupt s390int; 2806 2807 r = -EFAULT; 2808 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2809 break; 
2810 r = kvm_s390_inject_vm(kvm, &s390int); 2811 break; 2812 } 2813 case KVM_CREATE_IRQCHIP: { 2814 struct kvm_irq_routing_entry routing; 2815 2816 r = -EINVAL; 2817 if (kvm->arch.use_irqchip) { 2818 /* Set up dummy routing. */ 2819 memset(&routing, 0, sizeof(routing)); 2820 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2821 } 2822 break; 2823 } 2824 case KVM_SET_DEVICE_ATTR: { 2825 r = -EFAULT; 2826 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2827 break; 2828 r = kvm_s390_vm_set_attr(kvm, &attr); 2829 break; 2830 } 2831 case KVM_GET_DEVICE_ATTR: { 2832 r = -EFAULT; 2833 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2834 break; 2835 r = kvm_s390_vm_get_attr(kvm, &attr); 2836 break; 2837 } 2838 case KVM_HAS_DEVICE_ATTR: { 2839 r = -EFAULT; 2840 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2841 break; 2842 r = kvm_s390_vm_has_attr(kvm, &attr); 2843 break; 2844 } 2845 case KVM_S390_GET_SKEYS: { 2846 struct kvm_s390_skeys args; 2847 2848 r = -EFAULT; 2849 if (copy_from_user(&args, argp, 2850 sizeof(struct kvm_s390_skeys))) 2851 break; 2852 r = kvm_s390_get_skeys(kvm, &args); 2853 break; 2854 } 2855 case KVM_S390_SET_SKEYS: { 2856 struct kvm_s390_skeys args; 2857 2858 r = -EFAULT; 2859 if (copy_from_user(&args, argp, 2860 sizeof(struct kvm_s390_skeys))) 2861 break; 2862 r = kvm_s390_set_skeys(kvm, &args); 2863 break; 2864 } 2865 case KVM_S390_GET_CMMA_BITS: { 2866 struct kvm_s390_cmma_log args; 2867 2868 r = -EFAULT; 2869 if (copy_from_user(&args, argp, sizeof(args))) 2870 break; 2871 mutex_lock(&kvm->slots_lock); 2872 r = kvm_s390_get_cmma_bits(kvm, &args); 2873 mutex_unlock(&kvm->slots_lock); 2874 if (!r) { 2875 r = copy_to_user(argp, &args, sizeof(args)); 2876 if (r) 2877 r = -EFAULT; 2878 } 2879 break; 2880 } 2881 case KVM_S390_SET_CMMA_BITS: { 2882 struct kvm_s390_cmma_log args; 2883 2884 r = -EFAULT; 2885 if (copy_from_user(&args, argp, sizeof(args))) 2886 break; 2887 mutex_lock(&kvm->slots_lock); 2888 r = kvm_s390_set_cmma_bits(kvm, &args); 2889 mutex_unlock(&kvm->slots_lock); 2890 break; 2891 } 2892 case KVM_S390_PV_COMMAND: { 2893 struct kvm_pv_cmd args; 2894 2895 /* protvirt means user cpu state */ 2896 kvm_s390_set_user_cpu_state_ctrl(kvm); 2897 r = 0; 2898 if (!is_prot_virt_host()) { 2899 r = -EINVAL; 2900 break; 2901 } 2902 if (copy_from_user(&args, argp, sizeof(args))) { 2903 r = -EFAULT; 2904 break; 2905 } 2906 if (args.flags) { 2907 r = -EINVAL; 2908 break; 2909 } 2910 mutex_lock(&kvm->lock); 2911 r = kvm_s390_handle_pv(kvm, &args); 2912 mutex_unlock(&kvm->lock); 2913 if (copy_to_user(argp, &args, sizeof(args))) { 2914 r = -EFAULT; 2915 break; 2916 } 2917 break; 2918 } 2919 case KVM_S390_MEM_OP: { 2920 struct kvm_s390_mem_op mem_op; 2921 2922 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2923 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2924 else 2925 r = -EFAULT; 2926 break; 2927 } 2928 case KVM_S390_ZPCI_OP: { 2929 struct kvm_s390_zpci_op args; 2930 2931 r = -EINVAL; 2932 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 2933 break; 2934 if (copy_from_user(&args, argp, sizeof(args))) { 2935 r = -EFAULT; 2936 break; 2937 } 2938 r = kvm_s390_pci_zpci_op(kvm, &args); 2939 break; 2940 } 2941 default: 2942 r = -ENOTTY; 2943 } 2944 2945 return r; 2946 } 2947 2948 static int kvm_s390_apxa_installed(void) 2949 { 2950 struct ap_config_info info; 2951 2952 if (ap_instructions_available()) { 2953 if (ap_qci(&info) == 0) 2954 return info.apxa; 2955 } 2956 2957 return 0; 2958 } 2959 2960 /* 2961 * The format of the crypto control block (CRYCB) is 
specified in the 3 low 2962 * order bits of the CRYCB designation (CRYCBD) field as follows: 2963 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2964 * AP extended addressing (APXA) facility are installed. 2965 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 2966 * Format 2: Both the APXA and MSAX3 facilities are installed 2967 */ 2968 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2969 { 2970 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2971 2972 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2973 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2974 2975 /* Check whether MSAX3 is installed */ 2976 if (!test_kvm_facility(kvm, 76)) 2977 return; 2978 2979 if (kvm_s390_apxa_installed()) 2980 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2981 else 2982 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2983 } 2984 2985 /* 2986 * kvm_arch_crypto_set_masks 2987 * 2988 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2989 * to be set. 2990 * @apm: the mask identifying the accessible AP adapters 2991 * @aqm: the mask identifying the accessible AP domains 2992 * @adm: the mask identifying the accessible AP control domains 2993 * 2994 * Set the masks that identify the adapters, domains and control domains to 2995 * which the KVM guest is granted access. 2996 * 2997 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2998 * function. 2999 */ 3000 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 3001 unsigned long *aqm, unsigned long *adm) 3002 { 3003 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 3004 3005 kvm_s390_vcpu_block_all(kvm); 3006 3007 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 3008 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 3009 memcpy(crycb->apcb1.apm, apm, 32); 3010 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 3011 apm[0], apm[1], apm[2], apm[3]); 3012 memcpy(crycb->apcb1.aqm, aqm, 32); 3013 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 3014 aqm[0], aqm[1], aqm[2], aqm[3]); 3015 memcpy(crycb->apcb1.adm, adm, 32); 3016 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 3017 adm[0], adm[1], adm[2], adm[3]); 3018 break; 3019 case CRYCB_FORMAT1: 3020 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 3021 memcpy(crycb->apcb0.apm, apm, 8); 3022 memcpy(crycb->apcb0.aqm, aqm, 2); 3023 memcpy(crycb->apcb0.adm, adm, 2); 3024 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 3025 apm[0], *((unsigned short *)aqm), 3026 *((unsigned short *)adm)); 3027 break; 3028 default: /* Can not happen */ 3029 break; 3030 } 3031 3032 /* recreate the shadow crycb for each vcpu */ 3033 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3034 kvm_s390_vcpu_unblock_all(kvm); 3035 } 3036 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 3037 3038 /* 3039 * kvm_arch_crypto_clear_masks 3040 * 3041 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 3042 * to be cleared. 3043 * 3044 * Clear the masks that identify the adapters, domains and control domains to 3045 * which the KVM guest is granted access. 3046 * 3047 * Note: The kvm->lock mutex must be locked by the caller before invoking this 3048 * function. 
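 *
 * Both the format-0/1 masks (apcb0) and the format-2 masks (apcb1) are
 * cleared, and a KVM_REQ_VSIE_RESTART request is broadcast so that the
 * shadow CRYCB of every vCPU is rebuilt.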
3049 */ 3050 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 3051 { 3052 kvm_s390_vcpu_block_all(kvm); 3053 3054 memset(&kvm->arch.crypto.crycb->apcb0, 0, 3055 sizeof(kvm->arch.crypto.crycb->apcb0)); 3056 memset(&kvm->arch.crypto.crycb->apcb1, 0, 3057 sizeof(kvm->arch.crypto.crycb->apcb1)); 3058 3059 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 3060 /* recreate the shadow crycb for each vcpu */ 3061 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3062 kvm_s390_vcpu_unblock_all(kvm); 3063 } 3064 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 3065 3066 static u64 kvm_s390_get_initial_cpuid(void) 3067 { 3068 struct cpuid cpuid; 3069 3070 get_cpu_id(&cpuid); 3071 cpuid.version = 0xff; 3072 return *((u64 *) &cpuid); 3073 } 3074 3075 static void kvm_s390_crypto_init(struct kvm *kvm) 3076 { 3077 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 3078 kvm_s390_set_crycb_format(kvm); 3079 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 3080 3081 if (!test_kvm_facility(kvm, 76)) 3082 return; 3083 3084 /* Enable AES/DEA protected key functions by default */ 3085 kvm->arch.crypto.aes_kw = 1; 3086 kvm->arch.crypto.dea_kw = 1; 3087 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 3088 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 3089 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 3090 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 3091 } 3092 3093 static void sca_dispose(struct kvm *kvm) 3094 { 3095 if (kvm->arch.use_esca) 3096 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 3097 else 3098 free_page((unsigned long)(kvm->arch.sca)); 3099 kvm->arch.sca = NULL; 3100 } 3101 3102 void kvm_arch_free_vm(struct kvm *kvm) 3103 { 3104 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 3105 kvm_s390_pci_clear_list(kvm); 3106 3107 __kvm_arch_free_vm(kvm); 3108 } 3109 3110 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 3111 { 3112 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 3113 int i, rc; 3114 char debug_name[16]; 3115 static unsigned long sca_offset; 3116 3117 rc = -EINVAL; 3118 #ifdef CONFIG_KVM_S390_UCONTROL 3119 if (type & ~KVM_VM_S390_UCONTROL) 3120 goto out_err; 3121 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 3122 goto out_err; 3123 #else 3124 if (type) 3125 goto out_err; 3126 #endif 3127 3128 rc = s390_enable_sie(); 3129 if (rc) 3130 goto out_err; 3131 3132 rc = -ENOMEM; 3133 3134 if (!sclp.has_64bscao) 3135 alloc_flags |= GFP_DMA; 3136 rwlock_init(&kvm->arch.sca_lock); 3137 /* start with basic SCA */ 3138 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 3139 if (!kvm->arch.sca) 3140 goto out_err; 3141 mutex_lock(&kvm_lock); 3142 sca_offset += 16; 3143 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 3144 sca_offset = 0; 3145 kvm->arch.sca = (struct bsca_block *) 3146 ((char *) kvm->arch.sca + sca_offset); 3147 mutex_unlock(&kvm_lock); 3148 3149 sprintf(debug_name, "kvm-%u", current->pid); 3150 3151 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 3152 if (!kvm->arch.dbf) 3153 goto out_err; 3154 3155 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 3156 kvm->arch.sie_page2 = 3157 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 3158 if (!kvm->arch.sie_page2) 3159 goto out_err; 3160 3161 kvm->arch.sie_page2->kvm = kvm; 3162 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 3163 3164 for (i = 0; i < kvm_s390_fac_size(); i++) { 3165 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 3166 (kvm_s390_fac_base[i] | 3167 kvm_s390_fac_ext[i]); 3168 
kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 3169 kvm_s390_fac_base[i]; 3170 } 3171 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 3172 3173 /* we are always in czam mode - even on pre z14 machines */ 3174 set_kvm_facility(kvm->arch.model.fac_mask, 138); 3175 set_kvm_facility(kvm->arch.model.fac_list, 138); 3176 /* we emulate STHYI in kvm */ 3177 set_kvm_facility(kvm->arch.model.fac_mask, 74); 3178 set_kvm_facility(kvm->arch.model.fac_list, 74); 3179 if (MACHINE_HAS_TLB_GUEST) { 3180 set_kvm_facility(kvm->arch.model.fac_mask, 147); 3181 set_kvm_facility(kvm->arch.model.fac_list, 147); 3182 } 3183 3184 if (css_general_characteristics.aiv && test_facility(65)) 3185 set_kvm_facility(kvm->arch.model.fac_mask, 65); 3186 3187 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 3188 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 3189 3190 kvm_s390_crypto_init(kvm); 3191 3192 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { 3193 mutex_lock(&kvm->lock); 3194 kvm_s390_pci_init_list(kvm); 3195 kvm_s390_vcpu_pci_enable_interp(kvm); 3196 mutex_unlock(&kvm->lock); 3197 } 3198 3199 mutex_init(&kvm->arch.float_int.ais_lock); 3200 spin_lock_init(&kvm->arch.float_int.lock); 3201 for (i = 0; i < FIRQ_LIST_COUNT; i++) 3202 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 3203 init_waitqueue_head(&kvm->arch.ipte_wq); 3204 mutex_init(&kvm->arch.ipte_mutex); 3205 3206 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 3207 VM_EVENT(kvm, 3, "vm created with type %lu", type); 3208 3209 if (type & KVM_VM_S390_UCONTROL) { 3210 kvm->arch.gmap = NULL; 3211 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 3212 } else { 3213 if (sclp.hamax == U64_MAX) 3214 kvm->arch.mem_limit = TASK_SIZE_MAX; 3215 else 3216 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 3217 sclp.hamax + 1); 3218 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 3219 if (!kvm->arch.gmap) 3220 goto out_err; 3221 kvm->arch.gmap->private = kvm; 3222 kvm->arch.gmap->pfault_enabled = 0; 3223 } 3224 3225 kvm->arch.use_pfmfi = sclp.has_pfmfi; 3226 kvm->arch.use_skf = sclp.has_skey; 3227 spin_lock_init(&kvm->arch.start_stop_lock); 3228 kvm_s390_vsie_init(kvm); 3229 if (use_gisa) 3230 kvm_s390_gisa_init(kvm); 3231 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 3232 3233 return 0; 3234 out_err: 3235 free_page((unsigned long)kvm->arch.sie_page2); 3236 debug_unregister(kvm->arch.dbf); 3237 sca_dispose(kvm); 3238 KVM_EVENT(3, "creation of vm failed: %d", rc); 3239 return rc; 3240 } 3241 3242 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 3243 { 3244 u16 rc, rrc; 3245 3246 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 3247 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 3248 kvm_s390_clear_local_irqs(vcpu); 3249 kvm_clear_async_pf_completion_queue(vcpu); 3250 if (!kvm_is_ucontrol(vcpu->kvm)) 3251 sca_del_vcpu(vcpu); 3252 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3253 3254 if (kvm_is_ucontrol(vcpu->kvm)) 3255 gmap_remove(vcpu->arch.gmap); 3256 3257 if (vcpu->kvm->arch.use_cmma) 3258 kvm_s390_vcpu_unsetup_cmma(vcpu); 3259 /* We can not hold the vcpu mutex here, we are already dying */ 3260 if (kvm_s390_pv_cpu_get_handle(vcpu)) 3261 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 3262 free_page((unsigned long)(vcpu->arch.sie_block)); 3263 } 3264 3265 void kvm_arch_destroy_vm(struct kvm *kvm) 3266 { 3267 u16 rc, rrc; 3268 3269 kvm_destroy_vcpus(kvm); 3270 sca_dispose(kvm); 3271 kvm_s390_gisa_destroy(kvm); 3272 /* 3273 * We are already at the end of life and kvm->lock is not taken. 
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	/*
	 * Remove the mmu notifier only when the whole KVM VM is torn down,
	 * and only if one was registered to begin with. If the VM is
	 * currently not protected, but was previously protected,
	 * then it's possible that the notifier is still registered.
	 */
	if (kvm->arch.pv.mmu_notifier.ops)
		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);

	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i],
&s->cpu[i]); 3375 } 3376 3377 static int sca_switch_to_extended(struct kvm *kvm) 3378 { 3379 struct bsca_block *old_sca = kvm->arch.sca; 3380 struct esca_block *new_sca; 3381 struct kvm_vcpu *vcpu; 3382 unsigned long vcpu_idx; 3383 u32 scaol, scaoh; 3384 3385 if (kvm->arch.use_esca) 3386 return 0; 3387 3388 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3389 if (!new_sca) 3390 return -ENOMEM; 3391 3392 scaoh = (u32)((u64)(new_sca) >> 32); 3393 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3394 3395 kvm_s390_vcpu_block_all(kvm); 3396 write_lock(&kvm->arch.sca_lock); 3397 3398 sca_copy_b_to_e(new_sca, old_sca); 3399 3400 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3401 vcpu->arch.sie_block->scaoh = scaoh; 3402 vcpu->arch.sie_block->scaol = scaol; 3403 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3404 } 3405 kvm->arch.sca = new_sca; 3406 kvm->arch.use_esca = 1; 3407 3408 write_unlock(&kvm->arch.sca_lock); 3409 kvm_s390_vcpu_unblock_all(kvm); 3410 3411 free_page((unsigned long)old_sca); 3412 3413 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3414 old_sca, kvm->arch.sca); 3415 return 0; 3416 } 3417 3418 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3419 { 3420 int rc; 3421 3422 if (!kvm_s390_use_sca_entries()) { 3423 if (id < KVM_MAX_VCPUS) 3424 return true; 3425 return false; 3426 } 3427 if (id < KVM_S390_BSCA_CPU_SLOTS) 3428 return true; 3429 if (!sclp.has_esca || !sclp.has_64bscao) 3430 return false; 3431 3432 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3433 3434 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3435 } 3436 3437 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3438 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3439 { 3440 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3441 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3442 vcpu->arch.cputm_start = get_tod_clock_fast(); 3443 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3444 } 3445 3446 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3447 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3448 { 3449 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3450 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3451 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3452 vcpu->arch.cputm_start = 0; 3453 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3454 } 3455 3456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3457 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3458 { 3459 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3460 vcpu->arch.cputm_enabled = true; 3461 __start_cpu_timer_accounting(vcpu); 3462 } 3463 3464 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3465 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3466 { 3467 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3468 __stop_cpu_timer_accounting(vcpu); 3469 vcpu->arch.cputm_enabled = false; 3470 } 3471 3472 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3473 { 3474 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3475 __enable_cpu_timer_accounting(vcpu); 3476 preempt_enable(); 3477 } 3478 3479 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3480 { 3481 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3482 __disable_cpu_timer_accounting(vcpu); 3483 preempt_enable(); 3484 } 3485 3486 /* set the cpu timer - may only be called from the VCPU thread itself */ 3487 
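/*
 * The CPU timer value is protected by cputm_seqcount: the helpers above
 * enter a write side critical section around every update of cputm and
 * cputm_start, while kvm_s390_get_cpu_timer() below retries its read until
 * it observes a stable sequence count.
 */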
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3488 { 3489 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3490 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3491 if (vcpu->arch.cputm_enabled) 3492 vcpu->arch.cputm_start = get_tod_clock_fast(); 3493 vcpu->arch.sie_block->cputm = cputm; 3494 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3495 preempt_enable(); 3496 } 3497 3498 /* update and get the cpu timer - can also be called from other VCPU threads */ 3499 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3500 { 3501 unsigned int seq; 3502 __u64 value; 3503 3504 if (unlikely(!vcpu->arch.cputm_enabled)) 3505 return vcpu->arch.sie_block->cputm; 3506 3507 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3508 do { 3509 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3510 /* 3511 * If the writer would ever execute a read in the critical 3512 * section, e.g. in irq context, we have a deadlock. 3513 */ 3514 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3515 value = vcpu->arch.sie_block->cputm; 3516 /* if cputm_start is 0, accounting is being started/stopped */ 3517 if (likely(vcpu->arch.cputm_start)) 3518 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3519 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3520 preempt_enable(); 3521 return value; 3522 } 3523 3524 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3525 { 3526 3527 gmap_enable(vcpu->arch.enabled_gmap); 3528 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3529 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3530 __start_cpu_timer_accounting(vcpu); 3531 vcpu->cpu = cpu; 3532 } 3533 3534 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3535 { 3536 vcpu->cpu = -1; 3537 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3538 __stop_cpu_timer_accounting(vcpu); 3539 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3540 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3541 gmap_disable(vcpu->arch.enabled_gmap); 3542 3543 } 3544 3545 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3546 { 3547 mutex_lock(&vcpu->kvm->lock); 3548 preempt_disable(); 3549 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3550 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3551 preempt_enable(); 3552 mutex_unlock(&vcpu->kvm->lock); 3553 if (!kvm_is_ucontrol(vcpu->kvm)) { 3554 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3555 sca_add_vcpu(vcpu); 3556 } 3557 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3558 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3559 /* make vcpu_load load the right gmap on the first trigger */ 3560 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3561 } 3562 3563 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3564 { 3565 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3566 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3567 return true; 3568 return false; 3569 } 3570 3571 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3572 { 3573 /* At least one ECC subfunction must be present */ 3574 return kvm_has_pckmo_subfunc(kvm, 32) || 3575 kvm_has_pckmo_subfunc(kvm, 33) || 3576 kvm_has_pckmo_subfunc(kvm, 34) || 3577 kvm_has_pckmo_subfunc(kvm, 40) || 3578 kvm_has_pckmo_subfunc(kvm, 41); 3579 3580 } 3581 3582 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3583 { 3584 /* 3585 * If the AP instructions are not being interpreted and the MSAX3 3586 * facility is not configured for the guest, there is nothing to set up. 
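	 * Otherwise the CRYCB origin is installed in the SIE block, ECA_APIE
	 * is set when AP instructions are interpreted, ECB3_AES/ECB3_DEA
	 * enable the AES/DEA protected key functions, and ECD_ECC is set on
	 * top of ECB3_AES when at least one ECC PCKMO subfunction is
	 * available.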
3587 */ 3588 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3589 return; 3590 3591 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3592 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3593 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3594 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3595 3596 if (vcpu->kvm->arch.crypto.apie) 3597 vcpu->arch.sie_block->eca |= ECA_APIE; 3598 3599 /* Set up protected key support */ 3600 if (vcpu->kvm->arch.crypto.aes_kw) { 3601 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3602 /* ecc is also wrapped with AES key */ 3603 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3604 vcpu->arch.sie_block->ecd |= ECD_ECC; 3605 } 3606 3607 if (vcpu->kvm->arch.crypto.dea_kw) 3608 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3609 } 3610 3611 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3612 { 3613 free_page(vcpu->arch.sie_block->cbrlo); 3614 vcpu->arch.sie_block->cbrlo = 0; 3615 } 3616 3617 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3618 { 3619 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3620 if (!vcpu->arch.sie_block->cbrlo) 3621 return -ENOMEM; 3622 return 0; 3623 } 3624 3625 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3626 { 3627 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3628 3629 vcpu->arch.sie_block->ibc = model->ibc; 3630 if (test_kvm_facility(vcpu->kvm, 7)) 3631 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3632 } 3633 3634 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3635 { 3636 int rc = 0; 3637 u16 uvrc, uvrrc; 3638 3639 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3640 CPUSTAT_SM | 3641 CPUSTAT_STOPPED); 3642 3643 if (test_kvm_facility(vcpu->kvm, 78)) 3644 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3645 else if (test_kvm_facility(vcpu->kvm, 8)) 3646 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3647 3648 kvm_s390_vcpu_setup_model(vcpu); 3649 3650 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3651 if (MACHINE_HAS_ESOP) 3652 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3653 if (test_kvm_facility(vcpu->kvm, 9)) 3654 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3655 if (test_kvm_facility(vcpu->kvm, 11)) 3656 vcpu->arch.sie_block->ecb |= ECB_PTF; 3657 if (test_kvm_facility(vcpu->kvm, 73)) 3658 vcpu->arch.sie_block->ecb |= ECB_TE; 3659 if (!kvm_is_ucontrol(vcpu->kvm)) 3660 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3661 3662 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3663 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3664 if (test_kvm_facility(vcpu->kvm, 130)) 3665 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3666 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3667 if (sclp.has_cei) 3668 vcpu->arch.sie_block->eca |= ECA_CEI; 3669 if (sclp.has_ib) 3670 vcpu->arch.sie_block->eca |= ECA_IB; 3671 if (sclp.has_siif) 3672 vcpu->arch.sie_block->eca |= ECA_SII; 3673 if (sclp.has_sigpif) 3674 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3675 if (test_kvm_facility(vcpu->kvm, 129)) { 3676 vcpu->arch.sie_block->eca |= ECA_VX; 3677 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3678 } 3679 if (test_kvm_facility(vcpu->kvm, 139)) 3680 vcpu->arch.sie_block->ecd |= ECD_MEF; 3681 if (test_kvm_facility(vcpu->kvm, 156)) 3682 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3683 if (vcpu->arch.sie_block->gd) { 3684 vcpu->arch.sie_block->eca |= ECA_AIV; 3685 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3686 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3687 } 3688 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3689 | 
SDNXC; 3690 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 3691 3692 if (sclp.has_kss) 3693 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3694 else 3695 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3696 3697 if (vcpu->kvm->arch.use_cmma) { 3698 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3699 if (rc) 3700 return rc; 3701 } 3702 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3703 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3704 3705 vcpu->arch.sie_block->hpid = HPID_KVM; 3706 3707 kvm_s390_vcpu_crypto_setup(vcpu); 3708 3709 kvm_s390_vcpu_pci_setup(vcpu); 3710 3711 mutex_lock(&vcpu->kvm->lock); 3712 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3713 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3714 if (rc) 3715 kvm_s390_vcpu_unsetup_cmma(vcpu); 3716 } 3717 mutex_unlock(&vcpu->kvm->lock); 3718 3719 return rc; 3720 } 3721 3722 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3723 { 3724 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3725 return -EINVAL; 3726 return 0; 3727 } 3728 3729 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3730 { 3731 struct sie_page *sie_page; 3732 int rc; 3733 3734 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3735 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3736 if (!sie_page) 3737 return -ENOMEM; 3738 3739 vcpu->arch.sie_block = &sie_page->sie_block; 3740 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3741 3742 /* the real guest size will always be smaller than msl */ 3743 vcpu->arch.sie_block->mso = 0; 3744 vcpu->arch.sie_block->msl = sclp.hamax; 3745 3746 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3747 spin_lock_init(&vcpu->arch.local_int.lock); 3748 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3749 seqcount_init(&vcpu->arch.cputm_seqcount); 3750 3751 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3752 kvm_clear_async_pf_completion_queue(vcpu); 3753 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3754 KVM_SYNC_GPRS | 3755 KVM_SYNC_ACRS | 3756 KVM_SYNC_CRS | 3757 KVM_SYNC_ARCH0 | 3758 KVM_SYNC_PFAULT | 3759 KVM_SYNC_DIAG318; 3760 kvm_s390_set_prefix(vcpu, 0); 3761 if (test_kvm_facility(vcpu->kvm, 64)) 3762 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3763 if (test_kvm_facility(vcpu->kvm, 82)) 3764 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3765 if (test_kvm_facility(vcpu->kvm, 133)) 3766 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3767 if (test_kvm_facility(vcpu->kvm, 156)) 3768 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3769 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3770 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
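	 * Consequently only one of KVM_SYNC_VRS and KVM_SYNC_FPRS is
	 * advertised in kvm_valid_regs below, depending on MACHINE_HAS_VX.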
3771 */ 3772 if (MACHINE_HAS_VX) 3773 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3774 else 3775 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3776 3777 if (kvm_is_ucontrol(vcpu->kvm)) { 3778 rc = __kvm_ucontrol_vcpu_init(vcpu); 3779 if (rc) 3780 goto out_free_sie_block; 3781 } 3782 3783 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3784 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3785 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3786 3787 rc = kvm_s390_vcpu_setup(vcpu); 3788 if (rc) 3789 goto out_ucontrol_uninit; 3790 3791 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3792 return 0; 3793 3794 out_ucontrol_uninit: 3795 if (kvm_is_ucontrol(vcpu->kvm)) 3796 gmap_remove(vcpu->arch.gmap); 3797 out_free_sie_block: 3798 free_page((unsigned long)(vcpu->arch.sie_block)); 3799 return rc; 3800 } 3801 3802 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3803 { 3804 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3805 return kvm_s390_vcpu_has_irq(vcpu, 0); 3806 } 3807 3808 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3809 { 3810 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3811 } 3812 3813 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3814 { 3815 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3816 exit_sie(vcpu); 3817 } 3818 3819 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3820 { 3821 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3822 } 3823 3824 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3825 { 3826 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3827 exit_sie(vcpu); 3828 } 3829 3830 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3831 { 3832 return atomic_read(&vcpu->arch.sie_block->prog20) & 3833 (PROG_BLOCK_SIE | PROG_REQUEST); 3834 } 3835 3836 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3837 { 3838 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3839 } 3840 3841 /* 3842 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3843 * If the CPU is not running (e.g. waiting as idle) the function will 3844 * return immediately. 
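 * The loop in the function body spins with cpu_relax() until PROG_IN_SIE is
 * no longer set in the vcpu's SIE control block, i.e. until the CPU has left SIE.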
*/ 3845 void exit_sie(struct kvm_vcpu *vcpu) 3846 { 3847 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3848 kvm_s390_vsie_kick(vcpu); 3849 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3850 cpu_relax(); 3851 } 3852 3853 /* Kick a guest cpu out of SIE to process a request synchronously */ 3854 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3855 { 3856 __kvm_make_request(req, vcpu); 3857 kvm_s390_vcpu_request(vcpu); 3858 } 3859 3860 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3861 unsigned long end) 3862 { 3863 struct kvm *kvm = gmap->private; 3864 struct kvm_vcpu *vcpu; 3865 unsigned long prefix; 3866 unsigned long i; 3867 3868 if (gmap_is_shadow(gmap)) 3869 return; 3870 if (start >= 1UL << 31) 3871 /* We are only interested in prefix pages */ 3872 return; 3873 kvm_for_each_vcpu(i, vcpu, kvm) { 3874 /* match against both prefix pages */ 3875 prefix = kvm_s390_get_prefix(vcpu); 3876 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3877 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3878 start, end); 3879 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3880 } 3881 } 3882 } 3883 3884 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3885 { 3886 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3887 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3888 READ_ONCE(halt_poll_max_steal)) { 3889 vcpu->stat.halt_no_poll_steal++; 3890 return true; 3891 } 3892 return false; 3893 } 3894 3895 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3896 { 3897 /* kvm common code refers to this, but never calls it */ 3898 BUG(); 3899 return 0; 3900 } 3901 3902 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3903 struct kvm_one_reg *reg) 3904 { 3905 int r = -EINVAL; 3906 3907 switch (reg->id) { 3908 case KVM_REG_S390_TODPR: 3909 r = put_user(vcpu->arch.sie_block->todpr, 3910 (u32 __user *)reg->addr); 3911 break; 3912 case KVM_REG_S390_EPOCHDIFF: 3913 r = put_user(vcpu->arch.sie_block->epoch, 3914 (u64 __user *)reg->addr); 3915 break; 3916 case KVM_REG_S390_CPU_TIMER: 3917 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3918 (u64 __user *)reg->addr); 3919 break; 3920 case KVM_REG_S390_CLOCK_COMP: 3921 r = put_user(vcpu->arch.sie_block->ckc, 3922 (u64 __user *)reg->addr); 3923 break; 3924 case KVM_REG_S390_PFTOKEN: 3925 r = put_user(vcpu->arch.pfault_token, 3926 (u64 __user *)reg->addr); 3927 break; 3928 case KVM_REG_S390_PFCOMPARE: 3929 r = put_user(vcpu->arch.pfault_compare, 3930 (u64 __user *)reg->addr); 3931 break; 3932 case KVM_REG_S390_PFSELECT: 3933 r = put_user(vcpu->arch.pfault_select, 3934 (u64 __user *)reg->addr); 3935 break; 3936 case KVM_REG_S390_PP: 3937 r = put_user(vcpu->arch.sie_block->pp, 3938 (u64 __user *)reg->addr); 3939 break; 3940 case KVM_REG_S390_GBEA: 3941 r = put_user(vcpu->arch.sie_block->gbea, 3942 (u64 __user *)reg->addr); 3943 break; 3944 default: 3945 break; 3946 } 3947 3948 return r; 3949 } 3950 3951 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3952 struct kvm_one_reg *reg) 3953 { 3954 int r = -EINVAL; 3955 __u64 val; 3956 3957 switch (reg->id) { 3958 case KVM_REG_S390_TODPR: 3959 r = get_user(vcpu->arch.sie_block->todpr, 3960 (u32 __user *)reg->addr); 3961 break; 3962 case KVM_REG_S390_EPOCHDIFF: 3963 r = get_user(vcpu->arch.sie_block->epoch, 3964 (u64 __user *)reg->addr); 3965 break; 3966 case KVM_REG_S390_CPU_TIMER: 3967 r = get_user(val, (u64 __user *)reg->addr); 3968 if (!r) 3969 kvm_s390_set_cpu_timer(vcpu, val); 3970 break; 3971 case 
KVM_REG_S390_CLOCK_COMP: 3972 r = get_user(vcpu->arch.sie_block->ckc, 3973 (u64 __user *)reg->addr); 3974 break; 3975 case KVM_REG_S390_PFTOKEN: 3976 r = get_user(vcpu->arch.pfault_token, 3977 (u64 __user *)reg->addr); 3978 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3979 kvm_clear_async_pf_completion_queue(vcpu); 3980 break; 3981 case KVM_REG_S390_PFCOMPARE: 3982 r = get_user(vcpu->arch.pfault_compare, 3983 (u64 __user *)reg->addr); 3984 break; 3985 case KVM_REG_S390_PFSELECT: 3986 r = get_user(vcpu->arch.pfault_select, 3987 (u64 __user *)reg->addr); 3988 break; 3989 case KVM_REG_S390_PP: 3990 r = get_user(vcpu->arch.sie_block->pp, 3991 (u64 __user *)reg->addr); 3992 break; 3993 case KVM_REG_S390_GBEA: 3994 r = get_user(vcpu->arch.sie_block->gbea, 3995 (u64 __user *)reg->addr); 3996 break; 3997 default: 3998 break; 3999 } 4000 4001 return r; 4002 } 4003 4004 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 4005 { 4006 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 4007 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 4008 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 4009 4010 kvm_clear_async_pf_completion_queue(vcpu); 4011 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 4012 kvm_s390_vcpu_stop(vcpu); 4013 kvm_s390_clear_local_irqs(vcpu); 4014 } 4015 4016 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 4017 { 4018 /* Initial reset is a superset of the normal reset */ 4019 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4020 4021 /* 4022 * This equals initial cpu reset in pop, but we don't switch to ESA. 4023 * We do not only reset the internal data, but also ... 4024 */ 4025 vcpu->arch.sie_block->gpsw.mask = 0; 4026 vcpu->arch.sie_block->gpsw.addr = 0; 4027 kvm_s390_set_prefix(vcpu, 0); 4028 kvm_s390_set_cpu_timer(vcpu, 0); 4029 vcpu->arch.sie_block->ckc = 0; 4030 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 4031 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 4032 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 4033 4034 /* ... the data in sync regs */ 4035 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 4036 vcpu->run->s.regs.ckc = 0; 4037 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 4038 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 4039 vcpu->run->psw_addr = 0; 4040 vcpu->run->psw_mask = 0; 4041 vcpu->run->s.regs.todpr = 0; 4042 vcpu->run->s.regs.cputm = 0; 4043 vcpu->run->s.regs.ckc = 0; 4044 vcpu->run->s.regs.pp = 0; 4045 vcpu->run->s.regs.gbea = 1; 4046 vcpu->run->s.regs.fpc = 0; 4047 /* 4048 * Do not reset these registers in the protected case, as some of 4049 * them are overlayed and they are not accessible in this case 4050 * anyway. 
4051 */ 4052 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 4053 vcpu->arch.sie_block->gbea = 1; 4054 vcpu->arch.sie_block->pp = 0; 4055 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4056 vcpu->arch.sie_block->todpr = 0; 4057 } 4058 } 4059 4060 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 4061 { 4062 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 4063 4064 /* Clear reset is a superset of the initial reset */ 4065 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4066 4067 memset(&regs->gprs, 0, sizeof(regs->gprs)); 4068 memset(&regs->vrs, 0, sizeof(regs->vrs)); 4069 memset(&regs->acrs, 0, sizeof(regs->acrs)); 4070 memset(&regs->gscb, 0, sizeof(regs->gscb)); 4071 4072 regs->etoken = 0; 4073 regs->etoken_extension = 0; 4074 } 4075 4076 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4077 { 4078 vcpu_load(vcpu); 4079 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 4080 vcpu_put(vcpu); 4081 return 0; 4082 } 4083 4084 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4085 { 4086 vcpu_load(vcpu); 4087 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 4088 vcpu_put(vcpu); 4089 return 0; 4090 } 4091 4092 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 4093 struct kvm_sregs *sregs) 4094 { 4095 vcpu_load(vcpu); 4096 4097 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 4098 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 4099 4100 vcpu_put(vcpu); 4101 return 0; 4102 } 4103 4104 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4105 struct kvm_sregs *sregs) 4106 { 4107 vcpu_load(vcpu); 4108 4109 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 4110 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 4111 4112 vcpu_put(vcpu); 4113 return 0; 4114 } 4115 4116 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 4117 { 4118 int ret = 0; 4119 4120 vcpu_load(vcpu); 4121 4122 if (test_fp_ctl(fpu->fpc)) { 4123 ret = -EINVAL; 4124 goto out; 4125 } 4126 vcpu->run->s.regs.fpc = fpu->fpc; 4127 if (MACHINE_HAS_VX) 4128 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 4129 (freg_t *) fpu->fprs); 4130 else 4131 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 4132 4133 out: 4134 vcpu_put(vcpu); 4135 return ret; 4136 } 4137 4138 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 4139 { 4140 vcpu_load(vcpu); 4141 4142 /* make sure we have the latest values */ 4143 save_fpu_regs(); 4144 if (MACHINE_HAS_VX) 4145 convert_vx_to_fp((freg_t *) fpu->fprs, 4146 (__vector128 *) vcpu->run->s.regs.vrs); 4147 else 4148 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 4149 fpu->fpc = vcpu->run->s.regs.fpc; 4150 4151 vcpu_put(vcpu); 4152 return 0; 4153 } 4154 4155 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 4156 { 4157 int rc = 0; 4158 4159 if (!is_vcpu_stopped(vcpu)) 4160 rc = -EBUSY; 4161 else { 4162 vcpu->run->psw_mask = psw.mask; 4163 vcpu->run->psw_addr = psw.addr; 4164 } 4165 return rc; 4166 } 4167 4168 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 4169 struct kvm_translation *tr) 4170 { 4171 return -EINVAL; /* not implemented yet */ 4172 } 4173 4174 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 4175 KVM_GUESTDBG_USE_HW_BP | \ 4176 KVM_GUESTDBG_ENABLE) 4177 4178 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 4179 struct kvm_guest_debug *dbg) 4180 { 4181 int rc = 0; 4182 4183 vcpu_load(vcpu); 4184
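/* Start from a clean slate: drop any previously installed debug state before validating the new request. */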
4185 vcpu->guest_debug = 0; 4186 kvm_s390_clear_bp_data(vcpu); 4187 4188 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 4189 rc = -EINVAL; 4190 goto out; 4191 } 4192 if (!sclp.has_gpere) { 4193 rc = -EINVAL; 4194 goto out; 4195 } 4196 4197 if (dbg->control & KVM_GUESTDBG_ENABLE) { 4198 vcpu->guest_debug = dbg->control; 4199 /* enforce guest PER */ 4200 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 4201 4202 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 4203 rc = kvm_s390_import_bp_data(vcpu, dbg); 4204 } else { 4205 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4206 vcpu->arch.guestdbg.last_bp = 0; 4207 } 4208 4209 if (rc) { 4210 vcpu->guest_debug = 0; 4211 kvm_s390_clear_bp_data(vcpu); 4212 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4213 } 4214 4215 out: 4216 vcpu_put(vcpu); 4217 return rc; 4218 } 4219 4220 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4221 struct kvm_mp_state *mp_state) 4222 { 4223 int ret; 4224 4225 vcpu_load(vcpu); 4226 4227 /* CHECK_STOP and LOAD are not supported yet */ 4228 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 4229 KVM_MP_STATE_OPERATING; 4230 4231 vcpu_put(vcpu); 4232 return ret; 4233 } 4234 4235 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4236 struct kvm_mp_state *mp_state) 4237 { 4238 int rc = 0; 4239 4240 vcpu_load(vcpu); 4241 4242 /* user space knows about this interface - let it control the state */ 4243 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 4244 4245 switch (mp_state->mp_state) { 4246 case KVM_MP_STATE_STOPPED: 4247 rc = kvm_s390_vcpu_stop(vcpu); 4248 break; 4249 case KVM_MP_STATE_OPERATING: 4250 rc = kvm_s390_vcpu_start(vcpu); 4251 break; 4252 case KVM_MP_STATE_LOAD: 4253 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 4254 rc = -ENXIO; 4255 break; 4256 } 4257 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 4258 break; 4259 case KVM_MP_STATE_CHECK_STOP: 4260 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 4261 default: 4262 rc = -ENXIO; 4263 } 4264 4265 vcpu_put(vcpu); 4266 return rc; 4267 } 4268 4269 static bool ibs_enabled(struct kvm_vcpu *vcpu) 4270 { 4271 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 4272 } 4273 4274 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 4275 { 4276 retry: 4277 kvm_s390_vcpu_request_handled(vcpu); 4278 if (!kvm_request_pending(vcpu)) 4279 return 0; 4280 /* 4281 * If the guest prefix changed, re-arm the ipte notifier for the 4282 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 4283 * This ensures that the ipte instruction for this request has 4284 * already finished. We might race against a second unmapper that 4285 * wants to set the blocking bit. Lets just retry the request loop. 
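 * If re-arming the notifier fails, the request is simply re-queued and will be retried on the next guest entry.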
4286 */ 4287 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 4288 int rc; 4289 rc = gmap_mprotect_notify(vcpu->arch.gmap, 4290 kvm_s390_get_prefix(vcpu), 4291 PAGE_SIZE * 2, PROT_WRITE); 4292 if (rc) { 4293 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 4294 return rc; 4295 } 4296 goto retry; 4297 } 4298 4299 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 4300 vcpu->arch.sie_block->ihcpu = 0xffff; 4301 goto retry; 4302 } 4303 4304 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 4305 if (!ibs_enabled(vcpu)) { 4306 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 4307 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 4308 } 4309 goto retry; 4310 } 4311 4312 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 4313 if (ibs_enabled(vcpu)) { 4314 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 4315 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 4316 } 4317 goto retry; 4318 } 4319 4320 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 4321 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 4322 goto retry; 4323 } 4324 4325 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 4326 /* 4327 * Disable CMM virtualization; we will emulate the ESSA 4328 * instruction manually, in order to provide additional 4329 * functionalities needed for live migration. 4330 */ 4331 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 4332 goto retry; 4333 } 4334 4335 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 4336 /* 4337 * Re-enable CMM virtualization if CMMA is available and 4338 * CMM has been used. 4339 */ 4340 if ((vcpu->kvm->arch.use_cmma) && 4341 (vcpu->kvm->mm->context.uses_cmm)) 4342 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 4343 goto retry; 4344 } 4345 4346 /* nothing to do, just clear the request */ 4347 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 4348 /* we left the vsie handler, nothing to do, just clear the request */ 4349 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 4350 4351 return 0; 4352 } 4353 4354 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4355 { 4356 struct kvm_vcpu *vcpu; 4357 union tod_clock clk; 4358 unsigned long i; 4359 4360 preempt_disable(); 4361 4362 store_tod_clock_ext(&clk); 4363 4364 kvm->arch.epoch = gtod->tod - clk.tod; 4365 kvm->arch.epdx = 0; 4366 if (test_kvm_facility(kvm, 139)) { 4367 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 4368 if (kvm->arch.epoch > gtod->tod) 4369 kvm->arch.epdx -= 1; 4370 } 4371 4372 kvm_s390_vcpu_block_all(kvm); 4373 kvm_for_each_vcpu(i, vcpu, kvm) { 4374 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 4375 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 4376 } 4377 4378 kvm_s390_vcpu_unblock_all(kvm); 4379 preempt_enable(); 4380 } 4381 4382 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4383 { 4384 mutex_lock(&kvm->lock); 4385 __kvm_s390_set_tod_clock(kvm, gtod); 4386 mutex_unlock(&kvm->lock); 4387 } 4388 4389 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4390 { 4391 if (!mutex_trylock(&kvm->lock)) 4392 return 0; 4393 __kvm_s390_set_tod_clock(kvm, gtod); 4394 mutex_unlock(&kvm->lock); 4395 return 1; 4396 } 4397 4398 /** 4399 * kvm_arch_fault_in_page - fault-in guest page if necessary 4400 * @vcpu: The corresponding virtual cpu 4401 * @gpa: Guest physical address 4402 * @writable: Whether the page should be writable or not 4403 * 4404 * Make sure that a guest page has been faulted-in on the host. 4405 * 4406 * Return: Zero on success, negative error code otherwise. 
4407 */ 4408 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) 4409 { 4410 return gmap_fault(vcpu->arch.gmap, gpa, 4411 writable ? FAULT_FLAG_WRITE : 0); 4412 } 4413 4414 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4415 unsigned long token) 4416 { 4417 struct kvm_s390_interrupt inti; 4418 struct kvm_s390_irq irq; 4419 4420 if (start_token) { 4421 irq.u.ext.ext_params2 = token; 4422 irq.type = KVM_S390_INT_PFAULT_INIT; 4423 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4424 } else { 4425 inti.type = KVM_S390_INT_PFAULT_DONE; 4426 inti.parm64 = token; 4427 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4428 } 4429 } 4430 4431 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4432 struct kvm_async_pf *work) 4433 { 4434 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4435 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4436 4437 return true; 4438 } 4439 4440 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4441 struct kvm_async_pf *work) 4442 { 4443 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4444 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4445 } 4446 4447 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4448 struct kvm_async_pf *work) 4449 { 4450 /* s390 will always inject the page directly */ 4451 } 4452 4453 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4454 { 4455 /* 4456 * s390 will always inject the page directly, 4457 * but we still want check_async_completion to cleanup 4458 */ 4459 return true; 4460 } 4461 4462 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4463 { 4464 hva_t hva; 4465 struct kvm_arch_async_pf arch; 4466 4467 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4468 return false; 4469 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4470 vcpu->arch.pfault_compare) 4471 return false; 4472 if (psw_extint_disabled(vcpu)) 4473 return false; 4474 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4475 return false; 4476 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4477 return false; 4478 if (!vcpu->arch.gmap->pfault_enabled) 4479 return false; 4480 4481 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); 4482 hva += current->thread.gmap_addr & ~PAGE_MASK; 4483 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4484 return false; 4485 4486 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); 4487 } 4488 4489 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4490 { 4491 int rc, cpuflags; 4492 4493 /* 4494 * On s390 notifications for arriving pages will be delivered directly 4495 * to the guest but the house keeping for completed pfaults is 4496 * handled outside the worker. 
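 * kvm_check_async_pf_completion() below handles that bookkeeping and injects the completion (PFAULT DONE) interrupts for requests that have been resolved in the meantime.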
4497 */ 4498 kvm_check_async_pf_completion(vcpu); 4499 4500 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4501 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4502 4503 if (need_resched()) 4504 schedule(); 4505 4506 if (!kvm_is_ucontrol(vcpu->kvm)) { 4507 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4508 if (rc) 4509 return rc; 4510 } 4511 4512 rc = kvm_s390_handle_requests(vcpu); 4513 if (rc) 4514 return rc; 4515 4516 if (guestdbg_enabled(vcpu)) { 4517 kvm_s390_backup_guest_per_regs(vcpu); 4518 kvm_s390_patch_guest_per_regs(vcpu); 4519 } 4520 4521 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4522 4523 vcpu->arch.sie_block->icptcode = 0; 4524 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4525 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4526 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4527 4528 return 0; 4529 } 4530 4531 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4532 { 4533 struct kvm_s390_pgm_info pgm_info = { 4534 .code = PGM_ADDRESSING, 4535 }; 4536 u8 opcode, ilen; 4537 int rc; 4538 4539 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4540 trace_kvm_s390_sie_fault(vcpu); 4541 4542 /* 4543 * We want to inject an addressing exception, which is defined as a 4544 * suppressing or terminating exception. However, since we came here 4545 * by a DAT access exception, the PSW still points to the faulting 4546 * instruction since DAT exceptions are nullifying. So we've got 4547 * to look up the current opcode to get the length of the instruction 4548 * to be able to forward the PSW. 4549 */ 4550 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4551 ilen = insn_length(opcode); 4552 if (rc < 0) { 4553 return rc; 4554 } else if (rc) { 4555 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4556 * Forward by arbitrary ilc, injection will take care of 4557 * nullification if necessary. 
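 * In that case, reuse the program interrupt info stored at intercept time and assume an instruction length of 4.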
4558 */ 4559 pgm_info = vcpu->arch.pgm; 4560 ilen = 4; 4561 } 4562 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4563 kvm_s390_forward_psw(vcpu, ilen); 4564 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4565 } 4566 4567 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4568 { 4569 struct mcck_volatile_info *mcck_info; 4570 struct sie_page *sie_page; 4571 4572 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4573 vcpu->arch.sie_block->icptcode); 4574 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4575 4576 if (guestdbg_enabled(vcpu)) 4577 kvm_s390_restore_guest_per_regs(vcpu); 4578 4579 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4580 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4581 4582 if (exit_reason == -EINTR) { 4583 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4584 sie_page = container_of(vcpu->arch.sie_block, 4585 struct sie_page, sie_block); 4586 mcck_info = &sie_page->mcck_info; 4587 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4588 return 0; 4589 } 4590 4591 if (vcpu->arch.sie_block->icptcode > 0) { 4592 int rc = kvm_handle_sie_intercept(vcpu); 4593 4594 if (rc != -EOPNOTSUPP) 4595 return rc; 4596 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4597 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4598 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4599 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4600 return -EREMOTE; 4601 } else if (exit_reason != -EFAULT) { 4602 vcpu->stat.exit_null++; 4603 return 0; 4604 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4605 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4606 vcpu->run->s390_ucontrol.trans_exc_code = 4607 current->thread.gmap_addr; 4608 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4609 return -EREMOTE; 4610 } else if (current->thread.gmap_pfault) { 4611 trace_kvm_s390_major_guest_pfault(vcpu); 4612 current->thread.gmap_pfault = 0; 4613 if (kvm_arch_setup_async_pf(vcpu)) 4614 return 0; 4615 vcpu->stat.pfault_sync++; 4616 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4617 } 4618 return vcpu_post_run_fault_in_sie(vcpu); 4619 } 4620 4621 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4622 static int __vcpu_run(struct kvm_vcpu *vcpu) 4623 { 4624 int rc, exit_reason; 4625 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4626 4627 /* 4628 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4629 * ning the guest), so that memslots (and other stuff) are protected 4630 */ 4631 kvm_vcpu_srcu_read_lock(vcpu); 4632 4633 do { 4634 rc = vcpu_pre_run(vcpu); 4635 if (rc) 4636 break; 4637 4638 kvm_vcpu_srcu_read_unlock(vcpu); 4639 /* 4640 * As PF_VCPU will be used in fault handler, between 4641 * guest_enter and guest_exit should be no uaccess. 4642 */ 4643 local_irq_disable(); 4644 guest_enter_irqoff(); 4645 __disable_cpu_timer_accounting(vcpu); 4646 local_irq_enable(); 4647 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4648 memcpy(sie_page->pv_grregs, 4649 vcpu->run->s.regs.gprs, 4650 sizeof(sie_page->pv_grregs)); 4651 } 4652 if (test_cpu_flag(CIF_FPU)) 4653 load_fpu_regs(); 4654 exit_reason = sie64a(vcpu->arch.sie_block, 4655 vcpu->run->s.regs.gprs); 4656 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4657 memcpy(vcpu->run->s.regs.gprs, 4658 sie_page->pv_grregs, 4659 sizeof(sie_page->pv_grregs)); 4660 /* 4661 * We're not allowed to inject interrupts on intercepts 4662 * that leave the guest state in an "in-between" state 4663 * where the next SIE entry will do a continuation. 
4664 * Fence interrupts in our "internal" PSW. 4665 */ 4666 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4667 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4668 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4669 } 4670 } 4671 local_irq_disable(); 4672 __enable_cpu_timer_accounting(vcpu); 4673 guest_exit_irqoff(); 4674 local_irq_enable(); 4675 kvm_vcpu_srcu_read_lock(vcpu); 4676 4677 rc = vcpu_post_run(vcpu, exit_reason); 4678 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4679 4680 kvm_vcpu_srcu_read_unlock(vcpu); 4681 return rc; 4682 } 4683 4684 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4685 { 4686 struct kvm_run *kvm_run = vcpu->run; 4687 struct runtime_instr_cb *riccb; 4688 struct gs_cb *gscb; 4689 4690 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4691 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4692 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4693 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4694 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4695 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4696 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4697 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4698 } 4699 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4700 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4701 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4702 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4703 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4704 kvm_clear_async_pf_completion_queue(vcpu); 4705 } 4706 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4707 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4708 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4709 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4710 } 4711 /* 4712 * If userspace sets the riccb (e.g. after migration) to a valid state, 4713 * we should enable RI here instead of doing the lazy enablement. 4714 */ 4715 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4716 test_kvm_facility(vcpu->kvm, 64) && 4717 riccb->v && 4718 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4719 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4720 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4721 } 4722 /* 4723 * If userspace sets the gscb (e.g. after migration) to non-zero, 4724 * we should enable GS here instead of doing the lazy enablement. 4725 */ 4726 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4727 test_kvm_facility(vcpu->kvm, 133) && 4728 gscb->gssm && 4729 !vcpu->arch.gs_enabled) { 4730 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4731 vcpu->arch.sie_block->ecb |= ECB_GS; 4732 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4733 vcpu->arch.gs_enabled = 1; 4734 } 4735 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4736 test_kvm_facility(vcpu->kvm, 82)) { 4737 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4738 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; 4739 } 4740 if (MACHINE_HAS_GS) { 4741 preempt_disable(); 4742 __ctl_set_bit(2, 4); 4743 if (current->thread.gs_cb) { 4744 vcpu->arch.host_gscb = current->thread.gs_cb; 4745 save_gs_cb(vcpu->arch.host_gscb); 4746 } 4747 if (vcpu->arch.gs_enabled) { 4748 current->thread.gs_cb = (struct gs_cb *) 4749 &vcpu->run->s.regs.gscb; 4750 restore_gs_cb(current->thread.gs_cb); 4751 } 4752 preempt_enable(); 4753 } 4754 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4755 } 4756 4757 static void sync_regs(struct kvm_vcpu *vcpu) 4758 { 4759 struct kvm_run *kvm_run = vcpu->run; 4760 4761 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4762 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4763 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4764 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4765 /* some control register changes require a tlb flush */ 4766 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4767 } 4768 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4769 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4770 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4771 } 4772 save_access_regs(vcpu->arch.host_acrs); 4773 restore_access_regs(vcpu->run->s.regs.acrs); 4774 /* save host (userspace) fprs/vrs */ 4775 save_fpu_regs(); 4776 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4777 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4778 if (MACHINE_HAS_VX) 4779 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4780 else 4781 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4782 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4783 if (test_fp_ctl(current->thread.fpu.fpc)) 4784 /* User space provided an invalid FPC, let's clear it */ 4785 current->thread.fpu.fpc = 0; 4786 4787 /* Sync fmt2 only data */ 4788 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4789 sync_regs_fmt2(vcpu); 4790 } else { 4791 /* 4792 * In several places we have to modify our internal view to 4793 * not do things that are disallowed by the ultravisor. For 4794 * example we must not inject interrupts after specific exits 4795 * (e.g. 112 prefix page not secure). We do this by turning 4796 * off the machine check, external and I/O interrupt bits 4797 * of our PSW copy. To avoid getting validity intercepts, we 4798 * do only accept the condition code from userspace. 
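 * All other bits of the guest PSW copy are left untouched here.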
4799 */ 4800 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4801 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4802 PSW_MASK_CC; 4803 } 4804 4805 kvm_run->kvm_dirty_regs = 0; 4806 } 4807 4808 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4809 { 4810 struct kvm_run *kvm_run = vcpu->run; 4811 4812 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4813 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4814 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4815 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4816 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4817 if (MACHINE_HAS_GS) { 4818 preempt_disable(); 4819 __ctl_set_bit(2, 4); 4820 if (vcpu->arch.gs_enabled) 4821 save_gs_cb(current->thread.gs_cb); 4822 current->thread.gs_cb = vcpu->arch.host_gscb; 4823 restore_gs_cb(vcpu->arch.host_gscb); 4824 if (!vcpu->arch.host_gscb) 4825 __ctl_clear_bit(2, 4); 4826 vcpu->arch.host_gscb = NULL; 4827 preempt_enable(); 4828 } 4829 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4830 } 4831 4832 static void store_regs(struct kvm_vcpu *vcpu) 4833 { 4834 struct kvm_run *kvm_run = vcpu->run; 4835 4836 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4837 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4838 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4839 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4840 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4841 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4842 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4843 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4844 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4845 save_access_regs(vcpu->run->s.regs.acrs); 4846 restore_access_regs(vcpu->arch.host_acrs); 4847 /* Save guest register state */ 4848 save_fpu_regs(); 4849 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4850 /* Restore will be done lazily at return */ 4851 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4852 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4853 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4854 store_regs_fmt2(vcpu); 4855 } 4856 4857 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4858 { 4859 struct kvm_run *kvm_run = vcpu->run; 4860 int rc; 4861 4862 /* 4863 * Running a VM while dumping always has the potential to 4864 * produce inconsistent dump data. But for PV vcpus a SIE 4865 * entry while dumping could also lead to a fatal validity 4866 * intercept which we absolutely want to avoid. 
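 * Therefore, refuse to run a vcpu while a dump is in progress.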
4867 */ 4868 if (vcpu->kvm->arch.pv.dumping) 4869 return -EINVAL; 4870 4871 if (kvm_run->immediate_exit) 4872 return -EINTR; 4873 4874 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4875 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4876 return -EINVAL; 4877 4878 vcpu_load(vcpu); 4879 4880 if (guestdbg_exit_pending(vcpu)) { 4881 kvm_s390_prepare_debug_exit(vcpu); 4882 rc = 0; 4883 goto out; 4884 } 4885 4886 kvm_sigset_activate(vcpu); 4887 4888 /* 4889 * no need to check the return value of vcpu_start as it can only have 4890 * an error for protvirt, but protvirt means user cpu state 4891 */ 4892 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4893 kvm_s390_vcpu_start(vcpu); 4894 } else if (is_vcpu_stopped(vcpu)) { 4895 pr_err_ratelimited("can't run stopped vcpu %d\n", 4896 vcpu->vcpu_id); 4897 rc = -EINVAL; 4898 goto out; 4899 } 4900 4901 sync_regs(vcpu); 4902 enable_cpu_timer_accounting(vcpu); 4903 4904 might_fault(); 4905 rc = __vcpu_run(vcpu); 4906 4907 if (signal_pending(current) && !rc) { 4908 kvm_run->exit_reason = KVM_EXIT_INTR; 4909 rc = -EINTR; 4910 } 4911 4912 if (guestdbg_exit_pending(vcpu) && !rc) { 4913 kvm_s390_prepare_debug_exit(vcpu); 4914 rc = 0; 4915 } 4916 4917 if (rc == -EREMOTE) { 4918 /* userspace support is needed, kvm_run has been prepared */ 4919 rc = 0; 4920 } 4921 4922 disable_cpu_timer_accounting(vcpu); 4923 store_regs(vcpu); 4924 4925 kvm_sigset_deactivate(vcpu); 4926 4927 vcpu->stat.exit_userspace++; 4928 out: 4929 vcpu_put(vcpu); 4930 return rc; 4931 } 4932 4933 /* 4934 * store status at address 4935 * we use have two special cases: 4936 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 4937 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 4938 */ 4939 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 4940 { 4941 unsigned char archmode = 1; 4942 freg_t fprs[NUM_FPRS]; 4943 unsigned int px; 4944 u64 clkcomp, cputm; 4945 int rc; 4946 4947 px = kvm_s390_get_prefix(vcpu); 4948 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 4949 if (write_guest_abs(vcpu, 163, &archmode, 1)) 4950 return -EFAULT; 4951 gpa = 0; 4952 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 4953 if (write_guest_real(vcpu, 163, &archmode, 1)) 4954 return -EFAULT; 4955 gpa = px; 4956 } else 4957 gpa -= __LC_FPREGS_SAVE_AREA; 4958 4959 /* manually convert vector registers if necessary */ 4960 if (MACHINE_HAS_VX) { 4961 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 4962 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4963 fprs, 128); 4964 } else { 4965 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 4966 vcpu->run->s.regs.fprs, 128); 4967 } 4968 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 4969 vcpu->run->s.regs.gprs, 128); 4970 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 4971 &vcpu->arch.sie_block->gpsw, 16); 4972 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 4973 &px, 4); 4974 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 4975 &vcpu->run->s.regs.fpc, 4); 4976 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 4977 &vcpu->arch.sie_block->todpr, 4); 4978 cputm = kvm_s390_get_cpu_timer(vcpu); 4979 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 4980 &cputm, 8); 4981 clkcomp = vcpu->arch.sie_block->ckc >> 8; 4982 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 4983 &clkcomp, 8); 4984 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 4985 &vcpu->run->s.regs.acrs, 64); 4986 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 4987 
&vcpu->arch.sie_block->gcr, 128); 4988 return rc ? -EFAULT : 0; 4989 } 4990 4991 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 4992 { 4993 /* 4994 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 4995 * switch in the run ioctl. Let's update our copies before we save 4996 * it into the save area 4997 */ 4998 save_fpu_regs(); 4999 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 5000 save_access_regs(vcpu->run->s.regs.acrs); 5001 5002 return kvm_s390_store_status_unloaded(vcpu, addr); 5003 } 5004 5005 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5006 { 5007 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 5008 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 5009 } 5010 5011 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 5012 { 5013 unsigned long i; 5014 struct kvm_vcpu *vcpu; 5015 5016 kvm_for_each_vcpu(i, vcpu, kvm) { 5017 __disable_ibs_on_vcpu(vcpu); 5018 } 5019 } 5020 5021 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5022 { 5023 if (!sclp.has_ibs) 5024 return; 5025 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 5026 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 5027 } 5028 5029 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 5030 { 5031 int i, online_vcpus, r = 0, started_vcpus = 0; 5032 5033 if (!is_vcpu_stopped(vcpu)) 5034 return 0; 5035 5036 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 5037 /* Only one cpu at a time may enter/leave the STOPPED state. */ 5038 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5039 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5040 5041 /* Let's tell the UV that we want to change into the operating state */ 5042 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5043 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 5044 if (r) { 5045 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5046 return r; 5047 } 5048 } 5049 5050 for (i = 0; i < online_vcpus; i++) { 5051 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 5052 started_vcpus++; 5053 } 5054 5055 if (started_vcpus == 0) { 5056 /* we're the only active VCPU -> speed it up */ 5057 __enable_ibs_on_vcpu(vcpu); 5058 } else if (started_vcpus == 1) { 5059 /* 5060 * As we are starting a second VCPU, we have to disable 5061 * the IBS facility on all VCPUs to remove potentially 5062 * outstanding ENABLE requests. 5063 */ 5064 __disable_ibs_on_all_vcpus(vcpu->kvm); 5065 } 5066 5067 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 5068 /* 5069 * The real PSW might have changed due to a RESTART interpreted by the 5070 * ultravisor. We block all interrupts and let the next sie exit 5071 * refresh our view. 5072 */ 5073 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5074 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 5075 /* 5076 * Another VCPU might have used IBS while we were offline. 5077 * Let's play safe and flush the VCPU at startup. 5078 */ 5079 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 5080 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5081 return 0; 5082 } 5083 5084 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 5085 { 5086 int i, online_vcpus, r = 0, started_vcpus = 0; 5087 struct kvm_vcpu *started_vcpu = NULL; 5088 5089 if (is_vcpu_stopped(vcpu)) 5090 return 0; 5091 5092 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 5093 /* Only one cpu at a time may enter/leave the STOPPED state. 
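 * The start_stop_lock taken below serializes all such transitions.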
*/ 5094 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5095 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5096 5097 /* Let's tell the UV that we want to change into the stopped state */ 5098 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5099 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 5100 if (r) { 5101 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5102 return r; 5103 } 5104 } 5105 5106 /* 5107 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 5108 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 5109 * have been fully processed. This will ensure that the VCPU 5110 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 5111 */ 5112 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 5113 kvm_s390_clear_stop_irq(vcpu); 5114 5115 __disable_ibs_on_vcpu(vcpu); 5116 5117 for (i = 0; i < online_vcpus; i++) { 5118 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 5119 5120 if (!is_vcpu_stopped(tmp)) { 5121 started_vcpus++; 5122 started_vcpu = tmp; 5123 } 5124 } 5125 5126 if (started_vcpus == 1) { 5127 /* 5128 * As we only have one VCPU left, we want to enable the 5129 * IBS facility for that VCPU to speed it up. 5130 */ 5131 __enable_ibs_on_vcpu(started_vcpu); 5132 } 5133 5134 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5135 return 0; 5136 } 5137 5138 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 5139 struct kvm_enable_cap *cap) 5140 { 5141 int r; 5142 5143 if (cap->flags) 5144 return -EINVAL; 5145 5146 switch (cap->cap) { 5147 case KVM_CAP_S390_CSS_SUPPORT: 5148 if (!vcpu->kvm->arch.css_support) { 5149 vcpu->kvm->arch.css_support = 1; 5150 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 5151 trace_kvm_s390_enable_css(vcpu->kvm); 5152 } 5153 r = 0; 5154 break; 5155 default: 5156 r = -EINVAL; 5157 break; 5158 } 5159 return r; 5160 } 5161 5162 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, 5163 struct kvm_s390_mem_op *mop) 5164 { 5165 void __user *uaddr = (void __user *)mop->buf; 5166 int r = 0; 5167 5168 if (mop->flags || !mop->size) 5169 return -EINVAL; 5170 if (mop->size + mop->sida_offset < mop->size) 5171 return -EINVAL; 5172 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 5173 return -E2BIG; 5174 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 5175 return -EINVAL; 5176 5177 switch (mop->op) { 5178 case KVM_S390_MEMOP_SIDA_READ: 5179 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 5180 mop->sida_offset), mop->size)) 5181 r = -EFAULT; 5182 5183 break; 5184 case KVM_S390_MEMOP_SIDA_WRITE: 5185 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 5186 mop->sida_offset), uaddr, mop->size)) 5187 r = -EFAULT; 5188 break; 5189 } 5190 return r; 5191 } 5192 5193 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, 5194 struct kvm_s390_mem_op *mop) 5195 { 5196 void __user *uaddr = (void __user *)mop->buf; 5197 void *tmpbuf = NULL; 5198 int r = 0; 5199 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 5200 | KVM_S390_MEMOP_F_CHECK_ONLY 5201 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 5202 5203 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 5204 return -EINVAL; 5205 if (mop->size > MEM_OP_MAX_SIZE) 5206 return -E2BIG; 5207 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5208 return -EINVAL; 5209 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 5210 if (access_key_invalid(mop->key)) 5211 return -EINVAL; 5212 } else { 5213 mop->key = 0; 5214 } 5215 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 5216 tmpbuf = vmalloc(mop->size); 5217 if (!tmpbuf) 5218 return 
-ENOMEM; 5219 } 5220 5221 switch (mop->op) { 5222 case KVM_S390_MEMOP_LOGICAL_READ: 5223 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 5224 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 5225 GACC_FETCH, mop->key); 5226 break; 5227 } 5228 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 5229 mop->size, mop->key); 5230 if (r == 0) { 5231 if (copy_to_user(uaddr, tmpbuf, mop->size)) 5232 r = -EFAULT; 5233 } 5234 break; 5235 case KVM_S390_MEMOP_LOGICAL_WRITE: 5236 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 5237 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 5238 GACC_STORE, mop->key); 5239 break; 5240 } 5241 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 5242 r = -EFAULT; 5243 break; 5244 } 5245 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 5246 mop->size, mop->key); 5247 break; 5248 } 5249 5250 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 5251 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 5252 5253 vfree(tmpbuf); 5254 return r; 5255 } 5256 5257 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, 5258 struct kvm_s390_mem_op *mop) 5259 { 5260 int r, srcu_idx; 5261 5262 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5263 5264 switch (mop->op) { 5265 case KVM_S390_MEMOP_LOGICAL_READ: 5266 case KVM_S390_MEMOP_LOGICAL_WRITE: 5267 r = kvm_s390_vcpu_mem_op(vcpu, mop); 5268 break; 5269 case KVM_S390_MEMOP_SIDA_READ: 5270 case KVM_S390_MEMOP_SIDA_WRITE: 5271 /* we are locked against sida going away by the vcpu->mutex */ 5272 r = kvm_s390_vcpu_sida_op(vcpu, mop); 5273 break; 5274 default: 5275 r = -EINVAL; 5276 } 5277 5278 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 5279 return r; 5280 } 5281 5282 long kvm_arch_vcpu_async_ioctl(struct file *filp, 5283 unsigned int ioctl, unsigned long arg) 5284 { 5285 struct kvm_vcpu *vcpu = filp->private_data; 5286 void __user *argp = (void __user *)arg; 5287 5288 switch (ioctl) { 5289 case KVM_S390_IRQ: { 5290 struct kvm_s390_irq s390irq; 5291 5292 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 5293 return -EFAULT; 5294 return kvm_s390_inject_vcpu(vcpu, &s390irq); 5295 } 5296 case KVM_S390_INTERRUPT: { 5297 struct kvm_s390_interrupt s390int; 5298 struct kvm_s390_irq s390irq = {}; 5299 5300 if (copy_from_user(&s390int, argp, sizeof(s390int))) 5301 return -EFAULT; 5302 if (s390int_to_s390irq(&s390int, &s390irq)) 5303 return -EINVAL; 5304 return kvm_s390_inject_vcpu(vcpu, &s390irq); 5305 } 5306 } 5307 return -ENOIOCTLCMD; 5308 } 5309 5310 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu, 5311 struct kvm_pv_cmd *cmd) 5312 { 5313 struct kvm_s390_pv_dmp dmp; 5314 void *data; 5315 int ret; 5316 5317 /* Dump initialization is a prerequisite */ 5318 if (!vcpu->kvm->arch.pv.dumping) 5319 return -EINVAL; 5320 5321 if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp))) 5322 return -EFAULT; 5323 5324 /* We only handle this subcmd right now */ 5325 if (dmp.subcmd != KVM_PV_DUMP_CPU) 5326 return -EINVAL; 5327 5328 /* CPU dump length is the same as create cpu storage donation. 
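 * Reject requests whose buffer length does not match it.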
*/ 5329 if (dmp.buff_len != uv_info.guest_cpu_stor_len) 5330 return -EINVAL; 5331 5332 data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL); 5333 if (!data) 5334 return -ENOMEM; 5335 5336 ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc); 5337 5338 VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x", 5339 vcpu->vcpu_id, cmd->rc, cmd->rrc); 5340 5341 if (ret) 5342 ret = -EINVAL; 5343 5344 /* On success copy over the dump data */ 5345 if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len)) 5346 ret = -EFAULT; 5347 5348 kvfree(data); 5349 return ret; 5350 } 5351 5352 long kvm_arch_vcpu_ioctl(struct file *filp, 5353 unsigned int ioctl, unsigned long arg) 5354 { 5355 struct kvm_vcpu *vcpu = filp->private_data; 5356 void __user *argp = (void __user *)arg; 5357 int idx; 5358 long r; 5359 u16 rc, rrc; 5360 5361 vcpu_load(vcpu); 5362 5363 switch (ioctl) { 5364 case KVM_S390_STORE_STATUS: 5365 idx = srcu_read_lock(&vcpu->kvm->srcu); 5366 r = kvm_s390_store_status_unloaded(vcpu, arg); 5367 srcu_read_unlock(&vcpu->kvm->srcu, idx); 5368 break; 5369 case KVM_S390_SET_INITIAL_PSW: { 5370 psw_t psw; 5371 5372 r = -EFAULT; 5373 if (copy_from_user(&psw, argp, sizeof(psw))) 5374 break; 5375 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 5376 break; 5377 } 5378 case KVM_S390_CLEAR_RESET: 5379 r = 0; 5380 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 5381 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5382 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 5383 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 5384 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 5385 rc, rrc); 5386 } 5387 break; 5388 case KVM_S390_INITIAL_RESET: 5389 r = 0; 5390 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 5391 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5392 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 5393 UVC_CMD_CPU_RESET_INITIAL, 5394 &rc, &rrc); 5395 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", 5396 rc, rrc); 5397 } 5398 break; 5399 case KVM_S390_NORMAL_RESET: 5400 r = 0; 5401 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 5402 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5403 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 5404 UVC_CMD_CPU_RESET, &rc, &rrc); 5405 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x", 5406 rc, rrc); 5407 } 5408 break; 5409 case KVM_SET_ONE_REG: 5410 case KVM_GET_ONE_REG: { 5411 struct kvm_one_reg reg; 5412 r = -EINVAL; 5413 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5414 break; 5415 r = -EFAULT; 5416 if (copy_from_user(&reg, argp, sizeof(reg))) 5417 break; 5418 if (ioctl == KVM_SET_ONE_REG) 5419 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg); 5420 else 5421 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg); 5422 break; 5423 } 5424 #ifdef CONFIG_KVM_S390_UCONTROL 5425 case KVM_S390_UCAS_MAP: { 5426 struct kvm_s390_ucas_mapping ucasmap; 5427 5428 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 5429 r = -EFAULT; 5430 break; 5431 } 5432 5433 if (!kvm_is_ucontrol(vcpu->kvm)) { 5434 r = -EINVAL; 5435 break; 5436 } 5437 5438 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, 5439 ucasmap.vcpu_addr, ucasmap.length); 5440 break; 5441 } 5442 case KVM_S390_UCAS_UNMAP: { 5443 struct kvm_s390_ucas_mapping ucasmap; 5444 5445 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { 5446 r = -EFAULT; 5447 break; 5448 } 5449 5450 if (!kvm_is_ucontrol(vcpu->kvm)) { 5451 r = -EINVAL; 5452 break; 5453 } 5454 5455 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, 5456 ucasmap.length); 5457 break; 5458 } 5459 #endif 5460 case
KVM_S390_VCPU_FAULT: { 5461 r = gmap_fault(vcpu->arch.gmap, arg, 0); 5462 break; 5463 } 5464 case KVM_ENABLE_CAP: 5465 { 5466 struct kvm_enable_cap cap; 5467 r = -EFAULT; 5468 if (copy_from_user(&cap, argp, sizeof(cap))) 5469 break; 5470 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 5471 break; 5472 } 5473 case KVM_S390_MEM_OP: { 5474 struct kvm_s390_mem_op mem_op; 5475 5476 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 5477 r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); 5478 else 5479 r = -EFAULT; 5480 break; 5481 } 5482 case KVM_S390_SET_IRQ_STATE: { 5483 struct kvm_s390_irq_state irq_state; 5484 5485 r = -EFAULT; 5486 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5487 break; 5488 if (irq_state.len > VCPU_IRQS_MAX_BUF || 5489 irq_state.len == 0 || 5490 irq_state.len % sizeof(struct kvm_s390_irq) > 0) { 5491 r = -EINVAL; 5492 break; 5493 } 5494 /* do not use irq_state.flags, it will break old QEMUs */ 5495 r = kvm_s390_set_irq_state(vcpu, 5496 (void __user *) irq_state.buf, 5497 irq_state.len); 5498 break; 5499 } 5500 case KVM_S390_GET_IRQ_STATE: { 5501 struct kvm_s390_irq_state irq_state; 5502 5503 r = -EFAULT; 5504 if (copy_from_user(&irq_state, argp, sizeof(irq_state))) 5505 break; 5506 if (irq_state.len == 0) { 5507 r = -EINVAL; 5508 break; 5509 } 5510 /* do not use irq_state.flags, it will break old QEMUs */ 5511 r = kvm_s390_get_irq_state(vcpu, 5512 (__u8 __user *) irq_state.buf, 5513 irq_state.len); 5514 break; 5515 } 5516 case KVM_S390_PV_CPU_COMMAND: { 5517 struct kvm_pv_cmd cmd; 5518 5519 r = -EINVAL; 5520 if (!is_prot_virt_host()) 5521 break; 5522 5523 r = -EFAULT; 5524 if (copy_from_user(&cmd, argp, sizeof(cmd))) 5525 break; 5526 5527 r = -EINVAL; 5528 if (cmd.flags) 5529 break; 5530 5531 /* We only handle this cmd right now */ 5532 if (cmd.cmd != KVM_PV_DUMP) 5533 break; 5534 5535 r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd); 5536 5537 /* Always copy over UV rc / rrc data */ 5538 if (copy_to_user((__u8 __user *)argp, &cmd.rc, 5539 sizeof(cmd.rc) + sizeof(cmd.rrc))) 5540 r = -EFAULT; 5541 break; 5542 } 5543 default: 5544 r = -ENOTTY; 5545 } 5546 5547 vcpu_put(vcpu); 5548 return r; 5549 } 5550 5551 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 5552 { 5553 #ifdef CONFIG_KVM_S390_UCONTROL 5554 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5555 && (kvm_is_ucontrol(vcpu->kvm))) { 5556 vmf->page = virt_to_page(vcpu->arch.sie_block); 5557 get_page(vmf->page); 5558 return 0; 5559 } 5560 #endif 5561 return VM_FAULT_SIGBUS; 5562 } 5563 5564 /* Section: memory related */ 5565 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5566 const struct kvm_memory_slot *old, 5567 struct kvm_memory_slot *new, 5568 enum kvm_mr_change change) 5569 { 5570 gpa_t size; 5571 5572 /* When we are protected, we should not change the memory slots */ 5573 if (kvm_s390_pv_get_handle(kvm)) 5574 return -EINVAL; 5575 5576 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5577 return 0; 5578 5579 /* A few sanity checks. We can have memory slots which have to be 5580 located/ended at a segment boundary (1MB). The memory in userland is 5581 ok to be fragmented into various different vmas. 
It is okay to mmap() 5582 and munmap() stuff in this slot after doing this call at any time */ 5583 5584 if (new->userspace_addr & 0xffffful) 5585 return -EINVAL; 5586 5587 size = new->npages * PAGE_SIZE; 5588 if (size & 0xffffful) 5589 return -EINVAL; 5590 5591 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5592 return -EINVAL; 5593 5594 return 0; 5595 } 5596 5597 void kvm_arch_commit_memory_region(struct kvm *kvm, 5598 struct kvm_memory_slot *old, 5599 const struct kvm_memory_slot *new, 5600 enum kvm_mr_change change) 5601 { 5602 int rc = 0; 5603 5604 switch (change) { 5605 case KVM_MR_DELETE: 5606 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5607 old->npages * PAGE_SIZE); 5608 break; 5609 case KVM_MR_MOVE: 5610 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5611 old->npages * PAGE_SIZE); 5612 if (rc) 5613 break; 5614 fallthrough; 5615 case KVM_MR_CREATE: 5616 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5617 new->base_gfn * PAGE_SIZE, 5618 new->npages * PAGE_SIZE); 5619 break; 5620 case KVM_MR_FLAGS_ONLY: 5621 break; 5622 default: 5623 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5624 } 5625 if (rc) 5626 pr_warn("failed to commit memory region\n"); 5627 return; 5628 } 5629 5630 static inline unsigned long nonhyp_mask(int i) 5631 { 5632 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5633 5634 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5635 } 5636 5637 static int __init kvm_s390_init(void) 5638 { 5639 int i; 5640 5641 if (!sclp.has_sief2) { 5642 pr_info("SIE is not available\n"); 5643 return -ENODEV; 5644 } 5645 5646 if (nested && hpage) { 5647 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5648 return -EINVAL; 5649 } 5650 5651 for (i = 0; i < 16; i++) 5652 kvm_s390_fac_base[i] |= 5653 stfle_fac_list[i] & nonhyp_mask(i); 5654 5655 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5656 } 5657 5658 static void __exit kvm_s390_exit(void) 5659 { 5660 kvm_exit(); 5661 } 5662 5663 module_init(kvm_s390_init); 5664 module_exit(kvm_s390_exit); 5665 5666 /* 5667 * Enable autoloading of the kvm module. 5668 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5669 * since x86 takes a different approach. 5670 */ 5671 #include <linux/miscdevice.h> 5672 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5673 MODULE_ALIAS("devname:kvm"); 5674