1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * hosting IBM Z kernel virtual machines (s390x) 4 * 5 * Copyright IBM Corp. 2008, 2020 6 * 7 * Author(s): Carsten Otte <cotte@de.ibm.com> 8 * Christian Borntraeger <borntraeger@de.ibm.com> 9 * Christian Ehrhardt <ehrhardt@de.ibm.com> 10 * Jason J. Herne <jjherne@us.ibm.com> 11 */ 12 13 #define KMSG_COMPONENT "kvm-s390" 14 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 15 16 #include <linux/compiler.h> 17 #include <linux/err.h> 18 #include <linux/fs.h> 19 #include <linux/hrtimer.h> 20 #include <linux/init.h> 21 #include <linux/kvm.h> 22 #include <linux/kvm_host.h> 23 #include <linux/mman.h> 24 #include <linux/module.h> 25 #include <linux/moduleparam.h> 26 #include <linux/random.h> 27 #include <linux/slab.h> 28 #include <linux/timer.h> 29 #include <linux/vmalloc.h> 30 #include <linux/bitmap.h> 31 #include <linux/sched/signal.h> 32 #include <linux/string.h> 33 #include <linux/pgtable.h> 34 35 #include <asm/asm-offsets.h> 36 #include <asm/lowcore.h> 37 #include <asm/stp.h> 38 #include <asm/gmap.h> 39 #include <asm/nmi.h> 40 #include <asm/switch_to.h> 41 #include <asm/isc.h> 42 #include <asm/sclp.h> 43 #include <asm/cpacf.h> 44 #include <asm/timex.h> 45 #include <asm/ap.h> 46 #include <asm/uv.h> 47 #include <asm/fpu/api.h> 48 #include "kvm-s390.h" 49 #include "gaccess.h" 50 51 #define CREATE_TRACE_POINTS 52 #include "trace.h" 53 #include "trace-s390.h" 54 55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ 56 #define LOCAL_IRQS 32 57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ 58 (KVM_MAX_VCPUS + LOCAL_IRQS)) 59 60 const struct _kvm_stats_desc kvm_vm_stats_desc[] = { 61 KVM_GENERIC_VM_STATS(), 62 STATS_DESC_COUNTER(VM, inject_io), 63 STATS_DESC_COUNTER(VM, inject_float_mchk), 64 STATS_DESC_COUNTER(VM, inject_pfault_done), 65 STATS_DESC_COUNTER(VM, inject_service_signal), 66 STATS_DESC_COUNTER(VM, inject_virtio) 67 }; 68 69 const struct kvm_stats_header kvm_vm_stats_header = { 70 .name_size = KVM_STATS_NAME_SIZE, 71 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc), 72 .id_offset = sizeof(struct kvm_stats_header), 73 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, 74 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + 75 sizeof(kvm_vm_stats_desc), 76 }; 77 78 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { 79 KVM_GENERIC_VCPU_STATS(), 80 STATS_DESC_COUNTER(VCPU, exit_userspace), 81 STATS_DESC_COUNTER(VCPU, exit_null), 82 STATS_DESC_COUNTER(VCPU, exit_external_request), 83 STATS_DESC_COUNTER(VCPU, exit_io_request), 84 STATS_DESC_COUNTER(VCPU, exit_external_interrupt), 85 STATS_DESC_COUNTER(VCPU, exit_stop_request), 86 STATS_DESC_COUNTER(VCPU, exit_validity), 87 STATS_DESC_COUNTER(VCPU, exit_instruction), 88 STATS_DESC_COUNTER(VCPU, exit_pei), 89 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal), 90 STATS_DESC_COUNTER(VCPU, instruction_lctl), 91 STATS_DESC_COUNTER(VCPU, instruction_lctlg), 92 STATS_DESC_COUNTER(VCPU, instruction_stctl), 93 STATS_DESC_COUNTER(VCPU, instruction_stctg), 94 STATS_DESC_COUNTER(VCPU, exit_program_interruption), 95 STATS_DESC_COUNTER(VCPU, exit_instr_and_program), 96 STATS_DESC_COUNTER(VCPU, exit_operation_exception), 97 STATS_DESC_COUNTER(VCPU, deliver_ckc), 98 STATS_DESC_COUNTER(VCPU, deliver_cputm), 99 STATS_DESC_COUNTER(VCPU, deliver_external_call), 100 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal), 101 STATS_DESC_COUNTER(VCPU, deliver_service_signal), 102 STATS_DESC_COUNTER(VCPU, deliver_virtio), 103 STATS_DESC_COUNTER(VCPU, 
deliver_stop_signal), 104 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal), 105 STATS_DESC_COUNTER(VCPU, deliver_restart_signal), 106 STATS_DESC_COUNTER(VCPU, deliver_program), 107 STATS_DESC_COUNTER(VCPU, deliver_io), 108 STATS_DESC_COUNTER(VCPU, deliver_machine_check), 109 STATS_DESC_COUNTER(VCPU, exit_wait_state), 110 STATS_DESC_COUNTER(VCPU, inject_ckc), 111 STATS_DESC_COUNTER(VCPU, inject_cputm), 112 STATS_DESC_COUNTER(VCPU, inject_external_call), 113 STATS_DESC_COUNTER(VCPU, inject_emergency_signal), 114 STATS_DESC_COUNTER(VCPU, inject_mchk), 115 STATS_DESC_COUNTER(VCPU, inject_pfault_init), 116 STATS_DESC_COUNTER(VCPU, inject_program), 117 STATS_DESC_COUNTER(VCPU, inject_restart), 118 STATS_DESC_COUNTER(VCPU, inject_set_prefix), 119 STATS_DESC_COUNTER(VCPU, inject_stop_signal), 120 STATS_DESC_COUNTER(VCPU, instruction_epsw), 121 STATS_DESC_COUNTER(VCPU, instruction_gs), 122 STATS_DESC_COUNTER(VCPU, instruction_io_other), 123 STATS_DESC_COUNTER(VCPU, instruction_lpsw), 124 STATS_DESC_COUNTER(VCPU, instruction_lpswe), 125 STATS_DESC_COUNTER(VCPU, instruction_pfmf), 126 STATS_DESC_COUNTER(VCPU, instruction_ptff), 127 STATS_DESC_COUNTER(VCPU, instruction_sck), 128 STATS_DESC_COUNTER(VCPU, instruction_sckpf), 129 STATS_DESC_COUNTER(VCPU, instruction_stidp), 130 STATS_DESC_COUNTER(VCPU, instruction_spx), 131 STATS_DESC_COUNTER(VCPU, instruction_stpx), 132 STATS_DESC_COUNTER(VCPU, instruction_stap), 133 STATS_DESC_COUNTER(VCPU, instruction_iske), 134 STATS_DESC_COUNTER(VCPU, instruction_ri), 135 STATS_DESC_COUNTER(VCPU, instruction_rrbe), 136 STATS_DESC_COUNTER(VCPU, instruction_sske), 137 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock), 138 STATS_DESC_COUNTER(VCPU, instruction_stsi), 139 STATS_DESC_COUNTER(VCPU, instruction_stfl), 140 STATS_DESC_COUNTER(VCPU, instruction_tb), 141 STATS_DESC_COUNTER(VCPU, instruction_tpi), 142 STATS_DESC_COUNTER(VCPU, instruction_tprot), 143 STATS_DESC_COUNTER(VCPU, instruction_tsch), 144 STATS_DESC_COUNTER(VCPU, instruction_sie), 145 STATS_DESC_COUNTER(VCPU, instruction_essa), 146 STATS_DESC_COUNTER(VCPU, instruction_sthyi), 147 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense), 148 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running), 149 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call), 150 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency), 151 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency), 152 STATS_DESC_COUNTER(VCPU, instruction_sigp_start), 153 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop), 154 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status), 155 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status), 156 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status), 157 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch), 158 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix), 159 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart), 160 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), 161 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), 162 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), 163 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), 164 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), 165 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), 166 STATS_DESC_COUNTER(VCPU, diag_9c_ignored), 167 STATS_DESC_COUNTER(VCPU, diag_9c_forward), 168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), 169 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), 170 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), 171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), 172 STATS_DESC_COUNTER(VCPU, 
pfault_sync) 173 }; 174 175 const struct kvm_stats_header kvm_vcpu_stats_header = { 176 .name_size = KVM_STATS_NAME_SIZE, 177 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), 178 .id_offset = sizeof(struct kvm_stats_header), 179 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, 180 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + 181 sizeof(kvm_vcpu_stats_desc), 182 }; 183 184 /* allow nested virtualization in KVM (if enabled by user space) */ 185 static int nested; 186 module_param(nested, int, S_IRUGO); 187 MODULE_PARM_DESC(nested, "Nested virtualization support"); 188 189 /* allow 1m huge page guest backing, if !nested */ 190 static int hpage; 191 module_param(hpage, int, 0444); 192 MODULE_PARM_DESC(hpage, "1m huge page backing support"); 193 194 /* maximum percentage of steal time for polling. >100 is treated like 100 */ 195 static u8 halt_poll_max_steal = 10; 196 module_param(halt_poll_max_steal, byte, 0644); 197 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); 198 199 /* if set to true, the GISA will be initialized and used if available */ 200 static bool use_gisa = true; 201 module_param(use_gisa, bool, 0644); 202 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it."); 203 204 /* maximum diag9c forwarding per second */ 205 unsigned int diag9c_forwarding_hz; 206 module_param(diag9c_forwarding_hz, uint, 0644); 207 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); 208 209 /* 210 * For now we handle at most 16 double words as this is what the s390 base 211 * kernel handles and stores in the prefix page. If we ever need to go beyond 212 * this, this requires changes to code, but the external uapi can stay. 213 */ 214 #define SIZE_INTERNAL 16 215 216 /* 217 * Base feature mask that defines default mask for facilities. Consists of the 218 * defines in FACILITIES_KVM and the non-hypervisor managed bits. 219 */ 220 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; 221 /* 222 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL 223 * and defines the facilities that can be enabled via a cpu model. 
224 */ 225 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL }; 226 227 static unsigned long kvm_s390_fac_size(void) 228 { 229 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); 230 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); 231 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > 232 sizeof(stfle_fac_list)); 233 234 return SIZE_INTERNAL; 235 } 236 237 /* available cpu features supported by kvm */ 238 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); 239 /* available subfunctions indicated via query / "test bit" */ 240 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; 241 242 static struct gmap_notifier gmap_notifier; 243 static struct gmap_notifier vsie_gmap_notifier; 244 debug_info_t *kvm_s390_dbf; 245 debug_info_t *kvm_s390_dbf_uv; 246 247 /* Section: not file related */ 248 int kvm_arch_hardware_enable(void) 249 { 250 /* every s390 is virtualization enabled ;-) */ 251 return 0; 252 } 253 254 int kvm_arch_check_processor_compat(void *opaque) 255 { 256 return 0; 257 } 258 259 /* forward declarations */ 260 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 261 unsigned long end); 262 static int sca_switch_to_extended(struct kvm *kvm); 263 264 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) 265 { 266 u8 delta_idx = 0; 267 268 /* 269 * The TOD jumps by delta, we have to compensate this by adding 270 * -delta to the epoch. 271 */ 272 delta = -delta; 273 274 /* sign-extension - we're adding to signed values below */ 275 if ((s64)delta < 0) 276 delta_idx = -1; 277 278 scb->epoch += delta; 279 if (scb->ecd & ECD_MEF) { 280 scb->epdx += delta_idx; 281 if (scb->epoch < delta) 282 scb->epdx += 1; 283 } 284 } 285 286 /* 287 * This callback is executed during stop_machine(). All CPUs are therefore 288 * temporarily stopped. In order not to change guest behavior, we have to 289 * disable preemption whenever we touch the epoch of kvm and the VCPUs, 290 * so a CPU won't be stopped while calculating with the epoch. 
291 */ 292 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, 293 void *v) 294 { 295 struct kvm *kvm; 296 struct kvm_vcpu *vcpu; 297 unsigned long i; 298 unsigned long long *delta = v; 299 300 list_for_each_entry(kvm, &vm_list, vm_list) { 301 kvm_for_each_vcpu(i, vcpu, kvm) { 302 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); 303 if (i == 0) { 304 kvm->arch.epoch = vcpu->arch.sie_block->epoch; 305 kvm->arch.epdx = vcpu->arch.sie_block->epdx; 306 } 307 if (vcpu->arch.cputm_enabled) 308 vcpu->arch.cputm_start += *delta; 309 if (vcpu->arch.vsie_block) 310 kvm_clock_sync_scb(vcpu->arch.vsie_block, 311 *delta); 312 } 313 } 314 return NOTIFY_OK; 315 } 316 317 static struct notifier_block kvm_clock_notifier = { 318 .notifier_call = kvm_clock_sync, 319 }; 320 321 int kvm_arch_hardware_setup(void *opaque) 322 { 323 gmap_notifier.notifier_call = kvm_gmap_notifier; 324 gmap_register_pte_notifier(&gmap_notifier); 325 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; 326 gmap_register_pte_notifier(&vsie_gmap_notifier); 327 atomic_notifier_chain_register(&s390_epoch_delta_notifier, 328 &kvm_clock_notifier); 329 return 0; 330 } 331 332 void kvm_arch_hardware_unsetup(void) 333 { 334 gmap_unregister_pte_notifier(&gmap_notifier); 335 gmap_unregister_pte_notifier(&vsie_gmap_notifier); 336 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, 337 &kvm_clock_notifier); 338 } 339 340 static void allow_cpu_feat(unsigned long nr) 341 { 342 set_bit_inv(nr, kvm_s390_available_cpu_feat); 343 } 344 345 static inline int plo_test_bit(unsigned char nr) 346 { 347 unsigned long function = (unsigned long)nr | 0x100; 348 int cc; 349 350 asm volatile( 351 " lgr 0,%[function]\n" 352 /* Parameter registers are ignored for "test bit" */ 353 " plo 0,0,0,0(0)\n" 354 " ipm %0\n" 355 " srl %0,28\n" 356 : "=d" (cc) 357 : [function] "d" (function) 358 : "cc", "0"); 359 return cc == 0; 360 } 361 362 static __always_inline void __insn32_query(unsigned int opcode, u8 *query) 363 { 364 asm volatile( 365 " lghi 0,0\n" 366 " lgr 1,%[query]\n" 367 /* Parameter registers are ignored */ 368 " .insn rrf,%[opc] << 16,2,4,6,0\n" 369 : 370 : [query] "d" ((unsigned long)query), [opc] "i" (opcode) 371 : "cc", "memory", "0", "1"); 372 } 373 374 #define INSN_SORTL 0xb938 375 #define INSN_DFLTCC 0xb939 376 377 static void kvm_s390_cpu_feat_init(void) 378 { 379 int i; 380 381 for (i = 0; i < 256; ++i) { 382 if (plo_test_bit(i)) 383 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); 384 } 385 386 if (test_facility(28)) /* TOD-clock steering */ 387 ptff(kvm_s390_available_subfunc.ptff, 388 sizeof(kvm_s390_available_subfunc.ptff), 389 PTFF_QAF); 390 391 if (test_facility(17)) { /* MSA */ 392 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *) 393 kvm_s390_available_subfunc.kmac); 394 __cpacf_query(CPACF_KMC, (cpacf_mask_t *) 395 kvm_s390_available_subfunc.kmc); 396 __cpacf_query(CPACF_KM, (cpacf_mask_t *) 397 kvm_s390_available_subfunc.km); 398 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *) 399 kvm_s390_available_subfunc.kimd); 400 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *) 401 kvm_s390_available_subfunc.klmd); 402 } 403 if (test_facility(76)) /* MSA3 */ 404 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *) 405 kvm_s390_available_subfunc.pckmo); 406 if (test_facility(77)) { /* MSA4 */ 407 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *) 408 kvm_s390_available_subfunc.kmctr); 409 __cpacf_query(CPACF_KMF, (cpacf_mask_t *) 410 kvm_s390_available_subfunc.kmf); 411 __cpacf_query(CPACF_KMO, (cpacf_mask_t *) 412 
kvm_s390_available_subfunc.kmo); 413 __cpacf_query(CPACF_PCC, (cpacf_mask_t *) 414 kvm_s390_available_subfunc.pcc); 415 } 416 if (test_facility(57)) /* MSA5 */ 417 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) 418 kvm_s390_available_subfunc.ppno); 419 420 if (test_facility(146)) /* MSA8 */ 421 __cpacf_query(CPACF_KMA, (cpacf_mask_t *) 422 kvm_s390_available_subfunc.kma); 423 424 if (test_facility(155)) /* MSA9 */ 425 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *) 426 kvm_s390_available_subfunc.kdsa); 427 428 if (test_facility(150)) /* SORTL */ 429 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); 430 431 if (test_facility(151)) /* DFLTCC */ 432 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); 433 434 if (MACHINE_HAS_ESOP) 435 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); 436 /* 437 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), 438 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). 439 */ 440 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || 441 !test_facility(3) || !nested) 442 return; 443 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); 444 if (sclp.has_64bscao) 445 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); 446 if (sclp.has_siif) 447 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); 448 if (sclp.has_gpere) 449 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); 450 if (sclp.has_gsls) 451 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); 452 if (sclp.has_ib) 453 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); 454 if (sclp.has_cei) 455 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); 456 if (sclp.has_ibs) 457 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); 458 if (sclp.has_kss) 459 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); 460 /* 461 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make 462 * all skey handling functions read/set the skey from the PGSTE 463 * instead of the real storage key. 464 * 465 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make 466 * pages being detected as preserved although they are resident. 467 * 468 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will 469 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. 470 * 471 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and 472 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be 473 * correctly shadowed. We can do that for the PGSTE but not for PTE.I. 474 * 475 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We 476 * cannot easily shadow the SCA because of the ipte lock. 477 */ 478 } 479 480 int kvm_arch_init(void *opaque) 481 { 482 int rc = -ENOMEM; 483 484 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); 485 if (!kvm_s390_dbf) 486 return -ENOMEM; 487 488 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long)); 489 if (!kvm_s390_dbf_uv) 490 goto out; 491 492 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) || 493 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view)) 494 goto out; 495 496 kvm_s390_cpu_feat_init(); 497 498 /* Register floating interrupt controller interface. 
*/ 499 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); 500 if (rc) { 501 pr_err("A FLIC registration call failed with rc=%d\n", rc); 502 goto out; 503 } 504 505 rc = kvm_s390_gib_init(GAL_ISC); 506 if (rc) 507 goto out; 508 509 return 0; 510 511 out: 512 kvm_arch_exit(); 513 return rc; 514 } 515 516 void kvm_arch_exit(void) 517 { 518 kvm_s390_gib_destroy(); 519 debug_unregister(kvm_s390_dbf); 520 debug_unregister(kvm_s390_dbf_uv); 521 } 522 523 /* Section: device related */ 524 long kvm_arch_dev_ioctl(struct file *filp, 525 unsigned int ioctl, unsigned long arg) 526 { 527 if (ioctl == KVM_S390_ENABLE_SIE) 528 return s390_enable_sie(); 529 return -EINVAL; 530 } 531 532 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 533 { 534 int r; 535 536 switch (ext) { 537 case KVM_CAP_S390_PSW: 538 case KVM_CAP_S390_GMAP: 539 case KVM_CAP_SYNC_MMU: 540 #ifdef CONFIG_KVM_S390_UCONTROL 541 case KVM_CAP_S390_UCONTROL: 542 #endif 543 case KVM_CAP_ASYNC_PF: 544 case KVM_CAP_SYNC_REGS: 545 case KVM_CAP_ONE_REG: 546 case KVM_CAP_ENABLE_CAP: 547 case KVM_CAP_S390_CSS_SUPPORT: 548 case KVM_CAP_IOEVENTFD: 549 case KVM_CAP_DEVICE_CTRL: 550 case KVM_CAP_S390_IRQCHIP: 551 case KVM_CAP_VM_ATTRIBUTES: 552 case KVM_CAP_MP_STATE: 553 case KVM_CAP_IMMEDIATE_EXIT: 554 case KVM_CAP_S390_INJECT_IRQ: 555 case KVM_CAP_S390_USER_SIGP: 556 case KVM_CAP_S390_USER_STSI: 557 case KVM_CAP_S390_SKEYS: 558 case KVM_CAP_S390_IRQ_STATE: 559 case KVM_CAP_S390_USER_INSTR0: 560 case KVM_CAP_S390_CMMA_MIGRATION: 561 case KVM_CAP_S390_AIS: 562 case KVM_CAP_S390_AIS_MIGRATION: 563 case KVM_CAP_S390_VCPU_RESETS: 564 case KVM_CAP_SET_GUEST_DEBUG: 565 case KVM_CAP_S390_DIAG318: 566 case KVM_CAP_S390_MEM_OP_EXTENSION: 567 r = 1; 568 break; 569 case KVM_CAP_SET_GUEST_DEBUG2: 570 r = KVM_GUESTDBG_VALID_MASK; 571 break; 572 case KVM_CAP_S390_HPAGE_1M: 573 r = 0; 574 if (hpage && !kvm_is_ucontrol(kvm)) 575 r = 1; 576 break; 577 case KVM_CAP_S390_MEM_OP: 578 r = MEM_OP_MAX_SIZE; 579 break; 580 case KVM_CAP_NR_VCPUS: 581 case KVM_CAP_MAX_VCPUS: 582 case KVM_CAP_MAX_VCPU_ID: 583 r = KVM_S390_BSCA_CPU_SLOTS; 584 if (!kvm_s390_use_sca_entries()) 585 r = KVM_MAX_VCPUS; 586 else if (sclp.has_esca && sclp.has_64bscao) 587 r = KVM_S390_ESCA_CPU_SLOTS; 588 if (ext == KVM_CAP_NR_VCPUS) 589 r = min_t(unsigned int, num_online_cpus(), r); 590 break; 591 case KVM_CAP_S390_COW: 592 r = MACHINE_HAS_ESOP; 593 break; 594 case KVM_CAP_S390_VECTOR_REGISTERS: 595 r = MACHINE_HAS_VX; 596 break; 597 case KVM_CAP_S390_RI: 598 r = test_facility(64); 599 break; 600 case KVM_CAP_S390_GS: 601 r = test_facility(133); 602 break; 603 case KVM_CAP_S390_BPB: 604 r = test_facility(82); 605 break; 606 case KVM_CAP_S390_PROTECTED: 607 r = is_prot_virt_host(); 608 break; 609 default: 610 r = 0; 611 } 612 return r; 613 } 614 615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 616 { 617 int i; 618 gfn_t cur_gfn, last_gfn; 619 unsigned long gaddr, vmaddr; 620 struct gmap *gmap = kvm->arch.gmap; 621 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); 622 623 /* Loop over all guest segments */ 624 cur_gfn = memslot->base_gfn; 625 last_gfn = memslot->base_gfn + memslot->npages; 626 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { 627 gaddr = gfn_to_gpa(cur_gfn); 628 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); 629 if (kvm_is_error_hva(vmaddr)) 630 continue; 631 632 bitmap_zero(bitmap, _PAGE_ENTRIES); 633 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); 634 for (i = 0; i < _PAGE_ENTRIES; i++) { 635 if (test_bit(i, bitmap)) 636 
mark_page_dirty(kvm, cur_gfn + i); 637 } 638 639 if (fatal_signal_pending(current)) 640 return; 641 cond_resched(); 642 } 643 } 644 645 /* Section: vm related */ 646 static void sca_del_vcpu(struct kvm_vcpu *vcpu); 647 648 /* 649 * Get (and clear) the dirty memory log for a memory slot. 650 */ 651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 652 struct kvm_dirty_log *log) 653 { 654 int r; 655 unsigned long n; 656 struct kvm_memory_slot *memslot; 657 int is_dirty; 658 659 if (kvm_is_ucontrol(kvm)) 660 return -EINVAL; 661 662 mutex_lock(&kvm->slots_lock); 663 664 r = -EINVAL; 665 if (log->slot >= KVM_USER_MEM_SLOTS) 666 goto out; 667 668 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot); 669 if (r) 670 goto out; 671 672 /* Clear the dirty log */ 673 if (is_dirty) { 674 n = kvm_dirty_bitmap_bytes(memslot); 675 memset(memslot->dirty_bitmap, 0, n); 676 } 677 r = 0; 678 out: 679 mutex_unlock(&kvm->slots_lock); 680 return r; 681 } 682 683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm) 684 { 685 unsigned long i; 686 struct kvm_vcpu *vcpu; 687 688 kvm_for_each_vcpu(i, vcpu, kvm) { 689 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu); 690 } 691 } 692 693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 694 { 695 int r; 696 697 if (cap->flags) 698 return -EINVAL; 699 700 switch (cap->cap) { 701 case KVM_CAP_S390_IRQCHIP: 702 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); 703 kvm->arch.use_irqchip = 1; 704 r = 0; 705 break; 706 case KVM_CAP_S390_USER_SIGP: 707 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); 708 kvm->arch.user_sigp = 1; 709 r = 0; 710 break; 711 case KVM_CAP_S390_VECTOR_REGISTERS: 712 mutex_lock(&kvm->lock); 713 if (kvm->created_vcpus) { 714 r = -EBUSY; 715 } else if (MACHINE_HAS_VX) { 716 set_kvm_facility(kvm->arch.model.fac_mask, 129); 717 set_kvm_facility(kvm->arch.model.fac_list, 129); 718 if (test_facility(134)) { 719 set_kvm_facility(kvm->arch.model.fac_mask, 134); 720 set_kvm_facility(kvm->arch.model.fac_list, 134); 721 } 722 if (test_facility(135)) { 723 set_kvm_facility(kvm->arch.model.fac_mask, 135); 724 set_kvm_facility(kvm->arch.model.fac_list, 135); 725 } 726 if (test_facility(148)) { 727 set_kvm_facility(kvm->arch.model.fac_mask, 148); 728 set_kvm_facility(kvm->arch.model.fac_list, 148); 729 } 730 if (test_facility(152)) { 731 set_kvm_facility(kvm->arch.model.fac_mask, 152); 732 set_kvm_facility(kvm->arch.model.fac_list, 152); 733 } 734 if (test_facility(192)) { 735 set_kvm_facility(kvm->arch.model.fac_mask, 192); 736 set_kvm_facility(kvm->arch.model.fac_list, 192); 737 } 738 r = 0; 739 } else 740 r = -EINVAL; 741 mutex_unlock(&kvm->lock); 742 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 743 r ? "(not available)" : "(success)"); 744 break; 745 case KVM_CAP_S390_RI: 746 r = -EINVAL; 747 mutex_lock(&kvm->lock); 748 if (kvm->created_vcpus) { 749 r = -EBUSY; 750 } else if (test_facility(64)) { 751 set_kvm_facility(kvm->arch.model.fac_mask, 64); 752 set_kvm_facility(kvm->arch.model.fac_list, 64); 753 r = 0; 754 } 755 mutex_unlock(&kvm->lock); 756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 757 r ? "(not available)" : "(success)"); 758 break; 759 case KVM_CAP_S390_AIS: 760 mutex_lock(&kvm->lock); 761 if (kvm->created_vcpus) { 762 r = -EBUSY; 763 } else { 764 set_kvm_facility(kvm->arch.model.fac_mask, 72); 765 set_kvm_facility(kvm->arch.model.fac_list, 72); 766 r = 0; 767 } 768 mutex_unlock(&kvm->lock); 769 VM_EVENT(kvm, 3, "ENABLE: AIS %s", 770 r ? 
"(not available)" : "(success)"); 771 break; 772 case KVM_CAP_S390_GS: 773 r = -EINVAL; 774 mutex_lock(&kvm->lock); 775 if (kvm->created_vcpus) { 776 r = -EBUSY; 777 } else if (test_facility(133)) { 778 set_kvm_facility(kvm->arch.model.fac_mask, 133); 779 set_kvm_facility(kvm->arch.model.fac_list, 133); 780 r = 0; 781 } 782 mutex_unlock(&kvm->lock); 783 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 784 r ? "(not available)" : "(success)"); 785 break; 786 case KVM_CAP_S390_HPAGE_1M: 787 mutex_lock(&kvm->lock); 788 if (kvm->created_vcpus) 789 r = -EBUSY; 790 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 791 r = -EINVAL; 792 else { 793 r = 0; 794 mmap_write_lock(kvm->mm); 795 kvm->mm->context.allow_gmap_hpage_1m = 1; 796 mmap_write_unlock(kvm->mm); 797 /* 798 * We might have to create fake 4k page 799 * tables. To avoid that the hardware works on 800 * stale PGSTEs, we emulate these instructions. 801 */ 802 kvm->arch.use_skf = 0; 803 kvm->arch.use_pfmfi = 0; 804 } 805 mutex_unlock(&kvm->lock); 806 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 807 r ? "(not available)" : "(success)"); 808 break; 809 case KVM_CAP_S390_USER_STSI: 810 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 811 kvm->arch.user_stsi = 1; 812 r = 0; 813 break; 814 case KVM_CAP_S390_USER_INSTR0: 815 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 816 kvm->arch.user_instr0 = 1; 817 icpt_operexc_on_all_vcpus(kvm); 818 r = 0; 819 break; 820 default: 821 r = -EINVAL; 822 break; 823 } 824 return r; 825 } 826 827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 828 { 829 int ret; 830 831 switch (attr->attr) { 832 case KVM_S390_VM_MEM_LIMIT_SIZE: 833 ret = 0; 834 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 835 kvm->arch.mem_limit); 836 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 837 ret = -EFAULT; 838 break; 839 default: 840 ret = -ENXIO; 841 break; 842 } 843 return ret; 844 } 845 846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 847 { 848 int ret; 849 unsigned int idx; 850 switch (attr->attr) { 851 case KVM_S390_VM_MEM_ENABLE_CMMA: 852 ret = -ENXIO; 853 if (!sclp.has_cmma) 854 break; 855 856 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 857 mutex_lock(&kvm->lock); 858 if (kvm->created_vcpus) 859 ret = -EBUSY; 860 else if (kvm->mm->context.allow_gmap_hpage_1m) 861 ret = -EINVAL; 862 else { 863 kvm->arch.use_cmma = 1; 864 /* Not compatible with cmma. 
*/ 865 kvm->arch.use_pfmfi = 0; 866 ret = 0; 867 } 868 mutex_unlock(&kvm->lock); 869 break; 870 case KVM_S390_VM_MEM_CLR_CMMA: 871 ret = -ENXIO; 872 if (!sclp.has_cmma) 873 break; 874 ret = -EINVAL; 875 if (!kvm->arch.use_cmma) 876 break; 877 878 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); 879 mutex_lock(&kvm->lock); 880 idx = srcu_read_lock(&kvm->srcu); 881 s390_reset_cmma(kvm->arch.gmap->mm); 882 srcu_read_unlock(&kvm->srcu, idx); 883 mutex_unlock(&kvm->lock); 884 ret = 0; 885 break; 886 case KVM_S390_VM_MEM_LIMIT_SIZE: { 887 unsigned long new_limit; 888 889 if (kvm_is_ucontrol(kvm)) 890 return -EINVAL; 891 892 if (get_user(new_limit, (u64 __user *)attr->addr)) 893 return -EFAULT; 894 895 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 896 new_limit > kvm->arch.mem_limit) 897 return -E2BIG; 898 899 if (!new_limit) 900 return -EINVAL; 901 902 /* gmap_create takes last usable address */ 903 if (new_limit != KVM_S390_NO_MEM_LIMIT) 904 new_limit -= 1; 905 906 ret = -EBUSY; 907 mutex_lock(&kvm->lock); 908 if (!kvm->created_vcpus) { 909 /* gmap_create will round the limit up */ 910 struct gmap *new = gmap_create(current->mm, new_limit); 911 912 if (!new) { 913 ret = -ENOMEM; 914 } else { 915 gmap_remove(kvm->arch.gmap); 916 new->private = kvm; 917 kvm->arch.gmap = new; 918 ret = 0; 919 } 920 } 921 mutex_unlock(&kvm->lock); 922 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 923 VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 924 (void *) kvm->arch.gmap->asce); 925 break; 926 } 927 default: 928 ret = -ENXIO; 929 break; 930 } 931 return ret; 932 } 933 934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 935 936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 937 { 938 struct kvm_vcpu *vcpu; 939 unsigned long i; 940 941 kvm_s390_vcpu_block_all(kvm); 942 943 kvm_for_each_vcpu(i, vcpu, kvm) { 944 kvm_s390_vcpu_crypto_setup(vcpu); 945 /* recreate the shadow crycb by leaving the VSIE handler */ 946 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu); 947 } 948 949 kvm_s390_vcpu_unblock_all(kvm); 950 } 951 952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) 953 { 954 mutex_lock(&kvm->lock); 955 switch (attr->attr) { 956 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 957 if (!test_kvm_facility(kvm, 76)) { 958 mutex_unlock(&kvm->lock); 959 return -EINVAL; 960 } 961 get_random_bytes( 962 kvm->arch.crypto.crycb->aes_wrapping_key_mask, 963 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 964 kvm->arch.crypto.aes_kw = 1; 965 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); 966 break; 967 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 968 if (!test_kvm_facility(kvm, 76)) { 969 mutex_unlock(&kvm->lock); 970 return -EINVAL; 971 } 972 get_random_bytes( 973 kvm->arch.crypto.crycb->dea_wrapping_key_mask, 974 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 975 kvm->arch.crypto.dea_kw = 1; 976 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); 977 break; 978 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 979 if (!test_kvm_facility(kvm, 76)) { 980 mutex_unlock(&kvm->lock); 981 return -EINVAL; 982 } 983 kvm->arch.crypto.aes_kw = 0; 984 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, 985 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 986 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); 987 break; 988 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 989 if (!test_kvm_facility(kvm, 76)) { 990 mutex_unlock(&kvm->lock); 991 return -EINVAL; 992 } 993 kvm->arch.crypto.dea_kw = 0; 994 
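/* Also wipe the current DEA wrapping key mask so no stale key material is left in the CRYCB. */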
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, 995 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 996 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); 997 break; 998 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 999 if (!ap_instructions_available()) { 1000 mutex_unlock(&kvm->lock); 1001 return -EOPNOTSUPP; 1002 } 1003 kvm->arch.crypto.apie = 1; 1004 break; 1005 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1006 if (!ap_instructions_available()) { 1007 mutex_unlock(&kvm->lock); 1008 return -EOPNOTSUPP; 1009 } 1010 kvm->arch.crypto.apie = 0; 1011 break; 1012 default: 1013 mutex_unlock(&kvm->lock); 1014 return -ENXIO; 1015 } 1016 1017 kvm_s390_vcpu_crypto_reset_all(kvm); 1018 mutex_unlock(&kvm->lock); 1019 return 0; 1020 } 1021 1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) 1023 { 1024 unsigned long cx; 1025 struct kvm_vcpu *vcpu; 1026 1027 kvm_for_each_vcpu(cx, vcpu, kvm) 1028 kvm_s390_sync_request(req, vcpu); 1029 } 1030 1031 /* 1032 * Must be called with kvm->srcu held to avoid races on memslots, and with 1033 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. 1034 */ 1035 static int kvm_s390_vm_start_migration(struct kvm *kvm) 1036 { 1037 struct kvm_memory_slot *ms; 1038 struct kvm_memslots *slots; 1039 unsigned long ram_pages = 0; 1040 int bkt; 1041 1042 /* migration mode already enabled */ 1043 if (kvm->arch.migration_mode) 1044 return 0; 1045 slots = kvm_memslots(kvm); 1046 if (!slots || kvm_memslots_empty(slots)) 1047 return -EINVAL; 1048 1049 if (!kvm->arch.use_cmma) { 1050 kvm->arch.migration_mode = 1; 1051 return 0; 1052 } 1053 /* mark all the pages in active slots as dirty */ 1054 kvm_for_each_memslot(ms, bkt, slots) { 1055 if (!ms->dirty_bitmap) 1056 return -EINVAL; 1057 /* 1058 * The second half of the bitmap is only used on x86, 1059 * and would be wasted otherwise, so we put it to good 1060 * use here to keep track of the state of the storage 1061 * attributes. 1062 */ 1063 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms)); 1064 ram_pages += ms->npages; 1065 } 1066 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); 1067 kvm->arch.migration_mode = 1; 1068 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); 1069 return 0; 1070 } 1071 1072 /* 1073 * Must be called with kvm->slots_lock to avoid races with ourselves and 1074 * kvm_s390_vm_start_migration. 
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
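
/*
 * Note on the value composed above: the guest TOD returned by
 * kvm_s390_get_tod_clock() is host TOD + kvm->arch.epoch, and with the
 * multiple-epoch facility (139) the epoch index picks up the carry when
 * that 64-bit addition wraps.
 *
 * Userspace reads and writes this value through the VM device-attribute
 * interface. A minimal read sketch, not part of this file, assuming the
 * standard <linux/kvm.h> UAPI definitions and a hypothetical already-open
 * VM file descriptor vm_fd:
 *
 *	struct kvm_s390_vm_tod_clock tod = {};
 *	struct kvm_device_attr da = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&tod,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &da) == 0)
 *		printf("epoch_idx=%u tod=%016llx\n", tod.epoch_idx, tod.tod);
 *
 * The matching KVM_SET_DEVICE_ATTR call with the same group/attr pair is
 * handled by kvm_s390_set_tod() above.
 */
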
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
data.feat[0], 1340 data.feat[1], 1341 data.feat[2]); 1342 return 0; 1343 } 1344 1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm, 1346 struct kvm_device_attr *attr) 1347 { 1348 mutex_lock(&kvm->lock); 1349 if (kvm->created_vcpus) { 1350 mutex_unlock(&kvm->lock); 1351 return -EBUSY; 1352 } 1353 1354 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, 1355 sizeof(struct kvm_s390_vm_cpu_subfunc))) { 1356 mutex_unlock(&kvm->lock); 1357 return -EFAULT; 1358 } 1359 mutex_unlock(&kvm->lock); 1360 1361 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1362 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1363 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1364 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1365 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1366 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1367 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1368 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1369 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1370 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1371 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1372 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1375 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", 1376 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1377 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1378 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1379 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1380 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1381 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1382 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1383 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1384 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1385 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1386 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1387 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1388 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1389 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1390 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1391 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1392 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1393 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1394 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1395 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1396 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1397 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1398 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1399 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1400 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1401 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1402 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1403 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1404 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1405 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1406 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1407 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1408 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 
0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1409 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1410 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1411 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1413 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1414 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1415 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1416 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1418 1419 return 0; 1420 } 1421 1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1423 { 1424 int ret = -ENXIO; 1425 1426 switch (attr->attr) { 1427 case KVM_S390_VM_CPU_PROCESSOR: 1428 ret = kvm_s390_set_processor(kvm, attr); 1429 break; 1430 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1431 ret = kvm_s390_set_processor_feat(kvm, attr); 1432 break; 1433 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1434 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1435 break; 1436 } 1437 return ret; 1438 } 1439 1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) 1441 { 1442 struct kvm_s390_vm_cpu_processor *proc; 1443 int ret = 0; 1444 1445 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); 1446 if (!proc) { 1447 ret = -ENOMEM; 1448 goto out; 1449 } 1450 proc->cpuid = kvm->arch.model.cpuid; 1451 proc->ibc = kvm->arch.model.ibc; 1452 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 1453 S390_ARCH_FAC_LIST_SIZE_BYTE); 1454 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", 1455 kvm->arch.model.ibc, 1456 kvm->arch.model.cpuid); 1457 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1458 kvm->arch.model.fac_list[0], 1459 kvm->arch.model.fac_list[1], 1460 kvm->arch.model.fac_list[2]); 1461 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) 1462 ret = -EFAULT; 1463 kfree(proc); 1464 out: 1465 return ret; 1466 } 1467 1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) 1469 { 1470 struct kvm_s390_vm_cpu_machine *mach; 1471 int ret = 0; 1472 1473 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); 1474 if (!mach) { 1475 ret = -ENOMEM; 1476 goto out; 1477 } 1478 get_cpu_id((struct cpuid *) &mach->cpuid); 1479 mach->ibc = sclp.ibc; 1480 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, 1481 S390_ARCH_FAC_LIST_SIZE_BYTE); 1482 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list, 1483 sizeof(stfle_fac_list)); 1484 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", 1485 kvm->arch.model.ibc, 1486 kvm->arch.model.cpuid); 1487 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", 1488 mach->fac_mask[0], 1489 mach->fac_mask[1], 1490 mach->fac_mask[2]); 1491 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", 1492 mach->fac_list[0], 1493 mach->fac_list[1], 1494 mach->fac_list[2]); 1495 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) 1496 ret = -EFAULT; 1497 kfree(mach); 1498 out: 1499 return ret; 1500 } 1501 1502 static int kvm_s390_get_processor_feat(struct kvm *kvm, 1503 struct kvm_device_attr *attr) 1504 { 1505 struct kvm_s390_vm_cpu_feat data; 1506 1507 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, 1508 KVM_S390_VM_CPU_FEAT_NR_BITS); 1509 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1510 return -EFAULT; 1511 VM_EVENT(kvm, 3, "GET: guest feat: 
0x%16.16llx.0x%16.16llx.0x%16.16llx", 1512 data.feat[0], 1513 data.feat[1], 1514 data.feat[2]); 1515 return 0; 1516 } 1517 1518 static int kvm_s390_get_machine_feat(struct kvm *kvm, 1519 struct kvm_device_attr *attr) 1520 { 1521 struct kvm_s390_vm_cpu_feat data; 1522 1523 bitmap_copy((unsigned long *) data.feat, 1524 kvm_s390_available_cpu_feat, 1525 KVM_S390_VM_CPU_FEAT_NR_BITS); 1526 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) 1527 return -EFAULT; 1528 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx", 1529 data.feat[0], 1530 data.feat[1], 1531 data.feat[2]); 1532 return 0; 1533 } 1534 1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm, 1536 struct kvm_device_attr *attr) 1537 { 1538 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, 1539 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1540 return -EFAULT; 1541 1542 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1543 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], 1544 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], 1545 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], 1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); 1547 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1548 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1549 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1550 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1552 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1553 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1556 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1557 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1558 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1559 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1560 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1561 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1562 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1563 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1564 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1565 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1566 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1567 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1568 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1569 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1570 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1571 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1574 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1577 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1578 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1579 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1580 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1581 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1582 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1583 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", 
1584 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1585 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1586 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1587 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1588 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1589 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1590 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1591 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1592 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1593 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1594 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1595 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1596 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1597 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1598 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1599 1600 return 0; 1601 } 1602 1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm, 1604 struct kvm_device_attr *attr) 1605 { 1606 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1607 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1608 return -EFAULT; 1609 1610 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1611 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], 1612 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], 1613 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], 1614 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); 1615 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", 1616 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], 1617 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); 1618 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", 1619 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], 1620 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); 1621 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", 1622 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], 1623 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); 1624 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", 1625 ((unsigned long *) &kvm_s390_available_subfunc.km)[0], 1626 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); 1627 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", 1628 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], 1629 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); 1630 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", 1631 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], 1632 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); 1633 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", 1634 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], 1635 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); 1636 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", 1637 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], 1638 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); 1639 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", 1640 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], 1641 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); 1642 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", 1643 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], 1644 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); 1645 VM_EVENT(kvm, 3, "GET: host PCC 
subfunc 0x%16.16lx.%16.16lx", 1646 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1647 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1648 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1649 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1650 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1651 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1652 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1653 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1654 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1655 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1656 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1657 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1658 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1659 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1660 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); 1662 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1663 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1664 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1665 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1667 1668 return 0; 1669 } 1670 1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1672 { 1673 int ret = -ENXIO; 1674 1675 switch (attr->attr) { 1676 case KVM_S390_VM_CPU_PROCESSOR: 1677 ret = kvm_s390_get_processor(kvm, attr); 1678 break; 1679 case KVM_S390_VM_CPU_MACHINE: 1680 ret = kvm_s390_get_machine(kvm, attr); 1681 break; 1682 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1683 ret = kvm_s390_get_processor_feat(kvm, attr); 1684 break; 1685 case KVM_S390_VM_CPU_MACHINE_FEAT: 1686 ret = kvm_s390_get_machine_feat(kvm, attr); 1687 break; 1688 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1689 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1690 break; 1691 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1692 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1693 break; 1694 } 1695 return ret; 1696 } 1697 1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1699 { 1700 int ret; 1701 1702 switch (attr->group) { 1703 case KVM_S390_VM_MEM_CTRL: 1704 ret = kvm_s390_set_mem_control(kvm, attr); 1705 break; 1706 case KVM_S390_VM_TOD: 1707 ret = kvm_s390_set_tod(kvm, attr); 1708 break; 1709 case KVM_S390_VM_CPU_MODEL: 1710 ret = kvm_s390_set_cpu_model(kvm, attr); 1711 break; 1712 case KVM_S390_VM_CRYPTO: 1713 ret = kvm_s390_vm_set_crypto(kvm, attr); 1714 break; 1715 case KVM_S390_VM_MIGRATION: 1716 ret = kvm_s390_vm_set_migration(kvm, attr); 1717 break; 1718 default: 1719 ret = -ENXIO; 1720 break; 1721 } 1722 1723 return ret; 1724 } 1725 1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1727 { 1728 int ret; 1729 1730 switch (attr->group) { 1731 case KVM_S390_VM_MEM_CTRL: 1732 ret = kvm_s390_get_mem_control(kvm, attr); 1733 break; 1734 case KVM_S390_VM_TOD: 1735 ret = kvm_s390_get_tod(kvm, attr); 1736 break; 1737 case KVM_S390_VM_CPU_MODEL: 1738 ret = kvm_s390_get_cpu_model(kvm, attr); 1739 break; 1740 case KVM_S390_VM_MIGRATION: 1741 ret = kvm_s390_vm_get_migration(kvm, attr); 1742 break; 1743 default: 1744 ret = -ENXIO; 1745 break; 1746 } 1747 1748 return ret; 1749 } 1750 1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct 
kvm_device_attr *attr) 1752 { 1753 int ret; 1754 1755 switch (attr->group) { 1756 case KVM_S390_VM_MEM_CTRL: 1757 switch (attr->attr) { 1758 case KVM_S390_VM_MEM_ENABLE_CMMA: 1759 case KVM_S390_VM_MEM_CLR_CMMA: 1760 ret = sclp.has_cmma ? 0 : -ENXIO; 1761 break; 1762 case KVM_S390_VM_MEM_LIMIT_SIZE: 1763 ret = 0; 1764 break; 1765 default: 1766 ret = -ENXIO; 1767 break; 1768 } 1769 break; 1770 case KVM_S390_VM_TOD: 1771 switch (attr->attr) { 1772 case KVM_S390_VM_TOD_LOW: 1773 case KVM_S390_VM_TOD_HIGH: 1774 ret = 0; 1775 break; 1776 default: 1777 ret = -ENXIO; 1778 break; 1779 } 1780 break; 1781 case KVM_S390_VM_CPU_MODEL: 1782 switch (attr->attr) { 1783 case KVM_S390_VM_CPU_PROCESSOR: 1784 case KVM_S390_VM_CPU_MACHINE: 1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1786 case KVM_S390_VM_CPU_MACHINE_FEAT: 1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1789 ret = 0; 1790 break; 1791 default: 1792 ret = -ENXIO; 1793 break; 1794 } 1795 break; 1796 case KVM_S390_VM_CRYPTO: 1797 switch (attr->attr) { 1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 1802 ret = 0; 1803 break; 1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 1806 ret = ap_instructions_available() ? 0 : -ENXIO; 1807 break; 1808 default: 1809 ret = -ENXIO; 1810 break; 1811 } 1812 break; 1813 case KVM_S390_VM_MIGRATION: 1814 ret = 0; 1815 break; 1816 default: 1817 ret = -ENXIO; 1818 break; 1819 } 1820 1821 return ret; 1822 } 1823 1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1825 { 1826 uint8_t *keys; 1827 uint64_t hva; 1828 int srcu_idx, i, r = 0; 1829 1830 if (args->flags != 0) 1831 return -EINVAL; 1832 1833 /* Is this guest using storage keys? 
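 * If not, KVM_S390_GET_SKEYS_NONE is returned so that userspace can skip the
 * transfer entirely. As a purely illustrative sketch (not part of this file;
 * "vm_fd" and the buffer sizing are assumptions), userspace drives this via
 * the KVM_S390_GET_SKEYS ioctl on the VM file descriptor:
 *
 *	uint8_t keys[256];			// one key byte per 4K guest page
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = sizeof(keys),	// 1 .. KVM_S390_SKEYS_MAX
 *		.skeydata_addr = (uint64_t)(unsigned long)keys,
 *	};
 *	long rc = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	// rc == KVM_S390_GET_SKEYS_NONE: the guest never enabled storage keys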
*/ 1834 if (!mm_uses_skeys(current->mm)) 1835 return KVM_S390_GET_SKEYS_NONE; 1836 1837 /* Enforce sane limit on memory allocation */ 1838 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1839 return -EINVAL; 1840 1841 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1842 if (!keys) 1843 return -ENOMEM; 1844 1845 mmap_read_lock(current->mm); 1846 srcu_idx = srcu_read_lock(&kvm->srcu); 1847 for (i = 0; i < args->count; i++) { 1848 hva = gfn_to_hva(kvm, args->start_gfn + i); 1849 if (kvm_is_error_hva(hva)) { 1850 r = -EFAULT; 1851 break; 1852 } 1853 1854 r = get_guest_storage_key(current->mm, hva, &keys[i]); 1855 if (r) 1856 break; 1857 } 1858 srcu_read_unlock(&kvm->srcu, srcu_idx); 1859 mmap_read_unlock(current->mm); 1860 1861 if (!r) { 1862 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 1863 sizeof(uint8_t) * args->count); 1864 if (r) 1865 r = -EFAULT; 1866 } 1867 1868 kvfree(keys); 1869 return r; 1870 } 1871 1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 1873 { 1874 uint8_t *keys; 1875 uint64_t hva; 1876 int srcu_idx, i, r = 0; 1877 bool unlocked; 1878 1879 if (args->flags != 0) 1880 return -EINVAL; 1881 1882 /* Enforce sane limit on memory allocation */ 1883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 1884 return -EINVAL; 1885 1886 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 1887 if (!keys) 1888 return -ENOMEM; 1889 1890 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 1891 sizeof(uint8_t) * args->count); 1892 if (r) { 1893 r = -EFAULT; 1894 goto out; 1895 } 1896 1897 /* Enable storage key handling for the guest */ 1898 r = s390_enable_skey(); 1899 if (r) 1900 goto out; 1901 1902 i = 0; 1903 mmap_read_lock(current->mm); 1904 srcu_idx = srcu_read_lock(&kvm->srcu); 1905 while (i < args->count) { 1906 unlocked = false; 1907 hva = gfn_to_hva(kvm, args->start_gfn + i); 1908 if (kvm_is_error_hva(hva)) { 1909 r = -EFAULT; 1910 break; 1911 } 1912 1913 /* Lowest order bit is reserved */ 1914 if (keys[i] & 0x01) { 1915 r = -EINVAL; 1916 break; 1917 } 1918 1919 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 1920 if (r) { 1921 r = fixup_user_fault(current->mm, hva, 1922 FAULT_FLAG_WRITE, &unlocked); 1923 if (r) 1924 break; 1925 } 1926 if (!r) 1927 i++; 1928 } 1929 srcu_read_unlock(&kvm->srcu, srcu_idx); 1930 mmap_read_unlock(current->mm); 1931 out: 1932 kvfree(keys); 1933 return r; 1934 } 1935 1936 /* 1937 * Base address and length must be sent at the start of each block, therefore 1938 * it's cheaper to send some clean data, as long as it's less than the size of 1939 * two longs. 1940 */ 1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 1942 /* for consistency */ 1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 1944 1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1946 u8 *res, unsigned long bufsize) 1947 { 1948 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 1949 1950 args->count = 0; 1951 while (args->count < bufsize) { 1952 hva = gfn_to_hva(kvm, cur_gfn); 1953 /* 1954 * We return an error if the first value was invalid, but we 1955 * return successfully if at least one value was copied. 1956 */ 1957 if (kvm_is_error_hva(hva)) 1958 return args->count ? 
0 : -EFAULT; 1959 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 1960 pgstev = 0; 1961 res[args->count++] = (pgstev >> 24) & 0x43; 1962 cur_gfn++; 1963 } 1964 1965 return 0; 1966 } 1967 1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 1969 gfn_t gfn) 1970 { 1971 return ____gfn_to_memslot(slots, gfn, true); 1972 } 1973 1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 1975 unsigned long cur_gfn) 1976 { 1977 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 1978 unsigned long ofs = cur_gfn - ms->base_gfn; 1979 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 1980 1981 if (ms->base_gfn + ms->npages <= cur_gfn) { 1982 mnode = rb_next(mnode); 1983 /* If we are above the highest slot, wrap around */ 1984 if (!mnode) 1985 mnode = rb_first(&slots->gfn_tree); 1986 1987 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1988 ofs = 0; 1989 } 1990 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 1991 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 1992 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 1993 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 1994 } 1995 return ms->base_gfn + ofs; 1996 } 1997 1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 1999 u8 *res, unsigned long bufsize) 2000 { 2001 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2002 struct kvm_memslots *slots = kvm_memslots(kvm); 2003 struct kvm_memory_slot *ms; 2004 2005 if (unlikely(kvm_memslots_empty(slots))) 2006 return 0; 2007 2008 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2009 ms = gfn_to_memslot(kvm, cur_gfn); 2010 args->count = 0; 2011 args->start_gfn = cur_gfn; 2012 if (!ms) 2013 return 0; 2014 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2015 mem_end = kvm_s390_get_gfn_end(slots); 2016 2017 while (args->count < bufsize) { 2018 hva = gfn_to_hva(kvm, cur_gfn); 2019 if (kvm_is_error_hva(hva)) 2020 return 0; 2021 /* Decrement only if we actually flipped the bit to 0 */ 2022 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2023 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2024 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2025 pgstev = 0; 2026 /* Save the value */ 2027 res[args->count++] = (pgstev >> 24) & 0x43; 2028 /* If the next bit is too far away, stop. */ 2029 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2030 return 0; 2031 /* If we reached the previous "next", find the next one */ 2032 if (cur_gfn == next_gfn) 2033 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2034 /* Reached the end of memory or of the buffer, stop */ 2035 if ((next_gfn >= mem_end) || 2036 (next_gfn - args->start_gfn >= bufsize)) 2037 return 0; 2038 cur_gfn++; 2039 /* Reached the end of the current memslot, take the next one. */ 2040 if (cur_gfn - ms->base_gfn >= ms->npages) { 2041 ms = gfn_to_memslot(kvm, cur_gfn); 2042 if (!ms) 2043 return 0; 2044 } 2045 } 2046 return 0; 2047 } 2048 2049 /* 2050 * This function searches for the next page with dirty CMMA attributes, and 2051 * saves the attributes in the buffer up to either the end of the buffer or 2052 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2053 * no trailing clean bytes are saved. 2054 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2055 * output buffer will indicate 0 as length. 
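 *
 * A minimal userspace sketch of driving this interface (illustrative only;
 * "vm_fd" is an assumed, already created VM file descriptor and CMMA must
 * have been enabled via the KVM_S390_VM_MEM_ENABLE_CMMA attribute):
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(buf),
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (uint64_t)(unsigned long)buf,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) == 0)
 *		consume(buf, log.count);	// "consume" is a placeholder
 *
 * On return, log.start_gfn refers to the first frame the values describe and
 * log.remaining reports the number of dirty values still pending (0 unless
 * migration mode is active).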
2056 */ 2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2058 struct kvm_s390_cmma_log *args) 2059 { 2060 unsigned long bufsize; 2061 int srcu_idx, peek, ret; 2062 u8 *values; 2063 2064 if (!kvm->arch.use_cmma) 2065 return -ENXIO; 2066 /* Invalid/unsupported flags were specified */ 2067 if (args->flags & ~KVM_S390_CMMA_PEEK) 2068 return -EINVAL; 2069 /* Migration mode query, and we are not doing a migration */ 2070 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2071 if (!peek && !kvm->arch.migration_mode) 2072 return -EINVAL; 2073 /* CMMA is disabled or was not used, or the buffer has length zero */ 2074 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2075 if (!bufsize || !kvm->mm->context.uses_cmm) { 2076 memset(args, 0, sizeof(*args)); 2077 return 0; 2078 } 2079 /* We are not peeking, and there are no dirty pages */ 2080 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2081 memset(args, 0, sizeof(*args)); 2082 return 0; 2083 } 2084 2085 values = vmalloc(bufsize); 2086 if (!values) 2087 return -ENOMEM; 2088 2089 mmap_read_lock(kvm->mm); 2090 srcu_idx = srcu_read_lock(&kvm->srcu); 2091 if (peek) 2092 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2093 else 2094 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2095 srcu_read_unlock(&kvm->srcu, srcu_idx); 2096 mmap_read_unlock(kvm->mm); 2097 2098 if (kvm->arch.migration_mode) 2099 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2100 else 2101 args->remaining = 0; 2102 2103 if (copy_to_user((void __user *)args->values, values, args->count)) 2104 ret = -EFAULT; 2105 2106 vfree(values); 2107 return ret; 2108 } 2109 2110 /* 2111 * This function sets the CMMA attributes for the given pages. If the input 2112 * buffer has zero length, no action is taken, otherwise the attributes are 2113 * set and the mm->context.uses_cmm flag is set. 
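 * The mask supplied by userspace is applied to every page, but it is
 * restricted below to the usage-state and NODAT bits
 * (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); any other mask bits are
 * silently ignored.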
2114 */ 2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2116 const struct kvm_s390_cmma_log *args) 2117 { 2118 unsigned long hva, mask, pgstev, i; 2119 uint8_t *bits; 2120 int srcu_idx, r = 0; 2121 2122 mask = args->mask; 2123 2124 if (!kvm->arch.use_cmma) 2125 return -ENXIO; 2126 /* invalid/unsupported flags */ 2127 if (args->flags != 0) 2128 return -EINVAL; 2129 /* Enforce sane limit on memory allocation */ 2130 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2131 return -EINVAL; 2132 /* Nothing to do */ 2133 if (args->count == 0) 2134 return 0; 2135 2136 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2137 if (!bits) 2138 return -ENOMEM; 2139 2140 r = copy_from_user(bits, (void __user *)args->values, args->count); 2141 if (r) { 2142 r = -EFAULT; 2143 goto out; 2144 } 2145 2146 mmap_read_lock(kvm->mm); 2147 srcu_idx = srcu_read_lock(&kvm->srcu); 2148 for (i = 0; i < args->count; i++) { 2149 hva = gfn_to_hva(kvm, args->start_gfn + i); 2150 if (kvm_is_error_hva(hva)) { 2151 r = -EFAULT; 2152 break; 2153 } 2154 2155 pgstev = bits[i]; 2156 pgstev = pgstev << 24; 2157 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2158 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2159 } 2160 srcu_read_unlock(&kvm->srcu, srcu_idx); 2161 mmap_read_unlock(kvm->mm); 2162 2163 if (!kvm->mm->context.uses_cmm) { 2164 mmap_write_lock(kvm->mm); 2165 kvm->mm->context.uses_cmm = 1; 2166 mmap_write_unlock(kvm->mm); 2167 } 2168 out: 2169 vfree(bits); 2170 return r; 2171 } 2172 2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) 2174 { 2175 struct kvm_vcpu *vcpu; 2176 u16 rc, rrc; 2177 int ret = 0; 2178 unsigned long i; 2179 2180 /* 2181 * We ignore failures and try to destroy as many CPUs as possible. 2182 * At the same time we must not free the assigned resources when 2183 * this fails, as the ultravisor has still access to that memory. 2184 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak 2185 * behind. 2186 * We want to return the first failure rc and rrc, though. 2187 */ 2188 kvm_for_each_vcpu(i, vcpu, kvm) { 2189 mutex_lock(&vcpu->mutex); 2190 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { 2191 *rcp = rc; 2192 *rrcp = rrc; 2193 ret = -EIO; 2194 } 2195 mutex_unlock(&vcpu->mutex); 2196 } 2197 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */ 2198 if (use_gisa) 2199 kvm_s390_gisa_enable(kvm); 2200 return ret; 2201 } 2202 2203 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) 2204 { 2205 unsigned long i; 2206 int r = 0; 2207 u16 dummy; 2208 2209 struct kvm_vcpu *vcpu; 2210 2211 /* Disable the GISA if the ultravisor does not support AIV. */ 2212 if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) 2213 kvm_s390_gisa_disable(kvm); 2214 2215 kvm_for_each_vcpu(i, vcpu, kvm) { 2216 mutex_lock(&vcpu->mutex); 2217 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); 2218 mutex_unlock(&vcpu->mutex); 2219 if (r) 2220 break; 2221 } 2222 if (r) 2223 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); 2224 return r; 2225 } 2226 2227 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2228 { 2229 int r = 0; 2230 u16 dummy; 2231 void __user *argp = (void __user *)cmd->data; 2232 2233 switch (cmd->cmd) { 2234 case KVM_PV_ENABLE: { 2235 r = -EINVAL; 2236 if (kvm_s390_pv_is_protected(kvm)) 2237 break; 2238 2239 /* 2240 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2241 * esca, we need no cleanup in the error cases below 2242 */ 2243 r = sca_switch_to_extended(kvm); 2244 if (r) 2245 break; 2246 2247 mmap_write_lock(current->mm); 2248 r = gmap_mark_unmergeable(); 2249 mmap_write_unlock(current->mm); 2250 if (r) 2251 break; 2252 2253 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2254 if (r) 2255 break; 2256 2257 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2258 if (r) 2259 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2260 2261 /* we need to block service interrupts from now on */ 2262 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2263 break; 2264 } 2265 case KVM_PV_DISABLE: { 2266 r = -EINVAL; 2267 if (!kvm_s390_pv_is_protected(kvm)) 2268 break; 2269 2270 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2271 /* 2272 * If a CPU could not be destroyed, destroy VM will also fail. 2273 * There is no point in trying to destroy it. Instead return 2274 * the rc and rrc from the first CPU that failed destroying. 2275 */ 2276 if (r) 2277 break; 2278 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); 2279 2280 /* no need to block service interrupts any more */ 2281 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2282 break; 2283 } 2284 case KVM_PV_SET_SEC_PARMS: { 2285 struct kvm_s390_pv_sec_parm parms = {}; 2286 void *hdr; 2287 2288 r = -EINVAL; 2289 if (!kvm_s390_pv_is_protected(kvm)) 2290 break; 2291 2292 r = -EFAULT; 2293 if (copy_from_user(&parms, argp, sizeof(parms))) 2294 break; 2295 2296 /* Currently restricted to 8KB */ 2297 r = -EINVAL; 2298 if (parms.length > PAGE_SIZE * 2) 2299 break; 2300 2301 r = -ENOMEM; 2302 hdr = vmalloc(parms.length); 2303 if (!hdr) 2304 break; 2305 2306 r = -EFAULT; 2307 if (!copy_from_user(hdr, (void __user *)parms.origin, 2308 parms.length)) 2309 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2310 &cmd->rc, &cmd->rrc); 2311 2312 vfree(hdr); 2313 break; 2314 } 2315 case KVM_PV_UNPACK: { 2316 struct kvm_s390_pv_unp unp = {}; 2317 2318 r = -EINVAL; 2319 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2320 break; 2321 2322 r = -EFAULT; 2323 if (copy_from_user(&unp, argp, sizeof(unp))) 2324 break; 2325 2326 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2327 &cmd->rc, &cmd->rrc); 2328 break; 2329 } 2330 case KVM_PV_VERIFY: { 2331 r = -EINVAL; 2332 if (!kvm_s390_pv_is_protected(kvm)) 2333 break; 2334 2335 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2336 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2337 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2338 cmd->rrc); 2339 break; 2340 } 2341 case KVM_PV_PREP_RESET: { 2342 r = -EINVAL; 2343 if (!kvm_s390_pv_is_protected(kvm)) 2344 break; 2345 2346 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2347 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2348 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2349 cmd->rc, cmd->rrc); 2350 break; 2351 } 2352 case KVM_PV_UNSHARE_ALL: { 2353 r = -EINVAL; 2354 if (!kvm_s390_pv_is_protected(kvm)) 2355 break; 2356 2357 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2358 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2359 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2360 cmd->rc, cmd->rrc); 2361 break; 2362 } 2363 default: 2364 r = -ENOTTY; 2365 } 2366 return r; 2367 } 2368 2369 static bool access_key_invalid(u8 access_key) 2370 { 2371 return access_key > 0xf; 2372 } 2373 2374 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2375 { 2376 void __user *uaddr = (void __user 
*)mop->buf; 2377 u64 supported_flags; 2378 void *tmpbuf = NULL; 2379 int r, srcu_idx; 2380 2381 supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION 2382 | KVM_S390_MEMOP_F_CHECK_ONLY; 2383 if (mop->flags & ~supported_flags || !mop->size) 2384 return -EINVAL; 2385 if (mop->size > MEM_OP_MAX_SIZE) 2386 return -E2BIG; 2387 if (kvm_s390_pv_is_protected(kvm)) 2388 return -EINVAL; 2389 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2390 if (access_key_invalid(mop->key)) 2391 return -EINVAL; 2392 } else { 2393 mop->key = 0; 2394 } 2395 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2396 tmpbuf = vmalloc(mop->size); 2397 if (!tmpbuf) 2398 return -ENOMEM; 2399 } 2400 2401 srcu_idx = srcu_read_lock(&kvm->srcu); 2402 2403 if (kvm_is_error_gpa(kvm, mop->gaddr)) { 2404 r = PGM_ADDRESSING; 2405 goto out_unlock; 2406 } 2407 2408 switch (mop->op) { 2409 case KVM_S390_MEMOP_ABSOLUTE_READ: { 2410 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2411 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); 2412 } else { 2413 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2414 mop->size, GACC_FETCH, mop->key); 2415 if (r == 0) { 2416 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2417 r = -EFAULT; 2418 } 2419 } 2420 break; 2421 } 2422 case KVM_S390_MEMOP_ABSOLUTE_WRITE: { 2423 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2424 r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); 2425 } else { 2426 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2427 r = -EFAULT; 2428 break; 2429 } 2430 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2431 mop->size, GACC_STORE, mop->key); 2432 } 2433 break; 2434 } 2435 default: 2436 r = -EINVAL; 2437 } 2438 2439 out_unlock: 2440 srcu_read_unlock(&kvm->srcu, srcu_idx); 2441 2442 vfree(tmpbuf); 2443 return r; 2444 } 2445 2446 long kvm_arch_vm_ioctl(struct file *filp, 2447 unsigned int ioctl, unsigned long arg) 2448 { 2449 struct kvm *kvm = filp->private_data; 2450 void __user *argp = (void __user *)arg; 2451 struct kvm_device_attr attr; 2452 int r; 2453 2454 switch (ioctl) { 2455 case KVM_S390_INTERRUPT: { 2456 struct kvm_s390_interrupt s390int; 2457 2458 r = -EFAULT; 2459 if (copy_from_user(&s390int, argp, sizeof(s390int))) 2460 break; 2461 r = kvm_s390_inject_vm(kvm, &s390int); 2462 break; 2463 } 2464 case KVM_CREATE_IRQCHIP: { 2465 struct kvm_irq_routing_entry routing; 2466 2467 r = -EINVAL; 2468 if (kvm->arch.use_irqchip) { 2469 /* Set up dummy routing. 
*/ 2470 memset(&routing, 0, sizeof(routing)); 2471 r = kvm_set_irq_routing(kvm, &routing, 0, 0); 2472 } 2473 break; 2474 } 2475 case KVM_SET_DEVICE_ATTR: { 2476 r = -EFAULT; 2477 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2478 break; 2479 r = kvm_s390_vm_set_attr(kvm, &attr); 2480 break; 2481 } 2482 case KVM_GET_DEVICE_ATTR: { 2483 r = -EFAULT; 2484 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2485 break; 2486 r = kvm_s390_vm_get_attr(kvm, &attr); 2487 break; 2488 } 2489 case KVM_HAS_DEVICE_ATTR: { 2490 r = -EFAULT; 2491 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 2492 break; 2493 r = kvm_s390_vm_has_attr(kvm, &attr); 2494 break; 2495 } 2496 case KVM_S390_GET_SKEYS: { 2497 struct kvm_s390_skeys args; 2498 2499 r = -EFAULT; 2500 if (copy_from_user(&args, argp, 2501 sizeof(struct kvm_s390_skeys))) 2502 break; 2503 r = kvm_s390_get_skeys(kvm, &args); 2504 break; 2505 } 2506 case KVM_S390_SET_SKEYS: { 2507 struct kvm_s390_skeys args; 2508 2509 r = -EFAULT; 2510 if (copy_from_user(&args, argp, 2511 sizeof(struct kvm_s390_skeys))) 2512 break; 2513 r = kvm_s390_set_skeys(kvm, &args); 2514 break; 2515 } 2516 case KVM_S390_GET_CMMA_BITS: { 2517 struct kvm_s390_cmma_log args; 2518 2519 r = -EFAULT; 2520 if (copy_from_user(&args, argp, sizeof(args))) 2521 break; 2522 mutex_lock(&kvm->slots_lock); 2523 r = kvm_s390_get_cmma_bits(kvm, &args); 2524 mutex_unlock(&kvm->slots_lock); 2525 if (!r) { 2526 r = copy_to_user(argp, &args, sizeof(args)); 2527 if (r) 2528 r = -EFAULT; 2529 } 2530 break; 2531 } 2532 case KVM_S390_SET_CMMA_BITS: { 2533 struct kvm_s390_cmma_log args; 2534 2535 r = -EFAULT; 2536 if (copy_from_user(&args, argp, sizeof(args))) 2537 break; 2538 mutex_lock(&kvm->slots_lock); 2539 r = kvm_s390_set_cmma_bits(kvm, &args); 2540 mutex_unlock(&kvm->slots_lock); 2541 break; 2542 } 2543 case KVM_S390_PV_COMMAND: { 2544 struct kvm_pv_cmd args; 2545 2546 /* protvirt means user cpu state */ 2547 kvm_s390_set_user_cpu_state_ctrl(kvm); 2548 r = 0; 2549 if (!is_prot_virt_host()) { 2550 r = -EINVAL; 2551 break; 2552 } 2553 if (copy_from_user(&args, argp, sizeof(args))) { 2554 r = -EFAULT; 2555 break; 2556 } 2557 if (args.flags) { 2558 r = -EINVAL; 2559 break; 2560 } 2561 mutex_lock(&kvm->lock); 2562 r = kvm_s390_handle_pv(kvm, &args); 2563 mutex_unlock(&kvm->lock); 2564 if (copy_to_user(argp, &args, sizeof(args))) { 2565 r = -EFAULT; 2566 break; 2567 } 2568 break; 2569 } 2570 case KVM_S390_MEM_OP: { 2571 struct kvm_s390_mem_op mem_op; 2572 2573 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 2574 r = kvm_s390_vm_mem_op(kvm, &mem_op); 2575 else 2576 r = -EFAULT; 2577 break; 2578 } 2579 default: 2580 r = -ENOTTY; 2581 } 2582 2583 return r; 2584 } 2585 2586 static int kvm_s390_apxa_installed(void) 2587 { 2588 struct ap_config_info info; 2589 2590 if (ap_instructions_available()) { 2591 if (ap_qci(&info) == 0) 2592 return info.apxa; 2593 } 2594 2595 return 0; 2596 } 2597 2598 /* 2599 * The format of the crypto control block (CRYCB) is specified in the 3 low 2600 * order bits of the CRYCB designation (CRYCBD) field as follows: 2601 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 2602 * AP extended addressing (APXA) facility are installed. 2603 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 
2604 * Format 2: Both the APXA and MSAX3 facilities are installed 2605 */ 2606 static void kvm_s390_set_crycb_format(struct kvm *kvm) 2607 { 2608 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; 2609 2610 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 2611 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 2612 2613 /* Check whether MSAX3 is installed */ 2614 if (!test_kvm_facility(kvm, 76)) 2615 return; 2616 2617 if (kvm_s390_apxa_installed()) 2618 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 2619 else 2620 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 2621 } 2622 2623 /* 2624 * kvm_arch_crypto_set_masks 2625 * 2626 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2627 * to be set. 2628 * @apm: the mask identifying the accessible AP adapters 2629 * @aqm: the mask identifying the accessible AP domains 2630 * @adm: the mask identifying the accessible AP control domains 2631 * 2632 * Set the masks that identify the adapters, domains and control domains to 2633 * which the KVM guest is granted access. 2634 * 2635 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2636 * function. 2637 */ 2638 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 2639 unsigned long *aqm, unsigned long *adm) 2640 { 2641 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 2642 2643 kvm_s390_vcpu_block_all(kvm); 2644 2645 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 2646 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 2647 memcpy(crycb->apcb1.apm, apm, 32); 2648 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 2649 apm[0], apm[1], apm[2], apm[3]); 2650 memcpy(crycb->apcb1.aqm, aqm, 32); 2651 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 2652 aqm[0], aqm[1], aqm[2], aqm[3]); 2653 memcpy(crycb->apcb1.adm, adm, 32); 2654 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 2655 adm[0], adm[1], adm[2], adm[3]); 2656 break; 2657 case CRYCB_FORMAT1: 2658 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 2659 memcpy(crycb->apcb0.apm, apm, 8); 2660 memcpy(crycb->apcb0.aqm, aqm, 2); 2661 memcpy(crycb->apcb0.adm, adm, 2); 2662 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 2663 apm[0], *((unsigned short *)aqm), 2664 *((unsigned short *)adm)); 2665 break; 2666 default: /* Can not happen */ 2667 break; 2668 } 2669 2670 /* recreate the shadow crycb for each vcpu */ 2671 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2672 kvm_s390_vcpu_unblock_all(kvm); 2673 } 2674 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 2675 2676 /* 2677 * kvm_arch_crypto_clear_masks 2678 * 2679 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 2680 * to be cleared. 2681 * 2682 * Clear the masks that identify the adapters, domains and control domains to 2683 * which the KVM guest is granted access. 2684 * 2685 * Note: The kvm->lock mutex must be locked by the caller before invoking this 2686 * function. 
2687 */ 2688 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 2689 { 2690 kvm_s390_vcpu_block_all(kvm); 2691 2692 memset(&kvm->arch.crypto.crycb->apcb0, 0, 2693 sizeof(kvm->arch.crypto.crycb->apcb0)); 2694 memset(&kvm->arch.crypto.crycb->apcb1, 0, 2695 sizeof(kvm->arch.crypto.crycb->apcb1)); 2696 2697 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 2698 /* recreate the shadow crycb for each vcpu */ 2699 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 2700 kvm_s390_vcpu_unblock_all(kvm); 2701 } 2702 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 2703 2704 static u64 kvm_s390_get_initial_cpuid(void) 2705 { 2706 struct cpuid cpuid; 2707 2708 get_cpu_id(&cpuid); 2709 cpuid.version = 0xff; 2710 return *((u64 *) &cpuid); 2711 } 2712 2713 static void kvm_s390_crypto_init(struct kvm *kvm) 2714 { 2715 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 2716 kvm_s390_set_crycb_format(kvm); 2717 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 2718 2719 if (!test_kvm_facility(kvm, 76)) 2720 return; 2721 2722 /* Enable AES/DEA protected key functions by default */ 2723 kvm->arch.crypto.aes_kw = 1; 2724 kvm->arch.crypto.dea_kw = 1; 2725 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 2726 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 2727 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 2728 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 2729 } 2730 2731 static void sca_dispose(struct kvm *kvm) 2732 { 2733 if (kvm->arch.use_esca) 2734 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 2735 else 2736 free_page((unsigned long)(kvm->arch.sca)); 2737 kvm->arch.sca = NULL; 2738 } 2739 2740 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 2741 { 2742 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 2743 int i, rc; 2744 char debug_name[16]; 2745 static unsigned long sca_offset; 2746 2747 rc = -EINVAL; 2748 #ifdef CONFIG_KVM_S390_UCONTROL 2749 if (type & ~KVM_VM_S390_UCONTROL) 2750 goto out_err; 2751 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 2752 goto out_err; 2753 #else 2754 if (type) 2755 goto out_err; 2756 #endif 2757 2758 rc = s390_enable_sie(); 2759 if (rc) 2760 goto out_err; 2761 2762 rc = -ENOMEM; 2763 2764 if (!sclp.has_64bscao) 2765 alloc_flags |= GFP_DMA; 2766 rwlock_init(&kvm->arch.sca_lock); 2767 /* start with basic SCA */ 2768 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 2769 if (!kvm->arch.sca) 2770 goto out_err; 2771 mutex_lock(&kvm_lock); 2772 sca_offset += 16; 2773 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 2774 sca_offset = 0; 2775 kvm->arch.sca = (struct bsca_block *) 2776 ((char *) kvm->arch.sca + sca_offset); 2777 mutex_unlock(&kvm_lock); 2778 2779 sprintf(debug_name, "kvm-%u", current->pid); 2780 2781 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 2782 if (!kvm->arch.dbf) 2783 goto out_err; 2784 2785 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 2786 kvm->arch.sie_page2 = 2787 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 2788 if (!kvm->arch.sie_page2) 2789 goto out_err; 2790 2791 kvm->arch.sie_page2->kvm = kvm; 2792 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 2793 2794 for (i = 0; i < kvm_s390_fac_size(); i++) { 2795 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 2796 (kvm_s390_fac_base[i] | 2797 kvm_s390_fac_ext[i]); 2798 kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 2799 kvm_s390_fac_base[i]; 2800 } 2801 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 2802 2803 /* we are always in czam mode - 
even on pre z14 machines */ 2804 set_kvm_facility(kvm->arch.model.fac_mask, 138); 2805 set_kvm_facility(kvm->arch.model.fac_list, 138); 2806 /* we emulate STHYI in kvm */ 2807 set_kvm_facility(kvm->arch.model.fac_mask, 74); 2808 set_kvm_facility(kvm->arch.model.fac_list, 74); 2809 if (MACHINE_HAS_TLB_GUEST) { 2810 set_kvm_facility(kvm->arch.model.fac_mask, 147); 2811 set_kvm_facility(kvm->arch.model.fac_list, 147); 2812 } 2813 2814 if (css_general_characteristics.aiv && test_facility(65)) 2815 set_kvm_facility(kvm->arch.model.fac_mask, 65); 2816 2817 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 2818 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 2819 2820 kvm_s390_crypto_init(kvm); 2821 2822 mutex_init(&kvm->arch.float_int.ais_lock); 2823 spin_lock_init(&kvm->arch.float_int.lock); 2824 for (i = 0; i < FIRQ_LIST_COUNT; i++) 2825 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 2826 init_waitqueue_head(&kvm->arch.ipte_wq); 2827 mutex_init(&kvm->arch.ipte_mutex); 2828 2829 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 2830 VM_EVENT(kvm, 3, "vm created with type %lu", type); 2831 2832 if (type & KVM_VM_S390_UCONTROL) { 2833 kvm->arch.gmap = NULL; 2834 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 2835 } else { 2836 if (sclp.hamax == U64_MAX) 2837 kvm->arch.mem_limit = TASK_SIZE_MAX; 2838 else 2839 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 2840 sclp.hamax + 1); 2841 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 2842 if (!kvm->arch.gmap) 2843 goto out_err; 2844 kvm->arch.gmap->private = kvm; 2845 kvm->arch.gmap->pfault_enabled = 0; 2846 } 2847 2848 kvm->arch.use_pfmfi = sclp.has_pfmfi; 2849 kvm->arch.use_skf = sclp.has_skey; 2850 spin_lock_init(&kvm->arch.start_stop_lock); 2851 kvm_s390_vsie_init(kvm); 2852 if (use_gisa) 2853 kvm_s390_gisa_init(kvm); 2854 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 2855 2856 return 0; 2857 out_err: 2858 free_page((unsigned long)kvm->arch.sie_page2); 2859 debug_unregister(kvm->arch.dbf); 2860 sca_dispose(kvm); 2861 KVM_EVENT(3, "creation of vm failed: %d", rc); 2862 return rc; 2863 } 2864 2865 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2866 { 2867 u16 rc, rrc; 2868 2869 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 2870 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 2871 kvm_s390_clear_local_irqs(vcpu); 2872 kvm_clear_async_pf_completion_queue(vcpu); 2873 if (!kvm_is_ucontrol(vcpu->kvm)) 2874 sca_del_vcpu(vcpu); 2875 2876 if (kvm_is_ucontrol(vcpu->kvm)) 2877 gmap_remove(vcpu->arch.gmap); 2878 2879 if (vcpu->kvm->arch.use_cmma) 2880 kvm_s390_vcpu_unsetup_cmma(vcpu); 2881 /* We can not hold the vcpu mutex here, we are already dying */ 2882 if (kvm_s390_pv_cpu_get_handle(vcpu)) 2883 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 2884 free_page((unsigned long)(vcpu->arch.sie_block)); 2885 } 2886 2887 void kvm_arch_destroy_vm(struct kvm *kvm) 2888 { 2889 u16 rc, rrc; 2890 2891 kvm_destroy_vcpus(kvm); 2892 sca_dispose(kvm); 2893 kvm_s390_gisa_destroy(kvm); 2894 /* 2895 * We are already at the end of life and kvm->lock is not taken. 2896 * This is ok as the file descriptor is closed by now and nobody 2897 * can mess with the pv state. To avoid lockdep_assert_held from 2898 * complaining we do not use kvm_s390_pv_is_protected. 
2899 */ 2900 if (kvm_s390_pv_get_handle(kvm)) 2901 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); 2902 debug_unregister(kvm->arch.dbf); 2903 free_page((unsigned long)kvm->arch.sie_page2); 2904 if (!kvm_is_ucontrol(kvm)) 2905 gmap_remove(kvm->arch.gmap); 2906 kvm_s390_destroy_adapters(kvm); 2907 kvm_s390_clear_float_irqs(kvm); 2908 kvm_s390_vsie_destroy(kvm); 2909 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 2910 } 2911 2912 /* Section: vcpu related */ 2913 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 2914 { 2915 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 2916 if (!vcpu->arch.gmap) 2917 return -ENOMEM; 2918 vcpu->arch.gmap->private = vcpu->kvm; 2919 2920 return 0; 2921 } 2922 2923 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 2924 { 2925 if (!kvm_s390_use_sca_entries()) 2926 return; 2927 read_lock(&vcpu->kvm->arch.sca_lock); 2928 if (vcpu->kvm->arch.use_esca) { 2929 struct esca_block *sca = vcpu->kvm->arch.sca; 2930 2931 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2932 sca->cpu[vcpu->vcpu_id].sda = 0; 2933 } else { 2934 struct bsca_block *sca = vcpu->kvm->arch.sca; 2935 2936 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2937 sca->cpu[vcpu->vcpu_id].sda = 0; 2938 } 2939 read_unlock(&vcpu->kvm->arch.sca_lock); 2940 } 2941 2942 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 2943 { 2944 if (!kvm_s390_use_sca_entries()) { 2945 struct bsca_block *sca = vcpu->kvm->arch.sca; 2946 2947 /* we still need the basic sca for the ipte control */ 2948 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2949 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2950 return; 2951 } 2952 read_lock(&vcpu->kvm->arch.sca_lock); 2953 if (vcpu->kvm->arch.use_esca) { 2954 struct esca_block *sca = vcpu->kvm->arch.sca; 2955 2956 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2957 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2958 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 2959 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 2960 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 2961 } else { 2962 struct bsca_block *sca = vcpu->kvm->arch.sca; 2963 2964 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 2965 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2966 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2967 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 2968 } 2969 read_unlock(&vcpu->kvm->arch.sca_lock); 2970 } 2971 2972 /* Basic SCA to Extended SCA data copy routines */ 2973 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 2974 { 2975 d->sda = s->sda; 2976 d->sigp_ctrl.c = s->sigp_ctrl.c; 2977 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 2978 } 2979 2980 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 2981 { 2982 int i; 2983 2984 d->ipte_control = s->ipte_control; 2985 d->mcn[0] = s->mcn; 2986 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 2987 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 2988 } 2989 2990 static int sca_switch_to_extended(struct kvm *kvm) 2991 { 2992 struct bsca_block *old_sca = kvm->arch.sca; 2993 struct esca_block *new_sca; 2994 struct kvm_vcpu *vcpu; 2995 unsigned long vcpu_idx; 2996 u32 scaol, scaoh; 2997 2998 if (kvm->arch.use_esca) 2999 return 0; 3000 3001 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3002 if (!new_sca) 3003 return -ENOMEM; 3004 3005 scaoh = (u32)((u64)(new_sca) >> 32); 3006 scaol = (u32)(u64)(new_sca) & ~0x3fU; 3007 3008 kvm_s390_vcpu_block_all(kvm); 3009 write_lock(&kvm->arch.sca_lock); 
3010 3011 sca_copy_b_to_e(new_sca, old_sca); 3012 3013 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3014 vcpu->arch.sie_block->scaoh = scaoh; 3015 vcpu->arch.sie_block->scaol = scaol; 3016 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3017 } 3018 kvm->arch.sca = new_sca; 3019 kvm->arch.use_esca = 1; 3020 3021 write_unlock(&kvm->arch.sca_lock); 3022 kvm_s390_vcpu_unblock_all(kvm); 3023 3024 free_page((unsigned long)old_sca); 3025 3026 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3027 old_sca, kvm->arch.sca); 3028 return 0; 3029 } 3030 3031 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3032 { 3033 int rc; 3034 3035 if (!kvm_s390_use_sca_entries()) { 3036 if (id < KVM_MAX_VCPUS) 3037 return true; 3038 return false; 3039 } 3040 if (id < KVM_S390_BSCA_CPU_SLOTS) 3041 return true; 3042 if (!sclp.has_esca || !sclp.has_64bscao) 3043 return false; 3044 3045 mutex_lock(&kvm->lock); 3046 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 3047 mutex_unlock(&kvm->lock); 3048 3049 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3050 } 3051 3052 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3053 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3054 { 3055 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3056 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3057 vcpu->arch.cputm_start = get_tod_clock_fast(); 3058 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3059 } 3060 3061 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3062 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3063 { 3064 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3065 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3066 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3067 vcpu->arch.cputm_start = 0; 3068 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3069 } 3070 3071 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3072 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3073 { 3074 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3075 vcpu->arch.cputm_enabled = true; 3076 __start_cpu_timer_accounting(vcpu); 3077 } 3078 3079 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3080 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3081 { 3082 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3083 __stop_cpu_timer_accounting(vcpu); 3084 vcpu->arch.cputm_enabled = false; 3085 } 3086 3087 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3088 { 3089 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3090 __enable_cpu_timer_accounting(vcpu); 3091 preempt_enable(); 3092 } 3093 3094 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3095 { 3096 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3097 __disable_cpu_timer_accounting(vcpu); 3098 preempt_enable(); 3099 } 3100 3101 /* set the cpu timer - may only be called from the VCPU thread itself */ 3102 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3103 { 3104 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3105 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3106 if (vcpu->arch.cputm_enabled) 3107 vcpu->arch.cputm_start = get_tod_clock_fast(); 3108 vcpu->arch.sie_block->cputm = cputm; 3109 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3110 preempt_enable(); 3111 } 3112 3113 /* update and get the cpu timer - can also be called from other VCPU threads */ 3114 __u64 
kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3115 { 3116 unsigned int seq; 3117 __u64 value; 3118 3119 if (unlikely(!vcpu->arch.cputm_enabled)) 3120 return vcpu->arch.sie_block->cputm; 3121 3122 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3123 do { 3124 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3125 /* 3126 * If the writer would ever execute a read in the critical 3127 * section, e.g. in irq context, we have a deadlock. 3128 */ 3129 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3130 value = vcpu->arch.sie_block->cputm; 3131 /* if cputm_start is 0, accounting is being started/stopped */ 3132 if (likely(vcpu->arch.cputm_start)) 3133 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3134 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3135 preempt_enable(); 3136 return value; 3137 } 3138 3139 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3140 { 3141 3142 gmap_enable(vcpu->arch.enabled_gmap); 3143 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3144 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3145 __start_cpu_timer_accounting(vcpu); 3146 vcpu->cpu = cpu; 3147 } 3148 3149 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3150 { 3151 vcpu->cpu = -1; 3152 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3153 __stop_cpu_timer_accounting(vcpu); 3154 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3155 vcpu->arch.enabled_gmap = gmap_get_enabled(); 3156 gmap_disable(vcpu->arch.enabled_gmap); 3157 3158 } 3159 3160 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3161 { 3162 mutex_lock(&vcpu->kvm->lock); 3163 preempt_disable(); 3164 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3165 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3166 preempt_enable(); 3167 mutex_unlock(&vcpu->kvm->lock); 3168 if (!kvm_is_ucontrol(vcpu->kvm)) { 3169 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3170 sca_add_vcpu(vcpu); 3171 } 3172 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3173 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3174 /* make vcpu_load load the right gmap on the first trigger */ 3175 vcpu->arch.enabled_gmap = vcpu->arch.gmap; 3176 } 3177 3178 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3179 { 3180 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3181 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3182 return true; 3183 return false; 3184 } 3185 3186 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3187 { 3188 /* At least one ECC subfunction must be present */ 3189 return kvm_has_pckmo_subfunc(kvm, 32) || 3190 kvm_has_pckmo_subfunc(kvm, 33) || 3191 kvm_has_pckmo_subfunc(kvm, 34) || 3192 kvm_has_pckmo_subfunc(kvm, 40) || 3193 kvm_has_pckmo_subfunc(kvm, 41); 3194 3195 } 3196 3197 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3198 { 3199 /* 3200 * If the AP instructions are not being interpreted and the MSAX3 3201 * facility is not configured for the guest, there is nothing to set up. 
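 * Otherwise the CRYCB designation is copied into the SIE block and the APIE,
 * protected-key (AES/DEA wrapping) and ECC controls are re-derived from the
 * current VM-wide crypto settings.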
3202 */ 3203 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3204 return; 3205 3206 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3207 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3208 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3209 vcpu->arch.sie_block->ecd &= ~ECD_ECC; 3210 3211 if (vcpu->kvm->arch.crypto.apie) 3212 vcpu->arch.sie_block->eca |= ECA_APIE; 3213 3214 /* Set up protected key support */ 3215 if (vcpu->kvm->arch.crypto.aes_kw) { 3216 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3217 /* ecc is also wrapped with AES key */ 3218 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3219 vcpu->arch.sie_block->ecd |= ECD_ECC; 3220 } 3221 3222 if (vcpu->kvm->arch.crypto.dea_kw) 3223 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3224 } 3225 3226 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3227 { 3228 free_page(vcpu->arch.sie_block->cbrlo); 3229 vcpu->arch.sie_block->cbrlo = 0; 3230 } 3231 3232 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3233 { 3234 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); 3235 if (!vcpu->arch.sie_block->cbrlo) 3236 return -ENOMEM; 3237 return 0; 3238 } 3239 3240 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3241 { 3242 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3243 3244 vcpu->arch.sie_block->ibc = model->ibc; 3245 if (test_kvm_facility(vcpu->kvm, 7)) 3246 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 3247 } 3248 3249 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3250 { 3251 int rc = 0; 3252 u16 uvrc, uvrrc; 3253 3254 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3255 CPUSTAT_SM | 3256 CPUSTAT_STOPPED); 3257 3258 if (test_kvm_facility(vcpu->kvm, 78)) 3259 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3260 else if (test_kvm_facility(vcpu->kvm, 8)) 3261 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3262 3263 kvm_s390_vcpu_setup_model(vcpu); 3264 3265 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3266 if (MACHINE_HAS_ESOP) 3267 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3268 if (test_kvm_facility(vcpu->kvm, 9)) 3269 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3270 if (test_kvm_facility(vcpu->kvm, 73)) 3271 vcpu->arch.sie_block->ecb |= ECB_TE; 3272 if (!kvm_is_ucontrol(vcpu->kvm)) 3273 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3274 3275 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3276 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3277 if (test_kvm_facility(vcpu->kvm, 130)) 3278 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3279 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3280 if (sclp.has_cei) 3281 vcpu->arch.sie_block->eca |= ECA_CEI; 3282 if (sclp.has_ib) 3283 vcpu->arch.sie_block->eca |= ECA_IB; 3284 if (sclp.has_siif) 3285 vcpu->arch.sie_block->eca |= ECA_SII; 3286 if (sclp.has_sigpif) 3287 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3288 if (test_kvm_facility(vcpu->kvm, 129)) { 3289 vcpu->arch.sie_block->eca |= ECA_VX; 3290 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3291 } 3292 if (test_kvm_facility(vcpu->kvm, 139)) 3293 vcpu->arch.sie_block->ecd |= ECD_MEF; 3294 if (test_kvm_facility(vcpu->kvm, 156)) 3295 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3296 if (vcpu->arch.sie_block->gd) { 3297 vcpu->arch.sie_block->eca |= ECA_AIV; 3298 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", 3299 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3300 } 3301 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) 3302 | SDNXC; 3303 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 
3304 3305 if (sclp.has_kss) 3306 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3307 else 3308 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3309 3310 if (vcpu->kvm->arch.use_cmma) { 3311 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3312 if (rc) 3313 return rc; 3314 } 3315 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3316 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3317 3318 vcpu->arch.sie_block->hpid = HPID_KVM; 3319 3320 kvm_s390_vcpu_crypto_setup(vcpu); 3321 3322 mutex_lock(&vcpu->kvm->lock); 3323 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3324 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3325 if (rc) 3326 kvm_s390_vcpu_unsetup_cmma(vcpu); 3327 } 3328 mutex_unlock(&vcpu->kvm->lock); 3329 3330 return rc; 3331 } 3332 3333 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3334 { 3335 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3336 return -EINVAL; 3337 return 0; 3338 } 3339 3340 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3341 { 3342 struct sie_page *sie_page; 3343 int rc; 3344 3345 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3346 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3347 if (!sie_page) 3348 return -ENOMEM; 3349 3350 vcpu->arch.sie_block = &sie_page->sie_block; 3351 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 3352 3353 /* the real guest size will always be smaller than msl */ 3354 vcpu->arch.sie_block->mso = 0; 3355 vcpu->arch.sie_block->msl = sclp.hamax; 3356 3357 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3358 spin_lock_init(&vcpu->arch.local_int.lock); 3359 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3360 seqcount_init(&vcpu->arch.cputm_seqcount); 3361 3362 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3363 kvm_clear_async_pf_completion_queue(vcpu); 3364 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3365 KVM_SYNC_GPRS | 3366 KVM_SYNC_ACRS | 3367 KVM_SYNC_CRS | 3368 KVM_SYNC_ARCH0 | 3369 KVM_SYNC_PFAULT | 3370 KVM_SYNC_DIAG318; 3371 kvm_s390_set_prefix(vcpu, 0); 3372 if (test_kvm_facility(vcpu->kvm, 64)) 3373 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 3374 if (test_kvm_facility(vcpu->kvm, 82)) 3375 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 3376 if (test_kvm_facility(vcpu->kvm, 133)) 3377 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 3378 if (test_kvm_facility(vcpu->kvm, 156)) 3379 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 3380 /* fprs can be synchronized via vrs, even if the guest has no vx. With 3381 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
3382 */ 3383 if (MACHINE_HAS_VX) 3384 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 3385 else 3386 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 3387 3388 if (kvm_is_ucontrol(vcpu->kvm)) { 3389 rc = __kvm_ucontrol_vcpu_init(vcpu); 3390 if (rc) 3391 goto out_free_sie_block; 3392 } 3393 3394 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 3395 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3396 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 3397 3398 rc = kvm_s390_vcpu_setup(vcpu); 3399 if (rc) 3400 goto out_ucontrol_uninit; 3401 return 0; 3402 3403 out_ucontrol_uninit: 3404 if (kvm_is_ucontrol(vcpu->kvm)) 3405 gmap_remove(vcpu->arch.gmap); 3406 out_free_sie_block: 3407 free_page((unsigned long)(vcpu->arch.sie_block)); 3408 return rc; 3409 } 3410 3411 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3412 { 3413 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 3414 return kvm_s390_vcpu_has_irq(vcpu, 0); 3415 } 3416 3417 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 3418 { 3419 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 3420 } 3421 3422 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 3423 { 3424 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3425 exit_sie(vcpu); 3426 } 3427 3428 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 3429 { 3430 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 3431 } 3432 3433 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 3434 { 3435 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3436 exit_sie(vcpu); 3437 } 3438 3439 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 3440 { 3441 return atomic_read(&vcpu->arch.sie_block->prog20) & 3442 (PROG_BLOCK_SIE | PROG_REQUEST); 3443 } 3444 3445 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 3446 { 3447 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 3448 } 3449 3450 /* 3451 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 3452 * If the CPU is not running (e.g. waiting as idle) the function will 3453 * return immediately. 
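 * The kick is performed by requesting a STOP interception (CPUSTAT_STOP_INT)
 * and poking a possibly running vSIE context; the loop below then waits for
 * the PROG_IN_SIE bit in prog0c to be cleared by the SIE exit path.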
*/ 3454 void exit_sie(struct kvm_vcpu *vcpu) 3455 { 3456 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 3457 kvm_s390_vsie_kick(vcpu); 3458 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 3459 cpu_relax(); 3460 } 3461 3462 /* Kick a guest cpu out of SIE to process a request synchronously */ 3463 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 3464 { 3465 __kvm_make_request(req, vcpu); 3466 kvm_s390_vcpu_request(vcpu); 3467 } 3468 3469 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 3470 unsigned long end) 3471 { 3472 struct kvm *kvm = gmap->private; 3473 struct kvm_vcpu *vcpu; 3474 unsigned long prefix; 3475 unsigned long i; 3476 3477 if (gmap_is_shadow(gmap)) 3478 return; 3479 if (start >= 1UL << 31) 3480 /* We are only interested in prefix pages */ 3481 return; 3482 kvm_for_each_vcpu(i, vcpu, kvm) { 3483 /* match against both prefix pages */ 3484 prefix = kvm_s390_get_prefix(vcpu); 3485 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 3486 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 3487 start, end); 3488 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3489 } 3490 } 3491 } 3492 3493 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 3494 { 3495 /* do not poll with more than halt_poll_max_steal percent of steal time */ 3496 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= 3497 READ_ONCE(halt_poll_max_steal)) { 3498 vcpu->stat.halt_no_poll_steal++; 3499 return true; 3500 } 3501 return false; 3502 } 3503 3504 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 3505 { 3506 /* kvm common code refers to this, but never calls it */ 3507 BUG(); 3508 return 0; 3509 } 3510 3511 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 3512 struct kvm_one_reg *reg) 3513 { 3514 int r = -EINVAL; 3515 3516 switch (reg->id) { 3517 case KVM_REG_S390_TODPR: 3518 r = put_user(vcpu->arch.sie_block->todpr, 3519 (u32 __user *)reg->addr); 3520 break; 3521 case KVM_REG_S390_EPOCHDIFF: 3522 r = put_user(vcpu->arch.sie_block->epoch, 3523 (u64 __user *)reg->addr); 3524 break; 3525 case KVM_REG_S390_CPU_TIMER: 3526 r = put_user(kvm_s390_get_cpu_timer(vcpu), 3527 (u64 __user *)reg->addr); 3528 break; 3529 case KVM_REG_S390_CLOCK_COMP: 3530 r = put_user(vcpu->arch.sie_block->ckc, 3531 (u64 __user *)reg->addr); 3532 break; 3533 case KVM_REG_S390_PFTOKEN: 3534 r = put_user(vcpu->arch.pfault_token, 3535 (u64 __user *)reg->addr); 3536 break; 3537 case KVM_REG_S390_PFCOMPARE: 3538 r = put_user(vcpu->arch.pfault_compare, 3539 (u64 __user *)reg->addr); 3540 break; 3541 case KVM_REG_S390_PFSELECT: 3542 r = put_user(vcpu->arch.pfault_select, 3543 (u64 __user *)reg->addr); 3544 break; 3545 case KVM_REG_S390_PP: 3546 r = put_user(vcpu->arch.sie_block->pp, 3547 (u64 __user *)reg->addr); 3548 break; 3549 case KVM_REG_S390_GBEA: 3550 r = put_user(vcpu->arch.sie_block->gbea, 3551 (u64 __user *)reg->addr); 3552 break; 3553 default: 3554 break; 3555 } 3556 3557 return r; 3558 } 3559 3560 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 3561 struct kvm_one_reg *reg) 3562 { 3563 int r = -EINVAL; 3564 __u64 val; 3565 3566 switch (reg->id) { 3567 case KVM_REG_S390_TODPR: 3568 r = get_user(vcpu->arch.sie_block->todpr, 3569 (u32 __user *)reg->addr); 3570 break; 3571 case KVM_REG_S390_EPOCHDIFF: 3572 r = get_user(vcpu->arch.sie_block->epoch, 3573 (u64 __user *)reg->addr); 3574 break; 3575 case KVM_REG_S390_CPU_TIMER: 3576 r = get_user(val, (u64 __user *)reg->addr); 3577 if (!r) 3578 kvm_s390_set_cpu_timer(vcpu, val); 3579 break; 3580 case 
KVM_REG_S390_CLOCK_COMP: 3581 r = get_user(vcpu->arch.sie_block->ckc, 3582 (u64 __user *)reg->addr); 3583 break; 3584 case KVM_REG_S390_PFTOKEN: 3585 r = get_user(vcpu->arch.pfault_token, 3586 (u64 __user *)reg->addr); 3587 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 3588 kvm_clear_async_pf_completion_queue(vcpu); 3589 break; 3590 case KVM_REG_S390_PFCOMPARE: 3591 r = get_user(vcpu->arch.pfault_compare, 3592 (u64 __user *)reg->addr); 3593 break; 3594 case KVM_REG_S390_PFSELECT: 3595 r = get_user(vcpu->arch.pfault_select, 3596 (u64 __user *)reg->addr); 3597 break; 3598 case KVM_REG_S390_PP: 3599 r = get_user(vcpu->arch.sie_block->pp, 3600 (u64 __user *)reg->addr); 3601 break; 3602 case KVM_REG_S390_GBEA: 3603 r = get_user(vcpu->arch.sie_block->gbea, 3604 (u64 __user *)reg->addr); 3605 break; 3606 default: 3607 break; 3608 } 3609 3610 return r; 3611 } 3612 3613 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 3614 { 3615 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 3616 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3617 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 3618 3619 kvm_clear_async_pf_completion_queue(vcpu); 3620 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 3621 kvm_s390_vcpu_stop(vcpu); 3622 kvm_s390_clear_local_irqs(vcpu); 3623 } 3624 3625 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 3626 { 3627 /* Initial reset is a superset of the normal reset */ 3628 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 3629 3630 /* 3631 * This equals initial cpu reset in pop, but we don't switch to ESA. 3632 * We do not only reset the internal data, but also ... 3633 */ 3634 vcpu->arch.sie_block->gpsw.mask = 0; 3635 vcpu->arch.sie_block->gpsw.addr = 0; 3636 kvm_s390_set_prefix(vcpu, 0); 3637 kvm_s390_set_cpu_timer(vcpu, 0); 3638 vcpu->arch.sie_block->ckc = 0; 3639 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 3640 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 3641 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 3642 3643 /* ... the data in sync regs */ 3644 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 3645 vcpu->run->s.regs.ckc = 0; 3646 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 3647 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 3648 vcpu->run->psw_addr = 0; 3649 vcpu->run->psw_mask = 0; 3650 vcpu->run->s.regs.todpr = 0; 3651 vcpu->run->s.regs.cputm = 0; 3652 vcpu->run->s.regs.ckc = 0; 3653 vcpu->run->s.regs.pp = 0; 3654 vcpu->run->s.regs.gbea = 1; 3655 vcpu->run->s.regs.fpc = 0; 3656 /* 3657 * Do not reset these registers in the protected case, as some of 3658 * them are overlayed and they are not accessible in this case 3659 * anyway. 
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}

static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

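	/*
	 * Drop any previously configured debug state before validating the
	 * new request, so a rejected request never leaves stale breakpoint
	 * data behind.
	 */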
3794 vcpu->guest_debug = 0; 3795 kvm_s390_clear_bp_data(vcpu); 3796 3797 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 3798 rc = -EINVAL; 3799 goto out; 3800 } 3801 if (!sclp.has_gpere) { 3802 rc = -EINVAL; 3803 goto out; 3804 } 3805 3806 if (dbg->control & KVM_GUESTDBG_ENABLE) { 3807 vcpu->guest_debug = dbg->control; 3808 /* enforce guest PER */ 3809 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 3810 3811 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 3812 rc = kvm_s390_import_bp_data(vcpu, dbg); 3813 } else { 3814 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3815 vcpu->arch.guestdbg.last_bp = 0; 3816 } 3817 3818 if (rc) { 3819 vcpu->guest_debug = 0; 3820 kvm_s390_clear_bp_data(vcpu); 3821 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 3822 } 3823 3824 out: 3825 vcpu_put(vcpu); 3826 return rc; 3827 } 3828 3829 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 3830 struct kvm_mp_state *mp_state) 3831 { 3832 int ret; 3833 3834 vcpu_load(vcpu); 3835 3836 /* CHECK_STOP and LOAD are not supported yet */ 3837 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 3838 KVM_MP_STATE_OPERATING; 3839 3840 vcpu_put(vcpu); 3841 return ret; 3842 } 3843 3844 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 3845 struct kvm_mp_state *mp_state) 3846 { 3847 int rc = 0; 3848 3849 vcpu_load(vcpu); 3850 3851 /* user space knows about this interface - let it control the state */ 3852 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 3853 3854 switch (mp_state->mp_state) { 3855 case KVM_MP_STATE_STOPPED: 3856 rc = kvm_s390_vcpu_stop(vcpu); 3857 break; 3858 case KVM_MP_STATE_OPERATING: 3859 rc = kvm_s390_vcpu_start(vcpu); 3860 break; 3861 case KVM_MP_STATE_LOAD: 3862 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 3863 rc = -ENXIO; 3864 break; 3865 } 3866 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 3867 break; 3868 case KVM_MP_STATE_CHECK_STOP: 3869 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 3870 default: 3871 rc = -ENXIO; 3872 } 3873 3874 vcpu_put(vcpu); 3875 return rc; 3876 } 3877 3878 static bool ibs_enabled(struct kvm_vcpu *vcpu) 3879 { 3880 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 3881 } 3882 3883 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 3884 { 3885 retry: 3886 kvm_s390_vcpu_request_handled(vcpu); 3887 if (!kvm_request_pending(vcpu)) 3888 return 0; 3889 /* 3890 * If the guest prefix changed, re-arm the ipte notifier for the 3891 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 3892 * This ensures that the ipte instruction for this request has 3893 * already finished. We might race against a second unmapper that 3894 * wants to set the blocking bit. Lets just retry the request loop. 
3895 */ 3896 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 3897 int rc; 3898 rc = gmap_mprotect_notify(vcpu->arch.gmap, 3899 kvm_s390_get_prefix(vcpu), 3900 PAGE_SIZE * 2, PROT_WRITE); 3901 if (rc) { 3902 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 3903 return rc; 3904 } 3905 goto retry; 3906 } 3907 3908 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 3909 vcpu->arch.sie_block->ihcpu = 0xffff; 3910 goto retry; 3911 } 3912 3913 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 3914 if (!ibs_enabled(vcpu)) { 3915 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 3916 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 3917 } 3918 goto retry; 3919 } 3920 3921 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 3922 if (ibs_enabled(vcpu)) { 3923 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 3924 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 3925 } 3926 goto retry; 3927 } 3928 3929 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 3930 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3931 goto retry; 3932 } 3933 3934 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 3935 /* 3936 * Disable CMM virtualization; we will emulate the ESSA 3937 * instruction manually, in order to provide additional 3938 * functionalities needed for live migration. 3939 */ 3940 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 3941 goto retry; 3942 } 3943 3944 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 3945 /* 3946 * Re-enable CMM virtualization if CMMA is available and 3947 * CMM has been used. 3948 */ 3949 if ((vcpu->kvm->arch.use_cmma) && 3950 (vcpu->kvm->mm->context.uses_cmm)) 3951 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 3952 goto retry; 3953 } 3954 3955 /* nothing to do, just clear the request */ 3956 kvm_clear_request(KVM_REQ_UNHALT, vcpu); 3957 /* we left the vsie handler, nothing to do, just clear the request */ 3958 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 3959 3960 return 0; 3961 } 3962 3963 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 3964 { 3965 struct kvm_vcpu *vcpu; 3966 union tod_clock clk; 3967 unsigned long i; 3968 3969 preempt_disable(); 3970 3971 store_tod_clock_ext(&clk); 3972 3973 kvm->arch.epoch = gtod->tod - clk.tod; 3974 kvm->arch.epdx = 0; 3975 if (test_kvm_facility(kvm, 139)) { 3976 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 3977 if (kvm->arch.epoch > gtod->tod) 3978 kvm->arch.epdx -= 1; 3979 } 3980 3981 kvm_s390_vcpu_block_all(kvm); 3982 kvm_for_each_vcpu(i, vcpu, kvm) { 3983 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 3984 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 3985 } 3986 3987 kvm_s390_vcpu_unblock_all(kvm); 3988 preempt_enable(); 3989 } 3990 3991 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 3992 { 3993 mutex_lock(&kvm->lock); 3994 __kvm_s390_set_tod_clock(kvm, gtod); 3995 mutex_unlock(&kvm->lock); 3996 } 3997 3998 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 3999 { 4000 if (!mutex_trylock(&kvm->lock)) 4001 return 0; 4002 __kvm_s390_set_tod_clock(kvm, gtod); 4003 mutex_unlock(&kvm->lock); 4004 return 1; 4005 } 4006 4007 /** 4008 * kvm_arch_fault_in_page - fault-in guest page if necessary 4009 * @vcpu: The corresponding virtual cpu 4010 * @gpa: Guest physical address 4011 * @writable: Whether the page should be writable or not 4012 * 4013 * Make sure that a guest page has been faulted-in on the host. 4014 * 4015 * Return: Zero on success, negative error code otherwise. 
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

	return true;
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}

static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return false;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return false;
	if (psw_extint_disabled(vcpu))
		return false;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return false;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return false;
	if (!vcpu->arch.gmap->pfault_enabled)
		return false;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return false;

	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
4106 */ 4107 kvm_check_async_pf_completion(vcpu); 4108 4109 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4110 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4111 4112 if (need_resched()) 4113 schedule(); 4114 4115 if (!kvm_is_ucontrol(vcpu->kvm)) { 4116 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4117 if (rc) 4118 return rc; 4119 } 4120 4121 rc = kvm_s390_handle_requests(vcpu); 4122 if (rc) 4123 return rc; 4124 4125 if (guestdbg_enabled(vcpu)) { 4126 kvm_s390_backup_guest_per_regs(vcpu); 4127 kvm_s390_patch_guest_per_regs(vcpu); 4128 } 4129 4130 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4131 4132 vcpu->arch.sie_block->icptcode = 0; 4133 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4134 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4135 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4136 4137 return 0; 4138 } 4139 4140 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) 4141 { 4142 struct kvm_s390_pgm_info pgm_info = { 4143 .code = PGM_ADDRESSING, 4144 }; 4145 u8 opcode, ilen; 4146 int rc; 4147 4148 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4149 trace_kvm_s390_sie_fault(vcpu); 4150 4151 /* 4152 * We want to inject an addressing exception, which is defined as a 4153 * suppressing or terminating exception. However, since we came here 4154 * by a DAT access exception, the PSW still points to the faulting 4155 * instruction since DAT exceptions are nullifying. So we've got 4156 * to look up the current opcode to get the length of the instruction 4157 * to be able to forward the PSW. 4158 */ 4159 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4160 ilen = insn_length(opcode); 4161 if (rc < 0) { 4162 return rc; 4163 } else if (rc) { 4164 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4165 * Forward by arbitrary ilc, injection will take care of 4166 * nullification if necessary. 
4167 */ 4168 pgm_info = vcpu->arch.pgm; 4169 ilen = 4; 4170 } 4171 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4172 kvm_s390_forward_psw(vcpu, ilen); 4173 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4174 } 4175 4176 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 4177 { 4178 struct mcck_volatile_info *mcck_info; 4179 struct sie_page *sie_page; 4180 4181 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 4182 vcpu->arch.sie_block->icptcode); 4183 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 4184 4185 if (guestdbg_enabled(vcpu)) 4186 kvm_s390_restore_guest_per_regs(vcpu); 4187 4188 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 4189 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 4190 4191 if (exit_reason == -EINTR) { 4192 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 4193 sie_page = container_of(vcpu->arch.sie_block, 4194 struct sie_page, sie_block); 4195 mcck_info = &sie_page->mcck_info; 4196 kvm_s390_reinject_machine_check(vcpu, mcck_info); 4197 return 0; 4198 } 4199 4200 if (vcpu->arch.sie_block->icptcode > 0) { 4201 int rc = kvm_handle_sie_intercept(vcpu); 4202 4203 if (rc != -EOPNOTSUPP) 4204 return rc; 4205 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 4206 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 4207 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 4208 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 4209 return -EREMOTE; 4210 } else if (exit_reason != -EFAULT) { 4211 vcpu->stat.exit_null++; 4212 return 0; 4213 } else if (kvm_is_ucontrol(vcpu->kvm)) { 4214 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4215 vcpu->run->s390_ucontrol.trans_exc_code = 4216 current->thread.gmap_addr; 4217 vcpu->run->s390_ucontrol.pgm_code = 0x10; 4218 return -EREMOTE; 4219 } else if (current->thread.gmap_pfault) { 4220 trace_kvm_s390_major_guest_pfault(vcpu); 4221 current->thread.gmap_pfault = 0; 4222 if (kvm_arch_setup_async_pf(vcpu)) 4223 return 0; 4224 vcpu->stat.pfault_sync++; 4225 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 4226 } 4227 return vcpu_post_run_fault_in_sie(vcpu); 4228 } 4229 4230 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 4231 static int __vcpu_run(struct kvm_vcpu *vcpu) 4232 { 4233 int rc, exit_reason; 4234 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 4235 4236 /* 4237 * We try to hold kvm->srcu during most of vcpu_run (except when run- 4238 * ning the guest), so that memslots (and other stuff) are protected 4239 */ 4240 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4241 4242 do { 4243 rc = vcpu_pre_run(vcpu); 4244 if (rc) 4245 break; 4246 4247 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4248 /* 4249 * As PF_VCPU will be used in fault handler, between 4250 * guest_enter and guest_exit should be no uaccess. 
4251 */ 4252 local_irq_disable(); 4253 guest_enter_irqoff(); 4254 __disable_cpu_timer_accounting(vcpu); 4255 local_irq_enable(); 4256 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4257 memcpy(sie_page->pv_grregs, 4258 vcpu->run->s.regs.gprs, 4259 sizeof(sie_page->pv_grregs)); 4260 } 4261 if (test_cpu_flag(CIF_FPU)) 4262 load_fpu_regs(); 4263 exit_reason = sie64a(vcpu->arch.sie_block, 4264 vcpu->run->s.regs.gprs); 4265 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4266 memcpy(vcpu->run->s.regs.gprs, 4267 sie_page->pv_grregs, 4268 sizeof(sie_page->pv_grregs)); 4269 /* 4270 * We're not allowed to inject interrupts on intercepts 4271 * that leave the guest state in an "in-between" state 4272 * where the next SIE entry will do a continuation. 4273 * Fence interrupts in our "internal" PSW. 4274 */ 4275 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 4276 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 4277 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4278 } 4279 } 4280 local_irq_disable(); 4281 __enable_cpu_timer_accounting(vcpu); 4282 guest_exit_irqoff(); 4283 local_irq_enable(); 4284 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4285 4286 rc = vcpu_post_run(vcpu, exit_reason); 4287 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 4288 4289 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4290 return rc; 4291 } 4292 4293 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 4294 { 4295 struct kvm_run *kvm_run = vcpu->run; 4296 struct runtime_instr_cb *riccb; 4297 struct gs_cb *gscb; 4298 4299 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 4300 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 4301 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 4302 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 4303 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4304 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 4305 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 4306 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 4307 } 4308 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 4309 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 4310 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 4311 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 4312 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4313 kvm_clear_async_pf_completion_queue(vcpu); 4314 } 4315 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 4316 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 4317 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 4318 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 4319 } 4320 /* 4321 * If userspace sets the riccb (e.g. after migration) to a valid state, 4322 * we should enable RI here instead of doing the lazy enablement. 4323 */ 4324 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 4325 test_kvm_facility(vcpu->kvm, 64) && 4326 riccb->v && 4327 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 4328 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 4329 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 4330 } 4331 /* 4332 * If userspace sets the gscb (e.g. after migration) to non-zero, 4333 * we should enable GS here instead of doing the lazy enablement. 
4334 */ 4335 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 4336 test_kvm_facility(vcpu->kvm, 133) && 4337 gscb->gssm && 4338 !vcpu->arch.gs_enabled) { 4339 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 4340 vcpu->arch.sie_block->ecb |= ECB_GS; 4341 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 4342 vcpu->arch.gs_enabled = 1; 4343 } 4344 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 4345 test_kvm_facility(vcpu->kvm, 82)) { 4346 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4347 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 4348 } 4349 if (MACHINE_HAS_GS) { 4350 preempt_disable(); 4351 __ctl_set_bit(2, 4); 4352 if (current->thread.gs_cb) { 4353 vcpu->arch.host_gscb = current->thread.gs_cb; 4354 save_gs_cb(vcpu->arch.host_gscb); 4355 } 4356 if (vcpu->arch.gs_enabled) { 4357 current->thread.gs_cb = (struct gs_cb *) 4358 &vcpu->run->s.regs.gscb; 4359 restore_gs_cb(current->thread.gs_cb); 4360 } 4361 preempt_enable(); 4362 } 4363 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 4364 } 4365 4366 static void sync_regs(struct kvm_vcpu *vcpu) 4367 { 4368 struct kvm_run *kvm_run = vcpu->run; 4369 4370 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 4371 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 4372 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 4373 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 4374 /* some control register changes require a tlb flush */ 4375 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4376 } 4377 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 4378 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 4379 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 4380 } 4381 save_access_regs(vcpu->arch.host_acrs); 4382 restore_access_regs(vcpu->run->s.regs.acrs); 4383 /* save host (userspace) fprs/vrs */ 4384 save_fpu_regs(); 4385 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; 4386 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; 4387 if (MACHINE_HAS_VX) 4388 current->thread.fpu.regs = vcpu->run->s.regs.vrs; 4389 else 4390 current->thread.fpu.regs = vcpu->run->s.regs.fprs; 4391 current->thread.fpu.fpc = vcpu->run->s.regs.fpc; 4392 if (test_fp_ctl(current->thread.fpu.fpc)) 4393 /* User space provided an invalid FPC, let's clear it */ 4394 current->thread.fpu.fpc = 0; 4395 4396 /* Sync fmt2 only data */ 4397 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 4398 sync_regs_fmt2(vcpu); 4399 } else { 4400 /* 4401 * In several places we have to modify our internal view to 4402 * not do things that are disallowed by the ultravisor. For 4403 * example we must not inject interrupts after specific exits 4404 * (e.g. 112 prefix page not secure). We do this by turning 4405 * off the machine check, external and I/O interrupt bits 4406 * of our PSW copy. To avoid getting validity intercepts, we 4407 * do only accept the condition code from userspace. 
4408 */ 4409 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 4410 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 4411 PSW_MASK_CC; 4412 } 4413 4414 kvm_run->kvm_dirty_regs = 0; 4415 } 4416 4417 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 4418 { 4419 struct kvm_run *kvm_run = vcpu->run; 4420 4421 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 4422 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 4423 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 4424 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 4425 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 4426 if (MACHINE_HAS_GS) { 4427 preempt_disable(); 4428 __ctl_set_bit(2, 4); 4429 if (vcpu->arch.gs_enabled) 4430 save_gs_cb(current->thread.gs_cb); 4431 current->thread.gs_cb = vcpu->arch.host_gscb; 4432 restore_gs_cb(vcpu->arch.host_gscb); 4433 if (!vcpu->arch.host_gscb) 4434 __ctl_clear_bit(2, 4); 4435 vcpu->arch.host_gscb = NULL; 4436 preempt_enable(); 4437 } 4438 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 4439 } 4440 4441 static void store_regs(struct kvm_vcpu *vcpu) 4442 { 4443 struct kvm_run *kvm_run = vcpu->run; 4444 4445 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 4446 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 4447 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 4448 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 4449 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 4450 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 4451 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 4452 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 4453 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 4454 save_access_regs(vcpu->run->s.regs.acrs); 4455 restore_access_regs(vcpu->arch.host_acrs); 4456 /* Save guest register state */ 4457 save_fpu_regs(); 4458 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4459 /* Restore will be done lazily at return */ 4460 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; 4461 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; 4462 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 4463 store_regs_fmt2(vcpu); 4464 } 4465 4466 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 4467 { 4468 struct kvm_run *kvm_run = vcpu->run; 4469 int rc; 4470 4471 if (kvm_run->immediate_exit) 4472 return -EINTR; 4473 4474 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 4475 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 4476 return -EINVAL; 4477 4478 vcpu_load(vcpu); 4479 4480 if (guestdbg_exit_pending(vcpu)) { 4481 kvm_s390_prepare_debug_exit(vcpu); 4482 rc = 0; 4483 goto out; 4484 } 4485 4486 kvm_sigset_activate(vcpu); 4487 4488 /* 4489 * no need to check the return value of vcpu_start as it can only have 4490 * an error for protvirt, but protvirt means user cpu state 4491 */ 4492 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 4493 kvm_s390_vcpu_start(vcpu); 4494 } else if (is_vcpu_stopped(vcpu)) { 4495 pr_err_ratelimited("can't run stopped vcpu %d\n", 4496 vcpu->vcpu_id); 4497 rc = -EINVAL; 4498 goto out; 4499 } 4500 4501 sync_regs(vcpu); 4502 enable_cpu_timer_accounting(vcpu); 4503 4504 might_fault(); 4505 rc = __vcpu_run(vcpu); 4506 4507 if (signal_pending(current) && !rc) { 4508 kvm_run->exit_reason = KVM_EXIT_INTR; 4509 rc = -EINTR; 4510 } 4511 4512 if (guestdbg_exit_pending(vcpu) && !rc) { 4513 kvm_s390_prepare_debug_exit(vcpu); 4514 rc = 0; 4515 } 4516 4517 if (rc == -EREMOTE) { 4518 /* userspace support is needed, kvm_run has been prepared */ 4519 rc = 0; 4520 } 4521 4522 
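	/*
	 * Stop guest CPU timer accounting and propagate the register state
	 * back into kvm_run before returning to userspace.
	 */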
	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl.
Let's update our copies before we save 4596 * it into the save area 4597 */ 4598 save_fpu_regs(); 4599 vcpu->run->s.regs.fpc = current->thread.fpu.fpc; 4600 save_access_regs(vcpu->run->s.regs.acrs); 4601 4602 return kvm_s390_store_status_unloaded(vcpu, addr); 4603 } 4604 4605 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4606 { 4607 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 4608 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 4609 } 4610 4611 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 4612 { 4613 unsigned long i; 4614 struct kvm_vcpu *vcpu; 4615 4616 kvm_for_each_vcpu(i, vcpu, kvm) { 4617 __disable_ibs_on_vcpu(vcpu); 4618 } 4619 } 4620 4621 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 4622 { 4623 if (!sclp.has_ibs) 4624 return; 4625 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 4626 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 4627 } 4628 4629 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 4630 { 4631 int i, online_vcpus, r = 0, started_vcpus = 0; 4632 4633 if (!is_vcpu_stopped(vcpu)) 4634 return 0; 4635 4636 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 4637 /* Only one cpu at a time may enter/leave the STOPPED state. */ 4638 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4639 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4640 4641 /* Let's tell the UV that we want to change into the operating state */ 4642 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4643 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 4644 if (r) { 4645 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4646 return r; 4647 } 4648 } 4649 4650 for (i = 0; i < online_vcpus; i++) { 4651 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 4652 started_vcpus++; 4653 } 4654 4655 if (started_vcpus == 0) { 4656 /* we're the only active VCPU -> speed it up */ 4657 __enable_ibs_on_vcpu(vcpu); 4658 } else if (started_vcpus == 1) { 4659 /* 4660 * As we are starting a second VCPU, we have to disable 4661 * the IBS facility on all VCPUs to remove potentially 4662 * outstanding ENABLE requests. 4663 */ 4664 __disable_ibs_on_all_vcpus(vcpu->kvm); 4665 } 4666 4667 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 4668 /* 4669 * The real PSW might have changed due to a RESTART interpreted by the 4670 * ultravisor. We block all interrupts and let the next sie exit 4671 * refresh our view. 4672 */ 4673 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4674 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 4675 /* 4676 * Another VCPU might have used IBS while we were offline. 4677 * Let's play safe and flush the VCPU at startup. 4678 */ 4679 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 4680 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4681 return 0; 4682 } 4683 4684 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 4685 { 4686 int i, online_vcpus, r = 0, started_vcpus = 0; 4687 struct kvm_vcpu *started_vcpu = NULL; 4688 4689 if (is_vcpu_stopped(vcpu)) 4690 return 0; 4691 4692 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 4693 /* Only one cpu at a time may enter/leave the STOPPED state. 
*/ 4694 spin_lock(&vcpu->kvm->arch.start_stop_lock); 4695 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 4696 4697 /* Let's tell the UV that we want to change into the stopped state */ 4698 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4699 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); 4700 if (r) { 4701 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4702 return r; 4703 } 4704 } 4705 4706 /* 4707 * Set the VCPU to STOPPED and THEN clear the interrupt flag, 4708 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders 4709 * have been fully processed. This will ensure that the VCPU 4710 * is kept BUSY if another VCPU is inquiring with SIGP SENSE. 4711 */ 4712 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); 4713 kvm_s390_clear_stop_irq(vcpu); 4714 4715 __disable_ibs_on_vcpu(vcpu); 4716 4717 for (i = 0; i < online_vcpus; i++) { 4718 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i); 4719 4720 if (!is_vcpu_stopped(tmp)) { 4721 started_vcpus++; 4722 started_vcpu = tmp; 4723 } 4724 } 4725 4726 if (started_vcpus == 1) { 4727 /* 4728 * As we only have one VCPU left, we want to enable the 4729 * IBS facility for that VCPU to speed it up. 4730 */ 4731 __enable_ibs_on_vcpu(started_vcpu); 4732 } 4733 4734 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 4735 return 0; 4736 } 4737 4738 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 4739 struct kvm_enable_cap *cap) 4740 { 4741 int r; 4742 4743 if (cap->flags) 4744 return -EINVAL; 4745 4746 switch (cap->cap) { 4747 case KVM_CAP_S390_CSS_SUPPORT: 4748 if (!vcpu->kvm->arch.css_support) { 4749 vcpu->kvm->arch.css_support = 1; 4750 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); 4751 trace_kvm_s390_enable_css(vcpu->kvm); 4752 } 4753 r = 0; 4754 break; 4755 default: 4756 r = -EINVAL; 4757 break; 4758 } 4759 return r; 4760 } 4761 4762 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, 4763 struct kvm_s390_mem_op *mop) 4764 { 4765 void __user *uaddr = (void __user *)mop->buf; 4766 int r = 0; 4767 4768 if (mop->flags || !mop->size) 4769 return -EINVAL; 4770 if (mop->size + mop->sida_offset < mop->size) 4771 return -EINVAL; 4772 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) 4773 return -E2BIG; 4774 if (!kvm_s390_pv_cpu_is_protected(vcpu)) 4775 return -EINVAL; 4776 4777 switch (mop->op) { 4778 case KVM_S390_MEMOP_SIDA_READ: 4779 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + 4780 mop->sida_offset), mop->size)) 4781 r = -EFAULT; 4782 4783 break; 4784 case KVM_S390_MEMOP_SIDA_WRITE: 4785 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + 4786 mop->sida_offset), uaddr, mop->size)) 4787 r = -EFAULT; 4788 break; 4789 } 4790 return r; 4791 } 4792 4793 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, 4794 struct kvm_s390_mem_op *mop) 4795 { 4796 void __user *uaddr = (void __user *)mop->buf; 4797 void *tmpbuf = NULL; 4798 int r = 0; 4799 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION 4800 | KVM_S390_MEMOP_F_CHECK_ONLY 4801 | KVM_S390_MEMOP_F_SKEY_PROTECTION; 4802 4803 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) 4804 return -EINVAL; 4805 if (mop->size > MEM_OP_MAX_SIZE) 4806 return -E2BIG; 4807 if (kvm_s390_pv_cpu_is_protected(vcpu)) 4808 return -EINVAL; 4809 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 4810 if (access_key_invalid(mop->key)) 4811 return -EINVAL; 4812 } else { 4813 mop->key = 0; 4814 } 4815 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 4816 tmpbuf = vmalloc(mop->size); 4817 if (!tmpbuf) 4818 return 
-ENOMEM; 4819 } 4820 4821 switch (mop->op) { 4822 case KVM_S390_MEMOP_LOGICAL_READ: 4823 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4824 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4825 GACC_FETCH, mop->key); 4826 break; 4827 } 4828 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4829 mop->size, mop->key); 4830 if (r == 0) { 4831 if (copy_to_user(uaddr, tmpbuf, mop->size)) 4832 r = -EFAULT; 4833 } 4834 break; 4835 case KVM_S390_MEMOP_LOGICAL_WRITE: 4836 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 4837 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, 4838 GACC_STORE, mop->key); 4839 break; 4840 } 4841 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 4842 r = -EFAULT; 4843 break; 4844 } 4845 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, 4846 mop->size, mop->key); 4847 break; 4848 } 4849 4850 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) 4851 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); 4852 4853 vfree(tmpbuf); 4854 return r; 4855 } 4856 4857 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, 4858 struct kvm_s390_mem_op *mop) 4859 { 4860 int r, srcu_idx; 4861 4862 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4863 4864 switch (mop->op) { 4865 case KVM_S390_MEMOP_LOGICAL_READ: 4866 case KVM_S390_MEMOP_LOGICAL_WRITE: 4867 r = kvm_s390_vcpu_mem_op(vcpu, mop); 4868 break; 4869 case KVM_S390_MEMOP_SIDA_READ: 4870 case KVM_S390_MEMOP_SIDA_WRITE: 4871 /* we are locked against sida going away by the vcpu->mutex */ 4872 r = kvm_s390_vcpu_sida_op(vcpu, mop); 4873 break; 4874 default: 4875 r = -EINVAL; 4876 } 4877 4878 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 4879 return r; 4880 } 4881 4882 long kvm_arch_vcpu_async_ioctl(struct file *filp, 4883 unsigned int ioctl, unsigned long arg) 4884 { 4885 struct kvm_vcpu *vcpu = filp->private_data; 4886 void __user *argp = (void __user *)arg; 4887 4888 switch (ioctl) { 4889 case KVM_S390_IRQ: { 4890 struct kvm_s390_irq s390irq; 4891 4892 if (copy_from_user(&s390irq, argp, sizeof(s390irq))) 4893 return -EFAULT; 4894 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4895 } 4896 case KVM_S390_INTERRUPT: { 4897 struct kvm_s390_interrupt s390int; 4898 struct kvm_s390_irq s390irq = {}; 4899 4900 if (copy_from_user(&s390int, argp, sizeof(s390int))) 4901 return -EFAULT; 4902 if (s390int_to_s390irq(&s390int, &s390irq)) 4903 return -EINVAL; 4904 return kvm_s390_inject_vcpu(vcpu, &s390irq); 4905 } 4906 } 4907 return -ENOIOCTLCMD; 4908 } 4909 4910 long kvm_arch_vcpu_ioctl(struct file *filp, 4911 unsigned int ioctl, unsigned long arg) 4912 { 4913 struct kvm_vcpu *vcpu = filp->private_data; 4914 void __user *argp = (void __user *)arg; 4915 int idx; 4916 long r; 4917 u16 rc, rrc; 4918 4919 vcpu_load(vcpu); 4920 4921 switch (ioctl) { 4922 case KVM_S390_STORE_STATUS: 4923 idx = srcu_read_lock(&vcpu->kvm->srcu); 4924 r = kvm_s390_store_status_unloaded(vcpu, arg); 4925 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4926 break; 4927 case KVM_S390_SET_INITIAL_PSW: { 4928 psw_t psw; 4929 4930 r = -EFAULT; 4931 if (copy_from_user(&psw, argp, sizeof(psw))) 4932 break; 4933 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); 4934 break; 4935 } 4936 case KVM_S390_CLEAR_RESET: 4937 r = 0; 4938 kvm_arch_vcpu_ioctl_clear_reset(vcpu); 4939 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 4940 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), 4941 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); 4942 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", 4943 rc, rrc); 4944 } 4945 break; 4946 case KVM_S390_INITIAL_RESET: 
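		/*
		 * Architected initial CPU reset; for protected guests the
		 * Ultravisor additionally performs its part of the reset via
		 * UVC_CMD_CPU_RESET_INITIAL below.
		 */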
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
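	/*
	 * Only user-controlled VMs may mmap() the SIE control block at
	 * KVM_S390_SIE_PAGE_OFFSET; everything else gets SIGBUS.
	 */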
if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) 5086 && (kvm_is_ucontrol(vcpu->kvm))) { 5087 vmf->page = virt_to_page(vcpu->arch.sie_block); 5088 get_page(vmf->page); 5089 return 0; 5090 } 5091 #endif 5092 return VM_FAULT_SIGBUS; 5093 } 5094 5095 /* Section: memory related */ 5096 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5097 const struct kvm_memory_slot *old, 5098 struct kvm_memory_slot *new, 5099 enum kvm_mr_change change) 5100 { 5101 gpa_t size; 5102 5103 /* When we are protected, we should not change the memory slots */ 5104 if (kvm_s390_pv_get_handle(kvm)) 5105 return -EINVAL; 5106 5107 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) 5108 return 0; 5109 5110 /* A few sanity checks. We can have memory slots which have to be 5111 located/ended at a segment boundary (1MB). The memory in userland is 5112 ok to be fragmented into various different vmas. It is okay to mmap() 5113 and munmap() stuff in this slot after doing this call at any time */ 5114 5115 if (new->userspace_addr & 0xffffful) 5116 return -EINVAL; 5117 5118 size = new->npages * PAGE_SIZE; 5119 if (size & 0xffffful) 5120 return -EINVAL; 5121 5122 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) 5123 return -EINVAL; 5124 5125 return 0; 5126 } 5127 5128 void kvm_arch_commit_memory_region(struct kvm *kvm, 5129 struct kvm_memory_slot *old, 5130 const struct kvm_memory_slot *new, 5131 enum kvm_mr_change change) 5132 { 5133 int rc = 0; 5134 5135 switch (change) { 5136 case KVM_MR_DELETE: 5137 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5138 old->npages * PAGE_SIZE); 5139 break; 5140 case KVM_MR_MOVE: 5141 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, 5142 old->npages * PAGE_SIZE); 5143 if (rc) 5144 break; 5145 fallthrough; 5146 case KVM_MR_CREATE: 5147 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr, 5148 new->base_gfn * PAGE_SIZE, 5149 new->npages * PAGE_SIZE); 5150 break; 5151 case KVM_MR_FLAGS_ONLY: 5152 break; 5153 default: 5154 WARN(1, "Unknown KVM MR CHANGE: %d\n", change); 5155 } 5156 if (rc) 5157 pr_warn("failed to commit memory region\n"); 5158 return; 5159 } 5160 5161 static inline unsigned long nonhyp_mask(int i) 5162 { 5163 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; 5164 5165 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); 5166 } 5167 5168 static int __init kvm_s390_init(void) 5169 { 5170 int i; 5171 5172 if (!sclp.has_sief2) { 5173 pr_info("SIE is not available\n"); 5174 return -ENODEV; 5175 } 5176 5177 if (nested && hpage) { 5178 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); 5179 return -EINVAL; 5180 } 5181 5182 for (i = 0; i < 16; i++) 5183 kvm_s390_fac_base[i] |= 5184 stfle_fac_list[i] & nonhyp_mask(i); 5185 5186 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 5187 } 5188 5189 static void __exit kvm_s390_exit(void) 5190 { 5191 kvm_exit(); 5192 } 5193 5194 module_init(kvm_s390_init); 5195 module_exit(kvm_s390_exit); 5196 5197 /* 5198 * Enable autoloading of the kvm module. 5199 * Note that we add the module alias here instead of virt/kvm/kvm_main.c 5200 * since x86 takes a different approach. 5201 */ 5202 #include <linux/miscdevice.h> 5203 MODULE_ALIAS_MISCDEV(KVM_MINOR); 5204 MODULE_ALIAS("devname:kvm"); 5205
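/*
 * Illustrative userspace sketch of the one-reg interface implemented above,
 * kept in a comment only (it is not part of this translation unit, and
 * vcpu_fd stands for an assumed vCPU file descriptor obtained via
 * KVM_CREATE_VCPU): reading the guest clock comparator that
 * kvm_arch_vcpu_ioctl_get_one_reg() copies out for KVM_REG_S390_CLOCK_COMP.
 *
 *	#include <err.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	__u64 ckc;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CLOCK_COMP,
 *		.addr = (__u64)&ckc,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
 *		err(1, "KVM_GET_ONE_REG");
 */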