// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/asm.h>
#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
#include "gmap.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio),
	STATS_DESC_COUNTER(VM, aen_forward),
	STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
	STATS_DESC_COUNTER(VM, gmap_shadow_create),
	STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
	STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
	STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
	STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
	STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
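/* Per-vCPU statistics: exit, interrupt delivery/injection and instruction counters. */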
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_lpswey),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * allow asynchronous deinit for protected guests; enable by default since
 * the feature is opt-in anyway
 */
static int async_destroy = 1;
module_param(async_destroy, int, 0444);
MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
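/*
 * Apply a host TOD clock delta to a SIE control block: adjust the guest
 * epoch and, if the multiple-epoch facility is in use, the epoch index.
 */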
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		CC_IPM(cc)
		: CC_OUT(cc, cc)
		: [function] "d" (function)
		: CC_CLOBBER_LIST("0"));
	return CC_TRANSFORM(cc) == 0;
}

static __always_inline void pfcr_query(u8 (*query)[16])
{
	asm volatile(
		"	lghi	0,0\n"
		"	.insn	rsy,0xeb0000000016,0,0,%[query]\n"
		: [query] "=QS" (*query)
		:
		: "cc", "0");
}

static __always_inline void __sortl_query(u8 (*query)[32])
{
	asm volatile(
		"	lghi	0,0\n"
		"	la	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rre,0xb9380000,2,4\n"
		: [query] "=R" (*query)
		:
		: "cc", "0", "1");
}

static __always_inline void __dfltcc_query(u8 (*query)[32])
{
	asm volatile(
		"	lghi	0,0\n"
		"	la	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,0xb9390000,2,4,6,0\n"
		: [query] "=R" (*query)
		:
		: "cc", "0", "1");
}
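/*
 * Probe the host for the CPU features and query subfunctions (PLO, PTFF,
 * CPACF, SORTL, DFLTCC, PFCR, ...) that KVM may offer to its guests.
 */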
static void __init kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__sortl_query(&kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);

	if (test_facility(201)) /* PFCR */
		pfcr_query(&kvm_s390_available_subfunc.pfcr);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

static int __init __kvm_s390_init(void)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto err_kvm_uv;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto err_debug_view;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto err_flic;
	}

	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
		rc = kvm_s390_pci_init();
		if (rc) {
			pr_err("Unable to allocate AIFT for PCI\n");
			goto err_pci;
		}
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto err_gib;

	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

	return 0;

err_gib:
	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
		kvm_s390_pci_exit();
err_pci:
err_flic:
err_debug_view:
	debug_unregister(kvm_s390_dbf_uv);
err_kvm_uv:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

static void __kvm_s390_exit(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);

	kvm_s390_gib_destroy();
	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
		kvm_s390_pci_exit();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
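/* Report which KVM capabilities are available on this host / for this VM. */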
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
	case KVM_CAP_IRQFD_RESAMPLE:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_S390_MEM_OP_EXTENSION:
		/*
		 * Flag bits indicating which extensions are supported.
		 * If r > 0, the base extension must also be supported/indicated,
		 * in order to maintain backwards compatibility.
		 */
		r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
		    KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = test_facility(129);
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
		r = async_destroy && is_prot_virt_host();
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	case KVM_CAP_S390_PROTECTED_DUMP: {
		u64 pv_cmds_dump[] = {
			BIT_UVC_CMD_DUMP_INIT,
			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
			BIT_UVC_CMD_DUMP_CPU,
			BIT_UVC_CMD_DUMP_COMPLETE,
		};
		int i;

		r = is_prot_virt_host();

		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
			if (!test_bit_inv(pv_cmds_dump[i],
					  (unsigned long *)&uv_info.inst_calls_list)) {
				r = 0;
				break;
			}
		}
		break;
	}
	case KVM_CAP_S390_ZPCI_OP:
		r = kvm_s390_pci_interp_allowed();
		break;
	case KVM_CAP_S390_CPU_TOPOLOGY:
		r = test_facility(11);
		break;
	default:
		r = 0;
	}
	return r;
}
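/*
 * Transfer the dirty state tracked in the gmap/PGSTEs into the memslot's
 * dirty bitmap, one segment (_PAGE_ENTRIES pages) at a time.
 */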
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (cpu_has_vx()) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			if (test_facility(198)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 198);
				set_kvm_facility(kvm->arch.model.fac_list, 198);
			}
			if (test_facility(199)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 199);
				set_kvm_facility(kvm->arch.model.fac_list, 199);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
"(not available)" : "(success)"); 857 break; 858 case KVM_CAP_S390_GS: 859 r = -EINVAL; 860 mutex_lock(&kvm->lock); 861 if (kvm->created_vcpus) { 862 r = -EBUSY; 863 } else if (test_facility(133)) { 864 set_kvm_facility(kvm->arch.model.fac_mask, 133); 865 set_kvm_facility(kvm->arch.model.fac_list, 133); 866 r = 0; 867 } 868 mutex_unlock(&kvm->lock); 869 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", 870 r ? "(not available)" : "(success)"); 871 break; 872 case KVM_CAP_S390_HPAGE_1M: 873 mutex_lock(&kvm->lock); 874 if (kvm->created_vcpus) 875 r = -EBUSY; 876 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) 877 r = -EINVAL; 878 else { 879 r = 0; 880 mmap_write_lock(kvm->mm); 881 kvm->mm->context.allow_gmap_hpage_1m = 1; 882 mmap_write_unlock(kvm->mm); 883 /* 884 * We might have to create fake 4k page 885 * tables. To avoid that the hardware works on 886 * stale PGSTEs, we emulate these instructions. 887 */ 888 kvm->arch.use_skf = 0; 889 kvm->arch.use_pfmfi = 0; 890 } 891 mutex_unlock(&kvm->lock); 892 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", 893 r ? "(not available)" : "(success)"); 894 break; 895 case KVM_CAP_S390_USER_STSI: 896 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 897 kvm->arch.user_stsi = 1; 898 r = 0; 899 break; 900 case KVM_CAP_S390_USER_INSTR0: 901 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0"); 902 kvm->arch.user_instr0 = 1; 903 icpt_operexc_on_all_vcpus(kvm); 904 r = 0; 905 break; 906 case KVM_CAP_S390_CPU_TOPOLOGY: 907 r = -EINVAL; 908 mutex_lock(&kvm->lock); 909 if (kvm->created_vcpus) { 910 r = -EBUSY; 911 } else if (test_facility(11)) { 912 set_kvm_facility(kvm->arch.model.fac_mask, 11); 913 set_kvm_facility(kvm->arch.model.fac_list, 11); 914 r = 0; 915 } 916 mutex_unlock(&kvm->lock); 917 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s", 918 r ? "(not available)" : "(success)"); 919 break; 920 default: 921 r = -EINVAL; 922 break; 923 } 924 return r; 925 } 926 927 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 928 { 929 int ret; 930 931 switch (attr->attr) { 932 case KVM_S390_VM_MEM_LIMIT_SIZE: 933 ret = 0; 934 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 935 kvm->arch.mem_limit); 936 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 937 ret = -EFAULT; 938 break; 939 default: 940 ret = -ENXIO; 941 break; 942 } 943 return ret; 944 } 945 946 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) 947 { 948 int ret; 949 unsigned int idx; 950 switch (attr->attr) { 951 case KVM_S390_VM_MEM_ENABLE_CMMA: 952 ret = -ENXIO; 953 if (!sclp.has_cmma) 954 break; 955 956 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); 957 mutex_lock(&kvm->lock); 958 if (kvm->created_vcpus) 959 ret = -EBUSY; 960 else if (kvm->mm->context.allow_gmap_hpage_1m) 961 ret = -EINVAL; 962 else { 963 kvm->arch.use_cmma = 1; 964 /* Not compatible with cmma. 
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
{
	/* Only set the ECB bits after guest requests zPCI interpretation */
	if (!vcpu->kvm->arch.use_zpci_interp)
		return;

	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
}

void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	lockdep_assert_held(&kvm->lock);

	if (!kvm_s390_pci_interp_allowed())
		return;

	/*
	 * If host is configured for PCI and the necessary facilities are
	 * available, turn on interpretation for the life of this guest
	 */
	kvm->arch.use_zpci_interp = 1;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_pci_setup(vcpu);
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
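/* Make the given synchronous request pending on every vCPU of the VM. */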
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	__kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	__kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	/*
	 * For protected guests, the TOD is managed by the ultravisor, so trying
	 * to change it will never bring the expected results.
	 */
	if (kvm_s390_pv_is_protected(kvm)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
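/*
 * Set the guest CPU model (cpuid, IBC and facility list); only possible
 * while no vCPUs have been created.
 */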
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1541 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1542 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1543 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1545 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1546 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1547 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1548 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1549 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1550 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1551 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1552 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1553 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1554 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1555 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1556 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1557 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1558 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1559 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1560 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1561 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1562 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1563 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1564 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1565 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1566 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1567 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1568 VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx", 1569 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0], 1570 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]); 1571 1572 return 0; 1573 } 1574 1575 #define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \ 1576 ( \ 1577 ((struct kvm_s390_vm_cpu_uv_feat){ \ 1578 .ap = 1, \ 1579 .ap_intr = 1, \ 1580 }) \ 1581 .feat \ 1582 ) 1583 1584 static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) 1585 { 1586 struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr; 1587 unsigned long data, filter; 1588 1589 filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK; 1590 if (get_user(data, &ptr->feat)) 1591 return -EFAULT; 1592 if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS)) 1593 return -EINVAL; 1594 1595 mutex_lock(&kvm->lock); 1596 if (kvm->created_vcpus) { 1597 mutex_unlock(&kvm->lock); 1598 return -EBUSY; 1599 } 1600 kvm->arch.model.uv_feat_guest.feat = data; 1601 mutex_unlock(&kvm->lock); 1602 1603 VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data); 1604 1605 return 0; 1606 } 1607 1608 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1609 { 1610 int ret = -ENXIO; 1611 1612 switch (attr->attr) { 1613 case KVM_S390_VM_CPU_PROCESSOR: 1614 ret = kvm_s390_set_processor(kvm, attr); 1615 break; 1616 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1617 ret = kvm_s390_set_processor_feat(kvm, attr); 1618 break; 1619 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1620 ret = kvm_s390_set_processor_subfunc(kvm, attr); 1621 break; 1622 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: 1623 ret = kvm_s390_set_uv_feat(kvm, attr); 1624 break; 1625 } 1626 return ret; 1627 } 1628 1629 static int 
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", 1734 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], 1735 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); 1736 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", 1737 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], 1738 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); 1739 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", 1740 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], 1741 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); 1742 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", 1743 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], 1744 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); 1745 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", 1746 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], 1747 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); 1748 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", 1749 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], 1750 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); 1751 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", 1752 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], 1753 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); 1754 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", 1755 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], 1756 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); 1757 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", 1758 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], 1759 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); 1760 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", 1761 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], 1762 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); 1763 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", 1764 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], 1765 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); 1766 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", 1767 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], 1768 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); 1769 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", 1770 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], 1771 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); 1772 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", 1773 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], 1774 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); 1775 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1776 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], 1777 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], 1778 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], 1779 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); 1780 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1781 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], 1782 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], 1783 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], 1784 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); 1785 VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx", 1786 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0], 1787 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]); 1788 1789 return 0; 1790 } 1791 1792 static int 
kvm_s390_get_machine_subfunc(struct kvm *kvm, 1793 struct kvm_device_attr *attr) 1794 { 1795 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, 1796 sizeof(struct kvm_s390_vm_cpu_subfunc))) 1797 return -EFAULT; 1798 1799 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1800 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], 1801 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], 1802 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], 1803 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); 1804 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", 1805 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], 1806 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); 1807 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", 1808 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], 1809 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); 1810 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", 1811 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], 1812 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); 1813 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", 1814 ((unsigned long *) &kvm_s390_available_subfunc.km)[0], 1815 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); 1816 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", 1817 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], 1818 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); 1819 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", 1820 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], 1821 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); 1822 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", 1823 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], 1824 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); 1825 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", 1826 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], 1827 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); 1828 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", 1829 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], 1830 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); 1831 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", 1832 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], 1833 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); 1834 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", 1835 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], 1836 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); 1837 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", 1838 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], 1839 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); 1840 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", 1841 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], 1842 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); 1843 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", 1844 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], 1845 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); 1846 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1847 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], 1848 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], 1849 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], 1850 ((unsigned long *) 
&kvm_s390_available_subfunc.sortl)[3]); 1851 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", 1852 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], 1853 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], 1854 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], 1855 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); 1856 VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx", 1857 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0], 1858 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]); 1859 1860 return 0; 1861 } 1862 1863 static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) 1864 { 1865 struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr; 1866 unsigned long feat = kvm->arch.model.uv_feat_guest.feat; 1867 1868 if (put_user(feat, &dst->feat)) 1869 return -EFAULT; 1870 VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat); 1871 1872 return 0; 1873 } 1874 1875 static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) 1876 { 1877 struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr; 1878 unsigned long feat; 1879 1880 BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications)); 1881 1882 feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK; 1883 if (put_user(feat, &dst->feat)) 1884 return -EFAULT; 1885 VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat); 1886 1887 return 0; 1888 } 1889 1890 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) 1891 { 1892 int ret = -ENXIO; 1893 1894 switch (attr->attr) { 1895 case KVM_S390_VM_CPU_PROCESSOR: 1896 ret = kvm_s390_get_processor(kvm, attr); 1897 break; 1898 case KVM_S390_VM_CPU_MACHINE: 1899 ret = kvm_s390_get_machine(kvm, attr); 1900 break; 1901 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 1902 ret = kvm_s390_get_processor_feat(kvm, attr); 1903 break; 1904 case KVM_S390_VM_CPU_MACHINE_FEAT: 1905 ret = kvm_s390_get_machine_feat(kvm, attr); 1906 break; 1907 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 1908 ret = kvm_s390_get_processor_subfunc(kvm, attr); 1909 break; 1910 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 1911 ret = kvm_s390_get_machine_subfunc(kvm, attr); 1912 break; 1913 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: 1914 ret = kvm_s390_get_processor_uv_feat(kvm, attr); 1915 break; 1916 case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: 1917 ret = kvm_s390_get_machine_uv_feat(kvm, attr); 1918 break; 1919 } 1920 return ret; 1921 } 1922 1923 /** 1924 * kvm_s390_update_topology_change_report - update CPU topology change report 1925 * @kvm: guest KVM description 1926 * @val: set or clear the MTCR bit 1927 * 1928 * Updates the Multiprocessor Topology-Change-Report bit to signal 1929 * the guest with a topology change. 1930 * This is only relevant if the topology facility is present. 1931 * 1932 * The SCA version, bsca or esca, doesn't matter as offset is the same. 
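 *
 * The current value of the bit can be read back by userspace through the
 * KVM_S390_VM_CPU_TOPOLOGY attribute group (see
 * kvm_s390_get_topo_change_indication() below).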
1933 */ 1934 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) 1935 { 1936 union sca_utility new, old; 1937 struct bsca_block *sca; 1938 1939 read_lock(&kvm->arch.sca_lock); 1940 sca = kvm->arch.sca; 1941 old = READ_ONCE(sca->utility); 1942 do { 1943 new = old; 1944 new.mtcr = val; 1945 } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val)); 1946 read_unlock(&kvm->arch.sca_lock); 1947 } 1948 1949 static int kvm_s390_set_topo_change_indication(struct kvm *kvm, 1950 struct kvm_device_attr *attr) 1951 { 1952 if (!test_kvm_facility(kvm, 11)) 1953 return -ENXIO; 1954 1955 kvm_s390_update_topology_change_report(kvm, !!attr->attr); 1956 return 0; 1957 } 1958 1959 static int kvm_s390_get_topo_change_indication(struct kvm *kvm, 1960 struct kvm_device_attr *attr) 1961 { 1962 u8 topo; 1963 1964 if (!test_kvm_facility(kvm, 11)) 1965 return -ENXIO; 1966 1967 read_lock(&kvm->arch.sca_lock); 1968 topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr; 1969 read_unlock(&kvm->arch.sca_lock); 1970 1971 return put_user(topo, (u8 __user *)attr->addr); 1972 } 1973 1974 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1975 { 1976 int ret; 1977 1978 switch (attr->group) { 1979 case KVM_S390_VM_MEM_CTRL: 1980 ret = kvm_s390_set_mem_control(kvm, attr); 1981 break; 1982 case KVM_S390_VM_TOD: 1983 ret = kvm_s390_set_tod(kvm, attr); 1984 break; 1985 case KVM_S390_VM_CPU_MODEL: 1986 ret = kvm_s390_set_cpu_model(kvm, attr); 1987 break; 1988 case KVM_S390_VM_CRYPTO: 1989 ret = kvm_s390_vm_set_crypto(kvm, attr); 1990 break; 1991 case KVM_S390_VM_MIGRATION: 1992 ret = kvm_s390_vm_set_migration(kvm, attr); 1993 break; 1994 case KVM_S390_VM_CPU_TOPOLOGY: 1995 ret = kvm_s390_set_topo_change_indication(kvm, attr); 1996 break; 1997 default: 1998 ret = -ENXIO; 1999 break; 2000 } 2001 2002 return ret; 2003 } 2004 2005 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) 2006 { 2007 int ret; 2008 2009 switch (attr->group) { 2010 case KVM_S390_VM_MEM_CTRL: 2011 ret = kvm_s390_get_mem_control(kvm, attr); 2012 break; 2013 case KVM_S390_VM_TOD: 2014 ret = kvm_s390_get_tod(kvm, attr); 2015 break; 2016 case KVM_S390_VM_CPU_MODEL: 2017 ret = kvm_s390_get_cpu_model(kvm, attr); 2018 break; 2019 case KVM_S390_VM_MIGRATION: 2020 ret = kvm_s390_vm_get_migration(kvm, attr); 2021 break; 2022 case KVM_S390_VM_CPU_TOPOLOGY: 2023 ret = kvm_s390_get_topo_change_indication(kvm, attr); 2024 break; 2025 default: 2026 ret = -ENXIO; 2027 break; 2028 } 2029 2030 return ret; 2031 } 2032 2033 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 2034 { 2035 int ret; 2036 2037 switch (attr->group) { 2038 case KVM_S390_VM_MEM_CTRL: 2039 switch (attr->attr) { 2040 case KVM_S390_VM_MEM_ENABLE_CMMA: 2041 case KVM_S390_VM_MEM_CLR_CMMA: 2042 ret = sclp.has_cmma ? 
0 : -ENXIO; 2043 break; 2044 case KVM_S390_VM_MEM_LIMIT_SIZE: 2045 ret = 0; 2046 break; 2047 default: 2048 ret = -ENXIO; 2049 break; 2050 } 2051 break; 2052 case KVM_S390_VM_TOD: 2053 switch (attr->attr) { 2054 case KVM_S390_VM_TOD_LOW: 2055 case KVM_S390_VM_TOD_HIGH: 2056 ret = 0; 2057 break; 2058 default: 2059 ret = -ENXIO; 2060 break; 2061 } 2062 break; 2063 case KVM_S390_VM_CPU_MODEL: 2064 switch (attr->attr) { 2065 case KVM_S390_VM_CPU_PROCESSOR: 2066 case KVM_S390_VM_CPU_MACHINE: 2067 case KVM_S390_VM_CPU_PROCESSOR_FEAT: 2068 case KVM_S390_VM_CPU_MACHINE_FEAT: 2069 case KVM_S390_VM_CPU_MACHINE_SUBFUNC: 2070 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: 2071 case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: 2072 case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: 2073 ret = 0; 2074 break; 2075 default: 2076 ret = -ENXIO; 2077 break; 2078 } 2079 break; 2080 case KVM_S390_VM_CRYPTO: 2081 switch (attr->attr) { 2082 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: 2083 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: 2084 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: 2085 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: 2086 ret = 0; 2087 break; 2088 case KVM_S390_VM_CRYPTO_ENABLE_APIE: 2089 case KVM_S390_VM_CRYPTO_DISABLE_APIE: 2090 ret = ap_instructions_available() ? 0 : -ENXIO; 2091 break; 2092 default: 2093 ret = -ENXIO; 2094 break; 2095 } 2096 break; 2097 case KVM_S390_VM_MIGRATION: 2098 ret = 0; 2099 break; 2100 case KVM_S390_VM_CPU_TOPOLOGY: 2101 ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO; 2102 break; 2103 default: 2104 ret = -ENXIO; 2105 break; 2106 } 2107 2108 return ret; 2109 } 2110 2111 static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 2112 { 2113 uint8_t *keys; 2114 uint64_t hva; 2115 int srcu_idx, i, r = 0; 2116 2117 if (args->flags != 0) 2118 return -EINVAL; 2119 2120 /* Is this guest using storage keys? 
*/ 2121 if (!mm_uses_skeys(current->mm)) 2122 return KVM_S390_GET_SKEYS_NONE; 2123 2124 /* Enforce sane limit on memory allocation */ 2125 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 2126 return -EINVAL; 2127 2128 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 2129 if (!keys) 2130 return -ENOMEM; 2131 2132 mmap_read_lock(current->mm); 2133 srcu_idx = srcu_read_lock(&kvm->srcu); 2134 for (i = 0; i < args->count; i++) { 2135 hva = gfn_to_hva(kvm, args->start_gfn + i); 2136 if (kvm_is_error_hva(hva)) { 2137 r = -EFAULT; 2138 break; 2139 } 2140 2141 r = get_guest_storage_key(current->mm, hva, &keys[i]); 2142 if (r) 2143 break; 2144 } 2145 srcu_read_unlock(&kvm->srcu, srcu_idx); 2146 mmap_read_unlock(current->mm); 2147 2148 if (!r) { 2149 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, 2150 sizeof(uint8_t) * args->count); 2151 if (r) 2152 r = -EFAULT; 2153 } 2154 2155 kvfree(keys); 2156 return r; 2157 } 2158 2159 static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) 2160 { 2161 uint8_t *keys; 2162 uint64_t hva; 2163 int srcu_idx, i, r = 0; 2164 bool unlocked; 2165 2166 if (args->flags != 0) 2167 return -EINVAL; 2168 2169 /* Enforce sane limit on memory allocation */ 2170 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) 2171 return -EINVAL; 2172 2173 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); 2174 if (!keys) 2175 return -ENOMEM; 2176 2177 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, 2178 sizeof(uint8_t) * args->count); 2179 if (r) { 2180 r = -EFAULT; 2181 goto out; 2182 } 2183 2184 /* Enable storage key handling for the guest */ 2185 r = s390_enable_skey(); 2186 if (r) 2187 goto out; 2188 2189 i = 0; 2190 mmap_read_lock(current->mm); 2191 srcu_idx = srcu_read_lock(&kvm->srcu); 2192 while (i < args->count) { 2193 unlocked = false; 2194 hva = gfn_to_hva(kvm, args->start_gfn + i); 2195 if (kvm_is_error_hva(hva)) { 2196 r = -EFAULT; 2197 break; 2198 } 2199 2200 /* Lowest order bit is reserved */ 2201 if (keys[i] & 0x01) { 2202 r = -EINVAL; 2203 break; 2204 } 2205 2206 r = set_guest_storage_key(current->mm, hva, keys[i], 0); 2207 if (r) { 2208 r = fixup_user_fault(current->mm, hva, 2209 FAULT_FLAG_WRITE, &unlocked); 2210 if (r) 2211 break; 2212 } 2213 if (!r) 2214 i++; 2215 } 2216 srcu_read_unlock(&kvm->srcu, srcu_idx); 2217 mmap_read_unlock(current->mm); 2218 out: 2219 kvfree(keys); 2220 return r; 2221 } 2222 2223 /* 2224 * Base address and length must be sent at the start of each block, therefore 2225 * it's cheaper to send some clean data, as long as it's less than the size of 2226 * two longs. 2227 */ 2228 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) 2229 /* for consistency */ 2230 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) 2231 2232 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2233 u8 *res, unsigned long bufsize) 2234 { 2235 unsigned long pgstev, hva, cur_gfn = args->start_gfn; 2236 2237 args->count = 0; 2238 while (args->count < bufsize) { 2239 hva = gfn_to_hva(kvm, cur_gfn); 2240 /* 2241 * We return an error if the first value was invalid, but we 2242 * return successfully if at least one value was copied. 2243 */ 2244 if (kvm_is_error_hva(hva)) 2245 return args->count ? 
0 : -EFAULT; 2246 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2247 pgstev = 0; 2248 res[args->count++] = (pgstev >> 24) & 0x43; 2249 cur_gfn++; 2250 } 2251 2252 return 0; 2253 } 2254 2255 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots, 2256 gfn_t gfn) 2257 { 2258 return ____gfn_to_memslot(slots, gfn, true); 2259 } 2260 2261 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, 2262 unsigned long cur_gfn) 2263 { 2264 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn); 2265 unsigned long ofs = cur_gfn - ms->base_gfn; 2266 struct rb_node *mnode = &ms->gfn_node[slots->node_idx]; 2267 2268 if (ms->base_gfn + ms->npages <= cur_gfn) { 2269 mnode = rb_next(mnode); 2270 /* If we are above the highest slot, wrap around */ 2271 if (!mnode) 2272 mnode = rb_first(&slots->gfn_tree); 2273 2274 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2275 ofs = 0; 2276 } 2277 2278 if (cur_gfn < ms->base_gfn) 2279 ofs = 0; 2280 2281 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 2282 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 2283 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2284 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); 2285 } 2286 return ms->base_gfn + ofs; 2287 } 2288 2289 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, 2290 u8 *res, unsigned long bufsize) 2291 { 2292 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; 2293 struct kvm_memslots *slots = kvm_memslots(kvm); 2294 struct kvm_memory_slot *ms; 2295 2296 if (unlikely(kvm_memslots_empty(slots))) 2297 return 0; 2298 2299 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); 2300 ms = gfn_to_memslot(kvm, cur_gfn); 2301 args->count = 0; 2302 args->start_gfn = cur_gfn; 2303 if (!ms) 2304 return 0; 2305 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2306 mem_end = kvm_s390_get_gfn_end(slots); 2307 2308 while (args->count < bufsize) { 2309 hva = gfn_to_hva(kvm, cur_gfn); 2310 if (kvm_is_error_hva(hva)) 2311 return 0; 2312 /* Decrement only if we actually flipped the bit to 0 */ 2313 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) 2314 atomic64_dec(&kvm->arch.cmma_dirty_pages); 2315 if (get_pgste(kvm->mm, hva, &pgstev) < 0) 2316 pgstev = 0; 2317 /* Save the value */ 2318 res[args->count++] = (pgstev >> 24) & 0x43; 2319 /* If the next bit is too far away, stop. */ 2320 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) 2321 return 0; 2322 /* If we reached the previous "next", find the next one */ 2323 if (cur_gfn == next_gfn) 2324 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); 2325 /* Reached the end of memory or of the buffer, stop */ 2326 if ((next_gfn >= mem_end) || 2327 (next_gfn - args->start_gfn >= bufsize)) 2328 return 0; 2329 cur_gfn++; 2330 /* Reached the end of the current memslot, take the next one. */ 2331 if (cur_gfn - ms->base_gfn >= ms->npages) { 2332 ms = gfn_to_memslot(kvm, cur_gfn); 2333 if (!ms) 2334 return 0; 2335 } 2336 } 2337 return 0; 2338 } 2339 2340 /* 2341 * This function searches for the next page with dirty CMMA attributes, and 2342 * saves the attributes in the buffer up to either the end of the buffer or 2343 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; 2344 * no trailing clean bytes are saved. 2345 * In case no dirty bits were found, or if CMMA was not enabled or used, the 2346 * output buffer will indicate 0 as length. 
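 *
 * A minimal userspace sketch of driving this interface (illustrative only,
 * error handling omitted; "vm_fd" is assumed to be an open KVM VM file
 * descriptor):
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return, log.count holds the number of attribute bytes that were
 * written to buf.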
2347 */ 2348 static int kvm_s390_get_cmma_bits(struct kvm *kvm, 2349 struct kvm_s390_cmma_log *args) 2350 { 2351 unsigned long bufsize; 2352 int srcu_idx, peek, ret; 2353 u8 *values; 2354 2355 if (!kvm->arch.use_cmma) 2356 return -ENXIO; 2357 /* Invalid/unsupported flags were specified */ 2358 if (args->flags & ~KVM_S390_CMMA_PEEK) 2359 return -EINVAL; 2360 /* Migration mode query, and we are not doing a migration */ 2361 peek = !!(args->flags & KVM_S390_CMMA_PEEK); 2362 if (!peek && !kvm->arch.migration_mode) 2363 return -EINVAL; 2364 /* CMMA is disabled or was not used, or the buffer has length zero */ 2365 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); 2366 if (!bufsize || !kvm->mm->context.uses_cmm) { 2367 memset(args, 0, sizeof(*args)); 2368 return 0; 2369 } 2370 /* We are not peeking, and there are no dirty pages */ 2371 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { 2372 memset(args, 0, sizeof(*args)); 2373 return 0; 2374 } 2375 2376 values = vmalloc(bufsize); 2377 if (!values) 2378 return -ENOMEM; 2379 2380 mmap_read_lock(kvm->mm); 2381 srcu_idx = srcu_read_lock(&kvm->srcu); 2382 if (peek) 2383 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); 2384 else 2385 ret = kvm_s390_get_cmma(kvm, args, values, bufsize); 2386 srcu_read_unlock(&kvm->srcu, srcu_idx); 2387 mmap_read_unlock(kvm->mm); 2388 2389 if (kvm->arch.migration_mode) 2390 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); 2391 else 2392 args->remaining = 0; 2393 2394 if (copy_to_user((void __user *)args->values, values, args->count)) 2395 ret = -EFAULT; 2396 2397 vfree(values); 2398 return ret; 2399 } 2400 2401 /* 2402 * This function sets the CMMA attributes for the given pages. If the input 2403 * buffer has zero length, no action is taken, otherwise the attributes are 2404 * set and the mm->context.uses_cmm flag is set. 2405 */ 2406 static int kvm_s390_set_cmma_bits(struct kvm *kvm, 2407 const struct kvm_s390_cmma_log *args) 2408 { 2409 unsigned long hva, mask, pgstev, i; 2410 uint8_t *bits; 2411 int srcu_idx, r = 0; 2412 2413 mask = args->mask; 2414 2415 if (!kvm->arch.use_cmma) 2416 return -ENXIO; 2417 /* invalid/unsupported flags */ 2418 if (args->flags != 0) 2419 return -EINVAL; 2420 /* Enforce sane limit on memory allocation */ 2421 if (args->count > KVM_S390_CMMA_SIZE_MAX) 2422 return -EINVAL; 2423 /* Nothing to do */ 2424 if (args->count == 0) 2425 return 0; 2426 2427 bits = vmalloc(array_size(sizeof(*bits), args->count)); 2428 if (!bits) 2429 return -ENOMEM; 2430 2431 r = copy_from_user(bits, (void __user *)args->values, args->count); 2432 if (r) { 2433 r = -EFAULT; 2434 goto out; 2435 } 2436 2437 mmap_read_lock(kvm->mm); 2438 srcu_idx = srcu_read_lock(&kvm->srcu); 2439 for (i = 0; i < args->count; i++) { 2440 hva = gfn_to_hva(kvm, args->start_gfn + i); 2441 if (kvm_is_error_hva(hva)) { 2442 r = -EFAULT; 2443 break; 2444 } 2445 2446 pgstev = bits[i]; 2447 pgstev = pgstev << 24; 2448 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; 2449 set_pgste_bits(kvm->mm, hva, mask, pgstev); 2450 } 2451 srcu_read_unlock(&kvm->srcu, srcu_idx); 2452 mmap_read_unlock(kvm->mm); 2453 2454 if (!kvm->mm->context.uses_cmm) { 2455 mmap_write_lock(kvm->mm); 2456 kvm->mm->context.uses_cmm = 1; 2457 mmap_write_unlock(kvm->mm); 2458 } 2459 out: 2460 vfree(bits); 2461 return r; 2462 } 2463 2464 /** 2465 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to 2466 * non protected. 
2467 * @kvm: the VM whose protected vCPUs are to be converted
2468 * @rc: return value for the RC field of the UVC (in case of error)
2469 * @rrc: return value for the RRC field of the UVC (in case of error)
2470 *
2471 * Does not stop in case of error; it tries to convert as many
2472 * CPUs as possible. In case of error, the RC and RRC of the first error are
2473 * returned.
2474 *
2475 * Return: 0 in case of success, otherwise -EIO
2476 */
2477 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2478 {
2479 struct kvm_vcpu *vcpu;
2480 unsigned long i;
2481 u16 _rc, _rrc;
2482 int ret = 0;
2483
2484 /*
2485 * We ignore failures and try to destroy as many CPUs as possible.
2486 * At the same time we must not free the assigned resources when
2487 * this fails, as the ultravisor still has access to that memory.
2488 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2489 * behind.
2490 * We want to return the first failure rc and rrc, though.
2491 */
2492 kvm_for_each_vcpu(i, vcpu, kvm) {
2493 mutex_lock(&vcpu->mutex);
2494 if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2495 *rc = _rc;
2496 *rrc = _rrc;
2497 ret = -EIO;
2498 }
2499 mutex_unlock(&vcpu->mutex);
2500 }
2501 /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2502 if (use_gisa)
2503 kvm_s390_gisa_enable(kvm);
2504 return ret;
2505 }
2506
2507 /**
2508 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2509 * to protected.
2510 * @kvm: the VM whose vCPUs are to be converted
2511 * @rc: return value for the RC field of the UVC (in case of error)
2512 * @rrc: return value for the RRC field of the UVC (in case of error)
2513 *
2514 * Tries to undo the conversion in case of error.
2515 *
2516 * Return: 0 in case of success, otherwise -EIO
2517 */
2518 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2519 {
2520 unsigned long i;
2521 int r = 0;
2522 u16 dummy;
2523
2524 struct kvm_vcpu *vcpu;
2525
2526 /* Disable the GISA if the ultravisor does not support AIV. */
2527 if (!uv_has_feature(BIT_UV_FEAT_AIV))
2528 kvm_s390_gisa_disable(kvm);
2529
2530 kvm_for_each_vcpu(i, vcpu, kvm) {
2531 mutex_lock(&vcpu->mutex);
2532 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2533 mutex_unlock(&vcpu->mutex);
2534 if (r)
2535 break;
2536 }
2537 if (r)
2538 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2539 return r;
2540 }
2541
2542 /*
2543 * Here we provide user space with a direct interface to query UV
2544 * related data like UV maxima and available features as well as
2545 * feature specific data.
2546 *
2547 * To facilitate future extension of the data structures we'll try to
2548 * write data up to the maximum requested length.
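 *
 * Roughly, userspace is expected to use this as in the following
 * illustrative sketch (error handling omitted; "vm_fd" is assumed to be
 * an open KVM VM file descriptor):
 *
 *	struct kvm_s390_pv_info info = {
 *		.header.id = KVM_PV_INFO_VM,
 *		.header.len_max = sizeof(info.header) + sizeof(info.vm),
 *	};
 *	struct kvm_pv_cmd cmd = {
 *		.cmd = KVM_PV_INFO,
 *		.data = (__u64)(unsigned long)&info,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);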
2549 */ 2550 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info) 2551 { 2552 ssize_t len_min; 2553 2554 switch (info->header.id) { 2555 case KVM_PV_INFO_VM: { 2556 len_min = sizeof(info->header) + sizeof(info->vm); 2557 2558 if (info->header.len_max < len_min) 2559 return -EINVAL; 2560 2561 memcpy(info->vm.inst_calls_list, 2562 uv_info.inst_calls_list, 2563 sizeof(uv_info.inst_calls_list)); 2564 2565 /* It's max cpuid not max cpus, so it's off by one */ 2566 info->vm.max_cpus = uv_info.max_guest_cpu_id + 1; 2567 info->vm.max_guests = uv_info.max_num_sec_conf; 2568 info->vm.max_guest_addr = uv_info.max_sec_stor_addr; 2569 info->vm.feature_indication = uv_info.uv_feature_indications; 2570 2571 return len_min; 2572 } 2573 case KVM_PV_INFO_DUMP: { 2574 len_min = sizeof(info->header) + sizeof(info->dump); 2575 2576 if (info->header.len_max < len_min) 2577 return -EINVAL; 2578 2579 info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len; 2580 info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len; 2581 info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len; 2582 return len_min; 2583 } 2584 default: 2585 return -EINVAL; 2586 } 2587 } 2588 2589 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, 2590 struct kvm_s390_pv_dmp dmp) 2591 { 2592 int r = -EINVAL; 2593 void __user *result_buff = (void __user *)dmp.buff_addr; 2594 2595 switch (dmp.subcmd) { 2596 case KVM_PV_DUMP_INIT: { 2597 if (kvm->arch.pv.dumping) 2598 break; 2599 2600 /* 2601 * Block SIE entry as concurrent dump UVCs could lead 2602 * to validities. 2603 */ 2604 kvm_s390_vcpu_block_all(kvm); 2605 2606 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2607 UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc); 2608 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x", 2609 cmd->rc, cmd->rrc); 2610 if (!r) { 2611 kvm->arch.pv.dumping = true; 2612 } else { 2613 kvm_s390_vcpu_unblock_all(kvm); 2614 r = -EINVAL; 2615 } 2616 break; 2617 } 2618 case KVM_PV_DUMP_CONFIG_STOR_STATE: { 2619 if (!kvm->arch.pv.dumping) 2620 break; 2621 2622 /* 2623 * gaddr is an output parameter since we might stop 2624 * early. As dmp will be copied back in our caller, we 2625 * don't need to do it ourselves. 2626 */ 2627 r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len, 2628 &cmd->rc, &cmd->rrc); 2629 break; 2630 } 2631 case KVM_PV_DUMP_COMPLETE: { 2632 if (!kvm->arch.pv.dumping) 2633 break; 2634 2635 r = -EINVAL; 2636 if (dmp.buff_len < uv_info.conf_dump_finalize_len) 2637 break; 2638 2639 r = kvm_s390_pv_dump_complete(kvm, result_buff, 2640 &cmd->rc, &cmd->rrc); 2641 break; 2642 } 2643 default: 2644 r = -ENOTTY; 2645 break; 2646 } 2647 2648 return r; 2649 } 2650 2651 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2652 { 2653 const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM); 2654 void __user *argp = (void __user *)cmd->data; 2655 int r = 0; 2656 u16 dummy; 2657 2658 if (need_lock) 2659 mutex_lock(&kvm->lock); 2660 2661 switch (cmd->cmd) { 2662 case KVM_PV_ENABLE: { 2663 r = -EINVAL; 2664 if (kvm_s390_pv_is_protected(kvm)) 2665 break; 2666 2667 /* 2668 * FMT 4 SIE needs esca. 
As we never switch back to bsca from 2669 * esca, we need no cleanup in the error cases below 2670 */ 2671 r = sca_switch_to_extended(kvm); 2672 if (r) 2673 break; 2674 2675 r = s390_disable_cow_sharing(); 2676 if (r) 2677 break; 2678 2679 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); 2680 if (r) 2681 break; 2682 2683 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); 2684 if (r) 2685 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); 2686 2687 /* we need to block service interrupts from now on */ 2688 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2689 break; 2690 } 2691 case KVM_PV_ASYNC_CLEANUP_PREPARE: 2692 r = -EINVAL; 2693 if (!kvm_s390_pv_is_protected(kvm) || !async_destroy) 2694 break; 2695 2696 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2697 /* 2698 * If a CPU could not be destroyed, destroy VM will also fail. 2699 * There is no point in trying to destroy it. Instead return 2700 * the rc and rrc from the first CPU that failed destroying. 2701 */ 2702 if (r) 2703 break; 2704 r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc); 2705 2706 /* no need to block service interrupts any more */ 2707 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2708 break; 2709 case KVM_PV_ASYNC_CLEANUP_PERFORM: 2710 r = -EINVAL; 2711 if (!async_destroy) 2712 break; 2713 /* kvm->lock must not be held; this is asserted inside the function. */ 2714 r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc); 2715 break; 2716 case KVM_PV_DISABLE: { 2717 r = -EINVAL; 2718 if (!kvm_s390_pv_is_protected(kvm)) 2719 break; 2720 2721 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); 2722 /* 2723 * If a CPU could not be destroyed, destroy VM will also fail. 2724 * There is no point in trying to destroy it. Instead return 2725 * the rc and rrc from the first CPU that failed destroying. 
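 * (kvm_s390_cpus_from_pv() above implements this first-error policy.)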
2726 */ 2727 if (r) 2728 break; 2729 r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc); 2730 2731 /* no need to block service interrupts any more */ 2732 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); 2733 break; 2734 } 2735 case KVM_PV_SET_SEC_PARMS: { 2736 struct kvm_s390_pv_sec_parm parms = {}; 2737 void *hdr; 2738 2739 r = -EINVAL; 2740 if (!kvm_s390_pv_is_protected(kvm)) 2741 break; 2742 2743 r = -EFAULT; 2744 if (copy_from_user(&parms, argp, sizeof(parms))) 2745 break; 2746 2747 /* Currently restricted to 8KB */ 2748 r = -EINVAL; 2749 if (parms.length > PAGE_SIZE * 2) 2750 break; 2751 2752 r = -ENOMEM; 2753 hdr = vmalloc(parms.length); 2754 if (!hdr) 2755 break; 2756 2757 r = -EFAULT; 2758 if (!copy_from_user(hdr, (void __user *)parms.origin, 2759 parms.length)) 2760 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, 2761 &cmd->rc, &cmd->rrc); 2762 2763 vfree(hdr); 2764 break; 2765 } 2766 case KVM_PV_UNPACK: { 2767 struct kvm_s390_pv_unp unp = {}; 2768 2769 r = -EINVAL; 2770 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) 2771 break; 2772 2773 r = -EFAULT; 2774 if (copy_from_user(&unp, argp, sizeof(unp))) 2775 break; 2776 2777 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, 2778 &cmd->rc, &cmd->rrc); 2779 break; 2780 } 2781 case KVM_PV_VERIFY: { 2782 r = -EINVAL; 2783 if (!kvm_s390_pv_is_protected(kvm)) 2784 break; 2785 2786 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2787 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); 2788 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, 2789 cmd->rrc); 2790 break; 2791 } 2792 case KVM_PV_PREP_RESET: { 2793 r = -EINVAL; 2794 if (!kvm_s390_pv_is_protected(kvm)) 2795 break; 2796 2797 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2798 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); 2799 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", 2800 cmd->rc, cmd->rrc); 2801 break; 2802 } 2803 case KVM_PV_UNSHARE_ALL: { 2804 r = -EINVAL; 2805 if (!kvm_s390_pv_is_protected(kvm)) 2806 break; 2807 2808 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2809 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); 2810 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", 2811 cmd->rc, cmd->rrc); 2812 break; 2813 } 2814 case KVM_PV_INFO: { 2815 struct kvm_s390_pv_info info = {}; 2816 ssize_t data_len; 2817 2818 /* 2819 * No need to check the VM protection here. 2820 * 2821 * Maybe user space wants to query some of the data 2822 * when the VM is still unprotected. If we see the 2823 * need to fence a new data command we can still 2824 * return an error in the info handler. 2825 */ 2826 2827 r = -EFAULT; 2828 if (copy_from_user(&info, argp, sizeof(info.header))) 2829 break; 2830 2831 r = -EINVAL; 2832 if (info.header.len_max < sizeof(info.header)) 2833 break; 2834 2835 data_len = kvm_s390_handle_pv_info(&info); 2836 if (data_len < 0) { 2837 r = data_len; 2838 break; 2839 } 2840 /* 2841 * If a data command struct is extended (multiple 2842 * times) this can be used to determine how much of it 2843 * is valid. 
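 * Userspace should therefore only trust the parts of the returned
 * structure that lie below header.len_written.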
2844 */ 2845 info.header.len_written = data_len; 2846 2847 r = -EFAULT; 2848 if (copy_to_user(argp, &info, data_len)) 2849 break; 2850 2851 r = 0; 2852 break; 2853 } 2854 case KVM_PV_DUMP: { 2855 struct kvm_s390_pv_dmp dmp; 2856 2857 r = -EINVAL; 2858 if (!kvm_s390_pv_is_protected(kvm)) 2859 break; 2860 2861 r = -EFAULT; 2862 if (copy_from_user(&dmp, argp, sizeof(dmp))) 2863 break; 2864 2865 r = kvm_s390_pv_dmp(kvm, cmd, dmp); 2866 if (r) 2867 break; 2868 2869 if (copy_to_user(argp, &dmp, sizeof(dmp))) { 2870 r = -EFAULT; 2871 break; 2872 } 2873 2874 break; 2875 } 2876 default: 2877 r = -ENOTTY; 2878 } 2879 if (need_lock) 2880 mutex_unlock(&kvm->lock); 2881 2882 return r; 2883 } 2884 2885 static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags) 2886 { 2887 if (mop->flags & ~supported_flags || !mop->size) 2888 return -EINVAL; 2889 if (mop->size > MEM_OP_MAX_SIZE) 2890 return -E2BIG; 2891 if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { 2892 if (mop->key > 0xf) 2893 return -EINVAL; 2894 } else { 2895 mop->key = 0; 2896 } 2897 return 0; 2898 } 2899 2900 static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2901 { 2902 void __user *uaddr = (void __user *)mop->buf; 2903 enum gacc_mode acc_mode; 2904 void *tmpbuf = NULL; 2905 int r, srcu_idx; 2906 2907 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION | 2908 KVM_S390_MEMOP_F_CHECK_ONLY); 2909 if (r) 2910 return r; 2911 2912 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { 2913 tmpbuf = vmalloc(mop->size); 2914 if (!tmpbuf) 2915 return -ENOMEM; 2916 } 2917 2918 srcu_idx = srcu_read_lock(&kvm->srcu); 2919 2920 if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) { 2921 r = PGM_ADDRESSING; 2922 goto out_unlock; 2923 } 2924 2925 acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE; 2926 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { 2927 r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key); 2928 goto out_unlock; 2929 } 2930 if (acc_mode == GACC_FETCH) { 2931 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2932 mop->size, GACC_FETCH, mop->key); 2933 if (r) 2934 goto out_unlock; 2935 if (copy_to_user(uaddr, tmpbuf, mop->size)) 2936 r = -EFAULT; 2937 } else { 2938 if (copy_from_user(tmpbuf, uaddr, mop->size)) { 2939 r = -EFAULT; 2940 goto out_unlock; 2941 } 2942 r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, 2943 mop->size, GACC_STORE, mop->key); 2944 } 2945 2946 out_unlock: 2947 srcu_read_unlock(&kvm->srcu, srcu_idx); 2948 2949 vfree(tmpbuf); 2950 return r; 2951 } 2952 2953 static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2954 { 2955 void __user *uaddr = (void __user *)mop->buf; 2956 void __user *old_addr = (void __user *)mop->old_addr; 2957 union { 2958 __uint128_t quad; 2959 char raw[sizeof(__uint128_t)]; 2960 } old = { .quad = 0}, new = { .quad = 0 }; 2961 unsigned int off_in_quad = sizeof(new) - mop->size; 2962 int r, srcu_idx; 2963 bool success; 2964 2965 r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION); 2966 if (r) 2967 return r; 2968 /* 2969 * This validates off_in_quad. 
Checking that size is a power 2970 * of two is not necessary, as cmpxchg_guest_abs_with_key 2971 * takes care of that 2972 */ 2973 if (mop->size > sizeof(new)) 2974 return -EINVAL; 2975 if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) 2976 return -EFAULT; 2977 if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size)) 2978 return -EFAULT; 2979 2980 srcu_idx = srcu_read_lock(&kvm->srcu); 2981 2982 if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) { 2983 r = PGM_ADDRESSING; 2984 goto out_unlock; 2985 } 2986 2987 r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad, 2988 new.quad, mop->key, &success); 2989 if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size)) 2990 r = -EFAULT; 2991 2992 out_unlock: 2993 srcu_read_unlock(&kvm->srcu, srcu_idx); 2994 return r; 2995 } 2996 2997 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) 2998 { 2999 /* 3000 * This is technically a heuristic only, if the kvm->lock is not 3001 * taken, it is not guaranteed that the vm is/remains non-protected. 3002 * This is ok from a kernel perspective, wrongdoing is detected 3003 * on the access, -EFAULT is returned and the vm may crash the 3004 * next time it accesses the memory in question. 3005 * There is no sane usecase to do switching and a memop on two 3006 * different CPUs at the same time. 3007 */ 3008 if (kvm_s390_pv_get_handle(kvm)) 3009 return -EINVAL; 3010 3011 switch (mop->op) { 3012 case KVM_S390_MEMOP_ABSOLUTE_READ: 3013 case KVM_S390_MEMOP_ABSOLUTE_WRITE: 3014 return kvm_s390_vm_mem_op_abs(kvm, mop); 3015 case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: 3016 return kvm_s390_vm_mem_op_cmpxchg(kvm, mop); 3017 default: 3018 return -EINVAL; 3019 } 3020 } 3021 3022 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) 3023 { 3024 struct kvm *kvm = filp->private_data; 3025 void __user *argp = (void __user *)arg; 3026 struct kvm_device_attr attr; 3027 int r; 3028 3029 switch (ioctl) { 3030 case KVM_S390_INTERRUPT: { 3031 struct kvm_s390_interrupt s390int; 3032 3033 r = -EFAULT; 3034 if (copy_from_user(&s390int, argp, sizeof(s390int))) 3035 break; 3036 r = kvm_s390_inject_vm(kvm, &s390int); 3037 break; 3038 } 3039 case KVM_CREATE_IRQCHIP: { 3040 r = -EINVAL; 3041 if (kvm->arch.use_irqchip) 3042 r = 0; 3043 break; 3044 } 3045 case KVM_SET_DEVICE_ATTR: { 3046 r = -EFAULT; 3047 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 3048 break; 3049 r = kvm_s390_vm_set_attr(kvm, &attr); 3050 break; 3051 } 3052 case KVM_GET_DEVICE_ATTR: { 3053 r = -EFAULT; 3054 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 3055 break; 3056 r = kvm_s390_vm_get_attr(kvm, &attr); 3057 break; 3058 } 3059 case KVM_HAS_DEVICE_ATTR: { 3060 r = -EFAULT; 3061 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) 3062 break; 3063 r = kvm_s390_vm_has_attr(kvm, &attr); 3064 break; 3065 } 3066 case KVM_S390_GET_SKEYS: { 3067 struct kvm_s390_skeys args; 3068 3069 r = -EFAULT; 3070 if (copy_from_user(&args, argp, 3071 sizeof(struct kvm_s390_skeys))) 3072 break; 3073 r = kvm_s390_get_skeys(kvm, &args); 3074 break; 3075 } 3076 case KVM_S390_SET_SKEYS: { 3077 struct kvm_s390_skeys args; 3078 3079 r = -EFAULT; 3080 if (copy_from_user(&args, argp, 3081 sizeof(struct kvm_s390_skeys))) 3082 break; 3083 r = kvm_s390_set_skeys(kvm, &args); 3084 break; 3085 } 3086 case KVM_S390_GET_CMMA_BITS: { 3087 struct kvm_s390_cmma_log args; 3088 3089 r = -EFAULT; 3090 if (copy_from_user(&args, argp, sizeof(args))) 3091 break; 3092 
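/*
 * kvm->slots_lock keeps the memslot layout and the migration mode
 * stable while the CMMA log is read below.
 */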
mutex_lock(&kvm->slots_lock); 3093 r = kvm_s390_get_cmma_bits(kvm, &args); 3094 mutex_unlock(&kvm->slots_lock); 3095 if (!r) { 3096 r = copy_to_user(argp, &args, sizeof(args)); 3097 if (r) 3098 r = -EFAULT; 3099 } 3100 break; 3101 } 3102 case KVM_S390_SET_CMMA_BITS: { 3103 struct kvm_s390_cmma_log args; 3104 3105 r = -EFAULT; 3106 if (copy_from_user(&args, argp, sizeof(args))) 3107 break; 3108 mutex_lock(&kvm->slots_lock); 3109 r = kvm_s390_set_cmma_bits(kvm, &args); 3110 mutex_unlock(&kvm->slots_lock); 3111 break; 3112 } 3113 case KVM_S390_PV_COMMAND: { 3114 struct kvm_pv_cmd args; 3115 3116 /* protvirt means user cpu state */ 3117 kvm_s390_set_user_cpu_state_ctrl(kvm); 3118 r = 0; 3119 if (!is_prot_virt_host()) { 3120 r = -EINVAL; 3121 break; 3122 } 3123 if (copy_from_user(&args, argp, sizeof(args))) { 3124 r = -EFAULT; 3125 break; 3126 } 3127 if (args.flags) { 3128 r = -EINVAL; 3129 break; 3130 } 3131 /* must be called without kvm->lock */ 3132 r = kvm_s390_handle_pv(kvm, &args); 3133 if (copy_to_user(argp, &args, sizeof(args))) { 3134 r = -EFAULT; 3135 break; 3136 } 3137 break; 3138 } 3139 case KVM_S390_MEM_OP: { 3140 struct kvm_s390_mem_op mem_op; 3141 3142 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) 3143 r = kvm_s390_vm_mem_op(kvm, &mem_op); 3144 else 3145 r = -EFAULT; 3146 break; 3147 } 3148 case KVM_S390_ZPCI_OP: { 3149 struct kvm_s390_zpci_op args; 3150 3151 r = -EINVAL; 3152 if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 3153 break; 3154 if (copy_from_user(&args, argp, sizeof(args))) { 3155 r = -EFAULT; 3156 break; 3157 } 3158 r = kvm_s390_pci_zpci_op(kvm, &args); 3159 break; 3160 } 3161 default: 3162 r = -ENOTTY; 3163 } 3164 3165 return r; 3166 } 3167 3168 static int kvm_s390_apxa_installed(void) 3169 { 3170 struct ap_config_info info; 3171 3172 if (ap_instructions_available()) { 3173 if (ap_qci(&info) == 0) 3174 return info.apxa; 3175 } 3176 3177 return 0; 3178 } 3179 3180 /* 3181 * The format of the crypto control block (CRYCB) is specified in the 3 low 3182 * order bits of the CRYCB designation (CRYCBD) field as follows: 3183 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the 3184 * AP extended addressing (APXA) facility are installed. 3185 * Format 1: The APXA facility is not installed but the MSAX3 facility is. 3186 * Format 2: Both the APXA and MSAX3 facilities are installed 3187 */ 3188 static void kvm_s390_set_crycb_format(struct kvm *kvm) 3189 { 3190 kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb); 3191 3192 /* Clear the CRYCB format bits - i.e., set format 0 by default */ 3193 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); 3194 3195 /* Check whether MSAX3 is installed */ 3196 if (!test_kvm_facility(kvm, 76)) 3197 return; 3198 3199 if (kvm_s390_apxa_installed()) 3200 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; 3201 else 3202 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 3203 } 3204 3205 /* 3206 * kvm_arch_crypto_set_masks 3207 * 3208 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 3209 * to be set. 3210 * @apm: the mask identifying the accessible AP adapters 3211 * @aqm: the mask identifying the accessible AP domains 3212 * @adm: the mask identifying the accessible AP control domains 3213 * 3214 * Set the masks that identify the adapters, domains and control domains to 3215 * which the KVM guest is granted access. 3216 * 3217 * Note: The kvm->lock mutex must be locked by the caller before invoking this 3218 * function. 
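 *
 * A typical caller is the vfio_ap device driver when an AP matrix mediated
 * device is attached to the guest; illustratively (the "matrix" structure
 * below is hypothetical, only the function signature is fixed):
 *
 *	kvm_arch_crypto_set_masks(kvm, matrix->apm, matrix->aqm, matrix->adm);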
3219 */ 3220 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, 3221 unsigned long *aqm, unsigned long *adm) 3222 { 3223 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; 3224 3225 kvm_s390_vcpu_block_all(kvm); 3226 3227 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { 3228 case CRYCB_FORMAT2: /* APCB1 use 256 bits */ 3229 memcpy(crycb->apcb1.apm, apm, 32); 3230 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx", 3231 apm[0], apm[1], apm[2], apm[3]); 3232 memcpy(crycb->apcb1.aqm, aqm, 32); 3233 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx", 3234 aqm[0], aqm[1], aqm[2], aqm[3]); 3235 memcpy(crycb->apcb1.adm, adm, 32); 3236 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx", 3237 adm[0], adm[1], adm[2], adm[3]); 3238 break; 3239 case CRYCB_FORMAT1: 3240 case CRYCB_FORMAT0: /* Fall through both use APCB0 */ 3241 memcpy(crycb->apcb0.apm, apm, 8); 3242 memcpy(crycb->apcb0.aqm, aqm, 2); 3243 memcpy(crycb->apcb0.adm, adm, 2); 3244 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x", 3245 apm[0], *((unsigned short *)aqm), 3246 *((unsigned short *)adm)); 3247 break; 3248 default: /* Can not happen */ 3249 break; 3250 } 3251 3252 /* recreate the shadow crycb for each vcpu */ 3253 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3254 kvm_s390_vcpu_unblock_all(kvm); 3255 } 3256 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); 3257 3258 /* 3259 * kvm_arch_crypto_clear_masks 3260 * 3261 * @kvm: pointer to the target guest's KVM struct containing the crypto masks 3262 * to be cleared. 3263 * 3264 * Clear the masks that identify the adapters, domains and control domains to 3265 * which the KVM guest is granted access. 3266 * 3267 * Note: The kvm->lock mutex must be locked by the caller before invoking this 3268 * function. 
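 *
 * This is the counterpart of kvm_arch_crypto_set_masks() and is typically
 * called when the AP matrix mediated device is detached from the guest.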
3269 */ 3270 void kvm_arch_crypto_clear_masks(struct kvm *kvm) 3271 { 3272 kvm_s390_vcpu_block_all(kvm); 3273 3274 memset(&kvm->arch.crypto.crycb->apcb0, 0, 3275 sizeof(kvm->arch.crypto.crycb->apcb0)); 3276 memset(&kvm->arch.crypto.crycb->apcb1, 0, 3277 sizeof(kvm->arch.crypto.crycb->apcb1)); 3278 3279 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:"); 3280 /* recreate the shadow crycb for each vcpu */ 3281 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); 3282 kvm_s390_vcpu_unblock_all(kvm); 3283 } 3284 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); 3285 3286 static u64 kvm_s390_get_initial_cpuid(void) 3287 { 3288 struct cpuid cpuid; 3289 3290 get_cpu_id(&cpuid); 3291 cpuid.version = 0xff; 3292 return *((u64 *) &cpuid); 3293 } 3294 3295 static void kvm_s390_crypto_init(struct kvm *kvm) 3296 { 3297 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; 3298 kvm_s390_set_crycb_format(kvm); 3299 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); 3300 3301 if (!test_kvm_facility(kvm, 76)) 3302 return; 3303 3304 /* Enable AES/DEA protected key functions by default */ 3305 kvm->arch.crypto.aes_kw = 1; 3306 kvm->arch.crypto.dea_kw = 1; 3307 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 3308 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); 3309 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 3310 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); 3311 } 3312 3313 static void sca_dispose(struct kvm *kvm) 3314 { 3315 if (kvm->arch.use_esca) 3316 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 3317 else 3318 free_page((unsigned long)(kvm->arch.sca)); 3319 kvm->arch.sca = NULL; 3320 } 3321 3322 void kvm_arch_free_vm(struct kvm *kvm) 3323 { 3324 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) 3325 kvm_s390_pci_clear_list(kvm); 3326 3327 __kvm_arch_free_vm(kvm); 3328 } 3329 3330 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 3331 { 3332 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; 3333 int i, rc; 3334 char debug_name[16]; 3335 static unsigned long sca_offset; 3336 3337 rc = -EINVAL; 3338 #ifdef CONFIG_KVM_S390_UCONTROL 3339 if (type & ~KVM_VM_S390_UCONTROL) 3340 goto out_err; 3341 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) 3342 goto out_err; 3343 #else 3344 if (type) 3345 goto out_err; 3346 #endif 3347 3348 rc = s390_enable_sie(); 3349 if (rc) 3350 goto out_err; 3351 3352 rc = -ENOMEM; 3353 3354 if (!sclp.has_64bscao) 3355 alloc_flags |= GFP_DMA; 3356 rwlock_init(&kvm->arch.sca_lock); 3357 /* start with basic SCA */ 3358 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); 3359 if (!kvm->arch.sca) 3360 goto out_err; 3361 mutex_lock(&kvm_lock); 3362 sca_offset += 16; 3363 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 3364 sca_offset = 0; 3365 kvm->arch.sca = (struct bsca_block *) 3366 ((char *) kvm->arch.sca + sca_offset); 3367 mutex_unlock(&kvm_lock); 3368 3369 sprintf(debug_name, "kvm-%u", current->pid); 3370 3371 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); 3372 if (!kvm->arch.dbf) 3373 goto out_err; 3374 3375 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); 3376 kvm->arch.sie_page2 = 3377 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); 3378 if (!kvm->arch.sie_page2) 3379 goto out_err; 3380 3381 kvm->arch.sie_page2->kvm = kvm; 3382 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; 3383 3384 for (i = 0; i < kvm_s390_fac_size(); i++) { 3385 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] & 3386 (kvm_s390_fac_base[i] | 3387 kvm_s390_fac_ext[i]); 3388 
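/*
 * fac_mask (above) bounds the facilities userspace may enable for the
 * guest, while fac_list (below) is the set enabled by default; both are
 * limited to what the host itself offers via stfle_fac_list.
 */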
kvm->arch.model.fac_list[i] = stfle_fac_list[i] & 3389 kvm_s390_fac_base[i]; 3390 } 3391 kvm->arch.model.subfuncs = kvm_s390_available_subfunc; 3392 3393 /* we are always in czam mode - even on pre z14 machines */ 3394 set_kvm_facility(kvm->arch.model.fac_mask, 138); 3395 set_kvm_facility(kvm->arch.model.fac_list, 138); 3396 /* we emulate STHYI in kvm */ 3397 set_kvm_facility(kvm->arch.model.fac_mask, 74); 3398 set_kvm_facility(kvm->arch.model.fac_list, 74); 3399 if (MACHINE_HAS_TLB_GUEST) { 3400 set_kvm_facility(kvm->arch.model.fac_mask, 147); 3401 set_kvm_facility(kvm->arch.model.fac_list, 147); 3402 } 3403 3404 if (css_general_characteristics.aiv && test_facility(65)) 3405 set_kvm_facility(kvm->arch.model.fac_mask, 65); 3406 3407 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); 3408 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 3409 3410 kvm->arch.model.uv_feat_guest.feat = 0; 3411 3412 kvm_s390_crypto_init(kvm); 3413 3414 if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { 3415 mutex_lock(&kvm->lock); 3416 kvm_s390_pci_init_list(kvm); 3417 kvm_s390_vcpu_pci_enable_interp(kvm); 3418 mutex_unlock(&kvm->lock); 3419 } 3420 3421 mutex_init(&kvm->arch.float_int.ais_lock); 3422 spin_lock_init(&kvm->arch.float_int.lock); 3423 for (i = 0; i < FIRQ_LIST_COUNT; i++) 3424 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); 3425 init_waitqueue_head(&kvm->arch.ipte_wq); 3426 mutex_init(&kvm->arch.ipte_mutex); 3427 3428 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 3429 VM_EVENT(kvm, 3, "vm created with type %lu", type); 3430 3431 if (type & KVM_VM_S390_UCONTROL) { 3432 struct kvm_userspace_memory_region2 fake_memslot = { 3433 .slot = KVM_S390_UCONTROL_MEMSLOT, 3434 .guest_phys_addr = 0, 3435 .userspace_addr = 0, 3436 .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), 3437 .flags = 0, 3438 }; 3439 3440 kvm->arch.gmap = NULL; 3441 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 3442 /* one flat fake memslot covering the whole address-space */ 3443 mutex_lock(&kvm->slots_lock); 3444 KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); 3445 mutex_unlock(&kvm->slots_lock); 3446 } else { 3447 if (sclp.hamax == U64_MAX) 3448 kvm->arch.mem_limit = TASK_SIZE_MAX; 3449 else 3450 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX, 3451 sclp.hamax + 1); 3452 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1); 3453 if (!kvm->arch.gmap) 3454 goto out_err; 3455 kvm->arch.gmap->private = kvm; 3456 kvm->arch.gmap->pfault_enabled = 0; 3457 } 3458 3459 kvm->arch.use_pfmfi = sclp.has_pfmfi; 3460 kvm->arch.use_skf = sclp.has_skey; 3461 spin_lock_init(&kvm->arch.start_stop_lock); 3462 kvm_s390_vsie_init(kvm); 3463 if (use_gisa) 3464 kvm_s390_gisa_init(kvm); 3465 INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); 3466 kvm->arch.pv.set_aside = NULL; 3467 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 3468 3469 return 0; 3470 out_err: 3471 free_page((unsigned long)kvm->arch.sie_page2); 3472 debug_unregister(kvm->arch.dbf); 3473 sca_dispose(kvm); 3474 KVM_EVENT(3, "creation of vm failed: %d", rc); 3475 return rc; 3476 } 3477 3478 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 3479 { 3480 u16 rc, rrc; 3481 3482 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 3483 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 3484 kvm_s390_clear_local_irqs(vcpu); 3485 kvm_clear_async_pf_completion_queue(vcpu); 3486 if (!kvm_is_ucontrol(vcpu->kvm)) 3487 sca_del_vcpu(vcpu); 3488 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 3489 3490 if (kvm_is_ucontrol(vcpu->kvm)) 3491 gmap_remove(vcpu->arch.gmap); 3492 
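/* Release CMMA state and, for protected guests, the secure CPU before the SIE block is freed. */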
3493 if (vcpu->kvm->arch.use_cmma) 3494 kvm_s390_vcpu_unsetup_cmma(vcpu); 3495 /* We can not hold the vcpu mutex here, we are already dying */ 3496 if (kvm_s390_pv_cpu_get_handle(vcpu)) 3497 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); 3498 free_page((unsigned long)(vcpu->arch.sie_block)); 3499 } 3500 3501 void kvm_arch_destroy_vm(struct kvm *kvm) 3502 { 3503 u16 rc, rrc; 3504 3505 kvm_destroy_vcpus(kvm); 3506 sca_dispose(kvm); 3507 kvm_s390_gisa_destroy(kvm); 3508 /* 3509 * We are already at the end of life and kvm->lock is not taken. 3510 * This is ok as the file descriptor is closed by now and nobody 3511 * can mess with the pv state. 3512 */ 3513 kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc); 3514 /* 3515 * Remove the mmu notifier only when the whole KVM VM is torn down, 3516 * and only if one was registered to begin with. If the VM is 3517 * currently not protected, but has been previously been protected, 3518 * then it's possible that the notifier is still registered. 3519 */ 3520 if (kvm->arch.pv.mmu_notifier.ops) 3521 mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm); 3522 3523 debug_unregister(kvm->arch.dbf); 3524 free_page((unsigned long)kvm->arch.sie_page2); 3525 if (!kvm_is_ucontrol(kvm)) 3526 gmap_remove(kvm->arch.gmap); 3527 kvm_s390_destroy_adapters(kvm); 3528 kvm_s390_clear_float_irqs(kvm); 3529 kvm_s390_vsie_destroy(kvm); 3530 KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 3531 } 3532 3533 /* Section: vcpu related */ 3534 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) 3535 { 3536 vcpu->arch.gmap = gmap_create(current->mm, -1UL); 3537 if (!vcpu->arch.gmap) 3538 return -ENOMEM; 3539 vcpu->arch.gmap->private = vcpu->kvm; 3540 3541 return 0; 3542 } 3543 3544 static void sca_del_vcpu(struct kvm_vcpu *vcpu) 3545 { 3546 if (!kvm_s390_use_sca_entries()) 3547 return; 3548 read_lock(&vcpu->kvm->arch.sca_lock); 3549 if (vcpu->kvm->arch.use_esca) { 3550 struct esca_block *sca = vcpu->kvm->arch.sca; 3551 3552 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3553 sca->cpu[vcpu->vcpu_id].sda = 0; 3554 } else { 3555 struct bsca_block *sca = vcpu->kvm->arch.sca; 3556 3557 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3558 sca->cpu[vcpu->vcpu_id].sda = 0; 3559 } 3560 read_unlock(&vcpu->kvm->arch.sca_lock); 3561 } 3562 3563 static void sca_add_vcpu(struct kvm_vcpu *vcpu) 3564 { 3565 if (!kvm_s390_use_sca_entries()) { 3566 phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); 3567 3568 /* we still need the basic sca for the ipte control */ 3569 vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3570 vcpu->arch.sie_block->scaol = sca_phys; 3571 return; 3572 } 3573 read_lock(&vcpu->kvm->arch.sca_lock); 3574 if (vcpu->kvm->arch.use_esca) { 3575 struct esca_block *sca = vcpu->kvm->arch.sca; 3576 phys_addr_t sca_phys = virt_to_phys(sca); 3577 3578 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); 3579 vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3580 vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; 3581 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3582 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 3583 } else { 3584 struct bsca_block *sca = vcpu->kvm->arch.sca; 3585 phys_addr_t sca_phys = virt_to_phys(sca); 3586 3587 sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); 3588 vcpu->arch.sie_block->scaoh = sca_phys >> 32; 3589 vcpu->arch.sie_block->scaol = sca_phys; 3590 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 3591 } 3592 read_unlock(&vcpu->kvm->arch.sca_lock); 3593 } 3594 3595 /* Basic SCA to Extended 
SCA data copy routines */ 3596 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 3597 { 3598 d->sda = s->sda; 3599 d->sigp_ctrl.c = s->sigp_ctrl.c; 3600 d->sigp_ctrl.scn = s->sigp_ctrl.scn; 3601 } 3602 3603 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 3604 { 3605 int i; 3606 3607 d->ipte_control = s->ipte_control; 3608 d->mcn[0] = s->mcn; 3609 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 3610 sca_copy_entry(&d->cpu[i], &s->cpu[i]); 3611 } 3612 3613 static int sca_switch_to_extended(struct kvm *kvm) 3614 { 3615 struct bsca_block *old_sca = kvm->arch.sca; 3616 struct esca_block *new_sca; 3617 struct kvm_vcpu *vcpu; 3618 unsigned long vcpu_idx; 3619 u32 scaol, scaoh; 3620 phys_addr_t new_sca_phys; 3621 3622 if (kvm->arch.use_esca) 3623 return 0; 3624 3625 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3626 if (!new_sca) 3627 return -ENOMEM; 3628 3629 new_sca_phys = virt_to_phys(new_sca); 3630 scaoh = new_sca_phys >> 32; 3631 scaol = new_sca_phys & ESCA_SCAOL_MASK; 3632 3633 kvm_s390_vcpu_block_all(kvm); 3634 write_lock(&kvm->arch.sca_lock); 3635 3636 sca_copy_b_to_e(new_sca, old_sca); 3637 3638 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 3639 vcpu->arch.sie_block->scaoh = scaoh; 3640 vcpu->arch.sie_block->scaol = scaol; 3641 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; 3642 } 3643 kvm->arch.sca = new_sca; 3644 kvm->arch.use_esca = 1; 3645 3646 write_unlock(&kvm->arch.sca_lock); 3647 kvm_s390_vcpu_unblock_all(kvm); 3648 3649 free_page((unsigned long)old_sca); 3650 3651 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 3652 old_sca, kvm->arch.sca); 3653 return 0; 3654 } 3655 3656 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 3657 { 3658 int rc; 3659 3660 if (!kvm_s390_use_sca_entries()) { 3661 if (id < KVM_MAX_VCPUS) 3662 return true; 3663 return false; 3664 } 3665 if (id < KVM_S390_BSCA_CPU_SLOTS) 3666 return true; 3667 if (!sclp.has_esca || !sclp.has_64bscao) 3668 return false; 3669 3670 rc = kvm->arch.use_esca ? 
0 : sca_switch_to_extended(kvm); 3671 3672 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 3673 } 3674 3675 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3676 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3677 { 3678 WARN_ON_ONCE(vcpu->arch.cputm_start != 0); 3679 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3680 vcpu->arch.cputm_start = get_tod_clock_fast(); 3681 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3682 } 3683 3684 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3685 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3686 { 3687 WARN_ON_ONCE(vcpu->arch.cputm_start == 0); 3688 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3689 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3690 vcpu->arch.cputm_start = 0; 3691 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3692 } 3693 3694 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3695 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3696 { 3697 WARN_ON_ONCE(vcpu->arch.cputm_enabled); 3698 vcpu->arch.cputm_enabled = true; 3699 __start_cpu_timer_accounting(vcpu); 3700 } 3701 3702 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ 3703 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3704 { 3705 WARN_ON_ONCE(!vcpu->arch.cputm_enabled); 3706 __stop_cpu_timer_accounting(vcpu); 3707 vcpu->arch.cputm_enabled = false; 3708 } 3709 3710 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3711 { 3712 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3713 __enable_cpu_timer_accounting(vcpu); 3714 preempt_enable(); 3715 } 3716 3717 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) 3718 { 3719 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3720 __disable_cpu_timer_accounting(vcpu); 3721 preempt_enable(); 3722 } 3723 3724 /* set the cpu timer - may only be called from the VCPU thread itself */ 3725 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) 3726 { 3727 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3728 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); 3729 if (vcpu->arch.cputm_enabled) 3730 vcpu->arch.cputm_start = get_tod_clock_fast(); 3731 vcpu->arch.sie_block->cputm = cputm; 3732 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); 3733 preempt_enable(); 3734 } 3735 3736 /* update and get the cpu timer - can also be called from other VCPU threads */ 3737 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) 3738 { 3739 unsigned int seq; 3740 __u64 value; 3741 3742 if (unlikely(!vcpu->arch.cputm_enabled)) 3743 return vcpu->arch.sie_block->cputm; 3744 3745 preempt_disable(); /* protect from TOD sync and vcpu_load/put */ 3746 do { 3747 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); 3748 /* 3749 * If the writer would ever execute a read in the critical 3750 * section, e.g. in irq context, we have a deadlock. 
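 * The WARN_ON_ONCE below flags exactly that case. On the common path
 * the value returned is simply the stored cputm aged by the TOD ticks
 * elapsed since accounting was (re)started, i.e.
 * cputm - (get_tod_clock_fast() - cputm_start).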
3751 */ 3752 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); 3753 value = vcpu->arch.sie_block->cputm; 3754 /* if cputm_start is 0, accounting is being started/stopped */ 3755 if (likely(vcpu->arch.cputm_start)) 3756 value -= get_tod_clock_fast() - vcpu->arch.cputm_start; 3757 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); 3758 preempt_enable(); 3759 return value; 3760 } 3761 3762 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 3763 { 3764 3765 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); 3766 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3767 __start_cpu_timer_accounting(vcpu); 3768 vcpu->cpu = cpu; 3769 } 3770 3771 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 3772 { 3773 vcpu->cpu = -1; 3774 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) 3775 __stop_cpu_timer_accounting(vcpu); 3776 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); 3777 3778 } 3779 3780 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 3781 { 3782 mutex_lock(&vcpu->kvm->lock); 3783 preempt_disable(); 3784 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 3785 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; 3786 preempt_enable(); 3787 mutex_unlock(&vcpu->kvm->lock); 3788 if (!kvm_is_ucontrol(vcpu->kvm)) { 3789 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 3790 sca_add_vcpu(vcpu); 3791 } 3792 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) 3793 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 3794 } 3795 3796 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) 3797 { 3798 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && 3799 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) 3800 return true; 3801 return false; 3802 } 3803 3804 static bool kvm_has_pckmo_ecc(struct kvm *kvm) 3805 { 3806 /* At least one ECC subfunction must be present */ 3807 return kvm_has_pckmo_subfunc(kvm, 32) || 3808 kvm_has_pckmo_subfunc(kvm, 33) || 3809 kvm_has_pckmo_subfunc(kvm, 34) || 3810 kvm_has_pckmo_subfunc(kvm, 40) || 3811 kvm_has_pckmo_subfunc(kvm, 41); 3812 3813 } 3814 3815 static bool kvm_has_pckmo_hmac(struct kvm *kvm) 3816 { 3817 /* At least one HMAC subfunction must be present */ 3818 return kvm_has_pckmo_subfunc(kvm, 118) || 3819 kvm_has_pckmo_subfunc(kvm, 122); 3820 } 3821 3822 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) 3823 { 3824 /* 3825 * If the AP instructions are not being interpreted and the MSAX3 3826 * facility is not configured for the guest, there is nothing to set up. 
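 * Otherwise the vcpu inherits the VM-wide CRYCB and the protected-key
 * wrapping controls (AES/DEA, plus ECC/HMAC where available) that were
 * configured through the VM crypto attributes.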
3827 */ 3828 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) 3829 return; 3830 3831 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; 3832 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); 3833 vcpu->arch.sie_block->eca &= ~ECA_APIE; 3834 vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC); 3835 3836 if (vcpu->kvm->arch.crypto.apie) 3837 vcpu->arch.sie_block->eca |= ECA_APIE; 3838 3839 /* Set up protected key support */ 3840 if (vcpu->kvm->arch.crypto.aes_kw) { 3841 vcpu->arch.sie_block->ecb3 |= ECB3_AES; 3842 /* ecc/hmac is also wrapped with AES key */ 3843 if (kvm_has_pckmo_ecc(vcpu->kvm)) 3844 vcpu->arch.sie_block->ecd |= ECD_ECC; 3845 if (kvm_has_pckmo_hmac(vcpu->kvm)) 3846 vcpu->arch.sie_block->ecd |= ECD_HMAC; 3847 } 3848 3849 if (vcpu->kvm->arch.crypto.dea_kw) 3850 vcpu->arch.sie_block->ecb3 |= ECB3_DEA; 3851 } 3852 3853 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) 3854 { 3855 free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo)); 3856 vcpu->arch.sie_block->cbrlo = 0; 3857 } 3858 3859 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) 3860 { 3861 void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 3862 3863 if (!cbrlo_page) 3864 return -ENOMEM; 3865 3866 vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page); 3867 return 0; 3868 } 3869 3870 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) 3871 { 3872 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 3873 3874 vcpu->arch.sie_block->ibc = model->ibc; 3875 if (test_kvm_facility(vcpu->kvm, 7)) 3876 vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list); 3877 } 3878 3879 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) 3880 { 3881 int rc = 0; 3882 u16 uvrc, uvrrc; 3883 3884 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | 3885 CPUSTAT_SM | 3886 CPUSTAT_STOPPED); 3887 3888 if (test_kvm_facility(vcpu->kvm, 78)) 3889 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); 3890 else if (test_kvm_facility(vcpu->kvm, 8)) 3891 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED); 3892 3893 kvm_s390_vcpu_setup_model(vcpu); 3894 3895 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ 3896 if (MACHINE_HAS_ESOP) 3897 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; 3898 if (test_kvm_facility(vcpu->kvm, 9)) 3899 vcpu->arch.sie_block->ecb |= ECB_SRSI; 3900 if (test_kvm_facility(vcpu->kvm, 11)) 3901 vcpu->arch.sie_block->ecb |= ECB_PTF; 3902 if (test_kvm_facility(vcpu->kvm, 73)) 3903 vcpu->arch.sie_block->ecb |= ECB_TE; 3904 if (!kvm_is_ucontrol(vcpu->kvm)) 3905 vcpu->arch.sie_block->ecb |= ECB_SPECI; 3906 3907 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) 3908 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; 3909 if (test_kvm_facility(vcpu->kvm, 130)) 3910 vcpu->arch.sie_block->ecb2 |= ECB2_IEP; 3911 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; 3912 if (sclp.has_cei) 3913 vcpu->arch.sie_block->eca |= ECA_CEI; 3914 if (sclp.has_ib) 3915 vcpu->arch.sie_block->eca |= ECA_IB; 3916 if (sclp.has_siif) 3917 vcpu->arch.sie_block->eca |= ECA_SII; 3918 if (sclp.has_sigpif) 3919 vcpu->arch.sie_block->eca |= ECA_SIGPI; 3920 if (test_kvm_facility(vcpu->kvm, 129)) { 3921 vcpu->arch.sie_block->eca |= ECA_VX; 3922 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 3923 } 3924 if (test_kvm_facility(vcpu->kvm, 139)) 3925 vcpu->arch.sie_block->ecd |= ECD_MEF; 3926 if (test_kvm_facility(vcpu->kvm, 156)) 3927 vcpu->arch.sie_block->ecd |= ECD_ETOKENF; 3928 if (vcpu->arch.sie_block->gd) { 3929 vcpu->arch.sie_block->eca |= ECA_AIV; 3930 VCPU_EVENT(vcpu, 3, 
"AIV gisa format-%u enabled for cpu %03u", 3931 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); 3932 } 3933 vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC; 3934 vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb); 3935 3936 if (sclp.has_kss) 3937 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); 3938 else 3939 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 3940 3941 if (vcpu->kvm->arch.use_cmma) { 3942 rc = kvm_s390_vcpu_setup_cmma(vcpu); 3943 if (rc) 3944 return rc; 3945 } 3946 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 3947 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; 3948 3949 vcpu->arch.sie_block->hpid = HPID_KVM; 3950 3951 kvm_s390_vcpu_crypto_setup(vcpu); 3952 3953 kvm_s390_vcpu_pci_setup(vcpu); 3954 3955 mutex_lock(&vcpu->kvm->lock); 3956 if (kvm_s390_pv_is_protected(vcpu->kvm)) { 3957 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); 3958 if (rc) 3959 kvm_s390_vcpu_unsetup_cmma(vcpu); 3960 } 3961 mutex_unlock(&vcpu->kvm->lock); 3962 3963 return rc; 3964 } 3965 3966 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 3967 { 3968 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 3969 return -EINVAL; 3970 return 0; 3971 } 3972 3973 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 3974 { 3975 struct sie_page *sie_page; 3976 int rc; 3977 3978 BUILD_BUG_ON(sizeof(struct sie_page) != 4096); 3979 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); 3980 if (!sie_page) 3981 return -ENOMEM; 3982 3983 vcpu->arch.sie_block = &sie_page->sie_block; 3984 vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb); 3985 3986 /* the real guest size will always be smaller than msl */ 3987 vcpu->arch.sie_block->mso = 0; 3988 vcpu->arch.sie_block->msl = sclp.hamax; 3989 3990 vcpu->arch.sie_block->icpua = vcpu->vcpu_id; 3991 spin_lock_init(&vcpu->arch.local_int.lock); 3992 vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm); 3993 seqcount_init(&vcpu->arch.cputm_seqcount); 3994 3995 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 3996 kvm_clear_async_pf_completion_queue(vcpu); 3997 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | 3998 KVM_SYNC_GPRS | 3999 KVM_SYNC_ACRS | 4000 KVM_SYNC_CRS | 4001 KVM_SYNC_ARCH0 | 4002 KVM_SYNC_PFAULT | 4003 KVM_SYNC_DIAG318; 4004 vcpu->arch.acrs_loaded = false; 4005 kvm_s390_set_prefix(vcpu, 0); 4006 if (test_kvm_facility(vcpu->kvm, 64)) 4007 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 4008 if (test_kvm_facility(vcpu->kvm, 82)) 4009 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; 4010 if (test_kvm_facility(vcpu->kvm, 133)) 4011 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; 4012 if (test_kvm_facility(vcpu->kvm, 156)) 4013 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; 4014 /* fprs can be synchronized via vrs, even if the guest has no vx. With 4015 * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format. 
4016 */ 4017 if (cpu_has_vx()) 4018 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 4019 else 4020 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; 4021 4022 if (kvm_is_ucontrol(vcpu->kvm)) { 4023 rc = __kvm_ucontrol_vcpu_init(vcpu); 4024 if (rc) 4025 goto out_free_sie_block; 4026 } 4027 4028 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", 4029 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 4030 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); 4031 4032 rc = kvm_s390_vcpu_setup(vcpu); 4033 if (rc) 4034 goto out_ucontrol_uninit; 4035 4036 kvm_s390_update_topology_change_report(vcpu->kvm, 1); 4037 return 0; 4038 4039 out_ucontrol_uninit: 4040 if (kvm_is_ucontrol(vcpu->kvm)) 4041 gmap_remove(vcpu->arch.gmap); 4042 out_free_sie_block: 4043 free_page((unsigned long)(vcpu->arch.sie_block)); 4044 return rc; 4045 } 4046 4047 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4048 { 4049 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4050 return kvm_s390_vcpu_has_irq(vcpu, 0); 4051 } 4052 4053 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 4054 { 4055 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); 4056 } 4057 4058 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) 4059 { 4060 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 4061 exit_sie(vcpu); 4062 } 4063 4064 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu) 4065 { 4066 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); 4067 } 4068 4069 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) 4070 { 4071 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 4072 exit_sie(vcpu); 4073 } 4074 4075 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu) 4076 { 4077 return atomic_read(&vcpu->arch.sie_block->prog20) & 4078 (PROG_BLOCK_SIE | PROG_REQUEST); 4079 } 4080 4081 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) 4082 { 4083 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); 4084 } 4085 4086 /* 4087 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running. 4088 * If the CPU is not running (e.g. waiting as idle) the function will 4089 * return immediately. 
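 * A typical caller first records the reason in prog20, as
 * kvm_s390_vcpu_block() above does:
 *	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
 *	exit_sie(vcpu);
 * otherwise the vcpu may simply re-enter SIE right away.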
*/ 4090 void exit_sie(struct kvm_vcpu *vcpu) 4091 { 4092 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT); 4093 kvm_s390_vsie_kick(vcpu); 4094 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) 4095 cpu_relax(); 4096 } 4097 4098 /* Kick a guest cpu out of SIE to process a request synchronously */ 4099 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) 4100 { 4101 __kvm_make_request(req, vcpu); 4102 kvm_s390_vcpu_request(vcpu); 4103 } 4104 4105 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, 4106 unsigned long end) 4107 { 4108 struct kvm *kvm = gmap->private; 4109 struct kvm_vcpu *vcpu; 4110 unsigned long prefix; 4111 unsigned long i; 4112 4113 trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap)); 4114 4115 if (gmap_is_shadow(gmap)) 4116 return; 4117 if (start >= 1UL << 31) 4118 /* We are only interested in prefix pages */ 4119 return; 4120 kvm_for_each_vcpu(i, vcpu, kvm) { 4121 /* match against both prefix pages */ 4122 prefix = kvm_s390_get_prefix(vcpu); 4123 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) { 4124 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx", 4125 start, end); 4126 kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 4127 } 4128 } 4129 } 4130 4131 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 4132 { 4133 /* do not poll with more than halt_poll_max_steal percent of steal time */ 4134 if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >= 4135 READ_ONCE(halt_poll_max_steal)) { 4136 vcpu->stat.halt_no_poll_steal++; 4137 return true; 4138 } 4139 return false; 4140 } 4141 4142 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 4143 { 4144 /* kvm common code refers to this, but never calls it */ 4145 BUG(); 4146 return 0; 4147 } 4148 4149 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, 4150 struct kvm_one_reg *reg) 4151 { 4152 int r = -EINVAL; 4153 4154 switch (reg->id) { 4155 case KVM_REG_S390_TODPR: 4156 r = put_user(vcpu->arch.sie_block->todpr, 4157 (u32 __user *)reg->addr); 4158 break; 4159 case KVM_REG_S390_EPOCHDIFF: 4160 r = put_user(vcpu->arch.sie_block->epoch, 4161 (u64 __user *)reg->addr); 4162 break; 4163 case KVM_REG_S390_CPU_TIMER: 4164 r = put_user(kvm_s390_get_cpu_timer(vcpu), 4165 (u64 __user *)reg->addr); 4166 break; 4167 case KVM_REG_S390_CLOCK_COMP: 4168 r = put_user(vcpu->arch.sie_block->ckc, 4169 (u64 __user *)reg->addr); 4170 break; 4171 case KVM_REG_S390_PFTOKEN: 4172 r = put_user(vcpu->arch.pfault_token, 4173 (u64 __user *)reg->addr); 4174 break; 4175 case KVM_REG_S390_PFCOMPARE: 4176 r = put_user(vcpu->arch.pfault_compare, 4177 (u64 __user *)reg->addr); 4178 break; 4179 case KVM_REG_S390_PFSELECT: 4180 r = put_user(vcpu->arch.pfault_select, 4181 (u64 __user *)reg->addr); 4182 break; 4183 case KVM_REG_S390_PP: 4184 r = put_user(vcpu->arch.sie_block->pp, 4185 (u64 __user *)reg->addr); 4186 break; 4187 case KVM_REG_S390_GBEA: 4188 r = put_user(vcpu->arch.sie_block->gbea, 4189 (u64 __user *)reg->addr); 4190 break; 4191 default: 4192 break; 4193 } 4194 4195 return r; 4196 } 4197 4198 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 4199 struct kvm_one_reg *reg) 4200 { 4201 int r = -EINVAL; 4202 __u64 val; 4203 4204 switch (reg->id) { 4205 case KVM_REG_S390_TODPR: 4206 r = get_user(vcpu->arch.sie_block->todpr, 4207 (u32 __user *)reg->addr); 4208 break; 4209 case KVM_REG_S390_EPOCHDIFF: 4210 r = get_user(vcpu->arch.sie_block->epoch, 4211 (u64 __user *)reg->addr); 4212 break; 4213 case KVM_REG_S390_CPU_TIMER: 4214 r = get_user(val, (u64 __user *)reg->addr); 4215 if 
(!r) 4216 kvm_s390_set_cpu_timer(vcpu, val); 4217 break; 4218 case KVM_REG_S390_CLOCK_COMP: 4219 r = get_user(vcpu->arch.sie_block->ckc, 4220 (u64 __user *)reg->addr); 4221 break; 4222 case KVM_REG_S390_PFTOKEN: 4223 r = get_user(vcpu->arch.pfault_token, 4224 (u64 __user *)reg->addr); 4225 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4226 kvm_clear_async_pf_completion_queue(vcpu); 4227 break; 4228 case KVM_REG_S390_PFCOMPARE: 4229 r = get_user(vcpu->arch.pfault_compare, 4230 (u64 __user *)reg->addr); 4231 break; 4232 case KVM_REG_S390_PFSELECT: 4233 r = get_user(vcpu->arch.pfault_select, 4234 (u64 __user *)reg->addr); 4235 break; 4236 case KVM_REG_S390_PP: 4237 r = get_user(vcpu->arch.sie_block->pp, 4238 (u64 __user *)reg->addr); 4239 break; 4240 case KVM_REG_S390_GBEA: 4241 r = get_user(vcpu->arch.sie_block->gbea, 4242 (u64 __user *)reg->addr); 4243 break; 4244 default: 4245 break; 4246 } 4247 4248 return r; 4249 } 4250 4251 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) 4252 { 4253 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; 4254 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; 4255 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); 4256 4257 kvm_clear_async_pf_completion_queue(vcpu); 4258 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) 4259 kvm_s390_vcpu_stop(vcpu); 4260 kvm_s390_clear_local_irqs(vcpu); 4261 } 4262 4263 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) 4264 { 4265 /* Initial reset is a superset of the normal reset */ 4266 kvm_arch_vcpu_ioctl_normal_reset(vcpu); 4267 4268 /* 4269 * This equals initial cpu reset in pop, but we don't switch to ESA. 4270 * We do not only reset the internal data, but also ... 4271 */ 4272 vcpu->arch.sie_block->gpsw.mask = 0; 4273 vcpu->arch.sie_block->gpsw.addr = 0; 4274 kvm_s390_set_prefix(vcpu, 0); 4275 kvm_s390_set_cpu_timer(vcpu, 0); 4276 vcpu->arch.sie_block->ckc = 0; 4277 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); 4278 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; 4279 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; 4280 4281 /* ... the data in sync regs */ 4282 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); 4283 vcpu->run->s.regs.ckc = 0; 4284 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; 4285 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; 4286 vcpu->run->psw_addr = 0; 4287 vcpu->run->psw_mask = 0; 4288 vcpu->run->s.regs.todpr = 0; 4289 vcpu->run->s.regs.cputm = 0; 4290 vcpu->run->s.regs.ckc = 0; 4291 vcpu->run->s.regs.pp = 0; 4292 vcpu->run->s.regs.gbea = 1; 4293 vcpu->run->s.regs.fpc = 0; 4294 /* 4295 * Do not reset these registers in the protected case, as some of 4296 * them are overlaid and they are not accessible in this case 4297 * anyway. 
4298 */ 4299 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 4300 vcpu->arch.sie_block->gbea = 1; 4301 vcpu->arch.sie_block->pp = 0; 4302 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 4303 vcpu->arch.sie_block->todpr = 0; 4304 } 4305 } 4306 4307 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) 4308 { 4309 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 4310 4311 /* Clear reset is a superset of the initial reset */ 4312 kvm_arch_vcpu_ioctl_initial_reset(vcpu); 4313 4314 memset(&regs->gprs, 0, sizeof(regs->gprs)); 4315 memset(&regs->vrs, 0, sizeof(regs->vrs)); 4316 memset(&regs->acrs, 0, sizeof(regs->acrs)); 4317 memset(&regs->gscb, 0, sizeof(regs->gscb)); 4318 4319 regs->etoken = 0; 4320 regs->etoken_extension = 0; 4321 } 4322 4323 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4324 { 4325 vcpu_load(vcpu); 4326 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs)); 4327 vcpu_put(vcpu); 4328 return 0; 4329 } 4330 4331 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4332 { 4333 vcpu_load(vcpu); 4334 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); 4335 vcpu_put(vcpu); 4336 return 0; 4337 } 4338 4339 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 4340 struct kvm_sregs *sregs) 4341 { 4342 vcpu_load(vcpu); 4343 4344 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); 4345 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); 4346 4347 vcpu_put(vcpu); 4348 return 0; 4349 } 4350 4351 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4352 struct kvm_sregs *sregs) 4353 { 4354 vcpu_load(vcpu); 4355 4356 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); 4357 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); 4358 4359 vcpu_put(vcpu); 4360 return 0; 4361 } 4362 4363 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 4364 { 4365 int ret = 0; 4366 4367 vcpu_load(vcpu); 4368 4369 vcpu->run->s.regs.fpc = fpu->fpc; 4370 if (cpu_has_vx()) 4371 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, 4372 (freg_t *) fpu->fprs); 4373 else 4374 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); 4375 4376 vcpu_put(vcpu); 4377 return ret; 4378 } 4379 4380 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 4381 { 4382 vcpu_load(vcpu); 4383 4384 if (cpu_has_vx()) 4385 convert_vx_to_fp((freg_t *) fpu->fprs, 4386 (__vector128 *) vcpu->run->s.regs.vrs); 4387 else 4388 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); 4389 fpu->fpc = vcpu->run->s.regs.fpc; 4390 4391 vcpu_put(vcpu); 4392 return 0; 4393 } 4394 4395 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) 4396 { 4397 int rc = 0; 4398 4399 if (!is_vcpu_stopped(vcpu)) 4400 rc = -EBUSY; 4401 else { 4402 vcpu->run->psw_mask = psw.mask; 4403 vcpu->run->psw_addr = psw.addr; 4404 } 4405 return rc; 4406 } 4407 4408 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 4409 struct kvm_translation *tr) 4410 { 4411 return -EINVAL; /* not implemented yet */ 4412 } 4413 4414 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ 4415 KVM_GUESTDBG_USE_HW_BP | \ 4416 KVM_GUESTDBG_ENABLE) 4417 4418 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 4419 struct kvm_guest_debug *dbg) 4420 { 4421 int rc = 0; 4422 4423 vcpu_load(vcpu); 4424 4425 vcpu->guest_debug = 0; 4426 kvm_s390_clear_bp_data(vcpu); 4427 4428 if (dbg->control & ~VALID_GUESTDBG_FLAGS) { 4429 rc = -EINVAL; 4430 goto out; 4431 } 4432
if (!sclp.has_gpere) { 4433 rc = -EINVAL; 4434 goto out; 4435 } 4436 4437 if (dbg->control & KVM_GUESTDBG_ENABLE) { 4438 vcpu->guest_debug = dbg->control; 4439 /* enforce guest PER */ 4440 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P); 4441 4442 if (dbg->control & KVM_GUESTDBG_USE_HW_BP) 4443 rc = kvm_s390_import_bp_data(vcpu, dbg); 4444 } else { 4445 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4446 vcpu->arch.guestdbg.last_bp = 0; 4447 } 4448 4449 if (rc) { 4450 vcpu->guest_debug = 0; 4451 kvm_s390_clear_bp_data(vcpu); 4452 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P); 4453 } 4454 4455 out: 4456 vcpu_put(vcpu); 4457 return rc; 4458 } 4459 4460 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4461 struct kvm_mp_state *mp_state) 4462 { 4463 int ret; 4464 4465 vcpu_load(vcpu); 4466 4467 /* CHECK_STOP and LOAD are not supported yet */ 4468 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : 4469 KVM_MP_STATE_OPERATING; 4470 4471 vcpu_put(vcpu); 4472 return ret; 4473 } 4474 4475 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4476 struct kvm_mp_state *mp_state) 4477 { 4478 int rc = 0; 4479 4480 vcpu_load(vcpu); 4481 4482 /* user space knows about this interface - let it control the state */ 4483 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); 4484 4485 switch (mp_state->mp_state) { 4486 case KVM_MP_STATE_STOPPED: 4487 rc = kvm_s390_vcpu_stop(vcpu); 4488 break; 4489 case KVM_MP_STATE_OPERATING: 4490 rc = kvm_s390_vcpu_start(vcpu); 4491 break; 4492 case KVM_MP_STATE_LOAD: 4493 if (!kvm_s390_pv_cpu_is_protected(vcpu)) { 4494 rc = -ENXIO; 4495 break; 4496 } 4497 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); 4498 break; 4499 case KVM_MP_STATE_CHECK_STOP: 4500 fallthrough; /* CHECK_STOP and LOAD are not supported yet */ 4501 default: 4502 rc = -ENXIO; 4503 } 4504 4505 vcpu_put(vcpu); 4506 return rc; 4507 } 4508 4509 static bool ibs_enabled(struct kvm_vcpu *vcpu) 4510 { 4511 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); 4512 } 4513 4514 static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) 4515 { 4516 struct kvm *kvm = gmap->private; 4517 gfn_t gfn = gpa_to_gfn(gaddr); 4518 bool unlocked; 4519 hva_t vmaddr; 4520 gpa_t tmp; 4521 int rc; 4522 4523 if (kvm_is_ucontrol(kvm)) { 4524 tmp = __gmap_translate(gmap, gaddr); 4525 gfn = gpa_to_gfn(tmp); 4526 } 4527 4528 vmaddr = gfn_to_hva(kvm, gfn); 4529 rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); 4530 if (!rc) 4531 rc = __gmap_link(gmap, gaddr, vmaddr); 4532 return rc; 4533 } 4534 4535 /** 4536 * __kvm_s390_mprotect_many() - Apply specified protection to guest pages 4537 * @gmap: the gmap of the guest 4538 * @gpa: the starting guest address 4539 * @npages: how many pages to protect 4540 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE 4541 * @bits: pgste notification bits to set 4542 * 4543 * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() 4544 * 4545 * Context: kvm->srcu and gmap->mm need to be held in read mode 4546 */ 4547 int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, 4548 unsigned long bits) 4549 { 4550 unsigned int fault_flag = (prot & PROT_WRITE) ? 
FAULT_FLAG_WRITE : 0; 4551 gpa_t end = gpa + npages * PAGE_SIZE; 4552 int rc; 4553 4554 for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { 4555 rc = gmap_protect_one(gmap, gpa, prot, bits); 4556 if (rc == -EAGAIN) { 4557 __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); 4558 rc = gmap_protect_one(gmap, gpa, prot, bits); 4559 } 4560 if (rc < 0) 4561 return rc; 4562 } 4563 4564 return 0; 4565 } 4566 4567 static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) 4568 { 4569 gpa_t gaddr = kvm_s390_get_prefix(vcpu); 4570 int idx, rc; 4571 4572 idx = srcu_read_lock(&vcpu->kvm->srcu); 4573 mmap_read_lock(vcpu->arch.gmap->mm); 4574 4575 rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); 4576 4577 mmap_read_unlock(vcpu->arch.gmap->mm); 4578 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4579 4580 return rc; 4581 } 4582 4583 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) 4584 { 4585 retry: 4586 kvm_s390_vcpu_request_handled(vcpu); 4587 if (!kvm_request_pending(vcpu)) 4588 return 0; 4589 /* 4590 * If the guest prefix changed, re-arm the ipte notifier for the 4591 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. 4592 * This ensures that the ipte instruction for this request has 4593 * already finished. We might race against a second unmapper that 4594 * wants to set the blocking bit. Lets just retry the request loop. 4595 */ 4596 if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { 4597 int rc; 4598 4599 rc = kvm_s390_mprotect_notify_prefix(vcpu); 4600 if (rc) { 4601 kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); 4602 return rc; 4603 } 4604 goto retry; 4605 } 4606 4607 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { 4608 vcpu->arch.sie_block->ihcpu = 0xffff; 4609 goto retry; 4610 } 4611 4612 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { 4613 if (!ibs_enabled(vcpu)) { 4614 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); 4615 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS); 4616 } 4617 goto retry; 4618 } 4619 4620 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { 4621 if (ibs_enabled(vcpu)) { 4622 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); 4623 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS); 4624 } 4625 goto retry; 4626 } 4627 4628 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) { 4629 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; 4630 goto retry; 4631 } 4632 4633 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { 4634 /* 4635 * Disable CMM virtualization; we will emulate the ESSA 4636 * instruction manually, in order to provide additional 4637 * functionalities needed for live migration. 4638 */ 4639 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; 4640 goto retry; 4641 } 4642 4643 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { 4644 /* 4645 * Re-enable CMM virtualization if CMMA is available and 4646 * CMM has been used. 
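 * "Used" means the guest has issued ESSA at least once; handling that
 * instruction is what marks mm->context.uses_cmm.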
4647 */ 4648 if ((vcpu->kvm->arch.use_cmma) && 4649 (vcpu->kvm->mm->context.uses_cmm)) 4650 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; 4651 goto retry; 4652 } 4653 4654 /* we left the vsie handler, nothing to do, just clear the request */ 4655 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu); 4656 4657 return 0; 4658 } 4659 4660 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4661 { 4662 struct kvm_vcpu *vcpu; 4663 union tod_clock clk; 4664 unsigned long i; 4665 4666 preempt_disable(); 4667 4668 store_tod_clock_ext(&clk); 4669 4670 kvm->arch.epoch = gtod->tod - clk.tod; 4671 kvm->arch.epdx = 0; 4672 if (test_kvm_facility(kvm, 139)) { 4673 kvm->arch.epdx = gtod->epoch_idx - clk.ei; 4674 if (kvm->arch.epoch > gtod->tod) 4675 kvm->arch.epdx -= 1; 4676 } 4677 4678 kvm_s390_vcpu_block_all(kvm); 4679 kvm_for_each_vcpu(i, vcpu, kvm) { 4680 vcpu->arch.sie_block->epoch = kvm->arch.epoch; 4681 vcpu->arch.sie_block->epdx = kvm->arch.epdx; 4682 } 4683 4684 kvm_s390_vcpu_unblock_all(kvm); 4685 preempt_enable(); 4686 } 4687 4688 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) 4689 { 4690 if (!mutex_trylock(&kvm->lock)) 4691 return 0; 4692 __kvm_s390_set_tod_clock(kvm, gtod); 4693 mutex_unlock(&kvm->lock); 4694 return 1; 4695 } 4696 4697 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, 4698 unsigned long token) 4699 { 4700 struct kvm_s390_interrupt inti; 4701 struct kvm_s390_irq irq; 4702 4703 if (start_token) { 4704 irq.u.ext.ext_params2 = token; 4705 irq.type = KVM_S390_INT_PFAULT_INIT; 4706 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); 4707 } else { 4708 inti.type = KVM_S390_INT_PFAULT_DONE; 4709 inti.parm64 = token; 4710 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); 4711 } 4712 } 4713 4714 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 4715 struct kvm_async_pf *work) 4716 { 4717 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); 4718 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); 4719 4720 return true; 4721 } 4722 4723 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, 4724 struct kvm_async_pf *work) 4725 { 4726 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); 4727 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); 4728 } 4729 4730 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 4731 struct kvm_async_pf *work) 4732 { 4733 /* s390 will always inject the page directly */ 4734 } 4735 4736 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) 4737 { 4738 /* 4739 * s390 will always inject the page directly, 4740 * but we still want check_async_completion to cleanup 4741 */ 4742 return true; 4743 } 4744 4745 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) 4746 { 4747 hva_t hva; 4748 struct kvm_arch_async_pf arch; 4749 4750 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 4751 return false; 4752 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != 4753 vcpu->arch.pfault_compare) 4754 return false; 4755 if (psw_extint_disabled(vcpu)) 4756 return false; 4757 if (kvm_s390_vcpu_has_irq(vcpu, 0)) 4758 return false; 4759 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) 4760 return false; 4761 if (!vcpu->arch.gmap->pfault_enabled) 4762 return false; 4763 4764 hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr); 4765 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) 4766 return false; 4767 4768 return 
kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch); 4769 } 4770 4771 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 4772 { 4773 int rc, cpuflags; 4774 4775 /* 4776 * On s390 notifications for arriving pages will be delivered directly 4777 * to the guest but the house keeping for completed pfaults is 4778 * handled outside the worker. 4779 */ 4780 kvm_check_async_pf_completion(vcpu); 4781 4782 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 4783 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 4784 4785 if (need_resched()) 4786 schedule(); 4787 4788 if (!kvm_is_ucontrol(vcpu->kvm)) { 4789 rc = kvm_s390_deliver_pending_interrupts(vcpu); 4790 if (rc || guestdbg_exit_pending(vcpu)) 4791 return rc; 4792 } 4793 4794 rc = kvm_s390_handle_requests(vcpu); 4795 if (rc) 4796 return rc; 4797 4798 if (guestdbg_enabled(vcpu)) { 4799 kvm_s390_backup_guest_per_regs(vcpu); 4800 kvm_s390_patch_guest_per_regs(vcpu); 4801 } 4802 4803 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); 4804 4805 vcpu->arch.sie_block->icptcode = 0; 4806 current->thread.gmap_int_code = 0; 4807 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 4808 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 4809 trace_kvm_s390_sie_enter(vcpu, cpuflags); 4810 4811 return 0; 4812 } 4813 4814 static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) 4815 { 4816 struct kvm_s390_pgm_info pgm_info = { 4817 .code = PGM_ADDRESSING, 4818 }; 4819 u8 opcode, ilen; 4820 int rc; 4821 4822 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 4823 trace_kvm_s390_sie_fault(vcpu); 4824 4825 /* 4826 * We want to inject an addressing exception, which is defined as a 4827 * suppressing or terminating exception. However, since we came here 4828 * by a DAT access exception, the PSW still points to the faulting 4829 * instruction since DAT exceptions are nullifying. So we've got 4830 * to look up the current opcode to get the length of the instruction 4831 * to be able to forward the PSW. 4832 */ 4833 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); 4834 ilen = insn_length(opcode); 4835 if (rc < 0) { 4836 return rc; 4837 } else if (rc) { 4838 /* Instruction-Fetching Exceptions - we can't detect the ilen. 4839 * Forward by arbitrary ilc, injection will take care of 4840 * nullification if necessary. 4841 */ 4842 pgm_info = vcpu->arch.pgm; 4843 ilen = 4; 4844 } 4845 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; 4846 kvm_s390_forward_psw(vcpu, ilen); 4847 return kvm_s390_inject_prog_irq(vcpu, &pgm_info); 4848 } 4849 4850 static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) 4851 { 4852 KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, 4853 "Unexpected program interrupt 0x%x, TEID 0x%016lx", 4854 current->thread.gmap_int_code, current->thread.gmap_teid.val); 4855 } 4856 4857 /* 4858 * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu 4859 * @vcpu: the vCPU whose gmap is to be fixed up 4860 * @gfn: the guest frame number used for memslots (including fake memslots) 4861 * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps 4862 * @flags: FOLL_* flags 4863 * 4864 * Return: 0 on success, < 0 in case of error. 4865 * Context: The mm lock must not be held before calling. May sleep. 
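 *
 * Resolution happens in three steps: fault the page in via
 * __kvm_faultin_pfn() (possibly deferring to an async pfault), mark the
 * userspace PTE young/dirty with fixup_user_fault(), and finally map
 * the page into the gmap with __gmap_link().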
4866 */ 4867 int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) 4868 { 4869 struct kvm_memory_slot *slot; 4870 unsigned int fault_flags; 4871 bool writable, unlocked; 4872 unsigned long vmaddr; 4873 struct page *page; 4874 kvm_pfn_t pfn; 4875 int rc; 4876 4877 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 4878 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 4879 return vcpu_post_run_addressing_exception(vcpu); 4880 4881 fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; 4882 if (vcpu->arch.gmap->pfault_enabled) 4883 flags |= FOLL_NOWAIT; 4884 vmaddr = __gfn_to_hva_memslot(slot, gfn); 4885 4886 try_again: 4887 pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); 4888 4889 /* Access outside memory, inject addressing exception */ 4890 if (is_noslot_pfn(pfn)) 4891 return vcpu_post_run_addressing_exception(vcpu); 4892 /* Signal pending: try again */ 4893 if (pfn == KVM_PFN_ERR_SIGPENDING) 4894 return -EAGAIN; 4895 4896 /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ 4897 if (pfn == KVM_PFN_ERR_NEEDS_IO) { 4898 trace_kvm_s390_major_guest_pfault(vcpu); 4899 if (kvm_arch_setup_async_pf(vcpu)) 4900 return 0; 4901 vcpu->stat.pfault_sync++; 4902 /* Could not setup async pfault, try again synchronously */ 4903 flags &= ~FOLL_NOWAIT; 4904 goto try_again; 4905 } 4906 /* Any other error */ 4907 if (is_error_pfn(pfn)) 4908 return -EFAULT; 4909 4910 /* Success */ 4911 mmap_read_lock(vcpu->arch.gmap->mm); 4912 /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ 4913 rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); 4914 if (!rc) 4915 rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); 4916 scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { 4917 kvm_release_faultin_page(vcpu->kvm, page, false, writable); 4918 } 4919 mmap_read_unlock(vcpu->arch.gmap->mm); 4920 return rc; 4921 } 4922 4923 static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) 4924 { 4925 unsigned long gaddr_tmp; 4926 gfn_t gfn; 4927 4928 gfn = gpa_to_gfn(gaddr); 4929 if (kvm_is_ucontrol(vcpu->kvm)) { 4930 /* 4931 * This translates the per-vCPU guest address into a 4932 * fake guest address, which can then be used with the 4933 * fake memslots that are identity mapping userspace. 4934 * This allows ucontrol VMs to use the normal fault 4935 * resolution path, like normal VMs. 4936 */ 4937 mmap_read_lock(vcpu->arch.gmap->mm); 4938 gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); 4939 mmap_read_unlock(vcpu->arch.gmap->mm); 4940 if (gaddr_tmp == -EFAULT) { 4941 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 4942 vcpu->run->s390_ucontrol.trans_exc_code = gaddr; 4943 vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; 4944 return -EREMOTE; 4945 } 4946 gfn = gpa_to_gfn(gaddr_tmp); 4947 } 4948 return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); 4949 } 4950 4951 static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) 4952 { 4953 unsigned int flags = 0; 4954 unsigned long gaddr; 4955 4956 gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; 4957 if (kvm_s390_cur_gmap_fault_is_write()) 4958 flags = FAULT_FLAG_WRITE; 4959 4960 switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { 4961 case 0: 4962 vcpu->stat.exit_null++; 4963 break; 4964 case PGM_NON_SECURE_STORAGE_ACCESS: 4965 kvm_s390_assert_primary_as(vcpu); 4966 /* 4967 * This is normal operation; a page belonging to a protected 4968 * guest has not been imported yet. 
Try to import the page into 4969 * the protected guest. 4970 */ 4971 if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL) 4972 send_sig(SIGSEGV, current, 0); 4973 break; 4974 case PGM_SECURE_STORAGE_ACCESS: 4975 case PGM_SECURE_STORAGE_VIOLATION: 4976 kvm_s390_assert_primary_as(vcpu); 4977 /* 4978 * This can happen after a reboot with asynchronous teardown; 4979 * the new guest (normal or protected) will run on top of the 4980 * previous protected guest. The old pages need to be destroyed 4981 * so the new guest can use them. 4982 */ 4983 if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) { 4984 /* 4985 * Either KVM messed up the secure guest mapping or the 4986 * same page is mapped into multiple secure guests. 4987 * 4988 * This exception is only triggered when a guest 2 is 4989 * running and can therefore never occur in kernel 4990 * context. 4991 */ 4992 pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n", 4993 current->thread.gmap_int_code, current->comm, 4994 current->pid); 4995 send_sig(SIGSEGV, current, 0); 4996 } 4997 break; 4998 case PGM_PROTECTION: 4999 case PGM_SEGMENT_TRANSLATION: 5000 case PGM_PAGE_TRANSLATION: 5001 case PGM_ASCE_TYPE: 5002 case PGM_REGION_FIRST_TRANS: 5003 case PGM_REGION_SECOND_TRANS: 5004 case PGM_REGION_THIRD_TRANS: 5005 kvm_s390_assert_primary_as(vcpu); 5006 return vcpu_dat_fault_handler(vcpu, gaddr, flags); 5007 default: 5008 KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", 5009 current->thread.gmap_int_code, current->thread.gmap_teid.val); 5010 send_sig(SIGSEGV, current, 0); 5011 break; 5012 } 5013 return 0; 5014 } 5015 5016 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 5017 { 5018 struct mcck_volatile_info *mcck_info; 5019 struct sie_page *sie_page; 5020 int rc; 5021 5022 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 5023 vcpu->arch.sie_block->icptcode); 5024 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 5025 5026 if (guestdbg_enabled(vcpu)) 5027 kvm_s390_restore_guest_per_regs(vcpu); 5028 5029 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 5030 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 5031 5032 if (exit_reason == -EINTR) { 5033 VCPU_EVENT(vcpu, 3, "%s", "machine check"); 5034 sie_page = container_of(vcpu->arch.sie_block, 5035 struct sie_page, sie_block); 5036 mcck_info = &sie_page->mcck_info; 5037 kvm_s390_reinject_machine_check(vcpu, mcck_info); 5038 return 0; 5039 } 5040 5041 if (vcpu->arch.sie_block->icptcode > 0) { 5042 rc = kvm_handle_sie_intercept(vcpu); 5043 5044 if (rc != -EOPNOTSUPP) 5045 return rc; 5046 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 5047 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 5048 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 5049 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 5050 return -EREMOTE; 5051 } 5052 5053 return vcpu_post_run_handle_fault(vcpu); 5054 } 5055 5056 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) 5057 static int __vcpu_run(struct kvm_vcpu *vcpu) 5058 { 5059 int rc, exit_reason; 5060 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; 5061 5062 /* 5063 * We try to hold kvm->srcu during most of vcpu_run (except when run- 5064 * ning the guest), so that memslots (and other stuff) are protected 5065 */ 5066 kvm_vcpu_srcu_read_lock(vcpu); 5067 5068 do { 5069 rc = vcpu_pre_run(vcpu); 5070 if (rc || guestdbg_exit_pending(vcpu)) 5071 break; 5072 5073 kvm_vcpu_srcu_read_unlock(vcpu); 5074 /* 5075 * As PF_VCPU will be used 
in fault handler, between 5076 * guest_enter and guest_exit should be no uaccess. 5077 */ 5078 local_irq_disable(); 5079 guest_enter_irqoff(); 5080 __disable_cpu_timer_accounting(vcpu); 5081 local_irq_enable(); 5082 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5083 memcpy(sie_page->pv_grregs, 5084 vcpu->run->s.regs.gprs, 5085 sizeof(sie_page->pv_grregs)); 5086 } 5087 exit_reason = sie64a(vcpu->arch.sie_block, 5088 vcpu->run->s.regs.gprs, 5089 vcpu->arch.gmap->asce); 5090 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5091 memcpy(vcpu->run->s.regs.gprs, 5092 sie_page->pv_grregs, 5093 sizeof(sie_page->pv_grregs)); 5094 /* 5095 * We're not allowed to inject interrupts on intercepts 5096 * that leave the guest state in an "in-between" state 5097 * where the next SIE entry will do a continuation. 5098 * Fence interrupts in our "internal" PSW. 5099 */ 5100 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || 5101 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { 5102 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 5103 } 5104 } 5105 local_irq_disable(); 5106 __enable_cpu_timer_accounting(vcpu); 5107 guest_exit_irqoff(); 5108 local_irq_enable(); 5109 kvm_vcpu_srcu_read_lock(vcpu); 5110 5111 rc = vcpu_post_run(vcpu, exit_reason); 5112 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); 5113 5114 kvm_vcpu_srcu_read_unlock(vcpu); 5115 return rc; 5116 } 5117 5118 static void sync_regs_fmt2(struct kvm_vcpu *vcpu) 5119 { 5120 struct kvm_run *kvm_run = vcpu->run; 5121 struct runtime_instr_cb *riccb; 5122 struct gs_cb *gscb; 5123 5124 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; 5125 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; 5126 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; 5127 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; 5128 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 5129 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; 5130 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; 5131 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; 5132 } 5133 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { 5134 vcpu->arch.pfault_token = kvm_run->s.regs.pft; 5135 vcpu->arch.pfault_select = kvm_run->s.regs.pfs; 5136 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; 5137 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) 5138 kvm_clear_async_pf_completion_queue(vcpu); 5139 } 5140 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { 5141 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; 5142 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; 5143 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); 5144 } 5145 /* 5146 * If userspace sets the riccb (e.g. after migration) to a valid state, 5147 * we should enable RI here instead of doing the lazy enablement. 5148 */ 5149 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && 5150 test_kvm_facility(vcpu->kvm, 64) && 5151 riccb->v && 5152 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { 5153 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); 5154 vcpu->arch.sie_block->ecb3 |= ECB3_RI; 5155 } 5156 /* 5157 * If userspace sets the gscb (e.g. after migration) to non-zero, 5158 * we should enable GS here instead of doing the lazy enablement. 
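 * This mirrors the RI case above: ECB_GS and ECD_HOSTREGMGMT are set
 * once and the enablement is remembered in vcpu->arch.gs_enabled.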
5159 */ 5160 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && 5161 test_kvm_facility(vcpu->kvm, 133) && 5162 gscb->gssm && 5163 !vcpu->arch.gs_enabled) { 5164 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); 5165 vcpu->arch.sie_block->ecb |= ECB_GS; 5166 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; 5167 vcpu->arch.gs_enabled = 1; 5168 } 5169 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && 5170 test_kvm_facility(vcpu->kvm, 82)) { 5171 vcpu->arch.sie_block->fpf &= ~FPF_BPBC; 5172 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; 5173 } 5174 if (MACHINE_HAS_GS) { 5175 preempt_disable(); 5176 local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); 5177 if (current->thread.gs_cb) { 5178 vcpu->arch.host_gscb = current->thread.gs_cb; 5179 save_gs_cb(vcpu->arch.host_gscb); 5180 } 5181 if (vcpu->arch.gs_enabled) { 5182 current->thread.gs_cb = (struct gs_cb *) 5183 &vcpu->run->s.regs.gscb; 5184 restore_gs_cb(current->thread.gs_cb); 5185 } 5186 preempt_enable(); 5187 } 5188 /* SIE will load etoken directly from SDNX and therefore kvm_run */ 5189 } 5190 5191 static void sync_regs(struct kvm_vcpu *vcpu) 5192 { 5193 struct kvm_run *kvm_run = vcpu->run; 5194 5195 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) 5196 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); 5197 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { 5198 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); 5199 /* some control register changes require a tlb flush */ 5200 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 5201 } 5202 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { 5203 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); 5204 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; 5205 } 5206 save_access_regs(vcpu->arch.host_acrs); 5207 restore_access_regs(vcpu->run->s.regs.acrs); 5208 vcpu->arch.acrs_loaded = true; 5209 kvm_s390_fpu_load(vcpu->run); 5210 /* Sync fmt2 only data */ 5211 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { 5212 sync_regs_fmt2(vcpu); 5213 } else { 5214 /* 5215 * In several places we have to modify our internal view to 5216 * not do things that are disallowed by the ultravisor. For 5217 * example we must not inject interrupts after specific exits 5218 * (e.g. 112 prefix page not secure). We do this by turning 5219 * off the machine check, external and I/O interrupt bits 5220 * of our PSW copy. To avoid getting validity intercepts, we 5221 * do only accept the condition code from userspace. 
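 * That is, only the PSW_MASK_CC bits of kvm_run->psw_mask are merged
 * below; all other mask bits keep their internal value.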
5222 */ 5223 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; 5224 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & 5225 PSW_MASK_CC; 5226 } 5227 5228 kvm_run->kvm_dirty_regs = 0; 5229 } 5230 5231 static void store_regs_fmt2(struct kvm_vcpu *vcpu) 5232 { 5233 struct kvm_run *kvm_run = vcpu->run; 5234 5235 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; 5236 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; 5237 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; 5238 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; 5239 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; 5240 if (MACHINE_HAS_GS) { 5241 preempt_disable(); 5242 local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); 5243 if (vcpu->arch.gs_enabled) 5244 save_gs_cb(current->thread.gs_cb); 5245 current->thread.gs_cb = vcpu->arch.host_gscb; 5246 restore_gs_cb(vcpu->arch.host_gscb); 5247 if (!vcpu->arch.host_gscb) 5248 local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT); 5249 vcpu->arch.host_gscb = NULL; 5250 preempt_enable(); 5251 } 5252 /* SIE will save etoken directly into SDNX and therefore kvm_run */ 5253 } 5254 5255 static void store_regs(struct kvm_vcpu *vcpu) 5256 { 5257 struct kvm_run *kvm_run = vcpu->run; 5258 5259 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; 5260 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; 5261 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); 5262 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); 5263 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); 5264 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; 5265 kvm_run->s.regs.pft = vcpu->arch.pfault_token; 5266 kvm_run->s.regs.pfs = vcpu->arch.pfault_select; 5267 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; 5268 save_access_regs(vcpu->run->s.regs.acrs); 5269 restore_access_regs(vcpu->arch.host_acrs); 5270 vcpu->arch.acrs_loaded = false; 5271 kvm_s390_fpu_store(vcpu->run); 5272 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) 5273 store_regs_fmt2(vcpu); 5274 } 5275 5276 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 5277 { 5278 struct kvm_run *kvm_run = vcpu->run; 5279 DECLARE_KERNEL_FPU_ONSTACK32(fpu); 5280 int rc; 5281 5282 /* 5283 * Running a VM while dumping always has the potential to 5284 * produce inconsistent dump data. But for PV vcpus a SIE 5285 * entry while dumping could also lead to a fatal validity 5286 * intercept which we absolutely want to avoid. 
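 * Hence KVM_RUN is rejected outright while a dump is in progress.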
5287 */ 5288 if (vcpu->kvm->arch.pv.dumping) 5289 return -EINVAL; 5290 5291 if (!vcpu->wants_to_run) 5292 return -EINTR; 5293 5294 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || 5295 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) 5296 return -EINVAL; 5297 5298 vcpu_load(vcpu); 5299 5300 if (guestdbg_exit_pending(vcpu)) { 5301 kvm_s390_prepare_debug_exit(vcpu); 5302 rc = 0; 5303 goto out; 5304 } 5305 5306 kvm_sigset_activate(vcpu); 5307 5308 /* 5309 * no need to check the return value of vcpu_start as it can only have 5310 * an error for protvirt, but protvirt means user cpu state 5311 */ 5312 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 5313 kvm_s390_vcpu_start(vcpu); 5314 } else if (is_vcpu_stopped(vcpu)) { 5315 pr_err_ratelimited("can't run stopped vcpu %d\n", 5316 vcpu->vcpu_id); 5317 rc = -EINVAL; 5318 goto out; 5319 } 5320 5321 kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR); 5322 sync_regs(vcpu); 5323 enable_cpu_timer_accounting(vcpu); 5324 5325 might_fault(); 5326 rc = __vcpu_run(vcpu); 5327 5328 if (signal_pending(current) && !rc) { 5329 kvm_run->exit_reason = KVM_EXIT_INTR; 5330 rc = -EINTR; 5331 } 5332 5333 if (guestdbg_exit_pending(vcpu) && !rc) { 5334 kvm_s390_prepare_debug_exit(vcpu); 5335 rc = 0; 5336 } 5337 5338 if (rc == -EREMOTE) { 5339 /* userspace support is needed, kvm_run has been prepared */ 5340 rc = 0; 5341 } 5342 5343 disable_cpu_timer_accounting(vcpu); 5344 store_regs(vcpu); 5345 kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR); 5346 5347 kvm_sigset_deactivate(vcpu); 5348 5349 vcpu->stat.exit_userspace++; 5350 out: 5351 vcpu_put(vcpu); 5352 return rc; 5353 } 5354 5355 /* 5356 * store status at address 5357 * we have two special cases: 5358 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit 5359 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix 5360 */ 5361 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) 5362 { 5363 unsigned char archmode = 1; 5364 freg_t fprs[NUM_FPRS]; 5365 unsigned int px; 5366 u64 clkcomp, cputm; 5367 int rc; 5368 5369 px = kvm_s390_get_prefix(vcpu); 5370 if (gpa == KVM_S390_STORE_STATUS_NOADDR) { 5371 if (write_guest_abs(vcpu, 163, &archmode, 1)) 5372 return -EFAULT; 5373 gpa = 0; 5374 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { 5375 if (write_guest_real(vcpu, 163, &archmode, 1)) 5376 return -EFAULT; 5377 gpa = px; 5378 } else 5379 gpa -= __LC_FPREGS_SAVE_AREA; 5380 5381 /* manually convert vector registers if necessary */ 5382 if (cpu_has_vx()) { 5383 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); 5384 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 5385 fprs, 128); 5386 } else { 5387 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, 5388 vcpu->run->s.regs.fprs, 128); 5389 } 5390 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, 5391 vcpu->run->s.regs.gprs, 128); 5392 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, 5393 &vcpu->arch.sie_block->gpsw, 16); 5394 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, 5395 &px, 4); 5396 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, 5397 &vcpu->run->s.regs.fpc, 4); 5398 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, 5399 &vcpu->arch.sie_block->todpr, 4); 5400 cputm = kvm_s390_get_cpu_timer(vcpu); 5401 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, 5402 &cputm, 8); 5403 clkcomp = vcpu->arch.sie_block->ckc >> 8; 5404 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, 5405 &clkcomp, 8); 5406 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, 5407
&vcpu->run->s.regs.acrs, 64); 5408 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, 5409 &vcpu->arch.sie_block->gcr, 128); 5410 return rc ? -EFAULT : 0; 5411 } 5412 5413 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) 5414 { 5415 /* 5416 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy 5417 * switch in the run ioctl. Let's update our copies before we save 5418 * it into the save area 5419 */ 5420 kvm_s390_fpu_store(vcpu->run); 5421 save_access_regs(vcpu->run->s.regs.acrs); 5422 5423 return kvm_s390_store_status_unloaded(vcpu, addr); 5424 } 5425 5426 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5427 { 5428 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); 5429 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu); 5430 } 5431 5432 static void __disable_ibs_on_all_vcpus(struct kvm *kvm) 5433 { 5434 unsigned long i; 5435 struct kvm_vcpu *vcpu; 5436 5437 kvm_for_each_vcpu(i, vcpu, kvm) { 5438 __disable_ibs_on_vcpu(vcpu); 5439 } 5440 } 5441 5442 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) 5443 { 5444 if (!sclp.has_ibs) 5445 return; 5446 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); 5447 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); 5448 } 5449 5450 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) 5451 { 5452 int i, online_vcpus, r = 0, started_vcpus = 0; 5453 5454 if (!is_vcpu_stopped(vcpu)) 5455 return 0; 5456 5457 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); 5458 /* Only one cpu at a time may enter/leave the STOPPED state. */ 5459 spin_lock(&vcpu->kvm->arch.start_stop_lock); 5460 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); 5461 5462 /* Let's tell the UV that we want to change into the operating state */ 5463 if (kvm_s390_pv_cpu_is_protected(vcpu)) { 5464 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); 5465 if (r) { 5466 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5467 return r; 5468 } 5469 } 5470 5471 for (i = 0; i < online_vcpus; i++) { 5472 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i))) 5473 started_vcpus++; 5474 } 5475 5476 if (started_vcpus == 0) { 5477 /* we're the only active VCPU -> speed it up */ 5478 __enable_ibs_on_vcpu(vcpu); 5479 } else if (started_vcpus == 1) { 5480 /* 5481 * As we are starting a second VCPU, we have to disable 5482 * the IBS facility on all VCPUs to remove potentially 5483 * outstanding ENABLE requests. 5484 */ 5485 __disable_ibs_on_all_vcpus(vcpu->kvm); 5486 } 5487 5488 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); 5489 /* 5490 * The real PSW might have changed due to a RESTART interpreted by the 5491 * ultravisor. We block all interrupts and let the next sie exit 5492 * refresh our view. 5493 */ 5494 if (kvm_s390_pv_cpu_is_protected(vcpu)) 5495 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; 5496 /* 5497 * Another VCPU might have used IBS while we were offline. 5498 * Let's play safe and flush the VCPU at startup. 5499 */ 5500 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 5501 spin_unlock(&vcpu->kvm->arch.start_stop_lock); 5502 return 0; 5503 } 5504 5505 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) 5506 { 5507 int i, online_vcpus, r = 0, started_vcpus = 0; 5508 struct kvm_vcpu *started_vcpu = NULL; 5509 5510 if (is_vcpu_stopped(vcpu)) 5511 return 0; 5512 5513 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); 5514 /* Only one cpu at a time may enter/leave the STOPPED state. 
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);

		if (!is_vcpu_stopped(tmp)) {
			started_vcpus++;
			started_vcpu = tmp;
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *sida_addr;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, sida_addr, mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user(sida_addr, uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
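
/*
 * Handle KVM_S390_MEMOP_LOGICAL_READ/WRITE for a non-protected vcpu. The
 * access goes through the vcpu's logical address space, optionally with
 * storage key checking, and is staged through a temporary kernel buffer
 * unless only an accessibility check (CHECK_ONLY) was requested.
 */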
static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
				 struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	enum gacc_mode acc_mode;
	void *tmpbuf = NULL;
	int r;

	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
					KVM_S390_MEMOP_F_CHECK_ONLY |
					KVM_S390_MEMOP_F_SKEY_PROTECTION);
	if (r)
		return r;
	if (mop->ar >= NUM_ACRS)
		return -EINVAL;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
		r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
				    acc_mode, mop->key);
		goto out_inject;
	}
	if (acc_mode == GACC_FETCH) {
		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					mop->size, mop->key);
		if (r)
			goto out_inject;
		if (copy_to_user(uaddr, tmpbuf, mop->size)) {
			r = -EFAULT;
			goto out_free;
		}
	} else {
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			goto out_free;
		}
		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					 mop->size, mop->key);
	}

out_inject:
	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

out_free:
	vfree(tmpbuf);
	return r;
}

static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
				     struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_vcpu_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_vcpu_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int rc;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	default:
		rc = -ENOIOCTLCMD;
		break;
	}

	/*
	 * To simplify single stepping of userspace-emulated instructions,
	 * a KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
	 * should_handle_per_ifetch()). However, if userspace emulation injects
	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
	 * after (and not before) the interrupt delivery.
	 */
	if (!rc)
		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;

	return rc;
}
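
/*
 * Dump the state of a single protected vcpu into a user supplied buffer.
 * Dump initialization must have been performed on the VM beforehand, and
 * the buffer has to be exactly uv_info.guest_cpu_stor_len bytes long.
 */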
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
					struct kvm_pv_cmd *cmd)
{
	struct kvm_s390_pv_dmp dmp;
	void *data;
	int ret;

	/* Dump initialization is a prerequisite */
	if (!vcpu->kvm->arch.pv.dumping)
		return -EINVAL;

	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
		return -EFAULT;

	/* We only handle this subcmd right now */
	if (dmp.subcmd != KVM_PV_DUMP_CPU)
		return -EINVAL;

	/* CPU dump length is the same as create cpu storage donation. */
	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
		return -EINVAL;

	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);

	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
		   vcpu->vcpu_id, cmd->rc, cmd->rrc);

	if (ret)
		ret = -EINVAL;

	/* On success copy over the dump data */
	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
		ret = -EFAULT;

	kvfree(data);
	return ret;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
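	/* resolve a guest fault for the address passed in arg */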
	case KVM_S390_VCPU_FAULT: {
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = vcpu_dat_fault_handler(vcpu, arg, 0);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_PV_CPU_COMMAND: {
		struct kvm_pv_cmd cmd;

		r = -EINVAL;
		if (!is_prot_virt_host())
			break;

		r = -EFAULT;
		if (copy_from_user(&cmd, argp, sizeof(cmd)))
			break;

		r = -EINVAL;
		if (cmd.flags)
			break;

		/* We only handle this cmd right now */
		if (cmd.cmd != KVM_PV_DUMP)
			break;

		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);

		/* Always copy over UV rc / rrc data */
		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
			r = -EFAULT;
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
{
	return true;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   const struct kvm_memory_slot *old,
				   struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	gpa_t size;

	if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
		/*
		 * A few sanity checks: memory slots have to start and end at
		 * a segment boundary (1 MB). The memory in userland may be
		 * fragmented into various different vmas, and it is okay to
		 * mmap() and munmap() in this slot after this call at any
		 * time.
		 */
		if (new->userspace_addr & 0xffffful)
			return -EINVAL;

		size = new->npages * PAGE_SIZE;
		if (size & 0xffffful)
			return -EINVAL;

		if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
			return -EINVAL;
	}

	if (!kvm->arch.migration_mode)
		return 0;

	/*
	 * Turn off migration mode when:
	 * - userspace creates a new memslot with dirty logging off,
	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
	 *   dirty logging is turned off.
	 * Migration mode expects dirty page logging to be enabled to store
	 * its dirty bitmap.
	 */
	if (change != KVM_MR_DELETE &&
	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
		WARN(kvm_s390_vm_stop_migration(kvm),
		     "Failed to stop migration mode");

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	if (kvm_is_ucontrol(kvm))
		return;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
				      new->base_gfn * PAGE_SIZE,
				      new->npages * PAGE_SIZE);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

static int __init kvm_s390_init(void)
{
	int i, r;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	r = __kvm_s390_init();
	if (r)
		return r;

	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (r) {
		__kvm_s390_exit();
		return r;
	}
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();

	__kvm_s390_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");