1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Hosting Protected Virtual Machines
4 *
5 * Copyright IBM Corp. 2019, 2020
6 * Author(s): Janosch Frank <frankja@linux.ibm.com>
7 */
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/minmax.h>
11 #include <linux/pagemap.h>
12 #include <linux/sched/signal.h>
13 #include <asm/gmap.h>
14 #include <asm/uv.h>
15 #include <asm/mman.h>
16 #include <linux/pagewalk.h>
17 #include <linux/sched/mm.h>
18 #include <linux/mmu_notifier.h>
19 #include "kvm-s390.h"
20
kvm_s390_pv_is_protected(struct kvm * kvm)21 bool kvm_s390_pv_is_protected(struct kvm *kvm)
22 {
23 lockdep_assert_held(&kvm->lock);
24 return !!kvm_s390_pv_get_handle(kvm);
25 }
26 EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
27
kvm_s390_pv_cpu_is_protected(struct kvm_vcpu * vcpu)28 bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
29 {
30 lockdep_assert_held(&vcpu->mutex);
31 return !!kvm_s390_pv_cpu_get_handle(vcpu);
32 }
33 EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
34
/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM; 0 means the VM was
 *	    already torn down with the destroy-fast UVC and no further
 *	    Ultravisor call or memory release is needed for it
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which no longer corresponds to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};
57
kvm_s390_clear_pv_state(struct kvm * kvm)58 static void kvm_s390_clear_pv_state(struct kvm *kvm)
59 {
60 kvm->arch.pv.handle = 0;
61 kvm->arch.pv.guest_len = 0;
62 kvm->arch.pv.stor_base = 0;
63 kvm->arch.pv.stor_var = NULL;
64 }
65
/**
 * kvm_s390_pv_destroy_cpu - Destroy the protected CPU of a vCPU.
 * @vcpu: the vCPU whose protected CPU is to be destroyed
 * @rc: the RC code of the Destroy Secure CPU UVC
 * @rrc: the RRC code of the Destroy Secure CPU UVC
 *
 * Does nothing (and leaves @rc / @rrc untouched) if the vCPU has no
 * protected CPU handle. Otherwise the Destroy Secure CPU UVC is issued, the
 * SIDA page is freed and the protected state kept in the SIE block and in
 * vcpu->arch.pv is reset. The CPU storage is only freed if the UVC
 * succeeded.
 *
 * Return: 0 in case of success or if there was nothing to destroy,
 * otherwise -EIO
 */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	/*
	 * Report failure as a negative errno, like every other function in
	 * this file does.
	 */
	return cc ? -EIO : 0;
}
99
/**
 * kvm_s390_pv_create_cpu - Create the protected CPU for a vCPU.
 * @vcpu: the vCPU to be turned into a protected CPU
 * @rc: the RC code of the Create Secure CPU UVC
 * @rrc: the RRC code of the Create Secure CPU UVC
 *
 * Allocates the CPU storage and the SIDA page, issues the Create Secure CPU
 * UVC and, on success, records the returned CPU handle and switches the SIE
 * block to the protected format (sdf == 2). On any failure everything
 * allocated here is released again.
 *
 * Return: 0 in case of success, -EINVAL if the vCPU already has a protected
 * CPU handle, -ENOMEM if an allocation failed, -EIO if the UVC failed
 */
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		/* Do not keep a reference to the pages just freed */
		vcpu->arch.pv.stor_base = 0;
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		/*
		 * No CPU handle has been stored at this point, so
		 * kvm_s390_pv_destroy_cpu() would return at its handle check
		 * without releasing anything. Undo the allocations made
		 * above directly instead.
		 */
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		vcpu->arch.pv.stor_base = 0;
		free_page((unsigned long)sida_addr);
		/*
		 * sdf is still 0, so the sidad written above occupies the
		 * gbea field. Restore the reset value of gbea so the address
		 * of the just freed sida is not left in the SIE block.
		 */
		vcpu->arch.sie_block->gbea = 1;
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}
155
156 /* only free resources when the destroy was successful */
kvm_s390_pv_dealloc_vm(struct kvm * kvm)157 static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
158 {
159 vfree(kvm->arch.pv.stor_var);
160 free_pages(kvm->arch.pv.stor_base,
161 get_order(uv_info.guest_base_stor_len));
162 kvm_s390_clear_pv_state(kvm);
163 }
164
kvm_s390_pv_alloc_vm(struct kvm * kvm)165 static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
166 {
167 unsigned long base = uv_info.guest_base_stor_len;
168 unsigned long virt = uv_info.guest_virt_var_stor_len;
169 unsigned long npages = 0, vlen = 0;
170
171 kvm->arch.pv.stor_var = NULL;
172 kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
173 if (!kvm->arch.pv.stor_base)
174 return -ENOMEM;
175
176 /*
177 * Calculate current guest storage for allocation of the
178 * variable storage, which is based on the length in MB.
179 *
180 * Slots are sorted by GFN
181 */
182 mutex_lock(&kvm->slots_lock);
183 npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
184 mutex_unlock(&kvm->slots_lock);
185
186 kvm->arch.pv.guest_len = npages * PAGE_SIZE;
187
188 /* Allocate variable storage */
189 vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
190 vlen += uv_info.guest_virt_base_stor_len;
191 kvm->arch.pv.stor_var = vzalloc(vlen);
192 if (!kvm->arch.pv.stor_var)
193 goto out_err;
194 return 0;
195
196 out_err:
197 kvm_s390_pv_dealloc_vm(kvm);
198 return -ENOMEM;
199 }
200
201 /**
202 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
203 * @kvm: the KVM that was associated with this leftover protected VM
204 * @leftover: details about the leftover protected VM that needs a clean up
205 * @rc: the RC code of the Destroy Secure Configuration UVC
206 * @rrc: the RRC code of the Destroy Secure Configuration UVC
207 *
208 * Destroy one leftover protected VM.
209 * On success, kvm->mm->context.protected_count will be decremented atomically
210 * and all other resources used by the VM will be freed.
211 *
212 * Return: 0 in case of success, otherwise 1
213 */
kvm_s390_pv_dispose_one_leftover(struct kvm * kvm,struct pv_vm_to_be_destroyed * leftover,u16 * rc,u16 * rrc)214 static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
215 struct pv_vm_to_be_destroyed *leftover,
216 u16 *rc, u16 *rrc)
217 {
218 int cc;
219
220 /* It used the destroy-fast UVC, nothing left to do here */
221 if (!leftover->handle)
222 goto done_fast;
223 cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
224 KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
225 WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
226 if (cc)
227 return cc;
228 /*
229 * Intentionally leak unusable memory. If the UVC fails, the memory
230 * used for the VM and its metadata is permanently unusable.
231 * This can only happen in case of a serious KVM or hardware bug; it
232 * is not expected to happen in normal operation.
233 */
234 free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
235 free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
236 vfree(leftover->stor_var);
237 done_fast:
238 atomic_dec(&kvm->mm->context.protected_count);
239 return 0;
240 }
241
242 /**
243 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
244 * @kvm: the VM whose memory is to be cleared.
245 *
246 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
247 * The CPUs of the protected VM need to be destroyed beforehand.
248 */
kvm_s390_destroy_lower_2g(struct kvm * kvm)249 static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
250 {
251 const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
252 struct kvm_memory_slot *slot;
253 unsigned long len;
254 int srcu_idx;
255
256 srcu_idx = srcu_read_lock(&kvm->srcu);
257
258 /* Take the memslot containing guest absolute address 0 */
259 slot = gfn_to_memslot(kvm, 0);
260 /* Clear all slots or parts thereof that are below 2GB */
261 while (slot && slot->base_gfn < pages_2g) {
262 len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
263 s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
264 /* Take the next memslot */
265 slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
266 }
267
268 srcu_read_unlock(&kvm->srcu, srcu_idx);
269 }
270
/*
 * Tear down the current protected VM of @kvm with the destroy-fast UVC.
 *
 * @rc and @rrc may be NULL; when given they receive the RC / RRC of the UVC.
 * The gmap's guest handle is cleared regardless of the outcome. The VM
 * storage is only released if the UVC succeeded.
 *
 * Return: 0 in case of success, otherwise -EIO
 */
static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call_sched(0, (u64)&uvcb);

	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	/* A failure with rc 0x104 is not warned about */
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);

	/* Intended memory leak on "impossible" error */
	if (cc)
		return -EIO;

	kvm_s390_pv_dealloc_vm(kvm);
	return 0;
}
296
/*
 * Check whether the bit for the destroy-fast UVC
 * (BIT_UVC_CMD_DESTROY_SEC_CONF_FAST) is set in uv_info.inst_calls_list.
 */
static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}
301
302 /**
303 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
304 * @kvm: the VM
305 * @rc: return value for the RC field of the UVCB
306 * @rrc: return value for the RRC field of the UVCB
307 *
308 * Set aside the protected VM for a subsequent teardown. The VM will be able
309 * to continue immediately as a non-secure VM, and the information needed to
310 * properly tear down the protected VM is set aside. If another protected VM
311 * was already set aside without starting its teardown, this function will
312 * fail.
313 * The CPUs of the protected VM need to be destroyed beforehand.
314 *
315 * Context: kvm->lock needs to be held
316 *
317 * Return: 0 in case of success, -EINVAL if another protected VM was already set
318 * aside, -ENOMEM if the system ran out of memory.
319 */
kvm_s390_pv_set_aside(struct kvm * kvm,u16 * rc,u16 * rrc)320 int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
321 {
322 struct pv_vm_to_be_destroyed *priv;
323 int res = 0;
324
325 lockdep_assert_held(&kvm->lock);
326 /*
327 * If another protected VM was already prepared for teardown, refuse.
328 * A normal deinitialization has to be performed instead.
329 */
330 if (kvm->arch.pv.set_aside)
331 return -EINVAL;
332
333 /* Guest with segment type ASCE, refuse to destroy asynchronously */
334 if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
335 return -EINVAL;
336
337 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
338 if (!priv)
339 return -ENOMEM;
340
341 if (is_destroy_fast_available()) {
342 res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
343 } else {
344 priv->stor_var = kvm->arch.pv.stor_var;
345 priv->stor_base = kvm->arch.pv.stor_base;
346 priv->handle = kvm_s390_pv_get_handle(kvm);
347 priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
348 WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
349 if (s390_replace_asce(kvm->arch.gmap))
350 res = -ENOMEM;
351 }
352
353 if (res) {
354 kfree(priv);
355 return res;
356 }
357
358 kvm_s390_destroy_lower_2g(kvm);
359 kvm_s390_clear_pv_state(kvm);
360 kvm->arch.pv.set_aside = priv;
361
362 *rc = UVC_RC_EXECUTED;
363 *rrc = 42;
364 return 0;
365 }
366
367 /**
368 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
369 * @kvm: the KVM whose protected VM needs to be deinitialized
370 * @rc: the RC code of the UVC
371 * @rrc: the RRC code of the UVC
372 *
373 * Deinitialize the current protected VM. This function will destroy and
374 * cleanup the current protected VM, but it will not cleanup the guest
375 * memory. This function should only be called when the protected VM has
376 * just been created and therefore does not have any guest memory, or when
377 * the caller cleans up the guest memory separately.
378 *
379 * This function should not fail, but if it does, the donated memory must
380 * not be freed.
381 *
382 * Context: kvm->lock needs to be held
383 *
384 * Return: 0 in case of success, otherwise -EIO
385 */
kvm_s390_pv_deinit_vm(struct kvm * kvm,u16 * rc,u16 * rrc)386 int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
387 {
388 int cc;
389
390 cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
391 UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
392 WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
393 if (!cc) {
394 atomic_dec(&kvm->mm->context.protected_count);
395 kvm_s390_pv_dealloc_vm(kvm);
396 } else {
397 /* Intended memory leak on "impossible" error */
398 s390_replace_asce(kvm->arch.gmap);
399 }
400 KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
401 WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
402
403 return cc ? -EIO : 0;
404 }
405
406 /**
407 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
408 * with a specific KVM.
409 * @kvm: the KVM to be cleaned up
410 * @rc: the RC code of the first failing UVC
411 * @rrc: the RRC code of the first failing UVC
412 *
413 * This function will clean up all protected VMs associated with a KVM.
414 * This includes the active one, the one prepared for deinitialization with
415 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
416 *
417 * Context: kvm->lock needs to be held unless being called from
418 * kvm_arch_destroy_vm.
419 *
420 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
421 */
kvm_s390_pv_deinit_cleanup_all(struct kvm * kvm,u16 * rc,u16 * rrc)422 int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
423 {
424 struct pv_vm_to_be_destroyed *cur;
425 bool need_zap = false;
426 u16 _rc, _rrc;
427 int cc = 0;
428
429 /*
430 * Nothing to do if the counter was already 0. Otherwise make sure
431 * the counter does not reach 0 before calling s390_uv_destroy_range.
432 */
433 if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
434 return 0;
435
436 *rc = 1;
437 /* If the current VM is protected, destroy it */
438 if (kvm_s390_pv_get_handle(kvm)) {
439 cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
440 need_zap = true;
441 }
442
443 /* If a previous protected VM was set aside, put it in the need_cleanup list */
444 if (kvm->arch.pv.set_aside) {
445 list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
446 kvm->arch.pv.set_aside = NULL;
447 }
448
449 /* Cleanup all protected VMs in the need_cleanup list */
450 while (!list_empty(&kvm->arch.pv.need_cleanup)) {
451 cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
452 need_zap = true;
453 if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
454 cc = 1;
455 /*
456 * Only return the first error rc and rrc, so make
457 * sure it is not overwritten. All destroys will
458 * additionally be reported via KVM_UV_EVENT().
459 */
460 if (*rc == UVC_RC_EXECUTED) {
461 *rc = _rc;
462 *rrc = _rrc;
463 }
464 }
465 list_del(&cur->list);
466 kfree(cur);
467 }
468
469 /*
470 * If the mm still has a mapping, try to mark all its pages as
471 * accessible. The counter should not reach zero before this
472 * cleanup has been performed.
473 */
474 if (need_zap && mmget_not_zero(kvm->mm)) {
475 s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
476 mmput(kvm->mm);
477 }
478
479 /* Now the counter can safely reach 0 */
480 atomic_dec(&kvm->mm->context.protected_count);
481 return cc ? -EIO : 0;
482 }
483
484 /**
485 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
486 * @kvm: the VM previously associated with the protected VM
487 * @rc: return value for the RC field of the UVCB
488 * @rrc: return value for the RRC field of the UVCB
489 *
490 * Tear down the protected VM that had been previously prepared for teardown
491 * using kvm_s390_pv_set_aside_vm. Ideally this should be called by
492 * userspace asynchronously from a separate thread.
493 *
494 * Context: kvm->lock must not be held.
495 *
496 * Return: 0 in case of success, -EINVAL if no protected VM had been
497 * prepared for asynchronous teardowm, -EIO in case of other errors.
498 */
kvm_s390_pv_deinit_aside_vm(struct kvm * kvm,u16 * rc,u16 * rrc)499 int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
500 {
501 struct pv_vm_to_be_destroyed *p;
502 int ret = 0;
503
504 lockdep_assert_not_held(&kvm->lock);
505 mutex_lock(&kvm->lock);
506 p = kvm->arch.pv.set_aside;
507 kvm->arch.pv.set_aside = NULL;
508 mutex_unlock(&kvm->lock);
509 if (!p)
510 return -EINVAL;
511
512 /* When a fatal signal is received, stop immediately */
513 if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
514 goto done;
515 if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
516 ret = -EIO;
517 kfree(p);
518 p = NULL;
519 done:
520 /*
521 * p is not NULL if we aborted because of a fatal signal, in which
522 * case queue the leftover for later cleanup.
523 */
524 if (p) {
525 mutex_lock(&kvm->lock);
526 list_add(&p->list, &kvm->arch.pv.need_cleanup);
527 mutex_unlock(&kvm->lock);
528 /* Did not finish, but pretend things went well */
529 *rc = UVC_RC_EXECUTED;
530 *rrc = 42;
531 }
532 return ret;
533 }
534
kvm_s390_pv_mmu_notifier_release(struct mmu_notifier * subscription,struct mm_struct * mm)535 static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
536 struct mm_struct *mm)
537 {
538 struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
539 u16 dummy;
540 int r;
541
542 /*
543 * No locking is needed since this is the last thread of the last user of this
544 * struct mm.
545 * When the struct kvm gets deinitialized, this notifier is also
546 * unregistered. This means that if this notifier runs, then the
547 * struct kvm is still valid.
548 */
549 r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
550 if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
551 kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
552 }
553
/* MMU notifier callbacks used for protected VMs; only ->release is set. */
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};
557
/**
 * kvm_s390_pv_init_vm - Create the protected VM for a KVM.
 * @kvm: the KVM to be turned into a protected VM
 * @rc: the RC code of the Create Secure Configuration UVC
 * @rrc: the RRC code of the Create Secure Configuration UVC
 *
 * Allocates the storage for the protected VM, issues the Create Secure
 * Configuration UVC and records the returned handle. If the UVC fails, the
 * VM is either destroyed again (when the RC carries UVC_RC_NEED_DESTROY) or
 * its storage is simply released. On success the MMU notifier for protected
 * VMs is registered, once per KVM.
 *
 * Context: kvm->lock is held by the caller, as the notifier registration
 * below relies on.
 *
 * Return: 0 in case of success, the error returned by
 * kvm_s390_pv_alloc_vm() if the allocation failed, -EIO if the UVC failed
 */
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	u16 unused;
	int cc, err;

	err = kvm_s390_pv_alloc_vm(kvm);
	if (err)
		return err;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (!(uvcb.header.rc & UVC_RC_NEED_DESTROY)) {
			/* Nothing to destroy: just take back what was set up */
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		} else {
			kvm_s390_pv_deinit_vm(kvm, &unused, &unused);
		}
		return -EIO;
	}

	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
	}
	return 0;
}
609
/**
 * kvm_s390_pv_set_sec_parms - Pass the secure header to the Ultravisor.
 * @kvm: the protected VM
 * @hdr: the header, passed to the UVC as sec_header_origin
 * @length: the length of the header, passed as sec_header_len
 * @rc: the RC code of the Set Secure Configuration Parameters UVC
 * @rrc: the RRC code of the Set Secure Configuration Parameters UVC
 *
 * Return: 0 in case of success, -EINVAL if the UVC failed
 */
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);

	if (cc)
		return -EINVAL;
	return 0;
}
628
/*
 * Unpack a single page of the guest image at guest address @addr via
 * gmap_make_secure(). @tweak and @offset are handed to the UVC as the two
 * tweak values. @rc / @rrc receive the RC / RRC found in the UVCB header.
 *
 * Return: the return value of gmap_make_secure(); failures other than
 * -EAGAIN are additionally logged.
 */
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret;

	ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	/* Success and "try again" are not worth a log entry */
	if (!ret || ret == -EAGAIN)
		return ret;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
		     uvcb.gaddr, *rc, *rrc);
	return ret;
}
650
/**
 * kvm_s390_pv_unpack - Unpack a range of the guest image page by page.
 * @kvm: the protected VM
 * @addr: guest address of the first page, must be page aligned
 * @size: length of the range in bytes, must be a non-zero multiple of the
 *	  page size
 * @tweak: tweak value passed along for every page
 * @rc: the RC code of the last Unpack UVC
 * @rrc: the RRC code of the last Unpack UVC
 *
 * A page that yields -EAGAIN is retried after rescheduling, unless a fatal
 * signal is pending for the current task.
 *
 * Return: 0 in case of success, -EINVAL for a misaligned or empty range,
 * otherwise the error returned by unpack_one() for the page that stopped
 * the loop
 */
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if ((addr & ~PAGE_MASK) || !size || (size & ~PAGE_MASK))
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (!ret) {
			/* Page done, move on to the next one */
			addr += PAGE_SIZE;
			offset += PAGE_SIZE;
			continue;
		}
		if (ret != -EAGAIN)
			break;

		/* Retry the same page unless the task is being killed */
		cond_resched();
		if (fatal_signal_pending(current))
			break;
	}

	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}
680
/**
 * kvm_s390_pv_set_cpu_state - Set the state of a protected CPU.
 * @vcpu: the vCPU whose protected CPU state is to be set
 * @state: the state value passed to the CPU Set State UVC
 *
 * Return: 0 in case of success, -EINVAL if the UVC failed
 */
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc = uv_call(0, (u64)&uvcb);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);

	return cc ? -EINVAL : 0;
}
698
/**
 * kvm_s390_pv_dump_cpu - Issue the Dump CPU UVC for a protected CPU.
 * @vcpu: the vCPU to dump
 * @buff: buffer passed to the UVC as dump area origin
 * @rc: the RC code of the Dump CPU UVC
 * @rrc: the RRC code of the Dump CPU UVC
 *
 * Return: the value returned by uv_call_sched(), without translation into
 * an errno
 */
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc = uv_call_sched(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	return cc;
}
714
/*
 * Size of the cache for the storage state dump data. 1MB for now.
 * This is the upper bound for the kernel buffer that
 * kvm_s390_pv_dump_stor_state() allocates.
 */
#define DUMP_BUFF_LEN HPAGE_SIZE
717
/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). Once the dump loop has been entered, the gaddr pointer is
 * updated to the address at which dumping stopped, i.e. the first 1MB
 * block that was not processed successfully; it is left untouched when an
 * argument check or the buffer allocation fails. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is zero or not aligned to
 *   uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	/* Number of bytes of dump data produced per UV call */
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	/* Bytes currently held in buff_kvm that were not copied out yet */
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		/* Advance the write position in buff_kvm and the guest address */
		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/*
		 * KVM Buffer full, time to copy to the process.
		 * NOTE(review): the "full" check is an equality test, so this
		 * presumably relies on DUMP_BUFF_LEN being a multiple of
		 * increment_len (and increment_len not exceeding it); confirm
		 * against the values the Ultravisor can report.
		 */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			/* Start filling buff_kvm from the beginning again */
			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Lets only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	/* buff_kvm is still NULL if we bailed out before the allocation */
	vfree(buff_kvm);

	return ret;
}
834
835 /**
836 * kvm_s390_pv_dump_complete
837 *
838 * @kvm: pointer to the guest's KVM struct
839 * @buff_user: Userspace pointer where we will write the results to
840 * @rc: Pointer to where the uvcb return code is stored
841 * @rrc: Pointer to where the uvcb return reason code is stored
842 *
843 * Completes the dumping operation and writes the completion data to
844 * user space.
845 *
846 * Context: kvm->lock needs to be held
847 *
848 * Return:
849 * 0 on success
850 * -ENOMEM if allocating the completion buffer fails
851 * -EINVAL if the UV call fails, rc and rrc will be set in this case
852 * -EFAULT if copying the result to buff_user failed
853 */
kvm_s390_pv_dump_complete(struct kvm * kvm,void __user * buff_user,u16 * rc,u16 * rrc)854 int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
855 u16 *rc, u16 *rrc)
856 {
857 struct uv_cb_dump_complete complete = {
858 .header.len = sizeof(complete),
859 .header.cmd = UVC_CMD_DUMP_COMPLETE,
860 .config_handle = kvm_s390_pv_get_handle(kvm),
861 };
862 u64 *compl_data;
863 int ret;
864
865 /* Allocate dump area */
866 compl_data = vzalloc(uv_info.conf_dump_finalize_len);
867 if (!compl_data)
868 return -ENOMEM;
869 complete.dump_area_origin = (u64)compl_data;
870
871 ret = uv_call_sched(0, (u64)&complete);
872 *rc = complete.header.rc;
873 *rrc = complete.header.rrc;
874 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
875 complete.header.rc, complete.header.rrc);
876
877 if (!ret) {
878 /*
879 * kvm_s390_pv_dealloc_vm() will also (mem)set
880 * this to false on a reboot or other destroy
881 * operation for this vm.
882 */
883 kvm->arch.pv.dumping = false;
884 kvm_s390_vcpu_unblock_all(kvm);
885 ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
886 if (ret)
887 ret = -EFAULT;
888 }
889 vfree(compl_data);
890 /* If the UVC returned an error, translate it to -EINVAL */
891 if (ret > 0)
892 ret = -EINVAL;
893 return ret;
894 }
895