// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	unsigned long vmaddr;

	lockdep_assert_held(&kvm->srcu);

	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
	if (kvm_is_error_hva(vmaddr))
		return -EFAULT;
	return make_hva_secure(kvm->mm, vmaddr, uvcb);
}

int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

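/*
 * Lifecycle (as implemented below): an instance is allocated in
 * kvm_s390_pv_set_aside() and stored in kvm->arch.pv.set_aside; it is
 * either consumed directly by kvm_s390_pv_deinit_aside_vm(), or moved to
 * the kvm->arch.pv.need_cleanup list, from which
 * kvm_s390_pv_deinit_cleanup_all() disposes of it via
 * kvm_s390_pv_dispose_one_leftover() and then frees it with kfree().
 */
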
static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? EIO : 0;
}

int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

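/*
 * Worked example for the variable storage size above, with illustrative
 * numbers only (the real lengths come from the Ultravisor query info):
 * for a guest whose memslots end at 4 GiB, npages * PAGE_SIZE / HPAGE_SIZE
 * is 4 GiB / 1 MiB = 4096 blocks. If guest_virt_var_stor_len were 0x200
 * bytes, then vlen = ALIGN(4096 * 0x200, PAGE_SIZE) +
 * guest_virt_base_stor_len = 2 MiB + guest_virt_base_stor_len.
 */
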
/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

/**
 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
 * @kvm: the VM whose memory is to be cleared.
 *
 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
 * The CPUs of the protected VM need to be destroyed beforehand.
 */
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
{
	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
	struct kvm_memory_slot *slot;
	unsigned long len;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* Take the memslot containing guest absolute address 0 */
	slot = gfn_to_memslot(kvm, 0);
	/* Clear all slots or parts thereof that are below 2GB */
	while (slot && slot->base_gfn < pages_2g) {
		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
		/* Take the next memslot */
		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
	}

	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	kvm_s390_destroy_lower_2g(kvm);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

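/*
 * Illustrative call sequence (a sketch, not part of the build): once all
 * protected vCPUs have been destroyed, the old configuration is set aside
 * under kvm->lock and torn down later without holding kvm->lock:
 *
 *	u16 rc, rrc;
 *	int r;
 *
 *	mutex_lock(&kvm->lock);
 *	r = kvm_s390_pv_set_aside(kvm, &rc, &rrc);
 *	mutex_unlock(&kvm->lock);
 *
 *	later, typically from a separate userspace thread:
 *	r = kvm_s390_pv_deinit_aside_vm(kvm, &rc, &rrc);
 */
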
/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling s390_uv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

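/*
 * Illustrative use (a sketch): outside of kvm_arch_destroy_vm() the caller
 * is expected to hold kvm->lock:
 *
 *	u16 rc, rrc;
 *
 *	mutex_lock(&kvm->lock);
 *	r = kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
 *	mutex_unlock(&kvm->lock);
 */
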
/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
	}
	return 0;
}

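/*
 * Rough sketch of the order in which the functions in this file are used
 * when a guest transitions into protected mode (the ioctl plumbing lives
 * in kvm-s390.c; the list below is indicative, not exhaustive):
 *
 *	kvm_s390_pv_init_vm()		create the secure configuration
 *	kvm_s390_pv_create_cpu()	once per vCPU
 *	kvm_s390_pv_set_sec_parms()	hand the SE header to the Ultravisor
 *	kvm_s390_pv_unpack()		unpack the encrypted boot image
 *	kvm_s390_pv_set_cpu_state()	e.g. set a vCPU operating
 */
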
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
	unsigned long vmaddr;
	bool unlocked;

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		mmap_read_lock(kvm->mm);
		vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
		if (kvm_is_error_hva(vmaddr)) {
			ret = -EFAULT;
		} else {
			ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
			if (!ret)
				ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
		}
		mmap_read_unlock(kvm->mm);
		if (!ret)
			return -EAGAIN;
		return ret;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

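/*
 * Illustrative use (a sketch): both the start address and the size must be
 * page aligned; img_gaddr, img_size and img_tweak are placeholders for the
 * values supplied by userspace with the unpack request:
 *
 *	u16 rc, rrc;
 *
 *	ret = kvm_s390_pv_unpack(kvm, img_gaddr, img_size, img_tweak,
 *				 &rc, &rrc);
 */
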
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

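/*
 * Illustrative use (a sketch): dump the tweak components for the first 2MB
 * of guest storage into a user buffer that holds exactly two increments;
 * gaddr is advanced so a subsequent call can continue where this one
 * stopped (buff_user is a placeholder for a caller-supplied user pointer):
 *
 *	u64 gaddr = 0;
 *	u16 rc, rrc;
 *
 *	ret = kvm_s390_pv_dump_stor_state(kvm, buff_user, &gaddr,
 *					  2 * uv_info.conf_dump_storage_state_len,
 *					  &rc, &rrc);
 */
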
/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}