// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	unsigned long vmaddr;

	lockdep_assert_held(&kvm->srcu);

	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
	if (kvm_is_error_hva(vmaddr))
		return -EFAULT;
	return make_hva_secure(kvm->mm, vmaddr, uvcb);
}

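/**
 * kvm_s390_pv_convert_to_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 *
 * Convenience wrapper around kvm_s390_pv_make_secure() that issues the
 * Convert To Secure Storage UVC for @gaddr using the guest handle.
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */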
int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 * Return: 0 on success, negative error code on failure.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

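/**
 * kvm_s390_pv_destroy_cpu() - Destroy the protected state of one vCPU.
 * @vcpu: the vCPU whose protected state is to be destroyed
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * If the vCPU has protected state, issue the Destroy Secure CPU UVC, free
 * the SIDA and (on success) the donated base storage, and clear the
 * protected-virtualization fields in the SIE control block.
 *
 * Return: 0 in case of success or if the vCPU has no protected state,
 * otherwise -EIO.
 */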
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? -EIO : 0;
}

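/**
 * kvm_s390_pv_create_cpu() - Create the protected state of one vCPU.
 * @vcpu: the vCPU to be made protected
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Donate the CPU base storage and the SIDA to the Ultravisor and issue the
 * Create Secure CPU UVC for this vCPU.
 *
 * Return: 0 in case of success, -EINVAL if the vCPU already has a handle,
 * -ENOMEM if an allocation fails, -EIO if the UVC fails.
 */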
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

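/*
 * Allocate the base and variable storage that will be donated to the
 * Ultravisor when the secure configuration is created. The size of the
 * variable storage scales with the guest storage size, which is derived
 * from the end of the highest memslot.
 */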
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

/**
 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
 * @kvm: the VM whose memory is to be cleared.
 *
 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
 * The CPUs of the protected VM need to be destroyed beforehand.
 */
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
{
	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
	struct kvm_memory_slot *slot;
	unsigned long len;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* Take the memslot containing guest absolute address 0 */
	slot = gfn_to_memslot(kvm, 0);
	/* Clear all slots or parts thereof that are below 2GB */
	while (slot && slot->base_gfn < pages_2g) {
		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
		/* Take the next memslot */
		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
	}

	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

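/*
 * Tear down the protected VM with the Destroy Secure Configuration Fast
 * UVC. On success the memory donated to the Ultravisor is freed again;
 * on failure it is intentionally leaked, because it is no longer usable.
 */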
static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already
 * set aside or if the VM uses a segment type ASCE, -ENOMEM if the system ran
 * out of memory, -EIO if the destroy fast UVC failed.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	kvm_s390_destroy_lower_2g(kvm);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling s390_uv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Tear down a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

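/*
 * The .release notifier runs when the mm is being torn down. Destroy the
 * protected state of any remaining secure CPUs and, if the destroy-fast
 * UVC is available and the VM still has a handle, the secure configuration
 * itself, so that no Ultravisor state outlives the address space.
 */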
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

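/**
 * kvm_s390_pv_init_vm - Create the protected configuration for this KVM.
 * @kvm: the KVM to be made protected
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Register the mmu notifier (only once per VM), allocate the base and
 * variable storage to be donated to the Ultravisor, and issue the Create
 * Secure Configuration UVC.
 *
 * Context: kvm->lock needs to be held.
 *
 * Return: 0 in case of success, a negative error code otherwise.
 */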
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		/* The notifier will be unregistered when the VM is destroyed */
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
		if (ret) {
			kvm->arch.pv.mmu_notifier.ops = NULL;
			return ret;
		}
	}

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	return 0;
}

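/**
 * kvm_s390_pv_set_sec_parms - Pass the secure configuration header to the UV.
 * @kvm: the KVM whose protected VM is being set up
 * @hdr: kernel buffer containing the secure configuration header
 * @length: length of the header in bytes
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Issue the Set Secure Configuration Parameters UVC, passing the header at
 * @hdr to the Ultravisor for the protected VM of @kvm.
 *
 * Return: 0 in case of success, -EINVAL if the UVC fails.
 */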
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

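/*
 * Unpack one page of the encrypted guest image: issue the Unpack UVC for
 * @addr with the given tweak. If the page is not mapped yet (-ENXIO), fault
 * it in writable, link it into the gmap and ask the caller to retry by
 * returning -EAGAIN.
 */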
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
	unsigned long vmaddr;
	bool unlocked;

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		mmap_read_lock(kvm->mm);
		vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
		if (kvm_is_error_hva(vmaddr)) {
			ret = -EFAULT;
		} else {
			ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
			if (!ret)
				ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
		}
		mmap_read_unlock(kvm->mm);
		if (!ret)
			return -EAGAIN;
		return ret;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

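/**
 * kvm_s390_pv_unpack - Unpack a portion of the encrypted guest image.
 * @kvm: the KVM whose protected VM the image is unpacked into
 * @addr: guest absolute address at which the image portion starts
 * @size: size of the image portion, must be a multiple of PAGE_SIZE
 * @tweak: tweak value passed to the Unpack UVC
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Unpack the image one page at a time; pages that still need to be faulted
 * in are retried, unless a fatal signal is pending.
 *
 * Return: 0 in case of success, -EINVAL for an unaligned @addr or @size,
 * otherwise a negative error code.
 */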
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

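/*
 * Set the requested state of a protected vCPU by issuing the CPU Set State
 * UVC for its handle. Returns 0 on success, -EINVAL if the UVC fails.
 */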
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

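/*
 * Dump the state of one protected vCPU into the given kernel buffer by
 * issuing the Dump CPU UVC. Returns the condition code of the UV call.
 */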
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state - Dump the storage state of protected guest pages.
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned, for instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

/**
 * kvm_s390_pv_dump_complete - Complete the dump of a protected VM.
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}