// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 * Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
#include "gmap.h"
#include "faultin.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * should_export_before_import() - Determine whether an export is needed
 * before an import-like operation.
 * @uvcb: The Ultravisor control block of the UVC to be performed.
 * @mm: The mm of the process.
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * An export is also not needed when there is only one protected VM, because
 * in that case the page cannot belong to the wrong VM (there is no "other
 * VM" it could belong to).
 *
 * Return: %true if an export is needed before every import, otherwise %false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

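/*
 * struct pv_make_secure - Context for the make-secure fault callback.
 * @uvcb: the UVCB of the import-like operation to be performed
 * @folio: on failure, a folio that might still need to be split (a reference is held)
 * @rc: the result of the callback, consumed by kvm_s390_pv_make_secure()
 * @needs_export: whether the page needs to be exported before the import
 */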
struct pv_make_secure {
	void *uvcb;
	struct folio *folio;
	int rc;
	bool needs_export;
};

static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
{
	struct pv_make_secure *priv = f->priv;
	int rc;

	if (priv->needs_export)
		uv_convert_from_secure(folio_to_phys(folio));

	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio))
		return -E2BIG;

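	/*
	 * If the fault code did not pin a page for us, hold a temporary
	 * folio reference across the UVC.
	 */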
	if (!f->page)
		folio_get(folio);
	rc = __make_folio_secure(folio, priv->uvcb);
	if (!f->page)
		folio_put(folio);

	return rc;
}

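/* Callback for kvm_s390_faultin_gfn(): try to make the faulted-in folio secure */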
static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
	struct pv_make_secure *priv = f->priv;
	struct folio *folio;

	folio = pfn_folio(f->pfn);
	priv->rc = -EAGAIN;
	if (folio_trylock(folio)) {
		priv->rc = __kvm_s390_pv_make_secure(f, folio);
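		/*
		 * Hand a reference to the folio back to the caller, which
		 * may try to split the folio (see kvm_s390_pv_make_secure()).
		 */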
		if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
			priv->folio = folio;
			folio_get(folio);
		}
		folio_unlock(folio);
	}
}

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	struct pv_make_secure priv = { .uvcb = uvcb };
	struct guest_fault f = {
		.write_attempt = true,
		.gfn = gpa_to_gfn(gaddr),
		.callback = _kvm_s390_pv_make_secure,
		.priv = &priv,
	};
	int rc;

	lockdep_assert_held(&kvm->srcu);

	priv.needs_export = should_export_before_import(uvcb, kvm->mm);

	scoped_guard(mutex, &kvm->arch.pv.import_lock) {
		rc = kvm_s390_faultin_gfn(NULL, kvm, &f);

		if (!rc) {
			rc = priv.rc;
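			/*
			 * The folio could not be made secure in place; try
			 * to split it, then let the caller retry.
			 */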
			if (priv.folio) {
				rc = s390_wiggle_split_folio(kvm->mm, priv.folio);
				if (!rc)
					rc = -EAGAIN;
			}
		}
	}
	if (priv.folio)
		folio_put(priv.folio);
	return rc;
}

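/* Make one guest page secure using the Convert To Secure Storage UVC */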
int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 *
 * Return: 0 on success, a negative error code on failure.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? -EIO : 0;
}

int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc_obj(*priv);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

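	/* Destroy the pages in the first 2GB, to avoid prefix issues after reboot */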
	gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling gmap_pv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

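	/* Start out with "success" (UVC_RC_EXECUTED), so the first error rc/rrc get recorded below */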
	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by userspace
 * asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
	set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		/* The notifier will be unregistered when the VM is destroyed */
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
		if (ret) {
			kvm->arch.pv.mmu_notifier.ops = NULL;
			return ret;
		}
	}

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce.val;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

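	/* Protected guests cannot use huge pages; split any that are currently mapped */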
	clear_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &kvm->arch.gmap->flags);
	gmap_split_huge_pages(kvm->arch.gmap);

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	return 0;
}

int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

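	/* The address was not mapped: fault it in, then let the caller retry */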
	if (ret == -ENXIO) {
		ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
		if (!ret)
			return -EAGAIN;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

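/* Dump the state of one secure CPU into @buff via the Dump CPU UVC */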
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned, for instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}