// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
#include "gmap.h"
#include "faultin.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * should_export_before_import() - Determine whether an export is needed
 * before an import-like operation.
 * @uvcb: The Ultravisor control block of the UVC to be performed.
 * @mm: The mm of the process.
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed either when there is only one protected VM, because
 * the page cannot belong to the wrong VM in that case (there is no "other
 * VM" it can belong to).
 *
 * Return: %true if an export is needed before every import, otherwise %false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

struct pv_make_secure {
	void *uvcb;
	struct folio *folio;
	int rc;
	bool needs_export;
};

static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
{
	struct pv_make_secure *priv = f->priv;
	int rc;

	if (priv->needs_export)
		uv_convert_from_secure(folio_to_phys(folio));

	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio))
		return -E2BIG;

	if (!f->page)
		folio_get(folio);
	rc = __make_folio_secure(folio, priv->uvcb);
	if (!f->page)
		folio_put(folio);

	return rc;
}

static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
	struct pv_make_secure *priv = f->priv;
	struct folio *folio;

	folio = pfn_folio(f->pfn);
	priv->rc = -EAGAIN;
	if (folio_trylock(folio)) {
		priv->rc = __kvm_s390_pv_make_secure(f, folio);
		if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
			priv->folio = folio;
			folio_get(folio);
		}
		folio_unlock(folio);
	}
}

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	struct pv_make_secure priv = { .uvcb = uvcb };
	struct guest_fault f = {
		.write_attempt = true,
		.gfn = gpa_to_gfn(gaddr),
		.callback = _kvm_s390_pv_make_secure,
		.priv = &priv,
	};
	int rc;

	lockdep_assert_held(&kvm->srcu);

	priv.needs_export = should_export_before_import(uvcb, kvm->mm);

	scoped_guard(mutex, &kvm->arch.pv.import_lock) {
		rc = kvm_s390_faultin_gfn(NULL, kvm, &f);

		if (!rc) {
			rc = priv.rc;
			if (priv.folio) {
				rc = s390_wiggle_split_folio(kvm->mm, priv.folio);
				if (!rc)
					rc = -EAGAIN;
			}
		}
	}
	if (priv.folio)
		folio_put(priv.folio);
	return rc;
}

int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

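/*
 * Usage sketch (illustrative, not part of the original file): callers hold
 * kvm->srcu and retry while the Ultravisor reports -EAGAIN, mirroring the
 * unpack loop further down in this file:
 *
 *	int rc;
 *
 *	do {
 *		rc = kvm_s390_pv_convert_to_secure(kvm, gaddr);
 *		if (rc == -EAGAIN)
 *			cond_resched();
 *	} while (rc == -EAGAIN);
 */
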
/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which no longer corresponds to an active KVM VM. It should be
 * destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? EIO : 0;
}

int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

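/*
 * Worked example for the vlen calculation above (illustrative numbers):
 * for a 4 GiB guest there are 4096 HPAGE_SIZE (1 MiB) increments, so
 * vlen = ALIGN(4096 * guest_virt_var_stor_len, PAGE_SIZE) plus the fixed
 * guest_virt_base_stor_len reported by the Ultravisor.
 */
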
/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc_obj(*priv);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling gmap_pv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

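/*
 * Teardown sketch (illustrative, not part of the original file): the VCPUs
 * are destroyed first, the VM is then set aside under kvm->lock, and the
 * expensive part runs later without kvm->lock, e.g. from a worker thread:
 *
 *	u16 rc, rrc;
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_s390_pv_set_aside(kvm, &rc, &rrc);
 *	mutex_unlock(&kvm->lock);
 *
 *	... then, from the asynchronous cleanup thread:
 *	kvm_s390_pv_deinit_aside_vm(kvm, &rc, &rrc);
 */
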
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
	set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		/* The notifier will be unregistered when the VM is destroyed */
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
		if (ret) {
			kvm->arch.pv.mmu_notifier.ops = NULL;
			return ret;
		}
	}

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce.val;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	clear_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &kvm->arch.gmap->flags);
	gmap_split_huge_pages(kvm->arch.gmap);

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	return 0;
}

int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

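/*
 * Bring-up sketch (illustrative, not part of the original file): a
 * protected guest is typically constructed by creating the secure
 * configuration, converting the VCPUs, setting the SE header parameters,
 * and unpacking the encrypted image, roughly:
 *
 *	unsigned long i;
 *	struct kvm_vcpu *vcpu;
 *	u16 rc, rrc;
 *
 *	kvm_s390_pv_init_vm(kvm, &rc, &rrc);
 *	kvm_for_each_vcpu(i, vcpu, kvm)
 *		kvm_s390_pv_create_cpu(vcpu, &rc, &rrc);
 *	kvm_s390_pv_set_sec_parms(kvm, hdr, hdr_len, &rc, &rrc);
 *	kvm_s390_pv_unpack(kvm, img_addr, img_len, tweak, &rc, &rrc);
 *
 * Error handling and the image verification UVC are omitted; hdr, hdr_len,
 * img_addr, img_len and tweak stand in for the userspace-provided values.
 */
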
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	/*
	 * Each page is unpacked with the image tweak in tweak[0] and the
	 * page's offset into the image in tweak[1], so every page gets a
	 * unique tweak.
	 */
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
		if (!ret)
			return -EAGAIN;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

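/*
 * Usage sketch (illustrative, not part of the original file): the
 * KVM_PV_UNPACK ioctl handler passes the userspace parameters through,
 * roughly:
 *
 *	struct kvm_s390_pv_unp unp;
 *	u16 rc, rrc;
 *
 *	if (copy_from_user(&unp, argp, sizeof(unp)))
 *		return -EFAULT;
 *	return kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
 *				  &rc, &rrc);
 */
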
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd	= UVC_CMD_CPU_SET_STATE,
		.header.len	= sizeof(uvcb),
		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
		.state		= state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

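/*
 * Usage sketch (illustrative, not part of the original file): the state
 * values are the PV_CPU_STATE_* constants from kvm-s390.h, e.g. stopping
 * a VCPU:
 *
 *	kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
 */
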
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned, for instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

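/*
 * Usage sketch (illustrative, not part of the original file): userspace
 * drives the dump in buffer-sized chunks and resumes from the updated
 * gaddr; buff, buff_len and guest_size are placeholders:
 *
 *	u64 gaddr = 0;
 *	u16 rc, rrc;
 *	int ret;
 *
 *	do {
 *		ret = kvm_s390_pv_dump_stor_state(kvm, buff, &gaddr,
 *						  buff_len, &rc, &rrc);
 *	} while (!ret && gaddr < guest_size);
 */
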
/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}