xref: /linux/arch/s390/kvm/pv.c (revision bc46b7cbc58c4cb562b6a45a1fbc7b8e7b23df58)
// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"

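/*
 * A VM or VCPU counts as protected as long as it holds an Ultravisor handle.
 * The caller must hold kvm->lock (for the VM check) or the vcpu mutex (for
 * the VCPU check).
 */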
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	unsigned long vmaddr;

	lockdep_assert_held(&kvm->srcu);

	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
	if (kvm_is_error_hva(vmaddr))
		return -EFAULT;
	return make_hva_secure(kvm->mm, vmaddr, uvcb);
}

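/**
 * kvm_s390_pv_convert_to_secure() - convert one guest page to secure storage
 * @kvm: the guest
 * @gaddr: the guest address of the page to convert
 *
 * Wrapper around kvm_s390_pv_make_secure() that issues the Convert to
 * Secure Storage UV call for @gaddr.
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */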
int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

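/* Forget the handle and the donated storage of the current protected VM */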
static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

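/**
 * kvm_s390_pv_destroy_cpu() - Destroy the secure CPU of a VCPU
 * @vcpu: the VCPU whose secure CPU is to be destroyed
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Issues the Destroy Secure CPU UV call for @vcpu, frees the donated CPU
 * storage and SIDA, and resets the protected-virtualization fields in the
 * SIE control block.
 *
 * Return: 0 in case of success (or if the VCPU was never protected),
 * EIO if the UV call fails.
 */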
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? EIO : 0;
}

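/**
 * kvm_s390_pv_create_cpu() - Create a secure CPU for a VCPU
 * @vcpu: the VCPU to be made secure
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Donates the CPU base storage and a SIDA page to the Ultravisor and issues
 * the Create Secure CPU UV call. On success the returned CPU handle is
 * stored in the VCPU and in its SIE control block.
 *
 * Return: 0 in case of success, -EINVAL if the VCPU is already protected,
 * -ENOMEM or -EIO in case of errors.
 */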
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

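/*
 * Allocate the base and variable storage that will be donated to the
 * Ultravisor for this guest. The size of the variable storage depends on
 * the current memory size of the guest.
 */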
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

/**
 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
 * @kvm: the VM whose memory is to be cleared.
 *
 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
 * The CPUs of the protected VM need to be destroyed beforehand.
 */
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
{
	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
	struct kvm_memory_slot *slot;
	unsigned long len;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* Take the memslot containing guest absolute address 0 */
	slot = gfn_to_memslot(kvm, 0);
	/* Clear all slots or parts thereof that are below 2GB */
	while (slot && slot->base_gfn < pages_2g) {
		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
		/* Take the next memslot */
		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
	}

	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

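/*
 * Destroy the protected VM using the Destroy Secure Configuration Fast UVC.
 * On success the donated memory is deallocated; on failure it is
 * intentionally leaked because it is no longer usable.
 */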
static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

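/* Check whether the Destroy Secure Configuration Fast UVC is available */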
static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	kvm_s390_destroy_lower_2g(kvm);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling s390_uv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside(). Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

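/*
 * MMU notifier .release() callback, invoked when the address space of the
 * process is torn down: convert all VCPUs back from protected mode and, if
 * the fast destroy UVC is available, destroy the leftover secure
 * configuration.
 */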
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

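/**
 * kvm_s390_pv_init_vm - Create the protected (secure) configuration for a VM
 * @kvm: the KVM to be made protected
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Allocates and donates the base and variable storage, issues the Create
 * Secure Configuration UV call and registers the MMU notifier used to clean
 * up when the process exits.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -ENOMEM or -EIO in case of errors.
 */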
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
	}
	return 0;
}

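/*
 * Pass the secure execution header of the guest image to the Ultravisor
 * via the Set Secure Configuration Parameters UVC.
 */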
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

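/*
 * Unpack one page of the encrypted guest image at the given guest address
 * and make it secure. If the page is not yet mapped (-ENXIO), fault it in
 * and link it into the gmap, then report -EAGAIN so the caller retries.
 */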
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
	unsigned long vmaddr;
	bool unlocked;

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		mmap_read_lock(kvm->mm);
		vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
		if (kvm_is_error_hva(vmaddr)) {
			ret = -EFAULT;
		} else {
			ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
			if (!ret)
				ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
		}
		mmap_read_unlock(kvm->mm);
		if (!ret)
			return -EAGAIN;
		return ret;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

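/**
 * kvm_s390_pv_unpack - Unpack the encrypted guest image into secure storage
 * @kvm: the guest
 * @addr: guest address where the image is to be unpacked
 * @size: size of the image, must be a multiple of PAGE_SIZE
 * @tweak: tweak value used when the image was encrypted
 * @rc: the RC code of the last unpack UVC
 * @rrc: the RRC code of the last unpack UVC
 *
 * Unpacks the image page by page, retrying pages that first need to be
 * faulted in and stopping early if a fatal signal is pending.
 *
 * Return: 0 in case of success, -EINVAL for an unaligned or empty range,
 * < 0 in case of other errors.
 */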
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

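/* Set the state of a secure CPU (e.g. operating or stopped) via the CPU Set State UVC */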
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd	= UVC_CMD_CPU_SET_STATE,
		.header.len	= sizeof(uvcb),
		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
		.state		= state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

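/* Dump the state of one secure CPU into the provided buffer via the Dump CPU UVC */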
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}
975