// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/iommu.h>
#include <linux/file.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

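/*
 * Number of pages needed to hold the guest-visible (shadow) TCE table:
 * one u64 entry per IOMMU page, rounded up to whole host pages.
 */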
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

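/*
 * Total number of pages to charge against the memlock limit for a table
 * with @tce_pages shadow TCE pages: the TCE pages themselves plus the
 * pages occupied by the kvmppc_spapr_tce_table descriptor and its array
 * of page pointers.
 */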
static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
	unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
			(tce_pages * sizeof(struct page *));

	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}

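/*
 * Charge (inc == true) or uncharge (inc == false) @stt_pages against
 * current->mm->locked_vm.  Charging fails with -ENOMEM if it would push
 * locked_vm over RLIMIT_MEMLOCK and the task lacks CAP_IPC_LOCK.
 */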
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
	long ret = 0;

	if (!current || !current->mm)
		return ret; /* process exited */

	down_write(&current->mm->mmap_sem);

	if (inc) {
		unsigned long locked, lock_limit;

		locked = current->mm->locked_vm + stt_pages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			current->mm->locked_vm += stt_pages;
	} else {
		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
			stt_pages = current->mm->locked_vm;

		current->mm->locked_vm -= stt_pages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
			inc ? '+' : '-',
			stt_pages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
			struct kvmppc_spapr_tce_iommu_table, rcu);

	iommu_tce_table_put(stit->tbl);

	kfree(stit);
}

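/*
 * kref release callback: unlink the iommu_table descriptor from the
 * guest TCE table's list and free it (dropping the iommu_table
 * reference) after an RCU grace period.
 */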
static void kvm_spapr_tce_liobn_put(struct kref *kref)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
			struct kvmppc_spapr_tce_iommu_table, kref);

	list_del_rcu(&stit->next);

	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
}

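/*
 * Called when an IOMMU group is detached from the VM: drop every
 * reference that the guest's TCE tables hold on hardware tables
 * belonging to @grp.
 */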
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
		struct iommu_group *grp)
{
	int i;
	struct kvmppc_spapr_tce_table *stt;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct iommu_table_group *table_group = NULL;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {

		table_group = iommu_group_get_iommudata(grp);
		if (WARN_ON(!table_group))
			continue;

		list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
			for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
				if (table_group->tables[i] != stit->tbl)
					continue;

				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
			}
		}
	}
}

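/*
 * Attach a hardware DMA window to the guest TCE table identified by
 * @tablefd (a kvm-spapr-tce file descriptor).  The first table of @grp
 * whose page size, offset and size are compatible with the guest window
 * is referenced and added to the stt's iommu_tables list; if it is
 * already on the list, only its refcount is bumped.
 */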
extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
		struct iommu_group *grp)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	bool found = false;
	struct iommu_table *tbl = NULL;
	struct iommu_table_group *table_group;
	long i;
	struct kvmppc_spapr_tce_iommu_table *stit;
	struct fd f;

	f = fdget(tablefd);
	if (!f.file)
		return -EBADF;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt == f.file->private_data) {
			found = true;
			break;
		}
	}

	fdput(f);

	if (!found)
		return -EINVAL;

	table_group = iommu_group_get_iommudata(grp);
	if (WARN_ON(!table_group))
		return -EFAULT;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbltmp = table_group->tables[i];

		if (!tbltmp)
			continue;
		/* Make sure hardware table parameters are compatible */
		if ((tbltmp->it_page_shift <= stt->page_shift) &&
				(tbltmp->it_offset << tbltmp->it_page_shift ==
				 stt->offset << stt->page_shift) &&
				(tbltmp->it_size << tbltmp->it_page_shift >=
				 stt->size << stt->page_shift)) {
			/*
			 * Reference the table to avoid races with
			 * add/remove DMA windows.
			 */
			tbl = iommu_tce_table_get(tbltmp);
			break;
		}
	}
	if (!tbl)
		return -EINVAL;

	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
		if (tbl != stit->tbl)
			continue;

		if (!kref_get_unless_zero(&stit->kref)) {
			/* stit is being destroyed */
			iommu_tce_table_put(tbl);
			return -ENOTTY;
		}
		/*
		 * The table is already known to this KVM, we just increased
		 * its KVM reference counter and can return.
		 */
		return 0;
	}

	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
	if (!stit) {
		iommu_tce_table_put(tbl);
		return -ENOMEM;
	}

	stit->tbl = tbl;
	kref_init(&stit->kref);

	list_add_rcu(&stit->next, &stt->iommu_tables);

	return 0;
}

static void release_spapr_tce_table(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_table *stt = container_of(head,
			struct kvmppc_spapr_tce_table, rcu);
	unsigned long i, npages = kvmppc_tce_pages(stt->size);

	for (i = 0; i < npages; i++)
		if (stt->pages[i])
			__free_page(stt->pages[i]);

	kfree(stt);
}

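/*
 * Return the page backing the @sttpage'th page of the shadow TCE table,
 * allocating it on first use.  alloc_lock serialises allocation against
 * concurrent faults and H_PUT_TCE updates.
 */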
static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
		unsigned long sttpage)
{
	struct page *page = stt->pages[sttpage];

	if (page)
		return page;

	mutex_lock(&stt->alloc_lock);
	page = stt->pages[sttpage];
	if (!page) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		WARN_ON_ONCE(!page);
		if (page)
			stt->pages[sttpage] = page;
	}
	mutex_unlock(&stt->alloc_lock);

	return page;
}

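/*
 * mmap fault handler for the kvm-spapr-tce fd: hand out the (lazily
 * allocated) shadow TCE page so userspace can read the guest-visible
 * table.
 */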
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
		return VM_FAULT_SIGBUS;

	page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
	if (!page)
		return VM_FAULT_OOM;

	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct kvm *kvm = stt->kvm;

	mutex_lock(&kvm->lock);
	list_del_rcu(&stt->list);
	mutex_unlock(&kvm->lock);

	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
		WARN_ON(!kref_read(&stit->kref));
		while (1) {
			if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
				break;
		}
	}

	kvm_put_kvm(stt->kvm);

	kvmppc_account_memlimit(
		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
	call_rcu(&stt->rcu, release_spapr_tce_table);

	return 0;
}

static const struct file_operations kvm_spapr_tce_fops = {
	.mmap           = kvm_spapr_tce_mmap,
	.release	= kvm_spapr_tce_release,
};

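/*
 * Handler for the KVM_CREATE_SPAPR_TCE_64 VM ioctl: validate the window
 * parameters, account the shadow table against RLIMIT_MEMLOCK, allocate
 * the descriptor and return an anonymous "kvm-spapr-tce" fd which
 * userspace mmap()s to read the guest-visible TCEs.
 *
 * Illustrative userspace call (sketch only, variable names are made up):
 *
 *	struct kvm_create_spapr_tce_64 args = {
 *		.liobn = liobn,
 *		.page_shift = 16,		// 64K IOMMU pages
 *		.offset = dma_window_start >> 16,	// in pages
 *		.size = dma_window_size >> 16,		// in pages
 *	};
 *	int fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
 */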
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce_64 *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	struct kvmppc_spapr_tce_table *siter;
	unsigned long npages, size = args->size;
	int ret = -ENOMEM;

	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
		return -EINVAL;

	npages = kvmppc_tce_pages(size);
	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
	if (ret)
		return ret;

	ret = -ENOMEM;
	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail_acct;

	stt->liobn = args->liobn;
	stt->page_shift = args->page_shift;
	stt->offset = args->offset;
	stt->size = size;
	stt->kvm = kvm;
	mutex_init(&stt->alloc_lock);
	INIT_LIST_HEAD_RCU(&stt->iommu_tables);

	mutex_lock(&kvm->lock);

	/* Check this LIOBN hasn't been previously allocated */
	ret = 0;
	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
		if (siter->liobn == args->liobn) {
			ret = -EBUSY;
			break;
		}
	}

	kvm_get_kvm(kvm);
	if (!ret)
		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				       stt, O_RDWR | O_CLOEXEC);

	if (ret >= 0)
		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
	else
		kvm_put_kvm(kvm);

	mutex_unlock(&kvm->lock);

	if (ret >= 0)
		return ret;

	kfree(stt);
 fail_acct:
	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
	return ret;
}

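/*
 * Translate the guest physical address carried in @tce into the
 * corresponding userspace address via the memslots; the offset within
 * the page (minus the permission bits) is preserved.
 */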
static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
		unsigned long *ua)
{
	unsigned long gfn = tce >> PAGE_SHIFT;
	struct kvm_memory_slot *memslot;

	memslot = search_memslots(kvm_memslots(kvm), gfn);
	if (!memslot)
		return -EINVAL;

	*ua = __gfn_to_hva_memslot(memslot, gfn) |
		(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));

	return 0;
}

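/*
 * Validate a guest TCE before any table is touched: the guest physical
 * address must fit the window's page size, translate to a userspace
 * address and, for every attached hardware table, be backed by memory
 * preregistered with the mm_iommu API.  Returns H_SUCCESS if the TCE
 * can be handled here and H_TOO_HARD otherwise.
 */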
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
		unsigned long tce)
{
	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	enum dma_data_direction dir = iommu_tce_direction(tce);
	struct kvmppc_spapr_tce_iommu_table *stit;
	unsigned long ua = 0;

	/* Allow userspace to poison TCE table */
	if (dir == DMA_NONE)
		return H_SUCCESS;

	if (iommu_tce_check_gpa(stt->page_shift, gpa))
		return H_TOO_HARD;

	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
		return H_TOO_HARD;

	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
		unsigned long hpa = 0;
		struct mm_iommu_table_group_mem_t *mem;
		long shift = stit->tbl->it_page_shift;

		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
		if (!mem)
			return H_TOO_HARD;

		if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
			return H_TOO_HARD;
	}

	return H_SUCCESS;
}

/*
 * Handles TCE requests for emulated devices.
 * Puts guest TCE values into the table and expects user space to convert them.
 * Cannot fail, so kvmppc_tce_validate must be called before it.
 */
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
		unsigned long idx, unsigned long tce)
{
	struct page *page;
	u64 *tbl;
	unsigned long sttpage;

	idx -= stt->offset;
	sttpage = idx / TCES_PER_PAGE;
	page = stt->pages[sttpage];

	if (!page) {
		/* We allow any TCE, not just with read|write permissions */
		if (!tce)
			return;

		page = kvm_spapr_get_tce_page(stt, sttpage);
		if (!page)
			return;
	}
	tbl = page_to_virt(page);

	tbl[idx % TCES_PER_PAGE] = tce;
}

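/* Reset one hardware TCE to an empty (DMA_NONE) entry. */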
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
		unsigned long entry)
{
	unsigned long hpa = 0;
	enum dma_data_direction dir = DMA_NONE;

	iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
}

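/*
 * Drop the "mapped" count on the preregistered memory region that backed
 * this hardware TCE and clear the userspace address stored for the entry.
 */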
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return H_SUCCESS;

	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
	if (!mem)
		return H_TOO_HARD;

	mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);

	return H_SUCCESS;
}

static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	enum dma_data_direction dir = DMA_NONE;
	unsigned long hpa = 0;
	long ret;

	if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
		return H_TOO_HARD;

	if (dir == DMA_NONE)
		return H_SUCCESS;

	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
	if (ret != H_SUCCESS)
		iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);

	return ret;
}

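/*
 * Unmap the hardware TCEs covering one guest TCE.  The guest window's
 * page size may be a multiple of the hardware IOMMU page size, so a
 * single guest entry can span several hardware entries (subpages).
 */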
static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
		unsigned long entry)
{
	unsigned long i, ret = H_SUCCESS;
	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
	unsigned long io_entry = entry * subpages;

	for (i = 0; i < subpages; ++i) {
		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
		if (ret != H_SUCCESS)
			break;
	}

	return ret;
}

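/*
 * Map one hardware TCE: look up the preregistered memory backing @ua,
 * bump its "mapped" count, program the hardware table and remember @ua
 * in it_userspace so the mapping can be torn down later.
 */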
long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
		unsigned long entry, unsigned long ua,
		enum dma_data_direction dir)
{
	long ret;
	unsigned long hpa;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
	struct mm_iommu_table_group_mem_t *mem;

	if (!pua)
		/* it_userspace allocation might be delayed */
		return H_TOO_HARD;

	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
	if (!mem)
		/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
		return H_TOO_HARD;

	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
		return H_TOO_HARD;

	if (mm_iommu_mapped_inc(mem))
		return H_TOO_HARD;

	ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
	if (WARN_ON_ONCE(ret)) {
		mm_iommu_mapped_dec(mem);
		return H_TOO_HARD;
	}

	if (dir != DMA_NONE)
		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);

	*pua = cpu_to_be64(ua);

	return 0;
}

static long kvmppc_tce_iommu_map(struct kvm *kvm,
		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
		unsigned long entry, unsigned long ua,
		enum dma_data_direction dir)
{
	unsigned long i, pgoff, ret = H_SUCCESS;
	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
	unsigned long io_entry = entry * subpages;

	for (i = 0, pgoff = 0; i < subpages;
			++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {

		ret = kvmppc_tce_iommu_do_map(kvm, tbl,
				io_entry + i, ua + pgoff, dir);
		if (ret != H_SUCCESS)
			break;
	}

	return ret;
}

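/*
 * Virtual-mode handler for the H_PUT_TCE hypercall: validate the
 * request, update every hardware IOMMU table attached to this LIOBN and
 * then the guest-visible shadow table.
 */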
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
		      unsigned long ioba, unsigned long tce)
{
	struct kvmppc_spapr_tce_table *stt;
	long ret, idx;
	struct kvmppc_spapr_tce_iommu_table *stit;
	unsigned long entry, ua = 0;
	enum dma_data_direction dir;

	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
	/* 	    liobn, ioba, tce); */

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, 1);
	if (ret != H_SUCCESS)
		return ret;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = kvmppc_tce_validate(stt, tce);
	if (ret != H_SUCCESS)
		goto unlock_exit;

	dir = iommu_tce_direction(tce);

	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
		ret = H_PARAMETER;
		goto unlock_exit;
	}

	entry = ioba >> stt->page_shift;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		if (dir == DMA_NONE)
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
					stit->tbl, entry);
		else
			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
					entry, ua, dir);

		if (ret != H_SUCCESS) {
			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
			goto unlock_exit;
		}
	}

	kvmppc_tce_put(stt, entry, tce);

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);

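/*
 * H_PUT_TCE_INDIRECT: like H_PUT_TCE, but the TCE values are read from a
 * 4K-aligned guest-physical list of up to 512 entries starting at
 * @tce_list.
 */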
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_list, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret = H_SUCCESS, idx;
	unsigned long entry, ua = 0;
	u64 __user *tces;
	u64 tce;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	entry = ioba >> stt->page_shift;
	/*
	 * The SPAPR spec says that the maximum size of the list is 512 TCEs,
	 * so the whole table fits in a 4K page.
	 */
	if (npages > 512)
		return H_PARAMETER;

	if (tce_list & (SZ_4K - 1))
		return H_PARAMETER;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
		ret = H_TOO_HARD;
		goto unlock_exit;
	}
	tces = (u64 __user *) ua;

	for (i = 0; i < npages; ++i) {
		if (get_user(tce, tces + i)) {
			ret = H_TOO_HARD;
			goto unlock_exit;
		}
		tce = be64_to_cpu(tce);

		ret = kvmppc_tce_validate(stt, tce);
		if (ret != H_SUCCESS)
			goto unlock_exit;
	}

	for (i = 0; i < npages; ++i) {
		/*
		 * This looks unsafe, because we validate, then regrab
		 * the TCE from userspace which could have been changed by
		 * another thread.
		 *
		 * But it actually is safe, because the relevant checks will be
		 * re-executed in the following code.  If userspace tries to
		 * change this dodgily it will result in a messier failure mode
		 * but won't threaten the host.
		 */
		if (get_user(tce, tces + i)) {
			ret = H_TOO_HARD;
			goto unlock_exit;
		}
		tce = be64_to_cpu(tce);

		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
			ret = H_PARAMETER;
			goto unlock_exit;
		}

		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
					stit->tbl, entry + i, ua,
					iommu_tce_direction(tce));

			if (ret != H_SUCCESS) {
				kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
						entry + i);
				goto unlock_exit;
			}
		}

		kvmppc_tce_put(stt, entry + i, tce);
	}

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);

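/*
 * H_STUFF_TCE: set @npages consecutive TCEs to the same @tce_value,
 * which must not carry read/write permission bits (i.e. it clears or
 * poisons a range of the window).
 */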
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_value, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	/*
	 * Check permission bits only, to allow userspace to poison the TCE
	 * table for debugging.
	 */
	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
		return H_PARAMETER;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		unsigned long entry = ioba >> stt->page_shift;

		for (i = 0; i < npages; ++i) {
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
					stit->tbl, entry + i);

			if (ret == H_SUCCESS)
				continue;

			if (ret == H_TOO_HARD)
				return ret;

			WARN_ON_ONCE(1);
			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry + i);
		}
	}

	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
768