xref: /linux/arch/powerpc/kvm/book3s_64_vio.c (revision e18655cf35a5958fbf4ae9ca3ebf28871a3a1801)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *
4  * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
5  * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
6  * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
7  */
8 
9 #include <linux/types.h>
10 #include <linux/string.h>
11 #include <linux/kvm.h>
12 #include <linux/kvm_host.h>
13 #include <linux/highmem.h>
14 #include <linux/gfp.h>
15 #include <linux/slab.h>
16 #include <linux/sched/signal.h>
17 #include <linux/hugetlb.h>
18 #include <linux/list.h>
19 #include <linux/anon_inodes.h>
20 #include <linux/iommu.h>
21 #include <linux/file.h>
22 #include <linux/mm.h>
23 #include <linux/rcupdate_wait.h>
24 
25 #include <asm/kvm_ppc.h>
26 #include <asm/kvm_book3s.h>
27 #include <asm/book3s/64/mmu-hash.h>
28 #include <asm/hvcall.h>
29 #include <asm/synch.h>
30 #include <asm/ppc-opcode.h>
31 #include <asm/udbg.h>
32 #include <asm/iommu.h>
33 #include <asm/tce.h>
34 #include <asm/mmu_context.h>
35 
36 static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
37 	unsigned long liobn)
38 {
39 	struct kvmppc_spapr_tce_table *stt;
40 
41 	list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
42 		if (stt->liobn == liobn)
43 			return stt;
44 
45 	return NULL;
46 }
47 
48 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
49 {
50 	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
51 }
52 
53 static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
54 {
55 	unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
56 			(tce_pages * sizeof(struct page *));
57 
58 	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
59 }
60 
61 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
62 {
63 	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
64 			struct kvmppc_spapr_tce_iommu_table, rcu);
65 
66 	iommu_tce_table_put(stit->tbl);
67 
68 	kfree(stit);
69 }
70 
71 static void kvm_spapr_tce_liobn_put(struct kref *kref)
72 {
73 	struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
74 			struct kvmppc_spapr_tce_iommu_table, kref);
75 
76 	list_del_rcu(&stit->next);
77 
78 	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
79 }
80 
81 void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
82 				       struct iommu_group *grp)
83 {
84 	int i;
85 	struct kvmppc_spapr_tce_table *stt;
86 	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
87 	struct iommu_table_group *table_group = NULL;
88 
89 	rcu_read_lock();
90 	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
91 
92 		table_group = iommu_group_get_iommudata(grp);
93 		if (WARN_ON(!table_group))
94 			continue;
95 
96 		list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
97 			for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
98 				if (table_group->tables[i] != stit->tbl)
99 					continue;
100 
101 				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
102 			}
103 		}
104 		cond_resched_rcu();
105 	}
106 	rcu_read_unlock();
107 }
108 
109 long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
110 				      struct iommu_group *grp)
111 {
112 	struct kvmppc_spapr_tce_table *stt = NULL;
113 	bool found = false;
114 	struct iommu_table *tbl = NULL;
115 	struct iommu_table_group *table_group;
116 	long i;
117 	struct kvmppc_spapr_tce_iommu_table *stit;
118 	CLASS(fd, f)(tablefd);
119 
120 	if (fd_empty(f))
121 		return -EBADF;
122 
123 	rcu_read_lock();
124 	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
125 		if (stt == fd_file(f)->private_data) {
126 			found = true;
127 			break;
128 		}
129 	}
130 	rcu_read_unlock();
131 
132 	if (!found)
133 		return -EINVAL;
134 
135 	table_group = iommu_group_get_iommudata(grp);
136 	if (WARN_ON(!table_group))
137 		return -EFAULT;
138 
139 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
140 		struct iommu_table *tbltmp = table_group->tables[i];
141 
142 		if (!tbltmp)
143 			continue;
144 		/* Make sure hardware table parameters are compatible */
145 		if ((tbltmp->it_page_shift <= stt->page_shift) &&
146 				(tbltmp->it_offset << tbltmp->it_page_shift ==
147 				 stt->offset << stt->page_shift) &&
148 				(tbltmp->it_size << tbltmp->it_page_shift >=
149 				 stt->size << stt->page_shift)) {
150 			/*
151 			 * Reference the table to avoid races with
152 			 * add/remove DMA windows.
153 			 */
154 			tbl = iommu_tce_table_get(tbltmp);
155 			break;
156 		}
157 	}
158 	if (!tbl)
159 		return -EINVAL;
160 
161 	rcu_read_lock();
162 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
163 		if (tbl != stit->tbl)
164 			continue;
165 
166 		if (!kref_get_unless_zero(&stit->kref)) {
167 			/* stit is being destroyed */
168 			iommu_tce_table_put(tbl);
169 			rcu_read_unlock();
170 			return -ENOTTY;
171 		}
172 		/*
173 		 * The table is already known to this KVM, we just increased
174 		 * its KVM reference counter and can return.
175 		 */
176 		rcu_read_unlock();
177 		return 0;
178 	}
179 	rcu_read_unlock();
180 
181 	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
182 	if (!stit) {
183 		iommu_tce_table_put(tbl);
184 		return -ENOMEM;
185 	}
186 
187 	stit->tbl = tbl;
188 	kref_init(&stit->kref);
189 
190 	list_add_rcu(&stit->next, &stt->iommu_tables);
191 
192 	return 0;
193 }
194 
195 static void release_spapr_tce_table(struct rcu_head *head)
196 {
197 	struct kvmppc_spapr_tce_table *stt = container_of(head,
198 			struct kvmppc_spapr_tce_table, rcu);
199 	unsigned long i, npages = kvmppc_tce_pages(stt->size);
200 
201 	for (i = 0; i < npages; i++)
202 		if (stt->pages[i])
203 			__free_page(stt->pages[i]);
204 
205 	kfree(stt);
206 }
207 
208 static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
209 		unsigned long sttpage)
210 {
211 	struct page *page = stt->pages[sttpage];
212 
213 	if (page)
214 		return page;
215 
216 	mutex_lock(&stt->alloc_lock);
217 	page = stt->pages[sttpage];
218 	if (!page) {
219 		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
220 		WARN_ON_ONCE(!page);
221 		if (page)
222 			stt->pages[sttpage] = page;
223 	}
224 	mutex_unlock(&stt->alloc_lock);
225 
226 	return page;
227 }
228 
229 static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
230 {
231 	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
232 	struct page *page;
233 
234 	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
235 		return VM_FAULT_SIGBUS;
236 
237 	page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
238 	if (!page)
239 		return VM_FAULT_OOM;
240 
241 	get_page(page);
242 	vmf->page = page;
243 	return 0;
244 }
245 
246 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
247 	.fault = kvm_spapr_tce_fault,
248 };
249 
250 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
251 {
252 	vma->vm_ops = &kvm_spapr_tce_vm_ops;
253 	return 0;
254 }
255 
256 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
257 {
258 	struct kvmppc_spapr_tce_table *stt = filp->private_data;
259 	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
260 	struct kvm *kvm = stt->kvm;
261 
262 	mutex_lock(&kvm->lock);
263 	list_del_rcu(&stt->list);
264 	mutex_unlock(&kvm->lock);
265 
266 	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
267 		WARN_ON(!kref_read(&stit->kref));
268 		while (1) {
269 			if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
270 				break;
271 		}
272 	}
273 
274 	account_locked_vm(kvm->mm,
275 		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
276 
277 	kvm_put_kvm(stt->kvm);
278 
279 	call_rcu(&stt->rcu, release_spapr_tce_table);
280 
281 	return 0;
282 }
283 
284 static const struct file_operations kvm_spapr_tce_fops = {
285 	.mmap           = kvm_spapr_tce_mmap,
286 	.release	= kvm_spapr_tce_release,
287 };
288 
289 int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
290 				  struct kvm_create_spapr_tce_64 *args)
291 {
292 	struct kvmppc_spapr_tce_table *stt = NULL;
293 	struct kvmppc_spapr_tce_table *siter;
294 	struct mm_struct *mm = kvm->mm;
295 	unsigned long npages;
296 	int ret;
297 
298 	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
299 		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
300 		return -EINVAL;
301 
302 	npages = kvmppc_tce_pages(args->size);
303 	ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
304 	if (ret)
305 		return ret;
306 
307 	ret = -ENOMEM;
308 	stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN);
309 	if (!stt)
310 		goto fail_acct;
311 
312 	stt->liobn = args->liobn;
313 	stt->page_shift = args->page_shift;
314 	stt->offset = args->offset;
315 	stt->size = args->size;
316 	stt->kvm = kvm;
317 	mutex_init(&stt->alloc_lock);
318 	INIT_LIST_HEAD_RCU(&stt->iommu_tables);
319 
320 	mutex_lock(&kvm->lock);
321 
322 	/* Check this LIOBN hasn't been previously allocated */
323 	ret = 0;
324 	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
325 		if (siter->liobn == args->liobn) {
326 			ret = -EBUSY;
327 			break;
328 		}
329 	}
330 
331 	kvm_get_kvm(kvm);
332 	if (!ret)
333 		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
334 				       stt, O_RDWR | O_CLOEXEC);
335 
336 	if (ret >= 0)
337 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
338 	else
339 		kvm_put_kvm_no_destroy(kvm);
340 
341 	mutex_unlock(&kvm->lock);
342 
343 	if (ret >= 0)
344 		return ret;
345 
346 	kfree(stt);
347  fail_acct:
348 	account_locked_vm(mm, kvmppc_stt_pages(npages), false);
349 	return ret;
350 }
351 
352 static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
353 		unsigned long *ua)
354 {
355 	unsigned long gfn = tce >> PAGE_SHIFT;
356 	struct kvm_memory_slot *memslot;
357 
358 	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
359 	if (!memslot)
360 		return -EINVAL;
361 
362 	*ua = __gfn_to_hva_memslot(memslot, gfn) |
363 		(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
364 
365 	return 0;
366 }
367 
368 static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
369 		unsigned long tce)
370 {
371 	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
372 	enum dma_data_direction dir = iommu_tce_direction(tce);
373 	struct kvmppc_spapr_tce_iommu_table *stit;
374 	unsigned long ua = 0;
375 
376 	/* Allow userspace to poison TCE table */
377 	if (dir == DMA_NONE)
378 		return H_SUCCESS;
379 
380 	if (iommu_tce_check_gpa(stt->page_shift, gpa))
381 		return H_TOO_HARD;
382 
383 	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
384 		return H_TOO_HARD;
385 
386 	rcu_read_lock();
387 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
388 		unsigned long hpa = 0;
389 		struct mm_iommu_table_group_mem_t *mem;
390 		long shift = stit->tbl->it_page_shift;
391 
392 		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
393 		if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
394 			rcu_read_unlock();
395 			return H_TOO_HARD;
396 		}
397 	}
398 	rcu_read_unlock();
399 
400 	return H_SUCCESS;
401 }
402 
403 /*
404  * Handles TCE requests for emulated devices.
405  * Puts guest TCE values to the table and expects user space to convert them.
406  * Cannot fail so kvmppc_tce_validate must be called before it.
407  */
408 static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
409 		unsigned long idx, unsigned long tce)
410 {
411 	struct page *page;
412 	u64 *tbl;
413 	unsigned long sttpage;
414 
415 	idx -= stt->offset;
416 	sttpage = idx / TCES_PER_PAGE;
417 	page = stt->pages[sttpage];
418 
419 	if (!page) {
420 		/* We allow any TCE, not just with read|write permissions */
421 		if (!tce)
422 			return;
423 
424 		page = kvm_spapr_get_tce_page(stt, sttpage);
425 		if (!page)
426 			return;
427 	}
428 	tbl = page_to_virt(page);
429 
430 	tbl[idx % TCES_PER_PAGE] = tce;
431 }
432 
433 static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
434 		struct iommu_table *tbl, unsigned long entry)
435 {
436 	unsigned long i;
437 	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
438 	unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
439 
440 	for (i = 0; i < subpages; ++i) {
441 		unsigned long hpa = 0;
442 		enum dma_data_direction dir = DMA_NONE;
443 
444 		iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
445 	}
446 }
447 
448 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
449 		struct iommu_table *tbl, unsigned long entry)
450 {
451 	struct mm_iommu_table_group_mem_t *mem = NULL;
452 	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
453 	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
454 
455 	if (!pua)
456 		return H_SUCCESS;
457 
458 	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
459 	if (!mem)
460 		return H_TOO_HARD;
461 
462 	mm_iommu_mapped_dec(mem);
463 
464 	*pua = cpu_to_be64(0);
465 
466 	return H_SUCCESS;
467 }
468 
469 static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
470 		struct iommu_table *tbl, unsigned long entry)
471 {
472 	enum dma_data_direction dir = DMA_NONE;
473 	unsigned long hpa = 0;
474 	long ret;
475 
476 	if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa,
477 					&dir)))
478 		return H_TOO_HARD;
479 
480 	if (dir == DMA_NONE)
481 		return H_SUCCESS;
482 
483 	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
484 	if (ret != H_SUCCESS)
485 		iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
486 
487 	return ret;
488 }
489 
490 static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
491 		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
492 		unsigned long entry)
493 {
494 	unsigned long i, ret = H_SUCCESS;
495 	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
496 	unsigned long io_entry = entry * subpages;
497 
498 	for (i = 0; i < subpages; ++i) {
499 		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
500 		if (ret != H_SUCCESS)
501 			break;
502 	}
503 
504 	iommu_tce_kill(tbl, io_entry, subpages);
505 
506 	return ret;
507 }
508 
509 static long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
510 		unsigned long entry, unsigned long ua,
511 		enum dma_data_direction dir)
512 {
513 	long ret;
514 	unsigned long hpa;
515 	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
516 	struct mm_iommu_table_group_mem_t *mem;
517 
518 	if (!pua)
519 		/* it_userspace allocation might be delayed */
520 		return H_TOO_HARD;
521 
522 	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
523 	if (!mem)
524 		/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
525 		return H_TOO_HARD;
526 
527 	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
528 		return H_TOO_HARD;
529 
530 	if (mm_iommu_mapped_inc(mem))
531 		return H_TOO_HARD;
532 
533 	ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
534 	if (WARN_ON_ONCE(ret)) {
535 		mm_iommu_mapped_dec(mem);
536 		return H_TOO_HARD;
537 	}
538 
539 	if (dir != DMA_NONE)
540 		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
541 
542 	*pua = cpu_to_be64(ua);
543 
544 	return 0;
545 }
546 
547 static long kvmppc_tce_iommu_map(struct kvm *kvm,
548 		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
549 		unsigned long entry, unsigned long ua,
550 		enum dma_data_direction dir)
551 {
552 	unsigned long i, pgoff, ret = H_SUCCESS;
553 	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
554 	unsigned long io_entry = entry * subpages;
555 
556 	for (i = 0, pgoff = 0; i < subpages;
557 			++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {
558 
559 		ret = kvmppc_tce_iommu_do_map(kvm, tbl,
560 				io_entry + i, ua + pgoff, dir);
561 		if (ret != H_SUCCESS)
562 			break;
563 	}
564 
565 	iommu_tce_kill(tbl, io_entry, subpages);
566 
567 	return ret;
568 }
569 
570 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
571 		      unsigned long ioba, unsigned long tce)
572 {
573 	struct kvmppc_spapr_tce_table *stt;
574 	long ret, idx;
575 	struct kvmppc_spapr_tce_iommu_table *stit;
576 	unsigned long entry, ua = 0;
577 	enum dma_data_direction dir;
578 
579 	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
580 	/* 	    liobn, ioba, tce); */
581 
582 	stt = kvmppc_find_table(vcpu->kvm, liobn);
583 	if (!stt)
584 		return H_TOO_HARD;
585 
586 	ret = kvmppc_ioba_validate(stt, ioba, 1);
587 	if (ret != H_SUCCESS)
588 		return ret;
589 
590 	idx = srcu_read_lock(&vcpu->kvm->srcu);
591 
592 	ret = kvmppc_tce_validate(stt, tce);
593 	if (ret != H_SUCCESS)
594 		goto unlock_exit;
595 
596 	dir = iommu_tce_direction(tce);
597 
598 	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
599 		ret = H_PARAMETER;
600 		goto unlock_exit;
601 	}
602 
603 	entry = ioba >> stt->page_shift;
604 
605 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
606 		if (dir == DMA_NONE)
607 			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
608 					stit->tbl, entry);
609 		else
610 			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
611 					entry, ua, dir);
612 
613 
614 		if (ret != H_SUCCESS) {
615 			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
616 			goto unlock_exit;
617 		}
618 	}
619 
620 	kvmppc_tce_put(stt, entry, tce);
621 
622 unlock_exit:
623 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
624 
625 	return ret;
626 }
627 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
628 
629 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
630 		unsigned long liobn, unsigned long ioba,
631 		unsigned long tce_list, unsigned long npages)
632 {
633 	struct kvmppc_spapr_tce_table *stt;
634 	long i, ret = H_SUCCESS, idx;
635 	unsigned long entry, ua = 0;
636 	u64 __user *tces;
637 	u64 tce;
638 	struct kvmppc_spapr_tce_iommu_table *stit;
639 
640 	stt = kvmppc_find_table(vcpu->kvm, liobn);
641 	if (!stt)
642 		return H_TOO_HARD;
643 
644 	entry = ioba >> stt->page_shift;
645 	/*
646 	 * SPAPR spec says that the maximum size of the list is 512 TCEs
647 	 * so the whole table fits in 4K page
648 	 */
649 	if (npages > 512)
650 		return H_PARAMETER;
651 
652 	if (tce_list & (SZ_4K - 1))
653 		return H_PARAMETER;
654 
655 	ret = kvmppc_ioba_validate(stt, ioba, npages);
656 	if (ret != H_SUCCESS)
657 		return ret;
658 
659 	idx = srcu_read_lock(&vcpu->kvm->srcu);
660 	if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
661 		ret = H_TOO_HARD;
662 		goto unlock_exit;
663 	}
664 	tces = (u64 __user *) ua;
665 
666 	for (i = 0; i < npages; ++i) {
667 		if (get_user(tce, tces + i)) {
668 			ret = H_TOO_HARD;
669 			goto unlock_exit;
670 		}
671 		tce = be64_to_cpu(tce);
672 
673 		ret = kvmppc_tce_validate(stt, tce);
674 		if (ret != H_SUCCESS)
675 			goto unlock_exit;
676 	}
677 
678 	for (i = 0; i < npages; ++i) {
679 		/*
680 		 * This looks unsafe, because we validate, then regrab
681 		 * the TCE from userspace which could have been changed by
682 		 * another thread.
683 		 *
684 		 * But it actually is safe, because the relevant checks will be
685 		 * re-executed in the following code.  If userspace tries to
686 		 * change this dodgily it will result in a messier failure mode
687 		 * but won't threaten the host.
688 		 */
689 		if (get_user(tce, tces + i)) {
690 			ret = H_TOO_HARD;
691 			goto unlock_exit;
692 		}
693 		tce = be64_to_cpu(tce);
694 
695 		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
696 			ret = H_PARAMETER;
697 			goto unlock_exit;
698 		}
699 
700 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
701 			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
702 					stit->tbl, entry + i, ua,
703 					iommu_tce_direction(tce));
704 
705 			if (ret != H_SUCCESS) {
706 				kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
707 						 entry + i);
708 				goto unlock_exit;
709 			}
710 		}
711 
712 		kvmppc_tce_put(stt, entry + i, tce);
713 	}
714 
715 unlock_exit:
716 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
717 
718 	return ret;
719 }
720 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
721 
722 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
723 		unsigned long liobn, unsigned long ioba,
724 		unsigned long tce_value, unsigned long npages)
725 {
726 	struct kvmppc_spapr_tce_table *stt;
727 	long i, ret;
728 	struct kvmppc_spapr_tce_iommu_table *stit;
729 
730 	stt = kvmppc_find_table(vcpu->kvm, liobn);
731 	if (!stt)
732 		return H_TOO_HARD;
733 
734 	ret = kvmppc_ioba_validate(stt, ioba, npages);
735 	if (ret != H_SUCCESS)
736 		return ret;
737 
738 	/* Check permission bits only to allow userspace poison TCE for debug */
739 	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
740 		return H_PARAMETER;
741 
742 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
743 		unsigned long entry = ioba >> stt->page_shift;
744 
745 		for (i = 0; i < npages; ++i) {
746 			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
747 					stit->tbl, entry + i);
748 
749 			if (ret == H_SUCCESS)
750 				continue;
751 
752 			if (ret == H_TOO_HARD)
753 				return ret;
754 
755 			WARN_ON_ONCE(1);
756 			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
757 		}
758 	}
759 
760 	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
761 		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
762 
763 	return ret;
764 }
765 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
766 
767 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
768 		      unsigned long ioba)
769 {
770 	struct kvmppc_spapr_tce_table *stt;
771 	long ret;
772 	unsigned long idx;
773 	struct page *page;
774 	u64 *tbl;
775 
776 	stt = kvmppc_find_table(vcpu->kvm, liobn);
777 	if (!stt)
778 		return H_TOO_HARD;
779 
780 	ret = kvmppc_ioba_validate(stt, ioba, 1);
781 	if (ret != H_SUCCESS)
782 		return ret;
783 
784 	idx = (ioba >> stt->page_shift) - stt->offset;
785 	page = stt->pages[idx / TCES_PER_PAGE];
786 	if (!page) {
787 		kvmppc_set_gpr(vcpu, 4, 0);
788 		return H_SUCCESS;
789 	}
790 	tbl = (u64 *)page_address(page);
791 
792 	kvmppc_set_gpr(vcpu, 4, tbl[idx % TCES_PER_PAGE]);
793 
794 	return H_SUCCESS;
795 }
796 EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
797