xref: /linux/drivers/vdpa/vdpa_user/iova_domain.c (revision 566ab427f827b0256d3e8ce0235d088e6a9c28bd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * MMU-based software IOTLB.
4  *
5  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6  *
7  * Author: Xie Yongji <xieyongji@bytedance.com>
8  *
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/file.h>
13 #include <linux/anon_inodes.h>
14 #include <linux/highmem.h>
15 #include <linux/vmalloc.h>
16 #include <linux/vdpa.h>
17 
18 #include "iova_domain.h"
19 
20 static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
21 				 u64 start, u64 last,
22 				 u64 addr, unsigned int perm,
23 				 struct file *file, u64 offset)
24 {
25 	struct vdpa_map_file *map_file;
26 	int ret;
27 
28 	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
29 	if (!map_file)
30 		return -ENOMEM;
31 
32 	map_file->file = get_file(file);
33 	map_file->offset = offset;
34 
35 	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
36 					addr, perm, map_file);
37 	if (ret) {
38 		fput(map_file->file);
39 		kfree(map_file);
40 		return ret;
41 	}
42 	return 0;
43 }
44 
45 static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
46 				  u64 start, u64 last)
47 {
48 	struct vdpa_map_file *map_file;
49 	struct vhost_iotlb_map *map;
50 
51 	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
52 		map_file = (struct vdpa_map_file *)map->opaque;
53 		fput(map_file->file);
54 		kfree(map_file);
55 		vhost_iotlb_map_free(domain->iotlb, map);
56 	}
57 }
58 
59 int vduse_domain_set_map(struct vduse_iova_domain *domain,
60 			 struct vhost_iotlb *iotlb)
61 {
62 	struct vdpa_map_file *map_file;
63 	struct vhost_iotlb_map *map;
64 	u64 start = 0ULL, last = ULLONG_MAX;
65 	int ret;
66 
67 	spin_lock(&domain->iotlb_lock);
68 	vduse_iotlb_del_range(domain, start, last);
69 
70 	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
71 	     map = vhost_iotlb_itree_next(map, start, last)) {
72 		map_file = (struct vdpa_map_file *)map->opaque;
73 		ret = vduse_iotlb_add_range(domain, map->start, map->last,
74 					    map->addr, map->perm,
75 					    map_file->file,
76 					    map_file->offset);
77 		if (ret)
78 			goto err;
79 	}
80 	spin_unlock(&domain->iotlb_lock);
81 
82 	return 0;
83 err:
84 	vduse_iotlb_del_range(domain, start, last);
85 	spin_unlock(&domain->iotlb_lock);
86 	return ret;
87 }
88 
89 void vduse_domain_clear_map(struct vduse_iova_domain *domain,
90 			    struct vhost_iotlb *iotlb)
91 {
92 	struct vhost_iotlb_map *map;
93 	u64 start = 0ULL, last = ULLONG_MAX;
94 
95 	spin_lock(&domain->iotlb_lock);
96 	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
97 	     map = vhost_iotlb_itree_next(map, start, last)) {
98 		vduse_iotlb_del_range(domain, map->start, map->last);
99 	}
100 	spin_unlock(&domain->iotlb_lock);
101 }
102 
103 static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
104 					 u64 iova, u64 size, u64 paddr)
105 {
106 	struct vduse_bounce_map *map;
107 	u64 last = iova + size - 1;
108 
109 	while (iova <= last) {
110 		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
111 		if (!map->bounce_page) {
112 			map->bounce_page = alloc_page(GFP_ATOMIC);
113 			if (!map->bounce_page)
114 				return -ENOMEM;
115 		}
116 		map->orig_phys = paddr;
117 		paddr += PAGE_SIZE;
118 		iova += PAGE_SIZE;
119 	}
120 	return 0;
121 }
122 
123 static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
124 					   u64 iova, u64 size)
125 {
126 	struct vduse_bounce_map *map;
127 	u64 last = iova + size - 1;
128 
129 	while (iova <= last) {
130 		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
131 		map->orig_phys = INVALID_PHYS_ADDR;
132 		iova += PAGE_SIZE;
133 	}
134 }
135 
136 static void do_bounce(phys_addr_t orig, void *addr, size_t size,
137 		      enum dma_data_direction dir)
138 {
139 	unsigned long pfn = PFN_DOWN(orig);
140 	unsigned int offset = offset_in_page(orig);
141 	struct page *page;
142 	unsigned int sz = 0;
143 
144 	while (size) {
145 		sz = min_t(size_t, PAGE_SIZE - offset, size);
146 
147 		page = pfn_to_page(pfn);
148 		if (dir == DMA_TO_DEVICE)
149 			memcpy_from_page(addr, page, offset, sz);
150 		else
151 			memcpy_to_page(page, offset, addr, sz);
152 
153 		size -= sz;
154 		pfn++;
155 		addr += sz;
156 		offset = 0;
157 	}
158 }
159 
160 static void vduse_domain_bounce(struct vduse_iova_domain *domain,
161 				dma_addr_t iova, size_t size,
162 				enum dma_data_direction dir)
163 {
164 	struct vduse_bounce_map *map;
165 	struct page *page;
166 	unsigned int offset;
167 	void *addr;
168 	size_t sz;
169 
170 	if (iova >= domain->bounce_size)
171 		return;
172 
173 	while (size) {
174 		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
175 		offset = offset_in_page(iova);
176 		sz = min_t(size_t, PAGE_SIZE - offset, size);
177 
178 		if (WARN_ON(!map->bounce_page ||
179 			    map->orig_phys == INVALID_PHYS_ADDR))
180 			return;
181 
182 		page = domain->user_bounce_pages ?
183 		       map->user_bounce_page : map->bounce_page;
184 
185 		addr = kmap_local_page(page);
186 		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
187 		kunmap_local(addr);
188 		size -= sz;
189 		iova += sz;
190 	}
191 }
192 
193 static struct page *
194 vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
195 {
196 	u64 start = iova & PAGE_MASK;
197 	u64 last = start + PAGE_SIZE - 1;
198 	struct vhost_iotlb_map *map;
199 	struct page *page = NULL;
200 
201 	spin_lock(&domain->iotlb_lock);
202 	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
203 	if (!map)
204 		goto out;
205 
206 	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
207 	get_page(page);
208 out:
209 	spin_unlock(&domain->iotlb_lock);
210 
211 	return page;
212 }
213 
214 static struct page *
215 vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
216 {
217 	struct vduse_bounce_map *map;
218 	struct page *page = NULL;
219 
220 	read_lock(&domain->bounce_lock);
221 	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
222 	if (domain->user_bounce_pages || !map->bounce_page)
223 		goto out;
224 
225 	page = map->bounce_page;
226 	get_page(page);
227 out:
228 	read_unlock(&domain->bounce_lock);
229 
230 	return page;
231 }
232 
233 static void
234 vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
235 {
236 	struct vduse_bounce_map *map;
237 	unsigned long pfn, bounce_pfns;
238 
239 	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
240 
241 	for (pfn = 0; pfn < bounce_pfns; pfn++) {
242 		map = &domain->bounce_maps[pfn];
243 		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
244 			continue;
245 
246 		if (!map->bounce_page)
247 			continue;
248 
249 		__free_page(map->bounce_page);
250 		map->bounce_page = NULL;
251 	}
252 }
253 
254 int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
255 				       struct page **pages, int count)
256 {
257 	struct vduse_bounce_map *map;
258 	int i, ret;
259 
260 	/* Now we don't support partial mapping */
261 	if (count != (domain->bounce_size >> PAGE_SHIFT))
262 		return -EINVAL;
263 
264 	write_lock(&domain->bounce_lock);
265 	ret = -EEXIST;
266 	if (domain->user_bounce_pages)
267 		goto out;
268 
269 	for (i = 0; i < count; i++) {
270 		map = &domain->bounce_maps[i];
271 		if (map->bounce_page) {
272 			/* Copy kernel page to user page if it's in use */
273 			if (map->orig_phys != INVALID_PHYS_ADDR)
274 				memcpy_to_page(pages[i], 0,
275 					       page_address(map->bounce_page),
276 					       PAGE_SIZE);
277 		}
278 		map->user_bounce_page = pages[i];
279 		get_page(pages[i]);
280 	}
281 	domain->user_bounce_pages = true;
282 	ret = 0;
283 out:
284 	write_unlock(&domain->bounce_lock);
285 
286 	return ret;
287 }
288 
289 void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
290 {
291 	struct vduse_bounce_map *map;
292 	unsigned long i, count;
293 
294 	write_lock(&domain->bounce_lock);
295 	if (!domain->user_bounce_pages)
296 		goto out;
297 
298 	count = domain->bounce_size >> PAGE_SHIFT;
299 	for (i = 0; i < count; i++) {
300 		struct page *page = NULL;
301 
302 		map = &domain->bounce_maps[i];
303 		if (WARN_ON(!map->user_bounce_page))
304 			continue;
305 
306 		/* Copy user page to kernel page if it's in use */
307 		if (map->orig_phys != INVALID_PHYS_ADDR) {
308 			page = map->bounce_page;
309 			memcpy_from_page(page_address(page),
310 					 map->user_bounce_page, 0, PAGE_SIZE);
311 		}
312 		put_page(map->user_bounce_page);
313 		map->user_bounce_page = NULL;
314 	}
315 	domain->user_bounce_pages = false;
316 out:
317 	write_unlock(&domain->bounce_lock);
318 }
319 
320 void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
321 {
322 	if (!domain->bounce_map)
323 		return;
324 
325 	spin_lock(&domain->iotlb_lock);
326 	if (!domain->bounce_map)
327 		goto unlock;
328 
329 	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
330 	domain->bounce_map = 0;
331 unlock:
332 	spin_unlock(&domain->iotlb_lock);
333 }
334 
335 static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
336 {
337 	int ret = 0;
338 
339 	if (domain->bounce_map)
340 		return 0;
341 
342 	spin_lock(&domain->iotlb_lock);
343 	if (domain->bounce_map)
344 		goto unlock;
345 
346 	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
347 				    0, VHOST_MAP_RW, domain->file, 0);
348 	if (ret)
349 		goto unlock;
350 
351 	domain->bounce_map = 1;
352 unlock:
353 	spin_unlock(&domain->iotlb_lock);
354 	return ret;
355 }
356 
357 static dma_addr_t
358 vduse_domain_alloc_iova(struct iova_domain *iovad,
359 			unsigned long size, unsigned long limit)
360 {
361 	unsigned long shift = iova_shift(iovad);
362 	unsigned long iova_len = iova_align(iovad, size) >> shift;
363 	unsigned long iova_pfn;
364 
365 	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
366 
367 	return (dma_addr_t)iova_pfn << shift;
368 }
369 
370 static void vduse_domain_free_iova(struct iova_domain *iovad,
371 				   dma_addr_t iova, size_t size)
372 {
373 	unsigned long shift = iova_shift(iovad);
374 	unsigned long iova_len = iova_align(iovad, size) >> shift;
375 
376 	free_iova_fast(iovad, iova >> shift, iova_len);
377 }
378 
379 void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain,
380 				      dma_addr_t dma_addr, size_t size,
381 				      enum dma_data_direction dir)
382 {
383 	read_lock(&domain->bounce_lock);
384 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
385 		vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE);
386 	read_unlock(&domain->bounce_lock);
387 }
388 
389 void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain,
390 				      dma_addr_t dma_addr, size_t size,
391 				      enum dma_data_direction dir)
392 {
393 	read_lock(&domain->bounce_lock);
394 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
395 		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
396 	read_unlock(&domain->bounce_lock);
397 }
398 
399 dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
400 				 struct page *page, unsigned long offset,
401 				 size_t size, enum dma_data_direction dir,
402 				 unsigned long attrs)
403 {
404 	struct iova_domain *iovad = &domain->stream_iovad;
405 	unsigned long limit = domain->bounce_size - 1;
406 	phys_addr_t pa = page_to_phys(page) + offset;
407 	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
408 
409 	if (!iova)
410 		return DMA_MAPPING_ERROR;
411 
412 	if (vduse_domain_init_bounce_map(domain))
413 		goto err;
414 
415 	read_lock(&domain->bounce_lock);
416 	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
417 		goto err_unlock;
418 
419 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
420 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
421 		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
422 
423 	read_unlock(&domain->bounce_lock);
424 
425 	return iova;
426 err_unlock:
427 	read_unlock(&domain->bounce_lock);
428 err:
429 	vduse_domain_free_iova(iovad, iova, size);
430 	return DMA_MAPPING_ERROR;
431 }
432 
433 void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
434 			     dma_addr_t dma_addr, size_t size,
435 			     enum dma_data_direction dir, unsigned long attrs)
436 {
437 	struct iova_domain *iovad = &domain->stream_iovad;
438 	read_lock(&domain->bounce_lock);
439 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
440 	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
441 		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
442 
443 	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
444 	read_unlock(&domain->bounce_lock);
445 	vduse_domain_free_iova(iovad, dma_addr, size);
446 }
447 
448 void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
449 				  size_t size, dma_addr_t *dma_addr,
450 				  gfp_t flag, unsigned long attrs)
451 {
452 	struct iova_domain *iovad = &domain->consistent_iovad;
453 	unsigned long limit = domain->iova_limit;
454 	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
455 	void *orig = alloc_pages_exact(size, flag);
456 
457 	if (!iova || !orig)
458 		goto err;
459 
460 	spin_lock(&domain->iotlb_lock);
461 	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
462 				  virt_to_phys(orig), VHOST_MAP_RW,
463 				  domain->file, (u64)iova)) {
464 		spin_unlock(&domain->iotlb_lock);
465 		goto err;
466 	}
467 	spin_unlock(&domain->iotlb_lock);
468 
469 	*dma_addr = iova;
470 
471 	return orig;
472 err:
473 	*dma_addr = DMA_MAPPING_ERROR;
474 	if (orig)
475 		free_pages_exact(orig, size);
476 	if (iova)
477 		vduse_domain_free_iova(iovad, iova, size);
478 
479 	return NULL;
480 }
481 
482 void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
483 				void *vaddr, dma_addr_t dma_addr,
484 				unsigned long attrs)
485 {
486 	struct iova_domain *iovad = &domain->consistent_iovad;
487 	struct vhost_iotlb_map *map;
488 	struct vdpa_map_file *map_file;
489 	phys_addr_t pa;
490 
491 	spin_lock(&domain->iotlb_lock);
492 	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
493 				      (u64)dma_addr + size - 1);
494 	if (WARN_ON(!map)) {
495 		spin_unlock(&domain->iotlb_lock);
496 		return;
497 	}
498 	map_file = (struct vdpa_map_file *)map->opaque;
499 	fput(map_file->file);
500 	kfree(map_file);
501 	pa = map->addr;
502 	vhost_iotlb_map_free(domain->iotlb, map);
503 	spin_unlock(&domain->iotlb_lock);
504 
505 	vduse_domain_free_iova(iovad, dma_addr, size);
506 	free_pages_exact(phys_to_virt(pa), size);
507 }
508 
509 static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
510 {
511 	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
512 	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
513 	struct page *page;
514 
515 	if (!domain)
516 		return VM_FAULT_SIGBUS;
517 
518 	if (iova < domain->bounce_size)
519 		page = vduse_domain_get_bounce_page(domain, iova);
520 	else
521 		page = vduse_domain_get_coherent_page(domain, iova);
522 
523 	if (!page)
524 		return VM_FAULT_SIGBUS;
525 
526 	vmf->page = page;
527 
528 	return 0;
529 }
530 
/* VMA operations installed by vduse_domain_mmap(); pages are faulted in. */
static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};
534 
535 static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
536 {
537 	struct vduse_iova_domain *domain = file->private_data;
538 
539 	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
540 	vma->vm_private_data = domain;
541 	vma->vm_ops = &vduse_domain_mmap_ops;
542 
543 	return 0;
544 }
545 
546 static int vduse_domain_release(struct inode *inode, struct file *file)
547 {
548 	struct vduse_iova_domain *domain = file->private_data;
549 
550 	spin_lock(&domain->iotlb_lock);
551 	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
552 	vduse_domain_remove_user_bounce_pages(domain);
553 	vduse_domain_free_kernel_bounce_pages(domain);
554 	spin_unlock(&domain->iotlb_lock);
555 	put_iova_domain(&domain->stream_iovad);
556 	put_iova_domain(&domain->consistent_iovad);
557 	vhost_iotlb_free(domain->iotlb);
558 	vfree(domain->bounce_maps);
559 	kfree(domain);
560 
561 	return 0;
562 }
563 
/*
 * File operations of the anonymous "[vduse-domain]" file created in
 * vduse_domain_create(). The release handler frees the domain.
 */
static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};
569 
/*
 * Drop the reference on the domain's file that vduse_domain_create() stored
 * in domain->file. The domain itself is freed by vduse_domain_release(),
 * the file's release handler.
 */
void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}
574 
575 struct vduse_iova_domain *
576 vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
577 {
578 	struct vduse_iova_domain *domain;
579 	struct file *file;
580 	struct vduse_bounce_map *map;
581 	unsigned long pfn, bounce_pfns;
582 	int ret;
583 
584 	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
585 	if (iova_limit <= bounce_size)
586 		return NULL;
587 
588 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
589 	if (!domain)
590 		return NULL;
591 
592 	domain->iotlb = vhost_iotlb_alloc(0, 0);
593 	if (!domain->iotlb)
594 		goto err_iotlb;
595 
596 	domain->iova_limit = iova_limit;
597 	domain->bounce_size = PAGE_ALIGN(bounce_size);
598 	domain->bounce_maps = vzalloc(bounce_pfns *
599 				sizeof(struct vduse_bounce_map));
600 	if (!domain->bounce_maps)
601 		goto err_map;
602 
603 	for (pfn = 0; pfn < bounce_pfns; pfn++) {
604 		map = &domain->bounce_maps[pfn];
605 		map->orig_phys = INVALID_PHYS_ADDR;
606 	}
607 	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
608 				domain, O_RDWR);
609 	if (IS_ERR(file))
610 		goto err_file;
611 
612 	domain->file = file;
613 	rwlock_init(&domain->bounce_lock);
614 	spin_lock_init(&domain->iotlb_lock);
615 	init_iova_domain(&domain->stream_iovad,
616 			PAGE_SIZE, IOVA_START_PFN);
617 	ret = iova_domain_init_rcaches(&domain->stream_iovad);
618 	if (ret)
619 		goto err_iovad_stream;
620 	init_iova_domain(&domain->consistent_iovad,
621 			PAGE_SIZE, bounce_pfns);
622 	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
623 	if (ret)
624 		goto err_iovad_consistent;
625 
626 	return domain;
627 err_iovad_consistent:
628 	put_iova_domain(&domain->stream_iovad);
629 err_iovad_stream:
630 	fput(file);
631 err_file:
632 	vfree(domain->bounce_maps);
633 err_map:
634 	vhost_iotlb_free(domain->iotlb);
635 err_iotlb:
636 	kfree(domain);
637 	return NULL;
638 }
639 
/* Domain-layer setup hook: returns the result of iova_cache_get(). */
int vduse_domain_init(void)
{
	return iova_cache_get();
}
644 
/* Domain-layer teardown hook: pairs with vduse_domain_init(). */
void vduse_domain_exit(void)
{
	iova_cache_put();
}
649