xref: /linux/drivers/dma-buf/udmabuf.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/cred.h>
3 #include <linux/device.h>
4 #include <linux/dma-buf.h>
5 #include <linux/dma-resv.h>
6 #include <linux/highmem.h>
7 #include <linux/init.h>
8 #include <linux/kernel.h>
9 #include <linux/memfd.h>
10 #include <linux/miscdevice.h>
11 #include <linux/module.h>
12 #include <linux/shmem_fs.h>
13 #include <linux/hugetlb.h>
14 #include <linux/slab.h>
15 #include <linux/udmabuf.h>
16 #include <linux/vmalloc.h>
17 #include <linux/iosys-map.h>
18 
/*
 * Upper bound on udmabuf_create_list->count accepted by the
 * UDMABUF_CREATE_LIST ioctl. Exposed as a module parameter with mode 0644.
 */
static int list_limit = 1024;
module_param(list_limit, int, 0644);
MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024.");

/*
 * Upper bound, in megabytes, on the total size of a single udmabuf. It is
 * converted to a page count in udmabuf_create(). Exposed as a module
 * parameter with mode 0644.
 */
static int size_limit_mb = 64;
module_param(size_limit_mb, int, 0644);
MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
26 
/* Per-buffer state, stored as the priv pointer of the exported dma-buf. */
struct udmabuf {
	/* Number of valid entries in pages. */
	pgoff_t pagecount;
	/* One entry per page of the buffer, in buffer order. */
	struct page **pages;

	/*
	 * Unlike pages, pinned_folios is only used for unpinning.
	 * nr_pinned is therefore not the same as pagecount: pinned_folios
	 * only records the folios that were pinned by udmabuf_create().
	 * Note that, since a folio may be pinned multiple times, the same
	 * folio can appear in pinned_folios multiple times, once for each
	 * time it was pinned during creation.
	 */
	pgoff_t nr_pinned;
	struct folio **pinned_folios;

	/* Mapping for CPU access, created lazily by begin_cpu_udmabuf(). */
	struct sg_table *sg;
	/* DMA direction sg was mapped with; used to unmap it on release. */
	enum dma_data_direction sg_dir;
	/* The udmabuf misc device; its this_device is used for DMA mapping. */
	struct miscdevice *device;
};
46 
47 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
48 {
49 	struct vm_area_struct *vma = vmf->vma;
50 	struct udmabuf *ubuf = vma->vm_private_data;
51 	pgoff_t pgoff = vmf->pgoff;
52 	unsigned long addr, pfn;
53 	vm_fault_t ret;
54 
55 	if (pgoff >= ubuf->pagecount)
56 		return VM_FAULT_SIGBUS;
57 
58 	pfn = page_to_pfn(ubuf->pages[pgoff]);
59 
60 	ret = vmf_insert_pfn(vma, vmf->address, pfn);
61 	if (ret & VM_FAULT_ERROR)
62 		return ret;
63 
64 	/* pre fault */
65 	pgoff = vma->vm_pgoff;
66 	addr = vma->vm_start;
67 
68 	for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) {
69 		if (addr == vmf->address)
70 			continue;
71 
72 		if (WARN_ON(pgoff >= ubuf->pagecount))
73 			break;
74 
75 		pfn = page_to_pfn(ubuf->pages[pgoff]);
76 
77 		/**
78 		 * If the below vmf_insert_pfn() fails, we do not return an
79 		 * error here during this pre-fault step. However, an error
80 		 * will be returned if the failure occurs when the addr is
81 		 * truly accessed.
82 		 */
83 		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
84 			break;
85 	}
86 
87 	return ret;
88 }
89 
/* VMA operations for mmap()ed udmabufs; pages are inserted on fault. */
static const struct vm_operations_struct udmabuf_vm_ops = {
	.fault = udmabuf_vm_fault,
};
93 
94 static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
95 {
96 	struct udmabuf *ubuf = buf->priv;
97 
98 	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
99 		return -EINVAL;
100 
101 	vma->vm_ops = &udmabuf_vm_ops;
102 	vma->vm_private_data = ubuf;
103 	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
104 	return 0;
105 }
106 
107 static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
108 {
109 	struct udmabuf *ubuf = buf->priv;
110 	void *vaddr;
111 
112 	dma_resv_assert_held(buf->resv);
113 
114 	vaddr = vm_map_ram(ubuf->pages, ubuf->pagecount, -1);
115 	if (!vaddr)
116 		return -EINVAL;
117 
118 	iosys_map_set_vaddr(map, vaddr);
119 	return 0;
120 }
121 
122 static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
123 {
124 	struct udmabuf *ubuf = buf->priv;
125 
126 	dma_resv_assert_held(buf->resv);
127 
128 	vm_unmap_ram(map->vaddr, ubuf->pagecount);
129 }
130 
131 static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
132 				     enum dma_data_direction direction)
133 {
134 	struct udmabuf *ubuf = buf->priv;
135 	struct sg_table *sg;
136 	int ret;
137 
138 	sg = kzalloc_obj(*sg);
139 	if (!sg)
140 		return ERR_PTR(-ENOMEM);
141 
142 	ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount, 0,
143 					ubuf->pagecount << PAGE_SHIFT,
144 					GFP_KERNEL);
145 	if (ret < 0)
146 		goto err_alloc;
147 
148 	ret = dma_map_sgtable(dev, sg, direction, DMA_ATTR_SKIP_CPU_SYNC);
149 	if (ret < 0)
150 		goto err_map;
151 	return sg;
152 
153 err_map:
154 	sg_free_table(sg);
155 err_alloc:
156 	kfree(sg);
157 	return ERR_PTR(ret);
158 }
159 
160 static void put_sg_table(struct device *dev, struct sg_table *sg,
161 			 enum dma_data_direction direction)
162 {
163 	dma_unmap_sgtable(dev, sg, direction, DMA_ATTR_SKIP_CPU_SYNC);
164 	sg_free_table(sg);
165 	kfree(sg);
166 }
167 
168 static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
169 				    enum dma_data_direction direction)
170 {
171 	return get_sg_table(at->dev, at->dmabuf, direction);
172 }
173 
174 static void unmap_udmabuf(struct dma_buf_attachment *at,
175 			  struct sg_table *sg,
176 			  enum dma_data_direction direction)
177 {
178 	return put_sg_table(at->dev, sg, direction);
179 }
180 
181 static void unpin_all_folios(struct udmabuf *ubuf)
182 {
183 	pgoff_t i;
184 
185 	for (i = 0; i < ubuf->nr_pinned; ++i)
186 		unpin_folio(ubuf->pinned_folios[i]);
187 
188 	kvfree(ubuf->pinned_folios);
189 }
190 
191 static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt)
192 {
193 	ubuf->pages = kvmalloc_objs(*ubuf->pages, pgcnt);
194 	if (!ubuf->pages)
195 		return -ENOMEM;
196 
197 	ubuf->pinned_folios = kvmalloc_objs(*ubuf->pinned_folios, pgcnt);
198 	if (!ubuf->pinned_folios)
199 		return -ENOMEM;
200 
201 	return 0;
202 }
203 
204 static __always_inline void deinit_udmabuf(struct udmabuf *ubuf)
205 {
206 	unpin_all_folios(ubuf);
207 	kvfree(ubuf->pages);
208 }
209 
210 static void release_udmabuf(struct dma_buf *buf)
211 {
212 	struct udmabuf *ubuf = buf->priv;
213 	struct device *dev = ubuf->device->this_device;
214 
215 	if (ubuf->sg)
216 		put_sg_table(dev, ubuf->sg, ubuf->sg_dir);
217 
218 	deinit_udmabuf(ubuf);
219 	kfree(ubuf);
220 }
221 
222 static int begin_cpu_udmabuf(struct dma_buf *buf,
223 			     enum dma_data_direction direction)
224 {
225 	struct udmabuf *ubuf = buf->priv;
226 	struct device *dev = ubuf->device->this_device;
227 	int ret = 0;
228 
229 	if (!ubuf->sg) {
230 		ubuf->sg = get_sg_table(dev, buf, direction);
231 		if (IS_ERR(ubuf->sg)) {
232 			ret = PTR_ERR(ubuf->sg);
233 			ubuf->sg = NULL;
234 		} else {
235 			ubuf->sg_dir = direction;
236 		}
237 	} else {
238 		dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction);
239 	}
240 
241 	return ret;
242 }
243 
244 static int end_cpu_udmabuf(struct dma_buf *buf,
245 			   enum dma_data_direction direction)
246 {
247 	struct udmabuf *ubuf = buf->priv;
248 	struct device *dev = ubuf->device->this_device;
249 
250 	if (!ubuf->sg)
251 		return -EINVAL;
252 
253 	dma_sync_sgtable_for_device(dev, ubuf->sg, direction);
254 	return 0;
255 }
256 
/* dma-buf exporter callbacks implemented by this driver. */
static const struct dma_buf_ops udmabuf_ops = {
	.map_dma_buf	   = map_udmabuf,
	.unmap_dma_buf	   = unmap_udmabuf,
	.release	   = release_udmabuf,
	.mmap		   = mmap_udmabuf,
	.vmap		   = vmap_udmabuf,
	.vunmap		   = vunmap_udmabuf,
	.begin_cpu_access  = begin_cpu_udmabuf,
	.end_cpu_access    = end_cpu_udmabuf,
};
267 
/* Seals a memfd must carry to be accepted by check_memfd_seals(). */
#define SEALS_WANTED (F_SEAL_SHRINK)
/* Seals a memfd must not carry to be accepted by check_memfd_seals(). */
#define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
270 
271 static int check_memfd_seals(struct file *memfd)
272 {
273 	int seals;
274 
275 	if (!shmem_file(memfd) && !is_file_hugepages(memfd))
276 		return -EBADFD;
277 
278 	seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
279 	if (seals == -EINVAL)
280 		return -EBADFD;
281 
282 	if ((seals & SEALS_WANTED) != SEALS_WANTED ||
283 	    (seals & SEALS_DENIED) != 0)
284 		return -EINVAL;
285 
286 	return 0;
287 }
288 
289 static struct dma_buf *export_udmabuf(struct udmabuf *ubuf,
290 				      struct miscdevice *device)
291 {
292 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
293 
294 	ubuf->device = device;
295 	exp_info.ops  = &udmabuf_ops;
296 	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
297 	exp_info.priv = ubuf;
298 	exp_info.flags = O_RDWR;
299 
300 	return dma_buf_export(&exp_info);
301 }
302 
303 static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
304 			       loff_t start, loff_t size, struct folio **folios)
305 {
306 	pgoff_t nr_pinned = ubuf->nr_pinned;
307 	pgoff_t upgcnt = ubuf->pagecount;
308 	u32 cur_folio, cur_pgcnt;
309 	pgoff_t pgoff, pgcnt;
310 	long nr_folios;
311 	loff_t end;
312 
313 	pgcnt = size >> PAGE_SHIFT;
314 	end = start + (pgcnt << PAGE_SHIFT) - 1;
315 	nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff);
316 	if (nr_folios <= 0)
317 		return nr_folios ? nr_folios : -EINVAL;
318 
319 	cur_pgcnt = 0;
320 	for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) {
321 		pgoff_t subpgoff = pgoff;
322 		size_t fsize = folio_size(folios[cur_folio]);
323 
324 		ubuf->pinned_folios[nr_pinned++] = folios[cur_folio];
325 
326 		for (; subpgoff < fsize; subpgoff += PAGE_SIZE) {
327 			ubuf->pages[upgcnt] = folio_page(folios[cur_folio],
328 						subpgoff >> PAGE_SHIFT);
329 			++upgcnt;
330 
331 			if (++cur_pgcnt >= pgcnt)
332 				goto end;
333 		}
334 
335 		/**
336 		 * In a given range, only the first subpage of the first folio
337 		 * has an offset, that is returned by memfd_pin_folios().
338 		 * The first subpages of other folios (in the range) have an
339 		 * offset of 0.
340 		 */
341 		pgoff = 0;
342 	}
343 end:
344 	ubuf->pagecount = upgcnt;
345 	ubuf->nr_pinned = nr_pinned;
346 	return 0;
347 }
348 
349 static long udmabuf_create(struct miscdevice *device,
350 			   struct udmabuf_create_list *head,
351 			   struct udmabuf_create_item *list)
352 {
353 	unsigned long max_nr_folios = 0;
354 	struct folio **folios = NULL;
355 	pgoff_t pgcnt = 0, pglimit;
356 	struct udmabuf *ubuf;
357 	struct dma_buf *dmabuf;
358 	long ret = -EINVAL;
359 	u32 i, flags;
360 
361 	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
362 	if (!ubuf)
363 		return -ENOMEM;
364 
365 	pglimit = ((u64)size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
366 	for (i = 0; i < head->count; i++) {
367 		pgoff_t subpgcnt;
368 
369 		if (!PAGE_ALIGNED(list[i].offset))
370 			goto err_noinit;
371 		if (!PAGE_ALIGNED(list[i].size))
372 			goto err_noinit;
373 
374 		subpgcnt = list[i].size >> PAGE_SHIFT;
375 		pgcnt += subpgcnt;
376 		if (pgcnt > pglimit)
377 			goto err_noinit;
378 
379 		max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios);
380 	}
381 
382 	if (!pgcnt)
383 		goto err_noinit;
384 
385 	ret = init_udmabuf(ubuf, pgcnt);
386 	if (ret)
387 		goto err;
388 
389 	folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL);
390 	if (!folios) {
391 		ret = -ENOMEM;
392 		goto err;
393 	}
394 
395 	for (i = 0; i < head->count; i++) {
396 		struct file *memfd = fget(list[i].memfd);
397 
398 		if (!memfd) {
399 			ret = -EBADFD;
400 			goto err;
401 		}
402 
403 		/*
404 		 * Take the inode lock to protect against concurrent
405 		 * memfd_add_seals(), which takes this lock in write mode.
406 		 */
407 		inode_lock_shared(file_inode(memfd));
408 		ret = check_memfd_seals(memfd);
409 		if (ret)
410 			goto out_unlock;
411 
412 		ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
413 					 list[i].size, folios);
414 out_unlock:
415 		inode_unlock_shared(file_inode(memfd));
416 		fput(memfd);
417 		if (ret)
418 			goto err;
419 	}
420 
421 	flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
422 	dmabuf = export_udmabuf(ubuf, device);
423 	if (IS_ERR(dmabuf)) {
424 		ret = PTR_ERR(dmabuf);
425 		goto err;
426 	}
427 	/*
428 	 * Ownership of ubuf is held by the dmabuf from here.
429 	 * If the following dma_buf_fd() fails, dma_buf_put() cleans up both the
430 	 * dmabuf and the ubuf (through udmabuf_ops.release).
431 	 */
432 
433 	ret = dma_buf_fd(dmabuf, flags);
434 	if (ret < 0)
435 		dma_buf_put(dmabuf);
436 
437 	kvfree(folios);
438 	return ret;
439 
440 err:
441 	deinit_udmabuf(ubuf);
442 err_noinit:
443 	kfree(ubuf);
444 	kvfree(folios);
445 	return ret;
446 }
447 
448 static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
449 {
450 	struct udmabuf_create create;
451 	struct udmabuf_create_list head;
452 	struct udmabuf_create_item list;
453 
454 	if (copy_from_user(&create, (void __user *)arg,
455 			   sizeof(create)))
456 		return -EFAULT;
457 
458 	head.flags  = create.flags;
459 	head.count  = 1;
460 	list.memfd  = create.memfd;
461 	list.offset = create.offset;
462 	list.size   = create.size;
463 
464 	return udmabuf_create(filp->private_data, &head, &list);
465 }
466 
467 static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
468 {
469 	struct udmabuf_create_list head;
470 	struct udmabuf_create_item *list;
471 	int ret = -EINVAL;
472 	u32 lsize;
473 
474 	if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
475 		return -EFAULT;
476 	if (head.count > list_limit)
477 		return -EINVAL;
478 	lsize = sizeof(struct udmabuf_create_item) * head.count;
479 	list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
480 	if (IS_ERR(list))
481 		return PTR_ERR(list);
482 
483 	ret = udmabuf_create(filp->private_data, &head, list);
484 	kfree(list);
485 	return ret;
486 }
487 
488 static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
489 			  unsigned long arg)
490 {
491 	long ret;
492 
493 	switch (ioctl) {
494 	case UDMABUF_CREATE:
495 		ret = udmabuf_ioctl_create(filp, arg);
496 		break;
497 	case UDMABUF_CREATE_LIST:
498 		ret = udmabuf_ioctl_create_list(filp, arg);
499 		break;
500 	default:
501 		ret = -ENOTTY;
502 		break;
503 	}
504 	return ret;
505 }
506 
/*
 * File operations of the udmabuf device. Only ioctls are implemented; the
 * same handler also serves compat ioctls when CONFIG_COMPAT is enabled.
 */
static const struct file_operations udmabuf_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl = udmabuf_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = udmabuf_ioctl,
#endif
};
514 
/* The "udmabuf" misc device, registered with a dynamically assigned minor. */
static struct miscdevice udmabuf_misc = {
	.minor          = MISC_DYNAMIC_MINOR,
	.name           = "udmabuf",
	.fops           = &udmabuf_fops,
};
520 
521 static int __init udmabuf_dev_init(void)
522 {
523 	int ret;
524 
525 	ret = misc_register(&udmabuf_misc);
526 	if (ret < 0) {
527 		pr_err("Could not initialize udmabuf device\n");
528 		return ret;
529 	}
530 
531 	ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
532 					   DMA_BIT_MASK(64));
533 	if (ret < 0) {
534 		pr_err("Could not setup DMA mask for udmabuf device\n");
535 		misc_deregister(&udmabuf_misc);
536 		return ret;
537 	}
538 
539 	return 0;
540 }
541 
542 static void __exit udmabuf_dev_exit(void)
543 {
544 	misc_deregister(&udmabuf_misc);
545 }
546 
/* Module entry and exit points. */
module_init(udmabuf_dev_init)
module_exit(udmabuf_dev_exit)

MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
551