xref: /linux/drivers/dma-buf/udmabuf.c (revision af215c980c1fbf1ca01675b128b0dd194745b880)
1  // SPDX-License-Identifier: GPL-2.0
2  #include <linux/cred.h>
3  #include <linux/device.h>
4  #include <linux/dma-buf.h>
5  #include <linux/dma-resv.h>
6  #include <linux/highmem.h>
7  #include <linux/init.h>
8  #include <linux/kernel.h>
9  #include <linux/memfd.h>
10  #include <linux/miscdevice.h>
11  #include <linux/module.h>
12  #include <linux/shmem_fs.h>
13  #include <linux/hugetlb.h>
14  #include <linux/slab.h>
15  #include <linux/udmabuf.h>
16  #include <linux/vmalloc.h>
17  #include <linux/iosys-map.h>
18  
19  static int list_limit = 1024;
20  module_param(list_limit, int, 0644);
21  MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024.");
22  
23  static int size_limit_mb = 64;
24  module_param(size_limit_mb, int, 0644);
25  MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
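
/*
 * Note (editorial addition): both limits are registered with mode 0644, so
 * they can also be adjusted at runtime through
 * /sys/module/udmabuf/parameters/list_limit and
 * /sys/module/udmabuf/parameters/size_limit_mb.
 */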
26  
27  struct udmabuf {
28  	pgoff_t pagecount;
29  	struct folio **folios;
30  
31  	/**
32  	 * Unlike folios, pinned_folios is only used for unpinning, so
33  	 * nr_pinned is not necessarily equal to pagecount: pinned_folios
34  	 * records each folio as it was pinned during udmabuf_create.
35  	 * Note that, since a folio may be pinned multiple times, it can
36  	 * appear in pinned_folios multiple times, once for each pin taken
37  	 * at creation time.
38  	 */
39  	pgoff_t nr_pinned;
40  	struct folio **pinned_folios;
41  
42  	struct sg_table *sg;
43  	struct miscdevice *device;
44  	pgoff_t *offsets;
45  };
46  
47  static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
48  {
49  	struct vm_area_struct *vma = vmf->vma;
50  	struct udmabuf *ubuf = vma->vm_private_data;
51  	pgoff_t pgoff = vmf->pgoff;
52  	unsigned long addr, pfn;
53  	vm_fault_t ret;
54  
55  	if (pgoff >= ubuf->pagecount)
56  		return VM_FAULT_SIGBUS;
57  
58  	pfn = folio_pfn(ubuf->folios[pgoff]);
59  	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
60  
61  	ret = vmf_insert_pfn(vma, vmf->address, pfn);
62  	if (ret & VM_FAULT_ERROR)
63  		return ret;
64  
65  	/* pre fault */
66  	pgoff = vma->vm_pgoff;
67  	addr = vma->vm_start;
68  
69  	for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) {
70  		if (addr == vmf->address)
71  			continue;
72  
73  		if (WARN_ON(pgoff >= ubuf->pagecount))
74  			break;
75  
76  		pfn = folio_pfn(ubuf->folios[pgoff]);
77  		pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
78  
79  		/**
80  		 * If the vmf_insert_pfn() below fails, do not return an
81  		 * error from this pre-fault step; the error will be
82  		 * reported by the fault raised when the address is
83  		 * actually accessed.
84  		 */
85  		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
86  			break;
87  	}
88  
89  	return ret;
90  }
91  
92  static const struct vm_operations_struct udmabuf_vm_ops = {
93  	.fault = udmabuf_vm_fault,
94  };
95  
96  static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
97  {
98  	struct udmabuf *ubuf = buf->priv;
99  
100  	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
101  		return -EINVAL;
102  
103  	vma->vm_ops = &udmabuf_vm_ops;
104  	vma->vm_private_data = ubuf;
105  	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
106  	return 0;
107  }
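
/*
 * Userspace sketch (illustrative, not part of the driver): a dma-buf fd
 * returned by UDMABUF_CREATE can be mapped directly. MAP_SHARED is required
 * because mmap_udmabuf() rejects private mappings, and the pages are then
 * supplied by udmabuf_vm_fault() above. "udmabuf_fd" and "size" are
 * placeholders.
 *
 *	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		       udmabuf_fd, 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap(udmabuf)");
 */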
108  
109  static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
110  {
111  	struct udmabuf *ubuf = buf->priv;
112  	unsigned long *pfns;
113  	void *vaddr;
114  	pgoff_t pg;
115  
116  	dma_resv_assert_held(buf->resv);
117  
118  	/**
119  	 * HVO (HugeTLB Vmemmap Optimization) may free tail struct pages,
120  	 * so map each folio into the vmalloc area by pfn instead.
121  	 */
122  	pfns = kvmalloc_array(ubuf->pagecount, sizeof(*pfns), GFP_KERNEL);
123  	if (!pfns)
124  		return -ENOMEM;
125  
126  	for (pg = 0; pg < ubuf->pagecount; pg++) {
127  		unsigned long pfn = folio_pfn(ubuf->folios[pg]);
128  
129  		pfn += ubuf->offsets[pg] >> PAGE_SHIFT;
130  		pfns[pg] = pfn;
131  	}
132  
133  	vaddr = vmap_pfn(pfns, ubuf->pagecount, PAGE_KERNEL);
134  	kvfree(pfns);
135  	if (!vaddr)
136  		return -EINVAL;
137  
138  	iosys_map_set_vaddr(map, vaddr);
139  	return 0;
140  }
141  
142  static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
143  {
144  	struct udmabuf *ubuf = buf->priv;
145  
146  	dma_resv_assert_held(buf->resv);
147  
148  	vm_unmap_ram(map->vaddr, ubuf->pagecount);
149  }
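
/*
 * In-kernel importer sketch (illustrative): vmap_udmabuf()/vunmap_udmabuf()
 * assert the reservation lock, so importers normally go through the dma-buf
 * helpers, e.g. the unlocked variants, rather than calling these ops
 * directly. "dmabuf" is a placeholder for an imported struct dma_buf pointer.
 *
 *	struct iosys_map map;
 *
 *	if (!dma_buf_vmap_unlocked(dmabuf, &map)) {
 *		... CPU access through map.vaddr ...
 *		dma_buf_vunmap_unlocked(dmabuf, &map);
 *	}
 */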
150  
151  static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
152  				     enum dma_data_direction direction)
153  {
154  	struct udmabuf *ubuf = buf->priv;
155  	struct sg_table *sg;
156  	struct scatterlist *sgl;
157  	unsigned int i = 0;
158  	int ret;
159  
160  	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
161  	if (!sg)
162  		return ERR_PTR(-ENOMEM);
163  
164  	ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
165  	if (ret < 0)
166  		goto err_alloc;
167  
168  	for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
169  		sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
170  			     ubuf->offsets[i]);
171  
172  	ret = dma_map_sgtable(dev, sg, direction, 0);
173  	if (ret < 0)
174  		goto err_map;
175  	return sg;
176  
177  err_map:
178  	sg_free_table(sg);
179  err_alloc:
180  	kfree(sg);
181  	return ERR_PTR(ret);
182  }
183  
184  static void put_sg_table(struct device *dev, struct sg_table *sg,
185  			 enum dma_data_direction direction)
186  {
187  	dma_unmap_sgtable(dev, sg, direction, 0);
188  	sg_free_table(sg);
189  	kfree(sg);
190  }
191  
192  static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
193  				    enum dma_data_direction direction)
194  {
195  	return get_sg_table(at->dev, at->dmabuf, direction);
196  }
197  
198  static void unmap_udmabuf(struct dma_buf_attachment *at,
199  			  struct sg_table *sg,
200  			  enum dma_data_direction direction)
201  {
202  	return put_sg_table(at->dev, sg, direction);
203  }
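
/*
 * In-kernel importer sketch (illustrative): map_udmabuf()/unmap_udmabuf()
 * are reached through the generic attachment API. "dev" and "dmabuf" are
 * placeholders for the importing device and the exported buffer.
 *
 *	struct dma_buf_attachment *attach;
 *	struct sg_table *sgt;
 *
 *	attach = dma_buf_attach(dmabuf, dev);
 *	if (IS_ERR(attach))
 *		return PTR_ERR(attach);
 *
 *	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);
 *	if (IS_ERR(sgt)) {
 *		dma_buf_detach(dmabuf, attach);
 *		return PTR_ERR(sgt);
 *	}
 *	... program the device with sgt ...
 *	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
 *	dma_buf_detach(dmabuf, attach);
 */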
204  
205  static void unpin_all_folios(struct udmabuf *ubuf)
206  {
207  	pgoff_t i;
208  
209  	for (i = 0; i < ubuf->nr_pinned; ++i)
210  		unpin_folio(ubuf->pinned_folios[i]);
211  
212  	kvfree(ubuf->pinned_folios);
213  }
214  
215  static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt)
216  {
217  	ubuf->folios = kvmalloc_array(pgcnt, sizeof(*ubuf->folios), GFP_KERNEL);
218  	if (!ubuf->folios)
219  		return -ENOMEM;
220  
221  	ubuf->offsets = kvcalloc(pgcnt, sizeof(*ubuf->offsets), GFP_KERNEL);
222  	if (!ubuf->offsets)
223  		return -ENOMEM;
224  
225  	ubuf->pinned_folios = kvmalloc_array(pgcnt,
226  					     sizeof(*ubuf->pinned_folios),
227  					     GFP_KERNEL);
228  	if (!ubuf->pinned_folios)
229  		return -ENOMEM;
230  
231  	return 0;
232  }
233  
234  static __always_inline void deinit_udmabuf(struct udmabuf *ubuf)
235  {
236  	unpin_all_folios(ubuf);
237  	kvfree(ubuf->offsets);
238  	kvfree(ubuf->folios);
239  }
240  
241  static void release_udmabuf(struct dma_buf *buf)
242  {
243  	struct udmabuf *ubuf = buf->priv;
244  	struct device *dev = ubuf->device->this_device;
245  
246  	if (ubuf->sg)
247  		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
248  
249  	deinit_udmabuf(ubuf);
250  	kfree(ubuf);
251  }
252  
253  static int begin_cpu_udmabuf(struct dma_buf *buf,
254  			     enum dma_data_direction direction)
255  {
256  	struct udmabuf *ubuf = buf->priv;
257  	struct device *dev = ubuf->device->this_device;
258  	int ret = 0;
259  
260  	if (!ubuf->sg) {
261  		ubuf->sg = get_sg_table(dev, buf, direction);
262  		if (IS_ERR(ubuf->sg)) {
263  			ret = PTR_ERR(ubuf->sg);
264  			ubuf->sg = NULL;
265  		}
266  	} else {
267  		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
268  				    direction);
269  	}
270  
271  	return ret;
272  }
273  
274  static int end_cpu_udmabuf(struct dma_buf *buf,
275  			   enum dma_data_direction direction)
276  {
277  	struct udmabuf *ubuf = buf->priv;
278  	struct device *dev = ubuf->device->this_device;
279  
280  	if (!ubuf->sg)
281  		return -EINVAL;
282  
283  	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
284  	return 0;
285  }
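
/*
 * Userspace sketch (illustrative): begin_cpu_udmabuf()/end_cpu_udmabuf() are
 * driven by DMA_BUF_IOCTL_SYNC from <linux/dma-buf.h>, bracketing CPU access
 * through an mmap of the buffer. "buffd" is a placeholder for the dma-buf fd.
 *
 *	struct dma_buf_sync sync = {
 *		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW,
 *	};
 *
 *	ioctl(buffd, DMA_BUF_IOCTL_SYNC, &sync);
 *	... CPU reads/writes via the mapping ...
 *	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
 *	ioctl(buffd, DMA_BUF_IOCTL_SYNC, &sync);
 */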
286  
287  static const struct dma_buf_ops udmabuf_ops = {
288  	.cache_sgt_mapping = true,
289  	.map_dma_buf	   = map_udmabuf,
290  	.unmap_dma_buf	   = unmap_udmabuf,
291  	.release	   = release_udmabuf,
292  	.mmap		   = mmap_udmabuf,
293  	.vmap		   = vmap_udmabuf,
294  	.vunmap		   = vunmap_udmabuf,
295  	.begin_cpu_access  = begin_cpu_udmabuf,
296  	.end_cpu_access    = end_cpu_udmabuf,
297  };
298  
299  #define SEALS_WANTED (F_SEAL_SHRINK)
300  #define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
301  
302  static int check_memfd_seals(struct file *memfd)
303  {
304  	int seals;
305  
306  	if (!shmem_file(memfd) && !is_file_hugepages(memfd))
307  		return -EBADFD;
308  
309  	seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
310  	if (seals == -EINVAL)
311  		return -EBADFD;
312  
313  	if ((seals & SEALS_WANTED) != SEALS_WANTED ||
314  	    (seals & SEALS_DENIED) != 0)
315  		return -EINVAL;
316  
317  	return 0;
318  }
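
/*
 * Userspace sketch (illustrative): a memfd that passes check_memfd_seals()
 * is created with sealing allowed, sized, and sealed against shrinking,
 * while the write seals are left unset. "size" is a placeholder and must be
 * page aligned.
 *
 *	int memfd = memfd_create("udmabuf-backing", MFD_ALLOW_SEALING);
 *
 *	ftruncate(memfd, size);
 *	fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
 */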
319  
320  static struct dma_buf *export_udmabuf(struct udmabuf *ubuf,
321  				      struct miscdevice *device)
322  {
323  	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
324  
325  	ubuf->device = device;
326  	exp_info.ops  = &udmabuf_ops;
327  	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
328  	exp_info.priv = ubuf;
329  	exp_info.flags = O_RDWR;
330  
331  	return dma_buf_export(&exp_info);
332  }
333  
334  static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
335  			       loff_t start, loff_t size, struct folio **folios)
336  {
337  	pgoff_t nr_pinned = ubuf->nr_pinned;
338  	pgoff_t upgcnt = ubuf->pagecount;
339  	u32 cur_folio, cur_pgcnt;
340  	pgoff_t pgoff, pgcnt;
341  	long nr_folios;
342  	loff_t end;
343  
344  	pgcnt = size >> PAGE_SHIFT;
345  	end = start + (pgcnt << PAGE_SHIFT) - 1;
346  	nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff);
347  	if (nr_folios <= 0)
348  		return nr_folios ? nr_folios : -EINVAL;
349  
350  	cur_pgcnt = 0;
351  	for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) {
352  		pgoff_t subpgoff = pgoff;
353  		size_t fsize = folio_size(folios[cur_folio]);
354  
355  		ubuf->pinned_folios[nr_pinned++] = folios[cur_folio];
356  
357  		for (; subpgoff < fsize; subpgoff += PAGE_SIZE) {
358  			ubuf->folios[upgcnt] = folios[cur_folio];
359  			ubuf->offsets[upgcnt] = subpgoff;
360  			++upgcnt;
361  
362  			if (++cur_pgcnt >= pgcnt)
363  				goto end;
364  		}
365  
366  		/**
367  		 * In a given range, only the first folio can start at a
368  		 * non-zero offset, which memfd_pin_folios() returns via
369  		 * pgoff; every later folio in the range starts at an
370  		 * offset of 0.
371  		 */
372  		pgoff = 0;
373  	}
374  end:
375  	ubuf->pagecount = upgcnt;
376  	ubuf->nr_pinned = nr_pinned;
377  	return 0;
378  }
379  
380  static long udmabuf_create(struct miscdevice *device,
381  			   struct udmabuf_create_list *head,
382  			   struct udmabuf_create_item *list)
383  {
384  	unsigned long max_nr_folios = 0;
385  	struct folio **folios = NULL;
386  	pgoff_t pgcnt = 0, pglimit;
387  	struct udmabuf *ubuf;
388  	struct dma_buf *dmabuf;
389  	long ret = -EINVAL;
390  	u32 i, flags;
391  
392  	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
393  	if (!ubuf)
394  		return -ENOMEM;
395  
396  	pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
397  	for (i = 0; i < head->count; i++) {
398  		pgoff_t subpgcnt;
399  
400  		if (!PAGE_ALIGNED(list[i].offset))
401  			goto err_noinit;
402  		if (!PAGE_ALIGNED(list[i].size))
403  			goto err_noinit;
404  
405  		subpgcnt = list[i].size >> PAGE_SHIFT;
406  		pgcnt += subpgcnt;
407  		if (pgcnt > pglimit)
408  			goto err_noinit;
409  
410  		max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios);
411  	}
412  
413  	if (!pgcnt)
414  		goto err_noinit;
415  
416  	ret = init_udmabuf(ubuf, pgcnt);
417  	if (ret)
418  		goto err;
419  
420  	folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL);
421  	if (!folios) {
422  		ret = -ENOMEM;
423  		goto err;
424  	}
425  
426  	for (i = 0; i < head->count; i++) {
427  		struct file *memfd = fget(list[i].memfd);
428  
429  		if (!memfd) {
430  			ret = -EBADFD;
431  			goto err;
432  		}
433  
434  		/*
435  		 * Take the inode lock to protect against concurrent
436  		 * memfd_add_seals(), which takes this lock in write mode.
437  		 */
438  		inode_lock_shared(file_inode(memfd));
439  		ret = check_memfd_seals(memfd);
440  		if (ret)
441  			goto out_unlock;
442  
443  		ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
444  					 list[i].size, folios);
445  out_unlock:
446  		inode_unlock_shared(file_inode(memfd));
447  		fput(memfd);
448  		if (ret)
449  			goto err;
450  	}
451  
452  	flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
453  	dmabuf = export_udmabuf(ubuf, device);
454  	if (IS_ERR(dmabuf)) {
455  		ret = PTR_ERR(dmabuf);
456  		goto err;
457  	}
458  	/*
459  	 * Ownership of ubuf is held by the dmabuf from here.
460  	 * If the following dma_buf_fd() fails, dma_buf_put() cleans up both the
461  	 * dmabuf and the ubuf (through udmabuf_ops.release).
462  	 */
463  
464  	ret = dma_buf_fd(dmabuf, flags);
465  	if (ret < 0)
466  		dma_buf_put(dmabuf);
467  
468  	kvfree(folios);
469  	return ret;
470  
471  err:
472  	deinit_udmabuf(ubuf);
473  err_noinit:
474  	kfree(ubuf);
475  	kvfree(folios);
476  	return ret;
477  }
478  
479  static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
480  {
481  	struct udmabuf_create create;
482  	struct udmabuf_create_list head;
483  	struct udmabuf_create_item list;
484  
485  	if (copy_from_user(&create, (void __user *)arg,
486  			   sizeof(create)))
487  		return -EFAULT;
488  
489  	head.flags  = create.flags;
490  	head.count  = 1;
491  	list.memfd  = create.memfd;
492  	list.offset = create.offset;
493  	list.size   = create.size;
494  
495  	return udmabuf_create(filp->private_data, &head, &list);
496  }
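
/*
 * Userspace sketch (illustrative): creating a udmabuf from a single sealed
 * memfd through /dev/udmabuf. "memfd" and "size" are placeholders, and the
 * ioctl returns the new dma-buf fd on success.
 *
 *	struct udmabuf_create create = {
 *		.memfd  = memfd,
 *		.flags  = UDMABUF_FLAGS_CLOEXEC,
 *		.offset = 0,
 *		.size   = size,
 *	};
 *	int devfd = open("/dev/udmabuf", O_RDWR);
 *	int buffd = ioctl(devfd, UDMABUF_CREATE, &create);
 */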
497  
498  static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
499  {
500  	struct udmabuf_create_list head;
501  	struct udmabuf_create_item *list;
502  	int ret = -EINVAL;
503  	u32 lsize;
504  
505  	if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
506  		return -EFAULT;
507  	if (head.count > list_limit)
508  		return -EINVAL;
509  	lsize = sizeof(struct udmabuf_create_item) * head.count;
510  	list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
511  	if (IS_ERR(list))
512  		return PTR_ERR(list);
513  
514  	ret = udmabuf_create(filp->private_data, &head, list);
515  	kfree(list);
516  	return ret;
517  }
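
/*
 * Userspace sketch (illustrative): UDMABUF_CREATE_LIST combines several
 * memfd ranges into one buffer. The layout assumed here is the uapi
 * udmabuf_create_list header followed directly by "count" udmabuf_create_item
 * entries, which matches how this handler reads the argument; fd_a/fd_b and
 * sz_a/sz_b are placeholders.
 *
 *	struct udmabuf_create_list *req;
 *
 *	req = calloc(1, sizeof(*req) + 2 * sizeof(struct udmabuf_create_item));
 *	req->count = 2;
 *	req->list[0] = (struct udmabuf_create_item){ .memfd = fd_a, .size = sz_a };
 *	req->list[1] = (struct udmabuf_create_item){ .memfd = fd_b, .size = sz_b };
 *	int buffd = ioctl(devfd, UDMABUF_CREATE_LIST, req);
 */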
518  
519  static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
520  			  unsigned long arg)
521  {
522  	long ret;
523  
524  	switch (ioctl) {
525  	case UDMABUF_CREATE:
526  		ret = udmabuf_ioctl_create(filp, arg);
527  		break;
528  	case UDMABUF_CREATE_LIST:
529  		ret = udmabuf_ioctl_create_list(filp, arg);
530  		break;
531  	default:
532  		ret = -ENOTTY;
533  		break;
534  	}
535  	return ret;
536  }
537  
538  static const struct file_operations udmabuf_fops = {
539  	.owner		= THIS_MODULE,
540  	.unlocked_ioctl = udmabuf_ioctl,
541  #ifdef CONFIG_COMPAT
542  	.compat_ioctl   = udmabuf_ioctl,
543  #endif
544  };
545  
546  static struct miscdevice udmabuf_misc = {
547  	.minor          = MISC_DYNAMIC_MINOR,
548  	.name           = "udmabuf",
549  	.fops           = &udmabuf_fops,
550  };
551  
552  static int __init udmabuf_dev_init(void)
553  {
554  	int ret;
555  
556  	ret = misc_register(&udmabuf_misc);
557  	if (ret < 0) {
558  		pr_err("Could not initialize udmabuf device\n");
559  		return ret;
560  	}
561  
562  	ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
563  					   DMA_BIT_MASK(64));
564  	if (ret < 0) {
565  		pr_err("Could not setup DMA mask for udmabuf device\n");
566  		misc_deregister(&udmabuf_misc);
567  		return ret;
568  	}
569  
570  	return 0;
571  }
572  
573  static void __exit udmabuf_dev_exit(void)
574  {
575  	misc_deregister(&udmabuf_misc);
576  }
577  
578  module_init(udmabuf_dev_init)
579  module_exit(udmabuf_dev_exit)
580  
581  MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
582