1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/cred.h> 3 #include <linux/device.h> 4 #include <linux/dma-buf.h> 5 #include <linux/dma-resv.h> 6 #include <linux/highmem.h> 7 #include <linux/init.h> 8 #include <linux/kernel.h> 9 #include <linux/memfd.h> 10 #include <linux/miscdevice.h> 11 #include <linux/module.h> 12 #include <linux/shmem_fs.h> 13 #include <linux/hugetlb.h> 14 #include <linux/slab.h> 15 #include <linux/udmabuf.h> 16 #include <linux/vmalloc.h> 17 #include <linux/iosys-map.h> 18 19 static int list_limit = 1024; 20 module_param(list_limit, int, 0644); 21 MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024."); 22 23 static int size_limit_mb = 64; 24 module_param(size_limit_mb, int, 0644); 25 MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64."); 26 27 struct udmabuf { 28 pgoff_t pagecount; 29 struct page **pages; 30 31 /** 32 * Unlike pages, pinned_folios is only used for unpin. 33 * So, nr_pinned is not the same to pagecount, the pinned_folios 34 * only set each folio which already pinned when udmabuf_create. 35 * Note that, since a folio may be pinned multiple times, each folio 36 * can be added to pinned_folios multiple times, depending on how many 37 * times the folio has been pinned when create. 38 */ 39 pgoff_t nr_pinned; 40 struct folio **pinned_folios; 41 42 struct sg_table *sg; 43 enum dma_data_direction sg_dir; 44 struct miscdevice *device; 45 }; 46 47 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) 48 { 49 struct vm_area_struct *vma = vmf->vma; 50 struct udmabuf *ubuf = vma->vm_private_data; 51 pgoff_t pgoff = vmf->pgoff; 52 unsigned long addr, pfn; 53 vm_fault_t ret; 54 55 if (pgoff >= ubuf->pagecount) 56 return VM_FAULT_SIGBUS; 57 58 pfn = page_to_pfn(ubuf->pages[pgoff]); 59 60 ret = vmf_insert_pfn(vma, vmf->address, pfn); 61 if (ret & VM_FAULT_ERROR) 62 return ret; 63 64 /* pre fault */ 65 pgoff = vma->vm_pgoff; 66 addr = vma->vm_start; 67 68 for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) { 69 if (addr == vmf->address) 70 continue; 71 72 if (WARN_ON(pgoff >= ubuf->pagecount)) 73 break; 74 75 pfn = page_to_pfn(ubuf->pages[pgoff]); 76 77 /** 78 * If the below vmf_insert_pfn() fails, we do not return an 79 * error here during this pre-fault step. However, an error 80 * will be returned if the failure occurs when the addr is 81 * truly accessed. 82 */ 83 if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR) 84 break; 85 } 86 87 return ret; 88 } 89 90 static const struct vm_operations_struct udmabuf_vm_ops = { 91 .fault = udmabuf_vm_fault, 92 }; 93 94 static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma) 95 { 96 struct udmabuf *ubuf = buf->priv; 97 98 if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) 99 return -EINVAL; 100 101 vma->vm_ops = &udmabuf_vm_ops; 102 vma->vm_private_data = ubuf; 103 vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); 104 return 0; 105 } 106 107 static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map) 108 { 109 struct udmabuf *ubuf = buf->priv; 110 void *vaddr; 111 112 dma_resv_assert_held(buf->resv); 113 114 vaddr = vm_map_ram(ubuf->pages, ubuf->pagecount, -1); 115 if (!vaddr) 116 return -EINVAL; 117 118 iosys_map_set_vaddr(map, vaddr); 119 return 0; 120 } 121 122 static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map) 123 { 124 struct udmabuf *ubuf = buf->priv; 125 126 dma_resv_assert_held(buf->resv); 127 128 vm_unmap_ram(map->vaddr, ubuf->pagecount); 129 } 130 131 static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf, 132 enum dma_data_direction direction) 133 { 134 struct udmabuf *ubuf = buf->priv; 135 struct sg_table *sg; 136 int ret; 137 138 sg = kzalloc_obj(*sg); 139 if (!sg) 140 return ERR_PTR(-ENOMEM); 141 142 ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount, 0, 143 ubuf->pagecount << PAGE_SHIFT, 144 GFP_KERNEL); 145 if (ret < 0) 146 goto err_alloc; 147 148 ret = dma_map_sgtable(dev, sg, direction, DMA_ATTR_SKIP_CPU_SYNC); 149 if (ret < 0) 150 goto err_map; 151 return sg; 152 153 err_map: 154 sg_free_table(sg); 155 err_alloc: 156 kfree(sg); 157 return ERR_PTR(ret); 158 } 159 160 static void put_sg_table(struct device *dev, struct sg_table *sg, 161 enum dma_data_direction direction) 162 { 163 dma_unmap_sgtable(dev, sg, direction, DMA_ATTR_SKIP_CPU_SYNC); 164 sg_free_table(sg); 165 kfree(sg); 166 } 167 168 static struct sg_table *map_udmabuf(struct dma_buf_attachment *at, 169 enum dma_data_direction direction) 170 { 171 return get_sg_table(at->dev, at->dmabuf, direction); 172 } 173 174 static void unmap_udmabuf(struct dma_buf_attachment *at, 175 struct sg_table *sg, 176 enum dma_data_direction direction) 177 { 178 return put_sg_table(at->dev, sg, direction); 179 } 180 181 static void unpin_all_folios(struct udmabuf *ubuf) 182 { 183 pgoff_t i; 184 185 for (i = 0; i < ubuf->nr_pinned; ++i) 186 unpin_folio(ubuf->pinned_folios[i]); 187 188 kvfree(ubuf->pinned_folios); 189 } 190 191 static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt) 192 { 193 ubuf->pages = kvmalloc_objs(*ubuf->pages, pgcnt); 194 if (!ubuf->pages) 195 return -ENOMEM; 196 197 ubuf->pinned_folios = kvmalloc_objs(*ubuf->pinned_folios, pgcnt); 198 if (!ubuf->pinned_folios) 199 return -ENOMEM; 200 201 return 0; 202 } 203 204 static __always_inline void deinit_udmabuf(struct udmabuf *ubuf) 205 { 206 unpin_all_folios(ubuf); 207 kvfree(ubuf->pages); 208 } 209 210 static void release_udmabuf(struct dma_buf *buf) 211 { 212 struct udmabuf *ubuf = buf->priv; 213 struct device *dev = ubuf->device->this_device; 214 215 if (ubuf->sg) 216 put_sg_table(dev, ubuf->sg, ubuf->sg_dir); 217 218 deinit_udmabuf(ubuf); 219 kfree(ubuf); 220 } 221 222 static int begin_cpu_udmabuf(struct dma_buf *buf, 223 enum dma_data_direction direction) 224 { 225 struct udmabuf *ubuf = buf->priv; 226 struct device *dev = ubuf->device->this_device; 227 int ret = 0; 228 229 if (!ubuf->sg) { 230 ubuf->sg = get_sg_table(dev, buf, direction); 231 if (IS_ERR(ubuf->sg)) { 232 ret = PTR_ERR(ubuf->sg); 233 ubuf->sg = NULL; 234 } else { 235 ubuf->sg_dir = direction; 236 } 237 } else { 238 dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction); 239 } 240 241 return ret; 242 } 243 244 static int end_cpu_udmabuf(struct dma_buf *buf, 245 enum dma_data_direction direction) 246 { 247 struct udmabuf *ubuf = buf->priv; 248 struct device *dev = ubuf->device->this_device; 249 250 if (!ubuf->sg) 251 return -EINVAL; 252 253 dma_sync_sgtable_for_device(dev, ubuf->sg, direction); 254 return 0; 255 } 256 257 static const struct dma_buf_ops udmabuf_ops = { 258 .map_dma_buf = map_udmabuf, 259 .unmap_dma_buf = unmap_udmabuf, 260 .release = release_udmabuf, 261 .mmap = mmap_udmabuf, 262 .vmap = vmap_udmabuf, 263 .vunmap = vunmap_udmabuf, 264 .begin_cpu_access = begin_cpu_udmabuf, 265 .end_cpu_access = end_cpu_udmabuf, 266 }; 267 268 #define SEALS_WANTED (F_SEAL_SHRINK) 269 #define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) 270 271 static int check_memfd_seals(struct file *memfd) 272 { 273 int seals; 274 275 if (!shmem_file(memfd) && !is_file_hugepages(memfd)) 276 return -EBADFD; 277 278 seals = memfd_fcntl(memfd, F_GET_SEALS, 0); 279 if (seals == -EINVAL) 280 return -EBADFD; 281 282 if ((seals & SEALS_WANTED) != SEALS_WANTED || 283 (seals & SEALS_DENIED) != 0) 284 return -EINVAL; 285 286 return 0; 287 } 288 289 static struct dma_buf *export_udmabuf(struct udmabuf *ubuf, 290 struct miscdevice *device) 291 { 292 DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 293 294 ubuf->device = device; 295 exp_info.ops = &udmabuf_ops; 296 exp_info.size = ubuf->pagecount << PAGE_SHIFT; 297 exp_info.priv = ubuf; 298 exp_info.flags = O_RDWR; 299 300 return dma_buf_export(&exp_info); 301 } 302 303 static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd, 304 loff_t start, loff_t size, struct folio **folios) 305 { 306 pgoff_t nr_pinned = ubuf->nr_pinned; 307 pgoff_t upgcnt = ubuf->pagecount; 308 u32 cur_folio, cur_pgcnt; 309 pgoff_t pgoff, pgcnt; 310 long nr_folios; 311 loff_t end; 312 313 pgcnt = size >> PAGE_SHIFT; 314 end = start + (pgcnt << PAGE_SHIFT) - 1; 315 nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff); 316 if (nr_folios <= 0) 317 return nr_folios ? nr_folios : -EINVAL; 318 319 cur_pgcnt = 0; 320 for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) { 321 pgoff_t subpgoff = pgoff; 322 size_t fsize = folio_size(folios[cur_folio]); 323 324 ubuf->pinned_folios[nr_pinned++] = folios[cur_folio]; 325 326 for (; subpgoff < fsize; subpgoff += PAGE_SIZE) { 327 ubuf->pages[upgcnt] = folio_page(folios[cur_folio], 328 subpgoff >> PAGE_SHIFT); 329 ++upgcnt; 330 331 if (++cur_pgcnt >= pgcnt) 332 goto end; 333 } 334 335 /** 336 * In a given range, only the first subpage of the first folio 337 * has an offset, that is returned by memfd_pin_folios(). 338 * The first subpages of other folios (in the range) have an 339 * offset of 0. 340 */ 341 pgoff = 0; 342 } 343 end: 344 ubuf->pagecount = upgcnt; 345 ubuf->nr_pinned = nr_pinned; 346 return 0; 347 } 348 349 static long udmabuf_create(struct miscdevice *device, 350 struct udmabuf_create_list *head, 351 struct udmabuf_create_item *list) 352 { 353 unsigned long max_nr_folios = 0; 354 struct folio **folios = NULL; 355 pgoff_t pgcnt = 0, pglimit; 356 struct udmabuf *ubuf; 357 struct dma_buf *dmabuf; 358 long ret = -EINVAL; 359 u32 i, flags; 360 361 ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL); 362 if (!ubuf) 363 return -ENOMEM; 364 365 pglimit = ((u64)size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; 366 for (i = 0; i < head->count; i++) { 367 pgoff_t subpgcnt; 368 369 if (!PAGE_ALIGNED(list[i].offset)) 370 goto err_noinit; 371 if (!PAGE_ALIGNED(list[i].size)) 372 goto err_noinit; 373 374 subpgcnt = list[i].size >> PAGE_SHIFT; 375 pgcnt += subpgcnt; 376 if (pgcnt > pglimit) 377 goto err_noinit; 378 379 max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios); 380 } 381 382 if (!pgcnt) 383 goto err_noinit; 384 385 ret = init_udmabuf(ubuf, pgcnt); 386 if (ret) 387 goto err; 388 389 folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL); 390 if (!folios) { 391 ret = -ENOMEM; 392 goto err; 393 } 394 395 for (i = 0; i < head->count; i++) { 396 struct file *memfd = fget(list[i].memfd); 397 398 if (!memfd) { 399 ret = -EBADFD; 400 goto err; 401 } 402 403 /* 404 * Take the inode lock to protect against concurrent 405 * memfd_add_seals(), which takes this lock in write mode. 406 */ 407 inode_lock_shared(file_inode(memfd)); 408 ret = check_memfd_seals(memfd); 409 if (ret) 410 goto out_unlock; 411 412 ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset, 413 list[i].size, folios); 414 out_unlock: 415 inode_unlock_shared(file_inode(memfd)); 416 fput(memfd); 417 if (ret) 418 goto err; 419 } 420 421 flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0; 422 dmabuf = export_udmabuf(ubuf, device); 423 if (IS_ERR(dmabuf)) { 424 ret = PTR_ERR(dmabuf); 425 goto err; 426 } 427 /* 428 * Ownership of ubuf is held by the dmabuf from here. 429 * If the following dma_buf_fd() fails, dma_buf_put() cleans up both the 430 * dmabuf and the ubuf (through udmabuf_ops.release). 431 */ 432 433 ret = dma_buf_fd(dmabuf, flags); 434 if (ret < 0) 435 dma_buf_put(dmabuf); 436 437 kvfree(folios); 438 return ret; 439 440 err: 441 deinit_udmabuf(ubuf); 442 err_noinit: 443 kfree(ubuf); 444 kvfree(folios); 445 return ret; 446 } 447 448 static long udmabuf_ioctl_create(struct file *filp, unsigned long arg) 449 { 450 struct udmabuf_create create; 451 struct udmabuf_create_list head; 452 struct udmabuf_create_item list; 453 454 if (copy_from_user(&create, (void __user *)arg, 455 sizeof(create))) 456 return -EFAULT; 457 458 head.flags = create.flags; 459 head.count = 1; 460 list.memfd = create.memfd; 461 list.offset = create.offset; 462 list.size = create.size; 463 464 return udmabuf_create(filp->private_data, &head, &list); 465 } 466 467 static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg) 468 { 469 struct udmabuf_create_list head; 470 struct udmabuf_create_item *list; 471 int ret = -EINVAL; 472 u32 lsize; 473 474 if (copy_from_user(&head, (void __user *)arg, sizeof(head))) 475 return -EFAULT; 476 if (head.count > list_limit) 477 return -EINVAL; 478 lsize = sizeof(struct udmabuf_create_item) * head.count; 479 list = memdup_user((void __user *)(arg + sizeof(head)), lsize); 480 if (IS_ERR(list)) 481 return PTR_ERR(list); 482 483 ret = udmabuf_create(filp->private_data, &head, list); 484 kfree(list); 485 return ret; 486 } 487 488 static long udmabuf_ioctl(struct file *filp, unsigned int ioctl, 489 unsigned long arg) 490 { 491 long ret; 492 493 switch (ioctl) { 494 case UDMABUF_CREATE: 495 ret = udmabuf_ioctl_create(filp, arg); 496 break; 497 case UDMABUF_CREATE_LIST: 498 ret = udmabuf_ioctl_create_list(filp, arg); 499 break; 500 default: 501 ret = -ENOTTY; 502 break; 503 } 504 return ret; 505 } 506 507 static const struct file_operations udmabuf_fops = { 508 .owner = THIS_MODULE, 509 .unlocked_ioctl = udmabuf_ioctl, 510 #ifdef CONFIG_COMPAT 511 .compat_ioctl = udmabuf_ioctl, 512 #endif 513 }; 514 515 static struct miscdevice udmabuf_misc = { 516 .minor = MISC_DYNAMIC_MINOR, 517 .name = "udmabuf", 518 .fops = &udmabuf_fops, 519 }; 520 521 static int __init udmabuf_dev_init(void) 522 { 523 int ret; 524 525 ret = misc_register(&udmabuf_misc); 526 if (ret < 0) { 527 pr_err("Could not initialize udmabuf device\n"); 528 return ret; 529 } 530 531 ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device, 532 DMA_BIT_MASK(64)); 533 if (ret < 0) { 534 pr_err("Could not setup DMA mask for udmabuf device\n"); 535 misc_deregister(&udmabuf_misc); 536 return ret; 537 } 538 539 return 0; 540 } 541 542 static void __exit udmabuf_dev_exit(void) 543 { 544 misc_deregister(&udmabuf_misc); 545 } 546 547 module_init(udmabuf_dev_init) 548 module_exit(udmabuf_dev_exit) 549 550 MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>"); 551