// SPDX-License-Identifier: MIT
/*
 * Copyright © 2012-2014 Intel Corporation
 *
 * Based on amdgpu_mn, which bears the following notice:
 *
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <linux/mmu_context.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

#ifdef CONFIG_MMU_NOTIFIER

/**
 * i915_gem_userptr_invalidate - callback to notify about mm change
 *
 * @mni: the range (mm) is about to update
 * @range: details on the invalidation
 * @cur_seq: Value to pass to mmu_interval_set_seq()
 *
 * Block for operations on BOs to finish and mark pages as accessed and
 * potentially dirty.
 */
static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
					const struct mmu_notifier_range *range,
					unsigned long cur_seq)
{
	mmu_interval_set_seq(mni, cur_seq);
	return true;
}

static const struct mmu_interval_notifier_ops i915_gem_userptr_notifier_ops = {
	.invalidate = i915_gem_userptr_invalidate,
};
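
/*
 * The invalidate callback above only records the new interval sequence
 * number; nothing is unpinned or unmapped from notifier context. An
 * invalidation is instead detected lazily by the submission path, which
 * samples the sequence with mmu_interval_read_begin() in
 * i915_gem_object_userptr_submit_init() and re-checks it with
 * mmu_interval_read_retry() in i915_gem_object_userptr_submit_done(),
 * restarting the submission (and re-pinning the user pages) whenever the
 * two disagree.
 */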

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj)
{
	return mmu_interval_notifier_insert(&obj->userptr.notifier, current->mm,
					    obj->userptr.ptr, obj->base.size,
					    &i915_gem_userptr_notifier_ops);
}

static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
{
	struct page **pvec = NULL;

	assert_object_held_shared(obj);

	if (!--obj->userptr.page_ref) {
		pvec = obj->userptr.pvec;
		obj->userptr.pvec = NULL;
	}
	GEM_BUG_ON(obj->userptr.page_ref < 0);

	if (pvec) {
		const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;

		unpin_user_pages(pvec, num_pages);
		kvfree(pvec);
	}
}
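
/*
 * i915_gem_userptr_get_pages() below does not pin anything itself: the user
 * pages are pinned ahead of time by i915_gem_object_userptr_submit_init(),
 * which stashes them in obj->userptr.pvec. Here we only take an extra
 * page_ref on that array and build the sg_table from it; if no pages have
 * been pinned yet (page_ref is zero) there is nothing to build from and we
 * return -EAGAIN. If DMA mapping of a coalesced table fails, we retry with
 * max_segment clamped to PAGE_SIZE so that every page gets its own segment
 * (a fallback for more restrictive DMA setups, e.g. bounce buffering).
 */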
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
	unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
	struct sg_table *st;
	struct page **pvec;
	unsigned int num_pages; /* limited by sg_alloc_table_from_pages_segment */
	int ret;

	if (overflows_type(obj->base.size >> PAGE_SHIFT, num_pages))
		return -E2BIG;

	num_pages = obj->base.size >> PAGE_SHIFT;
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return -ENOMEM;

	if (!obj->userptr.page_ref) {
		ret = -EAGAIN;
		goto err_free;
	}

	obj->userptr.page_ref++;
	pvec = obj->userptr.pvec;

alloc_table:
	ret = sg_alloc_table_from_pages_segment(st, pvec, num_pages, 0,
						num_pages << PAGE_SHIFT,
						max_segment, GFP_KERNEL);
	if (ret)
		goto err;

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		sg_free_table(st);

		if (max_segment > PAGE_SIZE) {
			max_segment = PAGE_SIZE;
			goto alloc_table;
		}

		goto err;
	}

	WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE));
	if (i915_gem_object_can_bypass_llc(obj))
		obj->cache_dirty = true;

	__i915_gem_object_set_pages(obj, st);

	return 0;

err:
	i915_gem_object_userptr_drop_ref(obj);
err_free:
	kfree(st);
	return ret;
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	/*
	 * We always mark objects as dirty when they are used by the GPU,
	 * just in case. However, if we set the vma as being read-only we know
	 * that the object will never have been written to.
	 */
	if (i915_gem_object_is_readonly(obj))
		obj->mm.dirty = false;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty && trylock_page(page)) {
			/*
			 * As this may not be anonymous memory (e.g. shmem)
			 * but exist on a real mapping, we have to lock
			 * the page in order to dirty it -- holding
			 * the page reference is not sufficient to
			 * prevent the inode from being truncated.
			 * Play safe and take the lock.
			 *
			 * However...!
			 *
			 * The mmu-notifier can be invalidated for a
			 * migrate_folio that is already holding the lock
			 * on the folio. Such a try_to_unmap() will result
			 * in us calling put_pages() and so recursively trying
			 * to lock the page. We avoid that deadlock with
			 * a trylock_page() and in exchange we risk missing
			 * some page dirtying.
			 */
			set_page_dirty(page);
			unlock_page(page);
		}

		mark_page_accessed(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);

	i915_gem_object_userptr_drop_ref(obj);
}

static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj)
{
	struct sg_table *pages;
	int err;

	err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
	if (err)
		return err;

	if (GEM_WARN_ON(i915_gem_object_has_pinned_pages(obj)))
		return -EBUSY;

	assert_object_held(obj);

	pages = __i915_gem_object_unset_pages(obj);
	if (!IS_ERR_OR_NULL(pages))
		i915_gem_userptr_put_pages(obj, pages);

	return err;
}

int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
{
	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
	struct page **pvec;
	unsigned int gup_flags = 0;
	unsigned long notifier_seq;
	int pinned, ret;

	if (obj->userptr.notifier.mm != current->mm)
		return -EFAULT;

	notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		return ret;

	if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) {
		i915_gem_object_unlock(obj);
		return 0;
	}

	ret = i915_gem_object_userptr_unbind(obj);
	i915_gem_object_unlock(obj);
	if (ret)
		return ret;

	pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pvec)
		return -ENOMEM;

	if (!i915_gem_object_is_readonly(obj))
		gup_flags |= FOLL_WRITE;

	pinned = 0;
	while (pinned < num_pages) {
		ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
					  num_pages - pinned, gup_flags,
					  &pvec[pinned]);
		if (ret < 0)
			goto out;

		pinned += ret;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	if (mmu_interval_read_retry(&obj->userptr.notifier,
				    !obj->userptr.page_ref ? notifier_seq :
				    obj->userptr.notifier_seq)) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	if (!obj->userptr.page_ref++) {
		obj->userptr.pvec = pvec;
		obj->userptr.notifier_seq = notifier_seq;
		pvec = NULL;
		ret = ____i915_gem_object_get_pages(obj);
	}

	obj->userptr.page_ref--;

out_unlock:
	i915_gem_object_unlock(obj);

out:
	if (pvec) {
		unpin_user_pages(pvec, pinned);
		kvfree(pvec);
	}

	return ret;
}

int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
{
	if (mmu_interval_read_retry(&obj->userptr.notifier,
				    obj->userptr.notifier_seq)) {
		/* We collided with the mmu notifier, need to retry */

		return -EAGAIN;
	}

	return 0;
}
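
/*
 * A rough sketch of how the two helpers above are meant to be used by a
 * submission path (illustrative only; the real execbuf code is more
 * involved):
 *
 *	do {
 *		err = i915_gem_object_userptr_submit_init(obj);
 *		if (err)
 *			continue;	// only -EAGAIN re-runs the loop
 *
 *		... lock, pin/bind the object and publish the request ...
 *
 *		err = i915_gem_object_userptr_submit_done(obj);
 *	} while (err == -EAGAIN);
 *
 * submit_init() pins the user pages and records the notifier sequence;
 * submit_done() verifies that no invalidation happened in between. -EAGAIN
 * from either means we raced with an mmu notifier and the submission must
 * be restarted from the top.
 */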

int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj)
{
	int err;

	err = i915_gem_object_userptr_submit_init(obj);
	if (err)
		return err;

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (!err) {
		/*
		 * Since we only check validity, not actually use the pages,
		 * it doesn't matter if we collide with the mmu notifier,
		 * and -EAGAIN handling is not required.
		 */
		err = i915_gem_object_pin_pages(obj);
		if (!err)
			i915_gem_object_unpin_pages(obj);

		i915_gem_object_unlock(obj);
	}

	return err;
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	GEM_WARN_ON(obj->userptr.page_ref);

	if (!obj->userptr.notifier.mm)
		return;

	mmu_interval_notifier_remove(&obj->userptr.notifier);
	obj->userptr.notifier.mm = NULL;
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
	drm_dbg(obj->base.dev, "Exporting userptr no longer allowed\n");

	return -EINVAL;
}

static int
i915_gem_userptr_pwrite(struct drm_i915_gem_object *obj,
			const struct drm_i915_gem_pwrite *args)
{
	drm_dbg(obj->base.dev, "pwrite to userptr no longer allowed\n");

	return -EINVAL;
}

static int
i915_gem_userptr_pread(struct drm_i915_gem_object *obj,
		       const struct drm_i915_gem_pread *args)
{
	drm_dbg(obj->base.dev, "pread from userptr no longer allowed\n");

	return -EINVAL;
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
	.name = "i915_gem_object_userptr",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE |
		 I915_GEM_OBJECT_NO_MMAP |
		 I915_GEM_OBJECT_IS_PROXY,
	.get_pages = i915_gem_userptr_get_pages,
	.put_pages = i915_gem_userptr_put_pages,
	.dmabuf_export = i915_gem_userptr_dmabuf_export,
	.pwrite = i915_gem_userptr_pwrite,
	.pread = i915_gem_userptr_pread,
	.release = i915_gem_userptr_release,
};

#endif
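
/*
 * probe_range() walks every VMA overlapping [addr, addr + len) and rejects
 * the range unless it is fully covered by ordinary mappings: the walk stops
 * early (leaving vma non-NULL) on a hole in front of a VMA or on a
 * VM_PFNMAP/VM_MIXEDMAP mapping, and a hole at the tail leaves addr < end.
 * Either condition results in -EFAULT.
 */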
static int
probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
{
	VMA_ITERATOR(vmi, mm, addr);
	struct vm_area_struct *vma;
	unsigned long end = addr + len;

	mmap_read_lock(mm);
	for_each_vma_range(vmi, vma, end) {
		/* Check for holes, note that we also update the addr below */
		if (vma->vm_start > addr)
			break;

		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
			break;

		addr = vma->vm_end;
	}
	mmap_read_unlock(mm);

	if (vma || addr < end)
		return -EFAULT;
	return 0;
}

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
		       void *data,
		       struct drm_file *file)
{
	static struct lock_class_key __maybe_unused lock_class;
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_userptr *args = data;
	struct drm_i915_gem_object __maybe_unused *obj;
	int __maybe_unused ret;
	u32 __maybe_unused handle;

	if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) {
		/* We cannot support coherent userptr objects on hw without
		 * LLC and with broken snooping.
		 */
		return -ENODEV;
	}

	if (args->flags & ~(I915_USERPTR_READ_ONLY |
			    I915_USERPTR_UNSYNCHRONIZED |
			    I915_USERPTR_PROBE))
		return -EINVAL;

	if (i915_gem_object_size_2big(args->user_size))
		return -E2BIG;

	if (!args->user_size)
		return -EINVAL;

	if (offset_in_page(args->user_ptr | args->user_size))
		return -EINVAL;

	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
		return -EFAULT;

	if (args->flags & I915_USERPTR_UNSYNCHRONIZED)
		return -ENODEV;

	if (args->flags & I915_USERPTR_READ_ONLY) {
		/*
		 * On almost all of the older hw, we cannot tell the GPU that
		 * a page is readonly.
		 */
		if (!to_gt(i915)->vm->has_read_only)
			return -ENODEV;
	}

	if (args->flags & I915_USERPTR_PROBE) {
		/*
		 * Check that the range pointed to represents real struct
		 * pages and not iomappings (at this moment in time!)
		 */
		ret = probe_range(current->mm, args->user_ptr, args->user_size);
		if (ret)
			return ret;
	}

#ifdef CONFIG_MMU_NOTIFIER
	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return -ENOMEM;

	drm_gem_private_object_init(dev, &obj->base, args->user_size);
	i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
			     I915_BO_ALLOC_USER);
	obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	obj->userptr.ptr = args->user_ptr;
	obj->userptr.notifier_seq = ULONG_MAX;
	if (args->flags & I915_USERPTR_READ_ONLY)
		i915_gem_object_set_readonly(obj);

	/* And keep a pointer to the current->mm for resolving the user pages
	 * at binding. This means that we need to hook into the mmu_notifier
	 * in order to detect if the mmu is destroyed.
	 */
	ret = i915_gem_userptr_init__mmu_notifier(obj);
	if (ret == 0)
		ret = drm_gem_handle_create(file, &obj->base, &handle);

	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
#else
	return -ENODEV;
#endif
}
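
/*
 * Illustrative userspace sketch of the ioctl above (not part of the driver;
 * it assumes libdrm's drmIoctl(), the uapi definitions from i915_drm.h and
 * a 4 KiB page size):
 *
 *	struct drm_i915_gem_userptr arg = {};
 *	void *ptr;
 *
 *	if (posix_memalign(&ptr, 4096, size))	// ptr and size page aligned
 *		return -1;
 *
 *	arg.user_ptr = (uintptr_t)ptr;
 *	arg.user_size = size;
 *	arg.flags = 0;		// optionally I915_USERPTR_READ_ONLY or _PROBE
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
 *		return -errno;
 *
 *	// arg.handle now names a GEM object backed by ptr; access the memory
 *	// through ptr directly, since mmap/pread/pwrite on the handle are
 *	// rejected.
 */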