xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c (revision e3b9f1e81de2083f359bacd2a94bf1c024f2ede0)
1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30  *    Dave Airlie
31  */
32 #include <drm/ttm/ttm_bo_api.h>
33 #include <drm/ttm/ttm_bo_driver.h>
34 #include <drm/ttm/ttm_placement.h>
35 #include <drm/ttm/ttm_module.h>
36 #include <drm/ttm/ttm_page_alloc.h>
37 #include <drm/drmP.h>
38 #include <drm/amdgpu_drm.h>
39 #include <linux/seq_file.h>
40 #include <linux/slab.h>
41 #include <linux/swiotlb.h>
42 #include <linux/swap.h>
43 #include <linux/pagemap.h>
44 #include <linux/debugfs.h>
45 #include <linux/iommu.h>
46 #include "amdgpu.h"
47 #include "amdgpu_object.h"
48 #include "amdgpu_trace.h"
49 #include "bif/bif_4_1_d.h"
50 
51 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
52 
53 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
54 			     struct ttm_mem_reg *mem, unsigned num_pages,
55 			     uint64_t offset, unsigned window,
56 			     struct amdgpu_ring *ring,
57 			     uint64_t *addr);
58 
59 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
60 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
61 
62 /*
63  * Global memory.
64  */
65 static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
66 {
67 	return ttm_mem_global_init(ref->object);
68 }
69 
70 static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
71 {
72 	ttm_mem_global_release(ref->object);
73 }
74 
75 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
76 {
77 	struct drm_global_reference *global_ref;
78 	struct amdgpu_ring *ring;
79 	struct drm_sched_rq *rq;
80 	int r;
81 
82 	adev->mman.mem_global_referenced = false;
83 	global_ref = &adev->mman.mem_global_ref;
84 	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
85 	global_ref->size = sizeof(struct ttm_mem_global);
86 	global_ref->init = &amdgpu_ttm_mem_global_init;
87 	global_ref->release = &amdgpu_ttm_mem_global_release;
88 	r = drm_global_item_ref(global_ref);
89 	if (r) {
90 		DRM_ERROR("Failed setting up TTM memory accounting "
91 			  "subsystem.\n");
92 		goto error_mem;
93 	}
94 
95 	adev->mman.bo_global_ref.mem_glob =
96 		adev->mman.mem_global_ref.object;
97 	global_ref = &adev->mman.bo_global_ref.ref;
98 	global_ref->global_type = DRM_GLOBAL_TTM_BO;
99 	global_ref->size = sizeof(struct ttm_bo_global);
100 	global_ref->init = &ttm_bo_global_init;
101 	global_ref->release = &ttm_bo_global_release;
102 	r = drm_global_item_ref(global_ref);
103 	if (r) {
104 		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
105 		goto error_bo;
106 	}
107 
108 	mutex_init(&adev->mman.gtt_window_lock);
109 
110 	ring = adev->mman.buffer_funcs_ring;
111 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
112 	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
113 				  rq, amdgpu_sched_jobs, NULL);
114 	if (r) {
115 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
116 		goto error_entity;
117 	}
118 
119 	adev->mman.mem_global_referenced = true;
120 
121 	return 0;
122 
123 error_entity:
124 	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
125 error_bo:
126 	drm_global_item_unref(&adev->mman.mem_global_ref);
127 error_mem:
128 	return r;
129 }
130 
131 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
132 {
133 	if (adev->mman.mem_global_referenced) {
134 		drm_sched_entity_fini(adev->mman.entity.sched,
135 				      &adev->mman.entity);
136 		mutex_destroy(&adev->mman.gtt_window_lock);
137 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
138 		drm_global_item_unref(&adev->mman.mem_global_ref);
139 		adev->mman.mem_global_referenced = false;
140 	}
141 }
142 
143 static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
144 {
145 	return 0;
146 }
147 
148 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
149 				struct ttm_mem_type_manager *man)
150 {
151 	struct amdgpu_device *adev;
152 
153 	adev = amdgpu_ttm_adev(bdev);
154 
155 	switch (type) {
156 	case TTM_PL_SYSTEM:
157 		/* System memory */
158 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
159 		man->available_caching = TTM_PL_MASK_CACHING;
160 		man->default_caching = TTM_PL_FLAG_CACHED;
161 		break;
162 	case TTM_PL_TT:
163 		man->func = &amdgpu_gtt_mgr_func;
164 		man->gpu_offset = adev->mc.gart_start;
165 		man->available_caching = TTM_PL_MASK_CACHING;
166 		man->default_caching = TTM_PL_FLAG_CACHED;
167 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
168 		break;
169 	case TTM_PL_VRAM:
170 		/* "On-card" video ram */
171 		man->func = &amdgpu_vram_mgr_func;
172 		man->gpu_offset = adev->mc.vram_start;
173 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
174 			     TTM_MEMTYPE_FLAG_MAPPABLE;
175 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
176 		man->default_caching = TTM_PL_FLAG_WC;
177 		break;
178 	case AMDGPU_PL_GDS:
179 	case AMDGPU_PL_GWS:
180 	case AMDGPU_PL_OA:
181 		/* On-chip GDS memory*/
182 		man->func = &ttm_bo_manager_func;
183 		man->gpu_offset = 0;
184 		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
185 		man->available_caching = TTM_PL_FLAG_UNCACHED;
186 		man->default_caching = TTM_PL_FLAG_UNCACHED;
187 		break;
188 	default:
189 		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
190 		return -EINVAL;
191 	}
192 	return 0;
193 }
194 
195 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
196 				struct ttm_placement *placement)
197 {
198 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
199 	struct amdgpu_bo *abo;
200 	static const struct ttm_place placements = {
201 		.fpfn = 0,
202 		.lpfn = 0,
203 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
204 	};
205 
206 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
207 		placement->placement = &placements;
208 		placement->busy_placement = &placements;
209 		placement->num_placement = 1;
210 		placement->num_busy_placement = 1;
211 		return;
212 	}
213 	abo = ttm_to_amdgpu_bo(bo);
214 	switch (bo->mem.mem_type) {
215 	case TTM_PL_VRAM:
216 		if (adev->mman.buffer_funcs &&
217 		    adev->mman.buffer_funcs_ring &&
218 		    adev->mman.buffer_funcs_ring->ready == false) {
219 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
220 		} else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
221 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
222 			unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
223 			struct drm_mm_node *node = bo->mem.mm_node;
224 			unsigned long pages_left;
225 
226 			for (pages_left = bo->mem.num_pages;
227 			     pages_left;
228 			     pages_left -= node->size, node++) {
229 				if (node->start < fpfn)
230 					break;
231 			}
232 
233 			if (!pages_left)
234 				goto gtt;
235 
236 			/* Try evicting to the CPU inaccessible part of VRAM
237 			 * first, but only set GTT as busy placement, so this
238 			 * BO will be evicted to GTT rather than causing other
239 			 * BOs to be evicted from VRAM
240 			 */
241 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
242 							 AMDGPU_GEM_DOMAIN_GTT);
243 			abo->placements[0].fpfn = fpfn;
244 			abo->placements[0].lpfn = 0;
245 			abo->placement.busy_placement = &abo->placements[1];
246 			abo->placement.num_busy_placement = 1;
247 		} else {
248 gtt:
249 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
250 		}
251 		break;
252 	case TTM_PL_TT:
253 	default:
254 		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
255 	}
256 	*placement = abo->placement;
257 }
258 
259 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
260 {
261 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
262 
263 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
264 		return -EPERM;
265 	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
266 					  filp->private_data);
267 }
268 
269 static void amdgpu_move_null(struct ttm_buffer_object *bo,
270 			     struct ttm_mem_reg *new_mem)
271 {
272 	struct ttm_mem_reg *old_mem = &bo->mem;
273 
274 	BUG_ON(old_mem->mm_node != NULL);
275 	*old_mem = *new_mem;
276 	new_mem->mm_node = NULL;
277 }
278 
279 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
280 				    struct drm_mm_node *mm_node,
281 				    struct ttm_mem_reg *mem)
282 {
283 	uint64_t addr = 0;
284 
285 	if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
286 		addr = mm_node->start << PAGE_SHIFT;
287 		addr += bo->bdev->man[mem->mem_type].gpu_offset;
288 	}
289 	return addr;
290 }
291 
292 /**
293  * amdgpu_find_mm_node - Helper function finds the drm_mm_node
294  *  corresponding to @offset. It also modifies the offset to be
295  *  within the drm_mm_node returned
296  */
297 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
298 					       unsigned long *offset)
299 {
300 	struct drm_mm_node *mm_node = mem->mm_node;
301 
302 	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
303 		*offset -= (mm_node->size << PAGE_SHIFT);
304 		++mm_node;
305 	}
306 	return mm_node;
307 }
308 
309 /**
310  * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
311  *
312  * The function copies @size bytes from {src->mem + src->offset} to
313  * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
314  * move and different for a BO to BO copy.
315  *
316  * @f: Returns the last fence if multiple jobs are submitted.
317  */
318 int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
319 			       struct amdgpu_copy_mem *src,
320 			       struct amdgpu_copy_mem *dst,
321 			       uint64_t size,
322 			       struct reservation_object *resv,
323 			       struct dma_fence **f)
324 {
325 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
326 	struct drm_mm_node *src_mm, *dst_mm;
327 	uint64_t src_node_start, dst_node_start, src_node_size,
328 		 dst_node_size, src_page_offset, dst_page_offset;
329 	struct dma_fence *fence = NULL;
330 	int r = 0;
331 	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
332 					AMDGPU_GPU_PAGE_SIZE);
333 
334 	if (!ring->ready) {
335 		DRM_ERROR("Trying to move memory with ring turned off.\n");
336 		return -EINVAL;
337 	}
338 
339 	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
340 	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
341 					     src->offset;
342 	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
343 	src_page_offset = src_node_start & (PAGE_SIZE - 1);
344 
345 	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
346 	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
347 					     dst->offset;
348 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
349 	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
350 
351 	mutex_lock(&adev->mman.gtt_window_lock);
352 
353 	while (size) {
354 		unsigned long cur_size;
355 		uint64_t from = src_node_start, to = dst_node_start;
356 		struct dma_fence *next;
357 
358 		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
359 		 * begins at an offset, then adjust the size accordingly
360 		 */
361 		cur_size = min3(min(src_node_size, dst_node_size), size,
362 				GTT_MAX_BYTES);
363 		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
364 		    cur_size + dst_page_offset > GTT_MAX_BYTES)
365 			cur_size -= max(src_page_offset, dst_page_offset);
366 
367 		/* Map only what needs to be accessed. Map src to window 0 and
368 		 * dst to window 1
369 		 */
370 		if (src->mem->mem_type == TTM_PL_TT &&
371 		    !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
372 			r = amdgpu_map_buffer(src->bo, src->mem,
373 					PFN_UP(cur_size + src_page_offset),
374 					src_node_start, 0, ring,
375 					&from);
376 			if (r)
377 				goto error;
378 			/* Adjust the offset because amdgpu_map_buffer returns
379 			 * start of mapped page
380 			 */
381 			from += src_page_offset;
382 		}
383 
384 		if (dst->mem->mem_type == TTM_PL_TT &&
385 		    !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
386 			r = amdgpu_map_buffer(dst->bo, dst->mem,
387 					PFN_UP(cur_size + dst_page_offset),
388 					dst_node_start, 1, ring,
389 					&to);
390 			if (r)
391 				goto error;
392 			to += dst_page_offset;
393 		}
394 
395 		r = amdgpu_copy_buffer(ring, from, to, cur_size,
396 				       resv, &next, false, true);
397 		if (r)
398 			goto error;
399 
400 		dma_fence_put(fence);
401 		fence = next;
402 
403 		size -= cur_size;
404 		if (!size)
405 			break;
406 
407 		src_node_size -= cur_size;
408 		if (!src_node_size) {
409 			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
410 							     src->mem);
411 			src_node_size = (src_mm->size << PAGE_SHIFT);
412 		} else {
413 			src_node_start += cur_size;
414 			src_page_offset = src_node_start & (PAGE_SIZE - 1);
415 		}
416 		dst_node_size -= cur_size;
417 		if (!dst_node_size) {
418 			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
419 							     dst->mem);
420 			dst_node_size = (dst_mm->size << PAGE_SHIFT);
421 		} else {
422 			dst_node_start += cur_size;
423 			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
424 		}
425 	}
426 error:
427 	mutex_unlock(&adev->mman.gtt_window_lock);
428 	if (f)
429 		*f = dma_fence_get(fence);
430 	dma_fence_put(fence);
431 	return r;
432 }
433 
434 
435 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
436 			    bool evict, bool no_wait_gpu,
437 			    struct ttm_mem_reg *new_mem,
438 			    struct ttm_mem_reg *old_mem)
439 {
440 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
441 	struct amdgpu_copy_mem src, dst;
442 	struct dma_fence *fence = NULL;
443 	int r;
444 
445 	src.bo = bo;
446 	dst.bo = bo;
447 	src.mem = old_mem;
448 	dst.mem = new_mem;
449 	src.offset = 0;
450 	dst.offset = 0;
451 
452 	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
453 				       new_mem->num_pages << PAGE_SHIFT,
454 				       bo->resv, &fence);
455 	if (r)
456 		goto error;
457 
458 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
459 	dma_fence_put(fence);
460 	return r;
461 
462 error:
463 	if (fence)
464 		dma_fence_wait(fence, false);
465 	dma_fence_put(fence);
466 	return r;
467 }
468 
469 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
470 				struct ttm_operation_ctx *ctx,
471 				struct ttm_mem_reg *new_mem)
472 {
473 	struct amdgpu_device *adev;
474 	struct ttm_mem_reg *old_mem = &bo->mem;
475 	struct ttm_mem_reg tmp_mem;
476 	struct ttm_place placements;
477 	struct ttm_placement placement;
478 	int r;
479 
480 	adev = amdgpu_ttm_adev(bo->bdev);
481 	tmp_mem = *new_mem;
482 	tmp_mem.mm_node = NULL;
483 	placement.num_placement = 1;
484 	placement.placement = &placements;
485 	placement.num_busy_placement = 1;
486 	placement.busy_placement = &placements;
487 	placements.fpfn = 0;
488 	placements.lpfn = 0;
489 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
490 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
491 	if (unlikely(r)) {
492 		return r;
493 	}
494 
495 	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
496 	if (unlikely(r)) {
497 		goto out_cleanup;
498 	}
499 
500 	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
501 	if (unlikely(r)) {
502 		goto out_cleanup;
503 	}
504 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
505 	if (unlikely(r)) {
506 		goto out_cleanup;
507 	}
508 	r = ttm_bo_move_ttm(bo, ctx, new_mem);
509 out_cleanup:
510 	ttm_bo_mem_put(bo, &tmp_mem);
511 	return r;
512 }
513 
514 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
515 				struct ttm_operation_ctx *ctx,
516 				struct ttm_mem_reg *new_mem)
517 {
518 	struct amdgpu_device *adev;
519 	struct ttm_mem_reg *old_mem = &bo->mem;
520 	struct ttm_mem_reg tmp_mem;
521 	struct ttm_placement placement;
522 	struct ttm_place placements;
523 	int r;
524 
525 	adev = amdgpu_ttm_adev(bo->bdev);
526 	tmp_mem = *new_mem;
527 	tmp_mem.mm_node = NULL;
528 	placement.num_placement = 1;
529 	placement.placement = &placements;
530 	placement.num_busy_placement = 1;
531 	placement.busy_placement = &placements;
532 	placements.fpfn = 0;
533 	placements.lpfn = 0;
534 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
535 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
536 	if (unlikely(r)) {
537 		return r;
538 	}
539 	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
540 	if (unlikely(r)) {
541 		goto out_cleanup;
542 	}
543 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
544 	if (unlikely(r)) {
545 		goto out_cleanup;
546 	}
547 out_cleanup:
548 	ttm_bo_mem_put(bo, &tmp_mem);
549 	return r;
550 }
551 
552 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
553 			  struct ttm_operation_ctx *ctx,
554 			  struct ttm_mem_reg *new_mem)
555 {
556 	struct amdgpu_device *adev;
557 	struct amdgpu_bo *abo;
558 	struct ttm_mem_reg *old_mem = &bo->mem;
559 	int r;
560 
561 	/* Can't move a pinned BO */
562 	abo = ttm_to_amdgpu_bo(bo);
563 	if (WARN_ON_ONCE(abo->pin_count > 0))
564 		return -EINVAL;
565 
566 	adev = amdgpu_ttm_adev(bo->bdev);
567 
568 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
569 		amdgpu_move_null(bo, new_mem);
570 		return 0;
571 	}
572 	if ((old_mem->mem_type == TTM_PL_TT &&
573 	     new_mem->mem_type == TTM_PL_SYSTEM) ||
574 	    (old_mem->mem_type == TTM_PL_SYSTEM &&
575 	     new_mem->mem_type == TTM_PL_TT)) {
576 		/* bind is enough */
577 		amdgpu_move_null(bo, new_mem);
578 		return 0;
579 	}
580 	if (adev->mman.buffer_funcs == NULL ||
581 	    adev->mman.buffer_funcs_ring == NULL ||
582 	    !adev->mman.buffer_funcs_ring->ready) {
583 		/* use memcpy */
584 		goto memcpy;
585 	}
586 
587 	if (old_mem->mem_type == TTM_PL_VRAM &&
588 	    new_mem->mem_type == TTM_PL_SYSTEM) {
589 		r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
590 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
591 		   new_mem->mem_type == TTM_PL_VRAM) {
592 		r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
593 	} else {
594 		r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
595 				     new_mem, old_mem);
596 	}
597 
598 	if (r) {
599 memcpy:
600 		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
601 		if (r) {
602 			return r;
603 		}
604 	}
605 
606 	if (bo->type == ttm_bo_type_device &&
607 	    new_mem->mem_type == TTM_PL_VRAM &&
608 	    old_mem->mem_type != TTM_PL_VRAM) {
609 		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
610 		 * accesses the BO after it's moved.
611 		 */
612 		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
613 	}
614 
615 	/* update statistics */
616 	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
617 	return 0;
618 }
619 
620 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
621 {
622 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
623 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
624 
625 	mem->bus.addr = NULL;
626 	mem->bus.offset = 0;
627 	mem->bus.size = mem->num_pages << PAGE_SHIFT;
628 	mem->bus.base = 0;
629 	mem->bus.is_iomem = false;
630 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
631 		return -EINVAL;
632 	switch (mem->mem_type) {
633 	case TTM_PL_SYSTEM:
634 		/* system memory */
635 		return 0;
636 	case TTM_PL_TT:
637 		break;
638 	case TTM_PL_VRAM:
639 		mem->bus.offset = mem->start << PAGE_SHIFT;
640 		/* check if it's visible */
641 		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
642 			return -EINVAL;
643 		mem->bus.base = adev->mc.aper_base;
644 		mem->bus.is_iomem = true;
645 		break;
646 	default:
647 		return -EINVAL;
648 	}
649 	return 0;
650 }
651 
/* Counterpart of amdgpu_ttm_io_mem_reserve(); intentionally does nothing. */
static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
}
655 
656 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
657 					   unsigned long page_offset)
658 {
659 	struct drm_mm_node *mm;
660 	unsigned long offset = (page_offset << PAGE_SHIFT);
661 
662 	mm = amdgpu_find_mm_node(&bo->mem, &offset);
663 	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
664 		(offset >> PAGE_SHIFT);
665 }
666 
667 /*
668  * TTM backend functions.
669  */
670 struct amdgpu_ttm_gup_task_list {
671 	struct list_head	list;
672 	struct task_struct	*task;
673 };
674 
675 struct amdgpu_ttm_tt {
676 	struct ttm_dma_tt	ttm;
677 	struct amdgpu_device	*adev;
678 	u64			offset;
679 	uint64_t		userptr;
680 	struct mm_struct	*usermm;
681 	uint32_t		userflags;
682 	spinlock_t              guptasklock;
683 	struct list_head        guptasks;
684 	atomic_t		mmu_invalidations;
685 	uint32_t		last_set_pages;
686 };
687 
688 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
689 {
690 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
691 	unsigned int flags = 0;
692 	unsigned pinned = 0;
693 	int r;
694 
695 	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
696 		flags |= FOLL_WRITE;
697 
698 	down_read(&current->mm->mmap_sem);
699 
700 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
701 		/* check that we only use anonymous memory
702 		   to prevent problems with writeback */
703 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
704 		struct vm_area_struct *vma;
705 
706 		vma = find_vma(gtt->usermm, gtt->userptr);
707 		if (!vma || vma->vm_file || vma->vm_end < end) {
708 			up_read(&current->mm->mmap_sem);
709 			return -EPERM;
710 		}
711 	}
712 
713 	do {
714 		unsigned num_pages = ttm->num_pages - pinned;
715 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
716 		struct page **p = pages + pinned;
717 		struct amdgpu_ttm_gup_task_list guptask;
718 
719 		guptask.task = current;
720 		spin_lock(&gtt->guptasklock);
721 		list_add(&guptask.list, &gtt->guptasks);
722 		spin_unlock(&gtt->guptasklock);
723 
724 		r = get_user_pages(userptr, num_pages, flags, p, NULL);
725 
726 		spin_lock(&gtt->guptasklock);
727 		list_del(&guptask.list);
728 		spin_unlock(&gtt->guptasklock);
729 
730 		if (r < 0)
731 			goto release_pages;
732 
733 		pinned += r;
734 
735 	} while (pinned < ttm->num_pages);
736 
737 	up_read(&current->mm->mmap_sem);
738 	return 0;
739 
740 release_pages:
741 	release_pages(pages, pinned);
742 	up_read(&current->mm->mmap_sem);
743 	return r;
744 }
745 
746 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
747 {
748 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
749 	unsigned i;
750 
751 	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
752 	for (i = 0; i < ttm->num_pages; ++i) {
753 		if (ttm->pages[i])
754 			put_page(ttm->pages[i]);
755 
756 		ttm->pages[i] = pages ? pages[i] : NULL;
757 	}
758 }
759 
760 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
761 {
762 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
763 	unsigned i;
764 
765 	for (i = 0; i < ttm->num_pages; ++i) {
766 		struct page *page = ttm->pages[i];
767 
768 		if (!page)
769 			continue;
770 
771 		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
772 			set_page_dirty(page);
773 
774 		mark_page_accessed(page);
775 	}
776 }
777 
778 /* prepare the sg table with the user pages */
779 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
780 {
781 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
782 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
783 	unsigned nents;
784 	int r;
785 
786 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
787 	enum dma_data_direction direction = write ?
788 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
789 
790 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
791 				      ttm->num_pages << PAGE_SHIFT,
792 				      GFP_KERNEL);
793 	if (r)
794 		goto release_sg;
795 
796 	r = -ENOMEM;
797 	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
798 	if (nents != ttm->sg->nents)
799 		goto release_sg;
800 
801 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
802 					 gtt->ttm.dma_address, ttm->num_pages);
803 
804 	return 0;
805 
806 release_sg:
807 	kfree(ttm->sg);
808 	return r;
809 }
810 
811 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
812 {
813 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
814 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
815 
816 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
817 	enum dma_data_direction direction = write ?
818 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
819 
820 	/* double check that we don't free the table twice */
821 	if (!ttm->sg->sgl)
822 		return;
823 
824 	/* free the sg table and pages again */
825 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
826 
827 	amdgpu_ttm_tt_mark_user_pages(ttm);
828 
829 	sg_free_table(ttm->sg);
830 }
831 
832 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
833 				   struct ttm_mem_reg *bo_mem)
834 {
835 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
836 	uint64_t flags;
837 	int r = 0;
838 
839 	if (gtt->userptr) {
840 		r = amdgpu_ttm_tt_pin_userptr(ttm);
841 		if (r) {
842 			DRM_ERROR("failed to pin userptr\n");
843 			return r;
844 		}
845 	}
846 	if (!ttm->num_pages) {
847 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
848 		     ttm->num_pages, bo_mem, ttm);
849 	}
850 
851 	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
852 	    bo_mem->mem_type == AMDGPU_PL_GWS ||
853 	    bo_mem->mem_type == AMDGPU_PL_OA)
854 		return -EINVAL;
855 
856 	if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
857 		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
858 		return 0;
859 	}
860 
861 	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
862 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
863 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
864 		ttm->pages, gtt->ttm.dma_address, flags);
865 
866 	if (r)
867 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
868 			  ttm->num_pages, gtt->offset);
869 	return r;
870 }
871 
872 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
873 {
874 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
875 	struct ttm_operation_ctx ctx = { false, false };
876 	struct amdgpu_ttm_tt *gtt = (void*)bo->ttm;
877 	struct ttm_mem_reg tmp;
878 	struct ttm_placement placement;
879 	struct ttm_place placements;
880 	uint64_t flags;
881 	int r;
882 
883 	if (bo->mem.mem_type != TTM_PL_TT ||
884 	    amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
885 		return 0;
886 
887 	tmp = bo->mem;
888 	tmp.mm_node = NULL;
889 	placement.num_placement = 1;
890 	placement.placement = &placements;
891 	placement.num_busy_placement = 1;
892 	placement.busy_placement = &placements;
893 	placements.fpfn = 0;
894 	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
895 	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
896 		TTM_PL_FLAG_TT;
897 
898 	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
899 	if (unlikely(r))
900 		return r;
901 
902 	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
903 	gtt->offset = (u64)tmp.start << PAGE_SHIFT;
904 	r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
905 			     bo->ttm->pages, gtt->ttm.dma_address, flags);
906 	if (unlikely(r)) {
907 		ttm_bo_mem_put(bo, &tmp);
908 		return r;
909 	}
910 
911 	ttm_bo_mem_put(bo, &bo->mem);
912 	bo->mem = tmp;
913 	bo->offset = (bo->mem.start << PAGE_SHIFT) +
914 		bo->bdev->man[bo->mem.mem_type].gpu_offset;
915 
916 	return 0;
917 }
918 
919 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
920 {
921 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
922 	struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
923 	uint64_t flags;
924 	int r;
925 
926 	if (!gtt)
927 		return 0;
928 
929 	flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
930 	r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
931 			     gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
932 	if (r)
933 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
934 			  gtt->ttm.ttm.num_pages, gtt->offset);
935 	return r;
936 }
937 
938 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
939 {
940 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
941 	int r;
942 
943 	if (gtt->userptr)
944 		amdgpu_ttm_tt_unpin_userptr(ttm);
945 
946 	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
947 		return 0;
948 
949 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
950 	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
951 	if (r)
952 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
953 			  gtt->ttm.ttm.num_pages, gtt->offset);
954 	return r;
955 }
956 
957 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
958 {
959 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
960 
961 	ttm_dma_tt_fini(&gtt->ttm);
962 	kfree(gtt);
963 }
964 
965 static struct ttm_backend_func amdgpu_backend_func = {
966 	.bind = &amdgpu_ttm_backend_bind,
967 	.unbind = &amdgpu_ttm_backend_unbind,
968 	.destroy = &amdgpu_ttm_backend_destroy,
969 };
970 
971 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
972 				    unsigned long size, uint32_t page_flags,
973 				    struct page *dummy_read_page)
974 {
975 	struct amdgpu_device *adev;
976 	struct amdgpu_ttm_tt *gtt;
977 
978 	adev = amdgpu_ttm_adev(bdev);
979 
980 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
981 	if (gtt == NULL) {
982 		return NULL;
983 	}
984 	gtt->ttm.ttm.func = &amdgpu_backend_func;
985 	gtt->adev = adev;
986 	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
987 		kfree(gtt);
988 		return NULL;
989 	}
990 	return &gtt->ttm.ttm;
991 }
992 
993 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
994 			struct ttm_operation_ctx *ctx)
995 {
996 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
997 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
998 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
999 
1000 	if (ttm->state != tt_unpopulated)
1001 		return 0;
1002 
1003 	if (gtt && gtt->userptr) {
1004 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1005 		if (!ttm->sg)
1006 			return -ENOMEM;
1007 
1008 		ttm->page_flags |= TTM_PAGE_FLAG_SG;
1009 		ttm->state = tt_unbound;
1010 		return 0;
1011 	}
1012 
1013 	if (slave && ttm->sg) {
1014 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1015 						 gtt->ttm.dma_address, ttm->num_pages);
1016 		ttm->state = tt_unbound;
1017 		return 0;
1018 	}
1019 
1020 #ifdef CONFIG_SWIOTLB
1021 	if (swiotlb_nr_tbl()) {
1022 		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
1023 	}
1024 #endif
1025 
1026 	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
1027 }
1028 
1029 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1030 {
1031 	struct amdgpu_device *adev;
1032 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1033 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
1034 
1035 	if (gtt && gtt->userptr) {
1036 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1037 		kfree(ttm->sg);
1038 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
1039 		return;
1040 	}
1041 
1042 	if (slave)
1043 		return;
1044 
1045 	adev = amdgpu_ttm_adev(ttm->bdev);
1046 
1047 #ifdef CONFIG_SWIOTLB
1048 	if (swiotlb_nr_tbl()) {
1049 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
1050 		return;
1051 	}
1052 #endif
1053 
1054 	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1055 }
1056 
1057 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1058 			      uint32_t flags)
1059 {
1060 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1061 
1062 	if (gtt == NULL)
1063 		return -EINVAL;
1064 
1065 	gtt->userptr = addr;
1066 	gtt->usermm = current->mm;
1067 	gtt->userflags = flags;
1068 	spin_lock_init(&gtt->guptasklock);
1069 	INIT_LIST_HEAD(&gtt->guptasks);
1070 	atomic_set(&gtt->mmu_invalidations, 0);
1071 	gtt->last_set_pages = 0;
1072 
1073 	return 0;
1074 }
1075 
1076 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1077 {
1078 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1079 
1080 	if (gtt == NULL)
1081 		return NULL;
1082 
1083 	return gtt->usermm;
1084 }
1085 
1086 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1087 				  unsigned long end)
1088 {
1089 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1090 	struct amdgpu_ttm_gup_task_list *entry;
1091 	unsigned long size;
1092 
1093 	if (gtt == NULL || !gtt->userptr)
1094 		return false;
1095 
1096 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1097 	if (gtt->userptr > end || gtt->userptr + size <= start)
1098 		return false;
1099 
1100 	spin_lock(&gtt->guptasklock);
1101 	list_for_each_entry(entry, &gtt->guptasks, list) {
1102 		if (entry->task == current) {
1103 			spin_unlock(&gtt->guptasklock);
1104 			return false;
1105 		}
1106 	}
1107 	spin_unlock(&gtt->guptasklock);
1108 
1109 	atomic_inc(&gtt->mmu_invalidations);
1110 
1111 	return true;
1112 }
1113 
1114 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1115 				       int *last_invalidated)
1116 {
1117 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1118 	int prev_invalidated = *last_invalidated;
1119 
1120 	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
1121 	return prev_invalidated != *last_invalidated;
1122 }
1123 
1124 bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1125 {
1126 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1127 
1128 	if (gtt == NULL || !gtt->userptr)
1129 		return false;
1130 
1131 	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
1132 }
1133 
1134 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1135 {
1136 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1137 
1138 	if (gtt == NULL)
1139 		return false;
1140 
1141 	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1142 }
1143 
1144 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1145 				 struct ttm_mem_reg *mem)
1146 {
1147 	uint64_t flags = 0;
1148 
1149 	if (mem && mem->mem_type != TTM_PL_SYSTEM)
1150 		flags |= AMDGPU_PTE_VALID;
1151 
1152 	if (mem && mem->mem_type == TTM_PL_TT) {
1153 		flags |= AMDGPU_PTE_SYSTEM;
1154 
1155 		if (ttm->caching_state == tt_cached)
1156 			flags |= AMDGPU_PTE_SNOOPED;
1157 	}
1158 
1159 	flags |= adev->gart.gart_pte_flags;
1160 	flags |= AMDGPU_PTE_READABLE;
1161 
1162 	if (!amdgpu_ttm_tt_is_readonly(ttm))
1163 		flags |= AMDGPU_PTE_WRITEABLE;
1164 
1165 	return flags;
1166 }
1167 
1168 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1169 					    const struct ttm_place *place)
1170 {
1171 	unsigned long num_pages = bo->mem.num_pages;
1172 	struct drm_mm_node *node = bo->mem.mm_node;
1173 
1174 	switch (bo->mem.mem_type) {
1175 	case TTM_PL_TT:
1176 		return true;
1177 
1178 	case TTM_PL_VRAM:
1179 		/* Check each drm MM node individually */
1180 		while (num_pages) {
1181 			if (place->fpfn < (node->start + node->size) &&
1182 			    !(place->lpfn && place->lpfn <= node->start))
1183 				return true;
1184 
1185 			num_pages -= node->size;
1186 			++node;
1187 		}
1188 		return false;
1189 
1190 	default:
1191 		break;
1192 	}
1193 
1194 	return ttm_bo_eviction_valuable(bo, place);
1195 }
1196 
1197 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1198 				    unsigned long offset,
1199 				    void *buf, int len, int write)
1200 {
1201 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1202 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1203 	struct drm_mm_node *nodes;
1204 	uint32_t value = 0;
1205 	int ret = 0;
1206 	uint64_t pos;
1207 	unsigned long flags;
1208 
1209 	if (bo->mem.mem_type != TTM_PL_VRAM)
1210 		return -EIO;
1211 
1212 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
1213 	pos = (nodes->start << PAGE_SHIFT) + offset;
1214 
1215 	while (len && pos < adev->mc.mc_vram_size) {
1216 		uint64_t aligned_pos = pos & ~(uint64_t)3;
1217 		uint32_t bytes = 4 - (pos & 3);
1218 		uint32_t shift = (pos & 3) * 8;
1219 		uint32_t mask = 0xffffffff << shift;
1220 
1221 		if (len < bytes) {
1222 			mask &= 0xffffffff >> (bytes - len) * 8;
1223 			bytes = len;
1224 		}
1225 
1226 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1227 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1228 		WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1229 		if (!write || mask != 0xffffffff)
1230 			value = RREG32_NO_KIQ(mmMM_DATA);
1231 		if (write) {
1232 			value &= ~mask;
1233 			value |= (*(uint32_t *)buf << shift) & mask;
1234 			WREG32_NO_KIQ(mmMM_DATA, value);
1235 		}
1236 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1237 		if (!write) {
1238 			value = (value & mask) >> shift;
1239 			memcpy(buf, &value, bytes);
1240 		}
1241 
1242 		ret += bytes;
1243 		buf = (uint8_t *)buf + bytes;
1244 		pos += bytes;
1245 		len -= bytes;
1246 		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
1247 			++nodes;
1248 			pos = (nodes->start << PAGE_SHIFT);
1249 		}
1250 	}
1251 
1252 	return ret;
1253 }
1254 
1255 static struct ttm_bo_driver amdgpu_bo_driver = {
1256 	.ttm_tt_create = &amdgpu_ttm_tt_create,
1257 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
1258 	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1259 	.invalidate_caches = &amdgpu_invalidate_caches,
1260 	.init_mem_type = &amdgpu_init_mem_type,
1261 	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1262 	.evict_flags = &amdgpu_evict_flags,
1263 	.move = &amdgpu_bo_move,
1264 	.verify_access = &amdgpu_verify_access,
1265 	.move_notify = &amdgpu_bo_move_notify,
1266 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
1267 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1268 	.io_mem_free = &amdgpu_ttm_io_mem_free,
1269 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1270 	.access_memory = &amdgpu_ttm_access_memory
1271 };
1272 
1273 /*
1274  * Firmware Reservation functions
1275  */
1276 /**
1277  * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1278  *
1279  * @adev: amdgpu_device pointer
1280  *
1281  * free fw reserved vram if it has been reserved.
1282  */
1283 static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1284 {
1285 	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
1286 		NULL, &adev->fw_vram_usage.va);
1287 }
1288 
1289 /**
1290  * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1291  *
1292  * @adev: amdgpu_device pointer
1293  *
1294  * create bo vram reservation from fw.
1295  */
1296 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1297 {
1298 	struct ttm_operation_ctx ctx = { false, false };
1299 	int r = 0;
1300 	int i;
1301 	u64 vram_size = adev->mc.visible_vram_size;
1302 	u64 offset = adev->fw_vram_usage.start_offset;
1303 	u64 size = adev->fw_vram_usage.size;
1304 	struct amdgpu_bo *bo;
1305 
1306 	adev->fw_vram_usage.va = NULL;
1307 	adev->fw_vram_usage.reserved_bo = NULL;
1308 
1309 	if (adev->fw_vram_usage.size > 0 &&
1310 		adev->fw_vram_usage.size <= vram_size) {
1311 
1312 		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
1313 			PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
1314 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1315 			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
1316 			&adev->fw_vram_usage.reserved_bo);
1317 		if (r)
1318 			goto error_create;
1319 
1320 		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
1321 		if (r)
1322 			goto error_reserve;
1323 
1324 		/* remove the original mem node and create a new one at the
1325 		 * request position
1326 		 */
1327 		bo = adev->fw_vram_usage.reserved_bo;
1328 		offset = ALIGN(offset, PAGE_SIZE);
1329 		for (i = 0; i < bo->placement.num_placement; ++i) {
1330 			bo->placements[i].fpfn = offset >> PAGE_SHIFT;
1331 			bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
1332 		}
1333 
1334 		ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
1335 		r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
1336 				     &bo->tbo.mem, &ctx);
1337 		if (r)
1338 			goto error_pin;
1339 
1340 		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
1341 			AMDGPU_GEM_DOMAIN_VRAM,
1342 			adev->fw_vram_usage.start_offset,
1343 			(adev->fw_vram_usage.start_offset +
1344 			adev->fw_vram_usage.size), NULL);
1345 		if (r)
1346 			goto error_pin;
1347 		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
1348 			&adev->fw_vram_usage.va);
1349 		if (r)
1350 			goto error_kmap;
1351 
1352 		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
1353 	}
1354 	return r;
1355 
1356 error_kmap:
1357 	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
1358 error_pin:
1359 	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
1360 error_reserve:
1361 	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
1362 error_create:
1363 	adev->fw_vram_usage.va = NULL;
1364 	adev->fw_vram_usage.reserved_bo = NULL;
1365 	return r;
1366 }
1367 
1368 int amdgpu_ttm_init(struct amdgpu_device *adev)
1369 {
1370 	uint64_t gtt_size;
1371 	int r;
1372 	u64 vis_vram_limit;
1373 
1374 	r = amdgpu_ttm_global_init(adev);
1375 	if (r) {
1376 		return r;
1377 	}
1378 	/* No others user of address space so set it to 0 */
1379 	r = ttm_bo_device_init(&adev->mman.bdev,
1380 			       adev->mman.bo_global_ref.ref.object,
1381 			       &amdgpu_bo_driver,
1382 			       adev->ddev->anon_inode->i_mapping,
1383 			       DRM_FILE_PAGE_OFFSET,
1384 			       adev->need_dma32);
1385 	if (r) {
1386 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1387 		return r;
1388 	}
1389 	adev->mman.initialized = true;
1390 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1391 				adev->mc.real_vram_size >> PAGE_SHIFT);
1392 	if (r) {
1393 		DRM_ERROR("Failed initializing VRAM heap.\n");
1394 		return r;
1395 	}
1396 
1397 	/* Reduce size of CPU-visible VRAM if requested */
1398 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1399 	if (amdgpu_vis_vram_limit > 0 &&
1400 	    vis_vram_limit <= adev->mc.visible_vram_size)
1401 		adev->mc.visible_vram_size = vis_vram_limit;
1402 
1403 	/* Change the size here instead of the init above so only lpfn is affected */
1404 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
1405 
1406 	/*
1407 	 *The reserved vram for firmware must be pinned to the specified
1408 	 *place on the VRAM, so reserve it early.
1409 	 */
1410 	r = amdgpu_ttm_fw_reserve_vram_init(adev);
1411 	if (r) {
1412 		return r;
1413 	}
1414 
1415 	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
1416 				    AMDGPU_GEM_DOMAIN_VRAM,
1417 				    &adev->stolen_vga_memory,
1418 				    NULL, NULL);
1419 	if (r)
1420 		return r;
1421 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1422 		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
1423 
1424 	if (amdgpu_gtt_size == -1) {
1425 		struct sysinfo si;
1426 
1427 		si_meminfo(&si);
1428 		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1429 			       adev->mc.mc_vram_size),
1430 			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
1431 	}
1432 	else
1433 		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1434 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1435 	if (r) {
1436 		DRM_ERROR("Failed initializing GTT heap.\n");
1437 		return r;
1438 	}
1439 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1440 		 (unsigned)(gtt_size / (1024 * 1024)));
1441 
1442 	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
1443 	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
1444 	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
1445 	adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
1446 	adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
1447 	adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
1448 	adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
1449 	adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
1450 	adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
1451 	/* GDS Memory */
1452 	if (adev->gds.mem.total_size) {
1453 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
1454 				   adev->gds.mem.total_size >> PAGE_SHIFT);
1455 		if (r) {
1456 			DRM_ERROR("Failed initializing GDS heap.\n");
1457 			return r;
1458 		}
1459 	}
1460 
1461 	/* GWS */
1462 	if (adev->gds.gws.total_size) {
1463 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
1464 				   adev->gds.gws.total_size >> PAGE_SHIFT);
1465 		if (r) {
1466 			DRM_ERROR("Failed initializing gws heap.\n");
1467 			return r;
1468 		}
1469 	}
1470 
1471 	/* OA */
1472 	if (adev->gds.oa.total_size) {
1473 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
1474 				   adev->gds.oa.total_size >> PAGE_SHIFT);
1475 		if (r) {
1476 			DRM_ERROR("Failed initializing oa heap.\n");
1477 			return r;
1478 		}
1479 	}
1480 
1481 	r = amdgpu_ttm_debugfs_init(adev);
1482 	if (r) {
1483 		DRM_ERROR("Failed to init debugfs\n");
1484 		return r;
1485 	}
1486 	return 0;
1487 }
1488 
1489 void amdgpu_ttm_fini(struct amdgpu_device *adev)
1490 {
1491 	if (!adev->mman.initialized)
1492 		return;
1493 
1494 	amdgpu_ttm_debugfs_fini(adev);
1495 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1496 	amdgpu_ttm_fw_reserve_vram_fini(adev);
1497 
1498 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
1499 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
1500 	if (adev->gds.mem.total_size)
1501 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
1502 	if (adev->gds.gws.total_size)
1503 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
1504 	if (adev->gds.oa.total_size)
1505 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
1506 	ttm_bo_device_release(&adev->mman.bdev);
1507 	amdgpu_ttm_global_fini(adev);
1508 	adev->mman.initialized = false;
1509 	DRM_INFO("amdgpu: ttm finalized\n");
1510 }
1511 
1512 /* this should only be called at bootup or when userspace
1513  * isn't running */
1514 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
1515 {
1516 	struct ttm_mem_type_manager *man;
1517 
1518 	if (!adev->mman.initialized)
1519 		return;
1520 
1521 	man = &adev->mman.bdev.man[TTM_PL_VRAM];
1522 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
1523 	man->size = size >> PAGE_SHIFT;
1524 }
1525 
1526 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
1527 {
1528 	struct drm_file *file_priv;
1529 	struct amdgpu_device *adev;
1530 
1531 	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
1532 		return -EINVAL;
1533 
1534 	file_priv = filp->private_data;
1535 	adev = file_priv->minor->dev->dev_private;
1536 	if (adev == NULL)
1537 		return -EINVAL;
1538 
1539 	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
1540 }
1541 
1542 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
1543 			     struct ttm_mem_reg *mem, unsigned num_pages,
1544 			     uint64_t offset, unsigned window,
1545 			     struct amdgpu_ring *ring,
1546 			     uint64_t *addr)
1547 {
1548 	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
1549 	struct amdgpu_device *adev = ring->adev;
1550 	struct ttm_tt *ttm = bo->ttm;
1551 	struct amdgpu_job *job;
1552 	unsigned num_dw, num_bytes;
1553 	dma_addr_t *dma_address;
1554 	struct dma_fence *fence;
1555 	uint64_t src_addr, dst_addr;
1556 	uint64_t flags;
1557 	int r;
1558 
1559 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
1560 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
1561 
1562 	*addr = adev->mc.gart_start;
1563 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
1564 		AMDGPU_GPU_PAGE_SIZE;
1565 
1566 	num_dw = adev->mman.buffer_funcs->copy_num_dw;
1567 	while (num_dw & 0x7)
1568 		num_dw++;
1569 
1570 	num_bytes = num_pages * 8;
1571 
1572 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
1573 	if (r)
1574 		return r;
1575 
1576 	src_addr = num_dw * 4;
1577 	src_addr += job->ibs[0].gpu_addr;
1578 
1579 	dst_addr = adev->gart.table_addr;
1580 	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
1581 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
1582 				dst_addr, num_bytes);
1583 
1584 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1585 	WARN_ON(job->ibs[0].length_dw > num_dw);
1586 
1587 	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
1588 	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
1589 	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
1590 			    &job->ibs[0].ptr[num_dw]);
1591 	if (r)
1592 		goto error_free;
1593 
1594 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1595 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
1596 	if (r)
1597 		goto error_free;
1598 
1599 	dma_fence_put(fence);
1600 
1601 	return r;
1602 
1603 error_free:
1604 	amdgpu_job_free(job);
1605 	return r;
1606 }
1607 
1608 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
1609 		       uint64_t dst_offset, uint32_t byte_count,
1610 		       struct reservation_object *resv,
1611 		       struct dma_fence **fence, bool direct_submit,
1612 		       bool vm_needs_flush)
1613 {
1614 	struct amdgpu_device *adev = ring->adev;
1615 	struct amdgpu_job *job;
1616 
1617 	uint32_t max_bytes;
1618 	unsigned num_loops, num_dw;
1619 	unsigned i;
1620 	int r;
1621 
1622 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
1623 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
1624 	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
1625 
1626 	/* for IB padding */
1627 	while (num_dw & 0x7)
1628 		num_dw++;
1629 
1630 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1631 	if (r)
1632 		return r;
1633 
1634 	job->vm_needs_flush = vm_needs_flush;
1635 	if (resv) {
1636 		r = amdgpu_sync_resv(adev, &job->sync, resv,
1637 				     AMDGPU_FENCE_OWNER_UNDEFINED,
1638 				     false);
1639 		if (r) {
1640 			DRM_ERROR("sync failed (%d).\n", r);
1641 			goto error_free;
1642 		}
1643 	}
1644 
1645 	for (i = 0; i < num_loops; i++) {
1646 		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1647 
1648 		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
1649 					dst_offset, cur_size_in_bytes);
1650 
1651 		src_offset += cur_size_in_bytes;
1652 		dst_offset += cur_size_in_bytes;
1653 		byte_count -= cur_size_in_bytes;
1654 	}
1655 
1656 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1657 	WARN_ON(job->ibs[0].length_dw > num_dw);
1658 	if (direct_submit) {
1659 		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
1660 				       NULL, fence);
1661 		job->fence = dma_fence_get(*fence);
1662 		if (r)
1663 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
1664 		amdgpu_job_free(job);
1665 	} else {
1666 		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1667 				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1668 		if (r)
1669 			goto error_free;
1670 	}
1671 
1672 	return r;
1673 
1674 error_free:
1675 	amdgpu_job_free(job);
1676 	return r;
1677 }
1678 
1679 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1680 		       uint64_t src_data,
1681 		       struct reservation_object *resv,
1682 		       struct dma_fence **fence)
1683 {
1684 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1685 	uint32_t max_bytes = 8 *
1686 			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
1687 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
1688 
1689 	struct drm_mm_node *mm_node;
1690 	unsigned long num_pages;
1691 	unsigned int num_loops, num_dw;
1692 
1693 	struct amdgpu_job *job;
1694 	int r;
1695 
1696 	if (!ring->ready) {
1697 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
1698 		return -EINVAL;
1699 	}
1700 
1701 	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
1702 		r = amdgpu_ttm_alloc_gart(&bo->tbo);
1703 		if (r)
1704 			return r;
1705 	}
1706 
1707 	num_pages = bo->tbo.num_pages;
1708 	mm_node = bo->tbo.mem.mm_node;
1709 	num_loops = 0;
1710 	while (num_pages) {
1711 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1712 
1713 		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
1714 		num_pages -= mm_node->size;
1715 		++mm_node;
1716 	}
1717 
1718 	/* num of dwords for each SDMA_OP_PTEPDE cmd */
1719 	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
1720 
1721 	/* for IB padding */
1722 	num_dw += 64;
1723 
1724 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1725 	if (r)
1726 		return r;
1727 
1728 	if (resv) {
1729 		r = amdgpu_sync_resv(adev, &job->sync, resv,
1730 				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
1731 		if (r) {
1732 			DRM_ERROR("sync failed (%d).\n", r);
1733 			goto error_free;
1734 		}
1735 	}
1736 
1737 	num_pages = bo->tbo.num_pages;
1738 	mm_node = bo->tbo.mem.mm_node;
1739 
1740 	while (num_pages) {
1741 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1742 		uint64_t dst_addr;
1743 
1744 		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
1745 
1746 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
1747 		while (byte_count) {
1748 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1749 
1750 			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
1751 					dst_addr, 0,
1752 					cur_size_in_bytes >> 3, 0,
1753 					src_data);
1754 
1755 			dst_addr += cur_size_in_bytes;
1756 			byte_count -= cur_size_in_bytes;
1757 		}
1758 
1759 		num_pages -= mm_node->size;
1760 		++mm_node;
1761 	}
1762 
1763 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1764 	WARN_ON(job->ibs[0].length_dw > num_dw);
1765 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1766 			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1767 	if (r)
1768 		goto error_free;
1769 
1770 	return 0;
1771 
1772 error_free:
1773 	amdgpu_job_free(job);
1774 	return r;
1775 }
1776 
1777 #if defined(CONFIG_DEBUG_FS)
1778 
1779 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
1780 {
1781 	struct drm_info_node *node = (struct drm_info_node *)m->private;
1782 	unsigned ttm_pl = *(int *)node->info_ent->data;
1783 	struct drm_device *dev = node->minor->dev;
1784 	struct amdgpu_device *adev = dev->dev_private;
1785 	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
1786 	struct drm_printer p = drm_seq_file_printer(m);
1787 
1788 	man->func->debug(man, &p);
1789 	return 0;
1790 }
1791 
1792 static int ttm_pl_vram = TTM_PL_VRAM;
1793 static int ttm_pl_tt = TTM_PL_TT;
1794 
1795 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
1796 	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
1797 	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
1798 	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
1799 #ifdef CONFIG_SWIOTLB
1800 	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
1801 #endif
1802 };
1803 
1804 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1805 				    size_t size, loff_t *pos)
1806 {
1807 	struct amdgpu_device *adev = file_inode(f)->i_private;
1808 	ssize_t result = 0;
1809 	int r;
1810 
1811 	if (size & 0x3 || *pos & 0x3)
1812 		return -EINVAL;
1813 
1814 	if (*pos >= adev->mc.mc_vram_size)
1815 		return -ENXIO;
1816 
1817 	while (size) {
1818 		unsigned long flags;
1819 		uint32_t value;
1820 
1821 		if (*pos >= adev->mc.mc_vram_size)
1822 			return result;
1823 
1824 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1825 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1826 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1827 		value = RREG32_NO_KIQ(mmMM_DATA);
1828 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1829 
1830 		r = put_user(value, (uint32_t *)buf);
1831 		if (r)
1832 			return r;
1833 
1834 		result += 4;
1835 		buf += 4;
1836 		*pos += 4;
1837 		size -= 4;
1838 	}
1839 
1840 	return result;
1841 }
1842 
1843 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1844 				    size_t size, loff_t *pos)
1845 {
1846 	struct amdgpu_device *adev = file_inode(f)->i_private;
1847 	ssize_t result = 0;
1848 	int r;
1849 
1850 	if (size & 0x3 || *pos & 0x3)
1851 		return -EINVAL;
1852 
1853 	if (*pos >= adev->mc.mc_vram_size)
1854 		return -ENXIO;
1855 
1856 	while (size) {
1857 		unsigned long flags;
1858 		uint32_t value;
1859 
1860 		if (*pos >= adev->mc.mc_vram_size)
1861 			return result;
1862 
1863 		r = get_user(value, (uint32_t *)buf);
1864 		if (r)
1865 			return r;
1866 
1867 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1868 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1869 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1870 		WREG32_NO_KIQ(mmMM_DATA, value);
1871 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1872 
1873 		result += 4;
1874 		buf += 4;
1875 		*pos += 4;
1876 		size -= 4;
1877 	}
1878 
1879 	return result;
1880 }
1881 
1882 static const struct file_operations amdgpu_ttm_vram_fops = {
1883 	.owner = THIS_MODULE,
1884 	.read = amdgpu_ttm_vram_read,
1885 	.write = amdgpu_ttm_vram_write,
1886 	.llseek = default_llseek,
1887 };
1888 
1889 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1890 
1891 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
1892 				   size_t size, loff_t *pos)
1893 {
1894 	struct amdgpu_device *adev = file_inode(f)->i_private;
1895 	ssize_t result = 0;
1896 	int r;
1897 
1898 	while (size) {
1899 		loff_t p = *pos / PAGE_SIZE;
1900 		unsigned off = *pos & ~PAGE_MASK;
1901 		size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
1902 		struct page *page;
1903 		void *ptr;
1904 
1905 		if (p >= adev->gart.num_cpu_pages)
1906 			return result;
1907 
1908 		page = adev->gart.pages[p];
1909 		if (page) {
1910 			ptr = kmap(page);
1911 			ptr += off;
1912 
1913 			r = copy_to_user(buf, ptr, cur_size);
1914 			kunmap(adev->gart.pages[p]);
1915 		} else
1916 			r = clear_user(buf, cur_size);
1917 
1918 		if (r)
1919 			return -EFAULT;
1920 
1921 		result += cur_size;
1922 		buf += cur_size;
1923 		*pos += cur_size;
1924 		size -= cur_size;
1925 	}
1926 
1927 	return result;
1928 }
1929 
1930 static const struct file_operations amdgpu_ttm_gtt_fops = {
1931 	.owner = THIS_MODULE,
1932 	.read = amdgpu_ttm_gtt_read,
1933 	.llseek = default_llseek
1934 };
1935 
1936 #endif
1937 
1938 static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
1939 				   size_t size, loff_t *pos)
1940 {
1941 	struct amdgpu_device *adev = file_inode(f)->i_private;
1942 	int r;
1943 	uint64_t phys;
1944 	struct iommu_domain *dom;
1945 
1946 	// always return 8 bytes
1947 	if (size != 8)
1948 		return -EINVAL;
1949 
1950 	// only accept page addresses
1951 	if (*pos & 0xFFF)
1952 		return -EINVAL;
1953 
1954 	dom = iommu_get_domain_for_dev(adev->dev);
1955 	if (dom)
1956 		phys = iommu_iova_to_phys(dom, *pos);
1957 	else
1958 		phys = *pos;
1959 
1960 	r = copy_to_user(buf, &phys, 8);
1961 	if (r)
1962 		return -EFAULT;
1963 
1964 	return 8;
1965 }
1966 
1967 static const struct file_operations amdgpu_ttm_iova_fops = {
1968 	.owner = THIS_MODULE,
1969 	.read = amdgpu_iova_to_phys_read,
1970 	.llseek = default_llseek
1971 };
1972 
1973 static const struct {
1974 	char *name;
1975 	const struct file_operations *fops;
1976 	int domain;
1977 } ttm_debugfs_entries[] = {
1978 	{ "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
1979 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1980 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
1981 #endif
1982 	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
1983 };
1984 
1985 #endif
1986 
/*
 * Create the TTM debugfs files: first the raw files listed in
 * ttm_debugfs_entries (stored in adev->mman.debugfs_entries so that they can
 * be removed again), then the drm_info_list entries.
 *
 * Returns 0 on success or a negative error code.  Always returns 0 when
 * CONFIG_DEBUG_FS is not set.
 */
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned count;

	struct drm_minor *minor = adev->ddev->primary;
	struct dentry *ent, *root = minor->debugfs_root;

	for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
		ent = debugfs_create_file(
				ttm_debugfs_entries[count].name,
				S_IFREG | S_IRUGO, root,
				adev,
				ttm_debugfs_entries[count].fops);
		/*
		 * Depending on the kernel, failure is reported as NULL or as
		 * an ERR_PTR; catch both before ent->d_inode is touched.
		 */
		if (IS_ERR_OR_NULL(ent))
			return ent ? PTR_ERR(ent) : -ENOMEM;
		/* give the file a size matching the memory behind it */
		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
			i_size_write(ent->d_inode, adev->mc.mc_vram_size);
		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
			i_size_write(ent->d_inode, adev->mc.gart_size);
		adev->mman.debugfs_entries[count] = ent;
	}

	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);

#ifdef CONFIG_SWIOTLB
	/* the last list entry is the DMA page pool; skip it without swiotlb */
	if (!swiotlb_nr_tbl())
		--count;
#endif

	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
#else
	return 0;
#endif
}
2022 
/*
 * Remove the raw debugfs files recorded in adev->mman.debugfs_entries.
 * Compiles to an empty function without CONFIG_DEBUG_FS.
 */
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned idx = 0;

	while (idx < ARRAY_SIZE(ttm_debugfs_entries)) {
		debugfs_remove(adev->mman.debugfs_entries[idx]);
		idx++;
	}
#endif
}
2032