xref: /linux/drivers/gpu/drm/xe/xe_vm_madvise.c (revision b615879dbfea6cf1236acbc3f2fb25ae84e07071)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"

struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_bo_vmas;
	bool has_svm_userptr_vmas;
};

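/*
 * get_vmas() - Collect the VMAs overlapping the madvise range
 *
 * Walks the GPU VM for VMAs intersecting [addr, addr + range), storing them in
 * a dynamically grown array and noting whether any BO-backed or SVM/userptr
 * VMAs were found. On success with a non-zero num_vmas the caller owns the
 * vmas array and must kfree() it; on error or when no VMAs are found the
 * array is freed here.
 */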
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
			madvise_range->has_svm_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

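/*
 * madvise_preferred_mem_loc() - Apply the preferred memory location attribute
 *
 * Only CPU-address-mirror (SVM) VMAs take the new preferred location; all
 * other VMAs, and VMAs whose attributes already match the request, are marked
 * skip_invalidation so their PTEs are left untouched.
 */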
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		/* TODO: Extend attributes to BO-based VMAs */
		if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     vmas[i]->attr.preferred_loc.migration_policy ==
		     op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vmas[i])) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added, migration_policy
			 * has no effect and can be ignored.
			 */
			vmas[i]->attr.preferred_loc.migration_policy =
						op->preferred_mem_loc.migration_policy;
		}
	}
}

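/*
 * madvise_atomic() - Apply the atomic-access attribute
 *
 * Updates the atomic_access attribute on each VMA and on any backing BO,
 * skipping invalidation where nothing changes. For VRAM-resident BOs switching
 * to CPU or GLOBAL atomics, the CPU mapping is unmapped so the next CPU access
 * faults and the BO can migrate to system memory.
 */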
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate the CPU page table so the BO can migrate to SMEM on the next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

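/*
 * madvise_pat_index() - Apply the PAT index attribute
 *
 * Stores the requested PAT index on each VMA, marking VMAs whose PAT index
 * already matches the request as skip_invalidation.
 */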
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

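/* Per-attribute madvise handlers, indexed by the DRM_XE_MEM_RANGE_ATTR_* value from userspace */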
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op);

static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
};

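/*
 * xe_zap_ptes_in_madvise_range() - Zap GPU PTEs for the madvise range
 *
 * After waiting for pending binds, zaps the PTEs of every VMA in [start, end)
 * that is not marked skip_invalidation, so the next GPU access refaults and
 * picks up the new attributes. Returns a mask of tiles that need a TLB
 * invalidation.
 */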
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

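/*
 * xe_vm_invalidate_madvise_range() - Zap PTEs and invalidate TLBs for a range
 *
 * Issues a ranged TLB invalidation on every tile whose PTEs were zapped; a
 * no-op if nothing in the range required invalidation.
 */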
static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}

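/*
 * madvise_args_are_sane() - Validate the user-supplied madvise arguments
 *
 * Checks the alignment and size of the range, the attribute type, and the
 * per-attribute value, pad and reserved fields before any VM state is touched.
 */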
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				     DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);

		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

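/*
 * check_bo_args_are_sane() - Validate the atomic attribute against BO placement
 *
 * Rejects atomic-access requests that the backing BOs cannot honour given
 * their allowed placements (system memory vs. VRAM).
 */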
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}

/**
 * xe_vm_madvise_ioctl - Handle the MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for the VMAs within the
 * input range.
 *
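 * An illustrative userspace call (sketch only; it assumes the
 * DRM_IOCTL_XE_MADVISE wrapper and struct layout from the xe uAPI headers,
 * with start/range 4 KiB aligned and range at least 4 KiB):
 *
 *	struct drm_xe_madvise args = {
 *		.vm_id = vm_id,
 *		.start = va,
 *		.range = len,
 *		.type = DRM_XE_MEM_RANGE_ATTR_ATOMIC,
 *		.atomic.val = DRM_XE_ATOMIC_DEVICE,
 *	};
 *	int ret = ioctl(fd, DRM_IOCTL_XE_MADVISE, &args);
 *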
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
							 .range = args->range, };
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
	if (err)
		goto unlock_vm;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto unlock_vm;

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto unlock_vm;
			}
		}

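		/*
		 * Lock the dma-resv of every backing BO in the range so the
		 * attribute updates and PTE zapping run under the reservation
		 * locks.
		 */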
		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

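	/*
	 * SVM and userptr VMAs are updated under the SVM notifier lock so the
	 * attribute changes and PTE zapping are serialized against MMU
	 * notifier invalidations.
	 */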
	if (madvise_range.has_svm_userptr_vmas) {
		err = xe_svm_notifier_lock_interruptible(vm);
		if (err)
			goto err_fini;
	}

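	/*
	 * args->type was validated above; array_index_nospec() prevents
	 * speculative out-of-bounds indexing into madvise_funcs.
	 */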
	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);

	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);

	if (madvise_range.has_svm_userptr_vmas)
		xe_svm_notifier_unlock(vm);

err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
unlock_vm:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}