// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"

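/**
 * struct xe_vmas_in_madvise_range - VMAs collected for one madvise request
 * @addr: start address of the madvise range
 * @range: size of the madvise range
 * @vmas: array of VMAs overlapping the range, allocated by get_vmas()
 * @num_vmas: number of entries in @vmas
 * @has_svm_vmas: at least one VMA is a CPU-address-mirror (SVM) VMA
 * @has_bo_vmas: at least one VMA is backed by a BO
 * @has_userptr_vmas: at least one VMA is a userptr VMA
 */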
struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_svm_vmas;
	bool has_bo_vmas;
	bool has_userptr_vmas;
};

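/*
 * Collect all VMAs overlapping [addr, addr + range) into a dynamically grown
 * array and record which kinds of VMAs (BO-backed, SVM, userptr) are present.
 * Called with vm->lock held; on success with a non-zero num_vmas the caller
 * must free madvise_range->vmas.
 */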
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma))
			madvise_range->has_svm_vmas = true;
		else if (xe_vma_is_userptr(vma))
			madvise_range->has_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

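/*
 * DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: update the preferred memory location
 * attribute of CPU-address-mirror VMAs. VMAs that already match the requested
 * attributes, or that are not CPU address mirrors, are marked with
 * skip_invalidation so their PTEs are left alone.
 */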
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		/* TODO: Extend attributes to BO-based VMAs */
		if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     vmas[i]->attr.preferred_loc.migration_policy ==
		     op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vmas[i])) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added, migration_policy
			 * has no effect and can be ignored.
			 */
			vmas[i]->attr.preferred_loc.migration_policy =
						op->preferred_mem_loc.migration_policy;
		}
	}
}

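/*
 * DRM_XE_MEM_RANGE_ATTR_ATOMIC: update the atomic access attribute of the
 * VMAs and of any backing BOs. A VRAM BO switching to CPU or GLOBAL atomics
 * additionally has its CPU mapping torn down so the next CPU access migrates
 * it to system memory.
 */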
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate CPU page tables, so the BO can migrate to SMEM on the next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

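/*
 * DRM_XE_MEM_RANGE_ATTR_PAT: update the PAT index attribute of each VMA,
 * skipping invalidation for VMAs that already use the requested index.
 */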
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

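/*
 * Madvise handlers indexed by DRM_XE_MEM_RANGE_ATTR_*. args->type is
 * validated in madvise_args_are_sane() and sanitized with
 * array_index_nospec() before the table lookup in xe_vm_madvise_ioctl().
 */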
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op);

static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
};

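/*
 * Zap the GPU PTEs of every VMA in [start, end) that was not marked with
 * skip_invalidation, returning a mask of tiles whose page tables were
 * modified and therefore need a TLB invalidation.
 */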
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

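/*
 * Zap PTEs in the madvise range and, if any tile was touched, issue a ranged
 * TLB invalidation across the affected tiles.
 */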
static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}

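/*
 * Validate the ioctl arguments: SZ_4K alignment of start and range, a known
 * attribute type, in-range attribute values and zeroed pad/reserved fields.
 */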
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				     DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);

		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

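/*
 * Reject atomic-access settings that the placement flags of the backing BOs
 * cannot satisfy, e.g. CPU atomics on a BO that can never be placed in
 * system memory.
 */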
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}
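
/*
 * Illustrative userspace usage (a sketch, not part of this driver): request
 * device atomics on a range of a VM, assuming the uapi ioctl define follows
 * the usual DRM_IOCTL_XE_* naming, with vm_id/addr/size/drm_fd as
 * placeholders:
 *
 *	struct drm_xe_madvise args = {
 *		.vm_id = vm_id,
 *		.start = addr,	// must be 4 KiB aligned
 *		.range = size,	// must be 4 KiB aligned and at least 4 KiB
 *		.type = DRM_XE_MEM_RANGE_ATTR_ATOMIC,
 *		.atomic.val = DRM_XE_ATOMIC_DEVICE,
 *	};
 *	int err = ioctl(drm_fd, DRM_IOCTL_XE_MADVISE, &args);
 */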

/**
 * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for the VMAs within
 * the input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
							 .range = args->range, };
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
	if (err)
		goto unlock_vm;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto unlock_vm;

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto free_vmas;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	if (madvise_range.has_userptr_vmas) {
		err = down_read_interruptible(&vm->userptr.notifier_lock);
		if (err)
			goto err_fini;
	}

	if (madvise_range.has_svm_vmas) {
		err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock);
		if (err)
			goto unlock_userptr;
	}

	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);

	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);

	if (madvise_range.has_svm_vmas)
		xe_svm_notifier_unlock(vm);

unlock_userptr:
	if (madvise_range.has_userptr_vmas)
		up_read(&vm->userptr.notifier_lock);
err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
free_vmas:
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
unlock_vm:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}