// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"

struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_bo_vmas;
	bool has_svm_userptr_vmas;
};

/**
 * struct xe_madvise_details - Argument to madvise_funcs
 * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
 *
 * The madvise IOCTL handler may, in addition to the user-space
 * args, have additional info to pass into the madvise_func that
 * handles the madvise type. Use a struct xe_madvise_details
 * for that and extend the struct as necessary.
 */
struct xe_madvise_details {
	struct drm_pagemap *dpagemap;
};

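/*
 * Collect all VMAs intersecting [addr, addr + range) into a dynamically
 * grown array in @madvise_range, and note whether any of them are
 * BO-backed or SVM/userptr VMAs so the caller knows which locks to take.
 * On success the caller owns madvise_range->vmas and must kfree() it;
 * if no VMA is found the array is freed here and num_vmas is 0.
 */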
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_objs(*madvise_range->vmas, max_vmas);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
			madvise_range->has_svm_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

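/*
 * Apply the requested preferred memory location to every VMA in @vmas.
 * A VMA whose attributes already match the request, or which is not a
 * CPU address mirror VMA, is marked skip_invalidation so the later zap
 * pass leaves its GPU mappings intact.
 */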
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op,
				      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		struct xe_vma *vma = vmas[i];
		struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;

		/* TODO: Extend attributes to BO-based VMAs */
		if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vma)) {
			vma->skip_invalidation = true;
		} else {
			vma->skip_invalidation = false;
			loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added,
			 * migration_policy is of no use and can be ignored.
			 */
			loc->migration_policy = op->preferred_mem_loc.migration_policy;
			drm_pagemap_put(loc->dpagemap);
			loc->dpagemap = NULL;
			if (details->dpagemap)
				loc->dpagemap = drm_pagemap_get(details->dpagemap);
		}
	}
}

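/*
 * Apply the requested atomic access mode to every VMA in @vmas and to
 * any backing BO. Userptr VMAs only take the attribute for
 * DRM_XE_ATOMIC_DEVICE on devices with atomics on system memory;
 * VRAM-placed BOs switching to CPU or GLOBAL atomics get their CPU
 * mappings unmapped so the next access can migrate them to smem.
 */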
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op,
			   struct xe_madvise_details *details)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate the CPU page table so the BO can migrate to smem on next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

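/*
 * Apply the requested PAT index to every VMA in @vmas, skipping
 * invalidation for VMAs that already use it.
 */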
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op,
			      struct xe_madvise_details *details)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op,
			     struct xe_madvise_details *details);

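/*
 * Handlers for the supported madvise types, indexed by
 * drm_xe_madvise::type. madvise_args_are_sane() rejects any type
 * without an entry here.
 */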
static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
};

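/*
 * Zap the GPU PTEs of every VMA in [start, end) that is not marked
 * skip_invalidation, after waiting for pending binds to complete.
 * Returns a mask of the tiles whose page tables were modified, for use
 * in a subsequent TLB invalidation.
 */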
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

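/*
 * Zap PTEs in [start, end) and, if any tile was affected, issue a
 * TLB invalidation for the range on those tiles.
 */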
static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}

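/*
 * Validate the user-supplied madvise args: SZ_4K alignment of start and
 * range, per-type sanity of the attribute value, and clear pad/reserved
 * fields. Returns true if the args are usable.
 */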
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
				 args->preferred_mem_loc.region_instance != 0))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				     DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 pat_index, coh_mode;

		if (XE_IOCTL_DBG(xe, args->pat_index.val >= xe->pat.n_entries))
			return false;

		pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

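/*
 * Derive kernel-internal details from the user args. For a
 * PREFERRED_LOC advise with a devmem_fd, this looks up the drm_pagemap
 * and rejects foreign placements without a fast interconnect. Must be
 * paired with xe_madvise_details_fini().
 */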
static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args,
				   struct xe_madvise_details *details)
{
	struct xe_device *xe = vm->xe;

	memset(details, 0, sizeof(*details));

	if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
		int fd = args->preferred_mem_loc.devmem_fd;
		struct drm_pagemap *dpagemap;

		if (fd <= 0)
			return 0;

		dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
						  args->preferred_mem_loc.region_instance);
		if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap)))
			return PTR_ERR(dpagemap);

		/* Don't allow a foreign placement without a fast interconnect! */
		if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) {
			drm_pagemap_put(dpagemap);
			return -ENOLINK;
		}
		details->dpagemap = dpagemap;
	}

	return 0;
}

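/* Drop the references taken by xe_madvise_details_init(). */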
static void xe_madvise_details_fini(struct xe_madvise_details *details)
{
	drm_pagemap_put(details->dpagemap);
}

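/*
 * Check that the requested atomic access mode is compatible with the
 * placement flags of every BO-backed VMA in @vmas; e.g. CPU atomics
 * require a BO that can live in system memory.
 */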
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}

/**
 * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for VMAs within
 * the input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
							 .range = args->range, };
	struct xe_madvise_details details;
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_madvise_details_init(vm, args, &details);
	if (err)
		goto unlock_vm;

	err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
	if (err)
		goto madv_fini;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto madv_fini;

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto free_vmas;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	if (madvise_range.has_svm_userptr_vmas) {
		err = xe_svm_notifier_lock_interruptible(vm);
		if (err)
			goto err_fini;
	}

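	/*
	 * All required locks are held here: vm->lock for writing, the
	 * dma-resv locks of any BO-backed VMAs via drm_exec, and the SVM
	 * notifier lock when SVM/userptr VMAs are present.
	 */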
	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
				 &details);

	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);

	if (madvise_range.has_svm_userptr_vmas)
		xe_svm_notifier_unlock(vm);

err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
free_vmas:
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
madv_fini:
	xe_madvise_details_fini(&details);
unlock_vm:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}