// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tlb_inval.h"
#include "xe_vm.h"

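/*
 * Tracks the VMAs found in a madvise range, plus flags noting whether any
 * of them are BO-backed (need dma-resv locking) or SVM/userptr-backed
 * (need the SVM notifier lock).
 */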
struct xe_vmas_in_madvise_range {
        u64 addr;
        u64 range;
        struct xe_vma **vmas;
        int num_vmas;
        bool has_bo_vmas;
        bool has_svm_userptr_vmas;
};

/**
 * struct xe_madvise_details - Argument to madvise_funcs
 * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
 * @has_purged_bo: Track if any BO was purged (for purgeable state)
 * @retained_ptr: User pointer for the retained value (for purgeable state)
 *
 * The madvise IOCTL handler may, in addition to the user-space
 * args, have additional info to pass into the madvise_func that
 * handles the madvise type. Use a struct xe_madvise_details
 * for that and extend the struct as necessary.
 */
struct xe_madvise_details {
        struct drm_pagemap *dpagemap;
        bool has_purged_bo;
        u64 retained_ptr;
};

static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
        u64 addr = madvise_range->addr;
        u64 range = madvise_range->range;

        struct xe_vma **__vmas;
        struct drm_gpuva *gpuva;
        int max_vmas = 8;

        lockdep_assert_held(&vm->lock);

        madvise_range->num_vmas = 0;
        madvise_range->vmas = kmalloc_objs(*madvise_range->vmas, max_vmas);
        if (!madvise_range->vmas)
                return -ENOMEM;

        vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

        drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
                struct xe_vma *vma = gpuva_to_vma(gpuva);

                if (xe_vma_bo(vma))
                        madvise_range->has_bo_vmas = true;
                else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
                        madvise_range->has_svm_userptr_vmas = true;

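                /*
                 * Array is full: double the capacity. krealloc() leaves the
                 * old buffer in place on failure, so free it explicitly.
                 */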
                if (madvise_range->num_vmas == max_vmas) {
                        max_vmas <<= 1;
                        __vmas = krealloc(madvise_range->vmas,
                                          max_vmas * sizeof(*madvise_range->vmas),
                                          GFP_KERNEL);
                        if (!__vmas) {
                                kfree(madvise_range->vmas);
                                return -ENOMEM;
                        }
                        madvise_range->vmas = __vmas;
                }

                madvise_range->vmas[madvise_range->num_vmas] = vma;
                (madvise_range->num_vmas)++;
        }

        if (!madvise_range->num_vmas)
                kfree(madvise_range->vmas);

        vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas);

        return 0;
}

static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
                                      struct xe_vma **vmas, int num_vmas,
                                      struct drm_xe_madvise *op,
                                      struct xe_madvise_details *details)
{
        int i;

        xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

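        /*
         * VMAs whose preferred location is unchanged, or that are not CPU
         * address mirrors, are flagged with skip_invalidation so their PTEs
         * are left untouched by xe_zap_ptes_in_madvise_range().
         */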
        for (i = 0; i < num_vmas; i++) {
                struct xe_vma *vma = vmas[i];
                struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;

                /* TODO: Extend attributes to BO-based VMAs */
                if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
                     loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
                    !xe_vma_is_cpu_addr_mirror(vma)) {
                        vma->skip_invalidation = true;
                } else {
                        vma->skip_invalidation = false;
                        loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
                        /*
                         * Until multi-device support is added, migration_policy
                         * is unused and can be ignored.
                         */
                        loc->migration_policy = op->preferred_mem_loc.migration_policy;
                        drm_pagemap_put(loc->dpagemap);
                        loc->dpagemap = NULL;
                        if (details->dpagemap)
                                loc->dpagemap = drm_pagemap_get(details->dpagemap);
                }
        }
}

static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
                           struct xe_vma **vmas, int num_vmas,
                           struct drm_xe_madvise *op,
                           struct xe_madvise_details *details)
{
        struct xe_bo *bo;
        int i;

        xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
        xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

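        /*
         * Userptr VMAs are always backed by system memory, so the only
         * atomic mode that can take effect is DRM_XE_ATOMIC_DEVICE on
         * hardware with device atomics on SMEM; otherwise skip them.
         */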
138 for (i = 0; i < num_vmas; i++) {
139 if (xe_vma_is_userptr(vmas[i]) &&
140 !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
141 xe->info.has_device_atomics_on_smem)) {
142 vmas[i]->skip_invalidation = true;
143 continue;
144 }
145
146 if (vmas[i]->attr.atomic_access == op->atomic.val) {
147 vmas[i]->skip_invalidation = true;
148 } else {
149 vmas[i]->skip_invalidation = false;
150 vmas[i]->attr.atomic_access = op->atomic.val;
151 }
152
153 bo = xe_vma_bo(vmas[i]);
154 if (!bo || bo->attr.atomic_access == op->atomic.val)
155 continue;
156
157 vmas[i]->skip_invalidation = false;
158 xe_bo_assert_held(bo);
159 bo->attr.atomic_access = op->atomic.val;
160
161 /* Invalidate cpu page table, so bo can migrate to smem in next access */
162 if (xe_bo_is_vram(bo) &&
163 (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
164 bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
165 ttm_bo_unmap_virtual(&bo->ttm);
166 }
167 }
168
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
                              struct xe_vma **vmas, int num_vmas,
                              struct drm_xe_madvise *op,
                              struct xe_madvise_details *details)
{
        int i;

        xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

        for (i = 0; i < num_vmas; i++) {
                if (vmas[i]->attr.pat_index == op->pat_index.val) {
                        vmas[i]->skip_invalidation = true;
                } else {
                        vmas[i]->skip_invalidation = false;
                        vmas[i]->attr.pat_index = op->pat_index.val;
                }
        }
}

/**
 * madvise_purgeable - Handle purgeable buffer object advice
 * @xe: XE device
 * @vm: VM
 * @vmas: Array of VMAs
 * @num_vmas: Number of VMAs
 * @op: Madvise operation
 * @details: Madvise details for return values
 *
 * Handles DONTNEED/WILLNEED/PURGED states. Tracks if any BO was purged
 * in details->has_purged_bo for later copy to userspace.
 */
static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
                              struct xe_vma **vmas, int num_vmas,
                              struct drm_xe_madvise *op,
                              struct xe_madvise_details *details)
{
        int i;

        xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE);

        for (i = 0; i < num_vmas; i++) {
                struct xe_bo *bo = xe_vma_bo(vmas[i]);

                if (!bo) {
                        /* Purgeable state applies to BOs only, skip non-BO VMAs */
                        vmas[i]->skip_invalidation = true;
                        continue;
                }

                /* BO must be locked before modifying madv state */
                xe_bo_assert_held(bo);

                /*
                 * Once purged, always purged. Cannot transition back to WILLNEED.
                 * This matches i915 semantics where purged BOs are permanently invalid.
                 */
                if (xe_bo_is_purged(bo)) {
                        details->has_purged_bo = true;
                        vmas[i]->skip_invalidation = true;
                        continue;
                }

                switch (op->purge_state_val.val) {
                case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED:
                        vmas[i]->skip_invalidation = true;
                        /* Only act on a real DONTNEED -> WILLNEED transition. */
                        if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_DONTNEED) {
                                vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
                                xe_bo_willneed_get_locked(bo);
                        }
                        break;
                case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED:
                        /*
                         * Don't zap PTEs at DONTNEED time -- pages are still
                         * alive. The zap happens in xe_bo_move_notify() right
                         * before the shrinker frees them.
                         */
                        vmas[i]->skip_invalidation = true;

                        /* Only act on a real WILLNEED -> DONTNEED transition. */
                        if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
                                vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
                                xe_bo_willneed_put_locked(bo);
                        }
                        break;
                default:
                        /* Should never hit - values validated in madvise_args_are_sane() */
                        xe_assert(vm->xe, 0);
                        return;
                }
        }
}

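/*
 * Per-attribute handlers, indexed by the DRM_XE_MEM_RANGE_ATTR_* /
 * DRM_XE_VMA_ATTR_* type supplied in the ioctl args.
 */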
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
                             struct xe_vma **vmas, int num_vmas,
                             struct drm_xe_madvise *op,
                             struct xe_madvise_details *details);

static const madvise_func madvise_funcs[] = {
        [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
        [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
        [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
        [DRM_XE_VMA_ATTR_PURGEABLE_STATE] = madvise_purgeable,
};

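/*
 * Zap the GPU PTEs of every non-skipped VMA in [start, end) and return a
 * mask of the tiles whose TLBs must be invalidated afterwards.
 */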
static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
        struct drm_gpuva *gpuva;
        struct xe_tile *tile;
        u8 id, tile_mask = 0;

        lockdep_assert_held_write(&vm->lock);

        /* Wait for pending binds */
        if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
                                  false, MAX_SCHEDULE_TIMEOUT) <= 0)
                XE_WARN_ON(1);

        drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
                struct xe_vma *vma = gpuva_to_vma(gpuva);

                if (vma->skip_invalidation || xe_vma_is_null(vma))
                        continue;

                if (xe_vma_is_cpu_addr_mirror(vma)) {
                        tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
                                                                     xe_vma_start(vma),
                                                                     xe_vma_end(vma));
                } else {
                        for_each_tile(tile, vm->xe, id) {
                                if (xe_pt_zap_ptes(tile, vma)) {
                                        tile_mask |= BIT(id);

                                        /*
                                         * WRITE_ONCE pairs with READ_ONCE
                                         * in xe_vm_has_valid_gpu_mapping()
                                         */
                                        WRITE_ONCE(vma->tile_invalidated,
                                                   vma->tile_invalidated | BIT(id));
                                }
                        }
                }
        }

        return tile_mask;
}

static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
        u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
        struct xe_tlb_inval_batch batch;
        int err;

        if (!tile_mask)
                return 0;

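        /* Make the PTE zaps visible before issuing the TLB invalidation */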
        xe_device_wmb(vm->xe);

        err = xe_tlb_inval_range_tilemask_submit(vm->xe, vm->usm.asid, start, end,
                                                 tile_mask, &batch);
        if (!err)
                xe_tlb_inval_batch_wait(&batch);

        return err;
}

static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
        if (XE_IOCTL_DBG(xe, !args))
                return false;

        if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
                return false;

        if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
                return false;

        if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
                return false;

        switch (args->type) {
        case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
        {
                s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

                if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
                        return false;

                if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
                                 args->preferred_mem_loc.region_instance != 0))
                        return false;

                if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
                                 DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
                        return false;

                if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
                        return false;
                break;
        }
        case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
                if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
                        return false;

                if (XE_IOCTL_DBG(xe, args->atomic.pad))
                        return false;

                if (XE_IOCTL_DBG(xe, args->atomic.reserved))
                        return false;

                break;
        case DRM_XE_MEM_RANGE_ATTR_PAT:
        {
                u16 pat_index, coh_mode;

                if (XE_IOCTL_DBG(xe, args->pat_index.val >= xe->pat.n_entries))
                        return false;

                pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
                coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
                if (XE_IOCTL_DBG(xe, !coh_mode))
                        return false;

                if (XE_WARN_ON(coh_mode > XE_COH_2WAY))
                        return false;

                if (XE_IOCTL_DBG(xe, args->pat_index.pad))
                        return false;

                if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
                        return false;
                break;
        }
        case DRM_XE_VMA_ATTR_PURGEABLE_STATE:
        {
                u32 val = args->purge_state_val.val;

                if (XE_IOCTL_DBG(xe, !(val == DRM_XE_VMA_PURGEABLE_STATE_WILLNEED ||
                                       val == DRM_XE_VMA_PURGEABLE_STATE_DONTNEED)))
                        return false;

                if (XE_IOCTL_DBG(xe, args->purge_state_val.pad))
                        return false;

                break;
        }
        default:
                if (XE_IOCTL_DBG(xe, 1))
                        return false;
        }

        if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
                return false;

        return true;
}

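/*
 * Resolve madvise arguments that need extra lookups: stash the user pointer
 * for the purgeable "retained" value, or turn a devmem_fd into a drm_pagemap
 * reference (dropped again in xe_madvise_details_fini()).
 */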
static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args,
                                   struct xe_madvise_details *details)
{
        struct xe_device *xe = vm->xe;

        memset(details, 0, sizeof(*details));

        /* Store retained pointer for purgeable state */
        if (args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE) {
                details->retained_ptr = args->purge_state_val.retained_ptr;
                return 0;
        }

        if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
                int fd = args->preferred_mem_loc.devmem_fd;
                struct drm_pagemap *dpagemap;

                if (fd <= 0)
                        return 0;

                dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
                                                  args->preferred_mem_loc.region_instance);
                if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap)))
                        return PTR_ERR(dpagemap);

                /* Don't allow a foreign placement without a fast interconnect! */
                if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) {
                        drm_pagemap_put(dpagemap);
                        return -ENOLINK;
                }
                details->dpagemap = dpagemap;
        }

        return 0;
}

static void xe_madvise_details_fini(struct xe_madvise_details *details)
{
        drm_pagemap_put(details->dpagemap);
}

static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details *details)
{
        u32 retained;

        if (!details->retained_ptr)
                return 0;

        retained = !details->has_purged_bo;

        if (put_user(retained, (u32 __user *)u64_to_user_ptr(details->retained_ptr)))
                return -EFAULT;

        return 0;
}

static bool check_pat_args_are_sane(struct xe_device *xe,
                                    struct xe_vmas_in_madvise_range *madvise_range,
                                    u16 pat_index)
{
        u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
        int i;

        /*
         * Using coh_none with CPU cached buffers is not allowed on iGPU.
         * On iGPU the GPU shares the LLC with the CPU, so with coh_none
         * the GPU bypasses CPU caches and reads directly from DRAM,
         * potentially seeing stale sensitive data from previously freed
         * pages. On dGPU this restriction does not apply, because the
         * platform does not provide a non-coherent system memory access
         * path that would violate the DMA coherency contract.
         */
        if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
                return true;

        for (i = 0; i < madvise_range->num_vmas; i++) {
                struct xe_vma *vma = madvise_range->vmas[i];
                struct xe_bo *bo = xe_vma_bo(vma);

                if (bo) {
                        /* BO with WB caching + COH_NONE is not allowed */
                        if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
                                return false;
                        /* Imported dma-buf without caching info, assume cached */
                        if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
                                return false;
                } else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
                                        xe_vma_is_userptr(vma)))
                        /* System memory (userptr/SVM) is always CPU cached */
                        return false;
        }

        return true;
}

static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
                                   int num_vmas, u32 atomic_val)
{
        struct xe_device *xe = vm->xe;
        struct xe_bo *bo;
        int i;

        for (i = 0; i < num_vmas; i++) {
                bo = xe_vma_bo(vmas[i]);
                if (!bo)
                        continue;
                /*
                 * NOTE: The following atomic checks are platform-specific. For example,
                 * if a device supports CXL atomics, these may not be necessary or
                 * may behave differently.
                 */
                if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
                                 !(bo->flags & XE_BO_FLAG_SYSTEM)))
                        return false;

                if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
                                 !(bo->flags & XE_BO_FLAG_VRAM0) &&
                                 !(bo->flags & XE_BO_FLAG_VRAM1) &&
                                 !(bo->flags & XE_BO_FLAG_SYSTEM &&
                                   xe->info.has_device_atomics_on_smem)))
                        return false;

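                /* GLOBAL atomics need a BO placeable in both system memory and VRAM */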
                if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
                                 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
                                  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
                                   !(bo->flags & XE_BO_FLAG_VRAM1)))))
                        return false;
        }
        return true;
}

/**
 * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for VMAs within the
 * input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_file *xef = to_xe_file(file);
        struct drm_xe_madvise *args = data;
        struct xe_vmas_in_madvise_range madvise_range = {
                /*
                 * Userspace may pass canonical (sign-extended) addresses.
                 * Strip the sign extension to get the internal non-canonical
                 * form used by the GPUVM, matching xe_vm_bind_ioctl() behavior.
                 */
                .addr = xe_device_uncanonicalize_addr(xe, args->start),
                .range = args->range,
        };
        struct xe_madvise_details details;
        u16 pat_index, coh_mode;
        struct xe_vm *vm;
        struct drm_exec exec;
        int err, attr_type;
        bool do_retained;

        vm = xe_vm_lookup(xef, args->vm_id);
        if (XE_IOCTL_DBG(xe, !vm))
                return -EINVAL;

        if (!madvise_args_are_sane(vm->xe, args)) {
                err = -EINVAL;
                goto put_vm;
        }

        /* Cache whether we need to write retained, and validate it's initialized to 0 */
        do_retained = args->type == DRM_XE_VMA_ATTR_PURGEABLE_STATE &&
                      args->purge_state_val.retained_ptr;
        if (do_retained) {
                u32 retained;
                u32 __user *retained_ptr;

                retained_ptr = u64_to_user_ptr(args->purge_state_val.retained_ptr);
                if (get_user(retained, retained_ptr)) {
                        err = -EFAULT;
                        goto put_vm;
                }

                if (XE_IOCTL_DBG(xe, retained != 0)) {
                        err = -EINVAL;
                        goto put_vm;
                }
        }

        xe_svm_flush(vm);

        err = down_write_killable(&vm->lock);
        if (err)
                goto put_vm;

        if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
                err = -ENOENT;
                goto unlock_vm;
        }

        err = xe_madvise_details_init(vm, args, &details);
        if (err)
                goto unlock_vm;

        err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range);
        if (err)
                goto madv_fini;

        err = get_vmas(vm, &madvise_range);
        if (err || !madvise_range.num_vmas)
                goto madv_fini;

        if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
                pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
                coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
                if (XE_IOCTL_DBG(xe, madvise_range.has_svm_userptr_vmas &&
                                 xe_device_is_l2_flush_optimized(xe) &&
                                 (pat_index != 19 && coh_mode != XE_COH_2WAY))) {
                        err = -EINVAL;
                        goto madv_fini;
                }
        }

        if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
                if (!check_pat_args_are_sane(xe, &madvise_range,
                                             args->pat_index.val)) {
                        err = -EINVAL;
                        goto free_vmas;
                }
        }

        if (madvise_range.has_bo_vmas) {
                if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
                        if (!check_bo_args_are_sane(vm, madvise_range.vmas,
                                                    madvise_range.num_vmas,
                                                    args->atomic.val)) {
                                err = -EINVAL;
                                goto free_vmas;
                        }
                }

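                /*
                 * Lock each BO backing a VMA in the range so the attribute
                 * updates (and any CPU-mapping zaps) run under its dma-resv
                 * lock, as asserted by the madvise handlers.
                 */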
                drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
                drm_exec_until_all_locked(&exec) {
                        for (int i = 0; i < madvise_range.num_vmas; i++) {
                                struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

                                if (!bo)
                                        continue;

                                if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
                                        if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach &&
                                                         xe_device_is_l2_flush_optimized(xe) &&
                                                         (pat_index != 19 &&
                                                          coh_mode != XE_COH_2WAY))) {
                                                err = -EINVAL;
                                                goto err_fini;
                                        }
                                }

                                err = drm_exec_lock_obj(&exec, &bo->ttm.base);
                                drm_exec_retry_on_contention(&exec);
                                if (err)
                                        goto err_fini;
                        }
                }
        }

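        /* SVM and userptr VMAs are protected by the notifier lock instead */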
        if (madvise_range.has_svm_userptr_vmas) {
                err = xe_svm_notifier_lock_interruptible(vm);
                if (err)
                        goto err_fini;
        }

        attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));

        /* Ensure the madvise function exists for this type */
        if (!madvise_funcs[attr_type]) {
                err = -EINVAL;
                goto err_fini;
        }

        madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
                                 &details);

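        /*
         * Zap PTEs and invalidate TLBs across the range so the next GPU
         * access/bind picks up the updated attributes.
         */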
        err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
                                             madvise_range.addr + args->range);

        if (madvise_range.has_svm_userptr_vmas)
                xe_svm_notifier_unlock(vm);

err_fini:
        if (madvise_range.has_bo_vmas)
                drm_exec_fini(&exec);
free_vmas:
        kfree(madvise_range.vmas);
        madvise_range.vmas = NULL;
madv_fini:
        xe_madvise_details_fini(&details);
unlock_vm:
        up_write(&vm->lock);

        /* Write retained value to user after releasing all locks */
        if (!err && do_retained)
                err = xe_madvise_purgeable_retained_to_user(&details);
put_vm:
        xe_vm_put(vm);
        return err;
}