// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"

struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_bo_vmas;
	bool has_svm_userptr_vmas;
};

static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
			madvise_range->has_svm_userptr_vmas = true;

		/* Grow the array if the range spans more VMAs than initially allocated for */
		if (madvise_range->num_vmas == max_vmas) {
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		/* TODO: Extend attributes to bo based vmas */
		if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     vmas[i]->attr.preferred_loc.migration_policy ==
		     op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vmas[i])) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added, migration_policy
			 * has no effect and can be ignored.
			 */
			vmas[i]->attr.preferred_loc.migration_policy =
						op->preferred_mem_loc.migration_policy;
		}
	}
}

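/*
 * madvise_atomic() - apply the requested atomic access mode to each VMA in
 * the range and to its backing BO, if any. Userptr VMAs are only updated when
 * DRM_XE_ATOMIC_DEVICE is requested and the device supports atomics on system
 * memory; VMAs whose attribute is already up to date are marked
 * skip_invalidation so the later invalidation pass leaves their PTEs intact.
 */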
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate cpu page table, so bo can migrate to smem in next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op);

static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
};

static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}

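/*
 * madvise_args_are_sane() - validate the user-supplied struct drm_xe_madvise
 * before any locks are taken: start and range must be 4K aligned, range must
 * be at least 4K, the type-specific payload must be within its allowed range
 * and all pad/reserved fields must be zero.
 */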
static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				     DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);

		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

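/*
 * check_bo_args_are_sane() - verify that the requested atomic access mode is
 * compatible with the placement flags of every BO-backed VMA in the range,
 * e.g. DRM_XE_ATOMIC_CPU requires the BO to allow system memory placement.
 */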
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
				   xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}

/**
 * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for the VMAs within
 * the input range.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
							 .range = args->range, };
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
	if (err)
		goto unlock_vm;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto unlock_vm;

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto unlock_vm;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	if (madvise_range.has_svm_userptr_vmas) {
		err = xe_svm_notifier_lock_interruptible(vm);
		if (err)
			goto err_fini;
	}

	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);

	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);

	if (madvise_range.has_svm_userptr_vmas)
		xe_svm_notifier_unlock(vm);

err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
unlock_vm:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}
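
/*
 * Illustrative userspace usage (not part of the driver, shown only as a
 * hedged sketch): a caller could set the preferred memory location for a
 * 4K-aligned range of a VM roughly as follows. This assumes the xe uapi
 * exposes a DRM_IOCTL_XE_MADVISE request macro for struct drm_xe_madvise and
 * that DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM selects the default system
 * placement; the field and attribute names match the uapi structures used
 * above, and error handling is elided.
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <xf86drm.h>
 *	#include <drm/xe_drm.h>
 *
 *	static int prefer_default_loc(int fd, uint32_t vm_id,
 *				      uint64_t start, uint64_t range)
 *	{
 *		struct drm_xe_madvise args;
 *
 *		memset(&args, 0, sizeof(args));
 *		args.vm_id = vm_id;
 *		args.start = start;	// must be SZ_4K aligned
 *		args.range = range;	// must be SZ_4K aligned and >= SZ_4K
 *		args.type = DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC;
 *		args.preferred_mem_loc.devmem_fd =
 *			DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM;
 *
 *		return drmIoctl(fd, DRM_IOCTL_XE_MADVISE, &args);
 *	}
 */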