// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_vm_madvise.h"

#include <linux/nospec.h>
#include <drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_pat.h"
#include "xe_pt.h"
#include "xe_svm.h"

/* VMAs collected from a user-supplied madvise range */
struct xe_vmas_in_madvise_range {
	u64 addr;
	u64 range;
	struct xe_vma **vmas;
	int num_vmas;
	bool has_svm_vmas;
	bool has_bo_vmas;
	bool has_userptr_vmas;
};

static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
	u64 addr = madvise_range->addr;
	u64 range = madvise_range->range;

	struct xe_vma **__vmas;
	struct drm_gpuva *gpuva;
	int max_vmas = 8;

	lockdep_assert_held(&vm->lock);

	madvise_range->num_vmas = 0;
	madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
	if (!madvise_range->vmas)
		return -ENOMEM;

	vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (xe_vma_bo(vma))
			madvise_range->has_bo_vmas = true;
		else if (xe_vma_is_cpu_addr_mirror(vma))
			madvise_range->has_svm_vmas = true;
		else if (xe_vma_is_userptr(vma))
			madvise_range->has_userptr_vmas = true;

		if (madvise_range->num_vmas == max_vmas) {
			/* Grow the VMA array geometrically when full */
			max_vmas <<= 1;
			__vmas = krealloc(madvise_range->vmas,
					  max_vmas * sizeof(*madvise_range->vmas),
					  GFP_KERNEL);
			if (!__vmas) {
				kfree(madvise_range->vmas);
				return -ENOMEM;
			}
			madvise_range->vmas = __vmas;
		}

		madvise_range->vmas[madvise_range->num_vmas] = vma;
		(madvise_range->num_vmas)++;
	}

	if (!madvise_range->num_vmas)
		kfree(madvise_range->vmas);

	vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);

	return 0;
}

static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
				      struct xe_vma **vmas, int num_vmas,
				      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);

	for (i = 0; i < num_vmas; i++) {
		/* TODO: Extend attributes to bo based vmas */
		if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
		     vmas[i]->attr.preferred_loc.migration_policy ==
		     op->preferred_mem_loc.migration_policy) ||
		    !xe_vma_is_cpu_addr_mirror(vmas[i])) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
			/*
			 * Until multi-device support is added, migration_policy
			 * is of no use and can be ignored.
			 */
			vmas[i]->attr.preferred_loc.migration_policy =
						op->preferred_mem_loc.migration_policy;
		}
	}
}

static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
			   struct xe_vma **vmas, int num_vmas,
			   struct drm_xe_madvise *op)
{
	struct xe_bo *bo;
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
	xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);

	for (i = 0; i < num_vmas; i++) {
		if (xe_vma_is_userptr(vmas[i]) &&
		    !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
		      xe->info.has_device_atomics_on_smem)) {
			vmas[i]->skip_invalidation = true;
			continue;
		}

		if (vmas[i]->attr.atomic_access == op->atomic.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.atomic_access = op->atomic.val;
		}

		vmas[i]->attr.atomic_access = op->atomic.val;

		bo = xe_vma_bo(vmas[i]);
		if (!bo || bo->attr.atomic_access == op->atomic.val)
			continue;

		vmas[i]->skip_invalidation = false;
		xe_bo_assert_held(bo);
		bo->attr.atomic_access = op->atomic.val;

		/* Invalidate cpu page table, so bo can migrate to smem in next access */
		if (xe_bo_is_vram(bo) &&
		    (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
		     bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
			ttm_bo_unmap_virtual(&bo->ttm);
	}
}

static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
			      struct xe_vma **vmas, int num_vmas,
			      struct drm_xe_madvise *op)
{
	int i;

	xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);

	for (i = 0; i < num_vmas; i++) {
		if (vmas[i]->attr.pat_index == op->pat_index.val) {
			vmas[i]->skip_invalidation = true;
		} else {
			vmas[i]->skip_invalidation = false;
			vmas[i]->attr.pat_index = op->pat_index.val;
		}
	}
}

typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
			     struct xe_vma **vmas, int num_vmas,
			     struct drm_xe_madvise *op);

static const madvise_func madvise_funcs[] = {
	[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
	[DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
	[DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
};

static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpuva *gpuva;
	struct xe_tile *tile;
	u8 id, tile_mask = 0;

	lockdep_assert_held_write(&vm->lock);

	/* Wait for pending binds */
	if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
				  false, MAX_SCHEDULE_TIMEOUT) <= 0)
		XE_WARN_ON(1);

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
		struct xe_vma *vma = gpuva_to_vma(gpuva);

		if (vma->skip_invalidation || xe_vma_is_null(vma))
			continue;

		if (xe_vma_is_cpu_addr_mirror(vma)) {
			tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
								     xe_vma_start(vma),
								     xe_vma_end(vma));
		} else {
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes(tile, vma)) {
					tile_mask |= BIT(id);

					/*
					 * WRITE_ONCE pairs with READ_ONCE
					 * in xe_vm_has_valid_gpu_mapping()
					 */
					WRITE_ONCE(vma->tile_invalidated,
						   vma->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
{
	u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);

	if (!tile_mask)
		return 0;

	xe_device_wmb(vm->xe);

	return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
}

static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
{
	if (XE_IOCTL_DBG(xe, !args))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
		return false;

	if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
		return false;

	switch (args->type) {
	case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
	{
		s32 fd = (s32)args->preferred_mem_loc.devmem_fd;

		if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
				     DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
			return false;

		if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;
		break;
	}
	case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
		if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->atomic.reserved))
			return false;

		break;
	case DRM_XE_MEM_RANGE_ATTR_PAT:
	{
		u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);

		if (XE_IOCTL_DBG(xe, !coh_mode))
			return false;

		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.pad))
			return false;

		if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
			return false;
		break;
	}
	default:
		if (XE_IOCTL_DBG(xe, 1))
			return false;
	}

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return false;

	return true;
}

static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
				   int num_vmas, u32 atomic_val)
{
	struct xe_device *xe = vm->xe;
	struct xe_bo *bo;
	int i;

	for (i = 0; i < num_vmas; i++) {
		bo = xe_vma_bo(vmas[i]);
		if (!bo)
			continue;
		/*
		 * NOTE: The following atomic checks are platform-specific. For example,
		 * if a device supports CXL atomics, these may not be necessary or
		 * may behave differently.
		 */
		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
				     !(bo->flags & XE_BO_FLAG_SYSTEM)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
				     !(bo->flags & XE_BO_FLAG_VRAM0) &&
				     !(bo->flags & XE_BO_FLAG_VRAM1) &&
				     !(bo->flags & XE_BO_FLAG_SYSTEM &&
				       xe->info.has_device_atomics_on_smem)))
			return false;

		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
				     (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
				      (!(bo->flags & XE_BO_FLAG_VRAM0) &&
				       !(bo->flags & XE_BO_FLAG_VRAM1)))))
			return false;
	}
	return true;
}

/**
 * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
 * @dev: DRM device pointer
 * @data: Pointer to ioctl data (drm_xe_madvise*)
 * @file: DRM file pointer
 *
 * Handles the MADVISE ioctl to provide memory advice for the VMAs within
 * the input range. An illustrative userspace usage sketch is included in a
 * comment at the end of this file.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_madvise *args = data;
	struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
							 .range = args->range, };
	struct xe_vm *vm;
	struct drm_exec exec;
	int err, attr_type;

	vm = xe_vm_lookup(xef, args->vm_id);
	if (XE_IOCTL_DBG(xe, !vm))
		return -EINVAL;

	if (!madvise_args_are_sane(vm->xe, args)) {
		err = -EINVAL;
		goto put_vm;
	}

	xe_svm_flush(vm);

	err = down_write_killable(&vm->lock);
	if (err)
		goto put_vm;

	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
		err = -ENOENT;
		goto unlock_vm;
	}

	err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
	if (err)
		goto unlock_vm;

	err = get_vmas(vm, &madvise_range);
	if (err || !madvise_range.num_vmas)
		goto unlock_vm;

	if (madvise_range.has_bo_vmas) {
		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
						    madvise_range.num_vmas,
						    args->atomic.val)) {
				err = -EINVAL;
				goto unlock_vm;
			}
		}

		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			for (int i = 0; i < madvise_range.num_vmas; i++) {
				struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);

				if (!bo)
					continue;
				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
				drm_exec_retry_on_contention(&exec);
				if (err)
					goto err_fini;
			}
		}
	}

	if (madvise_range.has_userptr_vmas) {
		err = down_read_interruptible(&vm->userptr.notifier_lock);
		if (err)
			goto err_fini;
	}

	if (madvise_range.has_svm_vmas) {
		err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock);
		if (err)
			goto unlock_userptr;
	}

	attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
	madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);

	err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);

	if (madvise_range.has_svm_vmas)
		xe_svm_notifier_unlock(vm);

unlock_userptr:
	if (madvise_range.has_userptr_vmas)
		up_read(&vm->userptr.notifier_lock);
err_fini:
	if (madvise_range.has_bo_vmas)
		drm_exec_fini(&exec);
	kfree(madvise_range.vmas);
	madvise_range.vmas = NULL;
unlock_vm:
	up_write(&vm->lock);
put_vm:
	xe_vm_put(vm);
	return err;
}
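
/*
 * Illustrative userspace usage sketch (a hedged example, not compiled as part
 * of this file). It advises a 4 KiB-aligned range of a VM to be used with
 * device atomics. struct drm_xe_madvise and the DRM_XE_* constants mirror the
 * uapi consumed above (<drm/xe_drm.h>); the request macro DRM_IOCTL_XE_MADVISE
 * and the variables drm_fd, vm_id, addr and size are assumptions made for the
 * example only.
 *
 *	#include <sys/ioctl.h>
 *	#include <stdio.h>
 *	#include <drm/xe_drm.h>
 *
 *	struct drm_xe_madvise args = {
 *		.vm_id = vm_id,
 *		.start = addr,	// 4 KiB aligned
 *		.range = size,	// 4 KiB aligned, at least 4 KiB
 *		.type = DRM_XE_MEM_RANGE_ATTR_ATOMIC,
 *		.atomic = { .val = DRM_XE_ATOMIC_DEVICE },
 *	};
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_XE_MADVISE, &args))
 *		perror("DRM_IOCTL_XE_MADVISE");
 */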