Lines Matching +full:blit +full:- +full:engine

1 // SPDX-License-Identifier: MIT
39 * struct xe_migrate - migrate context.
48 /** @pt_bo: Page-table buffer object. */
57 * @fence: dma-fence representing the last migration job batch.
62 * @vm_update_sa: For integrated, used to suballocate page-tables
79 * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest
81 * (val-2) format, this translates to 0x400 dwords for the true maximum length
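
The two comment lines above (source lines 79 and 81) work through the command-length arithmetic: the 10-bit length field tops out at a legal encoding of 0x3FE, and because the field is stored in (val - 2) form that corresponds to 0x400 payload dwords. Below is a minimal standalone sketch of that arithmetic; the macro names are illustrative only, not taken from the driver, and each 64-bit PTE written by the command consumes two of those payload dwords.

#include <stdio.h>

/* Illustrative names; the driver derives its own per-command limits. */
#define SDI_LEN_FIELD_MAX       0x3FE   /* largest legal value of the 10-bit length field */
#define SDI_LEN_BIAS            2       /* the field is stored in (val - 2) format */

int main(void)
{
        unsigned int max_payload_dwords = SDI_LEN_FIELD_MAX + SDI_LEN_BIAS;     /* 0x400 */

        printf("max dwords per MI_STORE_DATA_IMM: 0x%x\n", max_payload_dwords);
        printf("max 64-bit values per command:    0x%x\n", max_payload_dwords / 2);
        return 0;
}
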
88 * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
97 return tile->migrate->q; in xe_tile_migrate_exec_queue()
104 xe_vm_lock(m->q->vm, false); in xe_migrate_fini()
105 xe_bo_unpin(m->pt_bo); in xe_migrate_fini()
106 xe_vm_unlock(m->q->vm); in xe_migrate_fini()
108 dma_fence_put(m->fence); in xe_migrate_fini()
109 xe_bo_put(m->pt_bo); in xe_migrate_fini()
110 drm_suballoc_manager_fini(&m->vm_update_sa); in xe_migrate_fini()
111 mutex_destroy(&m->job_mutex); in xe_migrate_fini()
112 xe_vm_close_and_put(m->q->vm); in xe_migrate_fini()
113 xe_exec_queue_put(m->q); in xe_migrate_fini()
133 identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); in xe_migrate_vram_ofs()
135 addr -= xe->mem.vram.dpa_base; in xe_migrate_vram_ofs()
145 u64 vram_limit = xe->mem.vram.actual_physical_size + in xe_migrate_program_identity()
146 xe->mem.vram.dpa_base; in xe_migrate_program_identity()
150 flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, in xe_migrate_program_identity()
153 xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); in xe_migrate_program_identity()
160 for (pos = xe->mem.vram.dpa_base; pos < vram_limit; in xe_migrate_program_identity()
163 entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, in xe_migrate_program_identity()
165 xe_map_wr(xe, &bo->vmap, ofs, u64, entry); in xe_migrate_program_identity()
167 flags = vm->pt_ops->pte_encode_addr(xe, 0, in xe_migrate_program_identity()
169 level - 1, in xe_migrate_program_identity()
174 xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); in xe_migrate_program_identity()
178 xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); in xe_migrate_program_identity()
188 u16 pat_index = xe->pat.idx[XE_CACHE_WB]; in xe_migrate_prepare_vm()
189 u8 id = tile->id; in xe_migrate_prepare_vm()
190 u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; in xe_migrate_prepare_vm()
195 struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; in xe_migrate_prepare_vm()
206 xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); in xe_migrate_prepare_vm()
208 bo = xe_bo_create_pin_map(vm->xe, tile, vm, in xe_migrate_prepare_vm()
217 pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; in xe_migrate_prepare_vm()
218 entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); in xe_migrate_prepare_vm()
219 xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); in xe_migrate_prepare_vm()
221 map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE; in xe_migrate_prepare_vm()
225 entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE, in xe_migrate_prepare_vm()
228 xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); in xe_migrate_prepare_vm()
230 if (vm->flags & XE_VM_FLAG_64K) in xe_migrate_prepare_vm()
238 m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; in xe_migrate_prepare_vm()
240 i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : in xe_migrate_prepare_vm()
242 entry = vm->pt_ops->pte_encode_bo(batch, i, in xe_migrate_prepare_vm()
245 xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, in xe_migrate_prepare_vm()
249 if (xe->info.has_usm) { in xe_migrate_prepare_vm()
252 batch = tile->primary_gt->usm.bb_pool->bo; in xe_migrate_prepare_vm()
253 m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; in xe_migrate_prepare_vm()
257 i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : in xe_migrate_prepare_vm()
259 entry = vm->pt_ops->pte_encode_bo(batch, i, in xe_migrate_prepare_vm()
262 xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, in xe_migrate_prepare_vm()
270 m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false); in xe_migrate_prepare_vm()
272 if (xe->info.has_usm) { in xe_migrate_prepare_vm()
273 batch = tile->primary_gt->usm.bb_pool->bo; in xe_migrate_prepare_vm()
275 m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr, false); in xe_migrate_prepare_vm()
282 if (vm->flags & XE_VM_FLAG_64K && level == 1) in xe_migrate_prepare_vm()
285 entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * in xe_migrate_prepare_vm()
287 xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, in xe_migrate_prepare_vm()
293 entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, in xe_migrate_prepare_vm()
296 xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + in xe_migrate_prepare_vm()
302 xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, in xe_migrate_prepare_vm()
303 vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) in xe_migrate_prepare_vm()
305 m->cleared_mem_ofs = (255ULL << xe_pt_shift(level)); in xe_migrate_prepare_vm()
309 u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; in xe_migrate_prepare_vm()
313 xe_assert(xe, xe->mem.vram.actual_physical_size <= in xe_migrate_prepare_vm()
314 (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); in xe_migrate_prepare_vm()
321 u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; in xe_migrate_prepare_vm()
323 DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); in xe_migrate_prepare_vm()
324 u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; in xe_migrate_prepare_vm()
326 xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - in xe_migrate_prepare_vm()
327 IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); in xe_migrate_prepare_vm()
355 drm_suballoc_manager_init(&m->vm_update_sa, in xe_migrate_prepare_vm()
356 (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * in xe_migrate_prepare_vm()
359 m->pt_bo = bo; in xe_migrate_prepare_vm()
364 * Including the reserved copy engine is required to avoid deadlocks due to
374 if (hwe->class != XE_ENGINE_CLASS_COPY) in xe_migrate_usm_logical_mask()
378 logical_mask |= BIT(hwe->logical_instance); in xe_migrate_usm_logical_mask()
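
From the fragments above (source lines 364-378), xe_migrate_usm_logical_mask() appears to skip non-copy engines and OR each remaining engine's logical instance into a bitmask, so the reserved copy engine ends up included and the deadlock the comment warns about is avoided. The following is a hedged, standalone approximation of that loop; the struct layout and engine-class enum are simplified assumptions made for illustration, not the driver's types.

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

/* Simplified stand-ins for the driver's hw-engine bookkeeping (assumptions). */
enum engine_class { CLASS_COPY, CLASS_RENDER, CLASS_OTHER };

struct hw_engine {
        enum engine_class class;
        unsigned int logical_instance;
};

/* Build a logical mask of all copy engines, mirroring the skip/OR pattern above. */
static uint32_t usm_logical_mask(const struct hw_engine *engines, int count)
{
        uint32_t logical_mask = 0;

        for (int i = 0; i < count; i++) {
                if (engines[i].class != CLASS_COPY)
                        continue;       /* only copy (blit) engines participate */
                logical_mask |= BIT(engines[i].logical_instance);
        }
        return logical_mask;
}

int main(void)
{
        struct hw_engine engines[] = {
                { CLASS_RENDER, 0 }, { CLASS_COPY, 0 }, { CLASS_COPY, 2 },
        };

        /* Copy instances 0 and 2 -> mask 0x5. */
        printf("logical mask: 0x%x\n", usm_logical_mask(engines, 3));
        return 0;
}
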
390 * xe_migrate_init() - Initialize a migrate context
391 * @tile: Back-pointer to the tile we're initializing for.
398 struct xe_gt *primary_gt = tile->primary_gt; in xe_migrate_init()
403 m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); in xe_migrate_init()
405 return ERR_PTR(-ENOMEM); in xe_migrate_init()
407 m->tile = tile; in xe_migrate_init()
423 if (xe->info.has_usm) { in xe_migrate_init()
426 primary_gt->usm.reserved_bcs_instance, in xe_migrate_init()
431 return ERR_PTR(-EINVAL); in xe_migrate_init()
437 m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, in xe_migrate_init()
442 m->q = xe_exec_queue_create_class(xe, primary_gt, vm, in xe_migrate_init()
447 if (IS_ERR(m->q)) { in xe_migrate_init()
449 return ERR_CAST(m->q); in xe_migrate_init()
452 mutex_init(&m->job_mutex); in xe_migrate_init()
454 might_lock(&m->job_mutex); in xe_migrate_init()
457 err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); in xe_migrate_init()
464 m->min_chunk_size = SZ_4K * SZ_64K / in xe_migrate_init()
468 m->min_chunk_size = SZ_64K; in xe_migrate_init()
469 m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size); in xe_migrate_init()
470 drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n", in xe_migrate_init()
471 (unsigned long long)m->min_chunk_size); in xe_migrate_init()
487 struct xe_device *xe = tile_to_xe(m->tile); in xe_migrate_res_sizes()
488 u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining); in xe_migrate_res_sizes()
490 if (mem_type_is_vram(cur->mem_type)) { in xe_migrate_res_sizes()
492 * VRAM we want to blit in chunks with sizes aligned to in xe_migrate_res_sizes()
494 * page-aligned. If it's the last chunk it may be smaller. in xe_migrate_res_sizes()
496 * Another constraint is that we need to limit the blit to in xe_migrate_res_sizes()
500 u64 chunk = max_t(u64, cur->size, m->min_chunk_size); in xe_migrate_res_sizes()
503 if (size > m->min_chunk_size) in xe_migrate_res_sizes()
504 size = round_down(size, m->min_chunk_size); in xe_migrate_res_sizes()
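
Taken together, the xe_migrate_res_sizes() fragments above suggest each pass is clamped to the smaller of the per-pass budget and the bytes remaining in the cursor, and that VRAM blits are then rounded down to a multiple of m->min_chunk_size unless only a smaller final chunk is left (min_chunk_size is rounded up to a power of two during init, see source line 469). A standalone sketch of that rounding policy follows; the function and parameter names here are illustrative assumptions, not the driver's.

#include <stdint.h>
#include <stdio.h>

/* Round v down to a multiple of the power-of-two "align". */
static uint64_t round_down_pot(uint64_t v, uint64_t align)
{
        return v & ~(align - 1);
}

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

/*
 * Illustrative chunking: clamp to what is left in the resource, then keep
 * VRAM blits aligned to min_chunk_size unless only a smaller tail remains.
 */
static uint64_t next_chunk(uint64_t max_per_pass, uint64_t remaining,
                           uint64_t min_chunk_size, int is_vram)
{
        uint64_t size = min_u64(max_per_pass, remaining);

        if (is_vram && size > min_chunk_size)
                size = round_down_pot(size, min_chunk_size);
        return size;
}

int main(void)
{
        /* e.g. 8 MiB per-pass budget, 5 MiB + 12 KiB remaining, 64 KiB min chunk */
        printf("chunk: 0x%llx\n",
               (unsigned long long)next_chunk(8ull << 20, (5ull << 20) + 0x3000,
                                              64 << 10, 1));
        return 0;
}
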
513 return cur->size >= size; in xe_migrate_allow_identity()
533 *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), in pte_update_size()
534 cur->start + vram_region_gpu_offset(res), in pte_update_size()
540 u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT; in pte_update_size()
551 /* Each chunk has a single blit command */ in pte_update_size()
564 struct xe_device *xe = tile_to_xe(m->tile); in emit_pte()
565 struct xe_vm *vm = m->q->vm; in emit_pte()
573 pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : in emit_pte()
574 xe->pat.idx[XE_CACHE_WB]; in emit_pte()
576 pat_index = xe->pat.idx[XE_CACHE_WB]; in emit_pte()
583 bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); in emit_pte()
584 bb->cs[bb->len++] = ofs; in emit_pte()
585 bb->cs[bb->len++] = 0; in emit_pte()
589 ptes -= chunk; in emit_pte()
591 while (chunk--) { in emit_pte()
597 if (vm->flags & XE_VM_FLAG_64K) { in emit_pte()
600 xe_assert(xe, (va & (SZ_64K - 1)) == in emit_pte()
601 (addr & (SZ_64K - 1))); in emit_pte()
610 addr = vm->pt_ops->pte_encode_addr(m->tile->xe, in emit_pte()
613 bb->cs[bb->len++] = lower_32_bits(addr); in emit_pte()
614 bb->cs[bb->len++] = upper_32_bits(addr); in emit_pte()
629 u32 *cs = bb->cs + bb->len; in emit_copy_ccs()
637 xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1)); in emit_copy_ccs()
639 ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1); in emit_copy_ccs()
640 mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index); in emit_copy_ccs()
645 xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1)); in emit_copy_ccs()
647 ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1); in emit_copy_ccs()
648 mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index); in emit_copy_ccs()
660 bb->len = cs - bb->cs; in emit_copy_ccs()
678 mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index); in emit_copy()
683 bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); in emit_copy()
684 bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs; in emit_copy()
685 bb->cs[bb->len++] = 0; in emit_copy()
686 bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; in emit_copy()
687 bb->cs[bb->len++] = lower_32_bits(dst_ofs); in emit_copy()
688 bb->cs[bb->len++] = upper_32_bits(dst_ofs); in emit_copy()
689 bb->cs[bb->len++] = 0; in emit_copy()
690 bb->cs[bb->len++] = pitch | mocs; in emit_copy()
691 bb->cs[bb->len++] = lower_32_bits(src_ofs); in emit_copy()
692 bb->cs[bb->len++] = upper_32_bits(src_ofs); in emit_copy()
697 return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; in xe_migrate_batch_base()
706 struct xe_gt *gt = m->tile->primary_gt; in xe_migrate_ccs_copy()
713 * user. Make sure we copy the CCS aux state as-is. in xe_migrate_ccs_copy()
718 u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs; in xe_migrate_ccs_copy()
743 * xe_migrate_copy() - Copy content of TTM resources.
768 struct xe_gt *gt = m->tile->primary_gt; in xe_migrate_copy()
778 bool src_is_pltt = src->mem_type == XE_PL_TT; in xe_migrate_copy()
779 bool dst_is_pltt = dst->mem_type == XE_PL_TT; in xe_migrate_copy()
780 bool src_is_vram = mem_type_is_vram(src->mem_type); in xe_migrate_copy()
781 bool dst_is_vram = mem_type_is_vram(dst->mem_type); in xe_migrate_copy()
782 bool type_device = src_bo->ttm.type == ttm_bo_type_device; in xe_migrate_copy()
792 return ERR_PTR(-EINVAL); in xe_migrate_copy()
795 return ERR_PTR(-EINVAL); in xe_migrate_copy()
821 bool usm = xe->info.has_usm; in xe_migrate_copy()
827 drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", in xe_migrate_copy()
878 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; in xe_migrate_copy()
879 update_idx = bb->len; in xe_migrate_copy()
891 job = xe_bb_create_migration_job(m->q, bb, in xe_migrate_copy()
901 err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, in xe_migrate_copy()
904 err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, in xe_migrate_copy()
910 mutex_lock(&m->job_mutex); in xe_migrate_copy()
913 fence = dma_fence_get(&job->drm.s_fence->finished); in xe_migrate_copy()
916 dma_fence_put(m->fence); in xe_migrate_copy()
917 m->fence = dma_fence_get(fence); in xe_migrate_copy()
919 mutex_unlock(&m->job_mutex); in xe_migrate_copy()
922 size -= src_L0; in xe_migrate_copy()
947 u32 *cs = bb->cs + bb->len; in emit_clear_link_copy()
950 *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2); in emit_clear_link_copy()
951 *cs++ = pitch - 1; in emit_clear_link_copy()
952 *cs++ = (size / pitch) - 1; in emit_clear_link_copy()
953 *cs++ = pitch - 1; in emit_clear_link_copy()
957 *cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index); in emit_clear_link_copy()
959 *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index); in emit_clear_link_copy()
961 xe_gt_assert(gt, cs - bb->cs == len + bb->len); in emit_clear_link_copy()
963 bb->len += len; in emit_clear_link_copy()
970 u32 *cs = bb->cs + bb->len; in emit_clear_main_copy()
977 (len - 2); in emit_clear_main_copy()
979 *cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) | in emit_clear_main_copy()
980 (pitch - 1); in emit_clear_main_copy()
982 *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) | in emit_clear_main_copy()
983 (pitch - 1); in emit_clear_main_copy()
1002 xe_gt_assert(gt, cs - bb->cs == len + bb->len); in emit_clear_main_copy()
1004 bb->len += len; in emit_clear_main_copy()
1012 * instructions) so check the architectural engine list rather than the in has_service_copy_support()
1014 * all of the actual service copy engines (BCS1-BCS8) have been fused in has_service_copy_support()
1017 return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8, in has_service_copy_support()
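
The check above masks the GT's engine list against the BCS1..BCS8 range, so a part whose service copy engines are all fused off reports no support even though the architecture defines those instances. A hedged standalone illustration of the mask test follows; the engine-bit positions and the GENMASK reimplementation are assumptions made for the example, not the driver's definitions.

#include <stdint.h>
#include <stdio.h>

/* Illustrative engine-bit positions; the driver has its own enum. */
enum { HW_ENGINE_BCS0 = 0, HW_ENGINE_BCS1 = 1, HW_ENGINE_BCS8 = 8 };

/* GENMASK(h, l): bits l..h set, as in the kernel macro (32-bit variant here). */
#define GENMASK_U32(h, l) \
        ((uint32_t)(~0u >> (31 - (h))) & (uint32_t)(~0u << (l)))

/* True if any service copy engine (BCS1..BCS8) is present in the engine mask. */
static int has_service_copy(uint32_t engine_mask)
{
        return !!(engine_mask & GENMASK_U32(HW_ENGINE_BCS8, HW_ENGINE_BCS1));
}

int main(void)
{
        printf("BCS0 only:   %d\n", has_service_copy(1u << HW_ENGINE_BCS0));    /* 0 */
        printf("BCS0 + BCS3: %d\n", has_service_copy((1u << HW_ENGINE_BCS0) |
                                                     (1u << 3)));               /* 1 */
        return 0;
}
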
1040 * xe_migrate_clear() - Clear content of TTM resources.
1060 bool clear_vram = mem_type_is_vram(dst->mem_type); in xe_migrate_clear()
1063 struct xe_gt *gt = m->tile->primary_gt; in xe_migrate_clear()
1093 bool usm = xe->info.has_usm; in xe_migrate_clear()
1120 size -= clear_L0; in xe_migrate_clear()
1128 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; in xe_migrate_clear()
1129 update_idx = bb->len; in xe_migrate_clear()
1136 m->cleared_mem_ofs, false, clear_L0); in xe_migrate_clear()
1140 job = xe_bb_create_migration_job(m->q, bb, in xe_migrate_clear()
1156 err = xe_sched_job_add_deps(job, bo->ttm.base.resv, in xe_migrate_clear()
1162 mutex_lock(&m->job_mutex); in xe_migrate_clear()
1165 fence = dma_fence_get(&job->drm.s_fence->finished); in xe_migrate_clear()
1168 dma_fence_put(m->fence); in xe_migrate_clear()
1169 m->fence = dma_fence_get(fence); in xe_migrate_clear()
1171 mutex_unlock(&m->job_mutex); in xe_migrate_clear()
1191 bo->ccs_cleared = true; in xe_migrate_clear()
1201 const struct xe_migrate_pt_update_ops *ops = pt_update->ops; in write_pgtable()
1203 u32 ofs = update->ofs, size = update->qwords; in write_pgtable()
1209 * PDE. This requires a BO that is almost vm->size big. in write_pgtable()
1214 xe_tile_assert(tile, update->qwords < MAX_NUM_PTE); in write_pgtable()
1217 xe_bo_addr(update->pt_bo, 0, in write_pgtable()
1225 /* Ensure populatefn can do memset64 by aligning bb->cs */ in write_pgtable()
1226 if (!(bb->len & 1)) in write_pgtable()
1227 bb->cs[bb->len++] = MI_NOOP; in write_pgtable()
1229 bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); in write_pgtable()
1230 bb->cs[bb->len++] = lower_32_bits(addr); in write_pgtable()
1231 bb->cs[bb->len++] = upper_32_bits(addr); in write_pgtable()
1232 if (pt_op->bind) in write_pgtable()
1233 ops->populate(pt_update, tile, NULL, bb->cs + bb->len, in write_pgtable()
1236 ops->clear(pt_update, tile, NULL, bb->cs + bb->len, in write_pgtable()
1239 bb->len += chunk * 2; in write_pgtable()
1241 size -= chunk; in write_pgtable()
1247 return xe_vm_get(m->q->vm); in xe_migrate_get_vm()
1267 const struct xe_migrate_pt_update_ops *ops = pt_update->ops; in xe_migrate_update_pgtables_cpu()
1268 struct xe_vm *vm = pt_update->vops->vm; in xe_migrate_update_pgtables_cpu()
1270 &pt_update->vops->pt_update_ops[pt_update->tile_id]; in xe_migrate_update_pgtables_cpu()
1274 if (XE_TEST_ONLY(test && test->force_gpu)) in xe_migrate_update_pgtables_cpu()
1275 return ERR_PTR(-ETIME); in xe_migrate_update_pgtables_cpu()
1277 if (ops->pre_commit) { in xe_migrate_update_pgtables_cpu()
1278 pt_update->job = NULL; in xe_migrate_update_pgtables_cpu()
1279 err = ops->pre_commit(pt_update); in xe_migrate_update_pgtables_cpu()
1284 for (i = 0; i < pt_update_ops->num_ops; ++i) { in xe_migrate_update_pgtables_cpu()
1286 &pt_update_ops->ops[i]; in xe_migrate_update_pgtables_cpu()
1288 for (j = 0; j < pt_op->num_entries; j++) { in xe_migrate_update_pgtables_cpu()
1290 &pt_op->entries[j]; in xe_migrate_update_pgtables_cpu()
1292 if (pt_op->bind) in xe_migrate_update_pgtables_cpu()
1293 ops->populate(pt_update, m->tile, in xe_migrate_update_pgtables_cpu()
1294 &update->pt_bo->vmap, NULL, in xe_migrate_update_pgtables_cpu()
1295 update->ofs, update->qwords, in xe_migrate_update_pgtables_cpu()
1298 ops->clear(pt_update, m->tile, in xe_migrate_update_pgtables_cpu()
1299 &update->pt_bo->vmap, NULL, in xe_migrate_update_pgtables_cpu()
1300 update->ofs, update->qwords, update); in xe_migrate_update_pgtables_cpu()
1305 xe_device_wmb(vm->xe); in xe_migrate_update_pgtables_cpu()
1315 const struct xe_migrate_pt_update_ops *ops = pt_update->ops; in __xe_migrate_update_pgtables()
1316 struct xe_tile *tile = m->tile; in __xe_migrate_update_pgtables()
1317 struct xe_gt *gt = tile->primary_gt; in __xe_migrate_update_pgtables()
1327 bool is_migrate = pt_update_ops->q == m->q; in __xe_migrate_update_pgtables()
1328 bool usm = is_migrate && xe->info.has_usm; in __xe_migrate_update_pgtables()
1330 for (i = 0; i < pt_update_ops->num_ops; ++i) { in __xe_migrate_update_pgtables()
1331 struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; in __xe_migrate_update_pgtables()
1332 struct xe_vm_pgtable_update *updates = pt_op->entries; in __xe_migrate_update_pgtables()
1334 num_updates += pt_op->num_entries; in __xe_migrate_update_pgtables()
1335 for (j = 0; j < pt_op->num_entries; ++j) { in __xe_migrate_update_pgtables()
1357 u16 pat_index = xe->pat.idx[XE_CACHE_WB]; in __xe_migrate_update_pgtables()
1360 ppgtt_ofs = NUM_KERNEL_PDE - 1; in __xe_migrate_update_pgtables()
1365 if (num_units > m->vm_update_sa.size) { in __xe_migrate_update_pgtables()
1366 err = -ENOBUFS; in __xe_migrate_update_pgtables()
1369 sa_bo = drm_suballoc_new(&m->vm_update_sa, num_units, in __xe_migrate_update_pgtables()
1393 bb->cs[bb->len++] = MI_STORE_DATA_IMM | in __xe_migrate_update_pgtables()
1395 bb->cs[bb->len++] = ofs; in __xe_migrate_update_pgtables()
1396 bb->cs[bb->len++] = 0; /* upper_32_bits */ in __xe_migrate_update_pgtables()
1398 for (; i < pt_update_ops->num_ops; ++i) { in __xe_migrate_update_pgtables()
1400 &pt_update_ops->ops[i]; in __xe_migrate_update_pgtables()
1401 struct xe_vm_pgtable_update *updates = pt_op->entries; in __xe_migrate_update_pgtables()
1403 for (; j < pt_op->num_entries; ++j, ++current_update, ++idx) { in __xe_migrate_update_pgtables()
1404 struct xe_vm *vm = pt_update->vops->vm; in __xe_migrate_update_pgtables()
1413 if (pt_bo->update_index < 0) in __xe_migrate_update_pgtables()
1414 pt_bo->update_index = current_update; in __xe_migrate_update_pgtables()
1416 addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, in __xe_migrate_update_pgtables()
1418 bb->cs[bb->len++] = lower_32_bits(addr); in __xe_migrate_update_pgtables()
1419 bb->cs[bb->len++] = upper_32_bits(addr); in __xe_migrate_update_pgtables()
1426 ptes -= chunk; in __xe_migrate_update_pgtables()
1430 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; in __xe_migrate_update_pgtables()
1431 update_idx = bb->len; in __xe_migrate_update_pgtables()
1435 for (i = 0; i < pt_update_ops->num_ops; ++i) { in __xe_migrate_update_pgtables()
1437 &pt_update_ops->ops[i]; in __xe_migrate_update_pgtables()
1438 struct xe_vm_pgtable_update *updates = pt_op->entries; in __xe_migrate_update_pgtables()
1440 for (j = 0; j < pt_op->num_entries; ++j) { in __xe_migrate_update_pgtables()
1444 pt_bo->update_index * XE_PAGE_SIZE, in __xe_migrate_update_pgtables()
1450 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; in __xe_migrate_update_pgtables()
1451 update_idx = bb->len; in __xe_migrate_update_pgtables()
1453 for (i = 0; i < pt_update_ops->num_ops; ++i) { in __xe_migrate_update_pgtables()
1455 &pt_update_ops->ops[i]; in __xe_migrate_update_pgtables()
1456 struct xe_vm_pgtable_update *updates = pt_op->entries; in __xe_migrate_update_pgtables()
1458 for (j = 0; j < pt_op->num_entries; ++j) in __xe_migrate_update_pgtables()
1464 job = xe_bb_create_migration_job(pt_update_ops->q, bb, in __xe_migrate_update_pgtables()
1472 if (ops->pre_commit) { in __xe_migrate_update_pgtables()
1473 pt_update->job = job; in __xe_migrate_update_pgtables()
1474 err = ops->pre_commit(pt_update); in __xe_migrate_update_pgtables()
1479 mutex_lock(&m->job_mutex); in __xe_migrate_update_pgtables()
1482 fence = dma_fence_get(&job->drm.s_fence->finished); in __xe_migrate_update_pgtables()
1486 mutex_unlock(&m->job_mutex); in __xe_migrate_update_pgtables()
1503 * xe_migrate_update_pgtables() - Pipelined page-table update
1507 * Perform a pipelined page-table update. The update descriptors are typically
1509 * using the default engine for the updates, they will be performed in the
1511 * synchronization is needed for overlapping updates to maintain page-table
1513 * touch the same page-table, which might be a higher-level page-directory.
1524 &pt_update->vops->pt_update_ops[pt_update->tile_id]; in xe_migrate_update_pgtables()
1529 /* -ETIME indicates a job is needed, anything else is legit error */ in xe_migrate_update_pgtables()
1530 if (!IS_ERR(fence) || PTR_ERR(fence) != -ETIME) in xe_migrate_update_pgtables()
1537 * xe_migrate_wait() - Complete all operations using the xe_migrate context
1540 * Waits until the GPU no longer uses the migrate context's default engine
1541 * or its page-table objects. FIXME: What about separate page-table update
1546 if (m->fence) in xe_migrate_wait()
1547 dma_fence_wait(m->fence, false); in xe_migrate_wait()
1562 * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) in pte_update_cmd_size()
1563 * - 2 dwords for the page table's physical location in pte_update_cmd_size()
1564 * - 2*n dwords for the pte values to fill (each pte entry is 2 dwords) in pte_update_cmd_size()
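
Per the accounting above, a single MI_STORE_DATA_IMM that writes n PTEs costs 1 + 2 + 2*n dwords, and larger updates are split across several commands once the per-command PTE limit is hit. A small standalone sketch of that cost calculation follows; the per-command cap used here is an assumed placeholder, not the driver's constant.

#include <stdio.h>

/* Assumed per-command PTE cap for illustration; the driver defines its own. */
#define MAX_PTE_PER_CMD 0x1FE

/* Dwords needed to write "num_ptes" 64-bit entries with MI_STORE_DATA_IMM. */
static unsigned long pte_update_dwords(unsigned long num_ptes)
{
        unsigned long dwords = 0;

        while (num_ptes) {
                unsigned long chunk = num_ptes < MAX_PTE_PER_CMD ?
                                      num_ptes : MAX_PTE_PER_CMD;

                dwords += 1;            /* command header (opcode, length)   */
                dwords += 2;            /* page table's physical location    */
                dwords += 2 * chunk;    /* each PTE is written as two dwords */
                num_ptes -= chunk;
        }
        return dwords;
}

int main(void)
{
        printf("dwords for 16 PTEs:   %lu\n", pte_update_dwords(16));   /* 35 */
        printf("dwords for 1024 PTEs: %lu\n", pte_update_dwords(1024));
        return 0;
}
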
1576 u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; in build_pt_update_batch_sram()
1584 bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); in build_pt_update_batch_sram()
1585 bb->cs[bb->len++] = pt_offset; in build_pt_update_batch_sram()
1586 bb->cs[bb->len++] = 0; in build_pt_update_batch_sram()
1589 ptes -= chunk; in build_pt_update_batch_sram()
1591 while (chunk--) { in build_pt_update_batch_sram()
1594 xe_tile_assert(m->tile, addr); in build_pt_update_batch_sram()
1595 addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, in build_pt_update_batch_sram()
1598 bb->cs[bb->len++] = lower_32_bits(addr); in build_pt_update_batch_sram()
1599 bb->cs[bb->len++] = upper_32_bits(addr); in build_pt_update_batch_sram()
1610 #define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1)
1618 struct xe_gt *gt = m->tile->primary_gt; in xe_migrate_vram()
1620 bool use_usm_batch = xe->info.has_usm; in xe_migrate_vram()
1632 if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || in xe_migrate_vram()
1634 return ERR_PTR(-EOPNOTSUPP); in xe_migrate_vram()
1659 bb->cs[bb->len++] = MI_BATCH_BUFFER_END; in xe_migrate_vram()
1660 update_idx = bb->len; in xe_migrate_vram()
1664 job = xe_bb_create_migration_job(m->q, bb, in xe_migrate_vram()
1674 mutex_lock(&m->job_mutex); in xe_migrate_vram()
1676 fence = dma_fence_get(&job->drm.s_fence->finished); in xe_migrate_vram()
1679 dma_fence_put(m->fence); in xe_migrate_vram()
1680 m->fence = dma_fence_get(fence); in xe_migrate_vram()
1681 mutex_unlock(&m->job_mutex); in xe_migrate_vram()
1694 * xe_migrate_to_vram() - Migrate to VRAM
1715 * xe_migrate_from_vram() - Migrate from VRAM
1744 dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, in xe_migrate_dma_unmap()
1758 return ERR_PTR(-ENOMEM); in xe_migrate_dma_map()
1769 addr = dma_map_page(xe->drm.dev, in xe_migrate_dma_map()
1773 if (dma_mapping_error(xe->drm.dev, addr)) in xe_migrate_dma_map()
1784 return ERR_PTR(-EFAULT); in xe_migrate_dma_map()
1788 * xe_migrate_access_memory() - Access memory of a BO via GPU
1808 struct xe_tile *tile = m->tile; in xe_migrate_access_memory()
1829 return -ENOMEM; in xe_migrate_access_memory()
1837 XE_CACHELINE_BYTES - in xe_migrate_access_memory()
1863 bytes_left -= copy_bytes; in xe_migrate_access_memory()
1876 xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); in xe_migrate_access_memory()
1880 u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) + in xe_migrate_access_memory()
1916 current_page = (int)(buf - orig_buf) / PAGE_SIZE; in xe_migrate_access_memory()
1917 bytes_left -= current_bytes; in xe_migrate_access_memory()