// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
		/*
		 * RCS and CCS require 1K, although other engines would be
		 * okay with 512.
		 */
		return SZ_1K;
	else
		return SZ_512;
}

struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords,
	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
	 * space to accommodate the platform-specific hardware prefetch
	 * requirements.
	 */
	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
			      4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, addr);
}

struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);
	return __xe_bb_create_job(q, bb, &addr);
}

void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}
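
/*
 * Example usage (illustrative sketch, not part of the original file): a
 * typical caller allocates a batch buffer, emits MI commands into bb->cs,
 * turns the buffer into a scheduler job, and finally frees the buffer
 * against the job's fence so the suballocation is not reused before the
 * hardware is done with it.  xe_sched_job_arm(), xe_sched_job_push() and
 * the job->drm.s_fence->finished fence are assumptions based on how other
 * parts of the driver consume these helpers; they are not defined here.
 *
 *	struct xe_bb *bb = xe_bb_new(gt, 32, false);
 *	struct xe_sched_job *job;
 *	struct dma_fence *fence;
 *
 *	if (IS_ERR(bb))
 *		return PTR_ERR(bb);
 *
 *	bb->cs[bb->len++] = MI_NOOP;	(emit real commands here)
 *
 *	job = xe_bb_create_job(q, bb);
 *	if (IS_ERR(job)) {
 *		xe_bb_free(bb, NULL);
 *		return PTR_ERR(job);
 *	}
 *
 *	xe_sched_job_arm(job);
 *	fence = dma_fence_get(&job->drm.s_fence->finished);
 *	xe_sched_job_push(job);
 *
 *	xe_bb_free(bb, fence);
 *	dma_fence_put(fence);
 */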