// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "xe_assert.h"
#include "xe_device_types.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

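/*
 * Number of bytes the command streamer may prefetch past the end of a
 * batch; allocations are padded by this amount so that the prefetcher
 * never reads past the suballocation.
 */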
static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
		/*
		 * RCS and CCS require 1K, although other engines would be
		 * okay with 512.
		 */
		return SZ_1K;
	else
		return SZ_512;
}

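/**
 * xe_bb_new() - Allocate a batch buffer from a GT's suballocator pool
 * @gt: GT the batch buffer will be used on
 * @dwords: number of dwords of batch space the caller needs
 * @usm: allocate from the GT's USM pool rather than the tile's kernel
 *       batch buffer pool
 *
 * Return: pointer to the new batch buffer, or an ERR_PTR() on failure.
 */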
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords,
	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
	 * space to accommodate the platform-specific hardware prefetch
	 * requirements.
	 */
	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
			      4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

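/**
 * xe_bb_ccs_new() - Allocate a batch buffer from a VF CCS context's pool
 * @gt: GT associated with the CCS read/write context
 * @dwords: number of dwords of batch space the caller needs
 * @ctx_id: CCS read/write context whose suballocator pool backs the
 *          allocation
 *
 * Return: pointer to the new batch buffer, or an ERR_PTR() on failure.
 */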
struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
			    enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
{
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_sa_manager *bb_pool;
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords and
	 * one additional MI_BATCH_BUFFER_END dword. Since the whole
	 * suballocation is submitted to the hardware, we need to make sure
	 * that the last instruction is not overwritten when the last chunk
	 * of the suballocator is handed out for a batch buffer. The extra
	 * dword acts as a guard against that.
	 */
	bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
	bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

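/*
 * Common job-creation path: terminate the batch with MI_BATCH_BUFFER_END
 * if the caller has not already done so, check that the batch (plus
 * prefetch padding) still fits in its suballocation, flush the CPU
 * writes, and create a scheduler job executing at @addr.
 */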
static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

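/**
 * xe_bb_create_migration_job() - Create a scheduler job for a migration batch
 * @q: migration exec queue to run the batch buffer on
 * @bb: batch buffer holding the emitted commands
 * @batch_base_ofs: GPU offset at which the batch buffer pool is mapped
 * @second_idx: dword index at which the second part of the batch starts
 *
 * Migration batches are executed in two parts, so two batch addresses are
 * passed to the job: the start of the batch and the start of its second
 * part at @second_idx.
 *
 * Return: pointer to the new scheduler job, or an ERR_PTR() on failure.
 */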
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, addr);
}

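/**
 * xe_bb_create_job() - Create a scheduler job executing a batch buffer
 * @q: exec queue to run the batch buffer on (must not be a migration queue)
 * @bb: batch buffer holding the emitted commands
 *
 * Return: pointer to the new scheduler job, or an ERR_PTR() on failure.
 */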
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);
	return __xe_bb_create_job(q, bb, &addr);
}

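/**
 * xe_bb_free() - Free a batch buffer
 * @bb: batch buffer to free, may be NULL
 * @fence: fence the underlying suballocation must wait on before it can
 *         be reused, or NULL if the batch buffer is idle
 */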
void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}
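
/*
 * Typical usage, as a minimal sketch (illustrative only, not part of this
 * file: the emitted payload and the surrounding error handling are
 * simplified, and `gt', `q' and `fence' stand for whatever context the
 * caller already has):
 *
 *	struct xe_bb *bb = xe_bb_new(gt, 4, false);
 *	struct xe_sched_job *job;
 *
 *	if (IS_ERR(bb))
 *		return PTR_ERR(bb);
 *
 *	bb->cs[bb->len++] = MI_NOOP;	// emit real commands here
 *
 *	job = xe_bb_create_job(q, bb);
 *	if (IS_ERR(job)) {
 *		xe_bb_free(bb, NULL);
 *		return PTR_ERR(job);
 *	}
 *
 *	xe_sched_job_arm(job);
 *	fence = dma_fence_get(&job->drm.s_fence->finished);
 *	xe_sched_job_push(job);
 *
 *	// defer reuse of the suballocation until the job completes
 *	xe_bb_free(bb, fence);
 *	dma_fence_put(fence);
 */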