// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_ring_ops.h"

#include "regs/xe_gpu_commands.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "regs/xe_regs.h"
#include "xe_engine_types.h"
#include "xe_gt.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

/*
 * 3D-related flags that can't be set on _engines_ that lack access to the 3D
 * pipeline (i.e., CCS engines).
 */
#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
		PIPE_CONTROL_TILE_CACHE_FLUSH | \
		PIPE_CONTROL_DEPTH_STALL | \
		PIPE_CONTROL_STALL_AT_SCOREBOARD | \
		PIPE_CONTROL_PSD_SYNC | \
		PIPE_CONTROL_AMFS_FLUSH | \
		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)

/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
		PIPE_CONTROL_3D_ENGINE_FLAGS | \
		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
		PIPE_CONTROL_FLUSH_ENABLE | \
		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
		PIPE_CONTROL_DC_FLUSH_ENABLE)

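/*
 * On GEN12+ the pre-parser is toggled through MI_ARB_CHECK: BIT(8) is
 * the mask bit that makes the write take effect, and bit 0 carries the
 * new disable state.
 */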
static u32 preparser_disable(bool state)
{
	return MI_ARB_CHECK | BIT(8) | state;
}

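/*
 * Emit an LRI that writes AUX_INV to the engine's aux table invalidation
 * register, forcing a reload of the AuxCCS metadata. The trailing
 * MI_NOOP pads the sequence to an even number of dwords.
 */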
static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
			      u32 *dw, int i)
{
	dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	dw[i++] = reg.addr + gt->mmio.adj_offset;
	dw[i++] = AUX_INV;
	dw[i++] = MI_NOOP;

	return i;
}

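/*
 * Every job ends by raising a user interrupt to signal completion, then
 * re-enabling ring arbitration and emitting an arbitration check point
 * so the engine can be preempted between jobs.
 */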
static int emit_user_interrupt(u32 *dw, int i)
{
	dw[i++] = MI_USER_INTERRUPT;
	dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	dw[i++] = MI_ARB_CHECK;

	return i;
}

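/*
 * Store a single dword immediate to a GGTT address; BIT(22) selects GGTT
 * (rather than PPGTT) addressing and the trailing 2 is the packet's
 * extra-dword count.
 */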
static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
{
	dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
	dw[i++] = addr;
	dw[i++] = 0;
	dw[i++] = value;

	return i;
}

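/*
 * MI_FLUSH_DW that posts a dword to a GGTT address once the flush has
 * completed; the "+ 1" extends the packet length to cover the extra
 * address dword.
 */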
static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
{
	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
	dw[i++] = 0;
	dw[i++] = value;

	return i;
}

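/*
 * Chain into the batch buffer. ppgtt_flag (BIT(8)) makes the batch
 * address be interpreted in the context's PPGTT instead of the GGTT.
 */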
static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
{
	dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag;
	dw[i++] = lower_32_bits(batch_addr);
	dw[i++] = upper_32_bits(batch_addr);

	return i;
}

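/*
 * Flush with TLB invalidation, posting a dummy value to the per-context
 * PPHWSP scratch slot (MI_FLUSH_DW_STORE_INDEX makes the address an
 * offset from the hardware status page) so the flush is serialized by
 * the write.
 */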
static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
{
	dw[i] = MI_FLUSH_DW + 1;
	dw[i] |= flag;
	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
		MI_FLUSH_DW_STORE_INDEX;

	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	dw[i++] = 0;
	dw[i++] = ~0U;

	return i;
}

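/*
 * PIPE_CONTROL that stalls and invalidates the major caches before the
 * batch runs. mask_flags clears any bits that are invalid for the engine
 * or platform at hand (see the PIPE_CONTROL_3D_* masks above).
 */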
static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
{
	u32 flags = PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
		PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
		PIPE_CONTROL_VF_CACHE_INVALIDATE |
		PIPE_CONTROL_CONST_CACHE_INVALIDATE |
		PIPE_CONTROL_STATE_CACHE_INVALIDATE |
		PIPE_CONTROL_QW_WRITE |
		PIPE_CONTROL_STORE_DATA_INDEX;

	flags &= ~mask_flags;

	dw[i++] = GFX_OP_PIPE_CONTROL(6);
	dw[i++] = flags;
	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
	dw[i++] = 0;
	dw[i++] = 0;
	dw[i++] = 0;

	return i;
}

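/*
 * Write a 64-bit value to a PPGTT address as a posted (non-serialized)
 * store; used below to land user fence values in the job's own address
 * space.
 */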
#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21))

static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
				       u32 *dw, int i)
{
	dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED;
	dw[i++] = lower_32_bits(addr);
	dw[i++] = upper_32_bits(addr);
	dw[i++] = lower_32_bits(value);
	dw[i++] = upper_32_bits(value);

	return i;
}

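/*
 * Final PIPE_CONTROL of a render/compute job: stall and post the seqno
 * to a GGTT address. Engines without the 3D pipeline must not set
 * PIPE_CONTROL_FLUSH_ENABLE, hence the stall_only variant.
 */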
static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
			      int i)
{
	dw[i++] = GFX_OP_PIPE_CONTROL(6);
	dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
		   PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
		PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
	dw[i++] = addr;
	dw[i++] = 0;
	dw[i++] = value;
	dw[i++] = 0; /* We're thrashing one extra dword. */

	return i;
}

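/*
 * Batches normally run from the job's PPGTT (BIT(8) in
 * MI_BATCH_BUFFER_START); kernel workaround engines run their batches
 * from the GGTT instead.
 */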
static u32 get_ppgtt_flag(struct xe_sched_job *job)
{
	return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0;
}

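/*
 * Common job layout: record the start seqno, chain into the user batch,
 * optionally post the user fence value, then flush and write the
 * completion seqno before raising the user interrupt.
 */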
static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
				  u64 batch_addr, u32 seqno)
{
	u32 dw[MAX_JOB_SIZE_DW], i = 0;
	u32 ppgtt_flag = get_ppgtt_flag(job);

	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
				seqno, dw, i);

	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

	if (job->user_fence.used)
		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
						job->user_fence.value,
						dw, i);

	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);

	i = emit_user_interrupt(dw, i);

	XE_BUG_ON(i > MAX_JOB_SIZE_DW);

	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}

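/*
 * Platforms using AuxCCS compression need an explicit aux table
 * invalidation when emitting a job; FlatCCS platforms do not.
 */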
static bool has_aux_ccs(struct xe_device *xe)
{
	/*
	 * PVC is a special case that has no compression of either type
	 * (FlatCCS or AuxCCS).
	 */
	if (xe->info.platform == XE_PVC)
		return false;

	return !xe->info.has_flat_ccs;
}

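/*
 * Video jobs wrap the AuxCCS table invalidation in a pre-parser disable
 * window (hsdes: 1809175790) so the command streamer can't prefetch
 * past it, then follow the common job layout.
 */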
static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
				   u64 batch_addr, u32 seqno)
{
	u32 dw[MAX_JOB_SIZE_DW], i = 0;
	u32 ppgtt_flag = get_ppgtt_flag(job);
	struct xe_gt *gt = job->engine->gt;
	struct xe_device *xe = gt_to_xe(gt);
	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;

	dw[i++] = preparser_disable(true);

	/* hsdes: 1809175790 */
	if (has_aux_ccs(xe)) {
		if (decode)
			i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
		else
			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
	}
	dw[i++] = preparser_disable(false);

	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
				seqno, dw, i);

	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

	if (job->user_fence.used)
		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
						job->user_fence.value,
						dw, i);

	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);

	i = emit_user_interrupt(dw, i);

	XE_BUG_ON(i > MAX_JOB_SIZE_DW);

	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}

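/*
 * Render/compute jobs do a full PIPE_CONTROL invalidation up front,
 * masking off whatever the engine or platform can't support, plus the
 * AuxCCS invalidation where applicable.
 */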
static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
					    struct xe_lrc *lrc,
					    u64 batch_addr, u32 seqno)
{
	u32 dw[MAX_JOB_SIZE_DW], i = 0;
	u32 ppgtt_flag = get_ppgtt_flag(job);
	struct xe_gt *gt = job->engine->gt;
	struct xe_device *xe = gt_to_xe(gt);
	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
	u32 mask_flags = 0;

	dw[i++] = preparser_disable(true);
	if (lacks_render)
		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
	i = emit_pipe_invalidate(mask_flags, dw, i);

	/* hsdes: 1809175790 */
	if (has_aux_ccs(xe))
		i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);

	dw[i++] = preparser_disable(false);

	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
				seqno, dw, i);

	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);

	if (job->user_fence.used)
		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
						job->user_fence.value,
						dw, i);

	i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);

	i = emit_user_interrupt(dw, i);

	XE_BUG_ON(i > MAX_JOB_SIZE_DW);

	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}

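/*
 * Migration jobs carry two batches: the first typically updates PTEs,
 * so a flush + TLB invalidation is emitted between it and the second
 * batch that performs the actual copy, making the new mappings visible.
 */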
static void emit_migration_job_gen12(struct xe_sched_job *job,
				     struct xe_lrc *lrc, u32 seqno)
{
	u32 dw[MAX_JOB_SIZE_DW], i = 0;

	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
				seqno, dw, i);

	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);

	/* XXX: Do we need this? Leaving for now. */
	dw[i++] = preparser_disable(true);
	i = emit_flush_invalidate(0, dw, i);
	dw[i++] = preparser_disable(false);

	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);

	dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
		   MI_FLUSH_DW_OP_STOREDW) + 1;
	dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
	dw[i++] = 0;
	dw[i++] = seqno; /* value */

	i = emit_user_interrupt(dw, i);

	XE_BUG_ON(i > MAX_JOB_SIZE_DW);

	xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
}

static void emit_job_gen12_copy(struct xe_sched_job *job)
{
	int i;

	if (xe_sched_job_is_migration(job->engine)) {
		emit_migration_job_gen12(job, job->engine->lrc,
					 xe_sched_job_seqno(job));
		return;
	}

	for (i = 0; i < job->engine->width; ++i)
		__emit_job_gen12_copy(job, job->engine->lrc + i,
				      job->batch_addr[i],
				      xe_sched_job_seqno(job));
}

static void emit_job_gen12_video(struct xe_sched_job *job)
{
	int i;

	/* FIXME: Not doing parallel handshake for now */
	for (i = 0; i < job->engine->width; ++i)
		__emit_job_gen12_video(job, job->engine->lrc + i,
				       job->batch_addr[i],
				       xe_sched_job_seqno(job));
}

static void emit_job_gen12_render_compute(struct xe_sched_job *job)
{
	int i;

	for (i = 0; i < job->engine->width; ++i)
		__emit_job_gen12_render_compute(job, job->engine->lrc + i,
						job->batch_addr[i],
						xe_sched_job_seqno(job));
}

static const struct xe_ring_ops ring_ops_gen12_copy = {
	.emit_job = emit_job_gen12_copy,
};

static const struct xe_ring_ops ring_ops_gen12_video = {
	.emit_job = emit_job_gen12_video,
};

static const struct xe_ring_ops ring_ops_gen12_render_compute = {
	.emit_job = emit_job_gen12_render_compute,
};

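/*
 * Select the ring ops for an engine class; classes without an emission
 * backend get NULL.
 */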
const struct xe_ring_ops *
xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_COPY:
		return &ring_ops_gen12_copy;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return &ring_ops_gen12_video;
	case XE_ENGINE_CLASS_RENDER:
	case XE_ENGINE_CLASS_COMPUTE:
		return &ring_ops_gen12_render_compute;
	default:
		return NULL;
	}
}