// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 *
 */

#include <linux/iopoll.h>

#include <drm/drm_print.h>
#include <drm/drm_vblank.h>

#include "intel_crtc.h"
#include "intel_de.h"
#include "intel_display_regs.h"
#include "intel_display_rpm.h"
#include "intel_display_types.h"
#include "intel_dsb.h"
#include "intel_dsb_buffer.h"
#include "intel_dsb_regs.h"
#include "intel_vblank.h"
#include "intel_vrr.h"
#include "skl_watermark.h"

#define CACHELINE_BYTES 64

struct intel_dsb {
	enum intel_dsb_id id;

	struct intel_dsb_buffer dsb_buf;
	struct intel_crtc *crtc;

	/*
	 * Maximum number of dwords the buffer can hold.
	 */
	unsigned int size;

	/*
	 * free_pos points to the first free dword and
	 * is used to calculate the tail of the command buffer.
	 */
	unsigned int free_pos;

	/*
	 * Previously emitted DSB instruction. Used to
	 * identify/adjust the instruction for indexed
	 * register writes.
	 */
	u32 ins[2];

	/*
	 * Start of the previously emitted DSB instruction.
	 * Used to adjust the instruction for indexed
	 * register writes.
	 */
	unsigned int ins_start_offset;

	u32 chicken;
	int hw_dewake_scanline;
};

/**
 * DOC: DSB
 *
 * A DSB (Display State Buffer) is a queue of MMIO instructions in memory
 * which can be offloaded to the DSB HW in the display controller. The DSB
 * HW is a DMA engine that can be programmed to download the DSB from
 * memory. It allows the driver to batch-submit display HW programming.
 * This helps to reduce loading time and CPU activity, thereby making the
 * context switch faster. DSB support has been available since Gen12 Intel
 * graphics platforms.
 *
 * DSBs can access only the pipe, plane, and transcoder Data Island Packet
 * registers.
 *
 * The DSB HW can support only register writes (both indexed and direct
 * MMIO writes). No register reads are possible with the DSB HW engine.
 */

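/*
 * Typical usage flow, as an illustrative sketch assembled from the
 * functions in this file (not copied from any real call site; the DSB id
 * and command count are made up for the example):
 *
 *	struct intel_dsb *dsb;
 *
 *	dsb = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 128);
 *	if (!dsb)
 *		return;	// no DSB available, fall back to MMIO
 *
 *	intel_dsb_reg_write(dsb, reg, val);	// queue register writes
 *	intel_dsb_finish(dsb);			// pad + flush the buffer
 *	intel_dsb_commit(dsb);			// kick the DSB engine
 *	intel_dsb_wait(dsb);			// wait for completion
 *	intel_dsb_cleanup(dsb);
 */
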
/* DSB opcodes. */
#define DSB_OPCODE_SHIFT		24
#define DSB_OPCODE_NOOP			0x0
#define DSB_OPCODE_MMIO_WRITE		0x1
#define   DSB_BYTE_EN			0xf
#define   DSB_BYTE_EN_SHIFT		20
#define   DSB_REG_VALUE_MASK		0xfffff
#define DSB_OPCODE_WAIT_USEC		0x2
#define DSB_OPCODE_WAIT_SCANLINE	0x3
#define DSB_OPCODE_WAIT_VBLANKS		0x4
#define DSB_OPCODE_WAIT_DSL_IN		0x5
#define DSB_OPCODE_WAIT_DSL_OUT		0x6
#define   DSB_SCANLINE_UPPER_SHIFT	20
#define   DSB_SCANLINE_LOWER_SHIFT	0
#define DSB_OPCODE_INTERRUPT		0x7
#define DSB_OPCODE_INDEXED_WRITE	0x9 /* see DSB_REG_VALUE_MASK */
#define DSB_OPCODE_POLL			0xA /* see DSB_REG_VALUE_MASK */
#define DSB_OPCODE_GOSUB		0xC /* ptl+ */
#define   DSB_GOSUB_HEAD_SHIFT		26
#define   DSB_GOSUB_TAIL_SHIFT		0
#define   DSB_GOSUB_CONVERT_ADDR(x)	((x) >> 6)

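/*
 * Each DSB instruction occupies two dwords: the low dword carries the
 * value/payload, the high dword carries the opcode, flags and register
 * offset. As a sketch derived from the definitions above, a direct MMIO
 * write of 0x12345678 to an arbitrary register offset 0x70180 with all
 * bytes enabled would be encoded as:
 *
 *	ldw = 0x12345678;
 *	udw = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
 *	      (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
 *	      0x70180;	// udw == 0x01f70180
 */
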
static bool pre_commit_is_vrr_active(struct intel_atomic_state *state,
				     struct intel_crtc *crtc)
{
	const struct intel_crtc_state *old_crtc_state =
		intel_atomic_get_old_crtc_state(state, crtc);
	const struct intel_crtc_state *new_crtc_state =
		intel_atomic_get_new_crtc_state(state, crtc);

	/* VRR will be enabled afterwards, if necessary */
	if (intel_crtc_needs_modeset(new_crtc_state))
		return false;

	/* VRR will have been disabled during intel_pre_plane_update() */
	return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc);
}

static int dsb_vblank_delay(struct intel_atomic_state *state,
			    struct intel_crtc *crtc)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);

	if (pre_commit_is_vrr_active(state, crtc))
		/*
		 * When the push is sent during vblank it will trigger
		 * on the next scanline, hence we have up to one extra
		 * scanline until the delayed vblank occurs after
		 * TRANS_PUSH has been written.
		 */
		return intel_vrr_vblank_delay(crtc_state) + 1;
	else
		return intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode);
}

static int dsb_vtotal(struct intel_atomic_state *state,
		      struct intel_crtc *crtc)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);

	if (pre_commit_is_vrr_active(state, crtc))
		return intel_vrr_vmax_vtotal(crtc_state);
	else
		return intel_mode_vtotal(&crtc_state->hw.adjusted_mode);
}

static int dsb_dewake_scanline_start(struct intel_atomic_state *state,
				     struct intel_crtc *crtc)
{
	struct intel_display *display = to_intel_display(state);
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	unsigned int latency = skl_watermark_max_latency(display, 0);

	return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) -
		intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency);
}

static int dsb_dewake_scanline_end(struct intel_atomic_state *state,
				   struct intel_crtc *crtc)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);

	return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode);
}

static int dsb_scanline_to_hw(struct intel_atomic_state *state,
			      struct intel_crtc *crtc, int scanline)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	int vtotal = dsb_vtotal(state, crtc);

	return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal;
}

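/*
 * A quick numeric sketch of the conversion above (made-up values): with
 * vtotal == 1125 and a scanline offset of 1, software scanline 0 maps to
 * hw scanline (0 + 1125 - 1) % 1125 == 1124, i.e. the hw scanline counter
 * lags the software view of the scanline by the scanline offset.
 */
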
/*
 * Bspec suggests that we should always set DSB_SKIP_WAITS_EN. Our approach
 * to flip completion differs from what is explained in Bspec: we wait for
 * the vblank in the DSB and generate an interrupt when it happens, and
 * that interrupt is taken as the indication of completion -> we definitely
 * do not want to skip the vblank wait. We are also concerned about
 * skipping the vblank evasion, i.e. the arming registers getting latched
 * before we have managed to write them. Due to these reasons we are not
 * setting DSB_SKIP_WAITS_EN.
 */
static u32 dsb_chicken(struct intel_atomic_state *state,
		       struct intel_crtc *crtc)
{
	if (pre_commit_is_vrr_active(state, crtc))
		return DSB_CTRL_WAIT_SAFE_WINDOW |
			DSB_CTRL_NO_WAIT_VBLANK |
			DSB_INST_WAIT_SAFE_WINDOW |
			DSB_INST_NO_WAIT_VBLANK;
	else
		return 0;
}

static bool assert_dsb_has_room(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);

	/* each instruction is 2 dwords */
	return !drm_WARN(display->drm, dsb->free_pos > dsb->size - 2,
			 "[CRTC:%d:%s] DSB %d buffer overflow\n",
			 crtc->base.base.id, crtc->base.name, dsb->id);
}

static bool assert_dsb_tail_is_aligned(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);

	return !drm_WARN_ON(display->drm,
			    !IS_ALIGNED(dsb->free_pos * 4, CACHELINE_BYTES));
}

static void intel_dsb_dump(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	int i;

	drm_dbg_kms(display->drm, "[CRTC:%d:%s] DSB %d commands {\n",
		    crtc->base.base.id, crtc->base.name, dsb->id);
	for (i = 0; i < ALIGN(dsb->free_pos, 64 / 4); i += 4)
		drm_dbg_kms(display->drm,
			    " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4,
			    intel_dsb_buffer_read(&dsb->dsb_buf, i),
			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 1),
			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 2),
			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 3));
	drm_dbg_kms(display->drm, "}\n");
}

static bool is_dsb_busy(struct intel_display *display, enum pipe pipe,
			enum intel_dsb_id dsb_id)
{
	return intel_de_read_fw(display, DSB_CTRL(pipe, dsb_id)) & DSB_STATUS_BUSY;
}

unsigned int intel_dsb_size(struct intel_dsb *dsb)
{
	return dsb->free_pos * 4;
}

unsigned int intel_dsb_head(struct intel_dsb *dsb)
{
	return intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf);
}

static unsigned int intel_dsb_tail(struct intel_dsb *dsb)
{
	return intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf) + intel_dsb_size(dsb);
}

static void intel_dsb_ins_align(struct intel_dsb *dsb)
{
	/*
	 * Every instruction should be 8 byte aligned.
	 *
	 * The only way to get unaligned free_pos is via
	 * intel_dsb_reg_write_indexed() which already
	 * makes sure the next dword is zeroed, so no need
	 * to clear it here.
	 */
	dsb->free_pos = ALIGN(dsb->free_pos, 2);
}

static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw)
{
	if (!assert_dsb_has_room(dsb))
		return;

	intel_dsb_ins_align(dsb);

	dsb->ins_start_offset = dsb->free_pos;
	dsb->ins[0] = ldw;
	dsb->ins[1] = udw;

	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, dsb->ins[0]);
	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, dsb->ins[1]);
}

static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb,
					u32 opcode, i915_reg_t reg)
{
	u32 prev_opcode, prev_reg;

	/*
	 * Nothing emitted yet? Must check before looking
	 * at the actual data since i915_gem_object_create_internal()
	 * does *not* give you zeroed memory!
	 */
	if (dsb->free_pos == 0)
		return false;

	prev_opcode = dsb->ins[1] & ~DSB_REG_VALUE_MASK;
	prev_reg = dsb->ins[1] & DSB_REG_VALUE_MASK;

	return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg);
}

static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_t reg)
{
	return intel_dsb_prev_ins_is_write(dsb,
					   DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT,
					   reg);
}

/**
 * intel_dsb_reg_write_indexed() - Emit an indexed register write to the DSB context
 * @dsb: DSB context
 * @reg: register address.
 * @val: value.
 *
 * This function emits a register-value pair into the DSB command
 * buffer using the indexed write opcode.
 *
 * Note that indexed writes are slower than normal MMIO writes
 * for a small number (less than 5 or so) of writes to the same
 * register.
 */
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
				 i915_reg_t reg, u32 val)
{
	/*
	 * For example, the buffer will look like below for 3 dwords
	 * written to an auto-incrementing register:
	 *
	 * +----------+----------+--------+--------+--------+--------+
	 * | size = 3 | offset & | value1 | value2 | value3 | zero   |
	 * |          | opcode   |        |        |        |        |
	 * +----------+----------+--------+--------+--------+--------+
	 * 0          4          8        12       16       20       24
	 *                                                        Byte
	 *
	 * As every instruction is 8 byte aligned, a DSB instruction
	 * always starts at an even index into the u32 array. If an odd
	 * number of dwords is written, a zero dword is added at the end
	 * as padding.
	 */
	if (!intel_dsb_prev_ins_is_indexed_write(dsb, reg))
		intel_dsb_emit(dsb, 0, /* count */
			       (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) |
			       i915_mmio_reg_offset(reg));

	if (!assert_dsb_has_room(dsb))
		return;

	/* Update the count */
	dsb->ins[0]++;
	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 0,
			       dsb->ins[0]);

	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, val);
	/* if number of data words is odd, then the last dword should be 0.*/
	if (dsb->free_pos & 0x1)
		intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0);
}

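/*
 * Usage sketch (hypothetical caller; PREC_PAL_DATA is merely an example
 * of an auto-incrementing data register, e.g. when loading a gamma LUT):
 *
 *	for (i = 0; i < lut_size; i++)
 *		intel_dsb_reg_write_indexed(dsb, PREC_PAL_DATA(pipe), lut[i]);
 *
 * Consecutive writes to the same register are merged into a single
 * indexed-write instruction by the code above.
 */
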
void intel_dsb_reg_write(struct intel_dsb *dsb,
			 i915_reg_t reg, u32 val)
{
	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		       (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
		       i915_mmio_reg_offset(reg));
}

static u32 intel_dsb_mask_to_byte_en(u32 mask)
{
	return (!!(mask & 0xff000000) << 3 |
		!!(mask & 0x00ff0000) << 2 |
		!!(mask & 0x0000ff00) << 1 |
		!!(mask & 0x000000ff) << 0);
}

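/*
 * For example, mask 0x00ff00ff yields byte enables 0b0101: only bytes 0
 * and 2 of the register are written, the other bytes keep their old value.
 */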
/* Note: mask implemented via byte enables! */
void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
				i915_reg_t reg, u32 mask, u32 val)
{
	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		       (intel_dsb_mask_to_byte_en(mask) << DSB_BYTE_EN_SHIFT) |
		       i915_mmio_reg_offset(reg));
}

void intel_dsb_noop(struct intel_dsb *dsb, int count)
{
	int i;

	for (i = 0; i < count; i++)
		intel_dsb_emit(dsb, 0,
			       DSB_OPCODE_NOOP << DSB_OPCODE_SHIFT);
}

void intel_dsb_nonpost_start(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
				   DSB_NON_POSTED, DSB_NON_POSTED);
	intel_dsb_noop(dsb, 4);
}

void intel_dsb_nonpost_end(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
				   DSB_NON_POSTED, 0);
	intel_dsb_noop(dsb, 4);
}

void intel_dsb_interrupt(struct intel_dsb *dsb)
{
	intel_dsb_emit(dsb, 0,
		       DSB_OPCODE_INTERRUPT << DSB_OPCODE_SHIFT);
}

void intel_dsb_wait_usec(struct intel_dsb *dsb, int count)
{
	/* +1 to make sure we never wait less time than asked for */
	intel_dsb_emit(dsb, count + 1,
		       DSB_OPCODE_WAIT_USEC << DSB_OPCODE_SHIFT);
}

void intel_dsb_wait_vblanks(struct intel_dsb *dsb, int count)
{
	intel_dsb_emit(dsb, count,
		       DSB_OPCODE_WAIT_VBLANKS << DSB_OPCODE_SHIFT);
}

static void intel_dsb_emit_wait_dsl(struct intel_dsb *dsb,
				    u32 opcode, int lower, int upper)
{
	u64 window = ((u64)upper << DSB_SCANLINE_UPPER_SHIFT) |
		((u64)lower << DSB_SCANLINE_LOWER_SHIFT);

	intel_dsb_emit(dsb, lower_32_bits(window),
		       (opcode << DSB_OPCODE_SHIFT) |
		       upper_32_bits(window));
}

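/*
 * The lower and upper scanlines form a 40-bit window descriptor (20 bits
 * each) that is split across the instruction's two dwords; the top bits
 * of the window spill into the low byte of the opcode dword. A sketch
 * with made-up values, lower == 100 and upper == 4096:
 *
 *	window = ((u64)4096 << 20) | 100;	// 0x1_00000064
 *	ldw = lower_32_bits(window);		// 0x00000064
 *	udw = (opcode << DSB_OPCODE_SHIFT) | upper_32_bits(window); // | 0x1
 */
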
static void intel_dsb_wait_dsl(struct intel_atomic_state *state,
			       struct intel_dsb *dsb,
			       int lower_in, int upper_in,
			       int lower_out, int upper_out)
{
	struct intel_crtc *crtc = dsb->crtc;

	lower_in = dsb_scanline_to_hw(state, crtc, lower_in);
	upper_in = dsb_scanline_to_hw(state, crtc, upper_in);

	lower_out = dsb_scanline_to_hw(state, crtc, lower_out);
	upper_out = dsb_scanline_to_hw(state, crtc, upper_out);

	if (upper_in >= lower_in)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_IN,
					lower_in, upper_in);
	else if (upper_out >= lower_out)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT,
					lower_out, upper_out);
	else
		drm_WARN_ON(crtc->base.dev, 1); /* assert_dsl_ok() should have caught it already */
}

static void assert_dsl_ok(struct intel_atomic_state *state,
			  struct intel_dsb *dsb,
			  int start, int end)
{
	struct intel_crtc *crtc = dsb->crtc;
	int vtotal = dsb_vtotal(state, crtc);

	/*
	 * Waiting for the entire frame doesn't make sense
	 * (IN == don't wait, OUT == wait forever).
	 */
	drm_WARN(crtc->base.dev, (end - start + vtotal) % vtotal == vtotal - 1,
		 "[CRTC:%d:%s] DSB %d bad scanline window wait: %d-%d (vt=%d)\n",
		 crtc->base.base.id, crtc->base.name, dsb->id,
		 start, end, vtotal);
}

void intel_dsb_wait_scanline_in(struct intel_atomic_state *state,
				struct intel_dsb *dsb,
				int start, int end)
{
	assert_dsl_ok(state, dsb, start, end);

	intel_dsb_wait_dsl(state, dsb,
			   start, end,
			   end + 1, start - 1);
}

void intel_dsb_wait_scanline_out(struct intel_atomic_state *state,
				 struct intel_dsb *dsb,
				 int start, int end)
{
	assert_dsl_ok(state, dsb, start, end);

	intel_dsb_wait_dsl(state, dsb,
			   end + 1, start - 1,
			   start, end);
}

void intel_dsb_poll(struct intel_dsb *dsb,
		    i915_reg_t reg, u32 mask, u32 val,
		    int wait_us, int count)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	intel_dsb_reg_write(dsb, DSB_POLLMASK(pipe, dsb->id), mask);
	intel_dsb_reg_write(dsb, DSB_POLLFUNC(pipe, dsb->id),
			    DSB_POLL_ENABLE |
			    DSB_POLL_WAIT(wait_us) | DSB_POLL_COUNT(count));

	intel_dsb_noop(dsb, 5);

	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_POLL << DSB_OPCODE_SHIFT) |
		       i915_mmio_reg_offset(reg));
}

static void intel_dsb_align_tail(struct intel_dsb *dsb)
{
	u32 aligned_tail, tail;

	intel_dsb_ins_align(dsb);

	tail = dsb->free_pos * 4;
	aligned_tail = ALIGN(tail, CACHELINE_BYTES);

	if (aligned_tail > tail)
		intel_dsb_buffer_memset(&dsb->dsb_buf, dsb->free_pos, 0,
					aligned_tail - tail);

	dsb->free_pos = aligned_tail / 4;
}

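/*
 * The padding added by intel_dsb_align_tail() is zeroed, and an all-zero
 * instruction is DSB_OPCODE_NOOP, so the DSB engine can safely execute
 * through the padding up to the (cacheline aligned) tail. E.g. (made-up
 * numbers) a tail of 0x28 bytes gets 0x18 bytes of zero padding up to
 * the 0x40 boundary.
 */
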
static void intel_dsb_gosub_align(struct intel_dsb *dsb)
{
	u32 aligned_tail, tail;

	intel_dsb_ins_align(dsb);

	tail = dsb->free_pos * 4;
	aligned_tail = ALIGN(tail, CACHELINE_BYTES);

	/*
	 * Wa_16024917128
	 * "Ensure GOSUB is not placed in cacheline QW slot 6 or 7 (numbered 0-7)"
	 */
	if (aligned_tail - tail <= 2 * 8)
		intel_dsb_buffer_memset(&dsb->dsb_buf, dsb->free_pos, 0,
					aligned_tail - tail);

	dsb->free_pos = aligned_tail / 4;
}

void intel_dsb_gosub(struct intel_dsb *dsb,
		     struct intel_dsb *sub_dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	unsigned int head, tail;
	u64 head_tail;

	if (drm_WARN_ON(display->drm, dsb->id != sub_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(sub_dsb))
		return;

	intel_dsb_gosub_align(dsb);

	head = intel_dsb_head(sub_dsb);
	tail = intel_dsb_tail(sub_dsb);

	/*
	 * The GOSUB instruction has the following memory layout:
	 *
	 * +----------+-----------+-------------------+------------------+
	 * |  Opcode  |   Rsvd    |      Head Ptr     |     Tail Ptr     |
	 * |   0x0c   |           |                   |                  |
	 * +----------+-----------+-------------------+------------------+
	 * |<- 8bits->|<- 4bits ->|<--    26bits   -->|<--   26bits   -->|
	 *
	 * We have only 26 bits each to represent the head and tail
	 * pointers even though the addresses themselves are 32 bits wide.
	 * However, this is not a problem because the addresses are 64 byte
	 * (cacheline) aligned and therefore the low 6 bits are always
	 * zero. Hence we right shift the address by 6 before embedding it
	 * into the GOSUB instruction (e.g. a head address of 0x1000 is
	 * stored as 0x40).
	 */

	head_tail = ((u64)(DSB_GOSUB_CONVERT_ADDR(head)) << DSB_GOSUB_HEAD_SHIFT) |
		((u64)(DSB_GOSUB_CONVERT_ADDR(tail)) << DSB_GOSUB_TAIL_SHIFT);

	intel_dsb_emit(dsb, lower_32_bits(head_tail),
		       (DSB_OPCODE_GOSUB << DSB_OPCODE_SHIFT) |
		       upper_32_bits(head_tail));

	/*
	 * "NOTE: the instructions within the cacheline
	 *  FOLLOWING the GOSUB instruction must be NOPs."
	 */
	intel_dsb_align_tail(dsb);
}

void intel_dsb_gosub_finish(struct intel_dsb *dsb)
{
	intel_dsb_align_tail(dsb);

	/*
	 * Wa_16024917128
	 * "Ensure that all subroutines called by GOSUB end with a cacheline of NOPs"
	 */
	intel_dsb_noop(dsb, 8);

	intel_dsb_buffer_flush_map(&dsb->dsb_buf);
}

void intel_dsb_finish(struct intel_dsb *dsb)
{
	intel_dsb_align_tail(dsb);

	intel_dsb_buffer_flush_map(&dsb->dsb_buf);
}

static u32 dsb_error_int_status(struct intel_display *display)
{
	u32 errors;

	errors = DSB_GTT_FAULT_INT_STATUS |
		DSB_RSPTIMEOUT_INT_STATUS |
		DSB_POLL_ERR_INT_STATUS;

	/*
	 * All the non-existing status bits operate as
	 * normal r/w bits, so any attempt to clear them
	 * will just end up setting them. Never do that so
	 * we won't mistake them for actual error interrupts.
	 */
	if (DISPLAY_VER(display) >= 14)
		errors |= DSB_ATS_FAULT_INT_STATUS;

	if (DISPLAY_VER(display) >= 30)
		errors |= DSB_GOSUB_INT_STATUS;

	return errors;
}

static u32 dsb_error_int_en(struct intel_display *display)
{
	u32 errors;

	errors = DSB_GTT_FAULT_INT_EN |
		DSB_RSPTIMEOUT_INT_EN |
		DSB_POLL_ERR_INT_EN;

	if (DISPLAY_VER(display) >= 14)
		errors |= DSB_ATS_FAULT_INT_EN;

	/*
	 * Wa_16024917128
	 * "Disable nested GOSUB interrupt (DSB_INTERRUPT bit 21)"
	 */
	if (0 && DISPLAY_VER(display) >= 30)
		errors |= DSB_GOSUB_INT_EN;

	return errors;
}

/*
 * FIXME calibrate these sensibly, ideally compute based on
 * the number of registers to be written. But that requires
 * measuring the actual DSB execution speed on each platform
 * (and the speed also depends on CDCLK and memory clock)...
 */
static int intel_dsb_noarm_exec_time_us(void)
{
	return 80;
}

static int intel_dsb_arm_exec_time_us(void)
{
	return 20;
}

int intel_dsb_exec_time_us(void)
{
	return intel_dsb_noarm_exec_time_us() +
		intel_dsb_arm_exec_time_us();
}

void intel_dsb_vblank_evade(struct intel_atomic_state *state,
			    struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	int latency = intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode,
					       intel_dsb_arm_exec_time_us());
	int start, end;

	/*
	 * PIPEDSL reads as 0 when in SRDENT (PSR1) or DEEP_SLEEP (PSR2). On
	 * wake-up, scanline counting starts from vblank_start - 1. We don't
	 * know if wake-up is already ongoing when the evasion starts. In
	 * the worst case PIPEDSL could start reporting a valid value right
	 * after we check the scanline, in which case we wouldn't have
	 * enough time to write all the registers. To tackle this, evade
	 * scanline 0 as well. As a drawback, the flip is delayed by one
	 * frame when waking up.
	 */
	if (crtc_state->has_psr)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, 0, 0);

	if (pre_commit_is_vrr_active(state, crtc)) {
		int vblank_delay = intel_vrr_vblank_delay(crtc_state);

		end = intel_vrr_vmin_vblank_start(crtc_state);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);

		end = intel_vrr_vmax_vblank_start(crtc_state);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	} else {
		int vblank_delay = intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode);

		end = intel_mode_vblank_start(&crtc_state->hw.adjusted_mode);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	}
}

static void _intel_dsb_chain(struct intel_atomic_state *state,
			     struct intel_dsb *dsb,
			     struct intel_dsb *chained_dsb,
			     u32 ctrl)
{
	struct intel_display *display = to_intel_display(state->base.dev);
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	if (drm_WARN_ON(display->drm, dsb->id == chained_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(chained_dsb))
		return;

	intel_dsb_reg_write(dsb, DSB_CTRL(pipe, chained_dsb->id),
			    ctrl | DSB_ENABLE);

	intel_dsb_reg_write(dsb, DSB_CHICKEN(pipe, chained_dsb->id),
			    dsb_chicken(state, crtc));

	intel_dsb_reg_write(dsb, DSB_INTERRUPT(pipe, chained_dsb->id),
			    dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			    dsb_error_int_en(display) | DSB_PROG_INT_EN);

	if (ctrl & DSB_WAIT_FOR_VBLANK) {
		int dewake_scanline = dsb_dewake_scanline_start(state, crtc);
		int hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dewake_scanline);

		intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id),
				    DSB_ENABLE_DEWAKE |
				    DSB_SCANLINE_FOR_DEWAKE(hw_dewake_scanline));
	} else {
		intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id), 0);
	}

	intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id),
			    intel_dsb_head(chained_dsb));

	intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id),
			    intel_dsb_tail(chained_dsb));

	if (ctrl & DSB_WAIT_FOR_VBLANK) {
		/*
		 * Keep DEwake alive via the first DSB, in
		 * case we're already past dewake_scanline,
		 * and thus DSB_ENABLE_DEWAKE on the second
		 * DSB won't do its job.
		 */
		intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id),
					   DSB_FORCE_DEWAKE, DSB_FORCE_DEWAKE);

		intel_dsb_wait_scanline_out(state, dsb,
					    dsb_dewake_scanline_start(state, crtc),
					    dsb_dewake_scanline_end(state, crtc));

		/*
		 * DSB_FORCE_DEWAKE remains active even after DSB is
		 * disabled, so make sure to clear it.
		 */
		intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(crtc->pipe, dsb->id),
					   DSB_FORCE_DEWAKE, 0);
	}
}

void intel_dsb_chain(struct intel_atomic_state *state,
		     struct intel_dsb *dsb,
		     struct intel_dsb *chained_dsb,
		     bool wait_for_vblank)
{
	_intel_dsb_chain(state, dsb, chained_dsb,
			 wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0);
}

void intel_dsb_wait_vblank_delay(struct intel_atomic_state *state,
				 struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	int usecs = intel_scanlines_to_usecs(&crtc_state->hw.adjusted_mode,
					     dsb_vblank_delay(state, crtc));

	intel_dsb_wait_usec(dsb, usecs);
}

/**
 * intel_dsb_commit() - Trigger workload execution of DSB.
 * @dsb: DSB context
 *
 * This function triggers the actual register writes to the hardware by
 * enabling the DSB engine and pointing it at the command buffer.
 */
void intel_dsb_commit(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	enum pipe pipe = crtc->pipe;

	if (!assert_dsb_tail_is_aligned(dsb))
		return;

	if (is_dsb_busy(display, pipe, dsb->id)) {
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d is busy\n",
			crtc->base.base.id, crtc->base.name, dsb->id);
		return;
	}

	intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
			  DSB_ENABLE);

	intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id),
			  dsb->chicken);

	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
			  dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			  dsb_error_int_en(display) | DSB_PROG_INT_EN);

	intel_de_write_fw(display, DSB_PMCTRL(pipe, dsb->id), 0);

	intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id),
			  intel_dsb_head(dsb));

	intel_de_write_fw(display, DSB_TAIL(pipe, dsb->id),
			  intel_dsb_tail(dsb));
}

void intel_dsb_wait(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	enum pipe pipe = crtc->pipe;
	bool is_busy;
	int ret;

	ret = poll_timeout_us(is_busy = is_dsb_busy(display, pipe, dsb->id),
			      !is_busy,
			      100, 1000, false);
	if (ret) {
		u32 offset = intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf);

		intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
				  DSB_ENABLE | DSB_HALT);

		drm_err(display->drm,
			"[CRTC:%d:%s] DSB %d timed out waiting for idle (current head=0x%x, head=0x%x, tail=0x%x)\n",
			crtc->base.base.id, crtc->base.name, dsb->id,
			intel_de_read_fw(display, DSB_CURRENT_HEAD(pipe, dsb->id)) - offset,
			intel_de_read_fw(display, DSB_HEAD(pipe, dsb->id)) - offset,
			intel_de_read_fw(display, DSB_TAIL(pipe, dsb->id)) - offset);

		intel_dsb_dump(dsb);
	}

	/* Attempt to reset it */
	dsb->free_pos = 0;
	dsb->ins_start_offset = 0;
	dsb->ins[0] = 0;
	dsb->ins[1] = 0;

	intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), 0);

	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
			  dsb_error_int_status(display) | DSB_PROG_INT_STATUS);
}

/**
 * intel_dsb_prepare() - Allocate, pin and map the DSB command buffer.
 * @state: the atomic state
 * @crtc: the CRTC
 * @dsb_id: the DSB engine to use
 * @max_cmds: number of commands we need to fit into command buffer
 *
 * This function prepares the command buffer which is used to store the
 * DSB instructions and data.
 *
 * Returns:
 * DSB context, NULL on failure
 */
struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state,
				    struct intel_crtc *crtc,
				    enum intel_dsb_id dsb_id,
				    unsigned int max_cmds)
{
	struct intel_display *display = to_intel_display(state);
	struct ref_tracker *wakeref;
	struct intel_dsb *dsb;
	unsigned int size;

	if (!HAS_DSB(display))
		return NULL;

	if (!display->params.enable_dsb)
		return NULL;

	dsb = kzalloc(sizeof(*dsb), GFP_KERNEL);
	if (!dsb)
		goto out;

	wakeref = intel_display_rpm_get(display);

	/* ~1 qword per instruction, full cachelines */
	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);

	if (!intel_dsb_buffer_create(crtc, &dsb->dsb_buf, size))
		goto out_put_rpm;

	intel_display_rpm_put(display, wakeref);

	dsb->id = dsb_id;
	dsb->crtc = crtc;
	dsb->size = size / 4; /* in dwords */

	dsb->chicken = dsb_chicken(state, crtc);
	dsb->hw_dewake_scanline =
		dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline_start(state, crtc));

	return dsb;

out_put_rpm:
	intel_display_rpm_put(display, wakeref);
	kfree(dsb);
out:
	drm_info_once(display->drm,
		      "[CRTC:%d:%s] DSB %d queue setup failed, will fall back to MMIO for display HW programming\n",
		      crtc->base.base.id, crtc->base.name, dsb_id);

	return NULL;
}

/**
 * intel_dsb_cleanup() - Clean up the DSB context.
 * @dsb: DSB context
 *
 * This function cleans up the DSB context by unpinning and releasing
 * the VMA object associated with it.
 */
void intel_dsb_cleanup(struct intel_dsb *dsb)
{
	intel_dsb_buffer_cleanup(&dsb->dsb_buf);
	kfree(dsb);
}

void intel_dsb_irq_handler(struct intel_display *display,
			   enum pipe pipe, enum intel_dsb_id dsb_id)
{
	struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe);
	u32 tmp, errors;

	tmp = intel_de_read_fw(display, DSB_INTERRUPT(pipe, dsb_id));
	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb_id), tmp);

	if (tmp & DSB_PROG_INT_STATUS) {
		spin_lock(&display->drm->event_lock);

		if (crtc->dsb_event) {
			/*
			 * Update vblank counter/timestamp in case it
			 * hasn't been done yet for this frame.
			 */
			drm_crtc_accurate_vblank_count(&crtc->base);

			drm_crtc_send_vblank_event(&crtc->base, crtc->dsb_event);
			crtc->dsb_event = NULL;
		}

		spin_unlock(&display->drm->event_lock);
	}

	errors = tmp & dsb_error_int_status(display);
	if (errors & DSB_ATS_FAULT_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d ATS fault\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_GTT_FAULT_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d GTT fault\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_RSPTIMEOUT_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d response timeout\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_POLL_ERR_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d poll error\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_GOSUB_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d GOSUB programming error\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
}
1029