// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 *
 */

#include <linux/iopoll.h>

#include <drm/drm_print.h>
#include <drm/drm_vblank.h>

#include "intel_crtc.h"
#include "intel_de.h"
#include "intel_display_regs.h"
#include "intel_display_rpm.h"
#include "intel_display_types.h"
#include "intel_dsb.h"
#include "intel_dsb_buffer.h"
#include "intel_dsb_regs.h"
#include "intel_vblank.h"
#include "intel_vrr.h"
#include "skl_watermark.h"

#define CACHELINE_BYTES 64

struct intel_dsb {
	enum intel_dsb_id id;

	struct intel_dsb_buffer dsb_buf;
	struct intel_crtc *crtc;

	/*
	 * maximum number of dwords the buffer will hold.
	 */
	unsigned int size;

	/*
	 * free_pos points to the first free dword and
	 * helps in calculating the tail of the command buffer.
	 */
	unsigned int free_pos;

	/*
	 * Previously emitted DSB instruction. Used to
	 * identify/adjust the instruction for indexed
	 * register writes.
	 */
	u32 ins[2];

	/*
	 * Start of the previously emitted DSB instruction.
	 * Used to adjust the instruction for indexed
	 * register writes.
	 */
	unsigned int ins_start_offset;

	u32 chicken;
	int hw_dewake_scanline;
};

/**
 * DOC: DSB
 *
 * A DSB (Display State Buffer) is a queue of MMIO instructions in memory
 * which can be offloaded to the DSB HW in the display controller. The DSB HW
 * is a DMA engine that can be programmed to download the DSB from memory.
 * It allows the driver to batch submit display HW programming. This helps to
 * reduce loading time and CPU activity, thereby making the context switch
 * faster. DSB support was added in Gen12 Intel graphics platforms.
 *
 * DSBs can access only the pipe, plane, and transcoder Data Island Packet
 * registers.
 *
 * The DSB HW supports only register writes (both indexed and direct MMIO
 * writes). Register reads are not possible with the DSB HW engine.
 */
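
/*
 * Example (illustrative sketch only, not taken verbatim from any caller):
 * a typical DSB usage sequence from the atomic commit path, using the
 * functions defined in this file. The DSB id and command count are
 * made-up values:
 *
 *	struct intel_dsb *dsb;
 *
 *	dsb = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 1024);
 *	if (dsb) {
 *		intel_dsb_reg_write(dsb, reg, val);
 *		intel_dsb_finish(dsb);
 *		intel_dsb_commit(dsb);
 *		intel_dsb_wait(dsb);
 *		intel_dsb_cleanup(dsb);
 *	}
 *
 * When intel_dsb_prepare() returns NULL the caller is expected to fall
 * back to plain MMIO writes.
 */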

/* DSB opcodes. */
#define DSB_OPCODE_SHIFT		24
#define DSB_OPCODE_NOOP			0x0
#define DSB_OPCODE_MMIO_WRITE		0x1
#define   DSB_BYTE_EN			0xf
#define   DSB_BYTE_EN_SHIFT		20
#define   DSB_REG_VALUE_MASK		0xfffff
#define DSB_OPCODE_WAIT_USEC		0x2
#define DSB_OPCODE_WAIT_SCANLINE	0x3
#define DSB_OPCODE_WAIT_VBLANKS		0x4
#define DSB_OPCODE_WAIT_DSL_IN		0x5
#define DSB_OPCODE_WAIT_DSL_OUT		0x6
#define   DSB_SCANLINE_UPPER_SHIFT	20
#define   DSB_SCANLINE_LOWER_SHIFT	0
#define DSB_OPCODE_INTERRUPT		0x7
#define DSB_OPCODE_INDEXED_WRITE	0x9
/* see DSB_REG_VALUE_MASK */
#define DSB_OPCODE_POLL			0xA
/* see DSB_REG_VALUE_MASK */
#define DSB_OPCODE_GOSUB		0xC /* ptl+ */
#define   DSB_GOSUB_HEAD_SHIFT		26
#define   DSB_GOSUB_TAIL_SHIFT		0
#define   DSB_GOSUB_CONVERT_ADDR(x)	((x) >> 6)
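
/*
 * For example (worked encoding, offset and value made up): a direct MMIO
 * write of 0x12345678 to register offset 0x70180 with all four byte
 * enables set is encoded as the two-dword instruction
 *
 *	udw = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
 *	      (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | 0x70180 = 0x01f70180
 *	ldw = 0x12345678
 *
 * which is exactly what intel_dsb_reg_write() below emits via
 * intel_dsb_emit().
 */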

static bool pre_commit_is_vrr_active(struct intel_atomic_state *state,
				     struct intel_crtc *crtc)
{
	const struct intel_crtc_state *old_crtc_state =
		intel_atomic_get_old_crtc_state(state, crtc);
	const struct intel_crtc_state *new_crtc_state =
		intel_atomic_get_new_crtc_state(state, crtc);

	/* VRR will be enabled afterwards, if necessary */
	if (intel_crtc_needs_modeset(new_crtc_state))
		return false;

	/* VRR will have been disabled during intel_pre_plane_update() */
	return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc);
}

static int dsb_vtotal(struct intel_atomic_state *state,
		      struct intel_crtc *crtc)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);

	if (pre_commit_is_vrr_active(state, crtc))
		return intel_vrr_vmax_vtotal(crtc_state);
	else
		return intel_mode_vtotal(&crtc_state->hw.adjusted_mode);
}

static int dsb_dewake_scanline_start(struct intel_atomic_state *state,
				     struct intel_crtc *crtc)
{
	struct intel_display *display = to_intel_display(state);
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	unsigned int latency = skl_watermark_max_latency(display, 0);

	return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) -
		intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency);
}

static int dsb_dewake_scanline_end(struct intel_atomic_state *state,
				   struct intel_crtc *crtc)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);

	return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode);
}

static int dsb_scanline_to_hw(struct intel_atomic_state *state,
			      struct intel_crtc *crtc, int scanline)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	int vtotal = dsb_vtotal(state, crtc);

	return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal;
}
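
/*
 * Worked example (made-up numbers): with vtotal == 1125 and
 * intel_crtc_scanline_offset() == 1, software scanline 0 maps to
 * hardware scanline (0 + 1125 - 1) % 1125 == 1124, and scanline 100
 * maps to (100 + 1125 - 1) % 1125 == 99.
 */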

/*
 * Bspec suggests that we should always set DSB_SKIP_WAITS_EN. However, our
 * approach to flip completion differs from what is explained in Bspec: we
 * wait for the vblank in the DSB and generate an interrupt when it happens,
 * and that interrupt is taken as the indication of completion, so we
 * definitely do not want to skip the vblank wait. Skipping vblank evasion
 * is also a concern, i.e. the arming registers could get latched before we
 * have managed to write them. For these reasons we do not set
 * DSB_SKIP_WAITS_EN.
 */
static u32 dsb_chicken(struct intel_atomic_state *state,
		       struct intel_crtc *crtc)
{
	if (pre_commit_is_vrr_active(state, crtc))
		return DSB_CTRL_WAIT_SAFE_WINDOW |
			DSB_CTRL_NO_WAIT_VBLANK |
			DSB_INST_WAIT_SAFE_WINDOW |
			DSB_INST_NO_WAIT_VBLANK;
	else
		return 0;
}

static bool assert_dsb_has_room(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);

	/* each instruction is 2 dwords */
	return !drm_WARN(display->drm, dsb->free_pos > dsb->size - 2,
			 "[CRTC:%d:%s] DSB %d buffer overflow\n",
			 crtc->base.base.id, crtc->base.name, dsb->id);
}

static bool assert_dsb_tail_is_aligned(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);

	return !drm_WARN_ON(display->drm,
			    !IS_ALIGNED(dsb->free_pos * 4, CACHELINE_BYTES));
}

static void intel_dsb_dump(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	int i;

	drm_dbg_kms(display->drm, "[CRTC:%d:%s] DSB %d commands {\n",
		    crtc->base.base.id, crtc->base.name, dsb->id);
	for (i = 0; i < ALIGN(dsb->free_pos, 64 / 4); i += 4)
		drm_dbg_kms(display->drm,
			    " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4,
			    intel_dsb_buffer_read(&dsb->dsb_buf, i),
			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 1),
			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 2),
			    intel_dsb_buffer_read(&dsb->dsb_buf, i + 3));
	drm_dbg_kms(display->drm, "}\n");
}

static bool is_dsb_busy(struct intel_display *display, enum pipe pipe,
			enum intel_dsb_id dsb_id)
{
	return intel_de_read_fw(display, DSB_CTRL(pipe, dsb_id)) & DSB_STATUS_BUSY;
}

unsigned int intel_dsb_size(struct intel_dsb *dsb)
{
	return dsb->free_pos * 4;
}

unsigned int intel_dsb_head(struct intel_dsb *dsb)
{
	return intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf);
}

static unsigned int intel_dsb_tail(struct intel_dsb *dsb)
{
	return intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf) + intel_dsb_size(dsb);
}

static void intel_dsb_ins_align(struct intel_dsb *dsb)
{
	/*
	 * Every instruction should be 8 byte aligned.
	 *
	 * The only way to get unaligned free_pos is via
	 * intel_dsb_reg_write_indexed() which already
	 * makes sure the next dword is zeroed, so no need
	 * to clear it here.
	 */
	dsb->free_pos = ALIGN(dsb->free_pos, 2);
}

static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw)
{
	if (!assert_dsb_has_room(dsb))
		return;

	intel_dsb_ins_align(dsb);

	dsb->ins_start_offset = dsb->free_pos;
	dsb->ins[0] = ldw;
	dsb->ins[1] = udw;

	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, dsb->ins[0]);
	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, dsb->ins[1]);
}

static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb,
					u32 opcode, i915_reg_t reg)
{
	u32 prev_opcode, prev_reg;

	/*
	 * Nothing emitted yet? Must check before looking
	 * at the actual data since i915_gem_object_create_internal()
	 * does *not* give you zeroed memory!
	 */
	if (dsb->free_pos == 0)
		return false;

	prev_opcode = dsb->ins[1] & ~DSB_REG_VALUE_MASK;
	prev_reg = dsb->ins[1] & DSB_REG_VALUE_MASK;

	return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg);
}

static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_t reg)
{
	return intel_dsb_prev_ins_is_write(dsb,
					   DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT,
					   reg);
}

/**
 * intel_dsb_reg_write_indexed() - Emit indexed register write to the DSB context
 * @dsb: DSB context
 * @reg: register address.
 * @val: value.
 *
 * This function emits a register-value pair into the DSB command
 * buffer using the indexed (auto-increment) write instruction.
 *
 * Note that indexed writes are slower than normal MMIO writes
 * for a small number (less than 5 or so) of writes to the same
 * register.
 */
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
				 i915_reg_t reg, u32 val)
{
	/*
	 * For example, the buffer will look like below when 3 dwords
	 * are written to an auto-increment register:
	 * +--------------------------------------------------------+
	 * | size = 3 | offset &| value1 | value2 | value3 | zero   |
	 * |          | opcode  |        |        |        |        |
	 * +--------------------------------------------------------+
	 * +          +         +        +        +        +        +
	 * 0          4         8        12       16       20       24
	 * Byte
	 *
	 * As every instruction is 8 byte aligned, the index of a DSB
	 * instruction in the u32 array always starts at an even number.
	 * If an odd number of dwords is written, a zero is added at the
	 * end as padding.
	 */
	if (!intel_dsb_prev_ins_is_indexed_write(dsb, reg))
		intel_dsb_emit(dsb, 0, /* count */
			       (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) |
			       i915_mmio_reg_offset(reg));

	if (!assert_dsb_has_room(dsb))
		return;

	/* Update the count */
	dsb->ins[0]++;
	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->ins_start_offset + 0,
			       dsb->ins[0]);

	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, val);
	/* if the number of data words is odd, then the last dword should be 0. */
	if (dsb->free_pos & 0x1)
		intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos, 0);
}
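
/*
 * Example (illustrative sketch): consecutive calls for the same
 * auto-increment register collapse into a single indexed write
 * instruction. E.g. uploading a LUT (REG being a stand-in for a real
 * auto-increment data register) could look like:
 *
 *	for (i = 0; i < lut_size; i++)
 *		intel_dsb_reg_write_indexed(dsb, REG, lut[i]);
 *
 * which emits one instruction header followed by lut_size data dwords
 * (plus a zero pad dword if lut_size is odd), rather than lut_size
 * separate MMIO write instructions.
 */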

void intel_dsb_reg_write(struct intel_dsb *dsb,
			 i915_reg_t reg, u32 val)
{
	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		       (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
		       i915_mmio_reg_offset(reg));
}

static u32 intel_dsb_mask_to_byte_en(u32 mask)
{
	return (!!(mask & 0xff000000) << 3 |
		!!(mask & 0x00ff0000) << 2 |
		!!(mask & 0x0000ff00) << 1 |
		!!(mask & 0x000000ff) << 0);
}
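
/*
 * E.g. mask == 0x00ff00ff yields byte enables 0b0101, so only bytes
 * 2 and 0 of the register get written and the other two bytes keep
 * their current values.
 */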

/* Note: mask implemented via byte enables! */
void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
				i915_reg_t reg, u32 mask, u32 val)
{
	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		       (intel_dsb_mask_to_byte_en(mask) << DSB_BYTE_EN_SHIFT) |
		       i915_mmio_reg_offset(reg));
}

void intel_dsb_noop(struct intel_dsb *dsb, int count)
{
	int i;

	for (i = 0; i < count; i++)
		intel_dsb_emit(dsb, 0,
			       DSB_OPCODE_NOOP << DSB_OPCODE_SHIFT);
}

void intel_dsb_nonpost_start(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
				   DSB_NON_POSTED, DSB_NON_POSTED);
	intel_dsb_noop(dsb, 4);
}

void intel_dsb_nonpost_end(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
				   DSB_NON_POSTED, 0);
	intel_dsb_noop(dsb, 4);
}

void intel_dsb_interrupt(struct intel_dsb *dsb)
{
	intel_dsb_emit(dsb, 0,
		       DSB_OPCODE_INTERRUPT << DSB_OPCODE_SHIFT);
}

void intel_dsb_wait_usec(struct intel_dsb *dsb, int count)
{
	/* +1 to make sure we never wait less time than asked for */
	intel_dsb_emit(dsb, count + 1,
		       DSB_OPCODE_WAIT_USEC << DSB_OPCODE_SHIFT);
}

void intel_dsb_wait_vblanks(struct intel_dsb *dsb, int count)
{
	intel_dsb_emit(dsb, count,
		       DSB_OPCODE_WAIT_VBLANKS << DSB_OPCODE_SHIFT);
}

static void intel_dsb_emit_wait_dsl(struct intel_dsb *dsb,
				    u32 opcode, int lower, int upper)
{
	u64 window = ((u64)upper << DSB_SCANLINE_UPPER_SHIFT) |
		((u64)lower << DSB_SCANLINE_LOWER_SHIFT);

	intel_dsb_emit(dsb, lower_32_bits(window),
		       (opcode << DSB_OPCODE_SHIFT) |
		       upper_32_bits(window));
}

static void intel_dsb_wait_dsl(struct intel_atomic_state *state,
			       struct intel_dsb *dsb,
			       int lower_in, int upper_in,
			       int lower_out, int upper_out)
{
	struct intel_crtc *crtc = dsb->crtc;

	lower_in = dsb_scanline_to_hw(state, crtc, lower_in);
	upper_in = dsb_scanline_to_hw(state, crtc, upper_in);

	lower_out = dsb_scanline_to_hw(state, crtc, lower_out);
	upper_out = dsb_scanline_to_hw(state, crtc, upper_out);

	if (upper_in >= lower_in)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_IN,
					lower_in, upper_in);
	else if (upper_out >= lower_out)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT,
					lower_out, upper_out);
	else
		drm_WARN_ON(crtc->base.dev, 1); /* assert_dsl_ok() should have caught it already */
}

static void assert_dsl_ok(struct intel_atomic_state *state,
			  struct intel_dsb *dsb,
			  int start, int end)
{
	struct intel_crtc *crtc = dsb->crtc;
	int vtotal = dsb_vtotal(state, crtc);

	/*
	 * Waiting for the entire frame doesn't make sense
	 * (IN == don't wait, OUT == wait forever).
	 */
	drm_WARN(crtc->base.dev, (end - start + vtotal) % vtotal == vtotal - 1,
		 "[CRTC:%d:%s] DSB %d bad scanline window wait: %d-%d (vt=%d)\n",
		 crtc->base.base.id, crtc->base.name, dsb->id,
		 start, end, vtotal);
}

void intel_dsb_wait_scanline_in(struct intel_atomic_state *state,
				struct intel_dsb *dsb,
				int start, int end)
{
	assert_dsl_ok(state, dsb, start, end);

	intel_dsb_wait_dsl(state, dsb,
			   start, end,
			   end + 1, start - 1);
}

void intel_dsb_wait_scanline_out(struct intel_atomic_state *state,
				 struct intel_dsb *dsb,
				 int start, int end)
{
	assert_dsl_ok(state, dsb, start, end);

	intel_dsb_wait_dsl(state, dsb,
			   end + 1, start - 1,
			   start, end);
}
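
/*
 * For example (made-up numbers), intel_dsb_wait_scanline_out(state, dsb,
 * 1000, 1100) waits until the scanline is outside 1000-1100. After the
 * hw scanline adjustment this is emitted either as a DSL OUT wait on
 * 1000-1100, or as a DSL IN wait on the complementary window 1101-999,
 * depending on which window avoids wrapping around vtotal.
 */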

void intel_dsb_poll(struct intel_dsb *dsb,
		    i915_reg_t reg, u32 mask, u32 val,
		    int wait_us, int count)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	intel_dsb_reg_write(dsb, DSB_POLLMASK(pipe, dsb->id), mask);
	intel_dsb_reg_write(dsb, DSB_POLLFUNC(pipe, dsb->id),
			    DSB_POLL_ENABLE |
			    DSB_POLL_WAIT(wait_us) | DSB_POLL_COUNT(count));

	intel_dsb_noop(dsb, 5);

	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_POLL << DSB_OPCODE_SHIFT) |
		       i915_mmio_reg_offset(reg));
}
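
/*
 * Example (illustrative sketch, DONE_BIT is a made-up bitmask): wait for
 * a status bit to get set, checking every 10 usec, at most 100 times:
 *
 *	intel_dsb_poll(dsb, reg, DONE_BIT, DONE_BIT, 10, 100);
 */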

static void intel_dsb_align_tail(struct intel_dsb *dsb)
{
	u32 aligned_tail, tail;

	intel_dsb_ins_align(dsb);

	tail = dsb->free_pos * 4;
	aligned_tail = ALIGN(tail, CACHELINE_BYTES);

	if (aligned_tail > tail)
		intel_dsb_buffer_memset(&dsb->dsb_buf, dsb->free_pos, 0,
					aligned_tail - tail);

	dsb->free_pos = aligned_tail / 4;
}

static void intel_dsb_gosub_align(struct intel_dsb *dsb)
{
	u32 aligned_tail, tail;

	intel_dsb_ins_align(dsb);

	tail = dsb->free_pos * 4;
	aligned_tail = ALIGN(tail, CACHELINE_BYTES);

	/*
	 * Wa_16024917128
	 * "Ensure GOSUB is not placed in cacheline QW slot 6 or 7 (numbered 0-7)"
	 */
	if (aligned_tail - tail <= 2 * 8)
		intel_dsb_buffer_memset(&dsb->dsb_buf, dsb->free_pos, 0,
					aligned_tail - tail);

	dsb->free_pos = aligned_tail / 4;
}

void intel_dsb_gosub(struct intel_dsb *dsb,
		     struct intel_dsb *sub_dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	unsigned int head, tail;
	u64 head_tail;

	if (drm_WARN_ON(display->drm, dsb->id != sub_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(sub_dsb))
		return;

	intel_dsb_gosub_align(dsb);

	head = intel_dsb_head(sub_dsb);
	tail = intel_dsb_tail(sub_dsb);

	/*
	 * The GOSUB instruction has the following memory layout.
	 *
	 * +------------------------------------------------------------+
	 * |  Opcode  |   Rsvd    |      Head Ptr     |     Tail Ptr    |
	 * |   0x0c   |           |                   |                 |
	 * +------------------------------------------------------------+
	 * |<- 8bits->|<- 4bits ->|<--   26bits    -->|<--  26bits   -->|
	 *
	 * We have only 26 bits each to represent the head and tail
	 * pointers even though the addresses themselves are 32 bit. However,
	 * this is not a problem because the addresses are cacheline (64 byte)
	 * aligned and therefore the low 6 bits are always zero. Hence, we
	 * right shift the address by 6 before embedding it into the GOSUB
	 * instruction.
	 */

	head_tail = ((u64)(DSB_GOSUB_CONVERT_ADDR(head)) << DSB_GOSUB_HEAD_SHIFT) |
		((u64)(DSB_GOSUB_CONVERT_ADDR(tail)) << DSB_GOSUB_TAIL_SHIFT);

	intel_dsb_emit(dsb, lower_32_bits(head_tail),
		       (DSB_OPCODE_GOSUB << DSB_OPCODE_SHIFT) |
		       upper_32_bits(head_tail));

	/*
	 * "NOTE: the instructions within the cacheline
	 *  FOLLOWING the GOSUB instruction must be NOPs."
	 */
	intel_dsb_align_tail(dsb);
}
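
/*
 * Worked example (made-up addresses): a subroutine whose buffer starts
 * at GGTT offset 0x10000 and whose cacheline aligned tail is at 0x10040
 * is encoded as
 *
 *	head_tail = ((0x10000 >> 6) << DSB_GOSUB_HEAD_SHIFT) |
 *		    ((0x10040 >> 6) << DSB_GOSUB_TAIL_SHIFT)
 *		  = ((u64)0x400 << 26) | 0x401
 *
 * with the GOSUB opcode going into the top byte of the upper dword.
 */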

void intel_dsb_gosub_finish(struct intel_dsb *dsb)
{
	intel_dsb_align_tail(dsb);

	/*
	 * Wa_16024917128
	 * "Ensure that all subroutines called by GOSUB end with a cacheline of NOPs"
	 */
	intel_dsb_noop(dsb, 8);

	intel_dsb_buffer_flush_map(&dsb->dsb_buf);
}

void intel_dsb_finish(struct intel_dsb *dsb)
{
	intel_dsb_align_tail(dsb);

	intel_dsb_buffer_flush_map(&dsb->dsb_buf);
}

static u32 dsb_error_int_status(struct intel_display *display)
{
	u32 errors;

	errors = DSB_GTT_FAULT_INT_STATUS |
		DSB_RSPTIMEOUT_INT_STATUS |
		DSB_POLL_ERR_INT_STATUS;

	/*
	 * All the non-existing status bits operate as
	 * normal r/w bits, so any attempt to clear them
	 * will just end up setting them. Never do that so
	 * we won't mistake them for actual error interrupts.
	 */
	if (DISPLAY_VER(display) >= 14)
		errors |= DSB_ATS_FAULT_INT_STATUS;

	if (DISPLAY_VER(display) >= 30)
		errors |= DSB_GOSUB_INT_STATUS;

	return errors;
}

static u32 dsb_error_int_en(struct intel_display *display)
{
	u32 errors;

	errors = DSB_GTT_FAULT_INT_EN |
		DSB_RSPTIMEOUT_INT_EN |
		DSB_POLL_ERR_INT_EN;

	if (DISPLAY_VER(display) >= 14)
		errors |= DSB_ATS_FAULT_INT_EN;

	/*
	 * Wa_16024917128
	 * "Disable nested GOSUB interrupt (DSB_INTERRUPT bit 21)"
	 */
	if (0 && DISPLAY_VER(display) >= 30)
		errors |= DSB_GOSUB_INT_EN;

	return errors;
}

/*
 * FIXME calibrate these sensibly, ideally compute based on
 * the number of registers to be written. But that requires
 * measuring the actual DSB execution speed on each platform
 * (and the speed also depends on CDCLK and memory clock)...
 */
static int intel_dsb_noarm_exec_time_us(void)
{
	return 80;
}

static int intel_dsb_arm_exec_time_us(void)
{
	return 20;
}

int intel_dsb_exec_time_us(void)
{
	return intel_dsb_noarm_exec_time_us() +
		intel_dsb_arm_exec_time_us();
}

void intel_dsb_vblank_evade(struct intel_atomic_state *state,
			    struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	int latency = intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode,
					       intel_dsb_arm_exec_time_us());
	int start, end;

	/*
	 * PIPEDSL reads as 0 when in SRDENT (PSR1) or DEEP_SLEEP (PSR2). On
	 * wake-up, scanline counting starts from vblank_start - 1. We don't
	 * know if wake-up is already ongoing when evasion starts. In the
	 * worst case PIPEDSL could start reading a valid value right after
	 * we check the scanline, in which case we wouldn't have enough time
	 * to write all the registers. To tackle this, evade scanline 0 as
	 * well. As a drawback we get a 1 frame delay in the flip when waking
	 * up.
	 */
	if (crtc_state->has_psr)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, 0, 0);

	if (pre_commit_is_vrr_active(state, crtc)) {
		int vblank_delay = crtc_state->set_context_latency;

		end = intel_vrr_vmin_vblank_start(crtc_state);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);

		end = intel_vrr_vmax_vblank_start(crtc_state);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	} else {
		int vblank_delay = intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode);

		end = intel_mode_vblank_start(&crtc_state->hw.adjusted_mode);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	}
}

static void _intel_dsb_chain(struct intel_atomic_state *state,
			     struct intel_dsb *dsb,
			     struct intel_dsb *chained_dsb,
			     u32 ctrl)
{
	struct intel_display *display = to_intel_display(state->base.dev);
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	if (drm_WARN_ON(display->drm, dsb->id == chained_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(chained_dsb))
		return;

	intel_dsb_reg_write(dsb, DSB_CTRL(pipe, chained_dsb->id),
			    ctrl | DSB_ENABLE);

	intel_dsb_reg_write(dsb, DSB_CHICKEN(pipe, chained_dsb->id),
			    dsb_chicken(state, crtc));

	intel_dsb_reg_write(dsb, DSB_INTERRUPT(pipe, chained_dsb->id),
			    dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			    dsb_error_int_en(display) | DSB_PROG_INT_EN);

	if (ctrl & DSB_WAIT_FOR_VBLANK) {
		int dewake_scanline = dsb_dewake_scanline_start(state, crtc);
		int hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dewake_scanline);

		intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id),
				    DSB_ENABLE_DEWAKE |
				    DSB_SCANLINE_FOR_DEWAKE(hw_dewake_scanline));
	} else {
		intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id), 0);
	}

	intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id),
			    intel_dsb_head(chained_dsb));

	intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id),
			    intel_dsb_tail(chained_dsb));

	if (ctrl & DSB_WAIT_FOR_VBLANK) {
		/*
		 * Keep DEwake alive via the first DSB, in
		 * case we're already past dewake_scanline,
		 * and thus DSB_ENABLE_DEWAKE on the second
		 * DSB won't do its job.
		 */
		intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id),
					   DSB_FORCE_DEWAKE, DSB_FORCE_DEWAKE);

		intel_dsb_wait_scanline_out(state, dsb,
					    dsb_dewake_scanline_start(state, crtc),
					    dsb_dewake_scanline_end(state, crtc));

		/*
		 * DSB_FORCE_DEWAKE remains active even after DSB is
		 * disabled, so make sure to clear it.
		 */
		intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(crtc->pipe, dsb->id),
					   DSB_FORCE_DEWAKE, 0);
	}
}

void intel_dsb_chain(struct intel_atomic_state *state,
		     struct intel_dsb *dsb,
		     struct intel_dsb *chained_dsb,
		     bool wait_for_vblank)
{
	_intel_dsb_chain(state, dsb, chained_dsb,
			 wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0);
}
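
/*
 * Example (illustrative sketch): queue instructions into @dsb that will
 * start a second, already finished DSB, and have that second DSB wait
 * for the vblank before running its commands:
 *
 *	intel_dsb_chain(state, dsb, chained_dsb, true);
 *
 * The chained DSB must have been completed with intel_dsb_finish()
 * beforehand so that its tail is cacheline aligned.
 */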

void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state,
				       struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->hw.adjusted_mode;
	int wait_scanlines;

	if (pre_commit_is_vrr_active(state, crtc)) {
		/*
		 * If the push happened before the vmin decision boundary
		 * we don't know how far we are from the undelayed vblank.
		 * Wait until we're past the vmin safe window, at which
		 * point we're SCL lines away from the delayed vblank.
		 *
		 * If the push happened after the vmin decision boundary
		 * the hardware itself guarantees that we're SCL lines
		 * away from the delayed vblank, and we won't be inside
		 * the vmin safe window so this extra wait does nothing.
		 */
		intel_dsb_wait_scanline_out(state, dsb,
					    intel_vrr_safe_window_start(crtc_state),
					    intel_vrr_vmin_safe_window_end(crtc_state));
		/*
		 * When the push is sent during vblank it will trigger
		 * on the next scanline, hence we have up to one extra
		 * scanline until the delayed vblank occurs after
		 * TRANS_PUSH has been written.
		 */
		wait_scanlines = crtc_state->set_context_latency + 1;
	} else {
		wait_scanlines = intel_mode_vblank_delay(adjusted_mode);
	}

	intel_dsb_wait_usec(dsb, intel_scanlines_to_usecs(adjusted_mode, wait_scanlines));
}

/**
 * intel_dsb_commit() - Trigger workload execution of DSB.
 * @dsb: DSB context
 *
 * This function triggers the actual hardware writes by starting
 * execution of the DSB command buffer.
 */
void intel_dsb_commit(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	enum pipe pipe = crtc->pipe;

	if (!assert_dsb_tail_is_aligned(dsb))
		return;

	if (is_dsb_busy(display, pipe, dsb->id)) {
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d is busy\n",
			crtc->base.base.id, crtc->base.name, dsb->id);
		return;
	}

	intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
			  DSB_ENABLE);

	intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id),
			  dsb->chicken);

	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
			  dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			  dsb_error_int_en(display) | DSB_PROG_INT_EN);

	intel_de_write_fw(display, DSB_PMCTRL(pipe, dsb->id), 0);

	intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id),
			  intel_dsb_head(dsb));

	intel_de_write_fw(display, DSB_TAIL(pipe, dsb->id),
			  intel_dsb_tail(dsb));
}

void intel_dsb_wait(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	enum pipe pipe = crtc->pipe;
	bool is_busy;
	int ret;

	ret = poll_timeout_us(is_busy = is_dsb_busy(display, pipe, dsb->id),
			      !is_busy,
			      100, 1000, false);
	if (ret) {
		u32 offset = intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf);

		intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
				  DSB_ENABLE | DSB_HALT);

		drm_err(display->drm,
			"[CRTC:%d:%s] DSB %d timed out waiting for idle (current head=0x%x, head=0x%x, tail=0x%x)\n",
			crtc->base.base.id, crtc->base.name, dsb->id,
			intel_de_read_fw(display, DSB_CURRENT_HEAD(pipe, dsb->id)) - offset,
			intel_de_read_fw(display, DSB_HEAD(pipe, dsb->id)) - offset,
			intel_de_read_fw(display, DSB_TAIL(pipe, dsb->id)) - offset);

		intel_dsb_dump(dsb);
	}

	/* Attempt to reset it */
	dsb->free_pos = 0;
	dsb->ins_start_offset = 0;
	dsb->ins[0] = 0;
	dsb->ins[1] = 0;

	intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), 0);

	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
			  dsb_error_int_status(display) | DSB_PROG_INT_STATUS);
}

/**
 * intel_dsb_prepare() - Allocate, pin and map the DSB command buffer.
 * @state: the atomic state
 * @crtc: the CRTC
 * @dsb_id: the DSB engine to use
 * @max_cmds: number of commands we need to fit into command buffer
 *
 * This function prepares the command buffer which is used to store the
 * DSB instructions and data.
 *
 * Returns:
 * DSB context, NULL on failure
 */
struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state,
				    struct intel_crtc *crtc,
				    enum intel_dsb_id dsb_id,
				    unsigned int max_cmds)
{
	struct intel_display *display = to_intel_display(state);
	struct ref_tracker *wakeref;
	struct intel_dsb *dsb;
	unsigned int size;

	if (!HAS_DSB(display))
		return NULL;

	if (!display->params.enable_dsb)
		return NULL;

	dsb = kzalloc(sizeof(*dsb), GFP_KERNEL);
	if (!dsb)
		goto out;

	wakeref = intel_display_rpm_get(display);

	/* ~1 qword per instruction, full cachelines */
	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);

	if (!intel_dsb_buffer_create(crtc, &dsb->dsb_buf, size))
		goto out_put_rpm;

	intel_display_rpm_put(display, wakeref);

	dsb->id = dsb_id;
	dsb->crtc = crtc;
	dsb->size = size / 4; /* in dwords */

	dsb->chicken = dsb_chicken(state, crtc);
	dsb->hw_dewake_scanline =
		dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline_start(state, crtc));

	return dsb;

out_put_rpm:
	intel_display_rpm_put(display, wakeref);
	kfree(dsb);
out:
	drm_info_once(display->drm,
		      "[CRTC:%d:%s] DSB %d queue setup failed, will fall back to MMIO for display HW programming\n",
		      crtc->base.base.id, crtc->base.name, dsb_id);

	return NULL;
}

/**
 * intel_dsb_cleanup() - Clean up the DSB context.
 * @dsb: DSB context
 *
 * This function cleans up the DSB context by unpinning and releasing
 * the VMA object associated with it.
 */
void intel_dsb_cleanup(struct intel_dsb *dsb)
{
	intel_dsb_buffer_cleanup(&dsb->dsb_buf);
	kfree(dsb);
}

void intel_dsb_irq_handler(struct intel_display *display,
			   enum pipe pipe, enum intel_dsb_id dsb_id)
{
	struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe);
	u32 tmp, errors;

	tmp = intel_de_read_fw(display, DSB_INTERRUPT(pipe, dsb_id));
	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb_id), tmp);

	if (tmp & DSB_PROG_INT_STATUS) {
		spin_lock(&display->drm->event_lock);

		if (crtc->dsb_event) {
			/*
			 * Update vblank counter/timestamp in case it
			 * hasn't been done yet for this frame.
			 */
			drm_crtc_accurate_vblank_count(&crtc->base);

			drm_crtc_send_vblank_event(&crtc->base, crtc->dsb_event);
			crtc->dsb_event = NULL;
		}

		spin_unlock(&display->drm->event_lock);
	}

	errors = tmp & dsb_error_int_status(display);
	if (errors & DSB_ATS_FAULT_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d ATS fault\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_GTT_FAULT_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d GTT fault\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_RSPTIMEOUT_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d response timeout\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_POLL_ERR_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d poll error\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
	if (errors & DSB_GOSUB_INT_STATUS)
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d GOSUB programming error\n",
			crtc->base.base.id, crtc->base.name, dsb_id);
}
1038