xref: /linux/drivers/gpu/drm/i915/display/intel_dsb.c (revision f09812b85fa6f41058bcc46e70ac406bf9b0493a)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  *
5  */
6 
7 #include <linux/iopoll.h>
8 
9 #include <drm/drm_print.h>
10 #include <drm/drm_vblank.h>
11 #include <drm/intel/display_parent_interface.h>
12 
13 #include "intel_crtc.h"
14 #include "intel_de.h"
15 #include "intel_display_regs.h"
16 #include "intel_display_rpm.h"
17 #include "intel_display_types.h"
18 #include "intel_dsb.h"
19 #include "intel_dsb_regs.h"
20 #include "intel_psr.h"
21 #include "intel_vblank.h"
22 #include "intel_vrr.h"
23 #include "skl_watermark.h"
24 
25 #define CACHELINE_BYTES 64
26 
/* One DSB (Display State Buffer) command buffer context. */
struct intel_dsb {
	/* which of the pipe's DSB engines this context targets */
	enum intel_dsb_id id;

	/* backing storage for the command buffer */
	struct intel_dsb_buffer *dsb_buf;
	/* CRTC whose pipe owns this DSB engine */
	struct intel_crtc *crtc;

	/*
	 * maximum number of dwords the buffer will hold.
	 */
	unsigned int size;

	/*
	 * free_pos will point the first free dword and
	 * help in calculating tail of command buffer.
	 */
	unsigned int free_pos;

	/*
	 * Previously emitted DSB instruction. Used to
	 * identify/adjust the instruction for indexed
	 * register writes.
	 */
	u32 ins[2];

	/*
	 * Start of the previously emitted DSB instruction.
	 * Used to adjust the instruction for indexed
	 * register writes.
	 */
	unsigned int ins_start_offset;

	/* DSB_CHICKEN value programmed when the DSB is committed */
	u32 chicken;
	/* NOTE(review): not used in the visible code; presumably the hw
	 * scanline for DEwake, set elsewhere — confirm */
	int hw_dewake_scanline;
};
61 
62 /**
63  * DOC: DSB
64  *
65  * A DSB (Display State Buffer) is a queue of MMIO instructions in the memory
66  * which can be offloaded to DSB HW in Display Controller. DSB HW is a DMA
67  * engine that can be programmed to download the DSB from memory.
68  * It allows driver to batch submit display HW programming. This helps to
69  * reduce loading time and CPU activity, thereby making the context switch
 * faster. DSB support was added starting from Gen12 Intel graphics platforms.
71  *
72  * DSB's can access only the pipe, plane, and transcoder Data Island Packet
73  * registers.
74  *
75  * DSB HW can support only register writes (both indexed and direct MMIO
 * writes). No register reads are possible with the DSB HW engine.
77  */
78 
/*
 * DSB buffer parent interface calls are here instead of intel_parent.[ch]
 * because they're not used outside of intel_dsb.c.
 */

/* GGTT offset of the start of the DSB command buffer */
static u32 dsb_buffer_ggtt_offset(struct intel_dsb *dsb)
{
	struct intel_display *display = to_intel_display(dsb->crtc);

	return display->parent->dsb->ggtt_offset(dsb->dsb_buf);
}

/* write one dword at dword index @idx in the command buffer */
static void dsb_buffer_write(struct intel_dsb *dsb, u32 idx, u32 val)
{
	struct intel_display *display = to_intel_display(dsb->crtc);

	display->parent->dsb->write(dsb->dsb_buf, idx, val);
}

/* read back one dword at dword index @idx from the command buffer */
static u32 dsb_buffer_read(struct intel_dsb *dsb, u32 idx)
{
	struct intel_display *display = to_intel_display(dsb->crtc);

	return display->parent->dsb->read(dsb->dsb_buf, idx);
}

/* fill @size bytes with @val, starting at dword index @idx */
static void dsb_buffer_fill(struct intel_dsb *dsb, u32 idx, u32 val, size_t size)
{
	struct intel_display *display = to_intel_display(dsb->crtc);

	display->parent->dsb->fill(dsb->dsb_buf, idx, val, size);
}

/* allocate a @size byte command buffer via the parent interface */
static struct intel_dsb_buffer *dsb_buffer_create(struct intel_display *display, size_t size)
{
	return display->parent->dsb->create(display->drm, size);
}

/* release the command buffer */
static void dsb_buffer_cleanup(struct intel_dsb *dsb)
{
	struct intel_display *display = to_intel_display(dsb->crtc);

	display->parent->dsb->cleanup(dsb->dsb_buf);
}

/* flush CPU side writes to the command buffer mapping */
static void dsb_buffer_flush_map(struct intel_dsb *dsb)
{
	struct intel_display *display = to_intel_display(dsb->crtc);

	display->parent->dsb->flush_map(dsb->dsb_buf);
}
129 
130 /* DSB opcodes. */
131 #define DSB_OPCODE_SHIFT		24
132 #define DSB_OPCODE_NOOP			0x0
133 #define DSB_OPCODE_MMIO_WRITE		0x1
134 #define   DSB_BYTE_EN			0xf
135 #define   DSB_BYTE_EN_SHIFT		20
136 #define   DSB_REG_VALUE_MASK		0xfffff
137 #define DSB_OPCODE_WAIT_USEC		0x2
138 #define DSB_OPCODE_WAIT_SCANLINE	0x3
139 #define DSB_OPCODE_WAIT_VBLANKS		0x4
140 #define DSB_OPCODE_WAIT_DSL_IN		0x5
141 #define DSB_OPCODE_WAIT_DSL_OUT		0x6
142 #define   DSB_SCANLINE_UPPER_SHIFT	20
143 #define   DSB_SCANLINE_LOWER_SHIFT	0
144 #define DSB_OPCODE_INTERRUPT		0x7
145 #define DSB_OPCODE_INDEXED_WRITE	0x9
146 /* see DSB_REG_VALUE_MASK */
147 #define DSB_OPCODE_POLL			0xA
148 /* see DSB_REG_VALUE_MASK */
149 #define DSB_OPCODE_GOSUB		0xC /* ptl+ */
150 #define   DSB_GOSUB_HEAD_SHIFT		26
151 #define   DSB_GOSUB_TAIL_SHIFT		0
152 #define   DSB_GOSUB_CONVERT_ADDR(x)	((x) >> 6)
153 
154 static bool pre_commit_is_vrr_active(struct intel_atomic_state *state,
155 				     struct intel_crtc *crtc)
156 {
157 	const struct intel_crtc_state *old_crtc_state =
158 		intel_atomic_get_old_crtc_state(state, crtc);
159 	const struct intel_crtc_state *new_crtc_state =
160 		intel_atomic_get_new_crtc_state(state, crtc);
161 
162 	/* VRR will be enabled afterwards, if necessary */
163 	if (intel_crtc_needs_modeset(new_crtc_state))
164 		return false;
165 
166 	/* VRR will have been disabled during intel_pre_plane_update() */
167 	return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc);
168 }
169 
170 static int dsb_vtotal(struct intel_atomic_state *state,
171 		      struct intel_crtc *crtc)
172 {
173 	const struct intel_crtc_state *crtc_state =
174 		intel_pre_commit_crtc_state(state, crtc);
175 
176 	if (pre_commit_is_vrr_active(state, crtc))
177 		return intel_vrr_vmax_vtotal(crtc_state);
178 	else
179 		return intel_mode_vtotal(&crtc_state->hw.adjusted_mode);
180 }
181 
182 static int dsb_dewake_scanline_start(struct intel_atomic_state *state,
183 				     struct intel_crtc *crtc)
184 {
185 	struct intel_display *display = to_intel_display(state);
186 	const struct intel_crtc_state *crtc_state =
187 		intel_pre_commit_crtc_state(state, crtc);
188 	unsigned int latency = skl_watermark_max_latency(display, 0);
189 
190 	return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) -
191 		intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency);
192 }
193 
194 static int dsb_dewake_scanline_end(struct intel_atomic_state *state,
195 				   struct intel_crtc *crtc)
196 {
197 	const struct intel_crtc_state *crtc_state =
198 		intel_pre_commit_crtc_state(state, crtc);
199 
200 	return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode);
201 }
202 
/*
 * Translate a logical scanline into the hardware's scanline counter
 * space, compensating for the pipe's scanline offset (with wraparound).
 */
static int dsb_scanline_to_hw(struct intel_atomic_state *state,
			      struct intel_crtc *crtc, int scanline)
{
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	int vtotal = dsb_vtotal(state, crtc);
	int hw_scanline;

	hw_scanline = scanline - intel_crtc_scanline_offset(crtc_state) + vtotal;

	return hw_scanline % vtotal;
}
212 
213 /*
214  * Bspec suggests that we should always set DSB_SKIP_WAITS_EN. We have approach
215  * different from what is explained in Bspec on how flip is considered being
216  * complete. We are waiting for vblank in DSB and generate interrupt when it
217  * happens and this interrupt is considered as indication of completion -> we
218  * definitely do not want to skip vblank wait. We also have concern what comes
219  * to skipping vblank evasion. I.e. arming registers are latched before we have
220  * managed writing them. Due to these reasons we are not setting
221  * DSB_SKIP_WAITS_EN except when using TRANS_PUSH mechanism to trigger
222  * "frame change" event.
223  */
224 static u32 dsb_chicken(struct intel_atomic_state *state,
225 		       struct intel_crtc *crtc)
226 {
227 	const struct intel_crtc_state *new_crtc_state =
228 		intel_atomic_get_new_crtc_state(state, crtc);
229 	u32 chicken = intel_psr_use_trans_push(new_crtc_state) ?
230 		DSB_SKIP_WAITS_EN : 0;
231 
232 	if (pre_commit_is_vrr_active(state, crtc))
233 		chicken |= DSB_CTRL_WAIT_SAFE_WINDOW |
234 			DSB_CTRL_NO_WAIT_VBLANK |
235 			DSB_INST_WAIT_SAFE_WINDOW |
236 			DSB_INST_NO_WAIT_VBLANK;
237 
238 	return chicken;
239 }
240 
241 static bool assert_dsb_has_room(struct intel_dsb *dsb)
242 {
243 	struct intel_crtc *crtc = dsb->crtc;
244 	struct intel_display *display = to_intel_display(crtc->base.dev);
245 
246 	/* each instruction is 2 dwords */
247 	return !drm_WARN(display->drm, dsb->free_pos > dsb->size - 2,
248 			 "[CRTC:%d:%s] DSB %d buffer overflow\n",
249 			 crtc->base.base.id, crtc->base.name, dsb->id);
250 }
251 
252 static bool assert_dsb_tail_is_aligned(struct intel_dsb *dsb)
253 {
254 	struct intel_crtc *crtc = dsb->crtc;
255 	struct intel_display *display = to_intel_display(crtc->base.dev);
256 
257 	return !drm_WARN_ON(display->drm,
258 			    !IS_ALIGNED(dsb->free_pos * 4, CACHELINE_BYTES));
259 }
260 
261 static void intel_dsb_dump(struct intel_dsb *dsb)
262 {
263 	struct intel_crtc *crtc = dsb->crtc;
264 	struct intel_display *display = to_intel_display(crtc->base.dev);
265 	int i;
266 
267 	drm_dbg_kms(display->drm, "[CRTC:%d:%s] DSB %d commands {\n",
268 		    crtc->base.base.id, crtc->base.name, dsb->id);
269 	for (i = 0; i < ALIGN(dsb->free_pos, 64 / 4); i += 4)
270 		drm_dbg_kms(display->drm,
271 			    " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4,
272 			    dsb_buffer_read(dsb, i),
273 			    dsb_buffer_read(dsb, i + 1),
274 			    dsb_buffer_read(dsb, i + 2),
275 			    dsb_buffer_read(dsb, i + 3));
276 	drm_dbg_kms(display->drm, "}\n");
277 }
278 
/* Is the DSB engine still executing a previously committed workload? */
static bool is_dsb_busy(struct intel_display *display, enum pipe pipe,
			enum intel_dsb_id dsb_id)
{
	return intel_de_read_fw(display, DSB_CTRL(pipe, dsb_id)) & DSB_STATUS_BUSY;
}

/* Number of bytes currently used in the command buffer. */
unsigned int intel_dsb_size(struct intel_dsb *dsb)
{
	return dsb->free_pos * 4;
}

/* GGTT address of the start of the command buffer. */
unsigned int intel_dsb_head(struct intel_dsb *dsb)
{
	return dsb_buffer_ggtt_offset(dsb);
}

/* GGTT address just past the last used byte of the command buffer. */
static unsigned int intel_dsb_tail(struct intel_dsb *dsb)
{
	return dsb_buffer_ggtt_offset(dsb) + intel_dsb_size(dsb);
}
299 
/* Round free_pos up so the next instruction starts 8 byte aligned. */
static void intel_dsb_ins_align(struct intel_dsb *dsb)
{
	/*
	 * Every instruction should be 8 byte aligned.
	 *
	 * The only way to get unaligned free_pos is via
	 * intel_dsb_reg_write_indexed() which already
	 * makes sure the next dword is zeroed, so no need
	 * to clear it here.
	 */
	dsb->free_pos = ALIGN(dsb->free_pos, 2);
}
312 
313 static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw)
314 {
315 	if (!assert_dsb_has_room(dsb))
316 		return;
317 
318 	intel_dsb_ins_align(dsb);
319 
320 	dsb->ins_start_offset = dsb->free_pos;
321 	dsb->ins[0] = ldw;
322 	dsb->ins[1] = udw;
323 
324 	dsb_buffer_write(dsb, dsb->free_pos++, dsb->ins[0]);
325 	dsb_buffer_write(dsb, dsb->free_pos++, dsb->ins[1]);
326 }
327 
328 static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb,
329 					u32 opcode, i915_reg_t reg)
330 {
331 	u32 prev_opcode, prev_reg;
332 
333 	/*
334 	 * Nothing emitted yet? Must check before looking
335 	 * at the actual data since i915_gem_object_create_internal()
336 	 * does *not* give you zeroed memory!
337 	 */
338 	if (dsb->free_pos == 0)
339 		return false;
340 
341 	prev_opcode = dsb->ins[1] & ~DSB_REG_VALUE_MASK;
342 	prev_reg =  dsb->ins[1] & DSB_REG_VALUE_MASK;
343 
344 	return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg);
345 }
346 
/* Was the previously emitted instruction an indexed write to @reg? */
static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_t reg)
{
	return intel_dsb_prev_ins_is_write(dsb,
					   DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT,
					   reg);
}
353 
/**
 * intel_dsb_reg_write_indexed() - Emit indexed register write to the DSB context
 * @dsb: DSB context
 * @reg: register address.
 * @val: value.
 *
 * This function is used for writing register-value pair in command
 * buffer of DSB.
 *
 * Note that indexed writes are slower than normal MMIO writes
 * for a small number (less than 5 or so) of writes to the same
 * register.
 */
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
				 i915_reg_t reg, u32 val)
{
	/*
	 * For example the buffer will look like below for 3 dwords for auto
	 * increment register:
	 * +--------------------------------------------------------+
	 * | size = 3 | offset &| value1 | value2 | value3 | zero   |
	 * |          | opcode  |        |        |        |        |
	 * +--------------------------------------------------------+
	 * +          +         +        +        +        +        +
	 * 0          4         8        12       16       20       24
	 * Byte
	 *
	 * As every instruction is 8 byte aligned the index of dsb instruction
	 * will start always from even number while dealing with u32 array. If
	 * we are writing odd no of dwords, Zeros will be added in the end for
	 * padding.
	 */

	/* start a new indexed write unless we can extend the previous one */
	if (!intel_dsb_prev_ins_is_indexed_write(dsb, reg))
		intel_dsb_emit(dsb, 0, /* count */
			       (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) |
			       i915_mmio_reg_offset(reg));

	if (!assert_dsb_has_room(dsb))
		return;

	/* Update the count */
	dsb->ins[0]++;
	dsb_buffer_write(dsb, dsb->ins_start_offset + 0, dsb->ins[0]);

	dsb_buffer_write(dsb, dsb->free_pos++, val);
	/* if number of data words is odd, then the last dword should be 0.*/
	if (dsb->free_pos & 0x1)
		dsb_buffer_write(dsb, dsb->free_pos, 0);
}
403 
/**
 * intel_dsb_reg_write() - Emit a direct MMIO register write to the DSB context
 * @dsb: DSB context
 * @reg: register address.
 * @val: value.
 *
 * Emits a single MMIO write instruction with all byte enables set.
 */
void intel_dsb_reg_write(struct intel_dsb *dsb,
			 i915_reg_t reg, u32 val)
{
	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		       (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
		       i915_mmio_reg_offset(reg));
}
412 
/* Convert a dword bitmask into the per-byte enable bits used by the hw. */
static u32 intel_dsb_mask_to_byte_en(u32 mask)
{
	u32 byte_en = 0;
	int byte;

	/* one enable bit per byte that has any mask bit set */
	for (byte = 0; byte < 4; byte++) {
		if (mask & (0xffu << (byte * 8)))
			byte_en |= 1u << byte;
	}

	return byte_en;
}
420 
/*
 * Note: mask implemented via byte enables!
 * Only whole bytes of @reg covered by @mask get written; a partial
 * byte mask enables the write for that entire byte.
 */
void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
				i915_reg_t reg, u32 mask, u32 val)
{
	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		       (intel_dsb_mask_to_byte_en(mask) << DSB_BYTE_EN_SHIFT) |
		       i915_mmio_reg_offset(reg));
}
430 
431 void intel_dsb_noop(struct intel_dsb *dsb, int count)
432 {
433 	int i;
434 
435 	for (i = 0; i < count; i++)
436 		intel_dsb_emit(dsb, 0,
437 			       DSB_OPCODE_NOOP << DSB_OPCODE_SHIFT);
438 }
439 
440 void intel_dsb_nonpost_start(struct intel_dsb *dsb)
441 {
442 	struct intel_crtc *crtc = dsb->crtc;
443 	enum pipe pipe = crtc->pipe;
444 
445 	intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
446 				   DSB_NON_POSTED, DSB_NON_POSTED);
447 	intel_dsb_noop(dsb, 4);
448 }
449 
450 void intel_dsb_nonpost_end(struct intel_dsb *dsb)
451 {
452 	struct intel_crtc *crtc = dsb->crtc;
453 	enum pipe pipe = crtc->pipe;
454 
455 	intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
456 				   DSB_NON_POSTED, 0);
457 	intel_dsb_noop(dsb, 4);
458 }
459 
/* Emit an instruction that raises the DSB interrupt when executed. */
void intel_dsb_interrupt(struct intel_dsb *dsb)
{
	intel_dsb_emit(dsb, 0,
		       DSB_OPCODE_INTERRUPT << DSB_OPCODE_SHIFT);
}

/* Emit an instruction to wait for (at least) @count microseconds. */
void intel_dsb_wait_usec(struct intel_dsb *dsb, int count)
{
	/* +1 to make sure we never wait less time than asked for */
	intel_dsb_emit(dsb, count + 1,
		       DSB_OPCODE_WAIT_USEC << DSB_OPCODE_SHIFT);
}

/* Emit an instruction to wait for @count vblanks. */
void intel_dsb_wait_vblanks(struct intel_dsb *dsb, int count)
{
	intel_dsb_emit(dsb, count,
		       DSB_OPCODE_WAIT_VBLANKS << DSB_OPCODE_SHIFT);
}
478 
479 static void intel_dsb_emit_wait_dsl(struct intel_dsb *dsb,
480 				    u32 opcode, int lower, int upper)
481 {
482 	u64 window = ((u64)upper << DSB_SCANLINE_UPPER_SHIFT) |
483 		((u64)lower << DSB_SCANLINE_LOWER_SHIFT);
484 
485 	intel_dsb_emit(dsb, lower_32_bits(window),
486 		       (opcode << DSB_OPCODE_SHIFT) |
487 		       upper_32_bits(window));
488 }
489 
/*
 * Emit a scanline window wait, picking whichever of the equivalent
 * IN/OUT window representations doesn't wrap around vtotal once
 * translated to hardware scanline numbers.
 */
static void intel_dsb_wait_dsl(struct intel_atomic_state *state,
			       struct intel_dsb *dsb,
			       int lower_in, int upper_in,
			       int lower_out, int upper_out)
{
	struct intel_crtc *crtc = dsb->crtc;

	/* translate all window bounds into hardware scanline numbers */
	lower_in = dsb_scanline_to_hw(state, crtc, lower_in);
	upper_in = dsb_scanline_to_hw(state, crtc, upper_in);

	lower_out = dsb_scanline_to_hw(state, crtc, lower_out);
	upper_out = dsb_scanline_to_hw(state, crtc, upper_out);

	if (upper_in >= lower_in)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_IN,
					lower_in, upper_in);
	else if (upper_out >= lower_out)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT,
					lower_out, upper_out);
	else
		drm_WARN_ON(crtc->base.dev, 1); /* assert_dsl_ok() should have caught it already */
}
512 
/* Sanity check that the requested scanline window isn't the whole frame. */
static void assert_dsl_ok(struct intel_atomic_state *state,
			  struct intel_dsb *dsb,
			  int start, int end)
{
	struct intel_crtc *crtc = dsb->crtc;
	int vtotal = dsb_vtotal(state, crtc);

	/*
	 * Waiting for the entire frame doesn't make sense,
	 * (IN==don't wait, OUT=wait forever).
	 */
	drm_WARN(crtc->base.dev, (end - start + vtotal) % vtotal == vtotal - 1,
		 "[CRTC:%d:%s] DSB %d bad scanline window wait: %d-%d (vt=%d)\n",
		 crtc->base.base.id, crtc->base.name, dsb->id,
		 start, end, vtotal);
}
529 
/* Emit a wait until the scanline is inside the [start, end] window. */
void intel_dsb_wait_scanline_in(struct intel_atomic_state *state,
				struct intel_dsb *dsb,
				int start, int end)
{
	assert_dsl_ok(state, dsb, start, end);

	/* OUT window [end+1, start-1] is the complement of IN window [start, end] */
	intel_dsb_wait_dsl(state, dsb,
			   start, end,
			   end + 1, start - 1);
}

/* Emit a wait until the scanline is outside the [start, end] window. */
void intel_dsb_wait_scanline_out(struct intel_atomic_state *state,
				 struct intel_dsb *dsb,
				 int start, int end)
{
	assert_dsl_ok(state, dsb, start, end);

	/* IN window [end+1, start-1] is the complement of OUT window [start, end] */
	intel_dsb_wait_dsl(state, dsb,
			   end + 1, start - 1,
			   start, end);
}
551 
/*
 * Emit a poll of @reg until (value & @mask) == @val, sampling every
 * @wait_us microseconds, at most @count times.
 */
void intel_dsb_poll(struct intel_dsb *dsb,
		    i915_reg_t reg, u32 mask, u32 val,
		    int wait_us, int count)
{
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	/* configure mask/interval/count before emitting the POLL itself */
	intel_dsb_reg_write(dsb, DSB_POLLMASK(pipe, dsb->id), mask);
	intel_dsb_reg_write(dsb, DSB_POLLFUNC(pipe, dsb->id),
			    DSB_POLL_ENABLE |
			    DSB_POLL_WAIT(wait_us) | DSB_POLL_COUNT(count));

	/* NOTE(review): padding before the POLL — presumably required by the hw; confirm */
	intel_dsb_noop(dsb, 5);

	intel_dsb_emit(dsb, val,
		       (DSB_OPCODE_POLL << DSB_OPCODE_SHIFT) |
		       i915_mmio_reg_offset(reg));
}
570 
571 static void intel_dsb_align_tail(struct intel_dsb *dsb)
572 {
573 	u32 aligned_tail, tail;
574 
575 	intel_dsb_ins_align(dsb);
576 
577 	tail = dsb->free_pos * 4;
578 	aligned_tail = ALIGN(tail, CACHELINE_BYTES);
579 
580 	if (aligned_tail > tail)
581 		dsb_buffer_fill(dsb, dsb->free_pos, 0, aligned_tail - tail);
582 
583 	dsb->free_pos = aligned_tail / 4;
584 }
585 
586 static void intel_dsb_gosub_align(struct intel_dsb *dsb)
587 {
588 	u32 aligned_tail, tail;
589 
590 	intel_dsb_ins_align(dsb);
591 
592 	tail = dsb->free_pos * 4;
593 	aligned_tail = ALIGN(tail, CACHELINE_BYTES);
594 
595 	/*
596 	 * Wa_16024917128
597 	 * "Ensure GOSUB is not placed in cacheline QW slot 6 or 7 (numbered 0-7)"
598 	 */
599 	if (aligned_tail - tail <= 2 * 8)
600 		dsb_buffer_fill(dsb, dsb->free_pos, 0, aligned_tail - tail);
601 
602 	dsb->free_pos = aligned_tail / 4;
603 }
604 
/*
 * Emit a GOSUB instruction into @dsb that executes @sub_dsb as a
 * subroutine. Both contexts must target the same DSB engine id.
 */
void intel_dsb_gosub(struct intel_dsb *dsb,
		     struct intel_dsb *sub_dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	unsigned int head, tail;
	u64 head_tail;

	/* the subroutine must run on the same DSB engine as the caller */
	if (drm_WARN_ON(display->drm, dsb->id != sub_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(sub_dsb))
		return;

	/* Wa_16024917128: keep the GOSUB out of cacheline QW slots 6/7 */
	intel_dsb_gosub_align(dsb);

	head = intel_dsb_head(sub_dsb);
	tail = intel_dsb_tail(sub_dsb);

	/*
	 * The GOSUB instruction has the following memory layout.
	 *
	 * +------------------------------------------------------------+
	 * |  Opcode  |   Rsvd    |      Head Ptr     |     Tail Ptr    |
	 * |   0x0c   |           |                   |                 |
	 * +------------------------------------------------------------+
	 * |<- 8bits->|<- 4bits ->|<--   26bits    -->|<--  26bits   -->|
	 *
	 * We have only 26 bits each to represent the head and  tail
	 * pointers even though the addresses itself are of 32 bit. However, this
	 * is not a problem because the addresses are 64 bit aligned and therefore
	 * the last 6 bits are always Zero's. Therefore, we right shift the address
	 * by 6 before embedding it into the GOSUB instruction.
	 */

	head_tail = ((u64)(DSB_GOSUB_CONVERT_ADDR(head)) << DSB_GOSUB_HEAD_SHIFT) |
		((u64)(DSB_GOSUB_CONVERT_ADDR(tail)) << DSB_GOSUB_TAIL_SHIFT);

	intel_dsb_emit(dsb, lower_32_bits(head_tail),
		       (DSB_OPCODE_GOSUB << DSB_OPCODE_SHIFT) |
		       upper_32_bits(head_tail));

	/*
	 * "NOTE: the instructions within the cacheline
	 *  FOLLOWING the GOSUB instruction must be NOPs."
	 *
	 * The zero padding added here satisfies this, since a
	 * zero dword is a NOP (DSB_OPCODE_NOOP == 0x0).
	 */
	intel_dsb_align_tail(dsb);
}
653 
/* Finalize a subroutine DSB meant to be invoked via intel_dsb_gosub(). */
void intel_dsb_gosub_finish(struct intel_dsb *dsb)
{
	intel_dsb_align_tail(dsb);

	/*
	 * Wa_16024917128
	 * "Ensure that all subroutines called by GOSUB end with a cacheline of NOPs"
	 */
	intel_dsb_noop(dsb, 8);

	dsb_buffer_flush_map(dsb);
}

/*
 * Finalize the DSB: pad the tail to a cacheline boundary and flush
 * the CPU writes so the hardware sees a coherent command buffer.
 */
void intel_dsb_finish(struct intel_dsb *dsb)
{
	intel_dsb_align_tail(dsb);

	dsb_buffer_flush_map(dsb);
}
673 
674 static u32 dsb_error_int_status(struct intel_display *display)
675 {
676 	u32 errors;
677 
678 	errors = DSB_GTT_FAULT_INT_STATUS |
679 		DSB_RSPTIMEOUT_INT_STATUS |
680 		DSB_POLL_ERR_INT_STATUS;
681 
682 	/*
683 	 * All the non-existing status bits operate as
684 	 * normal r/w bits, so any attempt to clear them
685 	 * will just end up setting them. Never do that so
686 	 * we won't mistake them for actual error interrupts.
687 	 */
688 	if (DISPLAY_VER(display) >= 14)
689 		errors |= DSB_ATS_FAULT_INT_STATUS;
690 
691 	if (DISPLAY_VER(display) >= 30)
692 		errors |= DSB_GOSUB_INT_STATUS;
693 
694 	return errors;
695 }
696 
/* Error interrupt enable bits to use on this platform. */
static u32 dsb_error_int_en(struct intel_display *display)
{
	u32 errors;

	errors = DSB_GTT_FAULT_INT_EN |
		DSB_RSPTIMEOUT_INT_EN |
		DSB_POLL_ERR_INT_EN;

	if (DISPLAY_VER(display) >= 14)
		errors |= DSB_ATS_FAULT_INT_EN;

	/*
	 * Wa_16024917128
	 * "Disable nested GOSUB interrupt (DSB_INTERRUPT bit 21)"
	 *
	 * The "0 &&" deliberately keeps the GOSUB interrupt disabled
	 * everywhere per the workaround above; do not "fix" it.
	 */
	if (0 && DISPLAY_VER(display) >= 30)
		errors |= DSB_GOSUB_INT_EN;

	return errors;
}
717 
718 /*
719  * FIXME calibrate these sensibly, ideally compute based on
 * the number of registers to be written. But that requires
721  * measuring the actual DSB execution speed on each platform
722  * (and the speed also depends on CDCLK and memory clock)...
723  */
/* Estimated execution time of the noarm DSB, in microseconds. */
static int intel_dsb_noarm_exec_time_us(void)
{
	return 80;
}

/* Estimated execution time of the arm DSB, in microseconds. */
static int intel_dsb_arm_exec_time_us(void)
{
	return 20;
}

/* Total estimated DSB execution time (noarm + arm), in microseconds. */
int intel_dsb_exec_time_us(void)
{
	return intel_dsb_noarm_exec_time_us() +
		intel_dsb_arm_exec_time_us();
}
739 
/*
 * Emit scanline waits that keep the DSB out of the window just before
 * the (delayed) vblank, so the arming register writes can't straddle
 * the point where the registers get latched.
 */
void intel_dsb_vblank_evade(struct intel_atomic_state *state,
			    struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	/* time the DSB needs for the arming writes, in scanlines */
	int latency = intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode,
					       intel_dsb_arm_exec_time_us());
	int start, end;

	/*
	 * PIPEDSL is reading as 0 when in SRDENT(PSR1) or DEEP_SLEEP(PSR2). On
	 * wake-up scanline counting starts from vblank_start - 1. We don't know
	 * if wake-up is already ongoing when evasion starts. In worst case
	 * PIPEDSL could start reading valid value right after checking the
	 * scanline. In this scenario we wouldn't have enough time to write all
	 * registers. To tackle this evade scanline 0 as well. As a drawback we
	 * have 1 frame delay in flip when waking up.
	 */
	if (crtc_state->has_psr)
		intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, 0, 0);

	if (pre_commit_is_vrr_active(state, crtc) && crtc_state->vrr.dc_balance.enable) {
		int vblank_delay = crtc_state->set_context_latency;
		int vmin_vblank_start, vmax_vblank_start;

		/*
		 * With DC balance the vblank start can move between frames;
		 * evade both the next and final vmin/vmax vblank positions.
		 * NOTE(review): the "_next" variants presumably return a
		 * negative value when not applicable — confirm.
		 */
		vmin_vblank_start = intel_vrr_dcb_vmin_vblank_start_next(crtc_state);

		if (vmin_vblank_start >= 0) {
			end = vmin_vblank_start;
			start = end - vblank_delay - latency;
			intel_dsb_wait_scanline_out(state, dsb, start, end);
		}

		vmax_vblank_start = intel_vrr_dcb_vmax_vblank_start_next(crtc_state);

		if (vmax_vblank_start >= 0) {
			end = vmax_vblank_start;
			start = end - vblank_delay - latency;
			intel_dsb_wait_scanline_out(state, dsb, start, end);
		}

		vmin_vblank_start = intel_vrr_dcb_vmin_vblank_start_final(crtc_state);
		end = vmin_vblank_start;
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);

		vmax_vblank_start = intel_vrr_dcb_vmax_vblank_start_final(crtc_state);
		end = vmax_vblank_start;
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	} else if (pre_commit_is_vrr_active(state, crtc)) {
		int vblank_delay = crtc_state->set_context_latency;

		/* with VRR the vblank may start anywhere between vmin and vmax */
		end = intel_vrr_vmin_vblank_start(crtc_state);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);

		end = intel_vrr_vmax_vblank_start(crtc_state);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	} else {
		int vblank_delay = intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode);

		/* fixed refresh: a single evasion window before vblank start */
		end = intel_mode_vblank_start(&crtc_state->hw.adjusted_mode);
		start = end - vblank_delay - latency;
		intel_dsb_wait_scanline_out(state, dsb, start, end);
	}
}
809 
/*
 * Emit instructions into @dsb that program and start @chained_dsb
 * on a different DSB engine of the same pipe.
 */
static void _intel_dsb_chain(struct intel_atomic_state *state,
			     struct intel_dsb *dsb,
			     struct intel_dsb *chained_dsb,
			     u32 ctrl)
{
	struct intel_display *display = to_intel_display(state->base.dev);
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;

	/* chaining a DSB engine to itself makes no sense */
	if (drm_WARN_ON(display->drm, dsb->id == chained_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(chained_dsb))
		return;

	intel_dsb_reg_write(dsb, DSB_CTRL(pipe, chained_dsb->id),
			    ctrl | DSB_ENABLE);

	intel_dsb_reg_write(dsb, DSB_CHICKEN(pipe, chained_dsb->id),
			    dsb_chicken(state, crtc));

	/* clear stale status bits and enable error + "done" interrupts */
	intel_dsb_reg_write(dsb, DSB_INTERRUPT(pipe, chained_dsb->id),
			    dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			    dsb_error_int_en(display) | DSB_PROG_INT_EN);

	if (ctrl & DSB_WAIT_FOR_VBLANK) {
		int dewake_scanline = dsb_dewake_scanline_start(state, crtc);
		int hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dewake_scanline);

		intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id),
				    DSB_ENABLE_DEWAKE |
				    DSB_SCANLINE_FOR_DEWAKE(hw_dewake_scanline));
	} else {
		intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id), 0);
	}

	/*
	 * NOTE(review): programming HEAD then TAIL presumably kicks off
	 * the chained DSB's execution — confirm against bspec.
	 */
	intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id),
			    intel_dsb_head(chained_dsb));

	intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id),
			    intel_dsb_tail(chained_dsb));

	if (ctrl & DSB_WAIT_FOR_VBLANK) {
		/*
		 * Keep DEwake alive via the first DSB, in
		 * case we're already past dewake_scanline,
		 * and thus DSB_ENABLE_DEWAKE on the second
		 * DSB won't do its job.
		 */
		intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id),
					   DSB_FORCE_DEWAKE, DSB_FORCE_DEWAKE);

		intel_dsb_wait_scanline_out(state, dsb,
					    dsb_dewake_scanline_start(state, crtc),
					    dsb_dewake_scanline_end(state, crtc));

		/*
		 * DSB_FORCE_DEWAKE remains active even after DSB is
		 * disabled, so make sure to clear it.
		 */
		intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(crtc->pipe, dsb->id),
					   DSB_FORCE_DEWAKE, 0);
	}
}
874 
875 void intel_dsb_chain(struct intel_atomic_state *state,
876 		     struct intel_dsb *dsb,
877 		     struct intel_dsb *chained_dsb,
878 		     bool wait_for_vblank)
879 {
880 	_intel_dsb_chain(state, dsb, chained_dsb,
881 			 wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0);
882 }
883 
/*
 * Emit a wait that covers the interval from the undelayed vblank
 * until the delayed vblank.
 */
void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state,
				       struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	const struct intel_crtc_state *crtc_state =
		intel_pre_commit_crtc_state(state, crtc);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->hw.adjusted_mode;
	int wait_scanlines;

	if (pre_commit_is_vrr_active(state, crtc)) {
		/*
		 * If the push happened before the vmin decision boundary
		 * we don't know how far we are from the undelayed vblank.
		 * Wait until we're past the vmin safe window, at which
		 * point we're SCL lines away from the delayed vblank.
		 *
		 * If the push happened after the vmin decision boundary
		 * the hardware itself guarantees that we're SCL lines
		 * away from the delayed vblank, and we won't be inside
		 * the vmin safe window so this extra wait does nothing.
		 */
		intel_dsb_wait_scanline_out(state, dsb,
					    intel_vrr_safe_window_start(crtc_state),
					    intel_vrr_vmin_safe_window_end(crtc_state));
		/*
		 * When the push is sent during vblank it will trigger
		 * on the next scanline, hence we have up to one extra
		 * scanline until the delayed vblank occurs after
		 * TRANS_PUSH has been written.
		 */
		wait_scanlines = crtc_state->set_context_latency + 1;
	} else {
		/* fixed refresh: the delayed vblank is the vblank delay away */
		wait_scanlines = intel_mode_vblank_delay(adjusted_mode);
	}

	intel_dsb_wait_usec(dsb, intel_scanlines_to_usecs(adjusted_mode, wait_scanlines));
}
922 
/**
 * intel_dsb_commit() - Trigger workload execution of DSB.
 * @dsb: DSB context
 *
 * This function is used to do actual write to hardware using DSB.
 */
void intel_dsb_commit(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	enum pipe pipe = crtc->pipe;

	if (!assert_dsb_tail_is_aligned(dsb))
		return;

	/* refuse to commit while a previous workload is still executing */
	if (is_dsb_busy(display, pipe, dsb->id)) {
		drm_err(display->drm, "[CRTC:%d:%s] DSB %d is busy\n",
			crtc->base.base.id, crtc->base.name, dsb->id);
		return;
	}

	intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
			  DSB_ENABLE);

	/* chicken bits precomputed for this commit (see dsb_chicken()) */
	intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id),
			  dsb->chicken);

	/* clear stale status bits and enable error + "done" interrupts */
	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
			  dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			  dsb_error_int_en(display) | DSB_PROG_INT_EN);

	intel_de_write_fw(display, DSB_PMCTRL(pipe, dsb->id), 0);

	/*
	 * NOTE(review): HEAD is written before TAIL; the TAIL write
	 * presumably starts execution — confirm against bspec.
	 */
	intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id),
			  intel_dsb_head(dsb));

	intel_de_write_fw(display, DSB_TAIL(pipe, dsb->id),
			  intel_dsb_tail(dsb));
}
962 
/*
 * Wait for the DSB engine to go idle; on timeout halt the engine and
 * dump its state. Afterwards reset the context so it can be reused.
 */
void intel_dsb_wait(struct intel_dsb *dsb)
{
	struct intel_crtc *crtc = dsb->crtc;
	struct intel_display *display = to_intel_display(crtc->base.dev);
	enum pipe pipe = crtc->pipe;
	bool is_busy;
	int ret;

	/* poll every 100us, up to 1ms total */
	ret = poll_timeout_us(is_busy = is_dsb_busy(display, pipe, dsb->id),
			      !is_busy,
			      100, 1000, false);
	if (ret) {
		u32 offset = dsb_buffer_ggtt_offset(dsb);

		/* halt the engine so we can safely reset it below */
		intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
				  DSB_ENABLE | DSB_HALT);

		/* report head/tail relative to the buffer start */
		drm_err(display->drm,
			"[CRTC:%d:%s] DSB %d timed out waiting for idle (current head=0x%x, head=0x%x, tail=0x%x)\n",
			crtc->base.base.id, crtc->base.name, dsb->id,
			intel_de_read_fw(display, DSB_CURRENT_HEAD(pipe, dsb->id)) - offset,
			intel_de_read_fw(display, DSB_HEAD(pipe, dsb->id)) - offset,
			intel_de_read_fw(display, DSB_TAIL(pipe, dsb->id)) - offset);

		intel_dsb_dump(dsb);
	}

	/* Attempt to reset it */
	dsb->free_pos = 0;
	dsb->ins_start_offset = 0;
	dsb->ins[0] = 0;
	dsb->ins[1] = 0;

	intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), 0);

	/* clear any leftover error/"done" interrupt status */
	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
			  dsb_error_int_status(display) | DSB_PROG_INT_STATUS);
}
1001 
1002 /**
1003  * intel_dsb_prepare() - Allocate, pin and map the DSB command buffer.
1004  * @state: the atomic state
1005  * @crtc: the CRTC
1006  * @dsb_id: the DSB engine to use
1007  * @max_cmds: number of commands we need to fit into command buffer
1008  *
1009  * This function prepare the command buffer which is used to store dsb
1010  * instructions with data.
1011  *
1012  * Returns:
1013  * DSB context, NULL on failure
1014  */
1015 struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state,
1016 				    struct intel_crtc *crtc,
1017 				    enum intel_dsb_id dsb_id,
1018 				    unsigned int max_cmds)
1019 {
1020 	struct intel_display *display = to_intel_display(state);
1021 	struct intel_dsb_buffer *dsb_buf;
1022 	struct ref_tracker *wakeref;
1023 	struct intel_dsb *dsb;
1024 	unsigned int size;
1025 
1026 	if (!HAS_DSB(display))
1027 		return NULL;
1028 
1029 	if (!display->params.enable_dsb)
1030 		return NULL;
1031 
1032 	dsb = kzalloc_obj(*dsb);
1033 	if (!dsb)
1034 		goto out;
1035 
1036 	wakeref = intel_display_rpm_get(display);
1037 
1038 	/* ~1 qword per instruction, full cachelines */
1039 	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);
1040 
1041 	dsb_buf = dsb_buffer_create(display, size);
1042 	if (IS_ERR(dsb_buf))
1043 		goto out_put_rpm;
1044 
1045 	dsb->dsb_buf = dsb_buf;
1046 
1047 	intel_display_rpm_put(display, wakeref);
1048 
1049 	dsb->id = dsb_id;
1050 	dsb->crtc = crtc;
1051 	dsb->size = size / 4; /* in dwords */
1052 
1053 	dsb->chicken = dsb_chicken(state, crtc);
1054 	dsb->hw_dewake_scanline =
1055 		dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline_start(state, crtc));
1056 
1057 	return dsb;
1058 
1059 out_put_rpm:
1060 	intel_display_rpm_put(display, wakeref);
1061 	kfree(dsb);
1062 out:
1063 	drm_info_once(display->drm,
1064 		      "[CRTC:%d:%s] DSB %d queue setup failed, will fallback to MMIO for display HW programming\n",
1065 		      crtc->base.base.id, crtc->base.name, dsb_id);
1066 
1067 	return NULL;
1068 }
1069 
1070 /**
1071  * intel_dsb_cleanup() - To cleanup DSB context.
1072  * @dsb: DSB context
1073  *
1074  * This function cleanup the DSB context by unpinning and releasing
1075  * the VMA object associated with it.
1076  */
1077 void intel_dsb_cleanup(struct intel_dsb *dsb)
1078 {
1079 	dsb_buffer_cleanup(dsb);
1080 	kfree(dsb);
1081 }
1082 
1083 void intel_dsb_irq_handler(struct intel_display *display,
1084 			   enum pipe pipe, enum intel_dsb_id dsb_id)
1085 {
1086 	struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe);
1087 	u32 tmp, errors;
1088 
1089 	tmp = intel_de_read_fw(display, DSB_INTERRUPT(pipe, dsb_id));
1090 	intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb_id), tmp);
1091 
1092 	if (tmp & DSB_PROG_INT_STATUS) {
1093 		spin_lock(&display->drm->event_lock);
1094 
1095 		if (crtc->dsb_event) {
1096 			/*
1097 			 * Update vblank counter/timestamp in case it
1098 			 * hasn't been done yet for this frame.
1099 			 */
1100 			drm_crtc_accurate_vblank_count(&crtc->base);
1101 
1102 			drm_crtc_send_vblank_event(&crtc->base, crtc->dsb_event);
1103 			crtc->dsb_event = NULL;
1104 		}
1105 
1106 		spin_unlock(&display->drm->event_lock);
1107 	}
1108 
1109 	errors = tmp & dsb_error_int_status(display);
1110 	if (errors & DSB_ATS_FAULT_INT_STATUS)
1111 		drm_err(display->drm, "[CRTC:%d:%s] DSB %d ATS fault\n",
1112 			crtc->base.base.id, crtc->base.name, dsb_id);
1113 	if (errors & DSB_GTT_FAULT_INT_STATUS)
1114 		drm_err(display->drm, "[CRTC:%d:%s] DSB %d GTT fault\n",
1115 			crtc->base.base.id, crtc->base.name, dsb_id);
1116 	if (errors & DSB_RSPTIMEOUT_INT_STATUS)
1117 		drm_err(display->drm, "[CRTC:%d:%s] DSB %d response timeout\n",
1118 			crtc->base.base.id, crtc->base.name, dsb_id);
1119 	if (errors & DSB_POLL_ERR_INT_STATUS)
1120 		drm_err(display->drm, "[CRTC:%d:%s] DSB %d poll error\n",
1121 			crtc->base.base.id, crtc->base.name, dsb_id);
1122 	if (errors & DSB_GOSUB_INT_STATUS)
1123 		drm_err(display->drm, "[CRTC:%d:%s] DSB %d GOSUB programming error\n",
1124 			crtc->base.base.id, crtc->base.name, dsb_id);
1125 }
1126