1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2019 Intel Corporation
4 *
5 */
6
7 #include <linux/iopoll.h>
8
9 #include <drm/drm_print.h>
10 #include <drm/drm_vblank.h>
11
12 #include "intel_crtc.h"
13 #include "intel_de.h"
14 #include "intel_display_regs.h"
15 #include "intel_display_rpm.h"
16 #include "intel_display_types.h"
17 #include "intel_dsb.h"
18 #include "intel_dsb_buffer.h"
19 #include "intel_dsb_regs.h"
20 #include "intel_vblank.h"
21 #include "intel_vrr.h"
22 #include "skl_watermark.h"
23
/* Cacheline size in bytes; the DSB buffer size and tail are aligned to it. */
#define CACHELINE_BYTES		64
25
26 struct intel_dsb {
27 enum intel_dsb_id id;
28
29 struct intel_dsb_buffer *dsb_buf;
30 struct intel_crtc *crtc;
31
32 /*
33 * maximum number of dwords the buffer will hold.
34 */
35 unsigned int size;
36
37 /*
38 * free_pos will point the first free dword and
39 * help in calculating tail of command buffer.
40 */
41 unsigned int free_pos;
42
43 /*
44 * Previously emitted DSB instruction. Used to
45 * identify/adjust the instruction for indexed
46 * register writes.
47 */
48 u32 ins[2];
49
50 /*
51 * Start of the previously emitted DSB instruction.
52 * Used to adjust the instruction for indexed
53 * register writes.
54 */
55 unsigned int ins_start_offset;
56
57 u32 chicken;
58 int hw_dewake_scanline;
59 };
60
/**
 * DOC: DSB
 *
 * A DSB (Display State Buffer) is a queue of MMIO instructions in memory
 * which can be offloaded to the DSB HW in the Display Controller. The DSB HW
 * is a DMA engine that can be programmed to download the DSB from memory.
 * It allows the driver to batch submit display HW programming. This helps to
 * reduce loading time and CPU activity, thereby making the context switch
 * faster. DSB support was added starting with Gen12 Intel graphics platforms.
 *
 * DSBs can access only the pipe, plane, and transcoder Data Island Packet
 * registers.
 *
 * The DSB HW supports only register writes (both indexed and direct MMIO
 * writes). No register reads are possible with the DSB HW engine.
 */
77
/*
 * DSB opcodes. The opcode is shifted by DSB_OPCODE_SHIFT into the
 * upper dword of each two dword instruction; the indented defines
 * describe operand fields used together with the opcode above them.
 */
#define DSB_OPCODE_SHIFT		24
#define DSB_OPCODE_NOOP			0x0
#define DSB_OPCODE_MMIO_WRITE		0x1
#define   DSB_BYTE_EN			0xf
#define   DSB_BYTE_EN_SHIFT		20
#define   DSB_REG_VALUE_MASK		0xfffff
#define DSB_OPCODE_WAIT_USEC		0x2
#define DSB_OPCODE_WAIT_SCANLINE	0x3
#define DSB_OPCODE_WAIT_VBLANKS		0x4
#define DSB_OPCODE_WAIT_DSL_IN		0x5
#define DSB_OPCODE_WAIT_DSL_OUT		0x6
#define   DSB_SCANLINE_UPPER_SHIFT	20
#define   DSB_SCANLINE_LOWER_SHIFT	0
#define DSB_OPCODE_INTERRUPT		0x7
#define DSB_OPCODE_INDEXED_WRITE	0x9
/* register offset field: see DSB_REG_VALUE_MASK */
#define DSB_OPCODE_POLL			0xA
/* register offset field: see DSB_REG_VALUE_MASK */
#define DSB_OPCODE_GOSUB		0xC /* ptl+ */
#define   DSB_GOSUB_HEAD_SHIFT		26
#define   DSB_GOSUB_TAIL_SHIFT		0
#define   DSB_GOSUB_CONVERT_ADDR(x)	((x) >> 6)
101
pre_commit_is_vrr_active(struct intel_atomic_state * state,struct intel_crtc * crtc)102 static bool pre_commit_is_vrr_active(struct intel_atomic_state *state,
103 struct intel_crtc *crtc)
104 {
105 const struct intel_crtc_state *old_crtc_state =
106 intel_atomic_get_old_crtc_state(state, crtc);
107 const struct intel_crtc_state *new_crtc_state =
108 intel_atomic_get_new_crtc_state(state, crtc);
109
110 /* VRR will be enabled afterwards, if necessary */
111 if (intel_crtc_needs_modeset(new_crtc_state))
112 return false;
113
114 /* VRR will have been disabled during intel_pre_plane_update() */
115 return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc);
116 }
117
dsb_vtotal(struct intel_atomic_state * state,struct intel_crtc * crtc)118 static int dsb_vtotal(struct intel_atomic_state *state,
119 struct intel_crtc *crtc)
120 {
121 const struct intel_crtc_state *crtc_state =
122 intel_pre_commit_crtc_state(state, crtc);
123
124 if (pre_commit_is_vrr_active(state, crtc))
125 return intel_vrr_vmax_vtotal(crtc_state);
126 else
127 return intel_mode_vtotal(&crtc_state->hw.adjusted_mode);
128 }
129
dsb_dewake_scanline_start(struct intel_atomic_state * state,struct intel_crtc * crtc)130 static int dsb_dewake_scanline_start(struct intel_atomic_state *state,
131 struct intel_crtc *crtc)
132 {
133 struct intel_display *display = to_intel_display(state);
134 const struct intel_crtc_state *crtc_state =
135 intel_pre_commit_crtc_state(state, crtc);
136 unsigned int latency = skl_watermark_max_latency(display, 0);
137
138 return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) -
139 intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency);
140 }
141
dsb_dewake_scanline_end(struct intel_atomic_state * state,struct intel_crtc * crtc)142 static int dsb_dewake_scanline_end(struct intel_atomic_state *state,
143 struct intel_crtc *crtc)
144 {
145 const struct intel_crtc_state *crtc_state =
146 intel_pre_commit_crtc_state(state, crtc);
147
148 return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode);
149 }
150
/*
 * Convert a scanline number into the value the DSB hardware uses by
 * removing the CRTC's scanline offset, wrapping modulo the vtotal.
 */
static int dsb_scanline_to_hw(struct intel_atomic_state *state,
			      struct intel_crtc *crtc, int scanline)
{
	const struct intel_crtc_state *pre_state =
		intel_pre_commit_crtc_state(state, crtc);
	int vtotal = dsb_vtotal(state, crtc);
	int offset = intel_crtc_scanline_offset(pre_state);

	/* add vtotal first so that a small scanline doesn't go negative */
	return (scanline + vtotal - offset) % vtotal;
}
160
/*
 * Bspec suggests that we should always set DSB_SKIP_WAITS_EN. However, our
 * approach to deciding when a flip is complete differs from what Bspec
 * describes: we wait for vblank in the DSB and generate an interrupt when it
 * happens, and that interrupt is treated as the indication of completion, so
 * we definitely do not want to skip the vblank wait. We are also concerned
 * about skipping vblank evasion, i.e. the arming registers getting latched
 * before we have managed to write them. For these reasons we do not set
 * DSB_SKIP_WAITS_EN.
 */
dsb_chicken(struct intel_atomic_state * state,struct intel_crtc * crtc)171 static u32 dsb_chicken(struct intel_atomic_state *state,
172 struct intel_crtc *crtc)
173 {
174 if (pre_commit_is_vrr_active(state, crtc))
175 return DSB_CTRL_WAIT_SAFE_WINDOW |
176 DSB_CTRL_NO_WAIT_VBLANK |
177 DSB_INST_WAIT_SAFE_WINDOW |
178 DSB_INST_NO_WAIT_VBLANK;
179 else
180 return 0;
181 }
182
assert_dsb_has_room(struct intel_dsb * dsb)183 static bool assert_dsb_has_room(struct intel_dsb *dsb)
184 {
185 struct intel_crtc *crtc = dsb->crtc;
186 struct intel_display *display = to_intel_display(crtc->base.dev);
187
188 /* each instruction is 2 dwords */
189 return !drm_WARN(display->drm, dsb->free_pos > dsb->size - 2,
190 "[CRTC:%d:%s] DSB %d buffer overflow\n",
191 crtc->base.base.id, crtc->base.name, dsb->id);
192 }
193
assert_dsb_tail_is_aligned(struct intel_dsb * dsb)194 static bool assert_dsb_tail_is_aligned(struct intel_dsb *dsb)
195 {
196 struct intel_crtc *crtc = dsb->crtc;
197 struct intel_display *display = to_intel_display(crtc->base.dev);
198
199 return !drm_WARN_ON(display->drm,
200 !IS_ALIGNED(dsb->free_pos * 4, CACHELINE_BYTES));
201 }
202
intel_dsb_dump(struct intel_dsb * dsb)203 static void intel_dsb_dump(struct intel_dsb *dsb)
204 {
205 struct intel_crtc *crtc = dsb->crtc;
206 struct intel_display *display = to_intel_display(crtc->base.dev);
207 int i;
208
209 drm_dbg_kms(display->drm, "[CRTC:%d:%s] DSB %d commands {\n",
210 crtc->base.base.id, crtc->base.name, dsb->id);
211 for (i = 0; i < ALIGN(dsb->free_pos, 64 / 4); i += 4)
212 drm_dbg_kms(display->drm,
213 " 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4,
214 intel_dsb_buffer_read(dsb->dsb_buf, i),
215 intel_dsb_buffer_read(dsb->dsb_buf, i + 1),
216 intel_dsb_buffer_read(dsb->dsb_buf, i + 2),
217 intel_dsb_buffer_read(dsb->dsb_buf, i + 3));
218 drm_dbg_kms(display->drm, "}\n");
219 }
220
is_dsb_busy(struct intel_display * display,enum pipe pipe,enum intel_dsb_id dsb_id)221 static bool is_dsb_busy(struct intel_display *display, enum pipe pipe,
222 enum intel_dsb_id dsb_id)
223 {
224 return intel_de_read_fw(display, DSB_CTRL(pipe, dsb_id)) & DSB_STATUS_BUSY;
225 }
226
intel_dsb_size(struct intel_dsb * dsb)227 unsigned int intel_dsb_size(struct intel_dsb *dsb)
228 {
229 return dsb->free_pos * 4;
230 }
231
intel_dsb_head(struct intel_dsb * dsb)232 unsigned int intel_dsb_head(struct intel_dsb *dsb)
233 {
234 return intel_dsb_buffer_ggtt_offset(dsb->dsb_buf);
235 }
236
intel_dsb_tail(struct intel_dsb * dsb)237 static unsigned int intel_dsb_tail(struct intel_dsb *dsb)
238 {
239 return intel_dsb_buffer_ggtt_offset(dsb->dsb_buf) + intel_dsb_size(dsb);
240 }
241
intel_dsb_ins_align(struct intel_dsb * dsb)242 static void intel_dsb_ins_align(struct intel_dsb *dsb)
243 {
244 /*
245 * Every instruction should be 8 byte aligned.
246 *
247 * The only way to get unaligned free_pos is via
248 * intel_dsb_reg_write_indexed() which already
249 * makes sure the next dword is zeroed, so no need
250 * to clear it here.
251 */
252 dsb->free_pos = ALIGN(dsb->free_pos, 2);
253 }
254
/*
 * Append one two dword instruction (@ldw first, then @udw) to the
 * buffer and remember it as the previously emitted instruction.
 * Does nothing, apart from warning, when the buffer is full.
 */
static void intel_dsb_emit(struct intel_dsb *dsb, u32 ldw, u32 udw)
{
	unsigned int pos;

	if (!assert_dsb_has_room(dsb))
		return;

	intel_dsb_ins_align(dsb);
	pos = dsb->free_pos;

	/* record the instruction so indexed writes can extend it later */
	dsb->ins_start_offset = pos;
	dsb->ins[0] = ldw;
	dsb->ins[1] = udw;

	intel_dsb_buffer_write(dsb->dsb_buf, pos, ldw);
	intel_dsb_buffer_write(dsb->dsb_buf, pos + 1, udw);

	dsb->free_pos = pos + 2;
}
269
intel_dsb_prev_ins_is_write(struct intel_dsb * dsb,u32 opcode,i915_reg_t reg)270 static bool intel_dsb_prev_ins_is_write(struct intel_dsb *dsb,
271 u32 opcode, i915_reg_t reg)
272 {
273 u32 prev_opcode, prev_reg;
274
275 /*
276 * Nothing emitted yet? Must check before looking
277 * at the actual data since i915_gem_object_create_internal()
278 * does *not* give you zeroed memory!
279 */
280 if (dsb->free_pos == 0)
281 return false;
282
283 prev_opcode = dsb->ins[1] & ~DSB_REG_VALUE_MASK;
284 prev_reg = dsb->ins[1] & DSB_REG_VALUE_MASK;
285
286 return prev_opcode == opcode && prev_reg == i915_mmio_reg_offset(reg);
287 }
288
intel_dsb_prev_ins_is_indexed_write(struct intel_dsb * dsb,i915_reg_t reg)289 static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_t reg)
290 {
291 return intel_dsb_prev_ins_is_write(dsb,
292 DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT,
293 reg);
294 }
295
296 /**
297 * intel_dsb_reg_write_indexed() - Emit indexed register write to the DSB context
298 * @dsb: DSB context
299 * @reg: register address.
300 * @val: value.
301 *
302 * This function is used for writing register-value pair in command
303 * buffer of DSB.
304 *
305 * Note that indexed writes are slower than normal MMIO writes
306 * for a small number (less than 5 or so) of writes to the same
307 * register.
308 */
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
				 i915_reg_t reg, u32 val)
{
	/*
	 * Layout of an indexed write carrying three values to an
	 * auto-increment register:
	 *
	 * +--------------------------------------------------------+
	 * | size = 3 | offset &| value1 | value2 | value3 | zero   |
	 * |          | opcode  |        |        |        |        |
	 * +--------------------------------------------------------+
	 * +          +         +        +        +        +        +
	 * 0          4         8        12       16       20       24
	 * Byte
	 *
	 * Instructions are 8 byte aligned, so in terms of the u32
	 * array an instruction always starts at an even index. When
	 * an odd number of values has been written, a zero dword is
	 * appended as padding.
	 */
	if (!intel_dsb_prev_ins_is_indexed_write(dsb, reg)) {
		u32 udw = (DSB_OPCODE_INDEXED_WRITE << DSB_OPCODE_SHIFT) |
			i915_mmio_reg_offset(reg);

		/* start a new indexed write with a count of zero */
		intel_dsb_emit(dsb, 0, udw);
	}

	if (!assert_dsb_has_room(dsb))
		return;

	/* one more value: bump the count in the instruction header */
	dsb->ins[0] += 1;
	intel_dsb_buffer_write(dsb->dsb_buf, dsb->ins_start_offset + 0,
			       dsb->ins[0]);

	/* append the value itself */
	intel_dsb_buffer_write(dsb->dsb_buf, dsb->free_pos, val);
	dsb->free_pos++;

	/* with an odd number of values the trailing dword has to be 0 */
	if (dsb->free_pos % 2)
		intel_dsb_buffer_write(dsb->dsb_buf, dsb->free_pos, 0);
}
346
/* Emit an MMIO write of @val to @reg with all four byte enables set. */
void intel_dsb_reg_write(struct intel_dsb *dsb,
			 i915_reg_t reg, u32 val)
{
	u32 udw = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		(DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
		i915_mmio_reg_offset(reg);

	intel_dsb_emit(dsb, val, udw);
}
355
/*
 * Turn a 32 bit mask into a 4 bit byte enable value: bit N of the
 * result is set when any bit of byte N of @mask is set.
 */
static u32 intel_dsb_mask_to_byte_en(u32 mask)
{
	u32 byte_en = 0;
	int byte;

	for (byte = 0; byte < 4; byte++) {
		if (mask & (0xffu << (byte * 8)))
			byte_en |= 1u << byte;
	}

	return byte_en;
}
363
364 /* Note: mask implemented via byte enables! */
void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
				i915_reg_t reg, u32 mask, u32 val)
{
	/* the granularity is whole bytes: any bit set enables its byte */
	u32 byte_en = intel_dsb_mask_to_byte_en(mask);
	u32 udw = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		(byte_en << DSB_BYTE_EN_SHIFT) |
		i915_mmio_reg_offset(reg);

	intel_dsb_emit(dsb, val, udw);
}
373
intel_dsb_noop(struct intel_dsb * dsb,int count)374 void intel_dsb_noop(struct intel_dsb *dsb, int count)
375 {
376 int i;
377
378 for (i = 0; i < count; i++)
379 intel_dsb_emit(dsb, 0,
380 DSB_OPCODE_NOOP << DSB_OPCODE_SHIFT);
381 }
382
intel_dsb_nonpost_start(struct intel_dsb * dsb)383 void intel_dsb_nonpost_start(struct intel_dsb *dsb)
384 {
385 struct intel_crtc *crtc = dsb->crtc;
386 enum pipe pipe = crtc->pipe;
387
388 intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
389 DSB_NON_POSTED, DSB_NON_POSTED);
390 intel_dsb_noop(dsb, 4);
391 }
392
intel_dsb_nonpost_end(struct intel_dsb * dsb)393 void intel_dsb_nonpost_end(struct intel_dsb *dsb)
394 {
395 struct intel_crtc *crtc = dsb->crtc;
396 enum pipe pipe = crtc->pipe;
397
398 intel_dsb_reg_write_masked(dsb, DSB_CTRL(pipe, dsb->id),
399 DSB_NON_POSTED, 0);
400 intel_dsb_noop(dsb, 4);
401 }
402
intel_dsb_interrupt(struct intel_dsb * dsb)403 void intel_dsb_interrupt(struct intel_dsb *dsb)
404 {
405 intel_dsb_emit(dsb, 0,
406 DSB_OPCODE_INTERRUPT << DSB_OPCODE_SHIFT);
407 }
408
intel_dsb_wait_usec(struct intel_dsb * dsb,int count)409 void intel_dsb_wait_usec(struct intel_dsb *dsb, int count)
410 {
411 /* +1 to make sure we never wait less time than asked for */
412 intel_dsb_emit(dsb, count + 1,
413 DSB_OPCODE_WAIT_USEC << DSB_OPCODE_SHIFT);
414 }
415
intel_dsb_wait_vblanks(struct intel_dsb * dsb,int count)416 void intel_dsb_wait_vblanks(struct intel_dsb *dsb, int count)
417 {
418 intel_dsb_emit(dsb, count,
419 DSB_OPCODE_WAIT_VBLANKS << DSB_OPCODE_SHIFT);
420 }
421
/*
 * Emit a scanline window wait (@opcode is one of the WAIT_DSL
 * opcodes). @lower and @upper are packed into a 64 bit window that
 * is split across the two instruction dwords.
 */
static void intel_dsb_emit_wait_dsl(struct intel_dsb *dsb,
				    u32 opcode, int lower, int upper)
{
	u64 window;
	u32 ldw, udw;

	window = (u64)upper << DSB_SCANLINE_UPPER_SHIFT;
	window |= (u64)lower << DSB_SCANLINE_LOWER_SHIFT;

	ldw = lower_32_bits(window);
	udw = (opcode << DSB_OPCODE_SHIFT) | upper_32_bits(window);

	intel_dsb_emit(dsb, ldw, udw);
}
432
intel_dsb_wait_dsl(struct intel_atomic_state * state,struct intel_dsb * dsb,int lower_in,int upper_in,int lower_out,int upper_out)433 static void intel_dsb_wait_dsl(struct intel_atomic_state *state,
434 struct intel_dsb *dsb,
435 int lower_in, int upper_in,
436 int lower_out, int upper_out)
437 {
438 struct intel_crtc *crtc = dsb->crtc;
439
440 lower_in = dsb_scanline_to_hw(state, crtc, lower_in);
441 upper_in = dsb_scanline_to_hw(state, crtc, upper_in);
442
443 lower_out = dsb_scanline_to_hw(state, crtc, lower_out);
444 upper_out = dsb_scanline_to_hw(state, crtc, upper_out);
445
446 if (upper_in >= lower_in)
447 intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_IN,
448 lower_in, upper_in);
449 else if (upper_out >= lower_out)
450 intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT,
451 lower_out, upper_out);
452 else
453 drm_WARN_ON(crtc->base.dev, 1); /* assert_dsl_ok() should have caught it already */
454 }
455
assert_dsl_ok(struct intel_atomic_state * state,struct intel_dsb * dsb,int start,int end)456 static void assert_dsl_ok(struct intel_atomic_state *state,
457 struct intel_dsb *dsb,
458 int start, int end)
459 {
460 struct intel_crtc *crtc = dsb->crtc;
461 int vtotal = dsb_vtotal(state, crtc);
462
463 /*
464 * Waiting for the entire frame doesn't make sense,
465 * (IN==don't wait, OUT=wait forever).
466 */
467 drm_WARN(crtc->base.dev, (end - start + vtotal) % vtotal == vtotal - 1,
468 "[CRTC:%d:%s] DSB %d bad scanline window wait: %d-%d (vt=%d)\n",
469 crtc->base.base.id, crtc->base.name, dsb->id,
470 start, end, vtotal);
471 }
472
/* Emit a wait for the scanline to be inside the @start-@end window. */
void intel_dsb_wait_scanline_in(struct intel_atomic_state *state,
				struct intel_dsb *dsb,
				int start, int end)
{
	/* the complementary window: everything outside @start-@end */
	int outside_lower = end + 1;
	int outside_upper = start - 1;

	assert_dsl_ok(state, dsb, start, end);

	intel_dsb_wait_dsl(state, dsb,
			   start, end,
			   outside_lower, outside_upper);
}
483
/* Emit a wait for the scanline to be outside the @start-@end window. */
void intel_dsb_wait_scanline_out(struct intel_atomic_state *state,
				 struct intel_dsb *dsb,
				 int start, int end)
{
	/* the complementary window: everything outside @start-@end */
	int outside_lower = end + 1;
	int outside_upper = start - 1;

	assert_dsl_ok(state, dsb, start, end);

	intel_dsb_wait_dsl(state, dsb,
			   outside_lower, outside_upper,
			   start, end);
}
494
/*
 * Emit a register poll: program this DSB's DSB_POLLMASK with @mask
 * and DSB_POLLFUNC with @wait_us/@count, pad with five NOOPs, then
 * emit a POLL instruction for @reg carrying @val.
 */
void intel_dsb_poll(struct intel_dsb *dsb,
		    i915_reg_t reg, u32 mask, u32 val,
		    int wait_us, int count)
{
	enum pipe pipe = dsb->crtc->pipe;
	u32 pollfunc = DSB_POLL_ENABLE |
		DSB_POLL_WAIT(wait_us) | DSB_POLL_COUNT(count);
	u32 poll_udw = (DSB_OPCODE_POLL << DSB_OPCODE_SHIFT) |
		i915_mmio_reg_offset(reg);

	intel_dsb_reg_write(dsb, DSB_POLLMASK(pipe, dsb->id), mask);
	intel_dsb_reg_write(dsb, DSB_POLLFUNC(pipe, dsb->id), pollfunc);

	intel_dsb_noop(dsb, 5);

	intel_dsb_emit(dsb, val, poll_udw);
}
513
intel_dsb_align_tail(struct intel_dsb * dsb)514 static void intel_dsb_align_tail(struct intel_dsb *dsb)
515 {
516 u32 aligned_tail, tail;
517
518 intel_dsb_ins_align(dsb);
519
520 tail = dsb->free_pos * 4;
521 aligned_tail = ALIGN(tail, CACHELINE_BYTES);
522
523 if (aligned_tail > tail)
524 intel_dsb_buffer_memset(dsb->dsb_buf, dsb->free_pos, 0,
525 aligned_tail - tail);
526
527 dsb->free_pos = aligned_tail / 4;
528 }
529
intel_dsb_gosub_align(struct intel_dsb * dsb)530 static void intel_dsb_gosub_align(struct intel_dsb *dsb)
531 {
532 u32 aligned_tail, tail;
533
534 intel_dsb_ins_align(dsb);
535
536 tail = dsb->free_pos * 4;
537 aligned_tail = ALIGN(tail, CACHELINE_BYTES);
538
539 /*
540 * Wa_16024917128
541 * "Ensure GOSUB is not placed in cacheline QW slot 6 or 7 (numbered 0-7)"
542 */
543 if (aligned_tail - tail <= 2 * 8)
544 intel_dsb_buffer_memset(dsb->dsb_buf, dsb->free_pos, 0,
545 aligned_tail - tail);
546
547 dsb->free_pos = aligned_tail / 4;
548 }
549
intel_dsb_gosub(struct intel_dsb * dsb,struct intel_dsb * sub_dsb)550 void intel_dsb_gosub(struct intel_dsb *dsb,
551 struct intel_dsb *sub_dsb)
552 {
553 struct intel_crtc *crtc = dsb->crtc;
554 struct intel_display *display = to_intel_display(crtc->base.dev);
555 unsigned int head, tail;
556 u64 head_tail;
557
558 if (drm_WARN_ON(display->drm, dsb->id != sub_dsb->id))
559 return;
560
561 if (!assert_dsb_tail_is_aligned(sub_dsb))
562 return;
563
564 intel_dsb_gosub_align(dsb);
565
566 head = intel_dsb_head(sub_dsb);
567 tail = intel_dsb_tail(sub_dsb);
568
569 /*
570 * The GOSUB instruction has the following memory layout.
571 *
572 * +------------------------------------------------------------+
573 * | Opcode | Rsvd | Head Ptr | Tail Ptr |
574 * | 0x0c | | | |
575 * +------------------------------------------------------------+
576 * |<- 8bits->|<- 4bits ->|<-- 26bits -->|<-- 26bits -->|
577 *
578 * We have only 26 bits each to represent the head and tail
579 * pointers even though the addresses itself are of 32 bit. However, this
580 * is not a problem because the addresses are 64 bit aligned and therefore
581 * the last 6 bits are always Zero's. Therefore, we right shift the address
582 * by 6 before embedding it into the GOSUB instruction.
583 */
584
585 head_tail = ((u64)(DSB_GOSUB_CONVERT_ADDR(head)) << DSB_GOSUB_HEAD_SHIFT) |
586 ((u64)(DSB_GOSUB_CONVERT_ADDR(tail)) << DSB_GOSUB_TAIL_SHIFT);
587
588 intel_dsb_emit(dsb, lower_32_bits(head_tail),
589 (DSB_OPCODE_GOSUB << DSB_OPCODE_SHIFT) |
590 upper_32_bits(head_tail));
591
592 /*
593 * "NOTE: the instructions within the cacheline
594 * FOLLOWING the GOSUB instruction must be NOPs."
595 */
596 intel_dsb_align_tail(dsb);
597 }
598
intel_dsb_gosub_finish(struct intel_dsb * dsb)599 void intel_dsb_gosub_finish(struct intel_dsb *dsb)
600 {
601 intel_dsb_align_tail(dsb);
602
603 /*
604 * Wa_16024917128
605 * "Ensure that all subroutines called by GOSUB end with a cacheline of NOPs"
606 */
607 intel_dsb_noop(dsb, 8);
608
609 intel_dsb_buffer_flush_map(dsb->dsb_buf);
610 }
611
intel_dsb_finish(struct intel_dsb * dsb)612 void intel_dsb_finish(struct intel_dsb *dsb)
613 {
614 intel_dsb_align_tail(dsb);
615
616 intel_dsb_buffer_flush_map(dsb->dsb_buf);
617 }
618
dsb_error_int_status(struct intel_display * display)619 static u32 dsb_error_int_status(struct intel_display *display)
620 {
621 u32 errors;
622
623 errors = DSB_GTT_FAULT_INT_STATUS |
624 DSB_RSPTIMEOUT_INT_STATUS |
625 DSB_POLL_ERR_INT_STATUS;
626
627 /*
628 * All the non-existing status bits operate as
629 * normal r/w bits, so any attempt to clear them
630 * will just end up setting them. Never do that so
631 * we won't mistake them for actual error interrupts.
632 */
633 if (DISPLAY_VER(display) >= 14)
634 errors |= DSB_ATS_FAULT_INT_STATUS;
635
636 if (DISPLAY_VER(display) >= 30)
637 errors |= DSB_GOSUB_INT_STATUS;
638
639 return errors;
640 }
641
dsb_error_int_en(struct intel_display * display)642 static u32 dsb_error_int_en(struct intel_display *display)
643 {
644 u32 errors;
645
646 errors = DSB_GTT_FAULT_INT_EN |
647 DSB_RSPTIMEOUT_INT_EN |
648 DSB_POLL_ERR_INT_EN;
649
650 if (DISPLAY_VER(display) >= 14)
651 errors |= DSB_ATS_FAULT_INT_EN;
652
653 /*
654 * Wa_16024917128
655 * "Disable nested GOSUB interrupt (DSB_INTERRUPT bit 21)"
656 */
657 if (0 && DISPLAY_VER(display) >= 30)
658 errors |= DSB_GOSUB_INT_EN;
659
660 return errors;
661 }
662
/*
 * FIXME calibrate these sensibly, ideally compute based on
 * the number of registers to be written. But that requires
 * measuring the actual DSB execution speed on each platform
 * (and the speed also depends on CDCLK and memory clock)...
 */
static int intel_dsb_noarm_exec_time_us(void)
{
	/* fixed estimate, in microseconds */
	const int noarm_us = 80;

	return noarm_us;
}
673
static int intel_dsb_arm_exec_time_us(void)
{
	/* fixed estimate, in microseconds */
	const int arm_us = 20;

	return arm_us;
}
678
/* Total estimated DSB execution time in usecs: noarm part plus arm part. */
int intel_dsb_exec_time_us(void)
{
	int noarm_us = intel_dsb_noarm_exec_time_us();
	int arm_us = intel_dsb_arm_exec_time_us();

	return noarm_us + arm_us;
}
684
intel_dsb_vblank_evade(struct intel_atomic_state * state,struct intel_dsb * dsb)685 void intel_dsb_vblank_evade(struct intel_atomic_state *state,
686 struct intel_dsb *dsb)
687 {
688 struct intel_crtc *crtc = dsb->crtc;
689 const struct intel_crtc_state *crtc_state =
690 intel_pre_commit_crtc_state(state, crtc);
691 int latency = intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode,
692 intel_dsb_arm_exec_time_us());
693 int start, end;
694
695 /*
696 * PIPEDSL is reading as 0 when in SRDENT(PSR1) or DEEP_SLEEP(PSR2). On
697 * wake-up scanline counting starts from vblank_start - 1. We don't know
698 * if wake-up is already ongoing when evasion starts. In worst case
699 * PIPEDSL could start reading valid value right after checking the
700 * scanline. In this scenario we wouldn't have enough time to write all
701 * registers. To tackle this evade scanline 0 as well. As a drawback we
702 * have 1 frame delay in flip when waking up.
703 */
704 if (crtc_state->has_psr)
705 intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, 0, 0);
706
707 if (pre_commit_is_vrr_active(state, crtc) && crtc_state->vrr.dc_balance.enable) {
708 int vblank_delay = crtc_state->set_context_latency;
709 int vmin_vblank_start, vmax_vblank_start;
710
711 vmin_vblank_start = intel_vrr_dcb_vmin_vblank_start_next(crtc_state);
712
713 if (vmin_vblank_start >= 0) {
714 end = vmin_vblank_start;
715 start = end - vblank_delay - latency;
716 intel_dsb_wait_scanline_out(state, dsb, start, end);
717 }
718
719 vmax_vblank_start = intel_vrr_dcb_vmax_vblank_start_next(crtc_state);
720
721 if (vmax_vblank_start >= 0) {
722 end = vmax_vblank_start;
723 start = end - vblank_delay - latency;
724 intel_dsb_wait_scanline_out(state, dsb, start, end);
725 }
726
727 vmin_vblank_start = intel_vrr_dcb_vmin_vblank_start_final(crtc_state);
728 end = vmin_vblank_start;
729 start = end - vblank_delay - latency;
730 intel_dsb_wait_scanline_out(state, dsb, start, end);
731
732 vmax_vblank_start = intel_vrr_dcb_vmax_vblank_start_final(crtc_state);
733 end = vmax_vblank_start;
734 start = end - vblank_delay - latency;
735 intel_dsb_wait_scanline_out(state, dsb, start, end);
736 } else if (pre_commit_is_vrr_active(state, crtc)) {
737 int vblank_delay = crtc_state->set_context_latency;
738
739 end = intel_vrr_vmin_vblank_start(crtc_state);
740 start = end - vblank_delay - latency;
741 intel_dsb_wait_scanline_out(state, dsb, start, end);
742
743 end = intel_vrr_vmax_vblank_start(crtc_state);
744 start = end - vblank_delay - latency;
745 intel_dsb_wait_scanline_out(state, dsb, start, end);
746 } else {
747 int vblank_delay = intel_mode_vblank_delay(&crtc_state->hw.adjusted_mode);
748
749 end = intel_mode_vblank_start(&crtc_state->hw.adjusted_mode);
750 start = end - vblank_delay - latency;
751 intel_dsb_wait_scanline_out(state, dsb, start, end);
752 }
753 }
754
/*
 * Emit into @dsb the register writes that start @chained_dsb:
 * DSB_CTRL (with @ctrl plus the enable bit), DSB_CHICKEN,
 * DSB_INTERRUPT, DSB_PMCTRL, DSB_HEAD and DSB_TAIL of the chained
 * DSB. When @ctrl has DSB_WAIT_FOR_VBLANK set, DEwake is additionally
 * forced from @dsb while the scanline is in the dewake window.
 */
static void _intel_dsb_chain(struct intel_atomic_state *state,
			     struct intel_dsb *dsb,
			     struct intel_dsb *chained_dsb,
			     u32 ctrl)
{
	struct intel_display *display = to_intel_display(state->base.dev);
	struct intel_crtc *crtc = dsb->crtc;
	enum pipe pipe = crtc->pipe;
	bool wait_for_vblank = ctrl & DSB_WAIT_FOR_VBLANK;
	u32 pmctrl = 0;

	/* a DSB cannot chain to itself */
	if (drm_WARN_ON(display->drm, dsb->id == chained_dsb->id))
		return;

	if (!assert_dsb_tail_is_aligned(chained_dsb))
		return;

	intel_dsb_reg_write(dsb, DSB_CTRL(pipe, chained_dsb->id),
			    ctrl | DSB_ENABLE);

	intel_dsb_reg_write(dsb, DSB_CHICKEN(pipe, chained_dsb->id),
			    dsb_chicken(state, crtc));

	intel_dsb_reg_write(dsb, DSB_INTERRUPT(pipe, chained_dsb->id),
			    dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
			    dsb_error_int_en(display) | DSB_PROG_INT_EN);

	/* DEwake is only set up on the chained DSB when it waits for vblank */
	if (wait_for_vblank) {
		int dewake_scanline = dsb_dewake_scanline_start(state, crtc);
		int hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dewake_scanline);

		pmctrl = DSB_ENABLE_DEWAKE |
			DSB_SCANLINE_FOR_DEWAKE(hw_dewake_scanline);
	}

	intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id), pmctrl);

	intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id),
			    intel_dsb_head(chained_dsb));

	intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id),
			    intel_dsb_tail(chained_dsb));

	if (!wait_for_vblank)
		return;

	/*
	 * Keep DEwake alive via the first DSB, in
	 * case we're already past dewake_scanline,
	 * and thus DSB_ENABLE_DEWAKE on the second
	 * DSB won't do its job.
	 */
	intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id),
				   DSB_FORCE_DEWAKE, DSB_FORCE_DEWAKE);

	intel_dsb_wait_scanline_out(state, dsb,
				    dsb_dewake_scanline_start(state, crtc),
				    dsb_dewake_scanline_end(state, crtc));

	/*
	 * DSB_FORCE_DEWAKE remains active even after DSB is
	 * disabled, so make sure to clear it.
	 */
	intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id),
				   DSB_FORCE_DEWAKE, 0);
}
819
/*
 * Chain @chained_dsb to be started from @dsb, optionally with
 * DSB_WAIT_FOR_VBLANK set in its control value.
 */
void intel_dsb_chain(struct intel_atomic_state *state,
		     struct intel_dsb *dsb,
		     struct intel_dsb *chained_dsb,
		     bool wait_for_vblank)
{
	u32 ctrl = 0;

	if (wait_for_vblank)
		ctrl = DSB_WAIT_FOR_VBLANK;

	_intel_dsb_chain(state, dsb, chained_dsb, ctrl);
}
828
intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state * state,struct intel_dsb * dsb)829 void intel_dsb_wait_for_delayed_vblank(struct intel_atomic_state *state,
830 struct intel_dsb *dsb)
831 {
832 struct intel_crtc *crtc = dsb->crtc;
833 const struct intel_crtc_state *crtc_state =
834 intel_pre_commit_crtc_state(state, crtc);
835 const struct drm_display_mode *adjusted_mode =
836 &crtc_state->hw.adjusted_mode;
837 int wait_scanlines;
838
839 if (pre_commit_is_vrr_active(state, crtc)) {
840 /*
841 * If the push happened before the vmin decision boundary
842 * we don't know how far we are from the undelayed vblank.
843 * Wait until we're past the vmin safe window, at which
844 * point we're SCL lines away from the delayed vblank.
845 *
846 * If the push happened after the vmin decision boundary
847 * the hardware itself guarantees that we're SCL lines
848 * away from the delayed vblank, and we won't be inside
849 * the vmin safe window so this extra wait does nothing.
850 */
851 intel_dsb_wait_scanline_out(state, dsb,
852 intel_vrr_safe_window_start(crtc_state),
853 intel_vrr_vmin_safe_window_end(crtc_state));
854 /*
855 * When the push is sent during vblank it will trigger
856 * on the next scanline, hence we have up to one extra
857 * scanline until the delayed vblank occurs after
858 * TRANS_PUSH has been written.
859 */
860 wait_scanlines = crtc_state->set_context_latency + 1;
861 } else {
862 wait_scanlines = intel_mode_vblank_delay(adjusted_mode);
863 }
864
865 intel_dsb_wait_usec(dsb, intel_scanlines_to_usecs(adjusted_mode, wait_scanlines));
866 }
867
868 /**
869 * intel_dsb_commit() - Trigger workload execution of DSB.
870 * @dsb: DSB context
871 *
872 * This function is used to do actual write to hardware using DSB.
873 */
intel_dsb_commit(struct intel_dsb * dsb)874 void intel_dsb_commit(struct intel_dsb *dsb)
875 {
876 struct intel_crtc *crtc = dsb->crtc;
877 struct intel_display *display = to_intel_display(crtc->base.dev);
878 enum pipe pipe = crtc->pipe;
879
880 if (!assert_dsb_tail_is_aligned(dsb))
881 return;
882
883 if (is_dsb_busy(display, pipe, dsb->id)) {
884 drm_err(display->drm, "[CRTC:%d:%s] DSB %d is busy\n",
885 crtc->base.base.id, crtc->base.name, dsb->id);
886 return;
887 }
888
889 intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
890 DSB_ENABLE);
891
892 intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id),
893 dsb->chicken);
894
895 intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
896 dsb_error_int_status(display) | DSB_PROG_INT_STATUS |
897 dsb_error_int_en(display) | DSB_PROG_INT_EN);
898
899 intel_de_write_fw(display, DSB_PMCTRL(pipe, dsb->id), 0);
900
901 intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id),
902 intel_dsb_head(dsb));
903
904 intel_de_write_fw(display, DSB_TAIL(pipe, dsb->id),
905 intel_dsb_tail(dsb));
906 }
907
intel_dsb_wait(struct intel_dsb * dsb)908 void intel_dsb_wait(struct intel_dsb *dsb)
909 {
910 struct intel_crtc *crtc = dsb->crtc;
911 struct intel_display *display = to_intel_display(crtc->base.dev);
912 enum pipe pipe = crtc->pipe;
913 bool is_busy;
914 int ret;
915
916 ret = poll_timeout_us(is_busy = is_dsb_busy(display, pipe, dsb->id),
917 !is_busy,
918 100, 1000, false);
919 if (ret) {
920 u32 offset = intel_dsb_buffer_ggtt_offset(dsb->dsb_buf);
921
922 intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id),
923 DSB_ENABLE | DSB_HALT);
924
925 drm_err(display->drm,
926 "[CRTC:%d:%s] DSB %d timed out waiting for idle (current head=0x%x, head=0x%x, tail=0x%x)\n",
927 crtc->base.base.id, crtc->base.name, dsb->id,
928 intel_de_read_fw(display, DSB_CURRENT_HEAD(pipe, dsb->id)) - offset,
929 intel_de_read_fw(display, DSB_HEAD(pipe, dsb->id)) - offset,
930 intel_de_read_fw(display, DSB_TAIL(pipe, dsb->id)) - offset);
931
932 intel_dsb_dump(dsb);
933 }
934
935 /* Attempt to reset it */
936 dsb->free_pos = 0;
937 dsb->ins_start_offset = 0;
938 dsb->ins[0] = 0;
939 dsb->ins[1] = 0;
940
941 intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), 0);
942
943 intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id),
944 dsb_error_int_status(display) | DSB_PROG_INT_STATUS);
945 }
946
947 /**
948 * intel_dsb_prepare() - Allocate, pin and map the DSB command buffer.
949 * @state: the atomic state
950 * @crtc: the CRTC
951 * @dsb_id: the DSB engine to use
952 * @max_cmds: number of commands we need to fit into command buffer
953 *
954 * This function prepare the command buffer which is used to store dsb
955 * instructions with data.
956 *
957 * Returns:
958 * DSB context, NULL on failure
959 */
intel_dsb_prepare(struct intel_atomic_state * state,struct intel_crtc * crtc,enum intel_dsb_id dsb_id,unsigned int max_cmds)960 struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state,
961 struct intel_crtc *crtc,
962 enum intel_dsb_id dsb_id,
963 unsigned int max_cmds)
964 {
965 struct intel_display *display = to_intel_display(state);
966 struct intel_dsb_buffer *dsb_buf;
967 struct ref_tracker *wakeref;
968 struct intel_dsb *dsb;
969 unsigned int size;
970
971 if (!HAS_DSB(display))
972 return NULL;
973
974 if (!display->params.enable_dsb)
975 return NULL;
976
977 dsb = kzalloc_obj(*dsb);
978 if (!dsb)
979 goto out;
980
981 wakeref = intel_display_rpm_get(display);
982
983 /* ~1 qword per instruction, full cachelines */
984 size = ALIGN(max_cmds * 8, CACHELINE_BYTES);
985
986 dsb_buf = intel_dsb_buffer_create(display->drm, size);
987 if (IS_ERR(dsb_buf))
988 goto out_put_rpm;
989
990 dsb->dsb_buf = dsb_buf;
991
992 intel_display_rpm_put(display, wakeref);
993
994 dsb->id = dsb_id;
995 dsb->crtc = crtc;
996 dsb->size = size / 4; /* in dwords */
997
998 dsb->chicken = dsb_chicken(state, crtc);
999 dsb->hw_dewake_scanline =
1000 dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline_start(state, crtc));
1001
1002 return dsb;
1003
1004 out_put_rpm:
1005 intel_display_rpm_put(display, wakeref);
1006 kfree(dsb);
1007 out:
1008 drm_info_once(display->drm,
1009 "[CRTC:%d:%s] DSB %d queue setup failed, will fallback to MMIO for display HW programming\n",
1010 crtc->base.base.id, crtc->base.name, dsb_id);
1011
1012 return NULL;
1013 }
1014
1015 /**
1016 * intel_dsb_cleanup() - To cleanup DSB context.
1017 * @dsb: DSB context
1018 *
1019 * This function cleanup the DSB context by unpinning and releasing
1020 * the VMA object associated with it.
1021 */
intel_dsb_cleanup(struct intel_dsb * dsb)1022 void intel_dsb_cleanup(struct intel_dsb *dsb)
1023 {
1024 intel_dsb_buffer_cleanup(dsb->dsb_buf);
1025 kfree(dsb);
1026 }
1027
intel_dsb_irq_handler(struct intel_display * display,enum pipe pipe,enum intel_dsb_id dsb_id)1028 void intel_dsb_irq_handler(struct intel_display *display,
1029 enum pipe pipe, enum intel_dsb_id dsb_id)
1030 {
1031 struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe);
1032 u32 tmp, errors;
1033
1034 tmp = intel_de_read_fw(display, DSB_INTERRUPT(pipe, dsb_id));
1035 intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb_id), tmp);
1036
1037 if (tmp & DSB_PROG_INT_STATUS) {
1038 spin_lock(&display->drm->event_lock);
1039
1040 if (crtc->dsb_event) {
1041 /*
1042 * Update vblank counter/timestamp in case it
1043 * hasn't been done yet for this frame.
1044 */
1045 drm_crtc_accurate_vblank_count(&crtc->base);
1046
1047 drm_crtc_send_vblank_event(&crtc->base, crtc->dsb_event);
1048 crtc->dsb_event = NULL;
1049 }
1050
1051 spin_unlock(&display->drm->event_lock);
1052 }
1053
1054 errors = tmp & dsb_error_int_status(display);
1055 if (errors & DSB_ATS_FAULT_INT_STATUS)
1056 drm_err(display->drm, "[CRTC:%d:%s] DSB %d ATS fault\n",
1057 crtc->base.base.id, crtc->base.name, dsb_id);
1058 if (errors & DSB_GTT_FAULT_INT_STATUS)
1059 drm_err(display->drm, "[CRTC:%d:%s] DSB %d GTT fault\n",
1060 crtc->base.base.id, crtc->base.name, dsb_id);
1061 if (errors & DSB_RSPTIMEOUT_INT_STATUS)
1062 drm_err(display->drm, "[CRTC:%d:%s] DSB %d response timeout\n",
1063 crtc->base.base.id, crtc->base.name, dsb_id);
1064 if (errors & DSB_POLL_ERR_INT_STATUS)
1065 drm_err(display->drm, "[CRTC:%d:%s] DSB %d poll error\n",
1066 crtc->base.base.id, crtc->base.name, dsb_id);
1067 if (errors & DSB_GOSUB_INT_STATUS)
1068 drm_err(display->drm, "[CRTC:%d:%s] DSB %d GOSUB programming error\n",
1069 crtc->base.base.id, crtc->base.name, dsb_id);
1070 }
1071