1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26
27 #include "../display_mode_lib.h"
28 #include "../display_mode_vba.h"
29 #include "../dml_inline_defs.h"
30 #include "display_rq_dlg_calc_21.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parseable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41 static void calculate_ttu_cursor(
42 struct display_mode_lib *mode_lib,
43 double *refcyc_per_req_delivery_pre_cur,
44 double *refcyc_per_req_delivery_cur,
45 double refclk_freq_in_mhz,
46 double ref_freq_to_pix_freq,
47 double hscale_pixel_rate_l,
48 double hscl_ratio,
49 double vratio_pre_l,
50 double vratio_l,
51 unsigned int cur_width,
52 enum cursor_bpp cur_bpp);
53
get_bytes_per_element(enum source_format_class source_format,bool is_chroma)54 static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
55 {
56 unsigned int ret_val = 1;
57
58 if (source_format == dm_444_16) {
59 if (!is_chroma)
60 ret_val = 2;
61 } else if (source_format == dm_444_32) {
62 if (!is_chroma)
63 ret_val = 4;
64 } else if (source_format == dm_444_64) {
65 if (!is_chroma)
66 ret_val = 8;
67 } else if (source_format == dm_420_8) {
68 if (is_chroma)
69 ret_val = 2;
70 else
71 ret_val = 1;
72 } else if (source_format == dm_420_10) {
73 if (is_chroma)
74 ret_val = 4;
75 else
76 ret_val = 2;
77 } else if (source_format == dm_444_8) {
78 ret_val = 1;
79 }
80 return ret_val;
81 }
82
is_dual_plane(enum source_format_class source_format)83 static bool is_dual_plane(enum source_format_class source_format)
84 {
85 bool ret_val = false;
86
87 if ((source_format == dm_420_8) || (source_format == dm_420_10))
88 ret_val = true;
89
90 return ret_val;
91 }
92
/*
 * Compute refclk_freq_in_mhz * width / rate / req_per_swath_ub, where:
 *
 *  - vratio <= 1.0: rate is pclk_freq_in_mhz and width is recout_width,
 *    limited to hactive / 2 when odm_combine is set;
 *  - otherwise:     rate is hscale_pixel_rate and width is delivery_width.
 *
 * The inputs and the result are logged through dml_print().
 */
static double get_refcyc_per_delivery(
		struct display_mode_lib *mode_lib,
		double refclk_freq_in_mhz,
		double pclk_freq_in_mhz,
		bool odm_combine,
		unsigned int recout_width,
		unsigned int hactive,
		double vratio,
		double hscale_pixel_rate,
		unsigned int delivery_width,
		unsigned int req_per_swath_ub)
{
	double width;
	double rate;
	double refcyc_per_delivery;

	if (vratio <= 1.0) {
		rate = pclk_freq_in_mhz;
		width = (double) recout_width;
		if (odm_combine)
			width = dml_min((double) recout_width, (double) hactive / 2.0);
	} else {
		rate = (double) hscale_pixel_rate;
		width = (double) delivery_width;
	}

	// same left-to-right evaluation order as the per-case formulas
	refcyc_per_delivery = (double) refclk_freq_in_mhz * width / rate / (double) req_per_swath_ub;

	dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
	dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
	dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width);
	dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio);
	dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub);
	dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery);

	return refcyc_per_delivery;
}
130
get_blk_size_bytes(const enum source_macro_tile_size tile_size)131 static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size)
132 {
133 if (tile_size == dm_256k_tile)
134 return (256 * 1024);
135 else if (tile_size == dm_64k_tile)
136 return (64 * 1024);
137 else
138 return (4 * 1024);
139 }
140
/*
 * Convert the byte sizes in rq_sizing into the log2-based values stored in
 * rq_regs.
 *
 * mode_lib:  passed to the parameter print helper.
 * rq_regs:   output; chunk, meta chunk and PTE group size fields are written.
 * rq_sizing: input byte sizes.
 *
 * A minimum (meta) chunk size of 0 bytes is stored as 0 instead of being
 * passed to dml_log2().
 */
static void extract_rq_sizing_regs(
		struct display_mode_lib *mode_lib,
		display_data_rq_regs_st *rq_regs,
		const display_data_rq_sizing_params_st *rq_sizing)
{
	dml_print("DML_DLG: %s: rq_sizing param\n", __func__);
	print__data_rq_sizing_params_st(mode_lib, rq_sizing);

	// data chunks
	rq_regs->chunk_size = dml_log2(rq_sizing->chunk_bytes) - 10;
	rq_regs->min_chunk_size = 0;
	if (rq_sizing->min_chunk_bytes != 0)
		rq_regs->min_chunk_size = dml_log2(rq_sizing->min_chunk_bytes) - 8 + 1;

	// meta chunks
	rq_regs->meta_chunk_size = dml_log2(rq_sizing->meta_chunk_bytes) - 10;
	rq_regs->min_meta_chunk_size = 0;
	if (rq_sizing->min_meta_chunk_bytes != 0)
		rq_regs->min_meta_chunk_size = dml_log2(rq_sizing->min_meta_chunk_bytes) - 6 + 1;

	// pte groups
	rq_regs->dpte_group_size = dml_log2(rq_sizing->dpte_group_bytes) - 6;
	rq_regs->mpte_group_size = dml_log2(rq_sizing->mpte_group_bytes) - 6;
}
165
/*
 * Fill rq_regs from the request parameters in rq_param.
 *
 * mode_lib: supplies ip.det_buffer_size_kbytes.
 * rq_regs:  output register values.
 * rq_param: input parameters (sizing, dlg and misc groups are read).
 *
 * The chroma sizing registers and plane1_base_address are only computed when
 * rq_param->yuv420 is set; otherwise plane1_base_address is written as 0.
 */
static void extract_rq_regs(
		struct display_mode_lib *mode_lib,
		display_rq_regs_st *rq_regs,
		const display_rq_params_st *rq_param)
{
	const unsigned int det_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
	unsigned int plane1_addr = 0;

	// luma
	extract_rq_sizing_regs(mode_lib, &rq_regs->rq_regs_l, &rq_param->sizing.rq_l);
	rq_regs->rq_regs_l.pte_row_height_linear =
			dml_floor(dml_log2(rq_param->dlg.rq_l.dpte_row_height), 1) - 3;

	// chroma
	if (rq_param->yuv420) {
		extract_rq_sizing_regs(mode_lib, &rq_regs->rq_regs_c, &rq_param->sizing.rq_c);
		rq_regs->rq_regs_c.pte_row_height_linear =
				dml_floor(dml_log2(rq_param->dlg.rq_c.dpte_row_height), 1) - 3;
	}

	rq_regs->rq_regs_l.swath_height = dml_log2(rq_param->dlg.rq_l.swath_height);
	rq_regs->rq_regs_c.swath_height = dml_log2(rq_param->dlg.rq_c.swath_height);

	// FIXME: take the max between luma, chroma chunk size?
	// okay for now, as we are setting chunk_bytes to 8kb anyways
	rq_regs->drq_expansion_mode =
			(rq_param->sizing.rq_l.chunk_bytes >= 32 * 1024) ? 0 : 2; // 32kb threshold
	rq_regs->prq_expansion_mode = 1;
	rq_regs->mrq_expansion_mode = 1;
	rq_regs->crq_expansion_mode = 1;

	if (rq_param->yuv420) {
		if ((double) rq_param->misc.rq_l.stored_swath_bytes
				/ (double) rq_param->misc.rq_c.stored_swath_bytes <= 1.5)
			plane1_addr = (det_bytes / 2.0 / 64.0); // half to chroma
		else
			plane1_addr = dml_round_to_multiple(
					(unsigned int) ((2.0 * det_bytes) / 3.0),
					256,
					0) / 64.0; // 2/3 to chroma
	}
	rq_regs->plane1_base_address = plane1_addr;
}
214
/*
 * Decide the stored swath sizes and swath heights for luma and chroma.
 *
 * mode_lib:       supplies ip.det_buffer_size_kbytes.
 * rq_param:       in/out. Reads yuv420, yuv420_10bpc and the misc
 *                 full_swath_bytes/blk256_* fields; writes
 *                 misc.rq_{l,c}.stored_swath_bytes and
 *                 dlg.rq_{l,c}.swath_height.
 * pipe_src_param: supplies sw_mode and source_scan.
 *
 * When two full swaths (of each plane) do not fit in the detile buffer, luma
 * is flagged req128 and its swath height is halved. Chroma is never flagged
 * req128 here.
 */
static void handle_det_buf_split(
		struct display_mode_lib *mode_lib,
		display_rq_params_st *rq_param,
		const display_pipe_source_params_st *pipe_src_param)
{
	const unsigned int det_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024;
	const bool is_linear = (pipe_src_param->sw_mode == dm_sw_linear);
	const bool is_vert = (pipe_src_param->source_scan == dm_vert);
	unsigned int packed_l = rq_param->misc.rq_l.full_swath_bytes;
	unsigned int packed_c = rq_param->misc.rq_c.full_swath_bytes;
	unsigned int total_bytes;
	unsigned int stored_l;
	unsigned int stored_c;
	unsigned int log2_height_l = 0;
	unsigned int log2_height_c = 0;
	bool req128_l;
	bool req128_c = false;

	if (rq_param->yuv420_10bpc) {
		packed_l = dml_round_to_multiple(packed_l * 2 / 3, 256, 1) + 256;
		packed_c = dml_round_to_multiple(packed_c * 2 / 3, 256, 1) + 256;
	}

	if (rq_param->yuv420) {
		total_bytes = 2 * packed_l + 2 * packed_c;

		// full 256b request when everything fits, otherwise
		// 128b request (for luma only for yuv420 8bpc)
		req128_l = (total_bytes > det_bytes);
		stored_l = req128_l ? packed_l / 2 : packed_l;
		stored_c = packed_c;
		// Note: assumption, the config that pass in will fit into
		// the detiled buffer.
	} else {
		total_bytes = 2 * packed_l;
		req128_l = (total_bytes > det_bytes);
		stored_l = total_bytes;
		stored_c = 0;
	}
	rq_param->misc.rq_l.stored_swath_bytes = stored_l;
	rq_param->misc.rq_c.stored_swath_bytes = stored_c;

	// linear surfaces keep both log2 heights at 0
	if (!is_linear) {
		const unsigned int height_l = is_vert ?
				rq_param->misc.rq_l.blk256_width : rq_param->misc.rq_l.blk256_height;
		const unsigned int height_c = is_vert ?
				rq_param->misc.rq_c.blk256_width : rq_param->misc.rq_c.blk256_height;

		if (height_l > 0)
			log2_height_l = dml_log2(height_l);

		if (req128_l && log2_height_l > 0)
			log2_height_l -= 1;

		if (height_c > 0)
			log2_height_c = dml_log2(height_c);
	}

	rq_param->dlg.rq_l.swath_height = 1 << log2_height_l;
	rq_param->dlg.rq_c.swath_height = 1 << log2_height_c;

	dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l);
	dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c);
	dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", __func__, packed_l);
	dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", __func__, packed_c);
}
316
/*
 * Compute the swath, meta and dpte request attributes of one surface plane.
 *
 * mode_lib:        supplies soc.vmm_page_size_bytes and the ip dpte/pde
 *                  buffer sizes.
 * rq_dlg_param:    output; swath_width_ub, req_per_swath_ub and the meta and
 *                  dpte per-row / per-frame fields are written.
 * rq_misc_param:   output; full_swath_bytes, blk256_height and blk256_width
 *                  are written.
 * rq_sizing_param: in/out; meta_chunk_bytes and min_meta_chunk_bytes are
 *                  read, dpte_group_bytes is written.
 * vp_width,
 * vp_height:       viewport size of the plane.
 * data_pitch:      only used for linear surfaces (dpte row height and width).
 * meta_pitch:      used for the meta surface size.
 * source_format,
 * tiling,
 * macro_tile_size,
 * source_scan:     enum values passed as unsigned int; they are cast back to
 *                  their enum types or compared against enum constants below.
 * hostvm_enable:   when non-zero, dpte_group_bytes is forced to 512.
 * is_chroma:       non-zero selects the chroma block size and
 *                  bytes-per-element, zero selects luma.
 *
 * The arithmetic is kept as received from HW; apart from this header the
 * only change is that the reduced dpte group condition uses logical && rather
 * than bitwise & on its boolean operands (same result, conventional form).
 */
static void get_meta_and_pte_attr(
		struct display_mode_lib *mode_lib,
		display_data_rq_dlg_params_st *rq_dlg_param,
		display_data_rq_misc_params_st *rq_misc_param,
		display_data_rq_sizing_params_st *rq_sizing_param,
		unsigned int vp_width,
		unsigned int vp_height,
		unsigned int data_pitch,
		unsigned int meta_pitch,
		unsigned int source_format,
		unsigned int tiling,
		unsigned int macro_tile_size,
		unsigned int source_scan,
		unsigned int hostvm_enable,
		unsigned int is_chroma)
{
	bool surf_linear = (tiling == dm_sw_linear);
	bool surf_vert = (source_scan == dm_vert);

	unsigned int bytes_per_element;
	unsigned int bytes_per_element_y = get_bytes_per_element(
			(enum source_format_class) (source_format),
			false);
	unsigned int bytes_per_element_c = get_bytes_per_element(
			(enum source_format_class) (source_format),
			true);

	unsigned int blk256_width = 0;
	unsigned int blk256_height = 0;

	unsigned int blk256_width_y = 0;
	unsigned int blk256_height_y = 0;
	unsigned int blk256_width_c = 0;
	unsigned int blk256_height_c = 0;
	unsigned int log2_bytes_per_element;
	unsigned int log2_blk256_width;
	unsigned int log2_blk256_height;
	unsigned int blk_bytes;
	unsigned int log2_blk_bytes;
	unsigned int log2_blk_height;
	unsigned int log2_blk_width;
	unsigned int log2_meta_req_bytes;
	unsigned int log2_meta_req_height;
	unsigned int log2_meta_req_width;
	unsigned int meta_req_width;
	unsigned int meta_req_height;
	unsigned int log2_meta_row_height;
	unsigned int meta_row_width_ub;
	unsigned int log2_meta_chunk_bytes;
	unsigned int log2_meta_chunk_height;

	//full sized meta chunk width in unit of data elements
	unsigned int log2_meta_chunk_width;
	unsigned int log2_min_meta_chunk_bytes;
	unsigned int min_meta_chunk_width;
	unsigned int meta_chunk_width;
	unsigned int meta_chunk_per_row_int;
	unsigned int meta_row_remainder;
	unsigned int meta_chunk_threshold;
	unsigned int meta_blk_bytes;
	unsigned int meta_blk_height;
	unsigned int meta_blk_width;
	unsigned int meta_surface_bytes;
	unsigned int vmpg_bytes;
	unsigned int meta_pte_req_per_frame_ub;
	unsigned int meta_pte_bytes_per_frame_ub;
	const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes);
	const unsigned int dpte_buf_in_pte_reqs =
			mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma + mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
	const unsigned int pde_proc_buffer_size_64k_reqs =
			mode_lib->ip.pde_proc_buffer_size_64k_reqs;

	unsigned int log2_vmpg_height = 0;
	unsigned int log2_vmpg_width = 0;
	unsigned int log2_dpte_req_height_ptes = 0;
	unsigned int log2_dpte_req_height = 0;
	unsigned int log2_dpte_req_width = 0;
	unsigned int log2_dpte_row_height_linear = 0;
	unsigned int log2_dpte_row_height = 0;
	unsigned int log2_dpte_group_width = 0;
	unsigned int dpte_row_width_ub = 0;
	unsigned int dpte_req_height = 0;
	unsigned int dpte_req_width = 0;
	unsigned int dpte_group_width = 0;
	unsigned int log2_dpte_group_bytes = 0;
	unsigned int log2_dpte_group_length = 0;
	unsigned int pde_buf_entries;
	bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10);

	Calculate256BBlockSizes(
			(enum source_format_class) (source_format),
			(enum dm_swizzle_mode) (tiling),
			bytes_per_element_y,
			bytes_per_element_c,
			&blk256_height_y,
			&blk256_height_c,
			&blk256_width_y,
			&blk256_width_c);

	if (!is_chroma) {
		blk256_width = blk256_width_y;
		blk256_height = blk256_height_y;
		bytes_per_element = bytes_per_element_y;
	} else {
		blk256_width = blk256_width_c;
		blk256_height = blk256_height_c;
		bytes_per_element = bytes_per_element_c;
	}

	log2_bytes_per_element = dml_log2(bytes_per_element);

	dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear);
	dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert);
	dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width);
	dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height);

	log2_blk256_width = dml_log2((double) blk256_width);
	log2_blk256_height = dml_log2((double) blk256_height);
	blk_bytes = surf_linear ?
			256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
	log2_blk_bytes = dml_log2((double) blk_bytes);

	// remember log rule
	// "+" in log is multiply
	// "-" in log is divide
	// "/2" is like square root
	// blk is vertical biased
	if (tiling != dm_sw_linear)
		log2_blk_height = log2_blk256_height
				+ dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1);
	else
		log2_blk_height = 0; // blk height of 1

	log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height;

	if (!surf_vert) {
		rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1)
				+ blk256_width;
		rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width;
	} else {
		rq_dlg_param->swath_width_ub = dml_round_to_multiple(
				vp_height - 1,
				blk256_height,
				1) + blk256_height;
		rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height;
	}

	if (!surf_vert)
		rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height
				* bytes_per_element;
	else
		rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width
				* bytes_per_element;

	rq_misc_param->blk256_height = blk256_height;
	rq_misc_param->blk256_width = blk256_width;

	// -------
	// meta
	// -------
	log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element

	// each 64b meta request for dcn is 8x8 meta elements and
	// a meta element covers one 256b block of the data surface.
	log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256
	log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element
			- log2_meta_req_height;
	meta_req_width = 1 << log2_meta_req_width;
	meta_req_height = 1 << log2_meta_req_height;

	// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
	// calculate upper bound of the meta_row_width
	if (!surf_vert) {
		log2_meta_row_height = log2_meta_req_height;
		meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1)
				+ meta_req_width;
		rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width;
	} else {
		log2_meta_row_height = log2_meta_req_width;
		meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1)
				+ meta_req_height;
		rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height;
	}
	rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64;

	rq_dlg_param->meta_row_height = 1 << log2_meta_row_height;

	log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes);
	log2_meta_chunk_height = log2_meta_row_height;

	//full sized meta chunk width in unit of data elements
	log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element
			- log2_meta_chunk_height;
	log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes);
	min_meta_chunk_width = 1
			<< (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element
					- log2_meta_chunk_height);
	meta_chunk_width = 1 << log2_meta_chunk_width;
	meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width);
	meta_row_remainder = meta_row_width_ub % meta_chunk_width;
	meta_chunk_threshold = 0;
	meta_blk_bytes = 4096;
	meta_blk_height = blk256_height * 64;
	meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height;
	meta_surface_bytes = meta_pitch
			* (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1)
					+ meta_blk_height) * bytes_per_element / 256;
	vmpg_bytes = mode_lib->soc.vmm_page_size_bytes;
	meta_pte_req_per_frame_ub = (dml_round_to_multiple(
			meta_surface_bytes - vmpg_bytes,
			8 * vmpg_bytes,
			1) + 8 * vmpg_bytes) / (8 * vmpg_bytes);
	meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request
	rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub;

	dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height);
	dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width);
	dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes);
	dml_print(
			"DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n",
			__func__,
			meta_pte_req_per_frame_ub);
	dml_print(
			"DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n",
			__func__,
			meta_pte_bytes_per_frame_ub);

	if (!surf_vert)
		meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width;
	else
		meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height;

	if (meta_row_remainder <= meta_chunk_threshold)
		rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
	else
		rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;

	// ------
	// dpte
	// ------
	if (surf_linear) {
		log2_vmpg_height = 0; // one line high
	} else {
		log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height;
	}
	log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height;

	// only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4.
	if (surf_linear) { //one 64B PTE request returns 8 PTEs
		log2_dpte_req_height_ptes = 0;
		log2_dpte_req_width = log2_vmpg_width + 3;
		log2_dpte_req_height = 0;
	} else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size
		//one 64B req gives 8x1 PTEs for 4KB tile
		log2_dpte_req_height_ptes = 0;
		log2_dpte_req_width = log2_blk_width + 3;
		log2_dpte_req_height = log2_blk_height + 0;
	} else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB
		//two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB
		log2_dpte_req_height_ptes = 4;
		log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width
		log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height
	} else { //64KB page size and must 64KB tile block
		//one 64B req gives 8x1 PTEs for 64KB tile
		log2_dpte_req_height_ptes = 0;
		log2_dpte_req_width = log2_blk_width + 3;
		log2_dpte_req_height = log2_blk_height + 0;
	}

	// The dpte request dimensions in data elements is dpte_req_width x dpte_req_height
	// log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent
	// That depends on the pte shape (i.e. 8x1, 4x2, 2x4)
	//log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes;
	//log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes;
	dpte_req_height = 1 << log2_dpte_req_height;
	dpte_req_width = 1 << log2_dpte_req_width;

	// calculate pitch dpte row buffer can hold
	// round the result down to a power of two.
	pde_buf_entries =
			yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs;
	if (surf_linear) {
		unsigned int dpte_row_height;

		log2_dpte_row_height_linear = dml_floor(
				dml_log2(
						dml_min(
								64 * 1024 * pde_buf_entries
										/ bytes_per_element,
								dpte_buf_in_pte_reqs
										* dpte_req_width)
								/ data_pitch),
				1);

		ASSERT(log2_dpte_row_height_linear >= 3);

		if (log2_dpte_row_height_linear > 7)
			log2_dpte_row_height_linear = 7;

		log2_dpte_row_height = log2_dpte_row_height_linear;
		// For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary.
		// the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering.
		dpte_row_height = 1 << log2_dpte_row_height;
		dpte_row_width_ub = dml_round_to_multiple(
				data_pitch * dpte_row_height - 1,
				dpte_req_width,
				1) + dpte_req_width;
		rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
	} else {
		// the upper bound of the dpte_row_width without dependency on viewport position follows.
		// for tiled mode, row height is the same as req height and row store up to vp size upper bound
		if (!surf_vert) {
			log2_dpte_row_height = log2_dpte_req_height;
			dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1)
					+ dpte_req_width;
			rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width;
		} else {
			log2_dpte_row_height =
					(log2_blk_width < log2_dpte_req_width) ?
							log2_blk_width : log2_dpte_req_width;
			dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1)
					+ dpte_req_height;
			rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height;
		}
	}
	if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB
		rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request
	else
		rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request

	rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height;

	// the dpte_group_bytes is reduced for the specific case of vertical
	// access of a tile surface that has dpte request of 8x1 ptes.

	if (hostvm_enable)
		rq_sizing_param->dpte_group_bytes = 512;
	else {
		// logical && (not bitwise &): every operand is a plain truth value
		if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
			rq_sizing_param->dpte_group_bytes = 512;
		else
			//full size
			rq_sizing_param->dpte_group_bytes = 2048;
	}

	//since pte request size is 64byte, the number of data pte requests per full sized group is as follows.
	log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes);
	log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests

	// full sized data pte group width in elements
	if (!surf_vert)
		log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width;
	else
		log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height;

	//But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B
	if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB
		log2_dpte_group_width = log2_dpte_group_width - 1;

	dpte_group_width = 1 << log2_dpte_group_width;

	// since dpte groups are only aligned to dpte_req_width and not dpte_group_width,
	// the upper bound for the dpte groups per row is as follows.
	rq_dlg_param->dpte_groups_per_row_ub = dml_ceil(
			(double) dpte_row_width_ub / dpte_group_width,
			1);
}
684
/*
 * Gather the viewport/pitch inputs of one plane, set the fixed chunk and
 * group sizes in rq_sizing_param, and hand everything to
 * get_meta_and_pte_attr().
 *
 * mode_lib:        forwarded to get_meta_and_pte_attr().
 * rq_sizing_param: output; chunk, meta chunk and mpte group sizes are set
 *                  here before the call.
 * rq_dlg_param,
 * rq_misc_param:   outputs, forwarded to get_meta_and_pte_attr().
 * pipe_param:      pipe source, destination and scaling parameters.
 * is_chroma:       true to use the *_c (chroma) source fields.
 *
 * With dest.odm_combine set, the viewport dimension along the scan direction
 * (width for horizontal scan, height for vertical scan) is replaced by the
 * smaller of the scaled full recout width and the scaled half hactive.
 */
static void get_surf_rq_param(
		struct display_mode_lib *mode_lib,
		display_data_rq_sizing_params_st *rq_sizing_param,
		display_data_rq_dlg_params_st *rq_dlg_param,
		display_data_rq_misc_params_st *rq_misc_param,
		const display_pipe_params_st *pipe_param,
		bool is_chroma)
{
	const display_pipe_source_params_st *src = &pipe_param->src;
	const unsigned int ppe = 1;
	unsigned int vp_width;
	unsigned int vp_height;
	unsigned int data_pitch;
	unsigned int meta_pitch;

	// FIXME check if ppe apply for both luma and chroma in 422 case
	if (!is_chroma) {
		vp_width = src->viewport_width / ppe;
		vp_height = src->viewport_height;
		data_pitch = src->data_pitch;
		meta_pitch = src->meta_pitch;
	} else {
		vp_width = src->viewport_width_c / ppe;
		vp_height = src->viewport_height_c;
		data_pitch = src->data_pitch_c;
		meta_pitch = src->meta_pitch_c;
	}

	if (pipe_param->dest.odm_combine) {
		// vp access direction: horizontal or vertical accessed
		const bool scan_vert = (src->source_scan == dm_vert);
		const double hratio = is_chroma ?
				pipe_param->scale_ratio_depth.hscl_ratio_c :
				pipe_param->scale_ratio_depth.hscl_ratio;
		const unsigned int hactive_half = pipe_param->dest.hactive / 2;
		const unsigned int full_src_vp_width = hratio * pipe_param->dest.full_recout_width;
		const unsigned int src_hactive_half = hratio * hactive_half;

		if (!scan_vert) {
			vp_width = dml_min(full_src_vp_width, src_hactive_half);
			dml_print("DML_DLG: %s: vp_width = %d\n", __func__, vp_width);
		} else {
			vp_height = dml_min(full_src_vp_width, src_hactive_half);
			dml_print("DML_DLG: %s: vp_height = %d\n", __func__, vp_height);
		}
		dml_print("DML_DLG: %s: full_src_vp_width = %d\n", __func__, full_src_vp_width);
		dml_print("DML_DLG: %s: hactive_half = %d\n", __func__, hactive_half);
		dml_print("DML_DLG: %s: src_hactive_half = %d\n", __func__, src_hactive_half);
	}

	rq_sizing_param->chunk_bytes = 8192;
	rq_sizing_param->min_chunk_bytes =
			(rq_sizing_param->chunk_bytes == 64 * 1024) ? 0 : 1024;

	rq_sizing_param->meta_chunk_bytes = 2048;
	rq_sizing_param->min_meta_chunk_bytes = 256;

	rq_sizing_param->mpte_group_bytes = src->hostvm ? 512 : 2048;

	get_meta_and_pte_attr(
			mode_lib,
			rq_dlg_param,
			rq_misc_param,
			rq_sizing_param,
			vp_width,
			vp_height,
			data_pitch,
			meta_pitch,
			src->source_format,
			src->sw_mode,
			src->macro_tile_size,
			src->source_scan,
			src->hostvm,
			is_chroma);
}
770
/*
 * Build the complete request parameter set for one pipe.
 *
 * mode_lib:   forwarded to the helpers.
 * rq_param:   output; the yuv420 flags are set, then the luma parameters,
 *             the chroma parameters (dual plane formats only) and finally
 *             the detile buffer split results are filled in.
 * pipe_param: pipe configuration to evaluate.
 *
 * The resulting structure is printed with print__rq_params_st().
 */
static void dml_rq_dlg_get_rq_params(
		struct display_mode_lib *mode_lib,
		display_rq_params_st *rq_param,
		const display_pipe_params_st *pipe_param)
{
	const bool fmt_420_8 = (pipe_param->src.source_format == dm_420_8);
	const bool fmt_420_10 = (pipe_param->src.source_format == dm_420_10);

	rq_param->yuv420 = fmt_420_8 || fmt_420_10;
	rq_param->yuv420_10bpc = fmt_420_10;

	// luma surface
	get_surf_rq_param(
			mode_lib,
			&rq_param->sizing.rq_l,
			&rq_param->dlg.rq_l,
			&rq_param->misc.rq_l,
			pipe_param,
			false);

	// chroma surface, only present for dual plane formats
	if (is_dual_plane((enum source_format_class) (pipe_param->src.source_format)))
		get_surf_rq_param(
				mode_lib,
				&rq_param->sizing.rq_c,
				&rq_param->dlg.rq_c,
				&rq_param->misc.rq_c,
				pipe_param,
				true);

	// calculate how to split the det buffer space between luma and chroma
	handle_det_buf_split(mode_lib, rq_param, &pipe_param->src);
	print__rq_params_st(mode_lib, rq_param);
}
804
/*
 * Compute the request (RQ) register values for one pipe.
 *
 * mode_lib:   display mode library instance, forwarded to the helpers.
 * rq_regs:    output; zeroed first, then filled by extract_rq_regs().
 * pipe_param: pipe configuration to evaluate.
 *
 * The resulting register set is printed with print__rq_regs_st().
 */
void dml21_rq_dlg_get_rq_reg(
		struct display_mode_lib *mode_lib,
		display_rq_regs_st *rq_regs,
		const display_pipe_params_st *pipe_param)
{
	display_rq_params_st params = {0};

	// start from an all-zero register image
	memset(rq_regs, 0, sizeof(*rq_regs));

	dml_rq_dlg_get_rq_params(mode_lib, &params, pipe_param);
	extract_rq_regs(mode_lib, rq_regs, &params);

	print__rq_regs_st(mode_lib, rq_regs);
}
818
819 // Note: currently taken in as is.
820 // It would be nice to decouple this code from the HW register implementation and to extract the code that is repeated for luma and chroma.
/*
 * Compute the DLG and TTU register values for the pipe selected by pipe_idx.
 *
 * Both output structures are zeroed and then filled field by field.  Many of
 * the timing quantities (prefetch lines, vm/row times, prefetch vratio,
 * dst_x/dst_y after scaler, ...) are not derived here: they are fetched via
 * the get_...() helpers, which receive the whole e2e_pipe_param array
 * together with num_pipes and pipe_idx.
 *
 * mode_lib:       library context; ip delay / min_vblank_lines values and
 *                 soc latency values are read from it.
 * e2e_pipe_param: array of pipe descriptions; entry pipe_idx supplies the
 *                 source, destination, output, clock and scaler settings.
 * num_pipes:      forwarded to the get_...() helpers.
 * pipe_idx:       index of the pipe whose registers are computed.
 * disp_dlg_regs:  output, DLG register values.
 * disp_ttu_regs:  output, TTU register values.
 * rq_dlg_param:   per-plane (rq_l and rq_c) swath width, requests per swath,
 *                 PTE-group, meta-chunk and row-height values.
 * dlg_sys_param:  only deepsleep_dcfclk_mhz is read, and only for a debug
 *                 print.
 * cstate_en, pstate_en: select which latencies are folded into line_wait.
 *
 * Some locals (min_dcfclk_mhz, t_calc_us, min_dst_y_ttu_vblank,
 * pixel_rate_delay_subtotal, lsw) are only printed, and line_wait is computed
 * but never read again in this function.
 */
dml_rq_dlg_get_dlg_params(struct display_mode_lib * mode_lib,const display_e2e_pipe_params_st * e2e_pipe_param,const unsigned int num_pipes,const unsigned int pipe_idx,display_dlg_regs_st * disp_dlg_regs,display_ttu_regs_st * disp_ttu_regs,const display_rq_dlg_params_st * rq_dlg_param,const display_dlg_sys_params_st * dlg_sys_param,const bool cstate_en,const bool pstate_en)821 static void dml_rq_dlg_get_dlg_params(
822 struct display_mode_lib *mode_lib,
823 const display_e2e_pipe_params_st *e2e_pipe_param,
824 const unsigned int num_pipes,
825 const unsigned int pipe_idx,
826 display_dlg_regs_st *disp_dlg_regs,
827 display_ttu_regs_st *disp_ttu_regs,
828 const display_rq_dlg_params_st *rq_dlg_param,
829 const display_dlg_sys_params_st *dlg_sys_param,
830 const bool cstate_en,
831 const bool pstate_en)
832 {
// Shorthand pointers into the parameters of the pipe selected by pipe_idx.
833 const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src;
834 const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest;
835 const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout;
836 const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg;
837 const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth;
838 const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps;
839
840 // -------------------------
841 // Section 1.15.2.1: OTG dependent Params
842 // -------------------------
843 // Timing
844 unsigned int htotal = dst->htotal;
845 // unsigned int hblank_start = dst.hblank_start; // TODO: Remove
846 unsigned int hblank_end = dst->hblank_end;
847 unsigned int vblank_start = dst->vblank_start;
848 unsigned int vblank_end = dst->vblank_end;
// Local copy of the IP minimum; it may be raised below but is never written to a register.
849 unsigned int min_vblank = mode_lib->ip.min_vblank_lines;
850
851 double dppclk_freq_in_mhz = clks->dppclk_mhz;
852 double dispclk_freq_in_mhz = clks->dispclk_mhz;
853 double refclk_freq_in_mhz = clks->refclk_mhz;
854 double pclk_freq_in_mhz = dst->pixel_rate_mhz;
855 bool interlaced = dst->interlaced;
856
// Ratio used throughout to convert pixel-clock counts into refclk counts.
857 double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
858
859 double min_dcfclk_mhz;
860 double t_calc_us;
861 double min_ttu_vblank;
862
863 double min_dst_y_ttu_vblank;
864 unsigned int dlg_vblank_start;
865 bool dual_plane;
866 unsigned int access_dir;
867 unsigned int vp_height_l;
868 unsigned int vp_width_l;
869 unsigned int vp_height_c;
870 unsigned int vp_width_c;
871
872 // Scaling
873 unsigned int htaps_l;
874 unsigned int htaps_c;
875 double hratio_l;
876 double hratio_c;
877 double vratio_l;
878 double vratio_c;
879 bool scl_enable;
880
881 double line_time_in_us;
882 // double vinit_l;
883 // double vinit_c;
884 // double vinit_bot_l;
885 // double vinit_bot_c;
886
887 // unsigned int swath_height_l;
888 unsigned int swath_width_ub_l;
889 // unsigned int dpte_bytes_per_row_ub_l;
890 unsigned int dpte_groups_per_row_ub_l;
891 // unsigned int meta_pte_bytes_per_frame_ub_l;
892 // unsigned int meta_bytes_per_row_ub_l;
893
894 // unsigned int swath_height_c;
895 unsigned int swath_width_ub_c;
896 // unsigned int dpte_bytes_per_row_ub_c;
897 unsigned int dpte_groups_per_row_ub_c;
898
899 unsigned int meta_chunks_per_row_ub_l;
900 unsigned int meta_chunks_per_row_ub_c;
901 unsigned int vupdate_offset;
902 unsigned int vupdate_width;
903 unsigned int vready_offset;
904
905 unsigned int dppclk_delay_subtotal;
906 unsigned int dispclk_delay_subtotal;
907 unsigned int pixel_rate_delay_subtotal;
908
909 unsigned int vstartup_start;
910 unsigned int dst_x_after_scaler;
911 unsigned int dst_y_after_scaler;
912 double line_wait;
913 double dst_y_prefetch;
914 double dst_y_per_vm_vblank;
915 double dst_y_per_row_vblank;
916 double dst_y_per_vm_flip;
917 double dst_y_per_row_flip;
918 double max_dst_y_per_vm_vblank;
919 double max_dst_y_per_row_vblank;
920 double lsw;
921 double vratio_pre_l;
922 double vratio_pre_c;
923 unsigned int req_per_swath_ub_l;
924 unsigned int req_per_swath_ub_c;
925 unsigned int meta_row_height_l;
926 unsigned int meta_row_height_c;
927 unsigned int swath_width_pixels_ub_l;
928 unsigned int swath_width_pixels_ub_c;
929 unsigned int scaler_rec_in_width_l;
930 unsigned int scaler_rec_in_width_c;
931 unsigned int dpte_row_height_l;
932 unsigned int dpte_row_height_c;
933 double hscale_pixel_rate_l;
934 double hscale_pixel_rate_c;
935 double min_hratio_fact_l;
936 double min_hratio_fact_c;
937 double refcyc_per_line_delivery_pre_l;
938 double refcyc_per_line_delivery_pre_c;
939 double refcyc_per_line_delivery_l;
940 double refcyc_per_line_delivery_c;
941
942 double refcyc_per_req_delivery_pre_l;
943 double refcyc_per_req_delivery_pre_c;
944 double refcyc_per_req_delivery_l;
945 double refcyc_per_req_delivery_c;
946
947 unsigned int full_recout_width;
948 double refcyc_per_req_delivery_pre_cur0;
949 double refcyc_per_req_delivery_cur0;
950 double refcyc_per_req_delivery_pre_cur1;
951 double refcyc_per_req_delivery_cur1;
952
// Start from all-zero register images; fields not assigned below stay 0.
953 memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
954 memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
955
956 dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en);
957 dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en);
958
959 dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz);
960 dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz);
961 dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
962 dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
963 dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
964 ASSERT(ref_freq_to_pix_freq < 4.0);
965
// The ratio is stored scaled by 2^19; refcyc_per_htotal is stored scaled by 2^8.
966 disp_dlg_regs->ref_freq_to_pix_freq =
967 (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19));
968 disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal
969 * dml_pow(2, 8));
// Interlaced timing uses half of vblank_end.
970 disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
971 disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end
972 * (double) ref_freq_to_pix_freq);
973 ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)dml_pow(2, 13));
974
975 min_dcfclk_mhz = dlg_sys_param->deepsleep_dcfclk_mhz;
976 t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes);
977 min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
978
979 min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal;
980 dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start;
981
// Stored scaled by 2^2.
982 disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2));
983 ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18));
984
985 dml_print(
986 "DML_DLG: %s: min_dcfclk_mhz = %3.2f\n",
987 __func__,
988 min_dcfclk_mhz);
989 dml_print(
990 "DML_DLG: %s: min_ttu_vblank = %3.2f\n",
991 __func__,
992 min_ttu_vblank);
993 dml_print(
994 "DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n",
995 __func__,
996 min_dst_y_ttu_vblank);
997 dml_print(
998 "DML_DLG: %s: t_calc_us = %3.2f\n",
999 __func__,
1000 t_calc_us);
1001 dml_print(
1002 "DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n",
1003 __func__,
1004 disp_dlg_regs->min_dst_y_next_start);
1005 dml_print(
1006 "DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n",
1007 __func__,
1008 ref_freq_to_pix_freq);
1009
1010 // -------------------------
1011 // Section 1.15.2.2: Prefetch, Active and TTU
1012 // -------------------------
1013 // Prefetch Calc
1014 // Source
1015 // dcc_en = src.dcc;
1016 dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
1017 access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
1018 // bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
1019 // bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
1020 vp_height_l = src->viewport_height;
1021 vp_width_l = src->viewport_width;
1022 vp_height_c = src->viewport_height_c;
1023 vp_width_c = src->viewport_width_c;
1024
1025 // Scaling
1026 htaps_l = taps->htaps;
1027 htaps_c = taps->htaps_c;
1028 hratio_l = scl->hscl_ratio;
1029 hratio_c = scl->hscl_ratio_c;
1030 vratio_l = scl->vscl_ratio;
1031 vratio_c = scl->vscl_ratio_c;
1032 scl_enable = scl->scl_enable;
1033
1034 line_time_in_us = (htotal / pclk_freq_in_mhz);
1035 swath_width_ub_l = rq_dlg_param->rq_l.swath_width_ub;
1036 dpte_groups_per_row_ub_l = rq_dlg_param->rq_l.dpte_groups_per_row_ub;
1037 swath_width_ub_c = rq_dlg_param->rq_c.swath_width_ub;
1038 dpte_groups_per_row_ub_c = rq_dlg_param->rq_c.dpte_groups_per_row_ub;
1039
1040 meta_chunks_per_row_ub_l = rq_dlg_param->rq_l.meta_chunks_per_row_ub;
1041 meta_chunks_per_row_ub_c = rq_dlg_param->rq_c.meta_chunks_per_row_ub;
1042 vupdate_offset = dst->vupdate_offset;
1043 vupdate_width = dst->vupdate_width;
1044 vready_offset = dst->vready_offset;
1045
1046 dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal;
1047 dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal;
1048
// Add the scaler delay matching whether the scaler is enabled.
1049 if (scl_enable)
1050 dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl;
1051 else
1052 dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only;
1053
// Formatter delay plus one cursor delay per enabled cursor.
1054 dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter
1055 + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor;
1056
1057 if (dout->dsc_enable) {
1058 double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1059
// The double is converted back to unsigned int by the compound assignment.
1060 dispclk_delay_subtotal += dsc_delay;
1061 }
1062
// Rescale both clock-domain delays by pclk/clk and sum them; the result is only printed.
1063 pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz
1064 + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz;
1065
1066 vstartup_start = dst->vstartup_start;
// vready_after_vcount0 is set when vstartup_start minus the
// (vready_offset + vupdate_width + vupdate_offset) / htotal term is at or
// below vblank_end; interlaced mode compares the halved line numbers.
1067 if (interlaced) {
1068 if (vstartup_start / 2.0
1069 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
1070 <= vblank_end / 2.0)
1071 disp_dlg_regs->vready_after_vcount0 = 1;
1072 else
1073 disp_dlg_regs->vready_after_vcount0 = 0;
1074 } else {
1075 if (vstartup_start
1076 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal
1077 <= vblank_end)
1078 disp_dlg_regs->vready_after_vcount0 = 1;
1079 else
1080 disp_dlg_regs->vready_after_vcount0 = 0;
1081 }
1082
1083 // TODO: Where is this coming from?
1084 if (interlaced)
1085 vstartup_start = vstartup_start / 2;
1086
1087 // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp?
// Warn, then raise the local min_vblank just above vstartup_start; the same
// message is printed before and after the adjustment.
1088 if (vstartup_start >= min_vblank) {
1089 dml_print(
1090 "WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n",
1091 __func__,
1092 vblank_start,
1093 vblank_end);
1094 dml_print(
1095 "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
1096 __func__,
1097 vstartup_start,
1098 min_vblank);
1099 min_vblank = vstartup_start + 1;
1100 dml_print(
1101 "WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n",
1102 __func__,
1103 vstartup_start,
1104 min_vblank);
1105 }
1106
1107 dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1108 dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1109
1110 dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
1111 dml_print(
1112 "DML_DLG: %s: pixel_rate_delay_subtotal = %d\n",
1113 __func__,
1114 pixel_rate_delay_subtotal);
1115 dml_print(
1116 "DML_DLG: %s: dst_x_after_scaler = %d\n",
1117 __func__,
1118 dst_x_after_scaler);
1119 dml_print(
1120 "DML_DLG: %s: dst_y_after_scaler = %d\n",
1121 __func__,
1122 dst_y_after_scaler);
1123
1124 // Lwait
1125 // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
// Largest of the applicable latencies, divided by the line time below.
// line_wait is not read again after this computation.
1126 line_wait = mode_lib->soc.urgent_latency_pixel_data_only_us;
1127 if (cstate_en)
1128 line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait);
1129 if (pstate_en)
1130 line_wait = dml_max(
1131 mode_lib->soc.dram_clock_change_latency_us
1132 + mode_lib->soc.urgent_latency_pixel_data_only_us, // TODO: Should this be urgent_latency_pixel_mixed_with_vm_data_us?
1133 line_wait);
1134 line_wait = line_wait / line_time_in_us;
1135
1136 dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1137 dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
1138
1139 dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(
1140 mode_lib,
1141 e2e_pipe_param,
1142 num_pipes,
1143 pipe_idx);
1144 dst_y_per_row_vblank = get_dst_y_per_row_vblank(
1145 mode_lib,
1146 e2e_pipe_param,
1147 num_pipes,
1148 pipe_idx);
1149 dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1150 dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1151
// Upper bounds checked by the ASSERTs below.
1152 max_dst_y_per_vm_vblank = 32.0;
1153 max_dst_y_per_row_vblank = 16.0;
1154
1155 // magic!
// NOTE(review): for htotal <= 75 the limits are relaxed; the reason is not visible here.
1156 if (htotal <= 75) {
1157 min_vblank = 300;
1158 max_dst_y_per_vm_vblank = 100.0;
1159 max_dst_y_per_row_vblank = 100.0;
1160 }
1161
1162 dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
1163 dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
1164 dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
1165 dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
1166
1167 ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
1168 ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
1169
1170 ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
// Prefetch lines left after the vm and row portions; only printed.
1171 lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank);
1172
1173 dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw);
1174
1175 vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1176 vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
1177
1178 dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l);
1179 dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c);
1180
1181 // Active
1182 req_per_swath_ub_l = rq_dlg_param->rq_l.req_per_swath_ub;
1183 req_per_swath_ub_c = rq_dlg_param->rq_c.req_per_swath_ub;
1184 meta_row_height_l = rq_dlg_param->rq_l.meta_row_height;
1185 meta_row_height_c = rq_dlg_param->rq_c.meta_row_height;
1186 swath_width_pixels_ub_l = 0;
1187 swath_width_pixels_ub_c = 0;
1188 scaler_rec_in_width_l = 0;
1189 scaler_rec_in_width_c = 0;
1190 dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
1191 dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
1192
1193 swath_width_pixels_ub_l = swath_width_ub_l;
1194 swath_width_pixels_ub_c = swath_width_ub_c;
1195
// min_hratio_fact: 2.0 when not downscaling horizontally (hratio <= 1);
// otherwise hratio * 2 for htaps <= 6 or hratio for more taps, both capped at 4.0.
1196 if (hratio_l <= 1)
1197 min_hratio_fact_l = 2.0;
1198 else if (htaps_l <= 6) {
1199 if ((hratio_l * 2.0) > 4.0)
1200 min_hratio_fact_l = 4.0;
1201 else
1202 min_hratio_fact_l = hratio_l * 2.0;
1203 } else {
1204 if (hratio_l > 4.0)
1205 min_hratio_fact_l = 4.0;
1206 else
1207 min_hratio_fact_l = hratio_l;
1208 }
1209
1210 hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
1211
// Same rule for the chroma plane.
1212 if (hratio_c <= 1)
1213 min_hratio_fact_c = 2.0;
1214 else if (htaps_c <= 6) {
1215 if ((hratio_c * 2.0) > 4.0)
1216 min_hratio_fact_c = 4.0;
1217 else
1218 min_hratio_fact_c = hratio_c * 2.0;
1219 } else {
1220 if (hratio_c > 4.0)
1221 min_hratio_fact_c = 4.0;
1222 else
1223 min_hratio_fact_c = hratio_c;
1224 }
1225
1226 hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz;
1227
// Chroma delivery values keep these zero defaults unless dual_plane is set.
1228 refcyc_per_line_delivery_pre_l = 0.;
1229 refcyc_per_line_delivery_pre_c = 0.;
1230 refcyc_per_line_delivery_l = 0.;
1231 refcyc_per_line_delivery_c = 0.;
1232
1233 refcyc_per_req_delivery_pre_l = 0.;
1234 refcyc_per_req_delivery_pre_c = 0.;
1235 refcyc_per_req_delivery_l = 0.;
1236 refcyc_per_req_delivery_c = 0.;
1237
1238 full_recout_width = 0;
1239 // In ODM
1240 if (src->is_hsplit) {
1241 // This "hack" is only allowed (and valid) for MPC combine. In ODM
1242 // combine, you MUST specify the full_recout_width...according to Oswin
1243 if (dst->full_recout_width == 0 && !dst->odm_combine) {
1244 dml_print(
1245 "DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n",
1246 __func__);
1247 full_recout_width = dst->recout_width * 2; // assume half split for dcn1
1248 } else
1249 full_recout_width = dst->full_recout_width;
1250 } else
1251 full_recout_width = dst->recout_width;
1252
1253 // As of DCN2, mpc_combine and odm_combine are mutually exclusive
// Per-line delivery: prefetch uses vratio_pre_l, active uses vratio_l;
// the last argument (1) selects "per line".
1254 refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(
1255 mode_lib,
1256 refclk_freq_in_mhz,
1257 pclk_freq_in_mhz,
1258 dst->odm_combine,
1259 full_recout_width,
1260 dst->hactive,
1261 vratio_pre_l,
1262 hscale_pixel_rate_l,
1263 swath_width_pixels_ub_l,
1264 1); // per line
1265
1266 refcyc_per_line_delivery_l = get_refcyc_per_delivery(
1267 mode_lib,
1268 refclk_freq_in_mhz,
1269 pclk_freq_in_mhz,
1270 dst->odm_combine,
1271 full_recout_width,
1272 dst->hactive,
1273 vratio_l,
1274 hscale_pixel_rate_l,
1275 swath_width_pixels_ub_l,
1276 1); // per line
1277
1278 dml_print("DML_DLG: %s: full_recout_width = %d\n", __func__, full_recout_width);
1279 dml_print(
1280 "DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n",
1281 __func__,
1282 hscale_pixel_rate_l);
1283 dml_print(
1284 "DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n",
1285 __func__,
1286 refcyc_per_line_delivery_pre_l);
1287 dml_print(
1288 "DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n",
1289 __func__,
1290 refcyc_per_line_delivery_l);
1291
1292 if (dual_plane) {
1293 refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(
1294 mode_lib,
1295 refclk_freq_in_mhz,
1296 pclk_freq_in_mhz,
1297 dst->odm_combine,
1298 full_recout_width,
1299 dst->hactive,
1300 vratio_pre_c,
1301 hscale_pixel_rate_c,
1302 swath_width_pixels_ub_c,
1303 1); // per line
1304
1305 refcyc_per_line_delivery_c = get_refcyc_per_delivery(
1306 mode_lib,
1307 refclk_freq_in_mhz,
1308 pclk_freq_in_mhz,
1309 dst->odm_combine,
1310 full_recout_width,
1311 dst->hactive,
1312 vratio_c,
1313 hscale_pixel_rate_c,
1314 swath_width_pixels_ub_c,
1315 1); // per line
1316
1317 dml_print(
1318 "DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n",
1319 __func__,
1320 refcyc_per_line_delivery_pre_c);
1321 dml_print(
1322 "DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n",
1323 __func__,
1324 refcyc_per_line_delivery_c);
1325 }
1326
1327 // TTU - Luma / Chroma
// For vertical scan the viewport height is used as the scaler input width.
1328 if (access_dir) { // vertical access
1329 scaler_rec_in_width_l = vp_height_l;
1330 scaler_rec_in_width_c = vp_height_c;
1331 } else {
1332 scaler_rec_in_width_l = vp_width_l;
1333 scaler_rec_in_width_c = vp_width_c;
1334 }
1335
// Per-request delivery: same helper, with requests per swath as the last argument.
1336 refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(
1337 mode_lib,
1338 refclk_freq_in_mhz,
1339 pclk_freq_in_mhz,
1340 dst->odm_combine,
1341 full_recout_width,
1342 dst->hactive,
1343 vratio_pre_l,
1344 hscale_pixel_rate_l,
1345 scaler_rec_in_width_l,
1346 req_per_swath_ub_l); // per req
1347 refcyc_per_req_delivery_l = get_refcyc_per_delivery(
1348 mode_lib,
1349 refclk_freq_in_mhz,
1350 pclk_freq_in_mhz,
1351 dst->odm_combine,
1352 full_recout_width,
1353 dst->hactive,
1354 vratio_l,
1355 hscale_pixel_rate_l,
1356 scaler_rec_in_width_l,
1357 req_per_swath_ub_l); // per req
1358
1359 dml_print(
1360 "DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n",
1361 __func__,
1362 refcyc_per_req_delivery_pre_l);
1363 dml_print(
1364 "DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n",
1365 __func__,
1366 refcyc_per_req_delivery_l);
1367
1368 ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
1369 ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
1370
1371 if (dual_plane) {
1372 refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(
1373 mode_lib,
1374 refclk_freq_in_mhz,
1375 pclk_freq_in_mhz,
1376 dst->odm_combine,
1377 full_recout_width,
1378 dst->hactive,
1379 vratio_pre_c,
1380 hscale_pixel_rate_c,
1381 scaler_rec_in_width_c,
1382 req_per_swath_ub_c); // per req
1383 refcyc_per_req_delivery_c = get_refcyc_per_delivery(
1384 mode_lib,
1385 refclk_freq_in_mhz,
1386 pclk_freq_in_mhz,
1387 dst->odm_combine,
1388 full_recout_width,
1389 dst->hactive,
1390 vratio_c,
1391 hscale_pixel_rate_c,
1392 scaler_rec_in_width_c,
1393 req_per_swath_ub_c); // per req
1394
1395 dml_print(
1396 "DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n",
1397 __func__,
1398 refcyc_per_req_delivery_pre_c);
1399 dml_print(
1400 "DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n",
1401 __func__,
1402 refcyc_per_req_delivery_c);
1403
1404 ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
1405 ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
1406 }
1407
1408 // TTU - Cursor
// Cursor values stay 0.0 unless the corresponding cursor is enabled
// (cursor 0 needs num_cursors > 0, cursor 1 needs num_cursors > 1).
1409 refcyc_per_req_delivery_pre_cur0 = 0.0;
1410 refcyc_per_req_delivery_cur0 = 0.0;
1411 if (src->num_cursors > 0) {
1412 calculate_ttu_cursor(
1413 mode_lib,
1414 &refcyc_per_req_delivery_pre_cur0,
1415 &refcyc_per_req_delivery_cur0,
1416 refclk_freq_in_mhz,
1417 ref_freq_to_pix_freq,
1418 hscale_pixel_rate_l,
1419 scl->hscl_ratio,
1420 vratio_pre_l,
1421 vratio_l,
1422 src->cur0_src_width,
1423 (enum cursor_bpp) (src->cur0_bpp));
1424 }
1425
1426 refcyc_per_req_delivery_pre_cur1 = 0.0;
1427 refcyc_per_req_delivery_cur1 = 0.0;
1428 if (src->num_cursors > 1) {
1429 calculate_ttu_cursor(
1430 mode_lib,
1431 &refcyc_per_req_delivery_pre_cur1,
1432 &refcyc_per_req_delivery_cur1,
1433 refclk_freq_in_mhz,
1434 ref_freq_to_pix_freq,
1435 hscale_pixel_rate_l,
1436 scl->hscl_ratio,
1437 vratio_pre_l,
1438 vratio_l,
1439 src->cur1_src_width,
1440 (enum cursor_bpp) (src->cur1_bpp));
1441 }
1442
1443 // TTU - Misc
1444 // all hard-coded
1445
1446 // Assignment to register structures
1447 disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
1448 disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk
1449 ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)dml_pow(2, 13));
// Line counts are stored scaled by 2^2; the prefetch vratios are scaled by 2^19.
1450 disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2));
1451 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2));
1452 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2));
1453 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2));
1454 disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2));
1455
1456 disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19));
1457 disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19));
1458
1459 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
1460 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
1461 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
1462 dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
1463
// Row time in refclk cycles, spread over the PTE groups of one row.
1464 disp_dlg_regs->refcyc_per_pte_group_vblank_l =
1465 (unsigned int) (dst_y_per_row_vblank * (double) htotal
1466 * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l);
// refclk_freq_in_mhz / ref_freq_to_pix_freq equals the pixel clock in MHz:
// below 28 the value is clamped to 2^13 - 1 instead of tripping the ASSERT.
1467 if ((refclk_freq_in_mhz / ref_freq_to_pix_freq < 28) &&
1468 disp_dlg_regs->refcyc_per_pte_group_vblank_l >= (unsigned int)dml_pow(2, 13))
1469 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (1 << 13) - 1;
1470 else
1471 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13));
1472
1473 if (dual_plane) {
1474 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank
1475 * (double) htotal * ref_freq_to_pix_freq
1476 / (double) dpte_groups_per_row_ub_c);
1477 if ((refclk_freq_in_mhz / ref_freq_to_pix_freq < 28) &&
1478 disp_dlg_regs->refcyc_per_pte_group_vblank_c >= (unsigned int)dml_pow(2, 13))
1479 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (1 << 13) - 1;
1480 else
1481 ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c
1482 < (unsigned int)dml_pow(2, 13));
1483 }
1484
// The meta-chunk vblank time is only computed when DCC is enabled on the source.
1485 if (src->dcc)
1486 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l =
1487 (unsigned int) (dst_y_per_row_vblank * (double) htotal
1488 * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l);
1489 else
1490 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = 0;
1491 ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int)dml_pow(2, 13));
1492
1493 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c =
1494 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
1495
// In the flip variants the cast binds tighter than '/': the product is
// truncated to unsigned int first and then integer-divided.
// NOTE(review): these divisors are not guarded against zero and the
// meta-chunk division is not conditional on src->dcc - confirm the
// rq_dlg_param values are always non-zero here.
1496 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
1497 * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l;
1498 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal
1499 * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l;
1500
1501 if (dual_plane) {
1502 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip
1503 * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c;
1504 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip
1505 * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c;
1506 }
1507
// Helper results are multiplied by the refclk frequency; the per-request
// values are additionally scaled by 2^10.
1508 disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
1509 disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
1510 disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
1511 disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
1512
1513 // Clamp to max for now
1514 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
1515 disp_dlg_regs->refcyc_per_vm_group_vblank = dml_pow(2, 23) - 1;
1516
1517 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)dml_pow(2, 23))
1518 disp_dlg_regs->refcyc_per_vm_group_flip = dml_pow(2, 23) - 1;
1519
1520 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)dml_pow(2, 23))
1521 disp_dlg_regs->refcyc_per_vm_req_vblank = dml_pow(2, 23) - 1;
1522
1523 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)dml_pow(2, 23))
1524 disp_dlg_regs->refcyc_per_vm_req_flip = dml_pow(2, 23) - 1;
// Nominal (active) values: row height divided by the vertical ratio, scaled by 2^2.
1525 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l
1526 / (double) vratio_l * dml_pow(2, 2));
1527 ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int)dml_pow(2, 17));
1528
1529 if (dual_plane) {
1530 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c
1531 / (double) vratio_c * dml_pow(2, 2));
// Chroma overflow only produces a warning, whereas luma uses an ASSERT.
1532 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) {
1533 dml_print(
1534 "DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n",
1535 __func__,
1536 disp_dlg_regs->dst_y_per_pte_row_nom_c,
1537 (unsigned int)dml_pow(2, 17) - 1);
1538 }
1539 }
1540
1541 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l
1542 / (double) vratio_l * dml_pow(2, 2));
1543 ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int)dml_pow(2, 17));
1544
1545 disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now
1546
1547 dml_print(
1548 "DML: Trow: %fus\n",
1549 line_time_in_us * (double)dpte_row_height_l / (double)vratio_l);
1550
// Nominal per-group / per-chunk times in refclk cycles, clamped to 2^23 - 1.
1551 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l
1552 / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
1553 / (double) dpte_groups_per_row_ub_l);
1554 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23))
1555 disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1;
1556 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l
1557 / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq
1558 / (double) meta_chunks_per_row_ub_l);
1559 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23))
1560 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1;
1561
1562 if (dual_plane) {
1563 disp_dlg_regs->refcyc_per_pte_group_nom_c =
1564 (unsigned int) ((double) dpte_row_height_c / (double) vratio_c
1565 * (double) htotal * ref_freq_to_pix_freq
1566 / (double) dpte_groups_per_row_ub_c);
1567 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23))
1568 disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1;
1569
1570 // TODO: Is this the right calculation? Does htotal need to be halved?
1571 disp_dlg_regs->refcyc_per_meta_chunk_nom_c =
1572 (unsigned int) ((double) meta_row_height_c / (double) vratio_c
1573 * (double) htotal * ref_freq_to_pix_freq
1574 / (double) meta_chunks_per_row_ub_c);
1575 if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23))
1576 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1;
1577 }
1578
// Per-line delivery values are floored to whole refclk cycles.
1579 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(
1580 refcyc_per_line_delivery_pre_l, 1);
1581 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(
1582 refcyc_per_line_delivery_l, 1);
1583 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)dml_pow(2, 13));
1584 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)dml_pow(2, 13));
1585
1586 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(
1587 refcyc_per_line_delivery_pre_c, 1);
1588 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(
1589 refcyc_per_line_delivery_c, 1);
1590 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)dml_pow(2, 13));
1591 ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)dml_pow(2, 13));
1592
// Hard-coded cursor and DRQ settings.
1593 disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
1594 disp_dlg_regs->dst_y_offset_cur0 = 0;
1595 disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
1596 disp_dlg_regs->dst_y_offset_cur1 = 0;
1597
1598 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
1599
// TTU per-request delivery values are stored scaled by 2^10.
1600 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l
1601 * dml_pow(2, 10));
1602 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l
1603 * dml_pow(2, 10));
1604 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c
1605 * dml_pow(2, 10));
1606 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c
1607 * dml_pow(2, 10));
1608 disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 =
1609 (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
1610 disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0
1611 * dml_pow(2, 10));
1612 disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 =
1613 (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10));
1614 disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1
1615 * dml_pow(2, 10));
// QoS watermarks: low is fixed at 0, high is four line times in refclk cycles.
1616 disp_ttu_regs->qos_level_low_wm = 0;
1617 ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14));
1618 disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal
1619 * ref_freq_to_pix_freq);
1620 ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));
1621
// Hard-coded QoS levels; only cur0 is assigned here, so any cur1
// counterparts keep the zero written by the memset.
1622 disp_ttu_regs->qos_level_flip = 14;
1623 disp_ttu_regs->qos_level_fixed_l = 8;
1624 disp_ttu_regs->qos_level_fixed_c = 8;
1625 disp_ttu_regs->qos_level_fixed_cur0 = 8;
1626 disp_ttu_regs->qos_ramp_disable_l = 0;
1627 disp_ttu_regs->qos_ramp_disable_c = 0;
1628 disp_ttu_regs->qos_ramp_disable_cur0 = 0;
1629
// Product of the helper's min_ttu_vblank value and the refclk frequency in MHz.
1630 disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz;
1631 ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24));
1632
1633 print__ttu_regs_st(mode_lib, disp_ttu_regs);
1634 print__dlg_regs_st(mode_lib, disp_dlg_regs);
1635 }
1636
/*
 * Entry point for the DLG/TTU register calculation of a single pipe.
 *
 * Gathers the system-level values (watermarks, deep-sleep DCFCLK, urgent
 * extra latency, immediate-flip totals) from mode_lib through the get_*()
 * accessors, computes the RQ parameters for pipe pipe_idx, and then hands
 * everything to dml_rq_dlg_get_dlg_params() together with the dlg_regs and
 * ttu_regs output structures.
 *
 * vm_en, ignore_viewport_pos and immediate_flip_support are part of the
 * interface but are not referenced in this function body.
 */
void dml21_rq_dlg_get_dlg_reg(
		struct display_mode_lib *mode_lib,
		display_dlg_regs_st *dlg_regs,
		display_ttu_regs_st *ttu_regs,
		const display_e2e_pipe_params_st *e2e_pipe_param,
		const unsigned int num_pipes,
		const unsigned int pipe_idx,
		const bool cstate_en,
		const bool pstate_en,
		const bool vm_en,
		const bool ignore_viewport_pos,
		const bool immediate_flip_support)
{
	display_rq_params_st pipe_rq = {0};
	display_dlg_sys_params_st sys = {0};

	/*
	 * System-wide inputs. The accessors are called in a fixed order; keep
	 * it, since their side effects on mode_lib are not visible from here.
	 */
	sys.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes);
	sys.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, e2e_pipe_param, num_pipes);
	sys.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes);
	sys.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes);
	sys.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes);
	sys.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes);
	sys.total_flip_bw = get_total_immediate_flip_bw(mode_lib, e2e_pipe_param, num_pipes);
	sys.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes);

	print__dlg_sys_params_st(mode_lib, &sys);

	/* Per-pipe part: RQ parameters first, then the DLG/TTU registers. */
	dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx);

	dml_rq_dlg_get_rq_params(mode_lib, &pipe_rq, &e2e_pipe_param[pipe_idx].pipe);
	dml_rq_dlg_get_dlg_params(mode_lib, e2e_pipe_param, num_pipes, pipe_idx,
			dlg_regs, ttu_regs,
			&pipe_rq.dlg, &sys,
			cstate_en, pstate_en);

	dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx);
}
1691
calculate_ttu_cursor(struct display_mode_lib * mode_lib,double * refcyc_per_req_delivery_pre_cur,double * refcyc_per_req_delivery_cur,double refclk_freq_in_mhz,double ref_freq_to_pix_freq,double hscale_pixel_rate_l,double hscl_ratio,double vratio_pre_l,double vratio_l,unsigned int cur_width,enum cursor_bpp cur_bpp)1692 static void calculate_ttu_cursor(
1693 struct display_mode_lib *mode_lib,
1694 double *refcyc_per_req_delivery_pre_cur,
1695 double *refcyc_per_req_delivery_cur,
1696 double refclk_freq_in_mhz,
1697 double ref_freq_to_pix_freq,
1698 double hscale_pixel_rate_l,
1699 double hscl_ratio,
1700 double vratio_pre_l,
1701 double vratio_l,
1702 unsigned int cur_width,
1703 enum cursor_bpp cur_bpp)
1704 {
1705 unsigned int cur_src_width = cur_width;
1706 unsigned int cur_req_size = 0;
1707 unsigned int cur_req_width = 0;
1708 double cur_width_ub = 0.0;
1709 double cur_req_per_width = 0.0;
1710 double hactive_cur = 0.0;
1711
1712 ASSERT(cur_src_width <= 256);
1713
1714 *refcyc_per_req_delivery_pre_cur = 0.0;
1715 *refcyc_per_req_delivery_cur = 0.0;
1716 if (cur_src_width > 0) {
1717 unsigned int cur_bit_per_pixel = 0;
1718
1719 if (cur_bpp == dm_cur_2bit) {
1720 cur_req_size = 64; // byte
1721 cur_bit_per_pixel = 2;
1722 } else { // 32bit
1723 cur_bit_per_pixel = 32;
1724 if (cur_src_width >= 1 && cur_src_width <= 16)
1725 cur_req_size = 64;
1726 else if (cur_src_width >= 17 && cur_src_width <= 31)
1727 cur_req_size = 128;
1728 else
1729 cur_req_size = 256;
1730 }
1731
1732 cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0);
1733 cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1)
1734 * (double) cur_req_width;
1735 cur_req_per_width = cur_width_ub / (double) cur_req_width;
1736 hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor
1737
1738 if (vratio_pre_l <= 1.0) {
1739 *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq
1740 / (double) cur_req_per_width;
1741 } else {
1742 *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz
1743 * (double) cur_src_width / hscale_pixel_rate_l
1744 / (double) cur_req_per_width;
1745 }
1746
1747 ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13));
1748
1749 if (vratio_l <= 1.0) {
1750 *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq
1751 / (double) cur_req_per_width;
1752 } else {
1753 *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz
1754 * (double) cur_src_width / hscale_pixel_rate_l
1755 / (double) cur_req_per_width;
1756 }
1757
1758 dml_print(
1759 "DML_DLG: %s: cur_req_width = %d\n",
1760 __func__,
1761 cur_req_width);
1762 dml_print(
1763 "DML_DLG: %s: cur_width_ub = %3.2f\n",
1764 __func__,
1765 cur_width_ub);
1766 dml_print(
1767 "DML_DLG: %s: cur_req_per_width = %3.2f\n",
1768 __func__,
1769 cur_req_per_width);
1770 dml_print(
1771 "DML_DLG: %s: hactive_cur = %3.2f\n",
1772 __func__,
1773 hactive_cur);
1774 dml_print(
1775 "DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n",
1776 __func__,
1777 *refcyc_per_req_delivery_pre_cur);
1778 dml_print(
1779 "DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n",
1780 __func__,
1781 *refcyc_per_req_delivery_cur);
1782
1783 ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13));
1784 }
1785 }
1786
1787